├── .github
    ├── ISSUE_TEMPLATE
    │   ├── config.yml
    │   ├── feature_request.yml
    │   └── bug_report.yml
    ├── workflows
    │   ├── docs.yml
    │   └── build.yml
    └── PULL_REQUEST_TEMPLATE.md
├── docs
    └── benchmarks
    │   ├── par_tree_build.png
    │   ├── concurrent_vector.png
    │   ├── nested_for_large.png
    │   ├── nested_for_medium.png
    │   ├── nested_for_small.png
    │   ├── pipelines_64thread.png
    │   └── pipelines_256thread.png
├── dispenso
    ├── detail
    │   ├── quanta.h
    │   ├── per_thread_info.cpp
    │   ├── can_invoke.h
    │   ├── result_of.h
    │   ├── quanta.cpp
    │   ├── notifier_common.h
    │   ├── math.h
    │   ├── once_callable_impl.h
    │   ├── per_thread_info.h
    │   ├── timed_task_impl.h
    │   ├── op_result.h
    │   ├── graph_executor_impl.h
    │   ├── rw_lock_impl.h
    │   ├── task_set_impl.h
    │   └── concurrent_vector_impl2.h
    ├── timing.h
    ├── thread_id.cpp
    ├── thread_id.h
    ├── priority.h
    ├── third-party
    │   └── moodycamel
    │   │   ├── README.txt
    │   │   └── LICENSE.md
    ├── tsan_annotations.cpp
    ├── latch.h
    ├── completion_event.h
    ├── graph_executor.h
    ├── pool_allocator.h
    ├── utils
    │   └── graph_dot.h
    ├── schedulable.h
    ├── tsan_annotations.h
    ├── once_function.h
    ├── timed_task.cpp
    ├── pool_allocator.cpp
    ├── small_buffer_allocator.cpp
    ├── rw_lock.h
    ├── async_request.h
    ├── CMakeLists.txt
    ├── resource_pool.h
    ├── small_buffer_allocator.h
    ├── timing.cpp
    ├── task_set.cpp
    ├── pipeline.h
    ├── priority.cpp
    └── graph.cpp
├── cmake
    └── DispensoConfig.cmake.in
├── tests
    ├── forward_shared_pool.cpp
    ├── concurrent_vector_a_test.cpp
    ├── concurrent_vector_b_test.cpp
    ├── concurrent_vector_default_test.cpp
    ├── shared_pool_test.cpp
    ├── test_tid.h
    ├── concurrent_vector_test_common_types.h
    ├── async_request_test.cpp
    ├── thread_id_test.cpp
    ├── resource_pool_test.cpp
    ├── CMakeLists.txt
    ├── rw_lock_test.cpp
    ├── latch_test.cpp
    ├── concurrent_object_arena_test.cpp
    ├── completion_event_test.cpp
    ├── pool_allocator_test.cpp
    ├── priority_test.cpp
    └── once_function_test.cpp
├── benchmarks
    ├── benchmark_common.h
    ├── small_buffer_benchmark.cpp
    ├── CMakeLists.txt
    ├── thread_benchmark_common.h
    ├── rw_lock_benchmark.cpp
    ├── once_function_benchmark.cpp
    ├── for_latency_benchmark.cpp
    └── trivial_compute_benchmark.cpp
├── .gitignore
├── LICENSE
├── CMakeLists.txt
├── CONTRIBUTING.md
├── .clang-format
└── CODE_OF_CONDUCT.md


/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
1 | blank_issues_enabled: false
2 | 


--------------------------------------------------------------------------------
/docs/benchmarks/par_tree_build.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookincubator/dispenso/HEAD/docs/benchmarks/par_tree_build.png


--------------------------------------------------------------------------------
/docs/benchmarks/concurrent_vector.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookincubator/dispenso/HEAD/docs/benchmarks/concurrent_vector.png


--------------------------------------------------------------------------------
/docs/benchmarks/nested_for_large.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookincubator/dispenso/HEAD/docs/benchmarks/nested_for_large.png


--------------------------------------------------------------------------------
/docs/benchmarks/nested_for_medium.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookincubator/dispenso/HEAD/docs/benchmarks/nested_for_medium.png


--------------------------------------------------------------------------------
/docs/benchmarks/nested_for_small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookincubator/dispenso/HEAD/docs/benchmarks/nested_for_small.png


--------------------------------------------------------------------------------
/docs/benchmarks/pipelines_64thread.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookincubator/dispenso/HEAD/docs/benchmarks/pipelines_64thread.png


--------------------------------------------------------------------------------
/docs/benchmarks/pipelines_256thread.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookincubator/dispenso/HEAD/docs/benchmarks/pipelines_256thread.png


--------------------------------------------------------------------------------
/dispenso/detail/quanta.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the MIT license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | #pragma once
 9 | 
10 | namespace dispenso {
11 | namespace detail {
12 | void registerFineSchedulerQuanta();
13 | } // namespace detail
14 | } // namespace dispenso
15 | 


--------------------------------------------------------------------------------
/cmake/DispensoConfig.cmake.in:
--------------------------------------------------------------------------------
 1 | # 
 2 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 3 | # 
 4 | # This source code is licensed under the MIT license found in the
 5 | # LICENSE file in the root directory of this source tree.
 6 | #
 7 | 
 8 | @PACKAGE_INIT@
 9 | 
10 | include(CMakeFindDependencyMacro)
11 | 
12 | find_dependency(Threads)
13 | 
14 | include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@_Exports.cmake")
15 | 
16 | check_required_components("@PROJECT_NAME@")
17 | 


--------------------------------------------------------------------------------
/dispenso/timing.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the MIT license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | /**
 9 |  * @file Utilities for getting the current time.
10 |  **/
11 | 
12 | #pragma once
13 | 
14 | #include <dispenso/platform.h>
15 | 
16 | namespace dispenso {
17 | 
18 | DISPENSO_DLL_ACCESS double getTime();
19 | 
20 | } // namespace dispenso
21 | 


--------------------------------------------------------------------------------
/tests/forward_shared_pool.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the MIT license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | #include <dispenso/thread_pool.h>
 9 | 
10 | #ifdef _WIN32
11 | __declspec(dllexport)
12 | #else
13 | __attribute__((visibility("default")))
14 | #endif
15 | void* DISPENSO_EXPORT_NAME() {
16 |   return &dispenso::globalThreadPool();
17 | }
18 | 


--------------------------------------------------------------------------------
/.github/workflows/docs.yml:
--------------------------------------------------------------------------------
 1 | name: Docs
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ main ]
 6 | 
 7 | jobs:
 8 |   build:
 9 |     runs-on: ubuntu-latest
10 |     steps:
11 |     - uses: actions/checkout@v2
12 | 
13 |     - name: Doxygen Action
14 |       uses: mattnotmitt/doxygen-action@v1
15 |       with:
16 |         working-directory: "docs/"
17 |         doxyfile-path: "./Doxyfile"
18 | 
19 | 
20 |     - name: Deploy
21 |       uses: peaceiris/actions-gh-pages@v3
22 |       with:
23 |         github_token: ${{ secrets.GITHUB_TOKEN }}
24 |         publish_dir: ./docs/doxygen/html
25 | 


--------------------------------------------------------------------------------
/dispenso/detail/per_thread_info.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the MIT license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | #include <dispenso/detail/per_thread_info.h>
 9 | 
10 | namespace dispenso {
11 | namespace detail {
12 | 
13 | namespace {
14 | DISPENSO_THREAD_LOCAL PerThreadInfo g_perThreadInfo;
15 | }
16 | PerThreadInfo& PerPoolPerThreadInfo::info() {
17 |   return g_perThreadInfo;
18 | }
19 | 
20 | } // namespace detail
21 | } // namespace dispenso
22 | 


--------------------------------------------------------------------------------
/benchmarks/benchmark_common.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the MIT license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | #pragma once
 9 | 
10 | #include <benchmark/benchmark.h>
11 | 
12 | #if defined(__GNUC__) || defined(__clang__)
13 | #define UNUSED_VAR myLocalForLoopVar __attribute__((unused))
14 | #elif defined(_MSC_VER)
15 | #define UNUSED_VAR myLocalForLoopVar __pragma(warning(suppress : 4100))
16 | #else
17 | #define UNUSED_VAR myLocalForLoopVar
18 | #endif
19 | 


--------------------------------------------------------------------------------
/tests/concurrent_vector_a_test.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the MIT license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | #include "concurrent_vector_test_common_types.h"
 9 | 
10 | using TestTraitsTypes = ::testing::Types<TestTraitsA>;
11 | DISPENSO_DISABLE_WARNING_PUSH
12 | DISPENSO_DISABLE_WARNING_ZERO_VARIADIC_MACRO_ARGUMENTS
13 | TYPED_TEST_SUITE(ConcurrentVectorTest, TestTraitsTypes);
14 | DISPENSO_DISABLE_WARNING_POP
15 | 
16 | #include "concurrent_vector_test_common.h"
17 | 


--------------------------------------------------------------------------------
/tests/concurrent_vector_b_test.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the MIT license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | #include "concurrent_vector_test_common_types.h"
 9 | 
10 | using TestTraitsTypes = ::testing::Types<TestTraitsB>;
11 | DISPENSO_DISABLE_WARNING_PUSH
12 | DISPENSO_DISABLE_WARNING_ZERO_VARIADIC_MACRO_ARGUMENTS
13 | TYPED_TEST_SUITE(ConcurrentVectorTest, TestTraitsTypes);
14 | DISPENSO_DISABLE_WARNING_POP
15 | 
16 | #include "concurrent_vector_test_common.h"
17 | 


--------------------------------------------------------------------------------
/tests/concurrent_vector_default_test.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the MIT license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | #include "concurrent_vector_test_common_types.h"
 9 | 
10 | using TestTraitsTypes = ::testing::Types<dispenso::DefaultConcurrentVectorTraits>;
11 | DISPENSO_DISABLE_WARNING_PUSH
12 | DISPENSO_DISABLE_WARNING_ZERO_VARIADIC_MACRO_ARGUMENTS
13 | TYPED_TEST_SUITE(ConcurrentVectorTest, TestTraitsTypes);
14 | DISPENSO_DISABLE_WARNING_POP
15 | 
16 | #include "concurrent_vector_test_common.h"
17 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Compiled source #
 2 | ###################
 3 | *.com
 4 | *.class
 5 | *.dll
 6 | *.exe
 7 | *.o
 8 | *.so
 9 | *.a
10 | bin
11 | lib
12 | 
13 | # Packages #
14 | ############
15 | # it's better to unpack these files and commit the raw source
16 | # git has its own built in compression methods
17 | *.7z
18 | *.dmg
19 | *.gz
20 | *.iso
21 | *.jar
22 | *.rar
23 | *.tar
24 | *.zip
25 | /.project
26 | 
27 | # generated cmake files #
28 | #########################
29 | *CMakeCache.txt
30 | *.log
31 | *.make
32 | *.cmake
33 | CMakeFiles
34 | Makefile
35 | *Dir
36 | 
37 | /build/*
38 | docs/doxygen/
39 | 
40 | # Clang #
41 | #########
42 | .cache/
43 | 


--------------------------------------------------------------------------------
/tests/shared_pool_test.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the MIT license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | #include <gmock/gmock.h>
 9 | #include <gtest/gtest.h>
10 | 
11 | #ifdef _WIN32
12 | __declspec(dllimport) void* sharedPoolA();
13 | __declspec(dllimport) void* sharedPoolB();
14 | #else
15 | __attribute__((visibility("default"))) void* sharedPoolA();
16 | __attribute__((visibility("default"))) void* sharedPoolB();
17 | #endif
18 | 
19 | TEST(ThreadPool, SharedPool) {
20 |   EXPECT_EQ(sharedPoolA(), sharedPoolB());
21 | }
22 | 


--------------------------------------------------------------------------------
/dispenso/detail/can_invoke.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the MIT license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | #include <stdint.h>
 9 | 
10 | #include <type_traits>
11 | #include <utility>
12 | 
13 | namespace dispenso {
14 | namespace detail {
15 | 
16 | template <class>
17 | using void_t = void;
18 | template <class Sig, class = void>
19 | struct CanInvoke : std::false_type {};
20 | template <class F, class... Args>
21 | struct CanInvoke<F(Args...), void_t<decltype(std::declval<F>()(std::declval<Args>()...))>>
22 |     : std::true_type {};
23 | 
24 | } // namespace detail
25 | } // namespace dispenso
26 | 


--------------------------------------------------------------------------------
/dispenso/thread_id.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the MIT license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | #include <dispenso/thread_id.h>
 9 | 
10 | namespace dispenso {
11 | 
12 | std::atomic<uint64_t> nextThread{0};
13 | constexpr uint64_t kInvalidThread = std::numeric_limits<uint64_t>::max();
14 | DISPENSO_THREAD_LOCAL uint64_t currentThread = kInvalidThread;
15 | 
16 | uint64_t threadId() {
17 |   if (currentThread == kInvalidThread) {
18 |     currentThread = nextThread.fetch_add(uint64_t{1}, std::memory_order_relaxed);
19 |   }
20 |   return currentThread;
21 | }
22 | 
23 | } // namespace dispenso
24 | 


--------------------------------------------------------------------------------
/dispenso/detail/result_of.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the MIT license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | #pragma once
 9 | 
10 | #include <type_traits>
11 | 
12 | namespace dispenso {
13 | namespace detail {
14 | 
15 | #if defined(__cpp_lib_is_invocable) && __cpp_lib_is_invocable >= 201703L
16 | template <typename F, typename... Args>
17 | using ResultOf = typename std::invoke_result_t<std::decay_t<F>, std::decay_t<Args>...>;
18 | #else
19 | template <typename F, typename... Args>
20 | using ResultOf =
21 |     typename std::result_of<typename std::decay<F>::type(typename std::decay<Args>::type...)>::type;
22 | #endif // c++17
23 | 
24 | } // namespace detail
25 | } // namespace dispenso
26 | 


--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
 1 | name: Build and test
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [main]
 6 |   pull_request:
 7 |     branches: [main]
 8 | 
 9 | jobs:
10 |   build-tests:
11 |     name: Build on ${{ matrix.os }}
12 |     runs-on: ${{ matrix.os }}
13 |     strategy:
14 |       matrix:
15 |         os: [macos-latest, ubuntu-latest, windows-latest] 
16 |     env:
17 |       CTEST_OUTPUT_ON_FAILURE: 1
18 |     steps:
19 |       - uses: actions/checkout@v2
20 |       - name: Configuring
21 |         run: |
22 |           mkdir build && cd build && cmake .. -DDISPENSO_BUILD_TESTS=ON  -DCMAKE_BUILD_TYPE=Release
23 |       - name: Build
24 |         working-directory: ./build
25 |         run: |
26 |           cmake --build . --parallel 4 --config Release
27 |       - name: Running Unit Tests
28 |         working-directory: ./build
29 |         run: |
30 |           ctest -LE flaky --build-config Release
31 | 


--------------------------------------------------------------------------------
/dispenso/detail/quanta.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the MIT license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | #ifdef _WIN32
 9 | #include <Windows.h>
10 | #include <timeapi.h>
11 | #endif
12 | 
13 | #include <dispenso/detail/quanta.h>
14 | 
15 | namespace dispenso {
16 | #ifdef _WIN32
17 | 
18 | namespace {
19 | struct OsQuantaSetter {
20 |   OsQuantaSetter() {
21 |     timeBeginPeriod(1);
22 |   }
23 |   ~OsQuantaSetter() {
24 |     timeEndPeriod(1);
25 |   }
26 | };
27 | } // namespace
28 | #else
29 | namespace {
30 | struct OsQuantaSetter {};
31 | } // namespace
32 | 
33 | #endif // _WIN32
34 | 
35 | namespace detail {
36 | void registerFineSchedulerQuanta() {
37 |   static OsQuantaSetter setter;
38 |   (void)setter;
39 | }
40 | } // namespace detail
41 | } // namespace dispenso
42 | 


--------------------------------------------------------------------------------
/tests/test_tid.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the MIT license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | #pragma once
 9 | 
10 | #include <dispenso/platform.h>
11 | 
12 | // Note that this header is intended for direct inclusion into test cpps that require the
13 | // functionality.  tids are essentially independent for tranlation units (don't expect
14 | // coordinated/sane behavior if used from multiple cpps in the same binary).
15 | 
16 | namespace {
17 | 
18 | std::atomic<int> g_nextTid(0);
19 | DISPENSO_THREAD_LOCAL int g_tid = -1;
20 | 
21 | inline void resetTestTid() {
22 |   g_tid = -1;
23 |   g_nextTid.store(0);
24 | }
25 | 
26 | inline int getTestTid() {
27 |   if (g_tid < 0) {
28 |     g_tid = g_nextTid.fetch_add(1, std::memory_order_relaxed);
29 |   }
30 |   return g_tid;
31 | }
32 | 
33 | } // namespace
34 | 


--------------------------------------------------------------------------------
/dispenso/thread_id.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the MIT license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | /**
 9 |  * @file Utilities for getting a unique thread identifier
10 |  **/
11 | 
12 | #pragma once
13 | 
14 | #include <dispenso/platform.h>
15 | 
16 | namespace dispenso {
17 | 
18 | /**
19 |  * Get the current thread's identifier, unique within the current process.
20 |  *
21 |  * @return An integer representing the current thread.
22 |  *
23 |  * @note Thread IDs are assumed to not be reused over the lifetime of a process, but this should
24 |  * still enable processes running for thousands of years, even with very poor spawn/kill thread
25 |  * patterns.
26 |  *
27 |  * @note If thread ID is needed for cross-process synchronization, one must fall back on
28 |  * system-specific thread IDs.
29 |  **/
30 | DISPENSO_DLL_ACCESS uint64_t threadId();
31 | 
32 | } // namespace dispenso
33 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) Facebook, Inc. and its affiliates.
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.yml:
--------------------------------------------------------------------------------
 1 | name: Feature Request
 2 | description: File a feature request
 3 | title: "[Feature Request]: "
 4 | labels: ["feature", "request"]
 5 | assignees:
 6 |   - graphicsMan
 7 | body:
 8 |   - type: markdown
 9 |     attributes:
10 |       value: |
11 |         Thanks for taking the time to fill out this feature request!
12 |   - type: input
13 |     id: contact
14 |     attributes:
15 |       label: Contact Details
16 |       description: How can we get in touch with you if we need more info?
17 |       placeholder: ex. email@example.com
18 |     validations:
19 |       required: false
20 |   - type: textarea
21 |     id: whats-wanted
22 |     attributes:
23 |       label: What is the desired feature?
24 |       description: Give some details
25 |       value: "Details here"
26 |     validations:
27 |       required: true
28 |   - type: checkboxes
29 |     id: terms
30 |     attributes:
31 |       label: Code of Conduct
32 |       description: By submitting this issue, you agree to follow our [Code of Conduct](https://example.com)
33 |       options:
34 |         - label: I agree to follow this project's Code of Conduct
35 |           required: true
36 | 


--------------------------------------------------------------------------------
/tests/concurrent_vector_test_common_types.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the MIT license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | #include <dispenso/concurrent_vector.h>
 9 | 
10 | #include <algorithm>
11 | #include <memory>
12 | #include <vector>
13 | 
14 | #include <dispenso/parallel_for.h>
15 | #include <gtest/gtest.h>
16 | 
17 | template <typename Traits>
18 | class ConcurrentVectorTest : public testing::Test {
19 |  public:
20 | };
21 | 
22 | using dispenso::ConcurrentVectorReallocStrategy;
23 | 
24 | struct TestTraitsA {
25 |   static constexpr bool kPreferBuffersInline = false;
26 |   static constexpr ConcurrentVectorReallocStrategy kReallocStrategy =
27 |       ConcurrentVectorReallocStrategy::kHalfBufferAhead;
28 |   static constexpr bool kIteratorPreferSpeed = false;
29 | };
30 | 
31 | struct TestTraitsB {
32 |   static constexpr bool kPreferBuffersInline = true;
33 |   static constexpr ConcurrentVectorReallocStrategy kReallocStrategy =
34 |       ConcurrentVectorReallocStrategy::kFullBufferAhead;
35 |   static constexpr bool kIteratorPreferSpeed = true;
36 | };
37 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.yml:
--------------------------------------------------------------------------------
 1 | name: Bug Report
 2 | description: File a bug report
 3 | title: "[Bug]: "
 4 | labels: ["bug", "triage"]
 5 | assignees:
 6 |   - graphicsMan
 7 | body:
 8 |   - type: markdown
 9 |     attributes:
10 |       value: |
11 |         Thanks for taking the time to fill out this bug report!
12 |   - type: input
13 |     id: contact
14 |     attributes:
15 |       label: Contact Details
16 |       description: How can we get in touch with you if we need more info?
17 |       placeholder: ex. email@example.com
18 |     validations:
19 |       required: false
20 |   - type: textarea
21 |     id: what-happened
22 |     attributes:
23 |       label: What happened?
24 |       description: Also tell us, what did you expect to happen?
25 |       placeholder: Tell us what you see!
26 |       value: "A bug happened!"
27 |     validations:
28 |       required: true
29 |   - type: dropdown
30 |     id: version
31 |     attributes:
32 |       label: Version
33 |       description: What version of our software are you running?
34 |       options:
35 |         - 1.0 (Default)
36 |         - latest (Edge)
37 |     validations:
38 |       required: true
39 |   - type: checkboxes
40 |     id: terms
41 |     attributes:
42 |       label: Code of Conduct
43 |       description: By submitting this issue, you agree to follow our [Code of Conduct](https://example.com)
44 |       options:
45 |         - label: I agree to follow this project's Code of Conduct
46 |           required: true
47 | 


--------------------------------------------------------------------------------
/dispenso/detail/notifier_common.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the MIT license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | #pragma once
 9 | 
10 | // For fallback path
11 | #include <condition_variable>
12 | #include <mutex>
13 | 
14 | #if defined(__linux__)
15 | #include <errno.h>
16 | #include <linux/futex.h>
17 | #include <sys/syscall.h>
18 | #include <unistd.h>
19 | 
20 | namespace dispenso {
21 | namespace detail {
22 | static int futex(
23 |     int* uaddr,
24 |     int futex_op,
25 |     int val,
26 |     const struct timespec* timeout,
27 |     int* /*uaddr2*/,
28 |     int val3) {
29 |   return static_cast<int>(syscall(SYS_futex, uaddr, futex_op, val, timeout, uaddr, val3));
30 | }
31 | } // namespace detail
32 | } // namespace dispenso
33 | 
34 | #elif defined(__MACH__)
35 | #include <mach/mach.h>
36 | 
37 | #elif defined(_WIN32)
38 | 
39 | #if (defined(_M_ARM64) || defined(_M_ARM)) && !defined(_ARM_)
40 | #define _ARM_
41 | #elif _WIN64
42 | #define _AMD64_
43 | #elif _WIN32
44 | #define _X86_
45 | #else
46 | #error "No valid windows platform"
47 | #endif // platform
48 | 
49 | #include <errhandlingapi.h>
50 | #include <synchapi.h>
51 | 
52 | namespace dispenso {
53 | namespace detail {
54 | 
55 | constexpr int kErrorTimeoutWin = 0x000005B4;
56 | constexpr unsigned long kInfiniteWin = static_cast<unsigned long>(-1);
57 | 
58 | } // namespace detail
59 | } // namespace dispenso
60 | 
61 | #endif // PLATFORM
62 | 


--------------------------------------------------------------------------------
/dispenso/detail/math.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the MIT license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | #pragma once
 9 | 
10 | #include <stdint.h>
11 | 
12 | #if defined(_WIN32)
13 | #include <intrin.h>
14 | #endif //_WIN32
15 | 
16 | namespace dispenso {
17 | 
18 | namespace detail {
19 | 
20 | constexpr uint64_t nextPow2(uint64_t v) {
21 |   // https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
22 |   v--;
23 |   v |= v >> 1;
24 |   v |= v >> 2;
25 |   v |= v >> 4;
26 |   v |= v >> 8;
27 |   v |= v >> 16;
28 |   v |= v >> 32;
29 |   v++;
30 |   return v;
31 | }
32 | 
33 | constexpr inline uint32_t log2const(uint64_t v) {
34 |   constexpr uint64_t b[] = {0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000, 0xFFFFFFFF00000000UL};
35 |   constexpr uint32_t S[] = {1, 2, 4, 8, 16, 32};
36 | 
37 |   uint32_t r = 0;
38 |   for (uint32_t i = 6; i--;) {
39 |     if (v & b[i]) {
40 |       v >>= S[i];
41 |       r |= S[i];
42 |     }
43 |   }
44 | 
45 |   return r;
46 | }
47 | 
48 | #if (defined(__GNUC__) || defined(__clang__))
49 | inline uint32_t log2(uint64_t v) {
50 |   return static_cast<uint32_t>(63 - __builtin_clzll(v));
51 | }
52 | #elif defined(_WIN32)
53 | inline uint32_t log2(uint64_t v) {
54 |   return static_cast<uint32_t>(63 - __lzcnt64(v));
55 | }
56 | #else
57 | inline uint32_t log2(uint64_t v) {
58 |   return log2const(v);
59 | }
60 | 
61 | #endif // PLATFORM
62 | 
63 | } // namespace detail
64 | } // namespace dispenso
65 | 


--------------------------------------------------------------------------------
/dispenso/detail/once_callable_impl.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the MIT license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | #include <dispenso/detail/math.h>
 9 | #include <dispenso/small_buffer_allocator.h>
10 | 
11 | namespace dispenso {
12 | namespace detail {
13 | 
14 | class OnceCallable {
15 |  public:
16 |   virtual void run() = 0;
17 |   virtual ~OnceCallable() = default;
18 | };
19 | 
20 | template <size_t kBufferSize, typename F>
21 | class OnceCallableImpl : public OnceCallable {
22 |  public:
23 |   template <typename G>
24 |   OnceCallableImpl(G&& f) : f_(std::forward<G>(f)) {}
25 | 
26 |   void run() override {
27 |     f_();
28 |     // This is admittedly playing nasty games here; however, the base class is empty, and we
29 |     // completely control our own polymorphic existence.  No need to make the virtual base class
30 |     // destructor get called (optimization).
31 |     this->OnceCallableImpl::~OnceCallableImpl();
32 |     deallocSmallBuffer<kBufferSize>(this);
33 |   }
34 | 
35 |   ~OnceCallableImpl() override = default;
36 | 
37 |  private:
38 |   F f_;
39 | };
40 | 
41 | template <typename F>
42 | inline OnceCallable* createOnceCallable(F&& f) {
43 |   using FNoRef = typename std::remove_reference<F>::type;
44 | 
45 |   constexpr size_t kImplSize = nextPow2(sizeof(OnceCallableImpl<16, FNoRef>));
46 | 
47 |   return new (allocSmallBuffer<kImplSize>())
48 |       OnceCallableImpl<kImplSize, FNoRef>(std::forward<F>(f));
49 | }
50 | 
51 | } // namespace detail
52 | } // namespace dispenso
53 | 


--------------------------------------------------------------------------------
/tests/async_request_test.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the MIT license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | #include <thread>
 9 | 
10 | #include <dispenso/async_request.h>
11 | 
12 | #include <gtest/gtest.h>
13 | 
14 | TEST(AsyncRequest, SequentialAsExpected) {
15 |   dispenso::AsyncRequest<int> req;
16 | 
17 |   EXPECT_FALSE(req.updateRequested());
18 |   EXPECT_FALSE(req.tryEmplaceUpdate(5));
19 |   EXPECT_FALSE(req.getUpdate());
20 | 
21 |   req.requestUpdate();
22 | 
23 |   EXPECT_TRUE(req.updateRequested());
24 | 
25 |   EXPECT_FALSE(req.getUpdate());
26 | 
27 |   EXPECT_TRUE(req.tryEmplaceUpdate(0));
28 | 
29 |   auto result = req.getUpdate();
30 |   EXPECT_TRUE(result);
31 |   EXPECT_EQ(0, result.value());
32 | }
33 | 
34 | TEST(AsyncRequest, AsyncAsExpected) {
35 |   dispenso::AsyncRequest<int> req;
36 |   std::atomic<bool> running(true);
37 |   std::thread t([&req, &running]() {
38 |     int next = 0;
39 |     while (running.load(std::memory_order_relaxed)) {
40 |       if (req.updateRequested()) {
41 |         req.tryEmplaceUpdate(next++);
42 |       }
43 |     }
44 |   });
45 | 
46 |   int sum = 0;
47 |   int sumExpected = 0;
48 |   for (int i = 0; i < 5000; ++i) {
49 |     sumExpected += i;
50 | 
51 |     req.requestUpdate();
52 |     while (true) {
53 |       auto result = req.getUpdate();
54 |       if (result.has_value()) {
55 |         sum += result.value();
56 |         break;
57 |       }
58 |     }
59 |   }
60 | 
61 |   running.store(false, std::memory_order_release);
62 |   t.join();
63 | 
64 |   EXPECT_EQ(sum, sumExpected);
65 | }
66 | 


--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
 1 | # PR Details
 2 | 
 3 | <!--- Provide a general summary of your changes in the Title above -->
 4 | 
 5 | ## Description
 6 | 
 7 | <!--- Describe your changes in detail -->
 8 | 
 9 | ## Related Issue
10 | 
11 | <!--- Please link to the issue here: -->
12 | 
13 | ## Motivation and Context
14 | 
15 | <!--- Why is this change required? What problem does it solve? -->
16 | 
17 | ## Test Plan
18 | 
19 | <!--- Please describe in detail how you tested your changes. -->
20 | <!--- Include details of your testing environment, and the tests you ran to -->
21 | <!--- see how your change affects other areas of the code, etc. -->
22 | 
23 | ## Types of changes
24 | 
25 | <!--- What types of changes does your code introduce? Put an `x` in all the boxes that apply: -->
26 | 
27 | - [ ] Docs change
28 | - [ ] Refactoring
29 | - [ ] Dependency upgrade
30 | - [ ] Bug fix (non-breaking change which fixes an issue)
31 | - [ ] New feature (non-breaking change which adds functionality)
32 | - [ ] Breaking change (fix or feature that would cause existing functionality to change)
33 | 
34 | ## Checklist
35 | 
36 | <!--- Go over all the following points, and put an `x` in all the boxes that apply. -->
37 | <!--- If you're unsure about any of these, don't hesitate to ask. We're here to help! -->
38 | 
39 | - [ ] My code follows the code style of this project.
40 | - [ ] I have run clang-format.
41 | - [ ] My change requires a change to the documentation.
42 | - [ ] I have updated the documentation accordingly.
43 | - [ ] I have read the **CONTRIBUTING** document.
44 | - [ ] I have added tests to cover my changes.
45 | - [ ] All new and existing tests passed, including in ASAN and TSAN modes (if available on your platform).
46 | 


--------------------------------------------------------------------------------
/dispenso/priority.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the MIT license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | #pragma once
 9 | 
10 | #include <dispenso/platform.h>
11 | 
12 | /**
13 |  * @file priority.h
14 |  *
15 |  * Utilities for getting and setting thread priority.  This is an attempt to unify concepts for
16 |  * thread priority usefully across multiple platforms.  For finer control, use platform specific
17 |  * functionality.
18 |  *
19 |  * @note When using higher-than-normal priority, use caution!  Too many threads running at too high
20 |  * priority can have a strong negative impact on the responsivity of the machine.  Prefer to use
21 |  * realtime priority only for short running tasks that need to be very responsively run.
22 |  **/
23 | 
24 | namespace dispenso {
25 | 
26 | /**
27 |  * A thread priority setting.  Enum values in increasing order of priority.
28 |  **/
29 | enum class ThreadPriority { kLow, kNormal, kHigh, kRealtime };
30 | 
31 | /**
32 |  * Access the current thread priority as set by setCurrentThreadPriority.
33 |  *
34 |  * @return The priority of the current thread
35 |  *
36 |  * @note If the current thread priority has been set via a platform-specific mechanism, this may
37 |  * return an incorrect value.
38 |  **/
39 | DISPENSO_DLL_ACCESS ThreadPriority getCurrentThreadPriority();
40 | 
41 | /**
42 |  * Set the current thread's priority
43 |  *
44 |  * @param prio The priority to set to
45 |  *
46 |  * @return true if the priority was modified, false otherwise.
47 |  **/
48 | DISPENSO_DLL_ACCESS bool setCurrentThreadPriority(ThreadPriority prio);
49 | 
50 | } // namespace dispenso
51 | 


--------------------------------------------------------------------------------
/dispenso/detail/per_thread_info.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the MIT license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | #pragma once
 9 | 
10 | #include <dispenso/platform.h>
11 | 
12 | namespace dispenso {
13 | namespace detail {
14 | 
15 | struct alignas(kCacheLineSize) PerThreadInfo {
16 |   void* pool = nullptr;
17 |   void* producer = nullptr;
18 |   int parForRecursionLevel = 0;
19 | };
20 | 
21 | class ParForRecursion {
22 |  public:
23 |   ~ParForRecursion() {
24 |     --parForRecursionLevel_;
25 |   }
26 | 
27 |  private:
28 |   ParForRecursion(int& parForRecursionLevel) : parForRecursionLevel_(parForRecursionLevel) {
29 |     ++parForRecursionLevel_;
30 |   }
31 | 
32 |   int& parForRecursionLevel_;
33 |   friend class PerPoolPerThreadInfo;
34 | };
35 | 
36 | class PerPoolPerThreadInfo {
37 |  public:
38 |   static void registerPool(void* pool, void* producer) {
39 |     auto& i = info();
40 |     i.pool = pool;
41 |     i.producer = producer;
42 |   }
43 | 
44 |   static void* producer(void* pool) {
45 |     auto& i = info();
46 |     return i.pool == pool ? i.producer : nullptr;
47 |   }
48 | 
49 |   static bool isParForRecursive(void* pool) {
50 |     auto& i = info();
51 |     return (!i.pool || i.pool == pool) && i.parForRecursionLevel > 0;
52 |   }
53 | 
54 |   static bool isPoolRecursive(void* pool) {
55 |     return info().pool == pool;
56 |   }
57 | 
58 |   static ParForRecursion parForRecurse() {
59 |     return ParForRecursion(info().parForRecursionLevel);
60 |   }
61 | 
62 |  private:
63 |   DISPENSO_DLL_ACCESS static PerThreadInfo& info();
64 | };
65 | 
66 | } // namespace detail
67 | } // namespace dispenso
68 | 


--------------------------------------------------------------------------------
/tests/thread_id_test.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the MIT license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | #include <dispenso/thread_id.h>
 9 | 
10 | #include <unordered_set>
11 | #include <vector>
12 | 
13 | #include <gmock/gmock.h>
14 | #include <gtest/gtest.h>
15 | 
16 | TEST(ThreadId, Repeatable) {
17 |   constexpr int kRounds = 100;
18 |   constexpr int kThreadsPerRound = 8;
19 |   for (int round = 0; round < kRounds; ++round) {
20 |     std::vector<std::thread> threads;
21 |     for (int i = 0; i < kThreadsPerRound; ++i) {
22 |       threads.emplace_back([]() {
23 |         constexpr int kTrials = 1000;
24 |         auto id = dispenso::threadId();
25 | 
26 |         for (int i = 0; i < kTrials; ++i) {
27 |           EXPECT_EQ(id, dispenso::threadId());
28 |         }
29 |       });
30 |     }
31 | 
32 |     for (auto& t : threads) {
33 |       t.join();
34 |     }
35 |   }
36 | }
37 | 
38 | TEST(ThreadId, Unique) {
39 |   constexpr int kRounds = 1000;
40 |   constexpr int kThreadsPerRound = 8;
41 | 
42 |   std::vector<uint64_t> ids(kRounds * kThreadsPerRound);
43 |   std::atomic<size_t> slot(0);
44 | 
45 |   for (int round = 0; round < kRounds; ++round) {
46 |     std::vector<std::thread> threads;
47 |     for (int i = 0; i < kThreadsPerRound; ++i) {
48 |       threads.emplace_back([&ids, &slot]() {
49 |         ids[slot.fetch_add(1, std::memory_order_relaxed)] = dispenso::threadId();
50 |       });
51 |     }
52 | 
53 |     for (auto& t : threads) {
54 |       t.join();
55 |     }
56 |   }
57 | 
58 |   std::unordered_set<uint64_t> uniquenessSet;
59 |   for (uint64_t id : ids) {
60 |     EXPECT_TRUE(uniquenessSet.insert(id).second);
61 |   }
62 | }
63 | 


--------------------------------------------------------------------------------
/dispenso/detail/timed_task_impl.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the MIT license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | #include <dispenso/thread_pool.h>
 9 | 
10 | namespace dispenso {
11 | namespace detail {
12 | 
13 | enum FunctionFlags : uint32_t { kFFlagsNone = 0, kFFlagsDetached = 1, kFFlagsCancelled = 2 };
14 | 
15 | struct TimedTaskImpl {
16 |   alignas(kCacheLineSize) std::atomic<size_t> count{0};
17 |   std::atomic<size_t> timesToRun;
18 |   std::atomic<uint32_t> flags{kFFlagsNone};
19 |   std::atomic<uint32_t> inProgress{0};
20 |   double nextAbsTime;
21 |   double period;
22 |   bool steady;
23 |   std::function<void(std::shared_ptr<TimedTaskImpl>)> func;
24 | 
25 |   template <typename F, typename Schedulable>
26 |   TimedTaskImpl(size_t times, double next, double per, F&& f, Schedulable& sched, bool stdy)
27 |       : timesToRun(times), nextAbsTime(next), period(per), steady(stdy) {
28 |     func = [&sched, f = std::move(f), this](std::shared_ptr<TimedTaskImpl> me) {
29 |       if (flags.load(std::memory_order_acquire) & kFFlagsCancelled) {
30 |         return;
31 |       }
32 | 
33 |       inProgress.fetch_add(1, std::memory_order_acq_rel);
34 | 
35 |       auto wrap = [&f, this, me = std::move(me)]() mutable {
36 |         if (!(flags.load(std::memory_order_acquire) & kFFlagsCancelled)) {
37 |           if (!f()) {
38 |             timesToRun.store(0, std::memory_order_release);
39 |             flags.fetch_or(kFFlagsCancelled, std::memory_order_acq_rel);
40 |             func = {};
41 |           }
42 |           count.fetch_add(1, std::memory_order_acq_rel);
43 |         }
44 | 
45 |         inProgress.fetch_sub(1, std::memory_order_release);
46 |         me.reset();
47 |       };
48 |       sched.schedule(wrap, ForceQueuingTag());
49 |     };
50 |   }
51 | };
52 | 
53 | } // namespace detail
54 | } // namespace dispenso
55 | 


--------------------------------------------------------------------------------
/dispenso/third-party/moodycamel/README.txt:
--------------------------------------------------------------------------------
 1 | https://github.com/cameron314/concurrentqueue
 2 | 
 3 | commit 65d6970912fc3f6bb62d80edf95ca30e0df85137 (HEAD -> master, origin/master, origin/HEAD)
 4 | Merge: d49fa2b 08dcafc
 5 | Author: Cameron <cameron@moodycamel.com>
 6 | Date:   Sun Jul 24 10:02:12 2022 -0400
 7 | 
 8 |     Merge pull request #308 from r8bhavneet/master
 9 |     
10 |     Update README.md
11 | 
12 | commit 08dcafcd131b46e1a63abdc9b5f73c852193edca
13 | Author: r8bhavneet <98200254+r8bhavneet@users.noreply.github.com>
14 | Date:   Sun Jul 24 02:35:57 2022 -0700
15 | 
16 |     Update README.md
17 |     
18 |     Hey, I really liked the project and was reading through the Readme.md file when I came across some redundant words and phrases which you might have missed whil
19 | e editing the documentation. It would be really a great opportunity for me if I could contribute to this project. Thank you.
20 | 
21 | commit d49fa2b0bd1c6185d93509f48c8987f9759d7238
22 | Merge: 0a40449 9dc1b2c
23 | Author: Cameron <cameron@moodycamel.com>
24 | Date:   Mon May 9 07:43:29 2022 -0400
25 | 
26 |     Merge pull request #296 from MathiasMagnus/fix-c4554
27 |     
28 |     Proper MSVC warning fix and note
29 | 
30 | commit 9dc1b2cfcad03b4ee22ea57ddb5c453c41c19ac9
31 | Author: Máté Ferenc Nagy-Egri <mate@streamhpc.com>
32 | Date:   Mon May 9 13:19:39 2022 +0200
33 | 
34 |     Proper MSVC warning fix and note
35 | 
36 | commit 0a404492ac2c0bba0f62eb2b859ec152e494f8bf
37 | Author: Cameron <cameron@moodycamel.com>
38 | Date:   Sat May 7 12:04:00 2022 -0400
39 | 
40 |     Attempt to resolve -Wsign-conversion warnings in concurrentqueue.h (see #294)
41 | 
42 | commit 22c78daf65d2c8cce9399a29171676054aa98807
43 | Merge: c52e5ef 263c55d
44 | Author: Cameron <cameron@moodycamel.com>
45 | Date:   Sun Mar 20 15:16:30 2022 -0400
46 | 
47 |     Merge pull request #290 from usurai/master
48 |     
49 |     Fix link in README
50 | 
51 | commit 263c55d5c95545abee1ef25662c752c5296d7c34
52 | Author: usurai <crowdwei@gmail.com>
53 | Date:   Thu Mar 17 16:09:14 2022 +0800
54 | 
55 |     Fix link in README
56 | 


--------------------------------------------------------------------------------
/dispenso/detail/op_result.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the MIT license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | #pragma once
 9 | 
10 | namespace dispenso {
11 | namespace detail {
12 | 
13 | template <typename T>
14 | class OpResult {
15 |  public:
16 |   OpResult() : ptr_(nullptr) {}
17 | 
18 |   template <typename U>
19 |   OpResult(U&& u) : ptr_(new (buf_) T(std::forward<U>(u))) {}
20 | 
21 |   OpResult(const OpResult<T>& oth) : ptr_(oth ? new (buf_) T(*oth.ptr_) : nullptr) {}
22 | 
23 |   OpResult(OpResult<T>&& oth) : ptr_(oth ? new (buf_) T(std::move(*oth.ptr_)) : nullptr) {
24 |     oth.ptr_ = nullptr;
25 |   }
26 | 
27 |   OpResult& operator=(const OpResult& oth) {
28 |     if (&oth == this) {
29 |       return *this;
30 |     }
31 |     if (ptr_) {
32 |       ptr_->~T();
33 |     }
34 | 
35 |     if (oth) {
36 |       ptr_ = new (buf_) T(*oth.ptr_);
37 |     } else {
38 |       ptr_ = nullptr;
39 |     }
40 |     return *this;
41 |   }
42 | 
43 |   OpResult& operator=(OpResult&& oth) {
44 |     if (&oth == this) {
45 |       return *this;
46 |     }
47 |     if (ptr_) {
48 |       ptr_->~T();
49 |     }
50 | 
51 |     if (oth) {
52 |       ptr_ = new (buf_) T(std::move(*oth.ptr_));
53 |       oth.ptr_ = nullptr;
54 |     } else {
55 |       ptr_ = nullptr;
56 |     }
57 | 
58 |     return *this;
59 |   }
60 | 
61 |   ~OpResult() {
62 |     if (ptr_) {
63 |       ptr_->~T();
64 |     }
65 |   }
66 | 
67 |   template <typename... Args>
68 |   T& emplace(Args&&... args) {
69 |     if (ptr_) {
70 |       ptr_->~T();
71 |     }
72 |     ptr_ = new (buf_) T(std::forward<Args>(args)...);
73 |     return *ptr_;
74 |   }
75 | 
76 |   operator bool() const {
77 |     return ptr_;
78 |   }
79 | 
80 |   bool has_value() const {
81 |     return ptr_;
82 |   }
83 | 
84 |   T& value() {
85 |     return *ptr_;
86 |   }
87 | 
88 |  private:
89 |   alignas(T) char buf_[sizeof(T)];
90 |   T* ptr_;
91 | };
92 | 
93 | } // namespace detail
94 | } // namespace dispenso
95 | 


--------------------------------------------------------------------------------
/dispenso/tsan_annotations.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the MIT license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | #include <dispenso/tsan_annotations.h>
 9 | 
10 | #if DISPENSO_HAS_TSAN
11 | 
12 | #ifdef __GNUC__
13 | #define ATTRIBUTE_WEAK __attribute__((weak))
14 | #else
15 | #define ATTRIBUTE_WEAK
16 | #endif
17 | 
18 | // These are found in the accompanying libtsan, but there is no header exposing them.  We want to
19 | // also avoid exposing them in a header to to discourage folks from calling them directly.
20 | extern "C" {
21 | void AnnotateIgnoreReadsBegin(const char* f, int l) ATTRIBUTE_WEAK;
22 | 
23 | void AnnotateIgnoreReadsEnd(const char* f, int l) ATTRIBUTE_WEAK;
24 | 
25 | void AnnotateIgnoreWritesBegin(const char* f, int l) ATTRIBUTE_WEAK;
26 | 
27 | void AnnotateIgnoreWritesEnd(const char* f, int l) ATTRIBUTE_WEAK;
28 | 
29 | void AnnotateNewMemory(const char* f, int l, const volatile void* address, long size)
30 |     ATTRIBUTE_WEAK;
31 | 
32 | void AnnotateHappensBefore(const char* f, int l, const volatile void* address) ATTRIBUTE_WEAK;
33 | void AnnotateHappensAfter(const char* f, int l, const volatile void* address) ATTRIBUTE_WEAK;
34 | }
35 | 
36 | namespace dispenso {
37 | namespace detail {
38 | 
39 | void annotateIgnoreWritesBegin(const char* f, int l) {
40 |   AnnotateIgnoreWritesBegin(f, l);
41 | }
42 | void annotateIgnoreWritesEnd(const char* f, int l) {
43 |   AnnotateIgnoreWritesEnd(f, l);
44 | }
45 | void annotateIgnoreReadsBegin(const char* f, int l) {
46 |   AnnotateIgnoreReadsBegin(f, l);
47 | }
48 | void annotateIgnoreReadsEnd(const char* f, int l) {
49 |   AnnotateIgnoreReadsEnd(f, l);
50 | }
51 | 
52 | void annotateNewMemory(const char* f, int l, const volatile void* address, long size) {
53 |   AnnotateNewMemory(f, l, address, size);
54 | }
55 | 
56 | void annotateHappensBefore(const char* f, int l, const volatile void* address) {
57 |   AnnotateHappensBefore(f, l, address);
58 | }
59 | 
60 | void annotateHappensAfter(const char* f, int l, const volatile void* address) {
61 |   AnnotateHappensAfter(f, l, address);
62 | }
63 | 
64 | } // namespace detail
65 | } // namespace dispenso
66 | 
67 | #endif // TSAN
68 | 


--------------------------------------------------------------------------------
/dispenso/latch.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the MIT license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | /**
 9 |  * @file latch.h
10 |  * A file providing a Latch barrier type, which gives a way for threads to wait until all expected
11 |  * threads have reached this point.  This is intended to match API and behavior of C++20 std::latch.
12 |  **/
13 | 
14 | #pragma once
15 | 
16 | #include <dispenso/platform.h>
17 | 
18 | #include <dispenso/detail/completion_event_impl.h>
19 | 
20 | namespace dispenso {
21 | 
22 | /**
23 |  * A class which can be used for barrier scenarios.  See e.g.
24 |  * https://en.cppreference.com/w/cpp/thread/latch
25 |  **/
26 | class Latch {
27 |  public:
28 |   /**
29 |    * Construct a latch with expected number of threads to wait on.
30 |    *
31 |    * @param threadGroupCount The number of threads in the group.
32 |    **/
33 |   explicit Latch(uint32_t threadGroupCount) noexcept : impl_(threadGroupCount) {}
34 | 
35 |   /**
36 |    * Decrement the counter in a non-blocking manner.
37 |    **/
38 |   void count_down(uint32_t n = 1) noexcept {
39 |     if (impl_.intrusiveStatus().fetch_sub(n, std::memory_order_acq_rel) == 1) {
40 |       impl_.notify(0);
41 |     }
42 |   }
43 | 
44 |   /**
45 |    * See if the count has been reduced to zero, indicating all necessary threads
46 |    * have synchronized.
47 |    *
48 |    * @note try_wait is a misnomer, as the function never blocks.  We kept the name to match C++20
49 |    * API.
50 |    * @return true only if the internal counter has reached zero.
51 |    **/
52 |   bool try_wait() const noexcept {
53 |     return impl_.intrusiveStatus().load(std::memory_order_acquire) == 0;
54 |   }
55 | 
56 |   /**
57 |    * Wait for all threads to have synchronized.
58 |    **/
59 |   void wait() const noexcept {
60 |     impl_.wait(0);
61 |   }
62 | 
63 |   /**
64 |    * Decrement the counter and wait
65 |    **/
66 |   void arrive_and_wait() noexcept {
67 |     if (impl_.intrusiveStatus().fetch_sub(1, std::memory_order_acq_rel) > 1) {
68 |       impl_.wait(0);
69 |     } else {
70 |       impl_.notify(0);
71 |     }
72 |   }
73 | 
74 |  private:
75 |   detail::CompletionEventImpl impl_;
76 | };
77 | 
78 | } // namespace dispenso
79 | 


--------------------------------------------------------------------------------
/tests/resource_pool_test.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the MIT license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | #include <gtest/gtest.h>
 9 | 
10 | #include <dispenso/resource_pool.h>
11 | #include <dispenso/thread_pool.h>
12 | 
13 | namespace {
14 | // In real use cases, the buffer may perform some expensive initialization such as allocate a large
15 | // chunk of memory.
16 | struct Buffer {
17 |   Buffer(std::atomic_int& _total_count, std::atomic_int& _num_buffers)
18 |       : total_count(_total_count), num_buffers(_num_buffers), count(0) {}
19 |   // On destruction, add the count
20 |   ~Buffer() {
21 |     total_count += count;
22 |     num_buffers += 1;
23 |   }
24 |   std::atomic_int& total_count;
25 |   std::atomic_int& num_buffers;
26 |   int count;
27 | };
28 | 
29 | void BuffersTest(const int num_threads, const int num_buffers) {
30 |   constexpr int kNumTasks = 100000;
31 |   std::atomic_int total_count(0);
32 |   std::atomic_int num_buffers_created(0);
33 |   {
34 |     dispenso::ResourcePool<Buffer> buffer_pool(num_buffers, [&total_count, &num_buffers_created]() {
35 |       return Buffer(total_count, num_buffers_created);
36 |     });
37 |     dispenso::ThreadPool thread_pool(num_threads);
38 |     for (int i = 0; i < kNumTasks; ++i) {
39 |       thread_pool.schedule([&]() {
40 |         auto buffer_resource = buffer_pool.acquire();
41 |         ++buffer_resource.get().count;
42 |       });
43 |     }
44 |   }
45 | 
46 |   // The sum of all the buffers counts should be equal to the number of tasks.
47 |   EXPECT_EQ(total_count, kNumTasks);
48 |   EXPECT_EQ(num_buffers_created, num_buffers);
49 | }
50 | 
51 | } // namespace
52 | 
53 | TEST(ResourcePool, SameNumBuffersAsThreadsTest) {
54 |   constexpr int kNumBuffers = 2;
55 |   constexpr int kNumThreads = 2;
56 |   BuffersTest(kNumBuffers, kNumThreads);
57 | }
58 | 
59 | TEST(ResourcePool, FewerBuffersThanThreadsTest) {
60 |   constexpr int kNumBuffers = 1;
61 |   constexpr int kNumThreads = 2;
62 |   BuffersTest(kNumBuffers, kNumThreads);
63 | }
64 | 
65 | TEST(ResourcePool, MoreBuffersThanThreadsTest) {
66 |   constexpr int kNumBuffers = 2;
67 |   constexpr int kNumThreads = 1;
68 |   BuffersTest(kNumBuffers, kNumThreads);
69 | }
70 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | 
 7 | cmake_minimum_required(VERSION 3.12)
 8 | project(
 9 |   Dispenso
10 |   VERSION 1.4.0
11 |   DESCRIPTION "Dispenso is a library for working with sets of parallel tasks"
12 |   LANGUAGES CXX)
13 | 
14 | if ("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
15 |   set(DISPENSO_STANDALONE TRUE)
16 | else()
17 |   set(DISPENSO_STANDALONE FALSE)
18 | endif()
19 | 
20 | if (DISPENSO_STANDALONE)
21 |   include(GNUInstallDirs)
22 | endif()
23 | 
24 | list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/modules)
25 | 
26 | # Main project setup
27 | if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME)
28 |   set(CMAKE_CXX_EXTENSIONS OFF)
29 |   set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
30 | 
31 |   option(DISPENSO_SHARED_LIB "Build Dispenso shared library" ON)
32 | 
33 | 
34 |   # Windows-specific
35 |   if(WIN32)
36 |     set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS 1)
37 |   endif()
38 | endif()
39 | 
40 | option(ADDRESS_SANITIZER "Use Address Sanitizer, incompatible with THREAD_SANITIZER" OFF)
41 | option(THREAD_SANITIZER "Use Thread Sanitizer, incompatible with ADDRESS_SANITIZER" OFF)
42 | 
43 | if (ADDRESS_SANITIZER)
44 |   add_compile_options(-fsanitize=address -fsanitize=undefined)
45 |   add_link_options(-fsanitize=address -fsanitize=undefined)
46 | elseif (THREAD_SANITIZER)
47 |   add_compile_options(-fsanitize=thread)
48 |   add_link_options(-fsanitize=thread)
49 | endif()
50 | 
51 | set(CMAKE_CXX_STANDARD 14 CACHE STRING "the C++ standard to use for this project")
52 | 
53 | ###########################################################
54 | # Targets
55 | add_subdirectory(dispenso)
56 | 
57 | set(DISPENSO_BUILD_TESTS OFF CACHE BOOL "Should tests be built?")
58 | set(DISPENSO_BUILD_BENCHMARKS OFF CACHE BOOL "Should benchmarks be built?")
59 | 
60 | if(DISPENSO_BUILD_TESTS)
61 |   enable_testing()
62 |   add_subdirectory(tests)
63 | endif()
64 | 
65 | if(DISPENSO_BUILD_BENCHMARKS)
66 |   # Sadly any given release of folly seems to have some problem or another.  Leave disabled by default.
67 |   set(BENCHMARK_WITHOUT_FOLLY ON CACHE BOOL "Should folly benchmarks be disabled?")
68 |   add_subdirectory(benchmarks)
69 | endif()
70 | 


--------------------------------------------------------------------------------
/benchmarks/small_buffer_benchmark.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the MIT license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | #include <dispenso/small_buffer_allocator.h>
 9 | 
10 | #include "benchmark_common.h"
11 | 
12 | constexpr size_t kSmallSize = 32;
13 | constexpr size_t kMediumSize = 128;
14 | constexpr size_t kLargeSize = 256;
15 | 
16 | template <typename Alloc, typename Free>
17 | void run(benchmark::State& state, Alloc alloc, Free dealloc) {
18 |   std::vector<char*> ptrs(state.range(0));
19 |   for (auto UNUSED_VAR : state) {
20 |     for (char*& p : ptrs) {
21 |       p = alloc();
22 |     }
23 |     for (char* p : ptrs) {
24 |       dealloc(p);
25 |     }
26 |   }
27 | }
28 | 
29 | template <size_t kSize>
30 | void BM_newdelete(benchmark::State& state) {
31 |   run(state, []() { return new char[kSize]; }, [](char* buf) { delete[] (buf); });
32 | }
33 | 
34 | template <size_t kSize>
35 | void BM_small_buffer_allocator(benchmark::State& state) {
36 |   run(
37 |       state,
38 |       []() { return dispenso::allocSmallBuffer<kSize>(); },
39 |       [](char* buf) { dispenso::deallocSmallBuffer<kSize>(buf); });
40 | }
41 | 
42 | BENCHMARK_TEMPLATE(BM_newdelete, kSmallSize)->Range(1 << 13, 1 << 15);
43 | BENCHMARK_TEMPLATE(BM_small_buffer_allocator, kSmallSize)->Range(1 << 13, 1 << 15);
44 | 
45 | BENCHMARK_TEMPLATE(BM_newdelete, kMediumSize)->Range(1 << 13, 1 << 15);
46 | BENCHMARK_TEMPLATE(BM_small_buffer_allocator, kMediumSize)->Range(1 << 13, 1 << 15);
47 | 
48 | BENCHMARK_TEMPLATE(BM_newdelete, kLargeSize)->Range(1 << 13, 1 << 15);
49 | BENCHMARK_TEMPLATE(BM_small_buffer_allocator, kLargeSize)->Range(1 << 13, 1 << 15);
50 | 
51 | BENCHMARK_TEMPLATE(BM_newdelete, kSmallSize)->Threads(16)->Range(1 << 13, 1 << 15);
52 | BENCHMARK_TEMPLATE(BM_small_buffer_allocator, kSmallSize)->Threads(16)->Range(1 << 13, 1 << 15);
53 | 
54 | BENCHMARK_TEMPLATE(BM_newdelete, kMediumSize)->Threads(16)->Range(1 << 13, 1 << 15);
55 | BENCHMARK_TEMPLATE(BM_small_buffer_allocator, kMediumSize)->Threads(16)->Range(1 << 13, 1 << 15);
56 | 
57 | BENCHMARK_TEMPLATE(BM_newdelete, kLargeSize)->Threads(16)->Range(1 << 13, 1 << 15);
58 | BENCHMARK_TEMPLATE(BM_small_buffer_allocator, kLargeSize)->Threads(16)->Range(1 << 13, 1 << 15);
59 | 
60 | BENCHMARK_MAIN();
61 | 


--------------------------------------------------------------------------------
/benchmarks/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | cmake_minimum_required(VERSION 3.12)
 7 | 
 8 | 
 9 | include(FetchContent)
10 | 
11 | if(CMAKE_CXX_STANDARD LESS 17)
12 |   message(STATUS, "Using older taskflow due to c++14 support")
13 | FetchContent_Declare(
14 |   taskflow
15 |   GIT_REPOSITORY https://github.com/taskflow/taskflow.git
16 |   GIT_TAG        v2.7.0
17 |   CONFIGURE_COMMAND ""
18 |   BUILD_COMMAND ""
19 |   )
20 | else()
21 |   message(STATUS, "Using up-to-date taskflow")
22 | FetchContent_Declare(
23 |   taskflow
24 |   GIT_REPOSITORY https://github.com/taskflow/taskflow.git
25 |   GIT_TAG        v3.6.0
26 |   CONFIGURE_COMMAND ""
27 |   BUILD_COMMAND ""
28 |   )
29 | endif()
30 | FetchContent_GetProperties(taskflow)
31 | if(NOT taskflow_POPULATED)
32 |   FetchContent_Populate(taskflow)
33 | endif()
34 | 
35 | FetchContent_MakeAvailable(taskflow)
36 | 
37 | add_library(taskflow INTERFACE)
38 | target_include_directories(taskflow INTERFACE ${taskflow_SOURCE_DIR})
39 | 
40 | find_package(benchmark REQUIRED)
41 | if (NOT WIN32)
42 |   find_package(OpenMP)
43 | endif (NOT WIN32)
44 | find_package(TBB)
45 | find_package(folly)
46 | 
47 | if (WIN32)
48 |   set (REQUIRED_LIBS dispenso benchmark::benchmark benchmark::benchmark_main taskflow)
49 | else (WIN32)
50 |   set (REQUIRED_LIBS dispenso benchmark::benchmark benchmark::benchmark_main pthread taskflow)
51 | endif (WIN32)
52 | 
53 | if (TBB_FOUND)
54 |   set (OPTIONAL_LIBS ${OPTIONAL_LIBS} tbb)
55 | else (TBB_FOUND)
56 |   add_compile_definitions(BENCHMARK_WITHOUT_TBB)
57 | endif (TBB_FOUND)
58 | 
59 | if (OpenMP_CXX_FOUND)
60 |   set (OPTIONAL_LIBS ${OPTIONAL_LIBS} OpenMP::OpenMP_CXX)
61 | endif (OpenMP_CXX_FOUND)
62 | 
63 | if (FOLLY_LIBRARIES AND NOT ${BENCHMARK_WITHOUT_FOLLY})
64 |   find_package(gflags)
65 |   set (OPTIONAL_LIBS ${OPTIONAL_LIBS} ${FOLLY_LIBRARIES})
66 | else (FOLLY_LIBRARIES AND NOT ${BENCHMARK_WITHOUT_FOLLY})
67 |   add_compile_definitions(BENCHMARK_WITHOUT_FOLLY)
68 | endif (FOLLY_LIBRARIES AND NOT ${BENCHMARK_WITHOUT_FOLLY})
69 | 
70 | file(GLOB BENCHMARK_FILES CONFIGURE_DEPENDS ${PROJECT_SOURCE_DIR}/benchmarks/*.cpp)
71 | 
72 | foreach(BENCHMARK_FILE ${BENCHMARK_FILES})
73 |   set(BENCHMARK_NAME)
74 |   get_filename_component(BENCHMARK_NAME ${BENCHMARK_FILE} NAME_WE)
75 |   add_executable(${BENCHMARK_NAME} ${BENCHMARK_FILE})
76 |   target_link_libraries(${BENCHMARK_NAME} ${REQUIRED_LIBS} ${OPTIONAL_LIBS})
77 | endforeach()
78 | 
79 | 


--------------------------------------------------------------------------------
/benchmarks/thread_benchmark_common.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the MIT license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | #pragma once
 9 | 
10 | #ifdef _POSIX_C_SOURCE
11 | #include <sys/resource.h>
12 | #endif // _POSIX_C_SOURCE
13 | 
14 | #include <cmath>
15 | #include <iostream>
16 | #include <thread>
17 | 
18 | #include "benchmark_common.h"
19 | 
20 | inline std::vector<int> pow2HalfStepThreads() {
21 |   const int kRunningThreads = std::thread::hardware_concurrency();
22 |   std::vector<int> result;
23 |   result.push_back(1);
24 |   for (int block = 2; block <= kRunningThreads; block *= 2) {
25 |     int step = block / 2;
26 | 
27 |     for (int i = block; i < 2 * block && i <= kRunningThreads; i += step) {
28 |       result.push_back(i);
29 |     }
30 |   }
31 |   return result;
32 | }
33 | 
34 | #if defined(_POSIX_C_SOURCE) || defined(__MACH__)
35 | struct rusage g_rusage;
36 | 
37 | inline void startRusage() {
38 |   std::atomic_thread_fence(std::memory_order_acquire);
39 |   getrusage(RUSAGE_SELF, &g_rusage);
40 |   std::atomic_thread_fence(std::memory_order_release);
41 | }
42 | 
43 | inline double duration(struct timeval start, struct timeval end) {
44 |   return (end.tv_sec + 1e-6 * end.tv_usec) - (start.tv_sec + 1e-6 * start.tv_usec);
45 | }
46 | 
47 | inline void endRusage(benchmark::State& state) {
48 |   std::atomic_thread_fence(std::memory_order_acquire);
49 |   struct rusage res;
50 |   getrusage(RUSAGE_SELF, &res);
51 |   std::atomic_thread_fence(std::memory_order_release);
52 | 
53 |   double userTime = duration(g_rusage.ru_utime, res.ru_utime);
54 |   double sysTime = duration(g_rusage.ru_stime, res.ru_stime);
55 | 
56 |   state.counters["\t0 User"] = userTime;
57 |   state.counters["\t1 System"] = sysTime;
58 | }
59 | #else
60 | inline void startRusage() {}
61 | inline void endRusage(benchmark::State& state) {}
62 | #endif //_POSIX_C_SOURCE
63 | 
64 | inline double getMean(const std::vector<double>& data) {
65 |   double sum = 0.0;
66 |   for (auto d : data) {
67 |     sum += d;
68 |   }
69 |   return sum / data.size();
70 | }
71 | 
72 | inline double getStddev(double mean, const std::vector<double>& data) {
73 |   double sumsq = 0.0;
74 |   for (auto d : data) {
75 |     auto dev = mean - d;
76 |     sumsq += dev * dev;
77 |   }
78 |   return std::sqrt(sumsq / data.size());
79 | }
80 | 
81 | void doStats(const std::vector<double>& times, benchmark::State& state) {
82 |   double mean = getMean(times);
83 |   state.counters["mean"] = mean;
84 |   state.counters["stddev"] = getStddev(mean, times);
85 | }
86 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing to dispenso
 2 | We want to make contributing to this project as easy and transparent as
 3 | possible.  There is a design ethos behind the library, so it is recommended to reach out via a GitHub 
 4 | issue on the project to discuss non-trivial changes you may wish to make.  These changes include, for 
 5 | example, wanting to change existing API, wanting to furnish a new utility, or wanting to change 
 6 | underlying behavior substantially.  Let's avoid situations where you put in a lot of hard work, only 
 7 | to have to change it substantially or get your pull request rejected.
 8 | 
 9 | ## Our Development Process
10 | This library has another home inside Facebook repos.  From there it is subjected to regular continuous integration testing on many platforms, and used by many projects.
11 | 
12 | ## Pull Requests
13 | We actively welcome your pull requests.
14 | 
15 | 1. Fork the repo and create your branch from `master`.
16 | 2. If you've added code that should be tested, add tests.
17 | 3. If you've changed APIs, update the documentation.
18 | 4. Ensure the test suite passes.
19 | 5. Utilize clang-format.
20 | 6. If you haven't already, complete the Contributor License Agreement ("CLA").
21 | 
22 | ## Contributor License Agreement ("CLA")
23 | In order to accept your pull request, we need you to submit a CLA. You only need
24 | to do this once to work on any of Facebook's open source projects.
25 | 
26 | Complete your CLA here: <https://code.facebook.com/cla>
27 | 
28 | ## Issues
29 | We use GitHub issues to track public bugs. Please ensure your description is
30 | clear and has sufficient instructions to be able to reproduce the issue.
31 | 
32 | Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe
33 | disclosure of security bugs. In those cases, please go through the process
34 | outlined on that page and do not file a public issue.
35 | 
36 | ## Coding Style  
37 | * 2 spaces for indentation rather than tabs
38 | * 100 character line length
39 | * Member variables have trailing underscore_
40 | * BigCamelCase for classes and structs, and smallCamelCase for functions and variables (exception is if you are trying to match a substantial part of a standard library interface).
41 | * [1TBS braces](https://en.wikipedia.org/wiki/Indentation_style#Variant:_1TBS_(OTBS))
42 | * Most of all, try to be consistent with the surrounding code.  We have automated tools that will
43 |   enforce clang-format style for some files (e.g. the C++ core) once we import your pull request
44 |   into our internal code reviewing tools.
45 | 
46 | ## License
47 | By contributing to dispenso, you agree that your contributions will be licensed
48 | under the LICENSE.md file in the root directory of this source tree.
49 | 


--------------------------------------------------------------------------------
/tests/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | cmake_minimum_required(VERSION 3.14)
 7 | 
 8 | ### TODO(bbudge): Set up testing with github actions
 9 | # Ideally these tests are all run in (Release, Debug) X (N/A, TSAN, ASAN, -fno-exceptions)
10 | ###
11 | 
12 | include(FetchContent)
13 | FetchContent_Declare(
14 |   GoogleTest
15 |   GIT_REPOSITORY https://github.com/google/googletest.git
16 |   GIT_TAG        release-1.12.1
17 | )
18 | 
19 | # For Windows, Prevent overriding the parent project's compiler/linker settings
20 | set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
21 | set(INSTALL_GTEST OFF CACHE BOOL "" FORCE)
22 | FetchContent_MakeAvailable(GoogleTest)
23 | include(GoogleTest)
24 | 
25 | macro(package_add_test TEST_NAME LABEL TEST_FILE)
26 |     add_executable(${TEST_NAME} ${TEST_FILE})
27 |     target_compile_features(${TEST_NAME} PRIVATE cxx_std_14)
28 |     target_compile_options(${TEST_NAME} PRIVATE
29 |       $<$<CXX_COMPILER_ID:MSVC>:/W3 /WX>
30 |       $<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-Wall -Wextra -pedantic -Wconversion -Wno-sign-conversion -Werror>
31 |       )
32 |     target_link_libraries(${TEST_NAME} gmock_main dispenso)
33 |     gtest_discover_tests(${TEST_NAME}
34 |       WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
35 |       PROPERTIES VS_DEBUGGER_WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}"
36 |       LABELS "${LABEL}")
37 |     set_target_properties(${TEST_NAME} PROPERTIES FOLDER tests)
38 | endmacro()
39 | 
40 | file(GLOB TEST_FILES CONFIGURE_DEPENDS "*test.cpp")
41 | 
42 | #TODO(elliotsegal, bbudge): Help add the shared_pool_test for CMake
43 | LIST(REMOVE_ITEM TEST_FILES
44 |   ${CMAKE_CURRENT_SOURCE_DIR}/shared_pool_test.cpp)
45 | 
46 | # Filter out these tests specifically because they are inherently flaky because they rely on OS behaviors that are not
47 | # guaranteed, and only really useful for manual runs when making changes to the related functionality.  Note that
48 | # possibly an even better test for both priority and timed_task behavior is to use the timed_task_benchmark.
49 | LIST(REMOVE_ITEM TEST_FILES
50 |   ${CMAKE_CURRENT_SOURCE_DIR}/priority_test.cpp
51 |   ${CMAKE_CURRENT_SOURCE_DIR}/timed_task_test.cpp)
52 | 
53 | foreach(TEST_FILE ${TEST_FILES})
54 |   set(TEST_NAME)
55 |   get_filename_component(TEST_NAME ${TEST_FILE} NAME_WE)
56 |   package_add_test(${TEST_NAME} unittest ${TEST_FILE})
57 | endforeach()
58 | 
59 | SET(FLAKY_TEST_FILES ${CMAKE_CURRENT_SOURCE_DIR}/priority_test.cpp ${CMAKE_CURRENT_SOURCE_DIR}/timed_task_test.cpp)
60 | 
61 | foreach(TEST_FILE ${FLAKY_TEST_FILES})
62 |   set(TEST_NAME)
63 |   get_filename_component(TEST_NAME ${TEST_FILE} NAME_WE)
64 |   package_add_test(${TEST_NAME} flaky ${TEST_FILE})
65 | endforeach()
66 | 


--------------------------------------------------------------------------------
/dispenso/completion_event.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the MIT license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | /**
 9 |  * @file completion_event.h
10 |  * A file providing a CompletionEvent type, which gives a way to signal to waiting threads that some
11 |  * event has been completed.
12 |  **/
13 | 
14 | #pragma once
15 | 
16 | #include <dispenso/platform.h>
17 | 
18 | #include <dispenso/detail/completion_event_impl.h>
19 | 
20 | namespace dispenso {
21 | 
22 | /**
23 |  * A class which can be used for one-time notify/wait scenarios.  It is basically a way to signal to
24 |  * any waiting threads that some event has completed.  There must be a single publisher thread
25 |  * and zero or more waiters on arbitrary threads.  <code>reset</code> may be called to restart a
26 |  * sequence (e.g. after <code>notify</code> occurs and all waiters have successfully exited
27 |  * <code>wait*</code>).
28 |  **/
29 | class CompletionEvent {
30 |  public:
31 |   /**
32 |    * Notify any waiting threads that the event has completed.  It is safe for this to be called
33 |    * before threads call <code>wait</code>.
34 |    **/
35 |   void notify() {
36 |     impl_.notify(1);
37 |   }
38 | 
39 |   /**
40 |    * Wait for another thread to <code>notify</code>
41 |    **/
42 |   void wait() const {
43 |     impl_.wait(1);
44 |   }
45 | 
46 |   /**
47 |    * Peek to see if the event has been notified in any thread
48 |    **/
49 |   bool completed() const {
50 |     return impl_.intrusiveStatus().load(std::memory_order_acquire);
51 |   }
52 | 
53 |   /**
54 |    * Wait for another thread to <code>notify</code> or for the relative timeout to expire, whichever
55 |    * is first.
56 |    *
57 |    * @return true if status is "completed", false if timed out.
58 |    **/
59 |   template <class Rep, class Period>
60 |   bool waitFor(const std::chrono::duration<Rep, Period>& relTime) const {
61 |     return impl_.waitFor(1, relTime);
62 |   }
63 | 
64 |   /**
65 |    * Wait for another thread to <code>notify</code> or for the absolute timeout to expire, whichever
66 |    * is first.
67 |    *
68 |    * @return true if status is "completed", false if timed out.
69 |    **/
70 |   template <class Clock, class Duration>
71 |   bool waitUntil(const std::chrono::time_point<Clock, Duration>& absTime) const {
72 |     return impl_.waitUntil(1, absTime);
73 |   }
74 | 
75 |   /**
76 |    * Resets the event to "not-completed".  This should not be called while an active
77 |    * <code>wait*\/notify</code> sequence is still currently in play.
78 |    **/
79 |   void reset() {
80 |     impl_.intrusiveStatus().store(0, std::memory_order_seq_cst);
81 |   }
82 | 
83 |  private:
84 |   detail::CompletionEventImpl impl_{0};
85 | };
86 | 
87 | } // namespace dispenso
88 | 


--------------------------------------------------------------------------------
/dispenso/detail/graph_executor_impl.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the MIT license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | #pragma once
 9 | #include <dispenso/graph.h>
10 | #include <dispenso/task_set.h>
11 | #include <unordered_set>
12 | 
13 | namespace detail {
14 | 
15 | class ExecutorBase {
16 |  protected:
17 |   inline static bool hasNoIncompletePredecessors(const dispenso::Node& node) {
18 |     return node.numIncompletePredecessors_.load(std::memory_order_relaxed) == 0;
19 |   }
20 | 
21 |   inline static void addIncompletePredecessor(const dispenso::Node& node) {
22 |     if (node.isCompleted()) {
23 |       node.numIncompletePredecessors_.store(1, std::memory_order_relaxed);
24 |     } else {
25 |       node.numIncompletePredecessors_.fetch_add(1, std::memory_order_relaxed);
26 |     }
27 |   }
28 | 
29 |   inline static void ifIncompleteAddIncompletePredecessor(const dispenso::Node& node) {
30 |     if (!node.isCompleted()) {
31 |       node.numIncompletePredecessors_.fetch_add(1, std::memory_order_relaxed);
32 |     }
33 |   }
34 | 
35 |   inline static bool decNumIncompletePredecessors(
36 |       const dispenso::Node& node,
37 |       std::memory_order order) {
38 |     return node.numIncompletePredecessors_.fetch_sub(1, order) == 1;
39 |   }
40 | 
41 |   inline static bool decNumIncompletePredecessors(
42 |       const dispenso::BiPropNode& node,
43 |       std::memory_order order) {
44 |     const std::memory_order loadOrder =
45 |         order == std::memory_order_relaxed ? std::memory_order_relaxed : std::memory_order_acquire;
46 |     if (node.numIncompletePredecessors_.load(loadOrder) == dispenso::Node::kCompleted) {
47 |       return false;
48 |     }
49 | 
50 |     return node.numIncompletePredecessors_.fetch_sub(1, order) == 1;
51 |   }
52 | 
53 |   template <class N>
54 |   inline static void evaluateNodeConcurrently(dispenso::ConcurrentTaskSet& tasks, const N* node) {
55 |     node->run();
56 |     for (const dispenso::Node* const d : node->dependents_) {
57 |       if (decNumIncompletePredecessors(static_cast<const N&>(*d), std::memory_order_acq_rel)) {
58 |         tasks.schedule(
59 |             [&tasks, d]() { evaluateNodeConcurrently(tasks, static_cast<const N*>(d)); });
60 |       }
61 |     }
62 |   }
63 | 
64 |   static void appendGroup(
65 |       const dispenso::Node* /* node */,
66 |       std::unordered_set<const std::vector<const dispenso::BiPropNode*>*>& /* groups */) {}
67 | 
68 |   static void appendGroup(
69 |       const dispenso::BiPropNode* node,
70 |       std::unordered_set<const std::vector<const dispenso::BiPropNode*>*>& groups) {
71 |     const std::vector<const dispenso::BiPropNode*>* group = node->biPropSet_.get();
72 |     if (group != nullptr) {
73 |       groups.insert(group);
74 |     }
75 |   }
76 | };
77 | 
78 | } // namespace detail
79 | 


--------------------------------------------------------------------------------
/.clang-format:
--------------------------------------------------------------------------------
 1 | ---
 2 | AccessModifierOffset: -1
 3 | AlignAfterOpenBracket: AlwaysBreak
 4 | AlignConsecutiveAssignments: false
 5 | AlignConsecutiveDeclarations: false
 6 | AlignEscapedNewlinesLeft: true
 7 | AlignOperands:   false
 8 | AlignTrailingComments: false
 9 | AllowAllParametersOfDeclarationOnNextLine: false
10 | AllowShortBlocksOnASingleLine: false
11 | AllowShortCaseLabelsOnASingleLine: false
12 | AllowShortFunctionsOnASingleLine: Empty
13 | AllowShortIfStatementsOnASingleLine: false
14 | AllowShortLoopsOnASingleLine: false
15 | AlwaysBreakAfterReturnType: None
16 | AlwaysBreakBeforeMultilineStrings: true
17 | AlwaysBreakTemplateDeclarations: true
18 | BinPackArguments: false
19 | BinPackParameters: false
20 | BraceWrapping:
21 |   AfterClass:      false
22 |   AfterControlStatement: false
23 |   AfterEnum:       false
24 |   AfterFunction:   false
25 |   AfterNamespace:  false
26 |   AfterObjCDeclaration: false
27 |   AfterStruct:     false
28 |   AfterUnion:      false
29 |   BeforeCatch:     false
30 |   BeforeElse:      false
31 |   IndentBraces:    false
32 | BreakBeforeBinaryOperators: None
33 | BreakBeforeBraces: Attach
34 | BreakBeforeTernaryOperators: true
35 | BreakConstructorInitializersBeforeComma: false
36 | BreakAfterJavaFieldAnnotations: false
37 | BreakStringLiterals: false
38 | ColumnLimit:     100
39 | CommentPragmas:  '^ IWYU pragma:'
40 | ConstructorInitializerAllOnOneLineOrOnePerLine: true
41 | ConstructorInitializerIndentWidth: 4
42 | ContinuationIndentWidth: 4
43 | Cpp11BracedListStyle: true
44 | DerivePointerAlignment: false
45 | DisableFormat:   false
46 | ForEachMacros:   [ FOR_EACH_RANGE, FOR_EACH, ]
47 | IncludeCategories:
48 |   - Regex:           '^<.*\.h(pp)?>'
49 |     Priority:        1
50 |   - Regex:           '^<.*'
51 |     Priority:        2
52 |   - Regex:           '.*'
53 |     Priority:        3
54 | IndentCaseLabels: true
55 | IndentWidth:     2
56 | IndentWrappedFunctionNames: false
57 | KeepEmptyLinesAtTheStartOfBlocks: false
58 | MacroBlockBegin: ''
59 | MacroBlockEnd:   ''
60 | MaxEmptyLinesToKeep: 1
61 | NamespaceIndentation: None
62 | ObjCBlockIndentWidth: 2
63 | ObjCSpaceAfterProperty: false
64 | ObjCSpaceBeforeProtocolList: false
65 | PenaltyBreakBeforeFirstCallParameter: 1
66 | PenaltyBreakComment: 300
67 | PenaltyBreakFirstLessLess: 120
68 | PenaltyBreakString: 1000
69 | PenaltyExcessCharacter: 1000000
70 | PenaltyReturnTypeOnItsOwnLine: 200
71 | PointerAlignment: Left
72 | RawStringFormats:
73 |   - Language:        TextProto
74 |     Delimiters:
75 |       - pb
76 | ReflowComments:  true
77 | SortIncludes:    true
78 | SpaceAfterCStyleCast: false
79 | SpaceBeforeAssignmentOperators: true
80 | SpaceBeforeParens: ControlStatements
81 | SpaceInEmptyParentheses: false
82 | SpacesBeforeTrailingComments: 1
83 | SpacesInAngles:  false
84 | SpacesInContainerLiterals: true
85 | SpacesInCStyleCastParentheses: false
86 | SpacesInParentheses: false
87 | SpacesInSquareBrackets: false
88 | Standard:        Cpp11
89 | TabWidth:        4
90 | UseTab:          Never
91 | ...
92 | 


--------------------------------------------------------------------------------
/dispenso/graph_executor.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the MIT license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | #pragma once
 9 | #include <dispenso/concurrent_vector.h>
10 | #include <dispenso/detail/graph_executor_impl.h>
11 | #include <dispenso/graph.h>
12 | #include <dispenso/parallel_for.h>
13 | #include <dispenso/platform.h>
14 | 
15 | namespace dispenso {
16 | /**
17 |  * Class to invoke <code>Graph</code> or <code>BiPropGraph</code> on current thread.
18 |  **/
19 | class SingleThreadExecutor : public ::detail::ExecutorBase {
20 |  public:
21 |   /**
22 |    * Invoke the graph. This is not concurrency safe.
23 |    *
24 |    * @param graph graph to invoke
25 |    **/
26 |   template <typename G>
27 |   void operator()(const G& graph);
28 | 
29 |  private:
30 |   std::vector<const Node*> nodesToExecute_;
31 |   std::vector<const Node*> nodesToExecuteNext_;
32 | };
33 | /**
34 |  * Class to invoke <code>Graph</code> or <code>BiPropGraph</code> using
35 |  * <code>dispenso::parallel_for</code> for every layer of the graph.
36 |  **/
37 | class ParallelForExecutor : public ::detail::ExecutorBase {
38 |  public:
39 |   /**
40 |    * Invoke the graph. This is not concurrency safe.
41 |    *
42 |    * @param taskSet taksSet to use with <code>parallel_for</code>.
43 |    * @param graph graph to invoke
44 |    **/
45 |   template <typename TaskSetT, typename G>
46 |   void operator()(TaskSetT& taskSet, const G& graph);
47 | 
48 |  private:
49 |   dispenso::ConcurrentVector<const Node*> nodesToExecute_;
50 |   dispenso::ConcurrentVector<const Node*> nodesToExecuteNext_;
51 | };
52 | /**
53 |  * Class to invoke <code>Graph</code> or <code>BiPropGraph</code> using
54 |  * <code>dispenso::ConcurrentTaskSet</code>
55 |  **/
56 | class ConcurrentTaskSetExecutor : public ::detail::ExecutorBase {
57 |  public:
58 |   /**
59 |    * Invoke the graph. This is not concurrency safe.
60 |    *
61 |    * @param tasks <code>ConcurrentTaskSet</code> to schedule tasks.
62 |    * @param graph graph to invoke
63 |    * @param wait if true run <code>tasks.wait()</code> at the end of the function
64 |    **/
65 |   template <typename G>
66 |   void operator()(dispenso::ConcurrentTaskSet& tasks, const G& graph, bool wait = true);
67 | 
68 |  private:
69 |   std::vector<const Node*> startNodes_;
70 | };
71 | 
72 | /**
73 |  * Class to propagate incomplete state recursively from nodes to dependents
74 |  **/
75 | class ForwardPropagator : public ::detail::ExecutorBase {
76 |  public:
77 |   /**
78 |    * Propagate incomplete state recursively from nodes to dependents
79 |    * This is not concurrency safe.
80 |    **/
81 |   template <class G>
82 |   void operator()(const G& graph);
83 | 
84 |  private:
85 |   template <class N>
86 |   void propagateIncompleteStateBidirectionally();
87 | 
88 |   std::vector<const Node*> nodesToVisit_;
89 |   std::vector<const Node*> nodesToVisitNext_;
90 |   std::unordered_set<const Node*> visited_;
91 |   std::unordered_set<const std::vector<const BiPropNode*>*> groups_;
92 | };
93 | } // namespace dispenso
94 | 


--------------------------------------------------------------------------------
/dispenso/pool_allocator.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the MIT license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | /**
 9 |  * @file pool_allocator.h
10 |  * A pool allocator to help reduce calls to the underlying allocation and deallocation functions
11 |  * that can be provided custom backing allocation and deallocation functions, e.g. cudaMalloc,
12 |  * cudaFree.
13 |  **/
14 | 
15 | #pragma once
16 | 
17 | #include <atomic>
18 | #include <functional>
19 | #include <vector>
20 | 
21 | #include <dispenso/platform.h>
22 | 
23 | namespace dispenso {
24 | 
25 | /**
26 |  * A pool allocator to help reduce calls to the underlying allocation and deallocation functions.
27 |  **/
28 | template <bool kThreadSafe>
29 | class PoolAllocatorT {
30 |  public:
31 |   /**
32 |    * Construct a PoolAllocator.
33 |    *
34 |    * @param chunkSize The chunk size for each pool allocation
35 |    * @param allocSize The size of underlying slabs to be chunked
36 |    * @param allocFunc The underlying allocation function for allocating slabs
37 |    * @param deallocFunc The underlying deallocation function.  Currently only called on destruction.
38 |    **/
39 |   DISPENSO_DLL_ACCESS PoolAllocatorT(
40 |       size_t chunkSize,
41 |       size_t allocSize,
42 |       std::function<void*(size_t)> allocFunc,
43 |       std::function<void(void*)> deallocFunc);
44 | 
45 |   /**
46 |    * Allocate a chunk from a slab
47 |    *
48 |    * @return The pointer to a buffer of chunkSize bytes
49 |    **/
50 |   DISPENSO_DLL_ACCESS char* alloc();
51 | 
52 |   /**
53 |    * Deallocate a previously allocated chunk
54 |    *
55 |    * @param ptr The chunk to return to the available pool
56 |    **/
57 |   DISPENSO_DLL_ACCESS void dealloc(char* ptr);
58 | 
59 |   /**
60 |    * Effectively dealloc all previously allocated chunks.  Useful for arenas.
61 |    * This function is not thread safe, and no previously allocated chunks may be dealloc'd after
62 |    * clear.
63 |    **/
64 |   DISPENSO_DLL_ACCESS void clear();
65 | 
66 |   /**
67 |    * Get the total capicity allocated in chunks (how many alloc() could be called without triggering
68 |    * allocFunc() if all chunks were available)
69 |    **/
70 |   size_t totalChunkCapacity() const {
71 |     return (backingAllocs2_.size() + backingAllocs_.size()) * chunksPerAlloc_;
72 |   }
73 |   /**
74 |    * Destruct a PoolAllocator
75 |    **/
76 |   DISPENSO_DLL_ACCESS ~PoolAllocatorT();
77 | 
78 |  private:
79 |   const size_t chunkSize_;
80 |   const size_t allocSize_;
81 |   const size_t chunksPerAlloc_;
82 | 
83 |   std::function<void*(size_t)> allocFunc_;
84 |   std::function<void(void*)> deallocFunc_;
85 | 
86 |   // Use of a spin lock was found to be faster than std::mutex in benchmarks.
87 |   alignas(kCacheLineSize) std::atomic<uint32_t> backingAllocLock_{0};
88 |   std::vector<char*> backingAllocs_;
89 |   std::vector<char*> backingAllocs2_;
90 | 
91 |   std::vector<char*> chunks_;
92 | };
93 | 
94 | using PoolAllocator = PoolAllocatorT<true>;
95 | using NoLockPoolAllocator = PoolAllocatorT<false>;
96 | 
97 | } // namespace dispenso
98 | 


--------------------------------------------------------------------------------
/dispenso/utils/graph_dot.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the MIT license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | #include <dispenso/graph.h>
 9 | #include <fstream>
10 | #include <string>
11 | #include <unordered_map>
12 | 
13 | namespace detail {
14 | inline std::string getName(
15 |     const void* ptr,
16 |     const size_t index,
17 |     const std::unordered_map<uintptr_t, std::string>* nodeNames) {
18 |   const uintptr_t key = reinterpret_cast<uintptr_t>(ptr);
19 |   if (nodeNames) {
20 |     auto it = nodeNames->find(key);
21 |     if (it != nodeNames->end()) {
22 |       return it->second;
23 |     }
24 |   }
25 |   return std::to_string(index);
26 | }
27 | } // namespace detail
28 | 
29 | namespace dispenso {
30 | template <typename G>
31 | void graphsToDot(
32 |     const char* filename,
33 |     const G& graph,
34 |     const std::unordered_map<uintptr_t, std::string>* nodeNames) {
35 |   using SubgraphType = typename G::SubgraphType;
36 |   using NodeType = typename G::NodeType;
37 |   std::ofstream datfile(filename);
38 |   datfile << R"dot(digraph {
39 |   rankdir = LR
40 |   node [shape = rectangle, style = filled, colorscheme=pastel19]
41 |   graph [style = filled, color = Gray95]
42 | 
43 |   subgraph cluster_l { label = "Legend"; style=solid; color=black
44 |     empty1 [style = invis, shape=point]
45 |     empty2 [style = invis, shape=point]
46 |     incomplete [color = 1]
47 |     completed [color = 2]
48 |     incomplete -> empty1 [label = "normal"]
49 |     completed -> empty2 [arrowhead = onormal,label = "bidirectional\lpropagation"]
50 |   }
51 | )dot";
52 | 
53 |   const size_t numSubgraphs = graph.numSubgraphs();
54 |   for (size_t i = 0; i < numSubgraphs; ++i) {
55 |     const SubgraphType& s = graph.subgraph(i);
56 |     if (i != 0) {
57 |       datfile << "  " << "subgraph cluster_" << i << " { label = \""
58 |               << ::detail::getName(&s, i, nodeNames) << "\"\n";
59 |     }
60 |     const size_t numNodes = s.numNodes();
61 |     for (size_t j = 0; j < numNodes; ++j) {
62 |       const NodeType& node = s.node(j);
63 |       datfile << "    " << reinterpret_cast<uintptr_t>(&node)
64 |               << " [color = " << (node.isCompleted() ? 2 : 1);
65 |       datfile << " label = \"" << ::detail::getName(&node, j, nodeNames) << "\"]\n";
66 |     }
67 | 
68 |     if (i != 0) {
69 |       datfile << "  }\n";
70 |     }
71 |   }
72 | 
73 |   graph.forEachNode([&](const NodeType& node) {
74 |     node.forEachDependent([&](const dispenso::Node& d) {
75 |       datfile << "    " << reinterpret_cast<uintptr_t>(&node) << " -> "
76 |               << reinterpret_cast<uintptr_t>(&d);
77 | 
78 |       if (std::is_same<dispenso::BiPropNode, NodeType>::value) {
79 |         const auto& node1 = static_cast<const dispenso::BiPropNode&>(node);
80 |         const auto& node2 = static_cast<const dispenso::BiPropNode&>(d);
81 |         datfile << (node1.isSameSet(node2) ? "[arrowhead=onormal]" : "");
82 |       }
83 |       datfile << '\n';
84 |     });
85 |   });
86 | 
87 |   datfile << "}";
88 |   datfile.close();
89 | }
90 | } // namespace dispenso
91 | 


--------------------------------------------------------------------------------
/dispenso/schedulable.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the MIT license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | /**
 9 |  * @file schedulable.h
10 |  * Classes providing simple schedulables that match scheduling interfaces of *TaskSet and ThreadPool
11 |  *
12 |  **/
13 | 
14 | #pragma once
15 | 
16 | #include <dispenso/detail/completion_event_impl.h>
17 | #include <dispenso/task_set.h>
18 | 
19 | namespace dispenso {
20 | 
21 | /**
22 |  * A class fullfilling the Schedulable concept that immediately invokes the functor.  This can be
23 |  * used in place of <code>ThreadPool</code> or <code>TaskSet</code> with <code>Future</code>s at
24 |  * construction or through <code>then</code>, or it may be used in TimedTask scheduling for
25 |  * short-running tasks.
26 |  **/
27 | class ImmediateInvoker {
28 |  public:
29 |   /**
30 |    * Schedule a functor to be executed.  It will be invoked immediately.
31 |    *
32 |    * @param f The functor to be executed.  <code>f</code>'s signature must match void().  Best
33 |    * performance will come from passing lambdas, other concrete functors, or OnceFunction, but
34 |    * std::function or similarly type-erased objects will also work.
35 |    **/
36 |   template <typename F>
37 |   void schedule(F&& f) const {
38 |     f();
39 |   }
40 | 
41 |   /**
42 |    * Schedule a functor to be executed.  It is a bit oxymoronical to call this function, since
43 |    * ForceQueuingTag will have no effect, and it's use is discouraged.
44 |    *
45 |    **/
46 |   template <typename F>
47 |   void schedule(F&& f, ForceQueuingTag) const {
48 |     f();
49 |   }
50 | };
51 | 
52 | constexpr ImmediateInvoker kImmediateInvoker;
53 | 
54 | /**
55 |  * A class fullfilling the Schedulable concept that always invokes on a new thread.  This can be
56 |  * used in place of <code>ThreadPool</code> or <code>TaskSet</code> with <code>Future</code>s at
57 |  * construction or through <code>then</code>.
58 |  **/
59 | class NewThreadInvoker {
60 |  public:
61 |   /**
62 |    * Schedule a functor to be executed on a new thread.
63 |    *
64 |    * @param f The functor to be executed.  <code>f</code>'s signature must match void().  Best
65 |    * performance will come from passing lambdas, other concrete functors, or OnceFunction, but
66 |    * std::function or similarly type-erased objects will also work.
67 |    **/
68 |   template <typename F>
69 |   void schedule(F&& f) const {
70 |     schedule(std::forward<F>(f), ForceQueuingTag());
71 |   }
72 |   /**
73 |    * Schedule a functor to be executed on a new thread.
74 |    *
75 |    * @param f The functor to be executed.  <code>f</code>'s signature must match void().  Best
76 |    * performance will come from passing lambdas, other concrete functors, or OnceFunction, but
77 |    * std::function or similarly type-erased objects will also work.
78 |    **/
79 |   template <typename F>
80 |   void schedule(F&& f, ForceQueuingTag) const {
81 |     std::thread thread([f = std::move(f)]() { f(); });
82 |     thread.detach();
83 |   }
84 | 
85 |  private:
86 | };
87 | 
88 | constexpr NewThreadInvoker kNewThreadInvoker;
89 | 
90 | } // namespace dispenso
91 | 


--------------------------------------------------------------------------------
/tests/rw_lock_test.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
  3 |  *
  4 |  * This source code is licensed under the MIT license found in the
  5 |  * LICENSE file in the root directory of this source tree.
  6 |  */
  7 | 
  8 | #include <dispenso/rw_lock.h>
  9 | 
 10 | #include <chrono>
 11 | #include <mutex>
 12 | #include <shared_mutex>
 13 | #include <thread>
 14 | 
 15 | #include <gtest/gtest.h>
 16 | 
 17 | using namespace std::chrono_literals;
 18 | 
 19 | TEST(RWLock, SimpleUncontested) {
 20 |   dispenso::RWLock mtx;
 21 |   int foo = 0;
 22 |   std::unique_lock<dispenso::RWLock> lk(mtx);
 23 |   foo = 1;
 24 | 
 25 |   lk.unlock();
 26 | 
 27 |   EXPECT_EQ(foo, 1);
 28 | }
 29 | 
 30 | TEST(RWLock, BasicWriterTest) {
 31 |   int count = 0;
 32 |   dispenso::RWLock mtx;
 33 |   constexpr int kPerThreadTotal = 100000;
 34 | 
 35 |   auto toRun = [&]() {
 36 |     for (int i = 0; i < kPerThreadTotal; ++i) {
 37 |       std::unique_lock<dispenso::RWLock> lk(mtx);
 38 |       ++count;
 39 |     }
 40 |   };
 41 | 
 42 |   std::thread thread0(toRun);
 43 |   std::thread thread1(toRun);
 44 | 
 45 |   thread0.join();
 46 |   thread1.join();
 47 | 
 48 |   EXPECT_EQ(count, 2 * kPerThreadTotal);
 49 | }
 50 | 
 51 | TEST(RWLock, HighContentionReaderWriterTest) {
 52 |   int count = 0;
 53 |   dispenso::RWLock mtx;
 54 |   constexpr int kPerThreadTotal = 100000;
 55 | 
 56 |   auto toRunWriter = [&]() {
 57 |     for (int i = 0; i < kPerThreadTotal; ++i) {
 58 |       std::unique_lock<dispenso::RWLock> lk(mtx);
 59 |       ++count;
 60 |     }
 61 |   };
 62 | 
 63 |   int64_t someVal = 0;
 64 | 
 65 |   auto toRunReader = [&]() {
 66 |     for (int i = 0; i < kPerThreadTotal; ++i) {
 67 |       std::shared_lock<dispenso::RWLock> lk(mtx);
 68 |       someVal += count;
 69 |     }
 70 |   };
 71 | 
 72 |   std::thread thread0(toRunWriter);
 73 |   std::thread thread1(toRunReader);
 74 | 
 75 |   thread0.join();
 76 |   thread1.join();
 77 | 
 78 |   EXPECT_EQ(count, kPerThreadTotal);
 79 |   EXPECT_GE(someVal, 0);
 80 | }
 81 | 
 82 | TEST(RWLock, ReaderWriterTest) {
 83 |   int guardedCount = 0;
 84 |   dispenso::RWLock mtx;
 85 |   constexpr int kWriterTotal = 100;
 86 |   constexpr int kReaderTotal = 100000;
 87 | 
 88 |   auto toRunWriter = [&]() {
 89 |     for (int i = 0; i < kWriterTotal; ++i) {
 90 |       std::unique_lock<dispenso::RWLock> lk(mtx);
 91 |       ++guardedCount;
 92 |       lk.unlock();
 93 |       // Just hang out for a while til we write again.
 94 |       std::this_thread::sleep_for(1ms);
 95 |     }
 96 |   };
 97 | 
 98 |   int64_t sum = 0;
 99 | 
100 |   auto toRunReader = [&]() {
101 |     for (int i = 0; i < kReaderTotal; ++i) {
102 |       std::shared_lock<dispenso::RWLock> lk(mtx);
103 |       sum += guardedCount;
104 |     }
105 |   };
106 | 
107 |   std::thread thread0(toRunWriter);
108 |   std::thread thread1(toRunReader);
109 | 
110 |   thread0.join();
111 |   thread1.join();
112 | 
113 |   EXPECT_EQ(guardedCount, kWriterTotal);
114 |   EXPECT_GE(sum, 0);
115 | }
116 | 
117 | TEST(RWLock, TestAlignment) {
118 |   static_assert(
119 |       alignof(dispenso::RWLock) >= dispenso::kCacheLineSize,
120 |       "Somehow RWLock not aligned to avoid false sharing");
121 |   static_assert(
122 |       alignof(dispenso::UnalignedRWLock) < dispenso::kCacheLineSize,
123 |       "UnalignedRWLock is overaligned");
124 | }
125 | 


--------------------------------------------------------------------------------
/dispenso/tsan_annotations.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
 3 |  *
 4 |  * This source code is licensed under the MIT license found in the
 5 |  * LICENSE file in the root directory of this source tree.
 6 |  */
 7 | 
 8 | /**
 9 |  * @file tsan_annotations.h
10 |  * This file exposes a set of macros for ignoring tsan errors.  These should generally not
11 |  * be used just to shut up TSAN, because most of the time, TSAN reports real bugs.  They should be
12 |  * used only when there is a high level of certainty that TSAN is spitting out a false positive, as
13 |  * can occasionally happen with lock-free algorithms.
14 |  *
15 |  * When these are required, it is best to keep the scope as small as possible to avoid blinding TSAN
16 |  * to real bugs. Note that several libraries already expose macros like these, but we want to
17 |  * keep dependencies to a bare minimum.
18 |  **/
19 | 
20 | #pragma once
21 | 
22 | #include <dispenso/platform.h>
23 | 
24 | #if defined(__SANITIZE_THREAD__)
25 | #define DISPENSO_HAS_TSAN 1
26 | #elif defined(__has_feature)
27 | #if __has_feature(thread_sanitizer)
28 | #define DISPENSO_HAS_TSAN 1
29 | #else
30 | #define DISPENSO_HAS_TSAN 0
31 | #endif // TSAN
32 | #else
33 | #define DISPENSO_HAS_TSAN 0
34 | #endif // feature
35 | 
36 | #if DISPENSO_HAS_TSAN
37 | 
38 | namespace dispenso {
39 | namespace detail {
40 | 
41 | DISPENSO_DLL_ACCESS void annotateIgnoreWritesBegin(const char* f, int l);
42 | DISPENSO_DLL_ACCESS void annotateIgnoreWritesEnd(const char* f, int l);
43 | DISPENSO_DLL_ACCESS void annotateIgnoreReadsBegin(const char* f, int l);
44 | DISPENSO_DLL_ACCESS void annotateIgnoreReadsEnd(const char* f, int l);
45 | DISPENSO_DLL_ACCESS void
46 | annotateNewMemory(const char* f, int l, const volatile void* address, long size);
47 | DISPENSO_DLL_ACCESS void annotateHappensBefore(const char* f, int l, const volatile void* address);
48 | DISPENSO_DLL_ACCESS void annotateHappensAfter(const char* f, int l, const volatile void* address);
49 | 
50 | } // namespace detail
51 | } // namespace dispenso
52 | 
53 | #define DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN() \
54 |   ::dispenso::detail::annotateIgnoreWritesBegin(__FILE__, __LINE__)
55 | #define DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_END() \
56 |   ::dispenso::detail::annotateIgnoreWritesEnd(__FILE__, __LINE__)
57 | #define DISPENSO_TSAN_ANNOTATE_IGNORE_READS_BEGIN() \
58 |   ::dispenso::detail::annotateIgnoreReadsBegin(__FILE__, __LINE__)
59 | #define DISPENSO_TSAN_ANNOTATE_IGNORE_READS_END() \
60 |   ::dispenso::detail::annotateIgnoreReadsEnd(__FILE__, __LINE__)
61 | #define DISPENSO_TSAN_ANNOTATE_NEW_MEMORY(address, size) \
62 |   ::dispenso::detail::annotateNewMemory(__FILE__, __LINE__, address, size)
63 | #define DISPENSO_TSAN_ANNOTATE_HAPPENS_BEFORE(address) \
64 |   ::dispenso::detail::annotateHappensBefore(__FILE__, __LINE__, address)
65 | #define DISPENSO_TSAN_ANNOTATE_HAPPENS_AFTER(address) \
66 |   ::dispenso::detail::annotateHappensAfter(__FILE__, __LINE__, address)
67 | 
68 | #else
69 | 
70 | #define DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN()
71 | #define DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_END()
72 | #define DISPENSO_TSAN_ANNOTATE_IGNORE_READS_BEGIN()
73 | #define DISPENSO_TSAN_ANNOTATE_IGNORE_READS_END()
74 | #define DISPENSO_TSAN_ANNOTATE_NEW_MEMORY(address, size)
75 | #define DISPENSO_TSAN_ANNOTATE_HAPPENS_BEFORE(address)
76 | #define DISPENSO_TSAN_ANNOTATE_HAPPENS_AFTER(address)
77 | 
78 | #endif // DISPENSO_HAS_TSAN
79 | 


--------------------------------------------------------------------------------
/dispenso/once_function.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
  3 |  *
  4 |  * This source code is licensed under the MIT license found in the
  5 |  * LICENSE file in the root directory of this source tree.
  6 |  */
  7 | 
  8 | /**
  9 |  * @file once_function.h
 10 |  * A file providing OnceFunction, a class providing void() signature for closure to be called only
 11 |  * once.  It is built to be cheap to create and move.
 12 |  **/
 13 | 
 14 | #pragma once
 15 | 
 16 | #include <utility>
 17 | 
 18 | #include <dispenso/detail/once_callable_impl.h>
 19 | 
 20 | namespace dispenso {
 21 | namespace detail {
 22 | template <typename Result>
 23 | class FutureBase;
 24 | template <typename Result>
 25 | class FutureImplBase;
 26 | } // namespace detail
 27 | 
 28 | /**
 29 |  * A class fullfilling the void() signature, and operator() must be called exactly once for valid
 30 |  * <code>OnceFunction</code>s.  This class can be much more efficient than std::function for type
 31 |  * erasing functors without too much state (currently < ~250 bytes).
 32 |  * @note The wrapped type-erased functor in OnceFunction is *not* deleted upon destruction, but
 33 |  * rather when operator() is called.  It is the user's responsibility to ensure that operator() is
 34 |  * called.
 35 |  *
 36 |  **/
 37 | class OnceFunction {
 38 |  public:
 39 |   /**
 40 |    * Construct a <code>OnceFunction</code> with invalid state.
 41 |    **/
 42 |   OnceFunction()
 43 | #if defined DISPENSO_DEBUG
 44 |       : onceCallable_(nullptr)
 45 | #endif // DISPENSO_DEBUG
 46 |   {
 47 |   }
 48 | 
 49 |   /**
 50 |    * Construct a <code>OnceFunction</code> with a valid functor.
 51 |    *
 52 |    * @param f A functor with signature void().  Ideally this should be a concrete functor (e.g. from
 53 |    * lambda), though it will work with e.g. std::function.  The downside in the latter case is extra
 54 |    * overhead for double type erasure.
 55 |    **/
 56 |   template <typename F>
 57 |   OnceFunction(F&& f) : onceCallable_(detail::createOnceCallable(std::forward<F>(f))) {}
 58 | 
 59 |   OnceFunction(const OnceFunction& other) = delete;
 60 | 
 61 |   OnceFunction(OnceFunction&& other) : onceCallable_(other.onceCallable_) {
 62 | #if defined DISPENSO_DEBUG
 63 |     other.onceCallable_ = nullptr;
 64 | #endif // DISPENSO_DEBUG
 65 |   }
 66 | 
 67 |   OnceFunction& operator=(OnceFunction&& other) {
 68 |     onceCallable_ = other.onceCallable_;
 69 | #if defined DISPENSO_DEBUG
 70 |     if (&other != this) {
 71 |       other.onceCallable_ = nullptr;
 72 |     }
 73 | #endif // DISPENSO_DEBUG
 74 |     return *this;
 75 |   }
 76 | 
 77 |   /**
 78 |    * Invoke the type-erased functor.  This function must be called exactly once.  Fewer will result
 79 |    * in a leak, while more will invoke on an invalid object.
 80 |    **/
 81 |   void operator()() const {
 82 | #if defined DISPENSO_DEBUG
 83 |     assert(onceCallable_ != nullptr && "Must not use OnceFunction more than once!");
 84 | #endif // DISPENSO_DEBUG
 85 | 
 86 |     onceCallable_->run();
 87 | 
 88 | #if defined DISPENSO_DEBUG
 89 |     onceCallable_ = nullptr;
 90 | #endif // DISPENSO_DEBUG
 91 |   }
 92 | 
 93 |  private:
 94 |   OnceFunction(detail::OnceCallable* func, bool) : onceCallable_(func) {}
 95 | 
 96 |   mutable detail::OnceCallable* onceCallable_;
 97 | 
 98 |   template <typename Result>
 99 |   friend class detail::FutureBase;
100 |   template <typename Result>
101 |   friend class detail::FutureImplBase;
102 | };
103 | 
104 | } // namespace dispenso
105 | 


--------------------------------------------------------------------------------
/tests/latch_test.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
  3 |  *
  4 |  * This source code is licensed under the MIT license found in the
  5 |  * LICENSE file in the root directory of this source tree.
  6 |  */
  7 | 
  8 | #include <deque>
  9 | #include <thread>
 10 | 
 11 | #include <dispenso/latch.h>
 12 | 
 13 | #include <gtest/gtest.h>
 14 | 
 15 | using namespace std::chrono_literals;
 16 | 
 17 | TEST(Latch, ArriveAndWait) {
 18 |   size_t publishData = 0;
 19 |   dispenso::Latch latch(3);
 20 | 
 21 |   std::deque<std::thread> threads;
 22 | 
 23 |   for (size_t i = 0; i < 2; ++i) {
 24 |     threads.emplace_back([&latch, &publishData]() {
 25 |       latch.arrive_and_wait();
 26 |       EXPECT_EQ(publishData, 3);
 27 |     });
 28 |   }
 29 | 
 30 |   // Give plenty of time for hijinx if there were any bug.
 31 |   std::this_thread::sleep_for(10ms);
 32 | 
 33 |   publishData = 3;
 34 | 
 35 |   // Wait cannot succeed until we also throw our hat in the ring, since we have 3 threads in the
 36 |   // group, but only two threads waiting to check for a new value of publishData.  We do this
 37 |   // after setting the value of publishData, from only one thread (main thread).  After
 38 |   // arrive_and_wait, wait succeeds, and waiting threads are woken, and they should see the correct
 39 |   // value of publishData.
 40 |   latch.arrive_and_wait();
 41 | 
 42 |   for (auto& t : threads) {
 43 |     t.join();
 44 |   }
 45 | }
 46 | 
 47 | TEST(Latch, CountDown) {
 48 |   size_t publishData = 0;
 49 |   dispenso::Latch latch(3);
 50 | 
 51 |   std::deque<std::thread> threads;
 52 | 
 53 |   for (size_t i = 0; i < 2; ++i) {
 54 |     threads.emplace_back([&latch, &publishData]() {
 55 |       latch.count_down();
 56 | 
 57 |       if (latch.try_wait()) {
 58 |         EXPECT_EQ(publishData, 3);
 59 |       } else {
 60 |         latch.wait();
 61 |         EXPECT_EQ(publishData, 3);
 62 |       }
 63 |     });
 64 |   }
 65 | 
 66 |   publishData = 3;
 67 | 
 68 |   // Wait cannot succeed until we also throw our hat in the ring, since we have 3 threads in the
 69 |   // group, but only two threads waiting to check for a new value of publishData.  We do this
 70 |   // after setting the value of publishData, from only one thread (main thread).  After count_down,
 71 |   // wait succeeds, and waiting threads are woken, and they should see the correct value of
 72 |   // publishData.
 73 |   latch.count_down();
 74 | 
 75 |   // Wait isn't required here.
 76 | 
 77 |   for (auto& t : threads) {
 78 |     t.join();
 79 |   }
 80 | }
 81 | 
 82 | TEST(Latch, ArriveAndWaitWithCountDown) {
 83 |   size_t publishData = 0;
 84 |   dispenso::Latch latch(3);
 85 | 
 86 |   std::deque<std::thread> threads;
 87 | 
 88 |   for (size_t i = 0; i < 2; ++i) {
 89 |     threads.emplace_back([&latch, &publishData]() {
 90 |       latch.arrive_and_wait();
 91 |       EXPECT_EQ(publishData, 3);
 92 |     });
 93 |   }
 94 | 
 95 |   publishData = 3;
 96 | 
 97 |   // Wait cannot succeed until we also throw our hat in the ring, since we have 3 threads in the
 98 |   // group, but only two threads waiting to check for a new value of publishData.  We do this
 99 |   // after setting the value of publishData, from only one thread (main thread).  After count_down,
100 |   // wait succeeds, and waiting threads are woken, and they should see the correct value of
101 |   // publishData.
102 |   latch.count_down();
103 | 
104 |   // Wait isn't required here.
105 | 
106 |   for (auto& t : threads) {
107 |     t.join();
108 |   }
109 | }
110 | 


--------------------------------------------------------------------------------
/dispenso/third-party/moodycamel/LICENSE.md:
--------------------------------------------------------------------------------
 1 | This license file applies to everything in this repository except that which
 2 | is explicitly annotated as being written by other authors, i.e. the Boost
 3 | queue (included in the benchmarks for comparison), Intel's TBB library (ditto),
 4 | dlib::pipe (ditto),
 5 | the CDSChecker tool (used for verification), the Relacy model checker (ditto),
 6 | and Jeff Preshing's semaphore implementation (used in the blocking queue) which
 7 | has a zlib license (embedded in lightweightsempahore.h).
 8 | 
 9 | ---
10 | 
11 | Simplified BSD License:
12 | 
13 | Copyright (c) 2013-2016, Cameron Desrochers.
14 | All rights reserved.
15 | 
16 | Redistribution and use in source and binary forms, with or without modification,
17 | are permitted provided that the following conditions are met:
18 | 
19 | - Redistributions of source code must retain the above copyright notice, this list of
20 | conditions and the following disclaimer.
21 | - Redistributions in binary form must reproduce the above copyright notice, this list of
22 | conditions and the following disclaimer in the documentation and/or other materials
23 | provided with the distribution.
24 | 
25 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
26 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
27 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
28 | THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
30 | OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
32 | TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
33 | EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 | 
35 | ---
36 | 
37 | I have also chosen to dual-license under the Boost Software License as an alternative to
38 | the Simplified BSD license above:
39 | 
40 | Boost Software License - Version 1.0 - August 17th, 2003
41 | 
42 | Permission is hereby granted, free of charge, to any person or organization
43 | obtaining a copy of the software and accompanying documentation covered by
44 | this license (the "Software") to use, reproduce, display, distribute,
45 | execute, and transmit the Software, and to prepare derivative works of the
46 | Software, and to permit third-parties to whom the Software is furnished to
47 | do so, all subject to the following:
48 | 
49 | The copyright notices in the Software and this entire statement, including
50 | the above license grant, this restriction and the following disclaimer,
51 | must be included in all copies of the Software, in whole or in part, and
52 | all derivative works of the Software, unless such copies or derivative
53 | works are solely in the form of machine-executable object code generated by
54 | a source language processor.
55 | 
56 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
57 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
58 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
59 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
60 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
61 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
62 | DEALINGS IN THE SOFTWARE.
63 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
 1 | # Code of Conduct
 2 | 
 3 | ## Our Pledge
 4 | 
 5 | In the interest of fostering an open and welcoming environment, we as
 6 | contributors and maintainers pledge to make participation in our project and
 7 | our community a harassment-free experience for everyone, regardless of age, body
 8 | size, disability, ethnicity, sex characteristics, gender identity and expression,
 9 | level of experience, education, socio-economic status, nationality, personal
10 | appearance, race, religion, or sexual identity and orientation.
11 | 
12 | ## Our Standards
13 | 
14 | Examples of behavior that contributes to creating a positive environment
15 | include:
16 | 
17 | * Using welcoming and inclusive language
18 | * Being respectful of differing viewpoints and experiences
19 | * Gracefully accepting constructive criticism
20 | * Focusing on what is best for the community
21 | * Showing empathy towards other community members
22 | 
23 | Examples of unacceptable behavior by participants include:
24 | 
25 | * The use of sexualized language or imagery and unwelcome sexual attention or
26 |   advances
27 | * Trolling, insulting/derogatory comments, and personal or political attacks
28 | * Public or private harassment
29 | * Publishing others' private information, such as a physical or electronic
30 |   address, without explicit permission
31 | * Other conduct which could reasonably be considered inappropriate in a
32 |   professional setting
33 | 
34 | ## Our Responsibilities
35 | 
36 | Project maintainers are responsible for clarifying the standards of acceptable
37 | behavior and are expected to take appropriate and fair corrective action in
38 | response to any instances of unacceptable behavior.
39 | 
40 | Project maintainers have the right and responsibility to remove, edit, or
41 | reject comments, commits, code, wiki edits, issues, and other contributions
42 | that are not aligned to this Code of Conduct, or to ban temporarily or
43 | permanently any contributor for other behaviors that they deem inappropriate,
44 | threatening, offensive, or harmful.
45 | 
46 | ## Scope
47 | 
48 | This Code of Conduct applies within all project spaces, and it also applies when
49 | an individual is representing the project or its community in public spaces.
50 | Examples of representing a project or community include using an official
51 | project e-mail address, posting via an official social media account, or acting
52 | as an appointed representative at an online or offline event. Representation of
53 | a project may be further defined and clarified by project maintainers.
54 | 
55 | ## Enforcement
56 | 
57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
58 | reported by contacting the project team at <opensource-conduct@fb.com>. All
59 | complaints will be reviewed and investigated and will result in a response that
60 | is deemed necessary and appropriate to the circumstances. The project team is
61 | obligated to maintain confidentiality with regard to the reporter of an incident.
62 | Further details of specific enforcement policies may be posted separately.
63 | 
64 | Project maintainers who do not follow or enforce the Code of Conduct in good
65 | faith may face temporary or permanent repercussions as determined by other
66 | members of the project's leadership.
67 | 
68 | ## Attribution
69 | 
70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
72 | 
73 | [homepage]: https://www.contributor-covenant.org
74 | 
75 | For answers to common questions about this code of conduct, see
76 | https://www.contributor-covenant.org/faq
77 | 


--------------------------------------------------------------------------------
/benchmarks/rw_lock_benchmark.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
  3 |  *
  4 |  * This source code is licensed under the MIT license found in the
  5 |  * LICENSE file in the root directory of this source tree.
  6 |  */
  7 | 
  8 | // This benchmark relies on shared_lock from C++17
  9 | #if __cplusplus >= 201703L
 10 | 
 11 | #include <dispenso/rw_lock.h>
 12 | 
 13 | #include <map>
 14 | 
 15 | #include <shared_mutex>
 16 | 
 17 | #include <dispenso/task_set.h>
 18 | 
 19 | #include "thread_benchmark_common.h"
 20 | 
 21 | constexpr size_t kNumValues = 1 << 20;
 22 | 
 23 | // Precondition: Start < writePeriod.  Note that this is enforced in BM_serial and BM_parallel
 24 | template <typename MtxType>
 25 | int64_t iterate(MtxType& mtx, std::vector<int64_t>& values, int start, int writePeriod) {
 26 |   int64_t total = 0;
 27 |   int w = start;
 28 |   for (auto& p : values) {
 29 |     if (w++ == writePeriod) {
 30 |       std::lock_guard<MtxType> lk(mtx);
 31 |       ++p;
 32 |       w = 0;
 33 |     } else {
 34 |       std::shared_lock<MtxType> lk(mtx);
 35 |       total += p;
 36 |     }
 37 |   }
 38 |   return total;
 39 | }
 40 | 
 41 | struct NopMutex {
 42 |   void lock() {}
 43 |   void unlock() {}
 44 |   void lock_shared() {}
 45 |   void unlock_shared() {}
 46 | };
 47 | 
 48 | template <typename MutexT>
 49 | void BM_serial(benchmark::State& state) {
 50 |   int writePeriod = state.range(0);
 51 |   std::vector<int64_t> values(kNumValues);
 52 |   int64_t total = 0;
 53 |   MutexT mtx;
 54 |   int start = 0;
 55 |   for (auto UNUSED_VAR : state) {
 56 |     total += iterate(mtx, values, start++, writePeriod);
 57 |     if (start == writePeriod) {
 58 |       start = 0;
 59 |     }
 60 |   }
 61 | 
 62 |   benchmark::DoNotOptimize(total);
 63 | }
 64 | 
 65 | static void CustomArgumentsSerial(benchmark::internal::Benchmark* b) {
 66 |   for (int j : {2, 8, 32, 128, 512}) {
 67 |     b->Args({j});
 68 |   }
 69 | }
 70 | 
 71 | template <typename MutexT>
 72 | void BM_parallel(benchmark::State& state) {
 73 |   int concurrency = state.range(0);
 74 |   int writePeriod = state.range(1);
 75 |   std::vector<int64_t> values(kNumValues);
 76 |   std::atomic<int64_t> total(0);
 77 |   MutexT mtx;
 78 |   int start = 0;
 79 | 
 80 |   dispenso::TaskSet tasks(dispenso::globalThreadPool());
 81 |   for (auto UNUSED_VAR : state) {
 82 |     for (int c = 0; c < concurrency; ++c) {
 83 |       tasks.schedule([&total, start, &mtx, &values, writePeriod]() {
 84 |         total.fetch_add(iterate(mtx, values, start, writePeriod), std::memory_order_acq_rel);
 85 |       });
 86 |       if (++start == writePeriod) {
 87 |         start = 0;
 88 |       }
 89 |     }
 90 |     tasks.wait();
 91 |   }
 92 | 
 93 |   benchmark::DoNotOptimize(total.load(std::memory_order_acquire));
 94 | }
 95 | 
 96 | static void CustomArgumentsParallel(benchmark::internal::Benchmark* b) {
 97 |   for (int j : {2, 8, 32, 128, 512}) {
 98 |     for (int s : {1, 2, 4, 8, 16, 32}) {
 99 |       if (s > static_cast<int>(std::thread::hardware_concurrency())) {
100 |         break;
101 |       }
102 |       b->Args({s, j});
103 |     }
104 |   }
105 | }
106 | 
107 | BENCHMARK_TEMPLATE(BM_serial, NopMutex)->Apply(CustomArgumentsSerial)->UseRealTime();
108 | 
109 | BENCHMARK_TEMPLATE(BM_serial, std::shared_mutex)->Apply(CustomArgumentsSerial)->UseRealTime();
110 | 
111 | BENCHMARK_TEMPLATE(BM_serial, dispenso::RWLock)->Apply(CustomArgumentsSerial)->UseRealTime();
112 | 
113 | BENCHMARK_TEMPLATE(BM_parallel, std::shared_mutex)->Apply(CustomArgumentsParallel)->UseRealTime();
114 | 
115 | BENCHMARK_TEMPLATE(BM_parallel, dispenso::RWLock)->Apply(CustomArgumentsParallel)->UseRealTime();
116 | 
117 | #endif // C++17
118 | 


--------------------------------------------------------------------------------
/tests/concurrent_object_arena_test.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
  3 |  *
  4 |  * This source code is licensed under the MIT license found in the
  5 |  * LICENSE file in the root directory of this source tree.
  6 |  */
  7 | 
  8 | #include <dispenso/concurrent_object_arena.h>
  9 | #include <dispenso/task_set.h>
 10 | 
 11 | #include <gtest/gtest.h>
 12 | 
 13 | TEST(ConcurrentObjectArena, ParallelGrowBy) {
 14 |   constexpr size_t numTasks = 20;
 15 |   constexpr size_t numLoops = 100;
 16 |   constexpr size_t delta = 7;
 17 |   constexpr size_t bufSize = 16;
 18 | 
 19 |   dispenso::ConcurrentObjectArena<size_t> arena(bufSize);
 20 | 
 21 |   dispenso::TaskSet taskSet(dispenso::globalThreadPool());
 22 | 
 23 |   for (size_t ti = 0; ti < numTasks; ++ti) {
 24 |     taskSet.schedule([=, &arena]() {
 25 |       for (size_t i = 0; i < numLoops; i++) {
 26 |         const size_t p = arena.grow_by(delta);
 27 |         for (size_t j = 0; j < delta; j++) {
 28 |           arena[p + j] = ti * numLoops * delta + i;
 29 |         }
 30 |       }
 31 |     });
 32 |   }
 33 |   taskSet.wait();
 34 | 
 35 |   EXPECT_EQ(delta * numLoops * numTasks, arena.size());
 36 |   EXPECT_EQ(arena.capacity() / arena.numBuffers(), arena.getBufferSize(0));
 37 | 
 38 |   size_t totalSize = 0;
 39 |   for (size_t i = 0; i < arena.numBuffers(); ++i) {
 40 |     totalSize += arena.getBufferSize(i);
 41 |   }
 42 |   EXPECT_EQ(totalSize, arena.size());
 43 | 
 44 |   for (size_t i = 0; i < numLoops * numTasks; i++) {
 45 |     const size_t firstElement = arena[i * delta];
 46 |     for (size_t j = 1; j < delta; j++) {
 47 |       EXPECT_EQ(arena[i * delta + j], firstElement);
 48 |     }
 49 |   }
 50 | }
 51 | 
 52 | TEST(ConcurrentObjectArena, ObjectsConstuction) {
 53 |   constexpr size_t defaultValue = 17;
 54 |   constexpr size_t bufSize = 16;
 55 |   constexpr size_t smallGrow = bufSize / 3;
 56 |   constexpr size_t bigGrow = bufSize * 3;
 57 | 
 58 |   struct TestData {
 59 |     TestData() : value(defaultValue) {}
 60 |     size_t value;
 61 |   };
 62 | 
 63 |   dispenso::ConcurrentObjectArena<TestData>* arena =
 64 |       new dispenso::ConcurrentObjectArena<TestData>(bufSize);
 65 | 
 66 |   arena->grow_by(smallGrow);
 67 |   arena->grow_by(bigGrow);
 68 | 
 69 |   const size_t num = arena->size();
 70 |   for (size_t i = 0; i < num; ++i) {
 71 |     EXPECT_EQ((*arena)[i].value, defaultValue);
 72 |   }
 73 | 
 74 |   dispenso::ConcurrentObjectArena<TestData> copyArena(*arena);
 75 | 
 76 |   dispenso::ConcurrentObjectArena<TestData> copyAssignmentArena(bufSize / 2);
 77 |   copyAssignmentArena = *arena;
 78 | 
 79 |   EXPECT_EQ(copyArena.size(), arena->size());
 80 |   EXPECT_EQ(copyAssignmentArena.size(), arena->size());
 81 | 
 82 |   const size_t numBuffers = arena->numBuffers();
 83 |   std::vector<const TestData*> bufferPtrs(numBuffers);
 84 |   for (size_t i = 0; i < numBuffers; ++i) {
 85 |     bufferPtrs[i] = arena->getBuffer(i);
 86 |   }
 87 | 
 88 |   dispenso::ConcurrentObjectArena<TestData> moveArena(std::move(*arena));
 89 | 
 90 |   EXPECT_EQ(arena->size(), 0);
 91 |   EXPECT_EQ(arena->numBuffers(), 0);
 92 |   EXPECT_EQ(arena->capacity(), 0);
 93 | 
 94 |   delete arena;
 95 | 
 96 |   EXPECT_EQ(copyArena.numBuffers(), numBuffers);
 97 |   EXPECT_EQ(copyAssignmentArena.numBuffers(), numBuffers);
 98 | 
 99 |   for (size_t i = 0; i < num; ++i) {
100 |     EXPECT_EQ(copyArena[i].value, defaultValue);
101 |     EXPECT_EQ(copyAssignmentArena[i].value, defaultValue);
102 |   }
103 | 
104 |   for (size_t i = 0; i < numBuffers; ++i) {
105 |     EXPECT_NE(copyArena.getBuffer(i), bufferPtrs[i]);
106 |     EXPECT_NE(copyAssignmentArena.getBuffer(i), bufferPtrs[i]);
107 |     EXPECT_EQ(moveArena.getBuffer(i), bufferPtrs[i]);
108 |   }
109 | }
110 | 


--------------------------------------------------------------------------------
/dispenso/timed_task.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
  3 |  *
  4 |  * This source code is licensed under the MIT license found in the
  5 |  * LICENSE file in the root directory of this source tree.
  6 |  */
  7 | 
  8 | #include <iostream>
  9 | 
 10 | #include <dispenso/detail/quanta.h>
 11 | #include <dispenso/timed_task.h>
 12 | 
 13 | namespace dispenso {
 14 | 
 15 | TimedTaskScheduler::TimedTaskScheduler(ThreadPriority prio) : priority_(prio) {
 16 |   thread_ = std::thread([this, prio]() {
 17 |     detail::registerFineSchedulerQuanta();
 18 |     if (!setCurrentThreadPriority(prio)) {
 19 |       std::cerr << "Couldn't set thread priority" << std::endl;
 20 |     }
 21 |     timeQueueRunLoop();
 22 |   });
 23 | }
 24 | TimedTaskScheduler::~TimedTaskScheduler() {
 25 |   {
 26 |     std::lock_guard<std::mutex> lk(queueMutex_);
 27 |     running_ = false;
 28 |   }
 29 |   epoch_.bumpAndWake();
 30 |   thread_.join();
 31 | }
 32 | 
 33 | void TimedTaskScheduler::kickOffTask(std::shared_ptr<detail::TimedTaskImpl> next, double curTime) {
 34 |   size_t remaining = next->timesToRun.fetch_sub(1, std::memory_order_acq_rel);
 35 |   if (remaining == 1) {
 36 |     auto* np = next.get();
 37 |     np->func(std::move(next));
 38 |   } else if (remaining > 1) {
 39 |     next->func(next);
 40 | 
 41 |     if (next->steady) {
 42 |       next->nextAbsTime += next->period;
 43 |     } else {
 44 |       next->nextAbsTime = curTime + next->period;
 45 |     }
 46 |     std::lock_guard<std::mutex> lk(queueMutex_);
 47 |     tasks_.push(std::move(next));
 48 |   }
 49 | }
 50 | 
 51 | constexpr double kSmallTimeBuffer = 10e-6;
 52 | 
 53 | void TimedTaskScheduler::timeQueueRunLoop() {
 54 | #if defined(_WIN32)
 55 |   constexpr double kSpinYieldBuffer = 1e-3;
 56 |   constexpr double kSpinBuffer = 100e-6;
 57 | #else
 58 |   constexpr double kSpinYieldBuffer = 500e-6;
 59 |   constexpr double kSpinBuffer = 50e-6;
 60 | #endif // platform
 61 |   constexpr double kConvertToUs = 1e6;
 62 | 
 63 |   uint32_t curEpoch = epoch_.current();
 64 | 
 65 |   while (true) {
 66 |     {
 67 |       std::unique_lock<std::mutex> lk(queueMutex_);
 68 |       if (priority_ != getCurrentThreadPriority()) {
 69 |         setCurrentThreadPriority(priority_);
 70 |       }
 71 | 
 72 |       if (!running_) {
 73 |         break;
 74 |       }
 75 |       if (tasks_.empty()) {
 76 |         lk.unlock();
 77 |         curEpoch = epoch_.wait(curEpoch);
 78 |         continue;
 79 |       }
 80 |     }
 81 |     double curTime = getTime();
 82 |     double timeRemaining;
 83 |     std::unique_lock<std::mutex> lk(queueMutex_);
 84 |     timeRemaining = tasks_.top()->nextAbsTime - curTime;
 85 |     if (timeRemaining < kSmallTimeBuffer) {
 86 |       auto next = tasks_.top();
 87 |       tasks_.pop();
 88 |       lk.unlock();
 89 | 
 90 |       kickOffTask(std::move(next), curTime);
 91 |     } else if (timeRemaining < kSpinBuffer) {
 92 |       continue;
 93 |     } else if (timeRemaining < kSpinYieldBuffer) {
 94 |       lk.unlock();
 95 |       std::this_thread::yield();
 96 |       continue;
 97 |     } else {
 98 |       lk.unlock();
 99 |       curEpoch = epoch_.waitFor(
100 |           curEpoch, static_cast<uint64_t>((timeRemaining - kSpinBuffer) * kConvertToUs));
101 |     }
102 |   }
103 | }
104 | 
105 | void TimedTaskScheduler::addTimedTask(std::shared_ptr<detail::TimedTaskImpl> task) {
106 |   double curTime = getTime();
107 |   double timeRemaining;
108 |   timeRemaining = task->nextAbsTime - curTime;
109 |   if (timeRemaining < kSmallTimeBuffer) {
110 |     kickOffTask(std::move(task), curTime);
111 |   } else {
112 |     std::lock_guard<std::mutex> lk(queueMutex_);
113 |     tasks_.push(std::move(task));
114 |   }
115 |   epoch_.bumpAndWake();
116 | }
117 | 
118 | TimedTaskScheduler& globalTimedTaskScheduler() {
119 |   static TimedTaskScheduler scheduler;
120 |   return scheduler;
121 | }
122 | 
123 | } // namespace dispenso
124 | 


--------------------------------------------------------------------------------
/tests/completion_event_test.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
  3 |  *
  4 |  * This source code is licensed under the MIT license found in the
  5 |  * LICENSE file in the root directory of this source tree.
  6 |  */
  7 | 
  8 | #include <deque>
  9 | #include <thread>
 10 | 
 11 | #include <dispenso/completion_event.h>
 12 | 
 13 | #include <gtest/gtest.h>
 14 | 
 15 | TEST(CompletionEvent, NotifyBeforeWait) {
 16 |   dispenso::CompletionEvent event;
 17 | 
 18 |   event.notify();
 19 |   // Should immediately return;
 20 |   event.wait();
 21 | }
 22 | 
 23 | TEST(CompletionEvent, NotifyBeforeWaitFor) {
 24 |   dispenso::CompletionEvent event;
 25 | 
 26 |   event.notify();
 27 |   // Should immediately return;
 28 |   EXPECT_TRUE(event.waitFor(std::chrono::microseconds(1)));
 29 | }
 30 | 
 31 | // In an ideal world, we could expect the following test to loop 10 times or so.  In reality, we
 32 | // can't make such guarantees when it comes to sleep() and wait() functions.  For instance, on Linux
 33 | // with 64 mostly-idle cores, 100 out of 100 runs of this test resulted in looping between 8 and 12
 34 | // times, even under TSAN.  On Mac with 4 less-idle cores, the test would pass about 90 out of 100.
 35 | // Inflating the interval to 7 to 13 passed 98 out of 100.  In the end, we cannot really count on
 36 | // any concrete number of times through the loop (think TSAN, think loaded machine, etc...), and so
 37 | // we simply let this test fall back to "will this time out?".
 38 | TEST(CompletionEvent, WaitForSomeTime) {
 39 |   dispenso::CompletionEvent event;
 40 | 
 41 |   std::thread t([&event]() {
 42 |     std::this_thread::sleep_for(std::chrono::milliseconds(20));
 43 |     event.notify();
 44 |   });
 45 | 
 46 |   while (true) {
 47 |     if (event.waitFor(std::chrono::milliseconds(2))) {
 48 |       break;
 49 |     }
 50 |   }
 51 | 
 52 |   t.join();
 53 | }
 54 | 
 55 | TEST(CompletionEvent, WaitForSomeTimeWithReset) {
 56 |   dispenso::CompletionEvent event;
 57 |   std::atomic<bool> barrier(0);
 58 | 
 59 |   std::thread t([&event, &barrier]() {
 60 |     std::this_thread::sleep_for(std::chrono::milliseconds(20));
 61 |     event.notify();
 62 | 
 63 |     while (!barrier.load(std::memory_order_acquire)) {
 64 |     }
 65 |     std::this_thread::sleep_for(std::chrono::milliseconds(20));
 66 |     event.notify();
 67 |   });
 68 | 
 69 |   while (!(event.waitFor(std::chrono::milliseconds(2)))) {
 70 |   }
 71 | 
 72 |   EXPECT_TRUE(event.waitFor(std::chrono::microseconds(1))) << "This should immediately return true";
 73 | 
 74 |   // No threads waiting, nor notifying, so we can reset.
 75 |   event.reset();
 76 | 
 77 |   // Trigger the barrier so that the event can be notified.
 78 |   barrier.store(1, std::memory_order_release);
 79 | 
 80 |   while (true) {
 81 |     if (event.waitFor(std::chrono::milliseconds(2))) {
 82 |       break;
 83 |     }
 84 |   }
 85 | 
 86 |   t.join();
 87 | }
 88 | 
 89 | TEST(CompletionEvent, EffectiveBarrier) {
 90 |   dispenso::CompletionEvent event;
 91 | 
 92 |   std::deque<std::thread> threads;
 93 | 
 94 |   std::atomic<int> count(0);
 95 | 
 96 |   constexpr int kThreads = 4;
 97 | 
 98 |   for (size_t i = 0; i < kThreads; ++i) {
 99 |     threads.emplace_back([&event, &count]() {
100 |       count.fetch_sub(1, std::memory_order_relaxed);
101 |       event.wait();
102 |       count.fetch_add(2, std::memory_order_relaxed);
103 |     });
104 |   }
105 | 
106 |   while (count.load(std::memory_order_acquire) > -kThreads) {
107 |   }
108 | 
109 |   // Take a long rest in this thread.  This gives us a chance to ensure that the event cannot
110 |   // spurious wake, and begin modifying "count".
111 |   std::this_thread::sleep_for(std::chrono::milliseconds(20));
112 | 
113 |   EXPECT_EQ(-kThreads, count.load(std::memory_order_acquire));
114 | 
115 |   event.notify();
116 | 
117 |   for (auto& t : threads) {
118 |     t.join();
119 |   }
120 | 
121 |   EXPECT_EQ(kThreads, count.load(std::memory_order_acquire));
122 | }
123 | 


--------------------------------------------------------------------------------
/dispenso/pool_allocator.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
  3 |  *
  4 |  * This source code is licensed under the MIT license found in the
  5 |  * LICENSE file in the root directory of this source tree.
  6 |  */
  7 | 
  8 | #include <dispenso/pool_allocator.h>
  9 | 
 10 | namespace dispenso {
 11 | 
 12 | template <bool kThreadSafe>
 13 | PoolAllocatorT<kThreadSafe>::PoolAllocatorT(
 14 |     size_t chunkSize,
 15 |     size_t allocSize,
 16 |     std::function<void*(size_t)> allocFunc,
 17 |     std::function<void(void*)> deallocFunc)
 18 |     : chunkSize_(chunkSize),
 19 |       allocSize_(allocSize),
 20 |       chunksPerAlloc_(allocSize / chunkSize),
 21 |       allocFunc_(std::move(allocFunc)),
 22 |       deallocFunc_(std::move(deallocFunc)) {
 23 |   // Start off with at least enough space to store at least one set of chunks.
 24 |   chunks_.reserve(chunksPerAlloc_);
 25 | }
 26 | 
 27 | template <bool kThreadSafe>
 28 | char* PoolAllocatorT<kThreadSafe>::alloc() {
 29 |   while (true) {
 30 |     uint32_t allocId = 0;
 31 |     if (kThreadSafe) {
 32 |       allocId = backingAllocLock_.fetch_or(1, std::memory_order_acquire);
 33 |     }
 34 | 
 35 |     if (allocId == 0) {
 36 |       if (chunks_.empty()) {
 37 |         char* buffer;
 38 |         if (backingAllocs2_.empty()) {
 39 |           buffer = reinterpret_cast<char*>(allocFunc_(allocSize_));
 40 |         } else {
 41 |           buffer = backingAllocs2_.back();
 42 |           backingAllocs2_.pop_back();
 43 |         }
 44 |         backingAllocs_.push_back(buffer);
 45 |         // Push n-1 values into the chunks_ buffer, and then return the nth.
 46 |         for (size_t i = 0; i < chunksPerAlloc_ - 1; ++i) {
 47 |           chunks_.push_back(buffer);
 48 |           buffer += chunkSize_;
 49 |         }
 50 |         if (kThreadSafe) {
 51 |           backingAllocLock_.store(0, std::memory_order_release);
 52 |         }
 53 |         return buffer;
 54 |       }
 55 |       char* back = chunks_.back();
 56 |       chunks_.pop_back();
 57 |       if (kThreadSafe) {
 58 |         backingAllocLock_.store(0, std::memory_order_release);
 59 |       }
 60 |       return back;
 61 |     } else {
 62 |       std::this_thread::yield();
 63 |     }
 64 |   }
 65 | }
 66 | 
 67 | template <bool kThreadSafe>
 68 | void PoolAllocatorT<kThreadSafe>::dealloc(char* ptr) {
 69 |   // For now do not release any memory back to the deallocFunc until destruction.
 70 |   // TODO(bbudge): Consider cases where we haven't gotten below some threshold of ready chunks
 71 |   // in a while.  In that case, we could begin tracking allocations, and try to assemble entire
 72 |   // starting allocations, possibly deferring a small amount to each alloc call.  This would be
 73 |   // slower, but would ensure we don't get into a situation where we need a bunch of memory up
 74 |   // front, and then never again.
 75 | 
 76 |   while (true) {
 77 |     uint32_t allocId = 0;
 78 |     if (kThreadSafe) {
 79 |       allocId = backingAllocLock_.fetch_or(1, std::memory_order_acquire);
 80 |     }
 81 |     if (allocId == 0) {
 82 |       chunks_.push_back(ptr);
 83 |       if (kThreadSafe) {
 84 |         backingAllocLock_.store(0, std::memory_order_release);
 85 |       }
 86 |       break;
 87 |     }
 88 |   }
 89 | }
 90 | 
 91 | template <bool kThreadSafe>
 92 | void PoolAllocatorT<kThreadSafe>::clear() {
 93 |   chunks_.clear();
 94 |   if (backingAllocs2_.size() < backingAllocs_.size()) {
 95 |     std::swap(backingAllocs2_, backingAllocs_);
 96 |   }
 97 |   for (char* ba : backingAllocs_) {
 98 |     backingAllocs2_.push_back(ba);
 99 |   }
100 |   backingAllocs_.clear();
101 | }
102 | 
103 | template <bool kThreadSafe>
104 | PoolAllocatorT<kThreadSafe>::~PoolAllocatorT() {
105 |   for (char* backing : backingAllocs_) {
106 |     deallocFunc_(backing);
107 |   }
108 |   for (char* backing : backingAllocs2_) {
109 |     deallocFunc_(backing);
110 |   }
111 | }
112 | 
113 | template class PoolAllocatorT<false>;
114 | template class PoolAllocatorT<true>;
115 | 
116 | } // namespace dispenso
117 | 


--------------------------------------------------------------------------------
/dispenso/small_buffer_allocator.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
  3 |  *
  4 |  * This source code is licensed under the MIT license found in the
  5 |  * LICENSE file in the root directory of this source tree.
  6 |  */
  7 | 
  8 | #include <dispenso/detail/small_buffer_allocator_impl.h>
  9 | #include <dispenso/small_buffer_allocator.h>
 10 | 
 11 | #include <new>
 12 | 
 13 | namespace dispenso {
 14 | namespace detail {
 15 | 
 16 | template <size_t kChunkSize>
 17 | SmallBufferGlobals& getSmallBufferGlobals() {
 18 |   // controlled leak here
 19 |   static SmallBufferGlobals* globals = new SmallBufferGlobals();
 20 |   return *globals;
 21 | }
 22 | 
 23 | char* allocSmallBufferImpl(size_t ordinal) {
 24 |   switch (ordinal) {
 25 |     case 0:
 26 |       return detail::SmallBufferAllocator<4>::alloc();
 27 |     case 1:
 28 |       return detail::SmallBufferAllocator<8>::alloc();
 29 |     case 2:
 30 |       return detail::SmallBufferAllocator<16>::alloc();
 31 |     case 3:
 32 |       return detail::SmallBufferAllocator<32>::alloc();
 33 |     case 4:
 34 |       return detail::SmallBufferAllocator<64>::alloc();
 35 |     case 5:
 36 |       return detail::SmallBufferAllocator<128>::alloc();
 37 |     case 6:
 38 |       return detail::SmallBufferAllocator<256>::alloc();
 39 |     default:
 40 |       assert(false && "Invalid small buffer ordinal requested");
 41 |       return nullptr;
 42 |   }
 43 | }
 44 | 
 45 | void deallocSmallBufferImpl(size_t ordinal, void* buf) {
 46 |   switch (ordinal) {
 47 |     case 0:
 48 |       detail::SmallBufferAllocator<4>::dealloc(reinterpret_cast<char*>(buf));
 49 |       break;
 50 |     case 1:
 51 |       detail::SmallBufferAllocator<8>::dealloc(reinterpret_cast<char*>(buf));
 52 |       break;
 53 |     case 2:
 54 |       detail::SmallBufferAllocator<16>::dealloc(reinterpret_cast<char*>(buf));
 55 |       break;
 56 |     case 3:
 57 |       detail::SmallBufferAllocator<32>::dealloc(reinterpret_cast<char*>(buf));
 58 |       break;
 59 |     case 4:
 60 |       detail::SmallBufferAllocator<64>::dealloc(reinterpret_cast<char*>(buf));
 61 |       break;
 62 |     case 5:
 63 |       detail::SmallBufferAllocator<128>::dealloc(reinterpret_cast<char*>(buf));
 64 |       break;
 65 |     case 6:
 66 |       detail::SmallBufferAllocator<256>::dealloc(reinterpret_cast<char*>(buf));
 67 |       break;
 68 |     default:
 69 |       assert(false && "Invalid small buffer ordinal requested");
 70 |   }
 71 | }
 72 | 
 73 | size_t approxBytesAllocatedSmallBufferImpl(size_t ordinal) {
 74 |   switch (ordinal) {
 75 |     case 0:
 76 |       return detail::SmallBufferAllocator<4>::bytesAllocated();
 77 |     case 1:
 78 |       return detail::SmallBufferAllocator<8>::bytesAllocated();
 79 |     case 2:
 80 |       return detail::SmallBufferAllocator<16>::bytesAllocated();
 81 |     case 3:
 82 |       return detail::SmallBufferAllocator<32>::bytesAllocated();
 83 |     case 4:
 84 |       return detail::SmallBufferAllocator<64>::bytesAllocated();
 85 |     case 5:
 86 |       return detail::SmallBufferAllocator<128>::bytesAllocated();
 87 |     case 6:
 88 |       return detail::SmallBufferAllocator<256>::bytesAllocated();
 89 |     default:
 90 |       assert(false && "Invalid small buffer ordinal requested");
 91 |       return 0;
 92 |   }
 93 | }
 94 | 
 95 | template <size_t kChunkSize>
 96 | SmallBufferAllocator<kChunkSize>::PerThreadQueuingData::~PerThreadQueuingData() {
 97 |   enqueue_bulk(buffers_, count_);
 98 | 
 99 |   DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
100 |   ptoken().~ProducerToken();
101 |   ctoken().~ConsumerToken();
102 |   DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_END();
103 | }
104 | 
105 | template class SmallBufferAllocator<4>;
106 | template class SmallBufferAllocator<8>;
107 | template class SmallBufferAllocator<16>;
108 | template class SmallBufferAllocator<32>;
109 | template class SmallBufferAllocator<64>;
110 | template class SmallBufferAllocator<128>;
111 | template class SmallBufferAllocator<256>;
112 | 
113 | } // namespace detail
114 | } // namespace dispenso
115 | 


--------------------------------------------------------------------------------
/dispenso/rw_lock.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
  3 |  *
  4 |  * This source code is licensed under the MIT license found in the
  5 |  * LICENSE file in the root directory of this source tree.
  6 |  */
  7 | 
  8 | #include <dispenso/detail/rw_lock_impl.h>
  9 | 
 10 | namespace dispenso {
 11 | 
 12 | /**
 13 |  * A reader/writer lock interface compatible with std::shared_mutex (for use with std::unique_lock
 14 |  * and std::shared_lock).  The interface is designed to be very fast in the face of high levels of
 15 |  * contention for high read traffic and low write traffic.
 16 |  *
 17 |  * @note RWLock is not as fully-featured as std::shared_mutex: It does not go to the OS to wait.
 18 |  * This behavior is good for guarding very fast operations, but less good for guarding very slow
 19 |  * operations.  Additionally, RWLock is not compatible with std::condition_variable, though
 20 |  * std::condition_variable_any may work (untested).  It could be possible to extend RWLock with it's
 21 |  * own ConditionVariable, make waiting operations sleep in the OS, and also to add timed functions;
 22 |  * however those may slow things down in the fast case.  If some/all of that functionality is
 23 |  * needed, use std::shared_mutex, or develop a new type.
 24 |  **/
 25 | class alignas(kCacheLineSize) RWLock : public detail::RWLockImpl {
 26 |  public:
 27 |   /**
 28 |    * Locks for write access
 29 |    *
 30 |    * @note It is undefined behavior to recursively lock
 31 |    **/
 32 |   using detail::RWLockImpl::lock;
 33 | 
 34 |   /**
 35 |    * Tries to lock for write access, returns if unable to lock
 36 |    *
 37 |    * @return true if lock was acquired, false otherwise
 38 |    **/
 39 |   using detail::RWLockImpl::try_lock;
 40 | 
 41 |   /**
 42 |    * Unlocks write access
 43 |    *
 44 |    * @note Must already be locked by the current thread of execution, otherwise, the behavior is
 45 |    * undefined.
 46 |    **/
 47 |   using detail::RWLockImpl::unlock;
 48 | 
 49 |   /**
 50 |    * Locks for read access
 51 |    *
 52 |    * @note It is undefined behavior to recursively lock
 53 |    **/
 54 |   using detail::RWLockImpl::lock_shared;
 55 | 
 56 |   /**
 57 |    * Tries to lock for read access, returns if unable to lock
 58 |    *
 59 |    * @return true if lock was acquired, false otherwise
 60 |    *
 61 |    * @note It is undefined behavior to recursively lock
 62 |    **/
 63 |   using detail::RWLockImpl::try_lock_shared;
 64 | 
 65 |   /**
 66 |    * Unlocks read access
 67 |    *
 68 |    * @note Must already be locked by the current thread of execution, otherwise, the behavior is
 69 |    * undefined.
 70 |    **/
 71 |   using detail::RWLockImpl::unlock_shared;
 72 | 
 73 |   /**
 74 |    * Upgrade from a reader lock to a writer lock.  lock_upgrade is a power-user interface.  There is
 75 |    * a very good reason why it is not exposed as upgrade_mutex in the standard.  To use it safely,
 76 |    * you *MUST* ensure only one thread can try to lock for write concurrently.  If that cannot be
 77 |    * guaranteed, you should unlock for read, and lock for write instead of using lock_upgrade to
 78 |    * avoid potential deadlock.
 79 |    *
 80 |    * @note Calling this if the writer lock is already held, or if no reader lock is already held is
 81 |    * undefined behavior.
 82 |    **/
 83 |   using detail::RWLockImpl::lock_upgrade;
 84 | 
 85 |   /**
 86 |    * Downgrade the lock from a writer lock to a reader lock.
 87 |    *
 88 |    * @note Calling this if the writer lock is not held results in undefined behavior
 89 |    **/
 90 |   using detail::RWLockImpl::lock_downgrade;
 91 | };
 92 | 
 93 | /**
 94 |  * An unaligned version of the RWLock.  This could be useful if you e.g. want to create an array of
 95 |  * these to guard a large number of slots, and the likelihood of multiple threads touching any
 96 |  * region concurrently is low.  All other behavior remains the same, so refer to the documentation
 97 |  * for RWLock.
 98 |  **/
 99 | class UnalignedRWLock : public detail::RWLockImpl {};
100 | 
101 | } // namespace dispenso
102 | 


--------------------------------------------------------------------------------
/dispenso/async_request.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
  3 |  *
  4 |  * This source code is licensed under the MIT license found in the
  5 |  * LICENSE file in the root directory of this source tree.
  6 |  */
  7 | 
  8 | /**
  9 |  * @file async_request.h
 10 |  * A file providing AsyncRequest.  This is a bit like a lightweight channel for storing updates to
 11 |  * one object, mostly intended to be used as a single producer, single consumer update mechanism.
 12 |  **/
 13 | 
 14 | #pragma once
 15 | 
 16 | #if __cplusplus >= 201703L
 17 | #include <optional>
 18 | #else
 19 | #include <dispenso/detail/op_result.h>
 20 | #endif // C++17
 21 | 
 22 | #include <dispenso/platform.h>
 23 | 
 24 | namespace dispenso {
 25 | 
 26 | /**
 27 |  * A type for making async requests.  Although it is safe to use from multiple producers and
 28 |  * consumers, it is primarily intended to be used from single producer, single consumer.
 29 |  *
 30 |  * Typically the consumer will request an update of the value from thread 0, and the producer will
 31 |  * look whether an update was requested from thread 1.  Once the producer determines an update was
 32 |  * requested (updateRequested() returns true), it calls tryEmplaceUpdate() to update the underlying
 33 |  * data.  Then when the consumer on thread 0 next calls getUpdate(), an optional wrapper to the
 34 |  * updated data is returned, and the AsyncRequest object is reset (it no longer has valid data, and
 35 |  * no update will have yet been requested for the next update).
 36 |  **/
 37 | template <typename T>
 38 | class AsyncRequest {
 39 |  public:
 40 |   // A lightweight std::optional-like type with a subset of functionality.
 41 | #if __cplusplus >= 201703L
 42 |   using OpResult = std::optional<T>;
 43 | #else
 44 |   using OpResult = detail::OpResult<T>;
 45 | #endif // C++17
 46 | 
 47 |   /**
 48 |    * The consumer can call this to request an update to the underlying data.  If request has already
 49 |    * been made or fulfilled, this is a no-op.
 50 |    **/
 51 |   void requestUpdate() {
 52 |     RequestState state = kNone;
 53 |     state_.compare_exchange_strong(state, kNeedsUpdate, std::memory_order_acq_rel);
 54 |   }
 55 | 
 56 |   /**
 57 |    * The producer can check this to determine if an update is needed.
 58 |    *
 59 |    * @return true if an update is required, false otherwise.
 60 |    **/
 61 |   bool updateRequested() const {
 62 |     return state_.load(std::memory_order_acquire) == kNeedsUpdate;
 63 |   }
 64 | 
 65 |   /**
 66 |    * The producer can try to emplace a new T object in response to a request.
 67 |    * @param args The arguments to emplace.
 68 |    * @return true if the underlying data was updated.  false if the underlying data is not in need
 69 |    * of an update.
 70 |    * @note For cases where calling this superflously could be expensive, it is wise to check
 71 |    * updateRequested() first.
 72 |    **/
 73 |   template <typename... Args>
 74 |   bool tryEmplaceUpdate(Args&&... args) {
 75 |     RequestState state = kNeedsUpdate;
 76 |     if (!state_.compare_exchange_strong(state, kUpdating, std::memory_order_acq_rel)) {
 77 |       return false;
 78 |     }
 79 |     obj_.emplace(std::forward<Args>(args)...);
 80 |     state_.store(kReady, std::memory_order_release);
 81 |     return true;
 82 |   }
 83 | 
 84 |   /**
 85 |    * The consumer can attempt to get an update.
 86 |    * @return An optional wrapper to the underlying data.  If no update is ready, nullopt is
 87 |    * returned. Once an update has been returned, the AsyncRequest object is returned to a state with
 88 |    * no underlying data.
 89 |    **/
 90 |   OpResult getUpdate() {
 91 |     if (state_.load(std::memory_order_acquire) == kReady) {
 92 |       auto obj = std::move(obj_);
 93 |       state_.store(kNone, std::memory_order_release);
 94 |       return obj;
 95 |     }
 96 |     return {};
 97 |   }
 98 | 
 99 |  private:
100 |   enum RequestState { kNone, kNeedsUpdate, kUpdating, kReady };
101 |   alignas(kCacheLineSize) std::atomic<RequestState> state_ = {kNone};
102 |   OpResult obj_;
103 | };
104 | 
105 | } // namespace dispenso
106 | 


--------------------------------------------------------------------------------
/dispenso/CMakeLists.txt:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
  2 | #
  3 | # This source code is licensed under the MIT license found in the
  4 | # LICENSE file in the root directory of this source tree.
  5 | 
  6 | cmake_minimum_required(VERSION 3.12)
  7 | 
  8 | file(GLOB_RECURSE SOURCES CONFIGURE_DEPENDS *.cpp)
  9 | file(GLOB_RECURSE HEADERS CONFIGURE_DEPENDS *.h)
 10 | message("SOURCES:  ${SOURCES}")
 11 | 
 12 | if(DISPENSO_SHARED_LIB)
 13 |   add_compile_definitions(DISPENSO_SHARED_LIB DISPENSO_LIB_EXPORT)
 14 |   add_library(dispenso SHARED ${SOURCES} ${HEADERS})
 15 | 
 16 |   target_compile_options(dispenso PRIVATE
 17 |   $<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-fvisibility=hidden>
 18 |   )
 19 | else()
 20 |   add_library(dispenso STATIC ${SOURCES} ${HEADERS})
 21 | endif()
 22 | 
 23 | target_compile_options(dispenso PRIVATE
 24 |   $<$<CXX_COMPILER_ID:MSVC>:/W3 /WX>
 25 |   $<$<CXX_COMPILER_ID:GNU>: -Wno-stringop-overflow>
 26 |   $<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-Wall -Wextra -pedantic -Wconversion -Wno-sign-conversion -Werror>
 27 | )
 28 | 
 29 | if(WIN32)
 30 |   target_compile_definitions(dispenso PUBLIC NOMINMAX)
 31 | endif()
 32 | 
 33 | target_include_directories(dispenso
 34 | PUBLIC
 35 |   $<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}/..>
 36 |   $<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}/third-party>
 37 |   $<BUILD_INTERFACE:${PROJECT_BINARY_DIR}>
 38 |   $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
 39 |   $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/dispenso/third-party>
 40 | )
 41 | 
 42 | set(CMAKE_THREAD_PREFER_PTHREAD TRUE)
 43 | set(THREADS_PREFER_PTHREAD_FLAG TRUE)
 44 | find_package(Threads REQUIRED)
 45 | target_link_libraries(dispenso PUBLIC Threads::Threads)
 46 | 
 47 | check_cxx_source_compiles("
 48 | #include <atomic>
 49 | #include <stdint.h>
 50 | std::atomic<int8_t> a(0);
 51 | std::atomic<int16_t> b(0);
 52 | std::atomic<int32_t> c(0);
 53 | std::atomic<int64_t> d(0);
 54 | int main() {
 55 |   ++a;
 56 |   ++b;
 57 |   ++c;
 58 |   return ++d;
 59 | }
 60 | " DISPENSO_HAS_ATOMIC_WITHOUT_LIB)
 61 | 
 62 | if (NOT DISPENSO_HAS_ATOMIC_WITHOUT_LIB)
 63 |   target_link_libraries(dispenso PUBLIC atomic)
 64 | endif()
 65 | 
 66 | if(WIN32)
 67 |   target_link_libraries(dispenso PUBLIC Synchronization Winmm)
 68 | endif()
 69 | 
 70 | if (NOT DISPENSO_STANDALONE)
 71 |   return()
 72 | endif()
 73 | 
 74 | ## Install library ##
 75 | 
 76 | set_target_properties(dispenso
 77 |     PROPERTIES VERSION ${PROJECT_VERSION} SOVERSION ${PROJECT_VERSION_MAJOR})
 78 | 
 79 | install(TARGETS dispenso
 80 |   EXPORT ${PROJECT_NAME}_Exports
 81 |   LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
 82 |     NAMELINK_SKIP
 83 |   # on Windows put the dlls into bin
 84 |   RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
 85 |   # ... and the import lib into the devel package
 86 |   ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
 87 | )
 88 | 
 89 | install(EXPORT ${PROJECT_NAME}_Exports
 90 |   DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}-${PROJECT_VERSION}
 91 |   NAMESPACE Dispenso::
 92 | )
 93 | 
 94 | install(TARGETS dispenso
 95 |   LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
 96 |     NAMELINK_ONLY
 97 |   RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
 98 |   ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
 99 | )
100 | 
101 | ## Install headers ##
102 | 
103 | install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR}
104 |   DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
105 |   FILES_MATCHING
106 |     PATTERN *.h
107 | )
108 | 
109 | ## Generate and install CMake target exports ##
110 | 
111 | include(CMakePackageConfigHelpers)
112 | 
113 | configure_package_config_file(
114 |   "${PROJECT_SOURCE_DIR}/cmake/${PROJECT_NAME}Config.cmake.in"
115 |   "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake"
116 | INSTALL_DESTINATION
117 |   ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}-${PROJECT_VERSION}
118 | )
119 | 
120 | write_basic_package_version_file(
121 |   "${PROJECT_NAME}ConfigVersion.cmake"
122 |   VERSION ${PROJECT_VERSION}
123 |   COMPATIBILITY SameMajorVersion
124 | )
125 | 
126 | install(FILES
127 |   ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake
128 |   ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake
129 | DESTINATION
130 |   ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}-${PROJECT_VERSION}
131 | )
132 | 


--------------------------------------------------------------------------------
/tests/pool_allocator_test.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
  3 |  *
  4 |  * This source code is licensed under the MIT license found in the
  5 |  * LICENSE file in the root directory of this source tree.
  6 |  */
  7 | 
  8 | #include <dispenso/pool_allocator.h>
  9 | 
 10 | #include <deque>
 11 | 
 12 | #include <gtest/gtest.h>
 13 | 
 14 | TEST(PoolAllocator, SimpleMallocFree) {
 15 |   dispenso::PoolAllocator allocator(64, 256, ::malloc, ::free);
 16 | 
 17 |   char* buf = allocator.alloc();
 18 | 
 19 |   *buf = 'a';
 20 | 
 21 |   allocator.dealloc(buf);
 22 | }
 23 | 
 24 | TEST(PoolAllocator, TrackAllocations) {
 25 |   std::map<char*, size_t> allocMap;
 26 | 
 27 |   auto allocFunc = [&allocMap](size_t len) -> void* {
 28 |     char* ret = reinterpret_cast<char*>(::malloc(len));
 29 |     allocMap.emplace(ret, len);
 30 |     return ret;
 31 |   };
 32 | 
 33 |   auto deallocFunc = [&allocMap](void* ptr) {
 34 |     EXPECT_EQ(1, allocMap.erase(reinterpret_cast<char*>(ptr)));
 35 |     ::free(ptr);
 36 |   };
 37 | 
 38 |   // Check to make sure that the ptr returned by the allocator below is in one of the buffers
 39 |   // generated by allocFunc.  We do this by examining the closest buffer (via lower_bound) in the
 40 |   // map, and then verify that that buffer contains ptr.
 41 |   auto checkInValidRange = [&allocMap](char* ptr) {
 42 |     auto it = allocMap.upper_bound(ptr);
 43 |     --it;
 44 |     EXPECT_GE(ptr, it->first);
 45 |     EXPECT_LT(ptr, it->first + it->second);
 46 |     return ptr;
 47 |   };
 48 | 
 49 |   {
 50 |     dispenso::PoolAllocator allocator(64, 256, allocFunc, deallocFunc);
 51 | 
 52 |     char* bufs[5];
 53 | 
 54 |     bufs[0] = checkInValidRange(allocator.alloc());
 55 | 
 56 |     EXPECT_EQ(1, allocMap.size());
 57 | 
 58 |     bufs[1] = checkInValidRange(allocator.alloc());
 59 | 
 60 |     EXPECT_EQ(1, allocMap.size());
 61 | 
 62 |     allocator.dealloc(bufs[0]);
 63 | 
 64 |     EXPECT_EQ(1, allocMap.size());
 65 | 
 66 |     bufs[0] = checkInValidRange(allocator.alloc());
 67 | 
 68 |     EXPECT_EQ(1, allocMap.size());
 69 | 
 70 |     bufs[2] = checkInValidRange(allocator.alloc());
 71 | 
 72 |     EXPECT_EQ(1, allocMap.size());
 73 | 
 74 |     bufs[3] = checkInValidRange(allocator.alloc());
 75 | 
 76 |     EXPECT_EQ(1, allocMap.size());
 77 | 
 78 |     bufs[4] = checkInValidRange(allocator.alloc());
 79 | 
 80 |     EXPECT_EQ(2, allocMap.size());
 81 | 
 82 |     allocator.dealloc(bufs[4]);
 83 |     EXPECT_LE(2, allocMap.size());
 84 |   }
 85 | 
 86 |   EXPECT_EQ(allocMap.size(), 0);
 87 | }
 88 | 
 89 | TEST(PoolAllocator, SimpleThreaded) {
 90 |   constexpr size_t kNumThreads = 8;
 91 | 
 92 |   dispenso::PoolAllocator allocator(64, 256, ::malloc, ::free);
 93 | 
 94 |   std::deque<std::thread> threads;
 95 | 
 96 |   for (size_t i = 0; i < kNumThreads; ++i) {
 97 |     threads.emplace_back([&allocator, tid = i]() {
 98 |       constexpr size_t kNumBufs = 8;
 99 |       char* bufs[kNumBufs];
100 | 
101 |       for (size_t i = 0; i < 1000; ++i) {
102 |         for (size_t j = 0; j < kNumBufs; ++j) {
103 |           bufs[j] = allocator.alloc();
104 |           *bufs[j] = static_cast<char>(tid);
105 |         }
106 |         for (size_t j = 0; j < kNumBufs; ++j) {
107 |           EXPECT_EQ(*bufs[j], tid);
108 |           allocator.dealloc(bufs[j]);
109 |         }
110 |       }
111 |     });
112 |   }
113 | 
114 |   for (auto& t : threads) {
115 |     t.join();
116 |   }
117 | }
118 | 
119 | TEST(PoolAllocator, Arena) {
120 |   dispenso::PoolAllocator allocator(64, 256, ::malloc, ::free);
121 | 
122 |   std::vector<char*> vec(2000);
123 |   for (char*& c : vec) {
124 |     c = allocator.alloc();
125 |     std::fill_n(c, 64, 0x7f);
126 |   }
127 | 
128 |   for (char* c : vec) {
129 |     EXPECT_TRUE(std::all_of(c, c + 64, [](char v) { return v == 0x7f; }));
130 |   }
131 | 
132 |   allocator.clear();
133 |   vec.resize(128);
134 |   for (char*& c : vec) {
135 |     c = allocator.alloc();
136 |     std::fill_n(c, 64, 0x22);
137 |   }
138 | 
139 |   for (char* c : vec) {
140 |     EXPECT_TRUE(std::all_of(c, c + 64, [](char v) { return v == 0x22; }));
141 |   }
142 | 
143 |   allocator.clear();
144 |   vec.resize(48);
145 |   for (char*& c : vec) {
146 |     c = allocator.alloc();
147 |     std::fill_n(c, 64, 0x11);
148 |   }
149 | 
150 |   for (char* c : vec) {
151 |     EXPECT_TRUE(std::all_of(c, c + 64, [](char v) { return v == 0x11; }));
152 |   }
153 | }
154 | 


--------------------------------------------------------------------------------
/tests/priority_test.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
  3 |  *
  4 |  * This source code is licensed under the MIT license found in the
  5 |  * LICENSE file in the root directory of this source tree.
  6 |  */
  7 | 
  8 | #include <chrono>
  9 | #include <cmath>
 10 | 
 11 | #include <dispenso/completion_event.h>
 12 | #include <dispenso/latch.h>
 13 | #include <dispenso/parallel_for.h>
 14 | #include <dispenso/priority.h>
 15 | #include <dispenso/timing.h>
 16 | 
 17 | #include <gtest/gtest.h>
 18 | 
 19 | // NOTE: This isn't suitable for an automated unit test for multiple reasons.  With OS
 20 | // scheduling we have some amount of nondeterminism.  Additionally, many (most?) machines will not
 21 | // have permissions for kHigh and kRealtime priorities depending on OS and policies.
 22 | //
 23 | //  On Linux, with permissions appropriate, this test passes about 14/15 times.  On the single
 24 | //  failure, I see an average sleep error like this:
 25 | // Expected: (info[2].error()) >= (info[3].error()), actual: 5.25784e-05 vs 5.41972e-05
 26 | // or about 50ish microseconds average error for both kHigh and kRealtime priorities.
 27 | 
 28 | using namespace std::chrono_literals;
 29 | 
 30 | struct ThreadInfo {
 31 |   uint64_t count = 0;
 32 |   double sleepErrorSum = 0.0;
 33 |   bool prioOk = false;
 34 | 
 35 |   double error() const {
 36 |     return sleepErrorSum / static_cast<double>(count);
 37 |   }
 38 | };
 39 | 
 40 | void run(
 41 |     size_t index,
 42 |     ThreadInfo& info,
 43 |     dispenso::CompletionEvent& notifier,
 44 |     dispenso::Latch& started) {
 45 |   switch (index) {
 46 |     case 0:
 47 |       info.prioOk = dispenso::setCurrentThreadPriority(dispenso::ThreadPriority::kLow);
 48 |       break;
 49 |     case 1:
 50 |       info.prioOk = dispenso::setCurrentThreadPriority(dispenso::ThreadPriority::kNormal);
 51 |       break;
 52 |     case 2:
 53 |       info.prioOk = dispenso::setCurrentThreadPriority(dispenso::ThreadPriority::kHigh);
 54 |       break;
 55 |     case 3:
 56 |       info.prioOk = dispenso::setCurrentThreadPriority(dispenso::ThreadPriority::kRealtime);
 57 |       break;
 58 |     default:
 59 |       info.prioOk = true;
 60 |       break;
 61 |   }
 62 | 
 63 |   // Ensure all threads reach this point before we begin, so that we don't let the first threads
 64 |   // make progress before the system is bogged down.
 65 |   started.arrive_and_wait();
 66 | 
 67 |   // Keep other threads busy.  If cores are idle, the result will be a crapshoot.
 68 |   if (index > 3) {
 69 |     while (!notifier.completed()) {
 70 |       ++info.count;
 71 | #if defined(DISPENSO_HAS_TSAN)
 72 |       // In TSAN atomics are implemented via reader/writer locks, and I believe these are not
 73 |       // guaranteeing progress.  We need to take some time out from the tight loop calling
 74 |       // notifier.completed() in order to allow the atomic write to succeed.
 75 |       std::this_thread::yield();
 76 | #endif // TSAN
 77 |     }
 78 |     return;
 79 |   }
 80 | 
 81 |   while (true) {
 82 |     double start = dispenso::getTime();
 83 |     if (!notifier.waitFor(1ms)) {
 84 |       double end = dispenso::getTime();
 85 |       ++info.count;
 86 |       info.sleepErrorSum += std::abs((end - start) - 1e-3);
 87 |     } else {
 88 |       break;
 89 |     }
 90 |   }
 91 | }
 92 | 
 93 | TEST(Priorty, PriorityGetsCycles) {
 94 |   dispenso::ParForOptions options;
 95 |   options.wait = false;
 96 | 
 97 |   int overloadConcurrency = 2 * std::thread::hardware_concurrency();
 98 | 
 99 |   if (sizeof(void*) == 4) {
100 |     overloadConcurrency = std::min(overloadConcurrency, 62);
101 |   }
102 | 
103 |   dispenso::ThreadPool pool(std::max<dispenso::ssize_t>(10, overloadConcurrency));
104 | 
105 |   std::vector<ThreadInfo> info(pool.numThreads());
106 | 
107 |   dispenso::CompletionEvent stop;
108 |   dispenso::Latch started(static_cast<uint32_t>(pool.numThreads()));
109 | 
110 |   dispenso::TaskSet tasks(pool);
111 |   dispenso::parallel_for(
112 |       tasks,
113 |       0,
114 |       pool.numThreads(),
115 |       [&info, &stop, &started](size_t index) { run(index, info[index], stop, started); },
116 |       options);
117 | 
118 |   // Let threads wake about 5000 times.
119 |   std::this_thread::sleep_for(5s);
120 | 
121 |   stop.notify();
122 | 
123 |   tasks.wait();
124 | 
125 |   for (auto& i : info) {
126 |     EXPECT_TRUE(i.prioOk) << "Failed for " << &i - info.data();
127 |   }
128 | 
129 | #if !defined(DISPENSO_HAS_TSAN)
130 |   // TSAN messes with scheduling enough that all bets are off.
131 |   EXPECT_GE(info[0].error(), info[1].error());
132 |   EXPECT_GE(info[1].error(), info[2].error());
133 |   EXPECT_GE(info[2].error(), info[3].error());
134 | #endif // TSAN
135 | }
136 | 


--------------------------------------------------------------------------------
/benchmarks/once_function_benchmark.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
  3 |  *
  4 |  * This source code is licensed under the MIT license found in the
  5 |  * LICENSE file in the root directory of this source tree.
  6 |  */
  7 | 
  8 | #include <cstring>
  9 | #include <deque>
 10 | #include <functional>
 11 | 
 12 | #include <dispenso/once_function.h>
 13 | 
 14 | #include "benchmark_common.h"
 15 | 
 16 | constexpr size_t kSmallSize = 24;
 17 | constexpr size_t kMediumSize = 120;
 18 | constexpr size_t kLargeSize = 248;
 19 | // 1000 is larger than our largest optimized chunk size, so we may expect to see performance falloff
 20 | // here.
 21 | constexpr size_t kExtraLargeSize = 1000;
 22 | 
 23 | template <typename ExeType, typename Func>
 24 | void runMoveLoop(benchmark::State& state, Func f) {
 25 |   for (auto UNUSED_VAR : state) {
 26 |     ExeType t(f);
 27 |     ExeType o;
 28 |     for (int i = 0; i < 10; ++i) {
 29 |       o = std::move(t);
 30 |       t = std::move(o);
 31 |     }
 32 |     t();
 33 |   }
 34 | }
 35 | 
 36 | template <typename Func>
 37 | class FuncConsumer {
 38 |  public:
 39 |   void add(Func&& f) {
 40 |     funcs_.emplace_back(std::move(f));
 41 |   }
 42 | 
 43 |   void consumeAll() {
 44 |     while (!funcs_.empty()) {
 45 |       Func f = std::move(funcs_.front());
 46 |       funcs_.pop_front();
 47 |       f();
 48 |     }
 49 |   }
 50 | 
 51 |  private:
 52 |   std::deque<Func> funcs_;
 53 | };
 54 | 
 55 | template <size_t kSize>
 56 | struct Foo {
 57 |   Foo() {
 58 |     buf[0] = 0;
 59 |     benchmark::ClobberMemory();
 60 |   }
 61 | 
 62 |   Foo(Foo<kSize>&& f) {
 63 |     std::memcpy(buf, f.buf, kSize);
 64 |   }
 65 | 
 66 |   Foo(const Foo<kSize>& f) {
 67 |     std::memcpy(buf, f.buf, kSize);
 68 |   }
 69 | 
 70 |   void operator()() {
 71 |     benchmark::DoNotOptimize(++buf[0]);
 72 |   }
 73 | 
 74 |   uint32_t buf[kSize / 4];
 75 | };
 76 | 
 77 | template <typename F>
 78 | void onceCall(F&& f) {
 79 |   F lf = std::move(f);
 80 |   lf();
 81 | }
 82 | 
 83 | template <size_t kSize>
 84 | void BM_move_std_function(benchmark::State& state) {
 85 |   runMoveLoop<std::function<void()>>(state, Foo<kSize>());
 86 | }
 87 | 
 88 | template <size_t kSize>
 89 | void BM_move_once_function(benchmark::State& state) {
 90 |   runMoveLoop<dispenso::OnceFunction>(state, Foo<kSize>());
 91 | }
 92 | 
 93 | constexpr int kMediumLoopLen = 200;
 94 | 
 95 | template <size_t kSize>
 96 | void BM_queue_inline_function(benchmark::State& state) {
 97 |   FuncConsumer<Foo<kSize>> consumer;
 98 |   for (auto UNUSED_VAR : state) {
 99 |     for (int i = 0; i < kMediumLoopLen; ++i) {
100 |       consumer.add(Foo<kSize>());
101 |     }
102 |     consumer.consumeAll();
103 |   }
104 | }
105 | 
106 | template <size_t kSize>
107 | void BM_queue_std_function(benchmark::State& state) {
108 |   FuncConsumer<std::function<void()>> consumer;
109 |   for (auto UNUSED_VAR : state) {
110 |     for (int i = 0; i < kMediumLoopLen; ++i) {
111 |       consumer.add(Foo<kSize>());
112 |     }
113 |     consumer.consumeAll();
114 |   }
115 | }
116 | 
117 | template <size_t kSize>
118 | void BM_queue_once_function(benchmark::State& state) {
119 |   FuncConsumer<dispenso::OnceFunction> consumer;
120 |   for (auto UNUSED_VAR : state) {
121 |     for (int i = 0; i < kMediumLoopLen; ++i) {
122 |       consumer.add(Foo<kSize>());
123 |     }
124 |     consumer.consumeAll();
125 |   }
126 | }
127 | 
128 | BENCHMARK_TEMPLATE(BM_move_std_function, kSmallSize);
129 | BENCHMARK_TEMPLATE(BM_move_once_function, kSmallSize);
130 | 
131 | BENCHMARK_TEMPLATE(BM_move_std_function, kMediumSize);
132 | BENCHMARK_TEMPLATE(BM_move_once_function, kMediumSize);
133 | 
134 | BENCHMARK_TEMPLATE(BM_move_std_function, kLargeSize);
135 | BENCHMARK_TEMPLATE(BM_move_once_function, kLargeSize);
136 | 
137 | BENCHMARK_TEMPLATE(BM_move_std_function, kExtraLargeSize);
138 | BENCHMARK_TEMPLATE(BM_move_once_function, kExtraLargeSize);
139 | 
140 | BENCHMARK_TEMPLATE(BM_queue_inline_function, kSmallSize);
141 | BENCHMARK_TEMPLATE(BM_queue_std_function, kSmallSize);
142 | BENCHMARK_TEMPLATE(BM_queue_once_function, kSmallSize);
143 | 
144 | BENCHMARK_TEMPLATE(BM_queue_inline_function, kMediumSize);
145 | BENCHMARK_TEMPLATE(BM_queue_std_function, kMediumSize);
146 | BENCHMARK_TEMPLATE(BM_queue_once_function, kMediumSize);
147 | 
148 | BENCHMARK_TEMPLATE(BM_queue_inline_function, kLargeSize);
149 | BENCHMARK_TEMPLATE(BM_queue_std_function, kLargeSize);
150 | BENCHMARK_TEMPLATE(BM_queue_once_function, kLargeSize);
151 | 
152 | BENCHMARK_TEMPLATE(BM_queue_inline_function, kExtraLargeSize);
153 | BENCHMARK_TEMPLATE(BM_queue_std_function, kExtraLargeSize);
154 | BENCHMARK_TEMPLATE(BM_queue_once_function, kExtraLargeSize);
155 | 
156 | BENCHMARK_MAIN();
157 | 


--------------------------------------------------------------------------------
/dispenso/resource_pool.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
  3 |  *
  4 |  * This source code is licensed under the MIT license found in the
  5 |  * LICENSE file in the root directory of this source tree.
  6 |  */
  7 | 
  8 | /**
  9 |  * @file resource_pool.h
 10 |  * A file providing ResourcePool.  This is syntactic sugar over what is essentially a set of
 11 |  * semaphore guarded resources.
 12 |  **/
 13 | 
 14 | #pragma once
 15 | 
 16 | #include <dispenso/platform.h>
 17 | #include <dispenso/tsan_annotations.h>
 18 | #include <moodycamel/blockingconcurrentqueue.h>
 19 | 
 20 | namespace dispenso {
 21 | 
 22 | template <typename T>
 23 | class ResourcePool;
 24 | 
 25 | /**
 26 |  * A RIAA wrapper for a user's type that can manage accessibility and ensures the resource will go
 27 |  * back into the ResourcePool upon destruction.
 28 |  **/
 29 | template <typename T>
 30 | class Resource {
 31 |  public:
 32 |   Resource(Resource&& other) : resource_(other.resource_), pool_(other.pool_) {
 33 |     other.resource_ = nullptr;
 34 |   }
 35 | 
 36 |   Resource& operator=(Resource&& other) {
 37 |     if (&other != this) {
 38 |       recycle();
 39 |       resource_ = other.resource_;
 40 |       pool_ = other.pool_;
 41 |       other.resource_ = nullptr;
 42 |     }
 43 |     return *this;
 44 |   }
 45 | 
 46 |   /**
 47 |    * Access the underlying resource object.
 48 |    *
 49 |    * @return a reference to the resource.
 50 |    **/
 51 |   T& get() {
 52 |     return *resource_;
 53 |   }
 54 | 
 55 |   ~Resource() {
 56 |     recycle();
 57 |   }
 58 | 
 59 |  private:
 60 |   Resource(T* res, ResourcePool<T>* pool) : resource_(res), pool_(pool) {}
 61 | 
 62 |   void recycle();
 63 | 
 64 |   T* resource_;
 65 |   ResourcePool<T>* pool_;
 66 | 
 67 |   friend class ResourcePool<T>;
 68 | };
 69 | 
 70 | /**
 71 |  * A pool of resources that can be accessed from multiple threads.  This is akin to a set of
 72 |  * resources and a semaphore ensuring enough resources exist.
 73 |  **/
 74 | template <typename T>
 75 | class ResourcePool {
 76 |  public:
 77 |   /**
 78 |    * Construct a ResourcePool.
 79 |    *
 80 |    * @param size The number of <code>T</code> objects in the pool.
 81 |    * @param init A functor with signature T() which can be called to initialize the pool's
 82 |    * resources.
 83 |    **/
 84 |   template <typename F>
 85 |   ResourcePool(size_t size, const F& init)
 86 |       : pool_(size),
 87 |         backingResources_(
 88 |             reinterpret_cast<char*>(
 89 |                 detail::alignedMalloc(size * detail::alignToCacheLine(sizeof(T))))),
 90 |         size_(size) {
 91 |     char* buf = backingResources_;
 92 | 
 93 |     // There are three reasons we create our own buffer and use placement new:
 94 |     // 1. We want to be able to handle non-movable non-copyable objects
 95 |     //   * Note that we could do this with std::deque
 96 |     // 2. We want to minimize memory allocations, since that can be a common point of contention in
 97 |     //    multithreaded programs.
 98 |     // 3. We can easily ensure that the objects are cache aligned to help avoid false sharing.
 99 | 
100 |     for (size_t i = 0; i < size; ++i) {
101 |       pool_.enqueue(new (buf) T(init()));
102 |       buf += detail::alignToCacheLine(sizeof(T));
103 |     }
104 |   }
105 | 
106 |   /**
107 |    * Acquire a resource from the pool.  This function may block until a resource becomes available.
108 |    *
109 |    * @return a <code>Resource</code>-wrapped resource.
110 |    **/
111 |   Resource<T> acquire() {
112 |     T* t;
113 |     DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
114 |     pool_.wait_dequeue(t);
115 |     DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_END();
116 |     return Resource<T>(t, this);
117 |   }
118 | 
119 |   /**
120 |    * Destruct the ResourcePool.  The user must ensure that all resources are returned to the pool
121 |    * prior to destroying the pool.
122 |    **/
123 |   ~ResourcePool() {
124 |     assert(pool_.size_approx() == size_);
125 |     for (size_t i = 0; i < size_; ++i) {
126 |       T* t;
127 |       DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
128 |       pool_.wait_dequeue(t);
129 |       DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_END();
130 |       t->~T();
131 |     }
132 |     detail::alignedFree(backingResources_);
133 |   }
134 | 
135 |  private:
136 |   void recycle(T* t) {
137 |     DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN();
138 |     pool_.enqueue(t);
139 |     DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_END();
140 |   }
141 | 
142 |   moodycamel::BlockingConcurrentQueue<T*> pool_;
143 |   char* backingResources_;
144 |   size_t size_;
145 | 
146 |   friend class Resource<T>;
147 | };
148 | 
149 | template <typename T>
150 | void Resource<T>::recycle() {
151 |   if (resource_) {
152 |     pool_->recycle(resource_);
153 |   }
154 | }
155 | 
156 | } // namespace dispenso
157 | 


--------------------------------------------------------------------------------
/dispenso/detail/rw_lock_impl.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
  3 |  *
  4 |  * This source code is licensed under the MIT license found in the
  5 |  * LICENSE file in the root directory of this source tree.
  6 |  */
  7 | 
  8 | #include <dispenso/platform.h>
  9 | 
 10 | namespace dispenso {
 11 | namespace detail {
 12 | class RWLockImpl {
 13 |  public:
 14 |   /**
 15 |    * Locks for write access
 16 |    *
 17 |    * @note It is undefined behavior to recursively lock
 18 |    **/
 19 |   void lock();
 20 | 
 21 |   /**
 22 |    * Tries to lock for write access, returns if unable to lock
 23 |    *
 24 |    * @return true if lock was acquired, false otherwise
 25 |    **/
 26 |   bool try_lock();
 27 | 
 28 |   /**
 29 |    * Unlocks write access
 30 |    *
 31 |    * @note Must already be locked by the current thread of execution, otherwise, the behavior is
 32 |    * undefined.
 33 |    **/
 34 |   void unlock();
 35 | 
 36 |   /**
 37 |    * Locks for read access
 38 |    *
 39 |    * @note It is undefined behavior to recursively lock
 40 |    **/
 41 |   void lock_shared();
 42 | 
 43 |   /**
 44 |    * Tries to lock for read access, returns if unable to lock
 45 |    *
 46 |    * @return true if lock was acquired, false otherwise
 47 |    *
 48 |    * @note It is undefined behavior to recursively lock
 49 |    **/
 50 |   bool try_lock_shared();
 51 | 
 52 |   /**
 53 |    * Unlocks read access
 54 |    *
 55 |    * @note Must already be locked by the current thread of execution, otherwise, the behavior is
 56 |    * undefined.
 57 |    **/
 58 |   void unlock_shared();
 59 | 
 60 |   /**
 61 |    * Upgrade from a reader lock to a writer lock.  lock_upgrade is a power-user interface.  There is
 62 |    * a very good reason why it is not exposed as upgrade_mutex in the standard.  To use it safely,
 63 |    * you *MUST* ensure only one thread can try to lock for write concurrently.  If that cannot be
 64 |    * guaranteed, you should unlock for read, and lock for write instead of using lock_upgrade to
 65 |    * avoid potential deadlock.
 66 |    *
 67 |    * @note Calling this if the writer lock is already held, or if no reader lock is already held is
 68 |    * undefined behavior.
 69 |    **/
 70 |   void lock_upgrade();
 71 | 
 72 |   /**
 73 |    * Downgrade the lock from a writer lock to a reader lock.
 74 |    *
 75 |    * @note Calling this if the writer lock is not held results in undefined behavior
 76 |    **/
 77 |   void lock_downgrade();
 78 | 
 79 |  private:
 80 |   static constexpr uint32_t kWriteBit = 0x80000000;
 81 |   static constexpr uint32_t kReaderBits = 0x7fffffff;
 82 |   std::atomic<uint32_t> lock_{0};
 83 | };
 84 | 
 85 | inline void RWLockImpl::lock() {
 86 |   uint32_t val = lock_.fetch_or(kWriteBit, std::memory_order_acq_rel);
 87 |   while (val & kWriteBit) {
 88 |     val = lock_.fetch_or(kWriteBit, std::memory_order_acq_rel);
 89 |   }
 90 |   // We've claimed single write ownership now.  We need to drain off readers
 91 |   while (val != kWriteBit) {
 92 |     val = lock_.load(std::memory_order_acquire);
 93 |   }
 94 | }
 95 | 
 96 | inline bool RWLockImpl::try_lock() {
 97 |   uint32_t val = lock_.fetch_or(kWriteBit, std::memory_order_acq_rel);
 98 |   return !(val & kWriteBit);
 99 | }
100 | 
101 | inline void RWLockImpl::unlock() {
102 |   lock_.fetch_and(kReaderBits, std::memory_order_acq_rel);
103 | }
104 | 
105 | inline void RWLockImpl::lock_shared() {
106 |   uint32_t val = lock_.fetch_add(1, std::memory_order_acq_rel);
107 |   while (val & kWriteBit) {
108 |     val = lock_.fetch_sub(1, std::memory_order_acq_rel);
109 |     while (val & kWriteBit) {
110 |       val = lock_.load(std::memory_order_acquire);
111 |     }
112 | 
113 |     val = lock_.fetch_add(1, std::memory_order_acq_rel);
114 |   }
115 | }
116 | 
117 | inline bool RWLockImpl::try_lock_shared() {
118 |   uint32_t val = lock_.fetch_add(1, std::memory_order_acq_rel);
119 |   if (val & kWriteBit) {
120 |     lock_.fetch_sub(1, std::memory_order_acq_rel);
121 |     return false;
122 |   }
123 |   return true;
124 | }
125 | 
126 | inline void RWLockImpl::unlock_shared() {
127 |   lock_.fetch_sub(1, std::memory_order_acq_rel);
128 | }
129 | 
130 | inline void RWLockImpl::lock_upgrade() {
131 |   uint32_t val = lock_.fetch_or(kWriteBit, std::memory_order_acq_rel);
132 |   while (val & kWriteBit) {
133 |     val = lock_.fetch_or(kWriteBit, std::memory_order_acq_rel);
134 |   }
135 |   // We've claimed single write ownership now.  We need to drain off readers, including ourself
136 |   lock_.fetch_sub(1, std::memory_order_acq_rel);
137 |   while (val != kWriteBit) {
138 |     val = lock_.load(std::memory_order_acquire);
139 |   }
140 | }
141 | 
142 | inline void RWLockImpl::lock_downgrade() {
143 |   // Get reader ownership first
144 |   lock_.fetch_add(1, std::memory_order_acq_rel);
145 |   unlock();
146 | }
147 | } // namespace detail
148 | } // namespace dispenso
149 | 


--------------------------------------------------------------------------------
/dispenso/small_buffer_allocator.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
  3 |  *
  4 |  * This source code is licensed under the MIT license found in the
  5 |  * LICENSE file in the root directory of this source tree.
  6 |  */
  7 | 
  8 | /**
  9 |  * @file small_buffer_allocator.h
 10 |  * A file providing SmallBufferAllocator.  This allocator can allocate and deallocate chunks of a
 11 |  * set size in a way that is efficient and scales quite well across many threads.
 12 |  **/
 13 | 
 14 | #pragma once
 15 | 
 16 | #include <dispenso/detail/math.h>
 17 | #include <dispenso/platform.h>
 18 | 
 19 | namespace dispenso {
 20 | 
 21 | /**
 22 |  * Set a standard for the maximum chunk size for use within dispenso.  The reason for this limit is
 23 |  * that there are diminishing returns after a certain size, and each new pool has it's own memory
 24 |  * overhead.
 25 |  **/
 26 | constexpr size_t kMaxSmallBufferSize = 256;
 27 | 
 28 | namespace detail {
 29 | 
 30 | DISPENSO_DLL_ACCESS char* allocSmallBufferImpl(size_t ordinal);
 31 | DISPENSO_DLL_ACCESS void deallocSmallBufferImpl(size_t ordinal, void* buf);
 32 | 
 33 | DISPENSO_DLL_ACCESS size_t approxBytesAllocatedSmallBufferImpl(size_t ordinal);
 34 | 
 35 | // This has the effect of selecting actual block sizes starting with 4 bytes.  Smaller requests
 36 | // (e.g. 1 byte, 2 bytes) will still utilize 4-byte blocks.  Choice of 4 bytes as the smallest
 37 | // mainly aligns to sizeof(ptr) on 32-bit platforms, where we'd expect most common use cases to be
 38 | // no smaller than one pointer.  Retaining 4-byte buckets on 64-bit platforms doesn't cost much
 39 | // (tiny startup/teardown cost, and trivial amount of memory) when not using 4-byte or smaller
 40 | // allocations, and makes the code simpler.
 41 | constexpr size_t getOrdinal(size_t blockSize) {
 42 |   return std::max<ssize_t>(0, log2const(blockSize) - 2);
 43 | }
 44 | 
 45 | template <size_t kBlockSize>
 46 | inline std::enable_if_t<(kBlockSize <= kMaxSmallBufferSize), char*> allocSmallOrLarge() {
 47 | #if defined(DISPENSO_NO_SMALL_BUFFER_ALLOCATOR)
 48 |   return reinterpret_cast<char*>(alignedMalloc(kBlockSize, kBlockSize));
 49 | #else
 50 |   return allocSmallBufferImpl(getOrdinal(kBlockSize));
 51 | #endif // DISPENSO_NO_SMALL_BUFFER_ALLOCATOR
 52 | }
 53 | 
 54 | template <size_t kBlockSize>
 55 | inline std::enable_if_t<(kBlockSize > kMaxSmallBufferSize), char*> allocSmallOrLarge() {
 56 |   return reinterpret_cast<char*>(alignedMalloc(kBlockSize, kBlockSize));
 57 | }
 58 | 
 59 | template <size_t kBlockSize>
 60 | inline std::enable_if_t<(kBlockSize <= kMaxSmallBufferSize), void> deallocSmallOrLarge(void* buf) {
 61 | #if defined(DISPENSO_NO_SMALL_BUFFER_ALLOCATOR)
 62 |   alignedFree(buf);
 63 | #else
 64 |   deallocSmallBufferImpl(getOrdinal(kBlockSize), buf);
 65 | #endif // DISPENSO_NO_SMALL_BUFFER_ALLOCATOR
 66 | }
 67 | 
 68 | template <size_t kBlockSize>
 69 | inline std::enable_if_t<(kBlockSize > kMaxSmallBufferSize), void> deallocSmallOrLarge(void* buf) {
 70 |   alignedFree(buf);
 71 | }
 72 | 
 73 | } // namespace detail
 74 | 
 75 | /**
 76 |  * Allocate a small buffer from a small buffer pool.
 77 |  *
 78 |  * @tparam kBlockSize The size of the block to allocate.  Must be a power of two, and must be less
 79 |  * than or equal to kMaxSmallBufferSize.
 80 |  * @return The pointer to the allocated block of memory.
 81 |  * @note: The returned buffer must be returned to the pool via deallocSmallBuffer templatized on the
 82 |  * same block size.  If kBlockSize > kMaxSmallBufferSize, this function falls back on alignedMalloc.
 83 |  * If DISPENSO_NO_SMALL_BUFFER_ALLOCATOR is defined, we will always fall back on
 84 |  * alignedMalloc/alignedFree.
 85 |  **/
 86 | template <size_t kBlockSize>
 87 | inline char* allocSmallBuffer() {
 88 |   return detail::allocSmallOrLarge<kBlockSize>();
 89 | }
 90 | /**
 91 |  * Free a small buffer from a small buffer pool.
 92 |  *
 93 |  * @tparam kBlockSize The size of the block to allocate.  Must be a power of two, and must be less
 94 |  * than or equal to kMaxSmallBufferSize.
 95 |  * @param buf the pointer to block of memory to return to the pool.  Must have been allocated with
 96 |  * allocSmallBuffer templatized on the same block size.
 97 |  * @note: If kBlockSize > kMaxSmallBufferSize, this function falls back on alignedFree.
 98 |  **/
 99 | template <size_t kBlockSize>
100 | inline void deallocSmallBuffer(void* buf) {
101 |   detail::deallocSmallOrLarge<kBlockSize>(buf);
102 | }
103 | 
104 | /**
105 |  * Get the approximate bytes allocated for a single small buffer pool (associated with
106 |  *kBlockSize). This function is not highly performant and locks, and should only be used for
107 |  *diagnostics (e.g. tests).
108 |  *
109 |  * @tparam kBlockSize The block size for the pool to query.
110 |  **/
111 | template <size_t kBlockSize>
112 | size_t approxBytesAllocatedSmallBuffer() {
113 |   return detail::approxBytesAllocatedSmallBufferImpl(detail::getOrdinal(kBlockSize));
114 | }
115 | 
116 | } // namespace dispenso
117 | 


--------------------------------------------------------------------------------
/tests/once_function_test.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
  3 |  *
  4 |  * This source code is licensed under the MIT license found in the
  5 |  * LICENSE file in the root directory of this source tree.
  6 |  */
  7 | 
  8 | #include <dispenso/once_function.h>
  9 | 
 10 | #include <deque>
 11 | 
 12 | #include <gtest/gtest.h>
 13 | 
 14 | constexpr size_t kExtraSmall = 8;
 15 | constexpr size_t kSmall = 24;
 16 | constexpr size_t kMedium = 120;
 17 | constexpr size_t kLarge = 248;
 18 | constexpr size_t kExtraLarge = 10000;
 19 | 
 20 | using dispenso::OnceFunction;
 21 | 
 22 | TEST(OnceFunction, Empty) {
 23 |   OnceFunction f([]() {});
 24 |   f();
 25 | }
 26 | 
 27 | TEST(OnceFunction, MoveConstructor) {
 28 |   OnceFunction f([]() {});
 29 |   OnceFunction g(std::move(f));
 30 |   g();
 31 | }
 32 | 
 33 | TEST(OnceFunction, MoveOperator) {
 34 |   OnceFunction f([]() {});
 35 |   OnceFunction g;
 36 |   g = std::move(f);
 37 |   g();
 38 | }
 39 | 
 40 | template <size_t kSize>
 41 | void testSize() {
 42 |   constexpr size_t kNumElts = kSize - sizeof(int*);
 43 |   struct Foo {
 44 |     void operator()() {
 45 |       int s = 0;
 46 |       for (uint8_t b : buf) {
 47 |         s += b;
 48 |       }
 49 |       *sum = s;
 50 |     }
 51 |     uint8_t buf[kNumElts];
 52 |     int* sum;
 53 |   } foo;
 54 |   for (size_t i = 0; i < kNumElts; ++i) {
 55 |     foo.buf[i] = static_cast<uint8_t>(i);
 56 |   }
 57 |   int answer;
 58 |   foo.sum = &answer;
 59 |   OnceFunction f(foo);
 60 |   OnceFunction g(foo);
 61 |   g();
 62 |   f();
 63 |   int expected = 0;
 64 |   for (size_t i = 0; i < kNumElts; ++i) {
 65 |     expected += static_cast<int>(i & 255);
 66 |   }
 67 |   EXPECT_EQ(answer, expected);
 68 | }
 69 | 
 70 | template <>
 71 | void testSize<8>() {
 72 |   struct Foo {
 73 |     void operator()() {
 74 |       int s = 0;
 75 |       *sum = s;
 76 |     }
 77 |     int* sum;
 78 |   } foo;
 79 |   int answer;
 80 |   foo.sum = &answer;
 81 |   OnceFunction f(foo);
 82 |   OnceFunction g(foo);
 83 |   g();
 84 |   f();
 85 |   int expected = 0;
 86 |   EXPECT_EQ(answer, expected);
 87 | }
 88 | 
 89 | TEST(OnceFunction, ExtraSmall) {
 90 |   testSize<kExtraSmall>();
 91 | }
 92 | 
 93 | TEST(OnceFunction, Small) {
 94 |   testSize<kSmall>();
 95 | }
 96 | 
 97 | TEST(OnceFunction, Medium) {
 98 |   testSize<kMedium>();
 99 | }
100 | 
101 | TEST(OnceFunction, Large) {
102 |   testSize<kLarge>();
103 | }
104 | 
105 | TEST(OnceFunction, ExtraLarge) {
106 |   testSize<kExtraLarge>();
107 | }
108 | 
109 | TEST(OnceFunction, MoveWithResult) {
110 |   int result = 5;
111 |   OnceFunction f([&result]() { result = 17; });
112 |   EXPECT_EQ(result, 5);
113 |   OnceFunction g(std::move(f));
114 |   EXPECT_EQ(result, 5);
115 |   g();
116 |   EXPECT_EQ(result, 17);
117 | }
118 | 
119 | template <size_t kNumElts>
120 | void ensureDestructor() {
121 |   int value = 0;
122 |   struct FooWithDestructor {
123 |     void operator()() {
124 |       ++*value;
125 |     }
126 |     ~FooWithDestructor() {
127 |       ++*value;
128 |     }
129 |     uint8_t buf[kNumElts];
130 |     int* value;
131 |   } foo;
132 | 
133 |   foo.value = &value;
134 | 
135 |   OnceFunction f(foo);
136 |   f();
137 |   EXPECT_EQ(value, 2);
138 | }
139 | 
140 | TEST(OnceFunction, EnsureDestructionExtraSmall) {
141 |   ensureDestructor<kExtraSmall>();
142 | }
143 | 
144 | TEST(OnceFunction, EnsureDestructionSmall) {
145 |   ensureDestructor<kSmall>();
146 | }
147 | 
148 | TEST(OnceFunction, EnsureDestructionMedium) {
149 |   ensureDestructor<kMedium>();
150 | }
151 | 
152 | TEST(OnceFunction, EnsureDestructionLarge) {
153 |   ensureDestructor<kLarge>();
154 | }
155 | 
156 | TEST(OnceFunction, EnsureDestructionExtraLarge) {
157 |   ensureDestructor<kExtraLarge>();
158 | }
159 | 
160 | template <size_t alignment>
161 | struct EnsureAlign {
162 |   void operator()() {
163 |     uintptr_t bloc = reinterpret_cast<uintptr_t>(&b);
164 |     EXPECT_EQ(0, bloc & (alignment - 1)) << "broken for alignment: " << alignment;
165 |   }
166 | 
167 |   alignas(alignment) char b = 0;
168 | };
169 | 
170 | TEST(OnceFunction, EnsureAlignment1) {
171 |   EnsureAlign<1> e;
172 |   OnceFunction f(e);
173 |   f();
174 | }
175 | 
176 | TEST(OnceFunction, EnsureAlignment2) {
177 |   EnsureAlign<2> e;
178 |   OnceFunction f(e);
179 |   f();
180 | }
181 | TEST(OnceFunction, EnsureAlignment4) {
182 |   EnsureAlign<4> e;
183 |   OnceFunction f(e);
184 |   f();
185 | }
186 | TEST(OnceFunction, EnsureAlignment8) {
187 |   EnsureAlign<8> e;
188 |   OnceFunction f(e);
189 |   f();
190 | }
191 | TEST(OnceFunction, EnsureAlignment16) {
192 |   EnsureAlign<16> e;
193 |   OnceFunction f(e);
194 |   f();
195 | }
196 | TEST(OnceFunction, EnsureAlignment32) {
197 |   EnsureAlign<32> e;
198 |   OnceFunction f(e);
199 |   f();
200 | }
201 | TEST(OnceFunction, EnsureAlignment64) {
202 |   EnsureAlign<64> e;
203 |   OnceFunction f(e);
204 |   f();
205 | }
206 | TEST(OnceFunction, EnsureAlignment128) {
207 |   EnsureAlign<128> e;
208 |   OnceFunction f(e);
209 |   f();
210 | }
211 | TEST(OnceFunction, EnsureAlignment256) {
212 |   EnsureAlign<256> e;
213 |   OnceFunction f(e);
214 |   f();
215 | }
216 | 


--------------------------------------------------------------------------------
/dispenso/detail/task_set_impl.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
  3 |  *
  4 |  * This source code is licensed under the MIT license found in the
  5 |  * LICENSE file in the root directory of this source tree.
  6 |  */
  7 | 
  8 | /**
  9 |  * @file task_set.h
 10 |  * A file providing TaskSet and ConcurrentTaskSet.  These interfaces allow the user to
 11 |  * submit/schedule multiple closures and then wait on them.
 12 |  **/
 13 | 
 14 | #pragma once
 15 | 
 16 | #include <dispenso/thread_pool.h>
 17 | 
 18 | namespace dispenso {
 19 | 
 20 | class TaskSetBase;
 21 | 
 22 | namespace detail {
 23 | template <typename Result>
 24 | class FutureBase;
 25 | 
 26 | class LimitGatedScheduler;
 27 | 
 28 | DISPENSO_DLL_ACCESS void pushThreadTaskSet(TaskSetBase* tasks);
 29 | DISPENSO_DLL_ACCESS void popThreadTaskSet();
 30 | 
 31 | } // namespace detail
 32 | 
 33 | DISPENSO_DLL_ACCESS TaskSetBase* parentTaskSet();
 34 | 
 35 | class TaskSetBase {
 36 |  public:
 37 |   TaskSetBase(
 38 |       ThreadPool& p,
 39 |       ParentCascadeCancel registerForParentCancel = ParentCascadeCancel::kOff,
 40 |       ssize_t stealingLoadMultiplier = 4)
 41 |       : pool_(p), taskSetLoadFactor_(stealingLoadMultiplier * p.numThreads()) {
 42 | #if defined DISPENSO_DEBUG
 43 |     assert(stealingLoadMultiplier > 0);
 44 |     pool_.outstandingTaskSets_.fetch_add(1, std::memory_order_acquire);
 45 | #endif
 46 | 
 47 |     parent_ = (registerForParentCancel == ParentCascadeCancel::kOn) ? parentTaskSet() : nullptr;
 48 | 
 49 |     if (parent_) {
 50 |       parent_->registerChild(this);
 51 |       if (parent_->canceled()) {
 52 |         canceled_.store(true, std::memory_order_release);
 53 |       }
 54 |     }
 55 |   }
 56 | 
 57 |   TaskSetBase(TaskSetBase&& other) = delete;
 58 |   TaskSetBase& operator=(TaskSetBase&& other) = delete;
 59 | 
 60 |   ssize_t numPoolThreads() const {
 61 |     return pool_.numThreads();
 62 |   }
 63 | 
 64 |   ThreadPool& pool() {
 65 |     return pool_;
 66 |   }
 67 | 
 68 |   void cancel() {
 69 |     canceled_.store(true, std::memory_order_release);
 70 |     cancelChildren();
 71 |   }
 72 | 
 73 |   bool canceled() const {
 74 |     return canceled_.load(std::memory_order_acquire);
 75 |   }
 76 | 
 77 |   ~TaskSetBase() {
 78 | #if defined DISPENSO_DEBUG
 79 |     pool_.outstandingTaskSets_.fetch_sub(1, std::memory_order_release);
 80 | #endif
 81 | 
 82 |     if (parent_) {
 83 |       parent_->unregisterChild(this);
 84 |     }
 85 |   }
 86 | 
 87 |  protected:
 88 |   template <typename F>
 89 |   auto packageTask(F&& f) {
 90 |     outstandingTaskCount_.fetch_add(1, std::memory_order_acquire);
 91 |     return [this, f = std::move(f)]() mutable {
 92 |       detail::pushThreadTaskSet(this);
 93 |       if (!canceled_.load(std::memory_order_acquire)) {
 94 | #if defined(__cpp_exceptions)
 95 |         try {
 96 |           f();
 97 |         } catch (...) {
 98 |           trySetCurrentException();
 99 |         }
100 | #else
101 |         f();
102 | #endif // __cpp_exceptions
103 |       }
104 |       detail::popThreadTaskSet();
105 |       outstandingTaskCount_.fetch_sub(1, std::memory_order_release);
106 |     };
107 |   }
108 | 
109 |   DISPENSO_DLL_ACCESS void trySetCurrentException();
110 |   bool testAndResetException();
111 | 
112 |   void registerChild(TaskSetBase* child) {
113 |     std::lock_guard<std::mutex> lk(mtx_);
114 | 
115 |     child->prev_ = tail_;
116 |     child->next_ = nullptr;
117 |     if (tail_) {
118 |       tail_->next_ = child;
119 |       tail_ = child;
120 |     } else {
121 |       head_ = tail_ = child;
122 |     }
123 |   }
124 | 
125 |   void unregisterChild(TaskSetBase* child) {
126 |     std::lock_guard<std::mutex> lk(mtx_);
127 | 
128 |     if (child->prev_) {
129 |       child->prev_->next_ = child->next_;
130 |     } else {
131 |       // We're head
132 |       assert(child == head_);
133 |       head_ = child->next_;
134 |     }
135 |     if (child->next_) {
136 |       child->next_->prev_ = child->prev_;
137 |     } else {
138 |       // We're tail
139 |       assert(child == tail_);
140 |       tail_ = child->prev_;
141 |     }
142 |   }
143 | 
144 |   void cancelChildren() {
145 |     std::lock_guard<std::mutex> lk(mtx_);
146 | 
147 |     auto* node = head_;
148 |     while (node) {
149 |       node->cancel();
150 |       node = node->next_;
151 |     }
152 |   }
153 | 
154 |   alignas(kCacheLineSize) std::atomic<ssize_t> outstandingTaskCount_{0};
155 |   alignas(kCacheLineSize) ThreadPool& pool_;
156 |   alignas(kCacheLineSize) std::atomic<bool> canceled_{false};
157 |   const ssize_t taskSetLoadFactor_;
158 | #if defined(__cpp_exceptions)
159 |   enum ExceptionState { kUnset, kSetting, kSet };
160 |   std::atomic<ExceptionState> guardException_{kUnset};
161 |   std::exception_ptr exception_;
162 | #endif // __cpp_exceptions
163 | 
164 |   TaskSetBase* parent_;
165 | 
166 |   // This mutex guards modifications/use of the intusive linked list between head_ and tail_
167 |   std::mutex mtx_;
168 |   TaskSetBase* head_{nullptr};
169 |   TaskSetBase* tail_{nullptr};
170 | 
171 |   // prev_ and next_ are links in our *parent's* intrusive linked list.
172 |   TaskSetBase* prev_;
173 |   TaskSetBase* next_;
174 | };
175 | 
176 | } // namespace dispenso
177 | 


--------------------------------------------------------------------------------
/dispenso/timing.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
  3 |  *
  4 |  * This source code is licensed under the MIT license found in the
  5 |  * LICENSE file in the root directory of this source tree.
  6 |  */
  7 | 
  8 | #include <dispenso/timing.h>
  9 | 
 10 | #include <chrono>
 11 | #include <cmath>
 12 | 
 13 | #if defined(_MSC_VER)
 14 | #include <intrin.h>
 15 | #endif // _MSC_VER
 16 | 
 17 | #if defined(_WIN32)
 18 | #include <Windows.h>
 19 | #endif // _WIN32
 20 | 
 21 | #if defined(__MACH__)
 22 | #include <mach/mach.h>
 23 | #include <mach/mach_time.h>
 24 | #endif // __MACH__
 25 | 
 26 | namespace dispenso {
 27 | namespace {
 28 | #if defined(__x86_64__) || defined(_M_AMD64)
 29 | #define DISPENSO_HAS_TIMESTAMP
 30 | #if defined(_MSC_VER)
 31 | inline uint64_t rdtscp() {
 32 |   uint32_t ui;
 33 |   return __rdtscp(&ui);
 34 | }
 35 | 
 36 | #else
 37 | inline uint64_t rdtscp() {
 38 |   uint32_t lo, hi;
 39 |   __asm__ volatile("rdtscp"
 40 |                    : /* outputs */ "=a"(lo), "=d"(hi)
 41 |                    : /* no inputs */
 42 |                    : /* clobbers */ "%rcx");
 43 |   return (uint64_t)lo | (((uint64_t)hi) << 32);
 44 | }
 45 | #endif // OS
 46 | #elif (defined(__GNUC__) || defined(__clang__)) && defined(__aarch64__)
 47 | #define DISPENSO_HAS_TIMESTAMP
 48 | uint64_t rdtscp(void) {
 49 |   uint64_t val;
 50 |   __asm__ volatile("mrs %0, cntvct_el0" : "=r"(val));
 51 |   return val;
 52 | }
 53 | #endif // ARCH
 54 | } // namespace
 55 | 
 56 | #if defined(DISPENSO_HAS_TIMESTAMP)
 57 | 
 58 | #if !defined(__aarch64__)
 59 | 
 60 | static bool snapFreq(double& firstApprox) {
 61 |   switch (static_cast<int>(firstApprox)) {
 62 |     case 0:
 63 |       if (std::abs(int(firstApprox * 10.0)) <= 1) {
 64 |         firstApprox = 0.0;
 65 |         return true;
 66 |       }
 67 |       break;
 68 |     case 9:
 69 |       if (std::abs(int(firstApprox * 10.0) - 99) <= 1) {
 70 |         firstApprox = 10.0;
 71 | 
 72 |         return true;
 73 |       }
 74 |       break;
 75 |     case 3:
 76 |       if (std::abs(int(firstApprox * 10.0) - 33) <= 1) {
 77 |         firstApprox = 3.0 + 1.0 / 3.0;
 78 |         return true;
 79 |       }
 80 |       break;
 81 |     case 6:
 82 |       if (std::abs(int(firstApprox * 10.0) - 66) <= 1) {
 83 |         firstApprox = 6.0 + 2.0 / 3.0;
 84 |         return true;
 85 |       }
 86 |       break;
 87 |   }
 88 |   return false;
 89 | }
 90 | 
 91 | static double fallbackTicksPerSecond() {
 92 |   using namespace std::chrono_literals;
 93 |   constexpr double kChronoOverheadBias = 250e-9;
 94 | 
 95 |   auto baseStart = std::chrono::high_resolution_clock::now();
 96 |   auto start = rdtscp();
 97 |   std::this_thread::sleep_for(50ms);
 98 |   auto end = rdtscp();
 99 |   auto baseEnd = std::chrono::high_resolution_clock::now();
100 | 
101 |   auto base = std::chrono::duration<double>(baseEnd - baseStart).count() - kChronoOverheadBias;
102 |   double firstApprox = (static_cast<double>(end - start)) / base;
103 | 
104 |   // Try to refine the approximation.  In some circumstances we can "snap" the frequency to a very
105 |   // good guess that is off by less than one part in thousands.  Accuracy should already be quite
106 |   // good in any case, but this allows us to improve in some cases.
107 | 
108 |   // Get first 3 digits
109 |   firstApprox *= 1e-7;
110 | 
111 |   int firstInt = static_cast<int>(firstApprox);
112 |   firstApprox -= firstInt;
113 | 
114 |   firstApprox *= 10.0;
115 | 
116 |   if (!snapFreq(firstApprox)) {
117 |     int secondInt = static_cast<int>(firstApprox);
118 |     firstApprox -= secondInt;
119 |     firstApprox *= 10.0;
120 |     snapFreq(firstApprox);
121 |     firstApprox *= 0.1;
122 |     firstApprox += secondInt;
123 |   }
124 | 
125 |   firstApprox *= 0.1;
126 | 
127 |   firstApprox += firstInt;
128 |   firstApprox *= 1e7;
129 |   return firstApprox;
130 | }
131 | #endif // !__aarch64__
132 | 
133 | #if defined(__aarch64__)
134 | static double ticksPerSecond() {
135 |   uint64_t val;
136 |   __asm__ volatile("mrs %0, cntfrq_el0" : "=r"(val));
137 |   return static_cast<double>(val);
138 | }
139 | #elif defined(__MACH__)
140 | static double ticksPerSecond() {
141 |   mach_timebase_info_data_t info;
142 |   if (mach_timebase_info(&info) != KERN_SUCCESS) {
143 |     return fallbackTicksPerSecond();
144 |   }
145 |   return 1e9 * static_cast<double>(info.denom) / static_cast<double>(info.numer);
146 | }
147 | #else
148 | double ticksPerSecond() {
149 |   return fallbackTicksPerSecond();
150 | }
151 | #endif
152 | 
153 | double getTime() {
154 |   static double secondsPerTick = 1.0 / ticksPerSecond();
155 |   static double startTime = static_cast<double>(rdtscp()) * secondsPerTick;
156 | 
157 |   double t = static_cast<double>(rdtscp()) * secondsPerTick;
158 |   return t - startTime;
159 | }
160 | #else
161 | double getTime() {
162 |   static auto startTime = std::chrono::high_resolution_clock::now();
163 |   auto cur = std::chrono::high_resolution_clock::now();
164 | 
165 |   return std::chrono::duration<double>(cur - startTime).count();
166 | }
167 | #endif // DISPENSO_HAS_TIMESTAMP
168 | 
169 | namespace {
170 | // This should ensure that we initialize the time before main.
171 | double g_dummyTime = getTime();
172 | } // namespace
173 | 
174 | } // namespace dispenso
175 | 


--------------------------------------------------------------------------------
/dispenso/detail/concurrent_vector_impl2.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
  3 |  *
  4 |  * This source code is licensed under the MIT license found in the
  5 |  * LICENSE file in the root directory of this source tree.
  6 |  */
  7 | 
  8 | // This file intended for textual inclusion into concurrent_vector.h only
  9 | 
 10 | namespace cv {
 11 | 
 12 | template <typename VecT, typename T>
 13 | DISPENSO_INLINE ConVecIterBase<VecT, T>::ConVecIterBase(const VecT* vec, cv::BucketInfo info)
 14 |     : vb_(reinterpret_cast<uintptr_t>(vec) | info.bucket),
 15 |       bucketStart_(vec->buffers_[info.bucket].load(std::memory_order_relaxed)),
 16 |       bucketPtr_(bucketStart_ + info.bucketIndex),
 17 |       bucketEnd_(bucketStart_ + info.bucketCapacity) {}
 18 | 
 19 | template <typename VecT, typename T, bool kIsConst>
 20 | DISPENSO_INLINE ConcurrentVectorIterator<VecT, T, kIsConst>&
 21 | ConcurrentVectorIterator<VecT, T, kIsConst>::operator++() {
 22 |   ++bucketPtr_;
 23 |   if (bucketPtr_ == bucketEnd_) {
 24 |     auto len = bucketEnd_ - bucketStart_;
 25 |     ++vb_;
 26 |     auto vb = getVecAndBucket();
 27 |     len <<= int{vb.bucket > 1};
 28 |     bucketPtr_ = bucketStart_ = vb.vec->buffers_[vb.bucket].load(std::memory_order_relaxed);
 29 |     bucketEnd_ = bucketPtr_ + len;
 30 |   }
 31 |   return *this;
 32 | }
 33 | 
 34 | template <typename VecT, typename T, bool kIsConst>
 35 | DISPENSO_INLINE ConcurrentVectorIterator<VecT, T, kIsConst>&
 36 | ConcurrentVectorIterator<VecT, T, kIsConst>::operator--() {
 37 |   --bucketPtr_;
 38 |   if (bucketPtr_ < bucketStart_) {
 39 |     auto vb = getVecAndBucket();
 40 |     if (vb.bucket) {
 41 |       auto len = bucketEnd_ - bucketStart_;
 42 |       --vb_;
 43 |       len >>= int{vb.bucket > 1};
 44 |       bucketStart_ = vb.vec->buffers_[vb.bucket - 1].load(std::memory_order_relaxed);
 45 |       bucketPtr_ = bucketStart_ + len;
 46 |       bucketEnd_ = bucketPtr_;
 47 |       --bucketPtr_;
 48 |     }
 49 |   }
 50 |   return *this;
 51 | }
 52 | 
 53 | template <typename VecT, typename T, bool kIsConst>
 54 | DISPENSO_INLINE typename ConcurrentVectorIterator<VecT, T, kIsConst>::reference
 55 | ConcurrentVectorIterator<VecT, T, kIsConst>::operator*() const {
 56 |   return *bucketPtr_;
 57 | }
 58 | template <typename VecT, typename T, bool kIsConst>
 59 | DISPENSO_INLINE typename ConcurrentVectorIterator<VecT, T, kIsConst>::pointer
 60 | ConcurrentVectorIterator<VecT, T, kIsConst>::operator->() const {
 61 |   return &operator*();
 62 | }
 63 | 
 64 | template <typename VecT, typename T, bool kIsConst>
 65 | DISPENSO_INLINE typename ConcurrentVectorIterator<VecT, T, kIsConst>::reference
 66 | ConcurrentVectorIterator<VecT, T, kIsConst>::operator[](difference_type n) const {
 67 |   T* nPtr = bucketPtr_ + n;
 68 |   if (nPtr >= bucketStart_ && nPtr < bucketEnd_) {
 69 |     return *nPtr;
 70 |   }
 71 | 
 72 |   auto vb = getVecAndBucket();
 73 | 
 74 |   // Reconstruct index
 75 |   ssize_t oldIndex = bucketPtr_ - bucketStart_;
 76 |   oldIndex += (bool)vb.bucket * (bucketEnd_ - bucketStart_);
 77 |   auto binfo = vb.vec->bucketAndSubIndexForIndex(oldIndex + n);
 78 |   return *(vb.vec->buffers_[binfo.bucket].load(std::memory_order_relaxed) + binfo.bucketIndex);
 79 | }
 80 | 
 81 | template <typename VecT, typename T, bool kIsConst>
 82 | DISPENSO_INLINE ConcurrentVectorIterator<VecT, T, kIsConst>&
 83 | ConcurrentVectorIterator<VecT, T, kIsConst>::operator+=(difference_type n) {
 84 |   T* nPtr = bucketPtr_ + n;
 85 |   if (nPtr >= bucketStart_ && nPtr < bucketEnd_) {
 86 |     bucketPtr_ = nPtr;
 87 |     return *this;
 88 |   }
 89 | 
 90 |   auto vb = getVecAndBucket();
 91 | 
 92 |   // Reconstruct index
 93 |   ssize_t oldIndex = bucketPtr_ - bucketStart_;
 94 |   oldIndex += (bool)vb.bucket * (bucketEnd_ - bucketStart_);
 95 |   auto binfo = vb.vec->bucketAndSubIndexForIndex(oldIndex + n);
 96 |   bucketStart_ = vb.vec->buffers_[binfo.bucket].load(std::memory_order_relaxed);
 97 |   bucketEnd_ = bucketStart_ + binfo.bucketCapacity;
 98 |   bucketPtr_ = bucketStart_ + binfo.bucketIndex;
 99 |   vb_ = reinterpret_cast<uintptr_t>(vb.vec) | binfo.bucket;
100 |   return *this;
101 | }
102 | 
103 | template <typename VecT, typename T, bool kIsConst>
104 | DISPENSO_INLINE ConcurrentVectorIterator<VecT, T, kIsConst>
105 | ConcurrentVectorIterator<VecT, T, kIsConst>::operator+(difference_type n) const {
106 |   T* nPtr = bucketPtr_ + n;
107 |   if (nPtr >= bucketStart_ && nPtr < bucketEnd_) {
108 |     return {vb_, bucketStart_, nPtr, bucketEnd_};
109 |   }
110 | 
111 |   auto vb = getVecAndBucket();
112 |   // Reconstruct index
113 |   ssize_t oldIndex = bucketPtr_ - bucketStart_;
114 |   oldIndex += (bool)vb.bucket * (bucketEnd_ - bucketStart_);
115 |   auto binfo = vb.vec->bucketAndSubIndexForIndex(oldIndex + n);
116 |   return {vb.vec, binfo};
117 | }
118 | 
119 | template <typename VecT, typename T, bool kIsConst>
120 | DISPENSO_INLINE typename CompactCVecIterator<VecT, T, kIsConst>::reference
121 | CompactCVecIterator<VecT, T, kIsConst>::operator*() const {
122 |   return const_cast<VecT&>(*vec_)[index_];
123 | }
124 | 
125 | template <typename VecT, typename T, bool kIsConst>
126 | DISPENSO_INLINE typename CompactCVecIterator<VecT, T, kIsConst>::pointer
127 | CompactCVecIterator<VecT, T, kIsConst>::operator->() const {
128 |   return &operator*();
129 | }
130 | 
131 | template <typename VecT, typename T, bool kIsConst>
132 | DISPENSO_INLINE typename CompactCVecIterator<VecT, T, kIsConst>::reference
133 | CompactCVecIterator<VecT, T, kIsConst>::operator[](ssize_t n) const {
134 |   return const_cast<VecT&>(*vec_)[index_ + n];
135 | }
136 | } // namespace cv
137 | 


--------------------------------------------------------------------------------
/dispenso/task_set.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
  3 |  *
  4 |  * This source code is licensed under the MIT license found in the
  5 |  * LICENSE file in the root directory of this source tree.
  6 |  */
  7 | 
  8 | #include "task_set.h"
  9 | 
 10 | #include <cstdio>
 11 | 
 12 | namespace dispenso {
 13 | 
 14 | namespace detail {
 15 | // 64 depth is pretty ridiculous, but try not to step on anyone's feet.
 16 | constexpr int32_t kMaxTasksStackSize = 64;
 17 | 
 18 | DISPENSO_THREAD_LOCAL TaskSetBase* g_taskStack[kMaxTasksStackSize];
 19 | DISPENSO_THREAD_LOCAL int32_t g_taskStackSize = 0;
 20 | 
 21 | void pushThreadTaskSet(TaskSetBase* t) {
 22 | #ifndef NDEBUG
 23 |   if (g_taskStackSize < 0 || g_taskStackSize >= kMaxTasksStackSize) {
 24 |     fprintf(stderr, "TaskSet parent stack index is invalid when pushing: %d\n", g_taskStackSize);
 25 |     std::abort();
 26 |   }
 27 | #endif // NDEBUG
 28 |   g_taskStack[g_taskStackSize++] = t;
 29 | }
 30 | void popThreadTaskSet() {
 31 | #ifndef NDEBUG
 32 |   if (g_taskStackSize <= 0) {
 33 |     fprintf(stderr, "TaskSet parent stack index is invalid when popping: %d\n", g_taskStackSize);
 34 |     std::abort();
 35 |   }
 36 | #endif // NDEBUG
 37 |   --g_taskStackSize;
 38 | }
 39 | } // namespace detail
 40 | 
 41 | TaskSetBase* parentTaskSet() {
 42 |   using namespace detail;
 43 | 
 44 | #ifndef NDEBUG
 45 |   if (g_taskStackSize < 0 || g_taskStackSize >= kMaxTasksStackSize) {
 46 |     fprintf(stderr, "TaskSet parent stack index is invalid when accessing: %d\n", g_taskStackSize);
 47 |     std::abort();
 48 |   }
 49 | #endif // NDEBUG
 50 | 
 51 |   return g_taskStackSize ? g_taskStack[g_taskStackSize - 1] : nullptr;
 52 | }
 53 | 
 54 | void TaskSetBase::trySetCurrentException() {
 55 | #if defined(__cpp_exceptions)
 56 |   auto status = kUnset;
 57 |   if (guardException_.compare_exchange_strong(status, kSetting, std::memory_order_acq_rel)) {
 58 |     exception_ = std::current_exception();
 59 |     guardException_.store(kSet, std::memory_order_release);
 60 |     canceled_.store(true, std::memory_order_release);
 61 |   }
 62 | #endif // __cpp_exceptions
 63 | }
 64 | 
 65 | inline bool TaskSetBase::testAndResetException() {
 66 | #if defined(__cpp_exceptions)
 67 |   if (guardException_.load(std::memory_order_acquire) == kSet) {
 68 |     auto exception = std::move(exception_);
 69 |     guardException_.store(kUnset, std::memory_order_release);
 70 |     std::rethrow_exception(exception);
 71 |   }
 72 | #endif // __cpp_exceptions
 73 |   return canceled_.load(std::memory_order_acquire);
 74 | }
 75 | 
 76 | bool ConcurrentTaskSet::wait() {
 77 |   // Steal work until our set is unblocked.  Note that this is not the
 78 |   // fastest possible way to unblock the current set, but it will alleviate
 79 |   // deadlock, and should provide decent throughput for all waiters.
 80 | 
 81 |   // The deadlock scenario mentioned goes as follows:  N threads in the
 82 |   // ThreadPool.  Each thread is running code that is using TaskSets.  No
 83 |   // progress could be made without stealing.
 84 |   while (outstandingTaskCount_.load(std::memory_order_acquire)) {
 85 |     if (!pool_.tryExecuteNext()) {
 86 |       std::this_thread::yield();
 87 |     }
 88 |   }
 89 | 
 90 |   return testAndResetException();
 91 | }
 92 | 
 93 | bool ConcurrentTaskSet::tryWait(size_t maxToExecute) {
 94 |   while (outstandingTaskCount_.load(std::memory_order_acquire) && maxToExecute--) {
 95 |     if (!pool_.tryExecuteNext()) {
 96 |       break;
 97 |     }
 98 |   }
 99 | 
100 |   // Must check completion prior to checking exceptions, otherwise there could be a case where
101 |   // exceptions are checked, then an exception is propagated, and then we return whether all items
102 |   // have been completed, thus dropping the exception.
103 |   if (outstandingTaskCount_.load(std::memory_order_acquire)) {
104 |     return false;
105 |   }
106 | 
107 |   return !testAndResetException();
108 | }
109 | 
110 | moodycamel::ProducerToken TaskSet::makeToken(moodycamel::ConcurrentQueue<OnceFunction>& pool) {
111 |   return moodycamel::ProducerToken(pool);
112 | }
113 | 
114 | bool TaskSet::wait() {
115 |   // Steal work until our set is unblocked.
116 |   // The deadlock scenario mentioned goes as follows:  N threads in the
117 |   // ThreadPool.  Each thread is running code that is using TaskSets.  No
118 |   // progress could be made without stealing.
119 |   while (pool_.tryExecuteNextFromProducerToken(token_)) {
120 |   }
121 | 
122 |   while (outstandingTaskCount_.load(std::memory_order_acquire)) {
123 |     if (!pool_.tryExecuteNext()) {
124 |       std::this_thread::yield();
125 |     }
126 |   }
127 | 
128 |   return testAndResetException();
129 | }
130 | 
131 | bool TaskSet::tryWait(size_t maxToExecute) {
132 |   ssize_t maxToExe = static_cast<ssize_t>(maxToExecute);
133 |   while (outstandingTaskCount_.load(std::memory_order_acquire) && maxToExe--) {
134 |     if (!pool_.tryExecuteNextFromProducerToken(token_)) {
135 |       break;
136 |     }
137 |   }
138 | 
139 |   // Must check completion prior to checking exceptions, otherwise there could be a case where
140 |   // exceptions are checked, then an exception is propagated, and then we return whether all items
141 |   // have been completed, thus dropping the exception.
142 | 
143 |   maxToExe = std::max<ssize_t>(0, maxToExe);
144 | 
145 |   while (outstandingTaskCount_.load(std::memory_order_acquire) && maxToExe--) {
146 |     if (!pool_.tryExecuteNext()) {
147 |       std::this_thread::yield();
148 |     }
149 |   }
150 | 
151 |   if (outstandingTaskCount_.load(std::memory_order_acquire)) {
152 |     return false;
153 |   }
154 | 
155 |   return !testAndResetException();
156 | }
157 | 
158 | } // namespace dispenso
159 | 


--------------------------------------------------------------------------------
/benchmarks/for_latency_benchmark.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
  3 |  *
  4 |  * This source code is licensed under the MIT license found in the
  5 |  * LICENSE file in the root directory of this source tree.
  6 |  */
  7 | 
  8 | #include <dispenso/parallel_for.h>
  9 | #include <dispenso/thread_pool.h>
 10 | #include <dispenso/timing.h>
 11 | 
 12 | #if defined(_OPENMP)
 13 | #include <omp.h>
 14 | #endif
 15 | 
 16 | #include <random>
 17 | #include <unordered_map>
 18 | 
 19 | #if !defined(BENCHMARK_WITHOUT_TBB)
 20 | #include "tbb/blocked_range.h"
 21 | #include "tbb/parallel_for.h"
 22 | #include "tbb/task_scheduler_init.h"
 23 | #endif // !BENCHMARK_WITHOUT_TBB
 24 | 
 25 | #include "thread_benchmark_common.h"
 26 | 
 27 | namespace {
 28 | 
 29 | using namespace std::chrono_literals;
 30 | 
 31 | uint32_t kSeed(8);
 32 | constexpr int kSize = 50000;
 33 | constexpr auto kSleep = 30ms;
 34 | } // namespace
 35 | 
 36 | // Adapted from Google gtest examples
 37 | // Returns true iff n is a prime number.
 38 | bool isPrime(int n) {
 39 |   // Trivial case 1: small numbers
 40 |   if (n <= 1)
 41 |     return false;
 42 | 
 43 |   // Trivial case 2: even numbers
 44 |   if (n % 2 == 0)
 45 |     return n == 2;
 46 | 
 47 |   // Now, we have that n is odd and n >= 3.
 48 | 
 49 |   // Try to divide n by every odd number i, starting from 3
 50 |   for (int i = 3;; i += 2) {
 51 |     // We only have to try i up to the squre root of n
 52 |     if (i > n / i)
 53 |       break;
 54 | 
 55 |     // Now, we have i <= n/i < n.
 56 |     // If n is divisible by i, n is not prime.
 57 |     if (n % i == 0)
 58 |       return false;
 59 |   }
 60 | 
 61 |   // n has no integer factor in the range (1, n), and thus is prime.
 62 |   return true;
 63 | }
 64 | 
 65 | const std::vector<int>& getInputs(int numElements) {
 66 |   static std::unordered_map<int, std::vector<int>> vecs;
 67 |   auto it = vecs.find(numElements);
 68 |   if (it != vecs.end()) {
 69 |     return it->second;
 70 |   }
 71 | 
 72 |   std::mt19937_64 gen64(kSeed);
 73 |   std::uniform_int_distribution<> distribution(100000, 1000000);
 74 |   std::vector<int> values;
 75 |   values.reserve(numElements);
 76 |   for (int i = 0; i < numElements; ++i) {
 77 |     values.push_back(distribution(gen64));
 78 |   }
 79 |   auto res = vecs.emplace(numElements, std::move(values));
 80 |   assert(res.second);
 81 |   return res.first->second;
 82 | }
 83 | 
 84 | void BM_serial(benchmark::State& state) {
 85 |   std::vector<int> output(kSize, 0);
 86 |   auto& input = getInputs(kSize);
 87 | 
 88 |   std::vector<double> times;
 89 |   times.reserve(1000);
 90 | 
 91 |   for (auto UNUSED_VAR : state) {
 92 |     std::this_thread::sleep_for(kSleep);
 93 |     times.push_back(dispenso::getTime());
 94 |     for (size_t i = 0; i < kSize; ++i) {
 95 |       output[i] = isPrime(input[i]);
 96 |     }
 97 |     times.back() = dispenso::getTime() - times.back();
 98 |   }
 99 | 
100 |   doStats(times, state);
101 | }
102 | 
103 | void BM_dispenso(benchmark::State& state) {
104 |   const int numThreads = state.range(0) - 1;
105 | 
106 |   std::vector<int> output(kSize, 0);
107 |   dispenso::resizeGlobalThreadPool(numThreads);
108 | 
109 |   std::vector<double> times;
110 |   times.reserve(1000);
111 | 
112 |   auto& input = getInputs(kSize);
113 |   for (auto UNUSED_VAR : state) {
114 |     std::this_thread::sleep_for(kSleep);
115 |     times.push_back(dispenso::getTime());
116 |     dispenso::parallel_for(
117 |         dispenso::makeChunkedRange(0, kSize), [&input, &output](size_t i, size_t e) {
118 |           for (; i != e; ++i) {
119 |             output[i] = isPrime(input[i]);
120 |           }
121 |         });
122 |     times.back() = dispenso::getTime() - times.back();
123 |   }
124 | 
125 |   doStats(times, state);
126 | }
127 | 
128 | #if defined(_OPENMP)
129 | void BM_omp(benchmark::State& state) {
130 |   const int numThreads = state.range(0);
131 | 
132 |   std::vector<int> output(kSize, 0);
133 |   omp_set_num_threads(numThreads);
134 | 
135 |   std::vector<double> times;
136 |   times.reserve(1000);
137 | 
138 |   auto& input = getInputs(kSize);
139 |   for (auto UNUSED_VAR : state) {
140 |     std::this_thread::sleep_for(kSleep);
141 |     times.push_back(dispenso::getTime());
142 | #pragma omp parallel for
143 |     for (int i = 0; i < kSize; ++i) {
144 |       output[i] = isPrime(input[i]);
145 |     }
146 |     times.back() = dispenso::getTime() - times.back();
147 |   }
148 |   doStats(times, state);
149 | }
150 | #endif /*defined(_OPENMP)*/
151 | 
152 | #if !defined(BENCHMARK_WITHOUT_TBB)
153 | void BM_tbb(benchmark::State& state) {
154 |   const int numThreads = state.range(0);
155 | 
156 |   std::vector<int> output(kSize, 0);
157 | 
158 |   tbb::task_scheduler_init initsched(numThreads);
159 | 
160 |   std::vector<double> times;
161 |   times.reserve(1000);
162 | 
163 |   auto& input = getInputs(kSize);
164 |   for (auto UNUSED_VAR : state) {
165 |     std::this_thread::sleep_for(kSleep);
166 |     times.push_back(dispenso::getTime());
167 |     tbb::parallel_for(
168 |         tbb::blocked_range<size_t>(0, kSize),
169 |         [&input, &output](const tbb::blocked_range<size_t>& r) {
170 |           for (size_t i = r.begin(); i < r.end(); ++i) {
171 |             output[i] = isPrime(input[i]);
172 |           }
173 |         });
174 |     times.back() = dispenso::getTime() - times.back();
175 |   }
176 |   doStats(times, state);
177 | }
178 | #endif // !BENCHMARK_WITHOUT_TBB
179 | 
180 | static void CustomArguments(benchmark::internal::Benchmark* b) {
181 |   for (int i : pow2HalfStepThreads()) {
182 |     b->Arg(i);
183 |   }
184 | }
185 | 
186 | BENCHMARK(BM_serial)->UseRealTime();
187 | 
188 | #if defined(_OPENMP)
189 | BENCHMARK(BM_omp)->Apply(CustomArguments)->UseRealTime();
190 | #endif // OPENMP
191 | #if !defined(BENCHMARK_WITHOUT_TBB)
192 | BENCHMARK(BM_tbb)->Apply(CustomArguments)->UseRealTime();
193 | #endif // !BENCHMARK_WITHOUT_TBB
194 | 
195 | BENCHMARK(BM_dispenso)->Apply(CustomArguments)->UseRealTime();
196 | 
197 | BENCHMARK_MAIN();
198 | 


--------------------------------------------------------------------------------
/dispenso/pipeline.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
  3 |  *
  4 |  * This source code is licensed under the MIT license found in the
  5 |  * LICENSE file in the root directory of this source tree.
  6 |  */
  7 | 
  8 | /**
  9 |  * @file pipeline.h
 10 |  * A file providing utilities for parallel pipelining of work.
 11 |  **/
 12 | 
 13 | #pragma once
 14 | 
 15 | #include <limits>
 16 | 
 17 | #include <dispenso/detail/pipeline_impl.h>
 18 | 
 19 | namespace dispenso {
 20 | 
 21 | /**
 22 |  * OpResult is like a poor-man's std::optional for those who wish to use dispenso pipeline filtering
 23 |  * in C++14.  In C++17 and beyond, it is recommended to use std::optional instead.  OpResult has
 24 |  * implicit construct from T, just like std::optional, and move/copy constructors and operators,
 25 |  * bool conversion, and value() function, but otherwise provides less functionality than
 26 |  * std::optional.
 27 |  **/
 28 | template <typename T>
 29 | using OpResult = detail::OpResult<T>;
 30 | 
 31 | /**
 32 |  * A simple constant representing maximum parallelism for a stage.  This number has no particular
 33 |  * significance, and is simply here for convenience.
 34 |  **/
 35 | constexpr ssize_t kStageNoLimit = std::numeric_limits<ssize_t>::max();
 36 | 
 37 | /**
 38 |  * Create a stage for use in the pipeline function.
 39 |  *
 40 |  * @param f A function-like object that can accept the result of the previous stage (if any), and
 41 |  * which produces the output for the next stage (if any).
 42 |  * @param limit How many threads may concurrently run work for this stage.  Values larger than the
 43 |  * number of threads in the associated thread pool of the used ConcurrentTaskSet will be capped to
 44 |  * the size of the pool.
 45 |  * @return A stage object suitable for pipelining.
 46 |  **/
 47 | template <typename F>
 48 | auto stage(F&& f, ssize_t limit) {
 49 |   return detail::Stage<F>(std::forward<F>(f), limit);
 50 | }
 51 | 
 52 | /**
 53 |  * Pipeline work in stages.  Pipelines allow stages to specify parallelism limits by using the
 54 |  * <code>stage</code> function, or a function-like object can simply be passed directly, indicating
 55 |  * a serial stage.  Even if stages are serial, there can be parallelism between stages, so in a 3
 56 |  * stage serial pipeline, the expected runtime is the max of the 3 stages runtimes (note that this
 57 |  * is in the absence of pipeline overheads and with an infinitely long workstream.  In practice
 58 |  * speedup is somewhat less). This function will block until the entire pipeline has completed.
 59 |  *
 60 |  * @param pool The ThreadPool to run the work in.  This inherently determines the upper bound for
 61 |  * parallelism of the pipeline.
 62 |  * @param sIn The stages to run.  The first stage must be a Generator stage, the last must be a Sink
 63 |  * stage, and intermediate stages are Transform stages.
 64 |  * - If there is only one stage, it takes no
 65 |  * arguments, but returns a bool indicating completion (false means the pipeline is complete).
 66 |  * - Otherwise, the Generator stage takes no arguments and  must return an OpResult or std::optional
 67 |  * value, and an invalid/nullopt result indicates that the Generator is done (no more values
 68 |  * forthcoming).
 69 |  * - Transform stages should accept the output of the prior stage (or output.value() in the case of
 70 |  * OpResult or std::optional), and should return either a value or an OpResult or std::optional
 71 |  * value if the Transform is capable of filtering results. Invalid/nullopt OpResult or std::optional
 72 |  * values indicate that the value should be filtered, and not passed on to the next stage.
 73 |  * - The Sink stage should accept the output of the prior stage, just as a Transform stage does, but
 74 |  * does not return any value (or at least the pipeline will ignore it).
 75 |  **/
 76 | template <typename... Stages>
 77 | void pipeline(ThreadPool& pool, Stages&&... sIn) {
 78 |   ConcurrentTaskSet tasks(pool);
 79 |   auto pipes = detail::makePipes(tasks, std::forward<Stages>(sIn)...);
 80 |   pipes.execute();
 81 |   pipes.wait();
 82 | }
 83 | 
 84 | /**
 85 |  * Pipeline work in stages.  Pipelines allow stages to specify parallelism limits by using the
 86 |  * <code>stage</code> function, or a function-like object can simply be passed directly, indicating
 87 |  * a serial stage.  Even if stages are serial, there can be parallelism between stages, so in a 3
 88 |  * stage serial pipeline, the expected runtime is the max of the 3 stages runtimes (note that this
 89 |  * is in the absence of pipeline overheads and with an infinitely long workstream.  In practice
 90 |  * speedup is somewhat less). Work will be run on dispenso's global thread pool.  This function will
 91 |  * block until the entire pipeline has completed.
 92 |  *
 93 |  * @param sIn The stages to run.  The first stage must be a Generator stage, the last must be a Sink
 94 |  * stage, and intermediate stages are Transform stages.
 95 |  * - If there is only one stage, it takes no
 96 |  * arguments, but returns a bool indicating completion (false means the pipeline is complete).
 97 |  * - Otherwise, the Generator stage takes no arguments and  must return an OpResult or std::optional
 98 |  * value, and an invalid/nullopt result indicates that the Generator is done (no more values
 99 |  * forthcoming).
100 |  * - Transform stages should accept the output of the prior stage (or output.value() in the case of
101 |  * OpResult or std::optional), and should return either a value or an OpResult or std::optional
102 |  * value if the Transform is capable of filtering results. Invalid/nullopt OpResult or std::optional
103 |  * values indicate that the value should be filtered, and not passed on to the next stage.
104 |  * - The Sink stage should accept the output of the prior stage, just as a Transform stage does, but
105 |  * does not return any value (or at least the pipeline will ignore it).
106 |  **/
107 | template <typename... Stages>
108 | void pipeline(Stages&&... sIn) {
109 |   pipeline(globalThreadPool(), std::forward<Stages>(sIn)...);
110 | }
111 | 
112 | } // namespace dispenso
113 | 


--------------------------------------------------------------------------------
/dispenso/priority.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
  3 |  *
  4 |  * This source code is licensed under the MIT license found in the
  5 |  * LICENSE file in the root directory of this source tree.
  6 |  */
  7 | 
  8 | #include <dispenso/priority.h>
  9 | 
 10 | #if (defined(__unix__) || defined(unix)) && !defined(USG)
 11 | #include <sys/param.h>
 12 | #endif
 13 | 
 14 | #if defined(__linux__)
 15 | #include <pthread.h>
 16 | #include <sys/resource.h>
 17 | #include <unistd.h>
 18 | #elif defined(__MACH__)
 19 | #include <mach/mach_time.h>
 20 | #include <mach/thread_act.h>
 21 | #include <pthread.h>
 22 | #elif defined(_WIN32)
 23 | #include <Windows.h>
 24 | #elif defined(BSD)
 25 | #include <sys/rtprio.h>
 26 | #include <sys/types.h>
 27 | #endif
 28 | 
 29 | namespace dispenso {
 30 | 
 31 | namespace {
 32 | DISPENSO_THREAD_LOCAL ThreadPriority g_threadPriority = ThreadPriority::kNormal;
 33 | } // namespace
 34 | 
 35 | ThreadPriority getCurrentThreadPriority() {
 36 |   return g_threadPriority;
 37 | }
 38 | 
 39 | #ifdef __MACH__
 40 | bool setCurrentThreadPriority(ThreadPriority prio) {
 41 |   mach_port_t threadport = pthread_mach_thread_np(pthread_self());
 42 |   if (prio == ThreadPriority::kRealtime) {
 43 |     mach_timebase_info_data_t info;
 44 |     mach_timebase_info(&info);
 45 |     double msToAbsTime = ((double)info.denom / (double)info.numer) * 1000000.0;
 46 |     thread_time_constraint_policy_data_t time_constraints;
 47 |     time_constraints.period = 0;
 48 |     time_constraints.computation = static_cast<uint32_t>(1.0 * msToAbsTime);
 49 |     time_constraints.constraint = static_cast<uint32_t>(10.0 * msToAbsTime);
 50 |     time_constraints.preemptible = 0;
 51 | 
 52 |     if (thread_policy_set(
 53 |             threadport,
 54 |             THREAD_TIME_CONSTRAINT_POLICY,
 55 |             (thread_policy_t)&time_constraints,
 56 |             THREAD_TIME_CONSTRAINT_POLICY_COUNT) != KERN_SUCCESS) {
 57 |       return false;
 58 |     }
 59 |   }
 60 | 
 61 |   // https://fergofrog.com/code/cbowser/xnu/osfmk/kern/sched.h.html#_M/MAXPRI_USER
 62 |   struct thread_precedence_policy ttcpolicy;
 63 | 
 64 |   switch (prio) {
 65 |     case ThreadPriority::kLow:
 66 |       ttcpolicy.importance = 20;
 67 |       break;
 68 |     case ThreadPriority::kNormal:
 69 |       ttcpolicy.importance = 37;
 70 |       break;
 71 |     case ThreadPriority::kHigh: // fallthrough
 72 |     case ThreadPriority::kRealtime:
 73 |       ttcpolicy.importance = 63;
 74 |       break;
 75 |   }
 76 | 
 77 |   if (thread_policy_set(
 78 |           threadport,
 79 |           THREAD_PRECEDENCE_POLICY,
 80 |           (thread_policy_t)&ttcpolicy,
 81 |           THREAD_PRECEDENCE_POLICY_COUNT) != KERN_SUCCESS) {
 82 |     return false;
 83 |   }
 84 | 
 85 |   g_threadPriority = prio;
 86 |   return true;
 87 | }
 88 | #elif defined(_WIN32)
 89 | // https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-setthreadpriority
 90 | bool setCurrentThreadPriority(ThreadPriority prio) {
 91 |   if (prio == ThreadPriority::kRealtime) {
 92 |     if (!SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS)) {
 93 |       return false;
 94 |     }
 95 |   }
 96 | 
 97 |   if (prio == ThreadPriority::kHigh) {
 98 |     // Best effort
 99 |     SetPriorityClass(GetCurrentProcess(), HIGH_PRIORITY_CLASS);
100 |   }
101 | 
102 |   bool success = false;
103 |   switch (prio) {
104 |     case ThreadPriority::kLow:
105 |       success = SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_LOWEST);
106 |       break;
107 |     case ThreadPriority::kNormal:
108 |       success = SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_NORMAL);
109 |       break;
110 |     case ThreadPriority::kHigh: // fallthrough
111 |     case ThreadPriority::kRealtime:
112 |       success = SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_HIGHEST);
113 |       break;
114 |   }
115 | 
116 |   if (!success) {
117 |     return false;
118 |   }
119 | 
120 |   g_threadPriority = prio;
121 |   return true;
122 | }
123 | #elif defined(__linux__)
124 | bool setCurrentThreadPriority(ThreadPriority prio) {
125 |   if (prio == ThreadPriority::kRealtime) {
126 |     struct sched_param param;
127 |     param.sched_priority = 99;
128 |     if (pthread_setschedparam(pthread_self(), SCHED_FIFO, &param)) {
129 |       return false;
130 |     }
131 |   }
132 | 
133 |   switch (prio) {
134 |     case ThreadPriority::kLow:
135 |       errno = 0;
136 |       (void)!nice(10);
137 |       break;
138 |     case ThreadPriority::kNormal:
139 |       errno = 0;
140 |       (void)!nice(0);
141 |       break;
142 |     case ThreadPriority::kHigh: // fallthrough
143 |     case ThreadPriority::kRealtime: {
144 |       struct rlimit rlim;
145 |       getrlimit(RLIMIT_NICE, &rlim);
146 |       if (rlim.rlim_max <= 20) {
147 |         return false;
148 |       }
149 |       rlim.rlim_cur = rlim.rlim_max;
150 |       setrlimit(RLIMIT_NICE, &rlim);
151 |       errno = 0;
152 |       (void)!nice(static_cast<int>(20 - rlim.rlim_max));
153 |     }
154 |   }
155 |   if (errno != 0) {
156 |     return false;
157 |   }
158 |   g_threadPriority = prio;
159 |   return true;
160 | }
161 | #elif defined(__FreeBSD__)
162 | // TODO: Find someone who has a FreeBSD system to test this code.
163 | bool setCurrentThreadPriority(ThreadPriority prio) {
164 |   struct rtprio rtp;
165 | 
166 |   if (prio == ThreadPriority::kRealtime) {
167 |     rtp.type = RTP_PRIO_REALTIME;
168 |     rtp.prio = 10;
169 |     if (rtprio_thread(RTP_SET, 0, &rtp)) {
170 |       return false;
171 |     }
172 |   } else {
173 |     rtp.type = RTP_PRIO_NORMAL;
174 |     switch (prio) {
175 |       case ThreadPriority::kLow:
176 |         rtp.prio = 31;
177 |         break;
178 |       case ThreadPriority::kNormal:
179 |         rtp.prio = 15;
180 |         break;
181 |       case ThreadPriority::kHigh: // fallthrough
182 |       case ThreadPriority::kRealtime:
183 |         rtp.prio = 0;
184 |         break;
185 |     }
186 |     if (rtprio_thread(RTP_SET, 0, &rtp)) {
187 |       return false;
188 |     }
189 |   }
190 |   g_threadPriority = prio;
191 |   return true;
192 | }
193 | #else
194 | bool setCurrentThreadPriority(ThreadPriority prio) {
195 |   return false;
196 | }
197 | 
198 | #endif // platform
199 | 
200 | } // namespace dispenso
201 | 


--------------------------------------------------------------------------------
/benchmarks/trivial_compute_benchmark.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
  3 |  *
  4 |  * This source code is licensed under the MIT license found in the
  5 |  * LICENSE file in the root directory of this source tree.
  6 |  */
  7 | 
  8 | #include <cmath>
  9 | #include <future>
 10 | 
 11 | #include <dispenso/parallel_for.h>
 12 | 
 13 | #if defined(_OPENMP)
 14 | #include <omp.h>
 15 | #endif
 16 | 
 17 | #if !defined(BENCHMARK_WITHOUT_TBB)
 18 | #include "tbb/blocked_range.h"
 19 | #include "tbb/parallel_reduce.h"
 20 | #include "tbb/task_scheduler_init.h"
 21 | #endif // !BENCHMARK_WITHOUT_TBB
 22 | 
 23 | #include "thread_benchmark_common.h"
 24 | 
 25 | static constexpr int kSmallSize = 100;
 26 | static constexpr int kMediumSize = 1000000;
 27 | static constexpr int kLargeSize = 100000000;
 28 | 
 29 | uint32_t getInputs(int num_elements) {
 30 |   srand(num_elements);
 31 |   return rand() & 127;
 32 | }
 33 | 
 34 | inline uint64_t calculate(uint64_t input, uint64_t index, size_t foo) {
 35 |   return std::cos(
 36 |       std::log(
 37 |           std::sin(std::exp(std::sqrt(static_cast<double>((input ^ index) - 3 * foo * input))))));
 38 | }
 39 | 
 40 | void checkResults(uint32_t input, uint64_t actual, int foo, size_t num_elements) {
 41 |   if (!foo)
 42 |     return;
 43 |   if (input != getInputs(num_elements)) {
 44 |     std::cerr << "Failed to recover input!" << std::endl;
 45 |     abort();
 46 |   }
 47 |   uint64_t expected = 0;
 48 |   for (size_t i = 0; i < num_elements; ++i) {
 49 |     expected += calculate(input, i, foo);
 50 |   }
 51 |   if (expected != actual) {
 52 |     std::cerr << "FAIL! " << expected << " vs " << actual << std::endl;
 53 |     abort();
 54 |   }
 55 | }
 56 | 
 57 | template <int num_elements>
 58 | void BM_serial(benchmark::State& state) {
 59 |   auto input = getInputs(num_elements);
 60 |   uint64_t sum = 0;
 61 |   int foo = 0;
 62 |   for (auto UNUSED_VAR : state) {
 63 |     sum = 0;
 64 |     ++foo;
 65 |     for (size_t i = 0; i < num_elements; ++i) {
 66 |       sum += calculate(input, i, foo);
 67 |     }
 68 |   }
 69 |   checkResults(input, sum, foo, num_elements);
 70 | }
 71 | 
 72 | void BM_dispenso(benchmark::State& state) {
 73 |   const int num_threads = state.range(0) - 1;
 74 |   const int num_elements = state.range(1);
 75 | 
 76 |   dispenso::ThreadPool pool(num_threads);
 77 | 
 78 |   uint64_t sum = 0;
 79 |   int foo = 0;
 80 | 
 81 |   dispenso::ParForOptions options;
 82 |   options.minItemsPerChunk = 4000;
 83 | 
 84 |   auto input = getInputs(num_elements);
 85 |   for (auto UNUSED_VAR : state) {
 86 |     dispenso::TaskSet tasks(pool);
 87 | 
 88 |     std::vector<uint64_t> sums;
 89 |     sums.reserve(num_threads + 1);
 90 |     ++foo;
 91 |     dispenso::parallel_for(
 92 |         tasks,
 93 |         sums,
 94 |         []() { return uint64_t{0}; },
 95 |         dispenso::makeChunkedRange(0, num_elements, dispenso::ParForChunking::kStatic),
 96 |         [input, foo](uint64_t& lsumStore, size_t i, size_t end) {
 97 |           uint64_t lsum = 0;
 98 |           for (; i != end; ++i) {
 99 |             lsum += calculate(input, i, foo);
100 |           }
101 |           lsumStore += lsum;
102 |         },
103 |         options);
104 |     sum = 0;
105 |     for (auto s : sums) {
106 |       sum += s;
107 |     }
108 |   }
109 | 
110 |   checkResults(input, sum, foo, num_elements);
111 | }
112 | 
113 | #if defined(_OPENMP)
114 | void BM_omp(benchmark::State& state) {
115 |   const int num_threads = state.range(0);
116 |   const int num_elements = state.range(1);
117 | 
118 |   omp_set_num_threads(num_threads);
119 | 
120 |   uint64_t sum = 0;
121 | 
122 |   int foo = 0;
123 | 
124 |   auto input = getInputs(num_elements);
125 |   for (auto UNUSED_VAR : state) {
126 |     sum = 0;
127 |     ++foo;
128 | #pragma omp parallel for reduction(+ : sum)
129 |     for (int i = 0; i < num_elements; ++i) {
130 |       sum += calculate(input, i, foo);
131 |     }
132 |   }
133 |   checkResults(input, sum, foo, num_elements);
134 | }
135 | #endif /* defined(_OPENMP)*/
136 | 
137 | #if !defined(BENCHMARK_WITHOUT_TBB)
138 | void BM_tbb(benchmark::State& state) {
139 |   const int num_threads = state.range(0);
140 |   const int num_elements = state.range(1);
141 | 
142 |   uint64_t sum = 0;
143 | 
144 |   int foo = 0;
145 | 
146 |   auto input = getInputs(num_elements);
147 |   for (auto UNUSED_VAR : state) {
148 |     tbb::task_scheduler_init initsched(num_threads);
149 |     ++foo;
150 |     sum = tbb::parallel_reduce(
151 |         tbb::blocked_range<size_t>(0, num_elements),
152 |         uint64_t{0},
153 |         [input, foo](const tbb::blocked_range<size_t>& r, uint64_t init) -> uint64_t {
154 |           for (size_t a = r.begin(); a != r.end(); ++a)
155 |             init += calculate(input, a, foo);
156 |           return init;
157 |         },
158 |         [](uint64_t x, uint64_t y) -> uint64_t { return x + y; });
159 |   }
160 |   checkResults(input, sum, foo, num_elements);
161 | }
162 | #endif // !BENCHMARK_WITHOUT_TBB
163 | 
164 | void BM_async(benchmark::State& state) {
165 |   const int num_threads = state.range(0);
166 |   const int num_elements = state.range(1);
167 |   uint64_t sum = 0;
168 |   int foo = 0;
169 | 
170 |   auto input = getInputs(num_elements);
171 |   for (auto UNUSED_VAR : state) {
172 |     std::vector<uint64_t> sums;
173 |     ++foo;
174 | 
175 |     size_t chunkSize = (num_elements + num_threads - 1) / num_threads;
176 | 
177 |     std::vector<std::future<uint64_t>> futures;
178 | 
179 |     for (int i = 0; i < num_elements; i += chunkSize) {
180 |       futures.push_back(
181 |           std::async([input, foo, i, end = std::min<int>(num_elements, i + chunkSize)]() mutable {
182 |             uint64_t lsum = 0;
183 |             for (; i != end; ++i) {
184 |               lsum += calculate(input, i, foo);
185 |             }
186 |             return lsum;
187 |           }));
188 |     }
189 |     sum = 0;
190 |     for (auto& s : futures) {
191 |       sum += s.get();
192 |     }
193 |   }
194 | 
195 |   checkResults(input, sum, foo, num_elements);
196 | }
197 | 
198 | static void CustomArguments(benchmark::internal::Benchmark* b) {
199 |   for (int j : {kSmallSize, kMediumSize, kLargeSize}) {
200 |     for (int i : pow2HalfStepThreads()) {
201 |       b->Args({i, j});
202 |     }
203 |   }
204 | }
205 | 
206 | BENCHMARK_TEMPLATE(BM_serial, kSmallSize);
207 | BENCHMARK_TEMPLATE(BM_serial, kMediumSize);
208 | BENCHMARK_TEMPLATE(BM_serial, kLargeSize);
209 | 
210 | #if defined(_OPENMP)
211 | BENCHMARK(BM_omp)->Apply(CustomArguments)->UseRealTime();
212 | #endif // OPENMP
213 | #if !defined(BENCHMARK_WITHOUT_TBB)
214 | BENCHMARK(BM_tbb)->Apply(CustomArguments)->UseRealTime();
215 | #endif // !BENCHMARK_WITHOUT_TBB
216 | BENCHMARK(BM_async)->Apply(CustomArguments)->UseRealTime();
217 | BENCHMARK(BM_dispenso)->Apply(CustomArguments)->UseRealTime();
218 | 
219 | BENCHMARK_MAIN();
220 | 


--------------------------------------------------------------------------------
/dispenso/graph.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) Meta Platforms, Inc. and affiliates.
  3 |  *
  4 |  * This source code is licensed under the MIT license found in the
  5 |  * LICENSE file in the root directory of this source tree.
  6 |  */
  7 | 
  8 | #include <dispenso/graph.h>
  9 | 
 10 | #include <iterator>
 11 | #include <mutex>
 12 | 
 13 | namespace {
 14 | constexpr size_t kToDelete = std::numeric_limits<size_t>::max();
 15 | 
 16 | void set_union(
 17 |     std::vector<const dispenso::BiPropNode*>& s1,
 18 |     const std::vector<const dispenso::BiPropNode*>& s2) {
 19 |   std::vector<const dispenso::BiPropNode*> tmp(s1);
 20 |   s1.clear();
 21 |   std::set_union(tmp.cbegin(), tmp.cend(), s2.cbegin(), s2.cend(), std::back_inserter(s1));
 22 | }
 23 | 
 24 | void set_insert(std::vector<const dispenso::BiPropNode*>& s, const dispenso::BiPropNode* node) {
 25 |   auto it = std::upper_bound(s.begin(), s.end(), node);
 26 |   if (it == s.begin() || *(it - 1) != node) {
 27 |     s.insert(it, node);
 28 |   }
 29 | }
 30 | } // anonymous namespace
 31 | 
 32 | namespace dispenso {
 33 | 
 34 | void BiPropNode::biPropDependsOnOneNode(BiPropNode& node) {
 35 |   Node::dependsOnOneNode(node);
 36 |   if (node.biPropSet_ == nullptr && biPropSet_ == nullptr) {
 37 |     biPropSet_ = std::make_shared<std::vector<const BiPropNode*>>();
 38 |     set_insert(*biPropSet_, this);
 39 |     set_insert(*biPropSet_, &node);
 40 |     node.biPropSet_ = biPropSet_;
 41 |   } else if (node.biPropSet_ != nullptr && biPropSet_ != nullptr) {
 42 |     set_union(*biPropSet_, *node.biPropSet_);
 43 |     node.biPropSet_ = biPropSet_;
 44 |   } else if (biPropSet_ == nullptr) {
 45 |     biPropSet_ = node.biPropSet_;
 46 |     set_insert(*biPropSet_, this);
 47 |   } else {
 48 |     node.biPropSet_ = biPropSet_;
 49 |     set_insert(*biPropSet_, &node);
 50 |   }
 51 | }
 52 | 
 53 | template <class N>
 54 | void SubgraphT<N>::clear() {
 55 |   decrementDependentCounters();
 56 |   const size_t numGraphPredecessors = markNodesWithPredicessors();
 57 |   if (numGraphPredecessors != 0) {
 58 |     removePredecessorDependencies(numGraphPredecessors);
 59 |   }
 60 |   destroyNodes();
 61 | }
 62 | 
 63 | template <class N>
 64 | void SubgraphT<N>::destroyNodes() {
 65 |   for (NodeType* n : nodes_) {
 66 |     n->~NodeType();
 67 |   }
 68 |   allocator_->clear();
 69 |   nodes_.clear();
 70 | }
 71 | 
 72 | template <class N>
 73 | SubgraphT<N>::~SubgraphT() {
 74 |   for (NodeType* n : nodes_) {
 75 |     n->~NodeType();
 76 |   }
 77 | }
 78 | 
 79 | template <class N>
 80 | void SubgraphT<N>::decrementDependentCounters() {
 81 |   for (N* node : nodes_) {
 82 |     for (Node* const dependent : node->dependents_) {
 83 |       dependent->numPredecessors_--;
 84 |     }
 85 |     removeNodeFromBiPropSet(node);
 86 |   }
 87 | }
 88 | 
 89 | template <class N>
 90 | size_t SubgraphT<N>::markNodesWithPredicessors() {
 91 |   size_t numGraphPredecessors = 0;
 92 |   for (N* node : nodes_) {
 93 |     if (node->numPredecessors_ != 0) {
 94 |       numGraphPredecessors += node->numPredecessors_;
 95 |       node->numPredecessors_ = kToDelete;
 96 |     }
 97 |   }
 98 |   return numGraphPredecessors;
 99 | }
100 | 
101 | template <class N>
102 | void SubgraphT<N>::removePredecessorDependencies(size_t numGraphPredecessors) {
103 |   for (SubgraphT<N>& subgraph : graph_->subgraphs_) {
104 |     if (&subgraph == this) {
105 |       continue;
106 |     }
107 |     for (N* node : subgraph.nodes_) {
108 |       std::vector<Node*>& dependents = node->dependents_;
109 |       size_t num = dependents.size();
110 |       for (size_t i = 0; i < num;) {
111 |         if (dependents[i]->numPredecessors_ == kToDelete) {
112 |           dependents[i] = dependents[num - 1];
113 |           --num;
114 |           if (--numGraphPredecessors == 0) {
115 |             dependents.resize(num);
116 |             return;
117 |           }
118 |         } else {
119 |           i++;
120 |         }
121 |       }
122 |       dependents.resize(num);
123 |     }
124 |   }
125 | }
126 | 
127 | namespace {
128 | constexpr size_t kMaxCache = 8;
129 | // Don't cache too-large allocators.  This way we will have at most 8*(2**16) = 512K outstanding
130 | // nodes worth of memory per node type.
131 | // TODO(bbudge): Make these caching values macro configurable for lightweight platforms.
132 | constexpr size_t kMaxChunkCapacity = 1 << 16;
133 | 
134 | using AlignedNodePoolPtr =
135 |     std::unique_ptr<NoLockPoolAllocator, detail::AlignedFreeDeleter<NoLockPoolAllocator>>;
136 | 
137 | std::vector<AlignedNodePoolPtr> g_sgcache[2];
138 | std::mutex g_sgcacheMtx;
139 | 
140 | template <class T>
141 | constexpr size_t kCacheIndex = size_t{std::is_same<T, BiPropNode>::value};
142 | 
143 | } // namespace
144 | 
145 | template <class N>
146 | typename SubgraphT<N>::PoolPtr SubgraphT<N>::getAllocator() {
147 |   AlignedNodePoolPtr ptr;
148 | 
149 |   auto& cache = g_sgcache[kCacheIndex<N>];
150 | 
151 |   {
152 |     std::lock_guard<std::mutex> lk(g_sgcacheMtx);
153 |     if (cache.empty()) {
154 |       void* alloc =
155 |           detail::alignedMalloc(sizeof(NoLockPoolAllocator), alignof(NoLockPoolAllocator));
156 |       auto* pool = new (alloc)
157 |           NoLockPoolAllocator(sizeof(NodeType), 128 * sizeof(NodeType), ::malloc, ::free);
158 |       ptr.reset(pool);
159 |     } else {
160 |       ptr = std::move(cache.back());
161 |       ptr->clear();
162 |       cache.pop_back();
163 |     }
164 |   }
165 |   return PoolPtr(ptr.release(), releaseAllocator);
166 | }
167 | 
168 | template <class N>
169 | void SubgraphT<N>::releaseAllocator(NoLockPoolAllocator* ptr) {
170 |   if (!ptr) {
171 |     return;
172 |   }
173 |   if (ptr->totalChunkCapacity() < kMaxChunkCapacity) {
174 |     auto& cache = g_sgcache[kCacheIndex<N>];
175 |     {
176 |       std::lock_guard<std::mutex> lk(g_sgcacheMtx);
177 |       if (cache.size() < kMaxCache) {
178 |         cache.emplace_back(ptr);
179 |         return;
180 |       }
181 |     }
182 |   }
183 |   detail::AlignedFreeDeleter<NoLockPoolAllocator>()(ptr);
184 | }
185 | 
186 | template <class N>
187 | GraphT<N>::GraphT(GraphT<N>&& other) : subgraphs_(std::move(other.subgraphs_)) {
188 |   for (SubgraphT<N>& subgraph : subgraphs_) {
189 |     subgraph.graph_ = this;
190 |   }
191 | }
192 | 
193 | template <class N>
194 | GraphT<N>& GraphT<N>::operator=(GraphT&& other) noexcept {
195 |   subgraphs_ = std::move(other.subgraphs_);
196 |   for (SubgraphT<N>& subgraph : subgraphs_) {
197 |     subgraph.graph_ = this;
198 |   }
199 |   return *this;
200 | }
201 | 
202 | template <class N>
203 | SubgraphT<N>& GraphT<N>::addSubgraph() {
204 |   subgraphs_.push_back(SubgraphType(this));
205 |   return subgraphs_.back();
206 | }
207 | 
208 | template class DISPENSO_DLL_ACCESS SubgraphT<Node>;
209 | template class DISPENSO_DLL_ACCESS SubgraphT<BiPropNode>;
210 | template class DISPENSO_DLL_ACCESS GraphT<Node>;
211 | template class DISPENSO_DLL_ACCESS GraphT<BiPropNode>;
212 | } // namespace dispenso
213 | 


--------------------------------------------------------------------------------