├── .github ├── ISSUE_TEMPLATE │ ├── config.yml │ ├── feature_request.yml │ └── bug_report.yml ├── workflows │ ├── docs.yml │ └── build.yml └── PULL_REQUEST_TEMPLATE.md ├── docs └── benchmarks │ ├── par_tree_build.png │ ├── concurrent_vector.png │ ├── nested_for_large.png │ ├── nested_for_medium.png │ ├── nested_for_small.png │ ├── pipelines_64thread.png │ └── pipelines_256thread.png ├── dispenso ├── detail │ ├── quanta.h │ ├── per_thread_info.cpp │ ├── can_invoke.h │ ├── result_of.h │ ├── quanta.cpp │ ├── notifier_common.h │ ├── math.h │ ├── once_callable_impl.h │ ├── per_thread_info.h │ ├── timed_task_impl.h │ ├── op_result.h │ ├── graph_executor_impl.h │ ├── rw_lock_impl.h │ ├── task_set_impl.h │ └── concurrent_vector_impl2.h ├── timing.h ├── thread_id.cpp ├── thread_id.h ├── priority.h ├── third-party │ └── moodycamel │ │ ├── README.txt │ │ └── LICENSE.md ├── tsan_annotations.cpp ├── latch.h ├── completion_event.h ├── graph_executor.h ├── pool_allocator.h ├── utils │ └── graph_dot.h ├── schedulable.h ├── tsan_annotations.h ├── once_function.h ├── timed_task.cpp ├── pool_allocator.cpp ├── small_buffer_allocator.cpp ├── rw_lock.h ├── async_request.h ├── CMakeLists.txt ├── resource_pool.h ├── small_buffer_allocator.h ├── timing.cpp ├── task_set.cpp ├── pipeline.h ├── priority.cpp └── graph.cpp ├── cmake └── DispensoConfig.cmake.in ├── tests ├── forward_shared_pool.cpp ├── concurrent_vector_a_test.cpp ├── concurrent_vector_b_test.cpp ├── concurrent_vector_default_test.cpp ├── shared_pool_test.cpp ├── test_tid.h ├── concurrent_vector_test_common_types.h ├── async_request_test.cpp ├── thread_id_test.cpp ├── resource_pool_test.cpp ├── CMakeLists.txt ├── rw_lock_test.cpp ├── latch_test.cpp ├── concurrent_object_arena_test.cpp ├── completion_event_test.cpp ├── pool_allocator_test.cpp ├── priority_test.cpp └── once_function_test.cpp ├── benchmarks ├── benchmark_common.h ├── small_buffer_benchmark.cpp ├── CMakeLists.txt ├── thread_benchmark_common.h ├── rw_lock_benchmark.cpp ├── once_function_benchmark.cpp ├── for_latency_benchmark.cpp └── trivial_compute_benchmark.cpp ├── .gitignore ├── LICENSE ├── CMakeLists.txt ├── CONTRIBUTING.md ├── .clang-format └── CODE_OF_CONDUCT.md /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | -------------------------------------------------------------------------------- /docs/benchmarks/par_tree_build.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookincubator/dispenso/HEAD/docs/benchmarks/par_tree_build.png -------------------------------------------------------------------------------- /docs/benchmarks/concurrent_vector.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookincubator/dispenso/HEAD/docs/benchmarks/concurrent_vector.png -------------------------------------------------------------------------------- /docs/benchmarks/nested_for_large.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookincubator/dispenso/HEAD/docs/benchmarks/nested_for_large.png -------------------------------------------------------------------------------- /docs/benchmarks/nested_for_medium.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/facebookincubator/dispenso/HEAD/docs/benchmarks/nested_for_medium.png -------------------------------------------------------------------------------- /docs/benchmarks/nested_for_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookincubator/dispenso/HEAD/docs/benchmarks/nested_for_small.png -------------------------------------------------------------------------------- /docs/benchmarks/pipelines_64thread.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookincubator/dispenso/HEAD/docs/benchmarks/pipelines_64thread.png -------------------------------------------------------------------------------- /docs/benchmarks/pipelines_256thread.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookincubator/dispenso/HEAD/docs/benchmarks/pipelines_256thread.png -------------------------------------------------------------------------------- /dispenso/detail/quanta.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | #pragma once 9 | 10 | namespace dispenso { 11 | namespace detail { 12 | void registerFineSchedulerQuanta(); 13 | } // namespace detail 14 | } // namespace dispenso 15 | -------------------------------------------------------------------------------- /cmake/DispensoConfig.cmake.in: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) Meta Platforms, Inc. and affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | # 7 | 8 | @PACKAGE_INIT@ 9 | 10 | include(CMakeFindDependencyMacro) 11 | 12 | find_dependency(Threads) 13 | 14 | include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@_Exports.cmake") 15 | 16 | check_required_components("@PROJECT_NAME@") 17 | -------------------------------------------------------------------------------- /dispenso/timing.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | /** 9 | * @file Utilities for getting the current time. 10 | **/ 11 | 12 | #pragma once 13 | 14 | #include 15 | 16 | namespace dispenso { 17 | 18 | DISPENSO_DLL_ACCESS double getTime(); 19 | 20 | } // namespace dispenso 21 | -------------------------------------------------------------------------------- /tests/forward_shared_pool.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 
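A minimal sketch of using dispenso::getTime() from dispenso/timing.h above to time a block of work; the <dispenso/timing.h> include path and the assumption that the returned double is in seconds are mine, not stated by the header.

#include <dispenso/timing.h>

#include <cstdio>

int main() {
  const double start = dispenso::getTime();
  // Stand-in workload so the measured interval is nonzero.
  volatile double sink = 0.0;
  for (int i = 0; i < 1000000; ++i) {
    sink = sink + i * 0.5;
  }
  const double elapsed = dispenso::getTime() - start;
  std::printf("elapsed: %f (getTime units, presumably seconds)\n", elapsed);
  return 0;
}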
6 | */ 7 | 8 | #include 9 | 10 | #ifdef _WIN32 11 | __declspec(dllexport) 12 | #else 13 | __attribute__((visibility("default"))) 14 | #endif 15 | void* DISPENSO_EXPORT_NAME() { 16 | return &dispenso::globalThreadPool(); 17 | } 18 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: Docs 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | 7 | jobs: 8 | build: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v2 12 | 13 | - name: Doxygen Action 14 | uses: mattnotmitt/doxygen-action@v1 15 | with: 16 | working-directory: "docs/" 17 | doxyfile-path: "./Doxyfile" 18 | 19 | 20 | - name: Deploy 21 | uses: peaceiris/actions-gh-pages@v3 22 | with: 23 | github_token: ${{ secrets.GITHUB_TOKEN }} 24 | publish_dir: ./docs/doxygen/html 25 | -------------------------------------------------------------------------------- /dispenso/detail/per_thread_info.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | #include 9 | 10 | namespace dispenso { 11 | namespace detail { 12 | 13 | namespace { 14 | DISPENSO_THREAD_LOCAL PerThreadInfo g_perThreadInfo; 15 | } 16 | PerThreadInfo& PerPoolPerThreadInfo::info() { 17 | return g_perThreadInfo; 18 | } 19 | 20 | } // namespace detail 21 | } // namespace dispenso 22 | -------------------------------------------------------------------------------- /benchmarks/benchmark_common.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | #pragma once 9 | 10 | #include 11 | 12 | #if defined(__GNUC__) || defined(__clang__) 13 | #define UNUSED_VAR myLocalForLoopVar __attribute__((unused)) 14 | #elif defined(_MSC_VER) 15 | #define UNUSED_VAR myLocalForLoopVar __pragma(warning(suppress : 4100)) 16 | #else 17 | #define UNUSED_VAR myLocalForLoopVar 18 | #endif 19 | -------------------------------------------------------------------------------- /tests/concurrent_vector_a_test.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | #include "concurrent_vector_test_common_types.h" 9 | 10 | using TestTraitsTypes = ::testing::Types; 11 | DISPENSO_DISABLE_WARNING_PUSH 12 | DISPENSO_DISABLE_WARNING_ZERO_VARIADIC_MACRO_ARGUMENTS 13 | TYPED_TEST_SUITE(ConcurrentVectorTest, TestTraitsTypes); 14 | DISPENSO_DISABLE_WARNING_POP 15 | 16 | #include "concurrent_vector_test_common.h" 17 | -------------------------------------------------------------------------------- /tests/concurrent_vector_b_test.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 
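benchmark_common.h above defines UNUSED_VAR so that the range-for over benchmark::State gets a named loop variable without unused-variable warnings. A hypothetical micro-benchmark showing that idiom, assuming Google Benchmark's <benchmark/benchmark.h> (the same dependency the benchmarks in this tree use):

#include <benchmark/benchmark.h>

#include <cstdint>

#include "benchmark_common.h"

// Hypothetical benchmark; the loop variable is named but deliberately unused.
static void BM_trivialAccumulate(benchmark::State& state) {
  int64_t sum = 0;
  for (auto UNUSED_VAR : state) {
    benchmark::DoNotOptimize(sum += 1);
  }
}
BENCHMARK(BM_trivialAccumulate);

BENCHMARK_MAIN();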
6 | */ 7 | 8 | #include "concurrent_vector_test_common_types.h" 9 | 10 | using TestTraitsTypes = ::testing::Types; 11 | DISPENSO_DISABLE_WARNING_PUSH 12 | DISPENSO_DISABLE_WARNING_ZERO_VARIADIC_MACRO_ARGUMENTS 13 | TYPED_TEST_SUITE(ConcurrentVectorTest, TestTraitsTypes); 14 | DISPENSO_DISABLE_WARNING_POP 15 | 16 | #include "concurrent_vector_test_common.h" 17 | -------------------------------------------------------------------------------- /tests/concurrent_vector_default_test.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | #include "concurrent_vector_test_common_types.h" 9 | 10 | using TestTraitsTypes = ::testing::Types; 11 | DISPENSO_DISABLE_WARNING_PUSH 12 | DISPENSO_DISABLE_WARNING_ZERO_VARIADIC_MACRO_ARGUMENTS 13 | TYPED_TEST_SUITE(ConcurrentVectorTest, TestTraitsTypes); 14 | DISPENSO_DISABLE_WARNING_POP 15 | 16 | #include "concurrent_vector_test_common.h" 17 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled source # 2 | ################### 3 | *.com 4 | *.class 5 | *.dll 6 | *.exe 7 | *.o 8 | *.so 9 | *.a 10 | bin 11 | lib 12 | 13 | # Packages # 14 | ############ 15 | # it's better to unpack these files and commit the raw source 16 | # git has its own built in compression methods 17 | *.7z 18 | *.dmg 19 | *.gz 20 | *.iso 21 | *.jar 22 | *.rar 23 | *.tar 24 | *.zip 25 | /.project 26 | 27 | # generated cmake files # 28 | ######################### 29 | *CMakeCache.txt 30 | *.log 31 | *.make 32 | *.cmake 33 | CMakeFiles 34 | Makefile 35 | *Dir 36 | 37 | /build/* 38 | docs/doxygen/ 39 | 40 | # Clang # 41 | ######### 42 | .cache/ 43 | -------------------------------------------------------------------------------- /tests/shared_pool_test.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | #include 9 | #include 10 | 11 | #ifdef _WIN32 12 | __declspec(dllimport) void* sharedPoolA(); 13 | __declspec(dllimport) void* sharedPoolB(); 14 | #else 15 | __attribute__((visibility("default"))) void* sharedPoolA(); 16 | __attribute__((visibility("default"))) void* sharedPoolB(); 17 | #endif 18 | 19 | TEST(ThreadPool, SharedPool) { 20 | EXPECT_EQ(sharedPoolA(), sharedPoolB()); 21 | } 22 | -------------------------------------------------------------------------------- /dispenso/detail/can_invoke.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 
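shared_pool_test.cpp above only verifies that two shared libraries observe the same globalThreadPool(); as a companion, here is a minimal sketch of scheduling work onto a dispenso::ThreadPool, patterned on resource_pool_test.cpp further down in this tree. The <dispenso/thread_pool.h> header name and the assumption that a pool drains its queued tasks on destruction (which that test relies on) are mine.

#include <dispenso/thread_pool.h>

#include <atomic>
#include <cstdio>

int main() {
  std::atomic<int> count(0);
  {
    dispenso::ThreadPool pool(4);  // four worker threads
    for (int i = 0; i < 100; ++i) {
      pool.schedule([&count]() { count.fetch_add(1, std::memory_order_relaxed); });
    }
  }  // assumed: destroying the pool waits for the scheduled tasks, as in resource_pool_test.cpp
  std::printf("ran %d tasks\n", count.load());
  return 0;
}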
6 | */ 7 | 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | namespace dispenso { 14 | namespace detail { 15 | 16 | template 17 | using void_t = void; 18 | template 19 | struct CanInvoke : std::false_type {}; 20 | template 21 | struct CanInvoke()(std::declval()...))>> 22 | : std::true_type {}; 23 | 24 | } // namespace detail 25 | } // namespace dispenso 26 | -------------------------------------------------------------------------------- /dispenso/thread_id.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | #include 9 | 10 | namespace dispenso { 11 | 12 | std::atomic nextThread{0}; 13 | constexpr uint64_t kInvalidThread = std::numeric_limits::max(); 14 | DISPENSO_THREAD_LOCAL uint64_t currentThread = kInvalidThread; 15 | 16 | uint64_t threadId() { 17 | if (currentThread == kInvalidThread) { 18 | currentThread = nextThread.fetch_add(uint64_t{1}, std::memory_order_relaxed); 19 | } 20 | return currentThread; 21 | } 22 | 23 | } // namespace dispenso 24 | -------------------------------------------------------------------------------- /dispenso/detail/result_of.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | #pragma once 9 | 10 | #include 11 | 12 | namespace dispenso { 13 | namespace detail { 14 | 15 | #if defined(__cpp_lib_is_invocable) && __cpp_lib_is_invocable >= 201703L 16 | template 17 | using ResultOf = typename std::invoke_result_t, std::decay_t...>; 18 | #else 19 | template 20 | using ResultOf = 21 | typename std::result_of::type(typename std::decay::type...)>::type; 22 | #endif // c++17 23 | 24 | } // namespace detail 25 | } // namespace dispenso 26 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build and test 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | jobs: 10 | build-tests: 11 | name: Build on ${{ matrix.os }} 12 | runs-on: ${{ matrix.os }} 13 | strategy: 14 | matrix: 15 | os: [macos-latest, ubuntu-latest, windows-latest] 16 | env: 17 | CTEST_OUTPUT_ON_FAILURE: 1 18 | steps: 19 | - uses: actions/checkout@v2 20 | - name: Configuring 21 | run: | 22 | mkdir build && cd build && cmake .. -DDISPENSO_BUILD_TESTS=ON -DCMAKE_BUILD_TYPE=Release 23 | - name: Build 24 | working-directory: ./build 25 | run: | 26 | cmake --build . --parallel 4 --config Release 27 | - name: Running Unit Tests 28 | working-directory: ./build 29 | run: | 30 | ctest -LE flaky --build-config Release 31 | -------------------------------------------------------------------------------- /dispenso/detail/quanta.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 
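can_invoke.h above implements the classic void_t detection idiom. The following is a standalone re-creation of the same idiom with hypothetical names (CanCall, VoidT) rather than dispenso's exact signatures; it shows how such a trait answers "can this callable be invoked with these argument types?" at compile time.

#include <type_traits>
#include <utility>

// Hypothetical stand-in for dispenso::detail::CanInvoke, built on the same void_t trick.
template <class...>
using VoidT = void;

template <class Signature, class = void>
struct CanCall : std::false_type {};

template <class F, class... Args>
struct CanCall<F(Args...), VoidT<decltype(std::declval<F>()(std::declval<Args>()...))>>
    : std::true_type {};

struct Adder {
  int operator()(int a, int b) const {
    return a + b;
  }
};

static_assert(CanCall<Adder(int, int)>::value, "Adder accepts two ints");
static_assert(!CanCall<Adder(const char*)>::value, "Adder does not accept a string");

int main() {
  return 0;
}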
6 | */ 7 | 8 | #ifdef _WIN32 9 | #include 10 | #include 11 | #endif 12 | 13 | #include 14 | 15 | namespace dispenso { 16 | #ifdef _WIN32 17 | 18 | namespace { 19 | struct OsQuantaSetter { 20 | OsQuantaSetter() { 21 | timeBeginPeriod(1); 22 | } 23 | ~OsQuantaSetter() { 24 | timeEndPeriod(1); 25 | } 26 | }; 27 | } // namespace 28 | #else 29 | namespace { 30 | struct OsQuantaSetter {}; 31 | } // namespace 32 | 33 | #endif // _WIN32 34 | 35 | namespace detail { 36 | void registerFineSchedulerQuanta() { 37 | static OsQuantaSetter setter; 38 | (void)setter; 39 | } 40 | } // namespace detail 41 | } // namespace dispenso 42 | -------------------------------------------------------------------------------- /tests/test_tid.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | #pragma once 9 | 10 | #include 11 | 12 | // Note that this header is intended for direct inclusion into test cpps that require the 13 | // functionality. tids are essentially independent per translation unit (don't expect 14 | // coordinated/sane behavior if used from multiple cpps in the same binary). 15 | 16 | namespace { 17 | 18 | std::atomic g_nextTid(0); 19 | DISPENSO_THREAD_LOCAL int g_tid = -1; 20 | 21 | inline void resetTestTid() { 22 | g_tid = -1; 23 | g_nextTid.store(0); 24 | } 25 | 26 | inline int getTestTid() { 27 | if (g_tid < 0) { 28 | g_tid = g_nextTid.fetch_add(1, std::memory_order_relaxed); 29 | } 30 | return g_tid; 31 | } 32 | 33 | } // namespace 34 | -------------------------------------------------------------------------------- /dispenso/thread_id.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | /** 9 | * @file Utilities for getting a unique thread identifier 10 | **/ 11 | 12 | #pragma once 13 | 14 | #include 15 | 16 | namespace dispenso { 17 | 18 | /** 19 | * Get the current thread's identifier, unique within the current process. 20 | * 21 | * @return An integer representing the current thread. 22 | * 23 | * @note Thread IDs are assumed to not be reused over the lifetime of a process, but this should 24 | * still enable processes running for thousands of years, even with very poor spawn/kill thread 25 | * patterns. 26 | * 27 | * @note If thread ID is needed for cross-process synchronization, one must fall back on 28 | * system-specific thread IDs. 29 | **/ 30 | DISPENSO_DLL_ACCESS uint64_t threadId(); 31 | 32 | } // namespace dispenso 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Facebook, Inc. and its affiliates. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yml: -------------------------------------------------------------------------------- 1 | name: Feature Request 2 | description: File a feature request 3 | title: "[Feature Request]: " 4 | labels: ["feature", "request"] 5 | assignees: 6 | - graphicsMan 7 | body: 8 | - type: markdown 9 | attributes: 10 | value: | 11 | Thanks for taking the time to fill out this feature request! 12 | - type: input 13 | id: contact 14 | attributes: 15 | label: Contact Details 16 | description: How can we get in touch with you if we need more info? 17 | placeholder: ex. email@example.com 18 | validations: 19 | required: false 20 | - type: textarea 21 | id: whats-wanted 22 | attributes: 23 | label: What is the desired feature? 24 | description: Give some details 25 | value: "Details here" 26 | validations: 27 | required: true 28 | - type: checkboxes 29 | id: terms 30 | attributes: 31 | label: Code of Conduct 32 | description: By submitting this issue, you agree to follow our [Code of Conduct](https://example.com) 33 | options: 34 | - label: I agree to follow this project's Code of Conduct 35 | required: true 36 | -------------------------------------------------------------------------------- /tests/concurrent_vector_test_common_types.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 
6 | */ 7 | 8 | #include 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | 17 | template 18 | class ConcurrentVectorTest : public testing::Test { 19 | public: 20 | }; 21 | 22 | using dispenso::ConcurrentVectorReallocStrategy; 23 | 24 | struct TestTraitsA { 25 | static constexpr bool kPreferBuffersInline = false; 26 | static constexpr ConcurrentVectorReallocStrategy kReallocStrategy = 27 | ConcurrentVectorReallocStrategy::kHalfBufferAhead; 28 | static constexpr bool kIteratorPreferSpeed = false; 29 | }; 30 | 31 | struct TestTraitsB { 32 | static constexpr bool kPreferBuffersInline = true; 33 | static constexpr ConcurrentVectorReallocStrategy kReallocStrategy = 34 | ConcurrentVectorReallocStrategy::kFullBufferAhead; 35 | static constexpr bool kIteratorPreferSpeed = true; 36 | }; 37 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- 1 | name: Bug Report 2 | description: File a bug report 3 | title: "[Bug]: " 4 | labels: ["bug", "triage"] 5 | assignees: 6 | - graphicsMan 7 | body: 8 | - type: markdown 9 | attributes: 10 | value: | 11 | Thanks for taking the time to fill out this bug report! 12 | - type: input 13 | id: contact 14 | attributes: 15 | label: Contact Details 16 | description: How can we get in touch with you if we need more info? 17 | placeholder: ex. email@example.com 18 | validations: 19 | required: false 20 | - type: textarea 21 | id: what-happened 22 | attributes: 23 | label: What happened? 24 | description: Also tell us, what did you expect to happen? 25 | placeholder: Tell us what you see! 26 | value: "A bug happened!" 27 | validations: 28 | required: true 29 | - type: dropdown 30 | id: version 31 | attributes: 32 | label: Version 33 | description: What version of our software are you running? 34 | options: 35 | - 1.0 (Default) 36 | - latest (Edge) 37 | validations: 38 | required: true 39 | - type: checkboxes 40 | id: terms 41 | attributes: 42 | label: Code of Conduct 43 | description: By submitting this issue, you agree to follow our [Code of Conduct](https://example.com) 44 | options: 45 | - label: I agree to follow this project's Code of Conduct 46 | required: true 47 | -------------------------------------------------------------------------------- /dispenso/detail/notifier_common.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 
6 | */ 7 | 8 | #pragma once 9 | 10 | // For fallback path 11 | #include 12 | #include 13 | 14 | #if defined(__linux__) 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | namespace dispenso { 21 | namespace detail { 22 | static int futex( 23 | int* uaddr, 24 | int futex_op, 25 | int val, 26 | const struct timespec* timeout, 27 | int* /*uaddr2*/, 28 | int val3) { 29 | return static_cast(syscall(SYS_futex, uaddr, futex_op, val, timeout, uaddr, val3)); 30 | } 31 | } // namespace detail 32 | } // namespace dispenso 33 | 34 | #elif defined(__MACH__) 35 | #include 36 | 37 | #elif defined(_WIN32) 38 | 39 | #if (defined(_M_ARM64) || defined(_M_ARM)) && !defined(_ARM_) 40 | #define _ARM_ 41 | #elif _WIN64 42 | #define _AMD64_ 43 | #elif _WIN32 44 | #define _X86_ 45 | #else 46 | #error "No valid windows platform" 47 | #endif // platform 48 | 49 | #include 50 | #include 51 | 52 | namespace dispenso { 53 | namespace detail { 54 | 55 | constexpr int kErrorTimeoutWin = 0x000005B4; 56 | constexpr unsigned long kInfiniteWin = static_cast(-1); 57 | 58 | } // namespace detail 59 | } // namespace dispenso 60 | 61 | #endif // PLATFORM 62 | -------------------------------------------------------------------------------- /dispenso/detail/math.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | #pragma once 9 | 10 | #include 11 | 12 | #if defined(_WIN32) 13 | #include 14 | #endif //_WIN32 15 | 16 | namespace dispenso { 17 | 18 | namespace detail { 19 | 20 | constexpr uint64_t nextPow2(uint64_t v) { 21 | // https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 22 | v--; 23 | v |= v >> 1; 24 | v |= v >> 2; 25 | v |= v >> 4; 26 | v |= v >> 8; 27 | v |= v >> 16; 28 | v |= v >> 32; 29 | v++; 30 | return v; 31 | } 32 | 33 | constexpr inline uint32_t log2const(uint64_t v) { 34 | constexpr uint64_t b[] = {0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000, 0xFFFFFFFF00000000UL}; 35 | constexpr uint32_t S[] = {1, 2, 4, 8, 16, 32}; 36 | 37 | uint32_t r = 0; 38 | for (uint32_t i = 6; i--;) { 39 | if (v & b[i]) { 40 | v >>= S[i]; 41 | r |= S[i]; 42 | } 43 | } 44 | 45 | return r; 46 | } 47 | 48 | #if (defined(__GNUC__) || defined(__clang__)) 49 | inline uint32_t log2(uint64_t v) { 50 | return static_cast(63 - __builtin_clzll(v)); 51 | } 52 | #elif defined(_WIN32) 53 | inline uint32_t log2(uint64_t v) { 54 | return static_cast(63 - __lzcnt64(v)); 55 | } 56 | #else 57 | inline uint32_t log2(uint64_t v) { 58 | return log2const(v); 59 | } 60 | 61 | #endif // PLATFORM 62 | 63 | } // namespace detail 64 | } // namespace dispenso 65 | -------------------------------------------------------------------------------- /dispenso/detail/once_callable_impl.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 
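A few worked values for the helpers in dispenso/detail/math.h above, written as compile-time checks; the <dispenso/detail/math.h> include path is assumed from the file layout. nextPow2 rounds up to the next power of two (leaving exact powers unchanged) and log2const computes the floor of log base 2.

#include <dispenso/detail/math.h>

using dispenso::detail::log2const;
using dispenso::detail::nextPow2;

// nextPow2 rounds up; exact powers of two map to themselves.
static_assert(nextPow2(1) == 1, "");
static_assert(nextPow2(37) == 64, "");
static_assert(nextPow2(64) == 64, "");

// log2const is floor(log2(v)).
static_assert(log2const(1) == 0, "");
static_assert(log2const(64) == 6, "");
static_assert(log2const(100) == 6, "");

int main() {
  return 0;
}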
6 | */ 7 | 8 | #include 9 | #include 10 | 11 | namespace dispenso { 12 | namespace detail { 13 | 14 | class OnceCallable { 15 | public: 16 | virtual void run() = 0; 17 | virtual ~OnceCallable() = default; 18 | }; 19 | 20 | template 21 | class OnceCallableImpl : public OnceCallable { 22 | public: 23 | template 24 | OnceCallableImpl(G&& f) : f_(std::forward(f)) {} 25 | 26 | void run() override { 27 | f_(); 28 | // This is admittedly playing nasty games here; however, the base class is empty, and we 29 | // completely control our own polymorphic existence. No need to make the virtual base class 30 | // destructor get called (optimization). 31 | this->OnceCallableImpl::~OnceCallableImpl(); 32 | deallocSmallBuffer(this); 33 | } 34 | 35 | ~OnceCallableImpl() override = default; 36 | 37 | private: 38 | F f_; 39 | }; 40 | 41 | template 42 | inline OnceCallable* createOnceCallable(F&& f) { 43 | using FNoRef = typename std::remove_reference::type; 44 | 45 | constexpr size_t kImplSize = nextPow2(sizeof(OnceCallableImpl<16, FNoRef>)); 46 | 47 | return new (allocSmallBuffer()) 48 | OnceCallableImpl(std::forward(f)); 49 | } 50 | 51 | } // namespace detail 52 | } // namespace dispenso 53 | -------------------------------------------------------------------------------- /tests/async_request_test.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | #include 9 | 10 | #include 11 | 12 | #include 13 | 14 | TEST(AsyncRequest, SequentialAsExpected) { 15 | dispenso::AsyncRequest req; 16 | 17 | EXPECT_FALSE(req.updateRequested()); 18 | EXPECT_FALSE(req.tryEmplaceUpdate(5)); 19 | EXPECT_FALSE(req.getUpdate()); 20 | 21 | req.requestUpdate(); 22 | 23 | EXPECT_TRUE(req.updateRequested()); 24 | 25 | EXPECT_FALSE(req.getUpdate()); 26 | 27 | EXPECT_TRUE(req.tryEmplaceUpdate(0)); 28 | 29 | auto result = req.getUpdate(); 30 | EXPECT_TRUE(result); 31 | EXPECT_EQ(0, result.value()); 32 | } 33 | 34 | TEST(AsyncRequest, AsyncAsExpected) { 35 | dispenso::AsyncRequest req; 36 | std::atomic running(true); 37 | std::thread t([&req, &running]() { 38 | int next = 0; 39 | while (running.load(std::memory_order_relaxed)) { 40 | if (req.updateRequested()) { 41 | req.tryEmplaceUpdate(next++); 42 | } 43 | } 44 | }); 45 | 46 | int sum = 0; 47 | int sumExpected = 0; 48 | for (int i = 0; i < 5000; ++i) { 49 | sumExpected += i; 50 | 51 | req.requestUpdate(); 52 | while (true) { 53 | auto result = req.getUpdate(); 54 | if (result.has_value()) { 55 | sum += result.value(); 56 | break; 57 | } 58 | } 59 | } 60 | 61 | running.store(false, std::memory_order_release); 62 | t.join(); 63 | 64 | EXPECT_EQ(sum, sumExpected); 65 | } 66 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | # PR Details 2 | 3 | 4 | 5 | ## Description 6 | 7 | 8 | 9 | ## Related Issue 10 | 11 | 12 | 13 | ## Motivation and Context 14 | 15 | 16 | 17 | ## Test Plan 18 | 19 | 20 | 21 | 22 | 23 | ## Types of changes 24 | 25 | 26 | 27 | - [ ] Docs change 28 | - [ ] Refactoring 29 | - [ ] Dependency upgrade 30 | - [ ] Bug fix (non-breaking change which fixes an issue) 31 | - [ ] New feature (non-breaking change which adds functionality) 32 | - [ ] Breaking change (fix or feature that would 
cause existing functionality to change) 33 | 34 | ## Checklist 35 | 36 | 37 | 38 | 39 | - [ ] My code follows the code style of this project. 40 | - [ ] I have run clang-format. 41 | - [ ] My change requires a change to the documentation. 42 | - [ ] I have updated the documentation accordingly. 43 | - [ ] I have read the **CONTRIBUTING** document. 44 | - [ ] I have added tests to cover my changes. 45 | - [ ] All new and existing tests passed, including in ASAN and TSAN modes (if available on your platform). 46 | -------------------------------------------------------------------------------- /dispenso/priority.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | #pragma once 9 | 10 | #include 11 | 12 | /** 13 | * @file priority.h 14 | * 15 | * Utilities for getting and setting thread priority. This is an attempt to unify concepts for 16 | * thread priority usefully across multiple platforms. For finer control, use platform specific 17 | * functionality. 18 | * 19 | * @note When using higher-than-normal priority, use caution! Too many threads running at too high 20 | * priority can have a strong negative impact on the responsivity of the machine. Prefer to use 21 | * realtime priority only for short running tasks that need to be very responsively run. 22 | **/ 23 | 24 | namespace dispenso { 25 | 26 | /** 27 | * A thread priority setting. Enum values in increasing order of priority. 28 | **/ 29 | enum class ThreadPriority { kLow, kNormal, kHigh, kRealtime }; 30 | 31 | /** 32 | * Access the current thread priority as set by setCurrentThreadPriority. 33 | * 34 | * @return The priority of the current thread 35 | * 36 | * @note If the current thread priority has been set via a platform-specific mechanism, this may 37 | * return an incorrect value. 38 | **/ 39 | DISPENSO_DLL_ACCESS ThreadPriority getCurrentThreadPriority(); 40 | 41 | /** 42 | * Set the current thread's priority 43 | * 44 | * @param prio The priority to set to 45 | * 46 | * @return true if the priority was modified, false otherwise. 47 | **/ 48 | DISPENSO_DLL_ACCESS bool setCurrentThreadPriority(ThreadPriority prio); 49 | 50 | } // namespace dispenso 51 | -------------------------------------------------------------------------------- /dispenso/detail/per_thread_info.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 
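priority.h above cautions that elevated priority is for short, latency-sensitive work. A minimal sketch of the intended call pattern, assuming the header is reachable as <dispenso/priority.h>: raise the priority, run the short task, then restore the previous level so other threads are not starved.

#include <dispenso/priority.h>

void handleUrgentEvent() {
  const dispenso::ThreadPriority previous = dispenso::getCurrentThreadPriority();
  const bool raised = dispenso::setCurrentThreadPriority(dispenso::ThreadPriority::kHigh);
  // ... short, latency-sensitive work goes here ...
  if (raised) {
    dispenso::setCurrentThreadPriority(previous);  // restore promptly
  }
}

int main() {
  handleUrgentEvent();
  return 0;
}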
6 | */ 7 | 8 | #pragma once 9 | 10 | #include 11 | 12 | namespace dispenso { 13 | namespace detail { 14 | 15 | struct alignas(kCacheLineSize) PerThreadInfo { 16 | void* pool = nullptr; 17 | void* producer = nullptr; 18 | int parForRecursionLevel = 0; 19 | }; 20 | 21 | class ParForRecursion { 22 | public: 23 | ~ParForRecursion() { 24 | --parForRecursionLevel_; 25 | } 26 | 27 | private: 28 | ParForRecursion(int& parForRecursionLevel) : parForRecursionLevel_(parForRecursionLevel) { 29 | ++parForRecursionLevel_; 30 | } 31 | 32 | int& parForRecursionLevel_; 33 | friend class PerPoolPerThreadInfo; 34 | }; 35 | 36 | class PerPoolPerThreadInfo { 37 | public: 38 | static void registerPool(void* pool, void* producer) { 39 | auto& i = info(); 40 | i.pool = pool; 41 | i.producer = producer; 42 | } 43 | 44 | static void* producer(void* pool) { 45 | auto& i = info(); 46 | return i.pool == pool ? i.producer : nullptr; 47 | } 48 | 49 | static bool isParForRecursive(void* pool) { 50 | auto& i = info(); 51 | return (!i.pool || i.pool == pool) && i.parForRecursionLevel > 0; 52 | } 53 | 54 | static bool isPoolRecursive(void* pool) { 55 | return info().pool == pool; 56 | } 57 | 58 | static ParForRecursion parForRecurse() { 59 | return ParForRecursion(info().parForRecursionLevel); 60 | } 61 | 62 | private: 63 | DISPENSO_DLL_ACCESS static PerThreadInfo& info(); 64 | }; 65 | 66 | } // namespace detail 67 | } // namespace dispenso 68 | -------------------------------------------------------------------------------- /tests/thread_id_test.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | TEST(ThreadId, Repeatable) { 17 | constexpr int kRounds = 100; 18 | constexpr int kThreadsPerRound = 8; 19 | for (int round = 0; round < kRounds; ++round) { 20 | std::vector threads; 21 | for (int i = 0; i < kThreadsPerRound; ++i) { 22 | threads.emplace_back([]() { 23 | constexpr int kTrials = 1000; 24 | auto id = dispenso::threadId(); 25 | 26 | for (int i = 0; i < kTrials; ++i) { 27 | EXPECT_EQ(id, dispenso::threadId()); 28 | } 29 | }); 30 | } 31 | 32 | for (auto& t : threads) { 33 | t.join(); 34 | } 35 | } 36 | } 37 | 38 | TEST(ThreadId, Unique) { 39 | constexpr int kRounds = 1000; 40 | constexpr int kThreadsPerRound = 8; 41 | 42 | std::vector ids(kRounds * kThreadsPerRound); 43 | std::atomic slot(0); 44 | 45 | for (int round = 0; round < kRounds; ++round) { 46 | std::vector threads; 47 | for (int i = 0; i < kThreadsPerRound; ++i) { 48 | threads.emplace_back([&ids, &slot]() { 49 | ids[slot.fetch_add(1, std::memory_order_relaxed)] = dispenso::threadId(); 50 | }); 51 | } 52 | 53 | for (auto& t : threads) { 54 | t.join(); 55 | } 56 | } 57 | 58 | std::unordered_set uniquenessSet; 59 | for (uint64_t id : ids) { 60 | EXPECT_TRUE(uniquenessSet.insert(id).second); 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /dispenso/detail/timed_task_impl.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 
6 | */ 7 | 8 | #include 9 | 10 | namespace dispenso { 11 | namespace detail { 12 | 13 | enum FunctionFlags : uint32_t { kFFlagsNone = 0, kFFlagsDetached = 1, kFFlagsCancelled = 2 }; 14 | 15 | struct TimedTaskImpl { 16 | alignas(kCacheLineSize) std::atomic count{0}; 17 | std::atomic timesToRun; 18 | std::atomic flags{kFFlagsNone}; 19 | std::atomic inProgress{0}; 20 | double nextAbsTime; 21 | double period; 22 | bool steady; 23 | std::function)> func; 24 | 25 | template 26 | TimedTaskImpl(size_t times, double next, double per, F&& f, Schedulable& sched, bool stdy) 27 | : timesToRun(times), nextAbsTime(next), period(per), steady(stdy) { 28 | func = [&sched, f = std::move(f), this](std::shared_ptr me) { 29 | if (flags.load(std::memory_order_acquire) & kFFlagsCancelled) { 30 | return; 31 | } 32 | 33 | inProgress.fetch_add(1, std::memory_order_acq_rel); 34 | 35 | auto wrap = [&f, this, me = std::move(me)]() mutable { 36 | if (!(flags.load(std::memory_order_acquire) & kFFlagsCancelled)) { 37 | if (!f()) { 38 | timesToRun.store(0, std::memory_order_release); 39 | flags.fetch_or(kFFlagsCancelled, std::memory_order_acq_rel); 40 | func = {}; 41 | } 42 | count.fetch_add(1, std::memory_order_acq_rel); 43 | } 44 | 45 | inProgress.fetch_sub(1, std::memory_order_release); 46 | me.reset(); 47 | }; 48 | sched.schedule(wrap, ForceQueuingTag()); 49 | }; 50 | } 51 | }; 52 | 53 | } // namespace detail 54 | } // namespace dispenso 55 | -------------------------------------------------------------------------------- /dispenso/third-party/moodycamel/README.txt: -------------------------------------------------------------------------------- 1 | https://github.com/cameron314/concurrentqueue 2 | 3 | commit 65d6970912fc3f6bb62d80edf95ca30e0df85137 (HEAD -> master, origin/master, origin/HEAD) 4 | Merge: d49fa2b 08dcafc 5 | Author: Cameron 6 | Date: Sun Jul 24 10:02:12 2022 -0400 7 | 8 | Merge pull request #308 from r8bhavneet/master 9 | 10 | Update README.md 11 | 12 | commit 08dcafcd131b46e1a63abdc9b5f73c852193edca 13 | Author: r8bhavneet <98200254+r8bhavneet@users.noreply.github.com> 14 | Date: Sun Jul 24 02:35:57 2022 -0700 15 | 16 | Update README.md 17 | 18 | Hey, I really liked the project and was reading through the Readme.md file when I came across some redundant words and phrases which you might have missed whil 19 | e editing the documentation. It would be really a great opportunity for me if I could contribute to this project. Thank you. 
20 | 21 | commit d49fa2b0bd1c6185d93509f48c8987f9759d7238 22 | Merge: 0a40449 9dc1b2c 23 | Author: Cameron 24 | Date: Mon May 9 07:43:29 2022 -0400 25 | 26 | Merge pull request #296 from MathiasMagnus/fix-c4554 27 | 28 | Proper MSVC warning fix and note 29 | 30 | commit 9dc1b2cfcad03b4ee22ea57ddb5c453c41c19ac9 31 | Author: Máté Ferenc Nagy-Egri 32 | Date: Mon May 9 13:19:39 2022 +0200 33 | 34 | Proper MSVC warning fix and note 35 | 36 | commit 0a404492ac2c0bba0f62eb2b859ec152e494f8bf 37 | Author: Cameron 38 | Date: Sat May 7 12:04:00 2022 -0400 39 | 40 | Attempt to resolve -Wsign-conversion warnings in concurrentqueue.h (see #294) 41 | 42 | commit 22c78daf65d2c8cce9399a29171676054aa98807 43 | Merge: c52e5ef 263c55d 44 | Author: Cameron 45 | Date: Sun Mar 20 15:16:30 2022 -0400 46 | 47 | Merge pull request #290 from usurai/master 48 | 49 | Fix link in README 50 | 51 | commit 263c55d5c95545abee1ef25662c752c5296d7c34 52 | Author: usurai 53 | Date: Thu Mar 17 16:09:14 2022 +0800 54 | 55 | Fix link in README 56 | -------------------------------------------------------------------------------- /dispenso/detail/op_result.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | #pragma once 9 | 10 | namespace dispenso { 11 | namespace detail { 12 | 13 | template 14 | class OpResult { 15 | public: 16 | OpResult() : ptr_(nullptr) {} 17 | 18 | template 19 | OpResult(U&& u) : ptr_(new (buf_) T(std::forward(u))) {} 20 | 21 | OpResult(const OpResult& oth) : ptr_(oth ? new (buf_) T(*oth.ptr_) : nullptr) {} 22 | 23 | OpResult(OpResult&& oth) : ptr_(oth ? new (buf_) T(std::move(*oth.ptr_)) : nullptr) { 24 | oth.ptr_ = nullptr; 25 | } 26 | 27 | OpResult& operator=(const OpResult& oth) { 28 | if (&oth == this) { 29 | return *this; 30 | } 31 | if (ptr_) { 32 | ptr_->~T(); 33 | } 34 | 35 | if (oth) { 36 | ptr_ = new (buf_) T(*oth.ptr_); 37 | } else { 38 | ptr_ = nullptr; 39 | } 40 | return *this; 41 | } 42 | 43 | OpResult& operator=(OpResult&& oth) { 44 | if (&oth == this) { 45 | return *this; 46 | } 47 | if (ptr_) { 48 | ptr_->~T(); 49 | } 50 | 51 | if (oth) { 52 | ptr_ = new (buf_) T(std::move(*oth.ptr_)); 53 | oth.ptr_ = nullptr; 54 | } else { 55 | ptr_ = nullptr; 56 | } 57 | 58 | return *this; 59 | } 60 | 61 | ~OpResult() { 62 | if (ptr_) { 63 | ptr_->~T(); 64 | } 65 | } 66 | 67 | template 68 | T& emplace(Args&&... args) { 69 | if (ptr_) { 70 | ptr_->~T(); 71 | } 72 | ptr_ = new (buf_) T(std::forward(args)...); 73 | return *ptr_; 74 | } 75 | 76 | operator bool() const { 77 | return ptr_; 78 | } 79 | 80 | bool has_value() const { 81 | return ptr_; 82 | } 83 | 84 | T& value() { 85 | return *ptr_; 86 | } 87 | 88 | private: 89 | alignas(T) char buf_[sizeof(T)]; 90 | T* ptr_; 91 | }; 92 | 93 | } // namespace detail 94 | } // namespace dispenso 95 | -------------------------------------------------------------------------------- /dispenso/tsan_annotations.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 
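op_result.h above is a small optional-like holder (it is what AsyncRequest::getUpdate() hands back in async_request_test.cpp earlier in this tree). A short sketch of the interface it shows: empty construction, emplace, has_value/value, and a move that leaves the source empty. Including the detail header directly as <dispenso/detail/op_result.h> is an assumption.

#include <dispenso/detail/op_result.h>

#include <cassert>
#include <string>
#include <utility>

int main() {
  dispenso::detail::OpResult<std::string> result;  // starts disengaged
  assert(!result.has_value());

  result.emplace("hello");  // constructs the value in the inline buffer
  assert(result);           // operator bool reports engaged
  assert(result.value() == "hello");

  // The move constructor shown above leaves the source disengaged.
  dispenso::detail::OpResult<std::string> taken = std::move(result);
  assert(taken.has_value());
  assert(!result.has_value());
  return 0;
}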
6 | */ 7 | 8 | #include 9 | 10 | #if DISPENSO_HAS_TSAN 11 | 12 | #ifdef __GNUC__ 13 | #define ATTRIBUTE_WEAK __attribute__((weak)) 14 | #else 15 | #define ATTRIBUTE_WEAK 16 | #endif 17 | 18 | // These are found in the accompanying libtsan, but there is no header exposing them. We want to 19 | // also avoid exposing them in a header to discourage folks from calling them directly. 20 | extern "C" { 21 | void AnnotateIgnoreReadsBegin(const char* f, int l) ATTRIBUTE_WEAK; 22 | 23 | void AnnotateIgnoreReadsEnd(const char* f, int l) ATTRIBUTE_WEAK; 24 | 25 | void AnnotateIgnoreWritesBegin(const char* f, int l) ATTRIBUTE_WEAK; 26 | 27 | void AnnotateIgnoreWritesEnd(const char* f, int l) ATTRIBUTE_WEAK; 28 | 29 | void AnnotateNewMemory(const char* f, int l, const volatile void* address, long size) 30 | ATTRIBUTE_WEAK; 31 | 32 | void AnnotateHappensBefore(const char* f, int l, const volatile void* address) ATTRIBUTE_WEAK; 33 | void AnnotateHappensAfter(const char* f, int l, const volatile void* address) ATTRIBUTE_WEAK; 34 | } 35 | 36 | namespace dispenso { 37 | namespace detail { 38 | 39 | void annotateIgnoreWritesBegin(const char* f, int l) { 40 | AnnotateIgnoreWritesBegin(f, l); 41 | } 42 | void annotateIgnoreWritesEnd(const char* f, int l) { 43 | AnnotateIgnoreWritesEnd(f, l); 44 | } 45 | void annotateIgnoreReadsBegin(const char* f, int l) { 46 | AnnotateIgnoreReadsBegin(f, l); 47 | } 48 | void annotateIgnoreReadsEnd(const char* f, int l) { 49 | AnnotateIgnoreReadsEnd(f, l); 50 | } 51 | 52 | void annotateNewMemory(const char* f, int l, const volatile void* address, long size) { 53 | AnnotateNewMemory(f, l, address, size); 54 | } 55 | 56 | void annotateHappensBefore(const char* f, int l, const volatile void* address) { 57 | AnnotateHappensBefore(f, l, address); 58 | } 59 | 60 | void annotateHappensAfter(const char* f, int l, const volatile void* address) { 61 | AnnotateHappensAfter(f, l, address); 62 | } 63 | 64 | } // namespace detail 65 | } // namespace dispenso 66 | 67 | #endif // TSAN 68 | -------------------------------------------------------------------------------- /dispenso/latch.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | /** 9 | * @file latch.h 10 | * A file providing a Latch barrier type, which gives a way for threads to wait until all expected 11 | * threads have reached this point. This is intended to match API and behavior of C++20 std::latch. 12 | **/ 13 | 14 | #pragma once 15 | 16 | #include 17 | 18 | #include 19 | 20 | namespace dispenso { 21 | 22 | /** 23 | * A class which can be used for barrier scenarios. See e.g. 24 | * https://en.cppreference.com/w/cpp/thread/latch 25 | **/ 26 | class Latch { 27 | public: 28 | /** 29 | * Construct a latch with expected number of threads to wait on. 30 | * 31 | * @param threadGroupCount The number of threads in the group. 32 | **/ 33 | explicit Latch(uint32_t threadGroupCount) noexcept : impl_(threadGroupCount) {} 34 | 35 | /** 36 | * Decrement the counter in a non-blocking manner. 37 | **/ 38 | void count_down(uint32_t n = 1) noexcept { 39 | if (impl_.intrusiveStatus().fetch_sub(n, std::memory_order_acq_rel) == 1) { 40 | impl_.notify(0); 41 | } 42 | } 43 | 44 | /** 45 | * See if the count has been reduced to zero, indicating all necessary threads 46 | * have synchronized. 
47 | * 48 | * @note try_wait is a misnomer, as the function never blocks. We kept the name to match C++20 49 | * API. 50 | * @return true only if the internal counter has reached zero. 51 | **/ 52 | bool try_wait() const noexcept { 53 | return impl_.intrusiveStatus().load(std::memory_order_acquire) == 0; 54 | } 55 | 56 | /** 57 | * Wait for all threads to have synchronized. 58 | **/ 59 | void wait() const noexcept { 60 | impl_.wait(0); 61 | } 62 | 63 | /** 64 | * Decrement the counter and wait 65 | **/ 66 | void arrive_and_wait() noexcept { 67 | if (impl_.intrusiveStatus().fetch_sub(1, std::memory_order_acq_rel) > 1) { 68 | impl_.wait(0); 69 | } else { 70 | impl_.notify(0); 71 | } 72 | } 73 | 74 | private: 75 | detail::CompletionEventImpl impl_; 76 | }; 77 | 78 | } // namespace dispenso 79 | -------------------------------------------------------------------------------- /tests/resource_pool_test.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | namespace { 14 | // In real use cases, the buffer may perform some expensive initialization such as allocate a large 15 | // chunk of memory. 16 | struct Buffer { 17 | Buffer(std::atomic_int& _total_count, std::atomic_int& _num_buffers) 18 | : total_count(_total_count), num_buffers(_num_buffers), count(0) {} 19 | // On destruction, add the count 20 | ~Buffer() { 21 | total_count += count; 22 | num_buffers += 1; 23 | } 24 | std::atomic_int& total_count; 25 | std::atomic_int& num_buffers; 26 | int count; 27 | }; 28 | 29 | void BuffersTest(const int num_threads, const int num_buffers) { 30 | constexpr int kNumTasks = 100000; 31 | std::atomic_int total_count(0); 32 | std::atomic_int num_buffers_created(0); 33 | { 34 | dispenso::ResourcePool buffer_pool(num_buffers, [&total_count, &num_buffers_created]() { 35 | return Buffer(total_count, num_buffers_created); 36 | }); 37 | dispenso::ThreadPool thread_pool(num_threads); 38 | for (int i = 0; i < kNumTasks; ++i) { 39 | thread_pool.schedule([&]() { 40 | auto buffer_resource = buffer_pool.acquire(); 41 | ++buffer_resource.get().count; 42 | }); 43 | } 44 | } 45 | 46 | // The sum of all the buffers counts should be equal to the number of tasks. 47 | EXPECT_EQ(total_count, kNumTasks); 48 | EXPECT_EQ(num_buffers_created, num_buffers); 49 | } 50 | 51 | } // namespace 52 | 53 | TEST(ResourcePool, SameNumBuffersAsThreadsTest) { 54 | constexpr int kNumBuffers = 2; 55 | constexpr int kNumThreads = 2; 56 | BuffersTest(kNumBuffers, kNumThreads); 57 | } 58 | 59 | TEST(ResourcePool, FewerBuffersThanThreadsTest) { 60 | constexpr int kNumBuffers = 1; 61 | constexpr int kNumThreads = 2; 62 | BuffersTest(kNumBuffers, kNumThreads); 63 | } 64 | 65 | TEST(ResourcePool, MoreBuffersThanThreadsTest) { 66 | constexpr int kNumBuffers = 2; 67 | constexpr int kNumThreads = 1; 68 | BuffersTest(kNumBuffers, kNumThreads); 69 | } 70 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
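A minimal sketch of the Latch shown in dispenso/latch.h above: the main thread constructs the latch with the number of workers, each worker calls count_down() when it is done, and wait() returns once the count reaches zero. The <dispenso/latch.h> include path is assumed from the file layout.

#include <dispenso/latch.h>

#include <cstdint>
#include <cstdio>
#include <thread>
#include <vector>

int main() {
  constexpr uint32_t kWorkers = 4;
  dispenso::Latch done(kWorkers);

  std::vector<std::thread> workers;
  for (uint32_t i = 0; i < kWorkers; ++i) {
    workers.emplace_back([&done, i]() {
      std::printf("worker %u finished\n", i);
      done.count_down();  // drop the remaining count by one
    });
  }

  done.wait();  // blocks until all kWorkers have counted down
  std::printf("all workers reached the latch\n");

  for (auto& t : workers) {
    t.join();
  }
  return 0;
}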
5 | 6 | 7 | cmake_minimum_required(VERSION 3.12) 8 | project( 9 | Dispenso 10 | VERSION 1.4.0 11 | DESCRIPTION "Dispenso is a library for working with sets of parallel tasks" 12 | LANGUAGES CXX) 13 | 14 | if ("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") 15 | set(DISPENSO_STANDALONE TRUE) 16 | else() 17 | set(DISPENSO_STANDALONE FALSE) 18 | endif() 19 | 20 | if (DISPENSO_STANDALONE) 21 | include(GNUInstallDirs) 22 | endif() 23 | 24 | list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/modules) 25 | 26 | # Main project setup 27 | if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME) 28 | set(CMAKE_CXX_EXTENSIONS OFF) 29 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) 30 | 31 | option(DISPENSO_SHARED_LIB "Build Dispenso shared library" ON) 32 | 33 | 34 | # Windows-specific 35 | if(WIN32) 36 | set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS 1) 37 | endif() 38 | endif() 39 | 40 | option(ADDRESS_SANITIZER "Use Address Sanitizer, incompatible with THREAD_SANITIZER" OFF) 41 | option(THREAD_SANITIZER "Use Thread Sanitizer, incompatible with ADDRESS_SANITIZER" OFF) 42 | 43 | if (ADDRESS_SANITIZER) 44 | add_compile_options(-fsanitize=address -fsanitize=undefined) 45 | add_link_options(-fsanitize=address -fsanitize=undefined) 46 | elseif (THREAD_SANITIZER) 47 | add_compile_options(-fsanitize=thread) 48 | add_link_options(-fsanitize=thread) 49 | endif() 50 | 51 | set(CMAKE_CXX_STANDARD 14 CACHE STRING "the C++ standard to use for this project") 52 | 53 | ########################################################### 54 | # Targets 55 | add_subdirectory(dispenso) 56 | 57 | set(DISPENSO_BUILD_TESTS OFF CACHE BOOL "Should tests be built?") 58 | set(DISPENSO_BUILD_BENCHMARKS OFF CACHE BOOL "Should benchmarks be built?") 59 | 60 | if(DISPENSO_BUILD_TESTS) 61 | enable_testing() 62 | add_subdirectory(tests) 63 | endif() 64 | 65 | if(DISPENSO_BUILD_BENCHMARKS) 66 | # Sadly any given release of folly seems to have some problem or another. Leave disabled by default. 67 | set(BENCHMARK_WITHOUT_FOLLY ON CACHE BOOL "Should folly benchmarks be disabled?") 68 | add_subdirectory(benchmarks) 69 | endif() 70 | -------------------------------------------------------------------------------- /benchmarks/small_buffer_benchmark.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 
6 | */ 7 | 8 | #include 9 | 10 | #include "benchmark_common.h" 11 | 12 | constexpr size_t kSmallSize = 32; 13 | constexpr size_t kMediumSize = 128; 14 | constexpr size_t kLargeSize = 256; 15 | 16 | template 17 | void run(benchmark::State& state, Alloc alloc, Free dealloc) { 18 | std::vector ptrs(state.range(0)); 19 | for (auto UNUSED_VAR : state) { 20 | for (char*& p : ptrs) { 21 | p = alloc(); 22 | } 23 | for (char* p : ptrs) { 24 | dealloc(p); 25 | } 26 | } 27 | } 28 | 29 | template 30 | void BM_newdelete(benchmark::State& state) { 31 | run(state, []() { return new char[kSize]; }, [](char* buf) { delete[] (buf); }); 32 | } 33 | 34 | template 35 | void BM_small_buffer_allocator(benchmark::State& state) { 36 | run( 37 | state, 38 | []() { return dispenso::allocSmallBuffer(); }, 39 | [](char* buf) { dispenso::deallocSmallBuffer(buf); }); 40 | } 41 | 42 | BENCHMARK_TEMPLATE(BM_newdelete, kSmallSize)->Range(1 << 13, 1 << 15); 43 | BENCHMARK_TEMPLATE(BM_small_buffer_allocator, kSmallSize)->Range(1 << 13, 1 << 15); 44 | 45 | BENCHMARK_TEMPLATE(BM_newdelete, kMediumSize)->Range(1 << 13, 1 << 15); 46 | BENCHMARK_TEMPLATE(BM_small_buffer_allocator, kMediumSize)->Range(1 << 13, 1 << 15); 47 | 48 | BENCHMARK_TEMPLATE(BM_newdelete, kLargeSize)->Range(1 << 13, 1 << 15); 49 | BENCHMARK_TEMPLATE(BM_small_buffer_allocator, kLargeSize)->Range(1 << 13, 1 << 15); 50 | 51 | BENCHMARK_TEMPLATE(BM_newdelete, kSmallSize)->Threads(16)->Range(1 << 13, 1 << 15); 52 | BENCHMARK_TEMPLATE(BM_small_buffer_allocator, kSmallSize)->Threads(16)->Range(1 << 13, 1 << 15); 53 | 54 | BENCHMARK_TEMPLATE(BM_newdelete, kMediumSize)->Threads(16)->Range(1 << 13, 1 << 15); 55 | BENCHMARK_TEMPLATE(BM_small_buffer_allocator, kMediumSize)->Threads(16)->Range(1 << 13, 1 << 15); 56 | 57 | BENCHMARK_TEMPLATE(BM_newdelete, kLargeSize)->Threads(16)->Range(1 << 13, 1 << 15); 58 | BENCHMARK_TEMPLATE(BM_small_buffer_allocator, kLargeSize)->Threads(16)->Range(1 << 13, 1 << 15); 59 | 60 | BENCHMARK_MAIN(); 61 | -------------------------------------------------------------------------------- /benchmarks/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
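small_buffer_benchmark.cpp above pits dispenso::allocSmallBuffer/deallocSmallBuffer against plain new/delete. A short sketch of that pair used to place an object into a pooled buffer, in the spirit of once_callable_impl.h earlier in this tree; the chunk-size template argument and the <dispenso/small_buffer_allocator.h> include path are assumptions patterned on the benchmark.

#include <dispenso/small_buffer_allocator.h>

#include <cstddef>
#include <cstdio>
#include <new>

struct Payload {
  int a;
  int b;
};

int main() {
  constexpr size_t kChunk = 32;  // same size class as the benchmark's kSmallSize

  char* buf = dispenso::allocSmallBuffer<kChunk>();
  Payload* p = new (buf) Payload{3, 4};  // placement-new into the pooled buffer
  std::printf("%d\n", p->a + p->b);
  p->~Payload();
  dispenso::deallocSmallBuffer<kChunk>(buf);
  return 0;
}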
5 | 6 | cmake_minimum_required(VERSION 3.12) 7 | 8 | 9 | include(FetchContent) 10 | 11 | if(CMAKE_CXX_STANDARD LESS 17) 12 | message(STATUS, "Using older taskflow due to c++14 support") 13 | FetchContent_Declare( 14 | taskflow 15 | GIT_REPOSITORY https://github.com/taskflow/taskflow.git 16 | GIT_TAG v2.7.0 17 | CONFIGURE_COMMAND "" 18 | BUILD_COMMAND "" 19 | ) 20 | else() 21 | message(STATUS, "Using up-to-date taskflow") 22 | FetchContent_Declare( 23 | taskflow 24 | GIT_REPOSITORY https://github.com/taskflow/taskflow.git 25 | GIT_TAG v3.6.0 26 | CONFIGURE_COMMAND "" 27 | BUILD_COMMAND "" 28 | ) 29 | endif() 30 | FetchContent_GetProperties(taskflow) 31 | if(NOT taskflow_POPULATED) 32 | FetchContent_Populate(taskflow) 33 | endif() 34 | 35 | FetchContent_MakeAvailable(taskflow) 36 | 37 | add_library(taskflow INTERFACE) 38 | target_include_directories(taskflow INTERFACE ${taskflow_SOURCE_DIR}) 39 | 40 | find_package(benchmark REQUIRED) 41 | if (NOT WIN32) 42 | find_package(OpenMP) 43 | endif (NOT WIN32) 44 | find_package(TBB) 45 | find_package(folly) 46 | 47 | if (WIN32) 48 | set (REQUIRED_LIBS dispenso benchmark::benchmark benchmark::benchmark_main taskflow) 49 | else (WIN32) 50 | set (REQUIRED_LIBS dispenso benchmark::benchmark benchmark::benchmark_main pthread taskflow) 51 | endif (WIN32) 52 | 53 | if (TBB_FOUND) 54 | set (OPTIONAL_LIBS ${OPTIONAL_LIBS} tbb) 55 | else (TBB_FOUND) 56 | add_compile_definitions(BENCHMARK_WITHOUT_TBB) 57 | endif (TBB_FOUND) 58 | 59 | if (OpenMP_CXX_FOUND) 60 | set (OPTIONAL_LIBS ${OPTIONAL_LIBS} OpenMP::OpenMP_CXX) 61 | endif (OpenMP_CXX_FOUND) 62 | 63 | if (FOLLY_LIBRARIES AND NOT ${BENCHMARK_WITHOUT_FOLLY}) 64 | find_package(gflags) 65 | set (OPTIONAL_LIBS ${OPTIONAL_LIBS} ${FOLLY_LIBRARIES}) 66 | else (FOLLY_LIBRARIES AND NOT ${BENCHMARK_WITHOUT_FOLLY}) 67 | add_compile_definitions(BENCHMARK_WITHOUT_FOLLY) 68 | endif (FOLLY_LIBRARIES AND NOT ${BENCHMARK_WITHOUT_FOLLY}) 69 | 70 | file(GLOB BENCHMARK_FILES CONFIGURE_DEPENDS ${PROJECT_SOURCE_DIR}/benchmarks/*.cpp) 71 | 72 | foreach(BENCHMARK_FILE ${BENCHMARK_FILES}) 73 | set(BENCHMARK_NAME) 74 | get_filename_component(BENCHMARK_NAME ${BENCHMARK_FILE} NAME_WE) 75 | add_executable(${BENCHMARK_NAME} ${BENCHMARK_FILE}) 76 | target_link_libraries(${BENCHMARK_NAME} ${REQUIRED_LIBS} ${OPTIONAL_LIBS}) 77 | endforeach() 78 | 79 | -------------------------------------------------------------------------------- /benchmarks/thread_benchmark_common.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 
6 | */
7 | 
8 | #pragma once
9 | 
10 | #ifdef _POSIX_C_SOURCE
11 | #include <sys/resource.h>
12 | #endif // _POSIX_C_SOURCE
13 | 
14 | #include <cmath>
15 | #include <thread>
16 | #include <vector>
17 | 
18 | #include "benchmark_common.h"
19 | 
20 | inline std::vector<int> pow2HalfStepThreads() {
21 |   const int kRunningThreads = std::thread::hardware_concurrency();
22 |   std::vector<int> result;
23 |   result.push_back(1);
24 |   for (int block = 2; block <= kRunningThreads; block *= 2) {
25 |     int step = block / 2;
26 | 
27 |     for (int i = block; i < 2 * block && i <= kRunningThreads; i += step) {
28 |       result.push_back(i);
29 |     }
30 |   }
31 |   return result;
32 | }
33 | 
34 | #if defined(_POSIX_C_SOURCE) || defined(__MACH__)
35 | struct rusage g_rusage;
36 | 
37 | inline void startRusage() {
38 |   std::atomic_thread_fence(std::memory_order_acquire);
39 |   getrusage(RUSAGE_SELF, &g_rusage);
40 |   std::atomic_thread_fence(std::memory_order_release);
41 | }
42 | 
43 | inline double duration(struct timeval start, struct timeval end) {
44 |   return (end.tv_sec + 1e-6 * end.tv_usec) - (start.tv_sec + 1e-6 * start.tv_usec);
45 | }
46 | 
47 | inline void endRusage(benchmark::State& state) {
48 |   std::atomic_thread_fence(std::memory_order_acquire);
49 |   struct rusage res;
50 |   getrusage(RUSAGE_SELF, &res);
51 |   std::atomic_thread_fence(std::memory_order_release);
52 | 
53 |   double userTime = duration(g_rusage.ru_utime, res.ru_utime);
54 |   double sysTime = duration(g_rusage.ru_stime, res.ru_stime);
55 | 
56 |   state.counters["\t0 User"] = userTime;
57 |   state.counters["\t1 System"] = sysTime;
58 | }
59 | #else
60 | inline void startRusage() {}
61 | inline void endRusage(benchmark::State& state) {}
62 | #endif //_POSIX_C_SOURCE
63 | 
64 | inline double getMean(const std::vector<double>& data) {
65 |   double sum = 0.0;
66 |   for (auto d : data) {
67 |     sum += d;
68 |   }
69 |   return sum / data.size();
70 | }
71 | 
72 | inline double getStddev(double mean, const std::vector<double>& data) {
73 |   double sumsq = 0.0;
74 |   for (auto d : data) {
75 |     auto dev = mean - d;
76 |     sumsq += dev * dev;
77 |   }
78 |   return std::sqrt(sumsq / data.size());
79 | }
80 | 
81 | void doStats(const std::vector<double>& times, benchmark::State& state) {
82 |   double mean = getMean(times);
83 |   state.counters["mean"] = mean;
84 |   state.counters["stddev"] = getStddev(mean, times);
85 | }
86 | 
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to dispenso
2 | We want to make contributing to this project as easy and transparent as
3 | possible. There is a design ethos behind the library, so it is recommended to reach out via a GitHub
4 | issue on the project to discuss non-trivial changes you may wish to make. These changes include, for
5 | example, wanting to change existing API, wanting to furnish a new utility, or wanting to change
6 | underlying behavior substantially. Let's avoid situations where you put in a lot of hard work, only
7 | to have to change it substantially or get your pull request rejected.
8 | 
9 | ## Our Development Process
10 | This library has another home inside Facebook repos. From there it is subjected to regular continuous integration testing on many platforms, and used by many projects.
11 | 
12 | ## Pull Requests
13 | We actively welcome your pull requests.
14 | 
15 | 1. Fork the repo and create your branch from `master`.
16 | 2. If you've added code that should be tested, add tests.
17 | 3. If you've changed APIs, update the documentation.
18 | 4.
Ensure the test suite passes. 19 | 5. Utilize clang-format. 20 | 6. If you haven't already, complete the Contributor License Agreement ("CLA"). 21 | 22 | ## Contributor License Agreement ("CLA") 23 | In order to accept your pull request, we need you to submit a CLA. You only need 24 | to do this once to work on any of Facebook's open source projects. 25 | 26 | Complete your CLA here: 27 | 28 | ## Issues 29 | We use GitHub issues to track public bugs. Please ensure your description is 30 | clear and has sufficient instructions to be able to reproduce the issue. 31 | 32 | Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe 33 | disclosure of security bugs. In those cases, please go through the process 34 | outlined on that page and do not file a public issue. 35 | 36 | ## Coding Style 37 | * 2 spaces for indentation rather than tabs 38 | * 100 character line length 39 | * Member variables have trailing underscore_ 40 | * BigCamelCase for classes and structs, and smallCamelCase for functions and variables (exception is if you are trying to match a substantial part of a standard library interface). 41 | * [1TBS braces](https://en.wikipedia.org/wiki/Indentation_style#Variant:_1TBS_(OTBS)) 42 | * Most of all, try to be consistent with the surrounding code. We have automated tools that will 43 | enforce clang-format style for some files (e.g. the C++ core) once we import your pull request 44 | into our internal code reviewing tools. 45 | 46 | ## License 47 | By contributing to dispenso, you agree that your contributions will be licensed 48 | under the LICENSE.md file in the root directory of this source tree. 49 | -------------------------------------------------------------------------------- /tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
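The Coding Style list above is terse, so here is a tiny snippet that applies those rules (2-space indent, BigCamelCase types, smallCamelCase functions, trailing-underscore members, 1TBS braces) to a made-up class. WorkQueue is purely illustrative and is not part of the library.

// Illustrative only: the conventions from the Coding Style section above.
class WorkQueue {              // BigCamelCase for classes and structs
 public:
  void pushTask(int taskId) {  // smallCamelCase for functions, 1TBS braces
    if (taskId >= 0) {
      pendingCount_ += 1;      // 2-space indentation, no tabs
    }
  }

 private:
  int pendingCount_ = 0;       // member variables carry a trailing underscore_
};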
5 | 6 | cmake_minimum_required(VERSION 3.14) 7 | 8 | ### TODO(bbudge): Set up testing with github actions 9 | # Ideally these tests are all run in (Release, Debug) X (N/A, TSAN, ASAN, -fno-exceptions) 10 | ### 11 | 12 | include(FetchContent) 13 | FetchContent_Declare( 14 | GoogleTest 15 | GIT_REPOSITORY https://github.com/google/googletest.git 16 | GIT_TAG release-1.12.1 17 | ) 18 | 19 | # For Windows, Prevent overriding the parent project's compiler/linker settings 20 | set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) 21 | set(INSTALL_GTEST OFF CACHE BOOL "" FORCE) 22 | FetchContent_MakeAvailable(GoogleTest) 23 | include(GoogleTest) 24 | 25 | macro(package_add_test TEST_NAME LABEL TEST_FILE) 26 | add_executable(${TEST_NAME} ${TEST_FILE}) 27 | target_compile_features(${TEST_NAME} PRIVATE cxx_std_14) 28 | target_compile_options(${TEST_NAME} PRIVATE 29 | $<$:/W3 /WX> 30 | $<$>:-Wall -Wextra -pedantic -Wconversion -Wno-sign-conversion -Werror> 31 | ) 32 | target_link_libraries(${TEST_NAME} gmock_main dispenso) 33 | gtest_discover_tests(${TEST_NAME} 34 | WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} 35 | PROPERTIES VS_DEBUGGER_WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}" 36 | LABELS "${LABEL}") 37 | set_target_properties(${TEST_NAME} PROPERTIES FOLDER tests) 38 | endmacro() 39 | 40 | file(GLOB TEST_FILES CONFIGURE_DEPENDS "*test.cpp") 41 | 42 | #TODO(elliotsegal, bbudge): Help add the shared_pool_test for CMake 43 | LIST(REMOVE_ITEM TEST_FILES 44 | ${CMAKE_CURRENT_SOURCE_DIR}/shared_pool_test.cpp) 45 | 46 | # Filter out these tests specifically because they are inherently flaky because they rely on OS behaviors that are not 47 | # guaranteed, and only really useful for manual runs when making changes to the related functionality. Note that 48 | # possibly an even better test for both priority and timed_task behavior is to use the timed_task_benchmark. 49 | LIST(REMOVE_ITEM TEST_FILES 50 | ${CMAKE_CURRENT_SOURCE_DIR}/priority_test.cpp 51 | ${CMAKE_CURRENT_SOURCE_DIR}/timed_task_test.cpp) 52 | 53 | foreach(TEST_FILE ${TEST_FILES}) 54 | set(TEST_NAME) 55 | get_filename_component(TEST_NAME ${TEST_FILE} NAME_WE) 56 | package_add_test(${TEST_NAME} unittest ${TEST_FILE}) 57 | endforeach() 58 | 59 | SET(FLAKY_TEST_FILES ${CMAKE_CURRENT_SOURCE_DIR}/priority_test.cpp ${CMAKE_CURRENT_SOURCE_DIR}/timed_task_test.cpp) 60 | 61 | foreach(TEST_FILE ${FLAKY_TEST_FILES}) 62 | set(TEST_NAME) 63 | get_filename_component(TEST_NAME ${TEST_FILE} NAME_WE) 64 | package_add_test(${TEST_NAME} flaky ${TEST_FILE}) 65 | endforeach() 66 | -------------------------------------------------------------------------------- /dispenso/completion_event.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | /** 9 | * @file completion_event.h 10 | * A file providing a CompletionEvent type, which gives a way to signal to waiting threads that some 11 | * event has been completed. 12 | **/ 13 | 14 | #pragma once 15 | 16 | #include 17 | 18 | #include 19 | 20 | namespace dispenso { 21 | 22 | /** 23 | * A class which can be used for one-time notify/wait scenarios. It is basically a way to signal to 24 | * any waiting threads that some event has completed. There must be a single publisher thread 25 | * and zero or more waiters on arbitrary threads. reset may be called to restart a 26 | * sequence (e.g. 
after notify occurs and all waiters have successfully exited 27 | * wait*). 28 | **/ 29 | class CompletionEvent { 30 | public: 31 | /** 32 | * Notify any waiting threads that the event has completed. It is safe for this to be called 33 | * before threads call wait. 34 | **/ 35 | void notify() { 36 | impl_.notify(1); 37 | } 38 | 39 | /** 40 | * Wait for another thread to notify 41 | **/ 42 | void wait() const { 43 | impl_.wait(1); 44 | } 45 | 46 | /** 47 | * Peek to see if the event has been notified in any thread 48 | **/ 49 | bool completed() const { 50 | return impl_.intrusiveStatus().load(std::memory_order_acquire); 51 | } 52 | 53 | /** 54 | * Wait for another thread to notify or for the relative timeout to expire, whichever 55 | * is first. 56 | * 57 | * @return true if status is "completed", false if timed out. 58 | **/ 59 | template 60 | bool waitFor(const std::chrono::duration& relTime) const { 61 | return impl_.waitFor(1, relTime); 62 | } 63 | 64 | /** 65 | * Wait for another thread to notify or for the absolute timeout to expire, whichever 66 | * is first. 67 | * 68 | * @return true if status is "completed", false if timed out. 69 | **/ 70 | template 71 | bool waitUntil(const std::chrono::time_point& absTime) const { 72 | return impl_.waitUntil(1, absTime); 73 | } 74 | 75 | /** 76 | * Resets the event to "not-completed". This should not be called while an active 77 | * wait*\/notify sequence is still currently in play. 78 | **/ 79 | void reset() { 80 | impl_.intrusiveStatus().store(0, std::memory_order_seq_cst); 81 | } 82 | 83 | private: 84 | detail::CompletionEventImpl impl_{0}; 85 | }; 86 | 87 | } // namespace dispenso 88 | -------------------------------------------------------------------------------- /dispenso/detail/graph_executor_impl.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | #pragma once 9 | #include 10 | #include 11 | #include 12 | 13 | namespace detail { 14 | 15 | class ExecutorBase { 16 | protected: 17 | inline static bool hasNoIncompletePredecessors(const dispenso::Node& node) { 18 | return node.numIncompletePredecessors_.load(std::memory_order_relaxed) == 0; 19 | } 20 | 21 | inline static void addIncompletePredecessor(const dispenso::Node& node) { 22 | if (node.isCompleted()) { 23 | node.numIncompletePredecessors_.store(1, std::memory_order_relaxed); 24 | } else { 25 | node.numIncompletePredecessors_.fetch_add(1, std::memory_order_relaxed); 26 | } 27 | } 28 | 29 | inline static void ifIncompleteAddIncompletePredecessor(const dispenso::Node& node) { 30 | if (!node.isCompleted()) { 31 | node.numIncompletePredecessors_.fetch_add(1, std::memory_order_relaxed); 32 | } 33 | } 34 | 35 | inline static bool decNumIncompletePredecessors( 36 | const dispenso::Node& node, 37 | std::memory_order order) { 38 | return node.numIncompletePredecessors_.fetch_sub(1, order) == 1; 39 | } 40 | 41 | inline static bool decNumIncompletePredecessors( 42 | const dispenso::BiPropNode& node, 43 | std::memory_order order) { 44 | const std::memory_order loadOrder = 45 | order == std::memory_order_relaxed ? 
std::memory_order_relaxed : std::memory_order_acquire; 46 | if (node.numIncompletePredecessors_.load(loadOrder) == dispenso::Node::kCompleted) { 47 | return false; 48 | } 49 | 50 | return node.numIncompletePredecessors_.fetch_sub(1, order) == 1; 51 | } 52 | 53 | template 54 | inline static void evaluateNodeConcurrently(dispenso::ConcurrentTaskSet& tasks, const N* node) { 55 | node->run(); 56 | for (const dispenso::Node* const d : node->dependents_) { 57 | if (decNumIncompletePredecessors(static_cast(*d), std::memory_order_acq_rel)) { 58 | tasks.schedule( 59 | [&tasks, d]() { evaluateNodeConcurrently(tasks, static_cast(d)); }); 60 | } 61 | } 62 | } 63 | 64 | static void appendGroup( 65 | const dispenso::Node* /* node */, 66 | std::unordered_set*>& /* groups */) {} 67 | 68 | static void appendGroup( 69 | const dispenso::BiPropNode* node, 70 | std::unordered_set*>& groups) { 71 | const std::vector* group = node->biPropSet_.get(); 72 | if (group != nullptr) { 73 | groups.insert(group); 74 | } 75 | } 76 | }; 77 | 78 | } // namespace detail 79 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | AccessModifierOffset: -1 3 | AlignAfterOpenBracket: AlwaysBreak 4 | AlignConsecutiveAssignments: false 5 | AlignConsecutiveDeclarations: false 6 | AlignEscapedNewlinesLeft: true 7 | AlignOperands: false 8 | AlignTrailingComments: false 9 | AllowAllParametersOfDeclarationOnNextLine: false 10 | AllowShortBlocksOnASingleLine: false 11 | AllowShortCaseLabelsOnASingleLine: false 12 | AllowShortFunctionsOnASingleLine: Empty 13 | AllowShortIfStatementsOnASingleLine: false 14 | AllowShortLoopsOnASingleLine: false 15 | AlwaysBreakAfterReturnType: None 16 | AlwaysBreakBeforeMultilineStrings: true 17 | AlwaysBreakTemplateDeclarations: true 18 | BinPackArguments: false 19 | BinPackParameters: false 20 | BraceWrapping: 21 | AfterClass: false 22 | AfterControlStatement: false 23 | AfterEnum: false 24 | AfterFunction: false 25 | AfterNamespace: false 26 | AfterObjCDeclaration: false 27 | AfterStruct: false 28 | AfterUnion: false 29 | BeforeCatch: false 30 | BeforeElse: false 31 | IndentBraces: false 32 | BreakBeforeBinaryOperators: None 33 | BreakBeforeBraces: Attach 34 | BreakBeforeTernaryOperators: true 35 | BreakConstructorInitializersBeforeComma: false 36 | BreakAfterJavaFieldAnnotations: false 37 | BreakStringLiterals: false 38 | ColumnLimit: 100 39 | CommentPragmas: '^ IWYU pragma:' 40 | ConstructorInitializerAllOnOneLineOrOnePerLine: true 41 | ConstructorInitializerIndentWidth: 4 42 | ContinuationIndentWidth: 4 43 | Cpp11BracedListStyle: true 44 | DerivePointerAlignment: false 45 | DisableFormat: false 46 | ForEachMacros: [ FOR_EACH_RANGE, FOR_EACH, ] 47 | IncludeCategories: 48 | - Regex: '^<.*\.h(pp)?>' 49 | Priority: 1 50 | - Regex: '^<.*' 51 | Priority: 2 52 | - Regex: '.*' 53 | Priority: 3 54 | IndentCaseLabels: true 55 | IndentWidth: 2 56 | IndentWrappedFunctionNames: false 57 | KeepEmptyLinesAtTheStartOfBlocks: false 58 | MacroBlockBegin: '' 59 | MacroBlockEnd: '' 60 | MaxEmptyLinesToKeep: 1 61 | NamespaceIndentation: None 62 | ObjCBlockIndentWidth: 2 63 | ObjCSpaceAfterProperty: false 64 | ObjCSpaceBeforeProtocolList: false 65 | PenaltyBreakBeforeFirstCallParameter: 1 66 | PenaltyBreakComment: 300 67 | PenaltyBreakFirstLessLess: 120 68 | PenaltyBreakString: 1000 69 | PenaltyExcessCharacter: 1000000 70 | PenaltyReturnTypeOnItsOwnLine: 200 71 | PointerAlignment: Left 72 
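The heart of ExecutorBase above is the decrement-and-schedule idiom in evaluateNodeConcurrently: after a node runs, each dependent's incomplete-predecessor counter is decremented, and whichever thread takes it to zero schedules that dependent. The sketch below restates that idiom on a made-up Task struct using the public ConcurrentTaskSet API; constructing the task set from globalThreadPool() is assumed from the task-set usage elsewhere in this repository, and the diamond graph is illustrative rather than taken from the Graph classes.

#include <atomic>
#include <cstdio>
#include <vector>

#include <dispenso/task_set.h>
#include <dispenso/thread_pool.h>

// Made-up node type: run, then release each dependent whose incomplete-
// predecessor count reaches zero (the same counting scheme as above).
struct Task {
  std::atomic<int> numIncompletePredecessors{0};
  std::vector<Task*> dependents;
  int id = 0;
};

void runTask(dispenso::ConcurrentTaskSet& tasks, Task* t) {
  std::printf("running task %d\n", t->id);
  for (Task* d : t->dependents) {
    // fetch_sub returns the previous value, so exactly one thread observes the
    // 1 -> 0 transition and becomes responsible for scheduling the dependent.
    if (d->numIncompletePredecessors.fetch_sub(1, std::memory_order_acq_rel) == 1) {
      tasks.schedule([&tasks, d]() { runTask(tasks, d); });
    }
  }
}

int main() {
  // Diamond dependency graph: a -> {b, c} -> d.
  Task a, b, c, d;
  a.id = 0;
  b.id = 1;
  c.id = 2;
  d.id = 3;
  a.dependents = {&b, &c};
  b.dependents = {&d};
  c.dependents = {&d};
  b.numIncompletePredecessors = 1; // waits on a
  c.numIncompletePredecessors = 1; // waits on a
  d.numIncompletePredecessors = 2; // waits on b and c

  dispenso::ConcurrentTaskSet tasks(dispenso::globalThreadPool());
  tasks.schedule([&tasks, &a]() { runTask(tasks, &a); });
  tasks.wait();
  return 0;
}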
| RawStringFormats: 73 | - Language: TextProto 74 | Delimiters: 75 | - pb 76 | ReflowComments: true 77 | SortIncludes: true 78 | SpaceAfterCStyleCast: false 79 | SpaceBeforeAssignmentOperators: true 80 | SpaceBeforeParens: ControlStatements 81 | SpaceInEmptyParentheses: false 82 | SpacesBeforeTrailingComments: 1 83 | SpacesInAngles: false 84 | SpacesInContainerLiterals: true 85 | SpacesInCStyleCastParentheses: false 86 | SpacesInParentheses: false 87 | SpacesInSquareBrackets: false 88 | Standard: Cpp11 89 | TabWidth: 4 90 | UseTab: Never 91 | ... 92 | -------------------------------------------------------------------------------- /dispenso/graph_executor.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | #pragma once 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | namespace dispenso { 16 | /** 17 | * Class to invoke Graph or BiPropGraph on current thread. 18 | **/ 19 | class SingleThreadExecutor : public ::detail::ExecutorBase { 20 | public: 21 | /** 22 | * Invoke the graph. This is not concurrency safe. 23 | * 24 | * @param graph graph to invoke 25 | **/ 26 | template 27 | void operator()(const G& graph); 28 | 29 | private: 30 | std::vector nodesToExecute_; 31 | std::vector nodesToExecuteNext_; 32 | }; 33 | /** 34 | * Class to invoke Graph or BiPropGraph using 35 | * dispenso::parallel_for for every layer of the graph. 36 | **/ 37 | class ParallelForExecutor : public ::detail::ExecutorBase { 38 | public: 39 | /** 40 | * Invoke the graph. This is not concurrency safe. 41 | * 42 | * @param taskSet taksSet to use with parallel_for. 43 | * @param graph graph to invoke 44 | **/ 45 | template 46 | void operator()(TaskSetT& taskSet, const G& graph); 47 | 48 | private: 49 | dispenso::ConcurrentVector nodesToExecute_; 50 | dispenso::ConcurrentVector nodesToExecuteNext_; 51 | }; 52 | /** 53 | * Class to invoke Graph or BiPropGraph using 54 | * dispenso::ConcurrentTaskSet 55 | **/ 56 | class ConcurrentTaskSetExecutor : public ::detail::ExecutorBase { 57 | public: 58 | /** 59 | * Invoke the graph. This is not concurrency safe. 60 | * 61 | * @param tasks ConcurrentTaskSet to schedule tasks. 62 | * @param graph graph to invoke 63 | * @param wait if true run tasks.wait() at the end of the function 64 | **/ 65 | template 66 | void operator()(dispenso::ConcurrentTaskSet& tasks, const G& graph, bool wait = true); 67 | 68 | private: 69 | std::vector startNodes_; 70 | }; 71 | 72 | /** 73 | * Class to propagate incomplete state recursively from nodes to dependents 74 | **/ 75 | class ForwardPropagator : public ::detail::ExecutorBase { 76 | public: 77 | /** 78 | * Propagate incomplete state recursively from nodes to dependents 79 | * This is not concurrency safe. 80 | **/ 81 | template 82 | void operator()(const G& graph); 83 | 84 | private: 85 | template 86 | void propagateIncompleteStateBidirectionally(); 87 | 88 | std::vector nodesToVisit_; 89 | std::vector nodesToVisitNext_; 90 | std::unordered_set visited_; 91 | std::unordered_set*> groups_; 92 | }; 93 | } // namespace dispenso 94 | -------------------------------------------------------------------------------- /dispenso/pool_allocator.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. 
and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | /** 9 | * @file pool_allocator.h 10 | * A pool allocator to help reduce calls to the underlying allocation and deallocation functions 11 | * that can be provided custom backing allocation and deallocation functions, e.g. cudaMalloc, 12 | * cudaFree. 13 | **/ 14 | 15 | #pragma once 16 | 17 | #include 18 | #include 19 | #include 20 | 21 | #include 22 | 23 | namespace dispenso { 24 | 25 | /** 26 | * A pool allocator to help reduce calls to the underlying allocation and deallocation functions. 27 | **/ 28 | template 29 | class PoolAllocatorT { 30 | public: 31 | /** 32 | * Construct a PoolAllocator. 33 | * 34 | * @param chunkSize The chunk size for each pool allocation 35 | * @param allocSize The size of underlying slabs to be chunked 36 | * @param allocFunc The underlying allocation function for allocating slabs 37 | * @param deallocFunc The underlying deallocation function. Currently only called on destruction. 38 | **/ 39 | DISPENSO_DLL_ACCESS PoolAllocatorT( 40 | size_t chunkSize, 41 | size_t allocSize, 42 | std::function allocFunc, 43 | std::function deallocFunc); 44 | 45 | /** 46 | * Allocate a chunk from a slab 47 | * 48 | * @return The pointer to a buffer of chunkSize bytes 49 | **/ 50 | DISPENSO_DLL_ACCESS char* alloc(); 51 | 52 | /** 53 | * Deallocate a previously allocated chunk 54 | * 55 | * @param ptr The chunk to return to the available pool 56 | **/ 57 | DISPENSO_DLL_ACCESS void dealloc(char* ptr); 58 | 59 | /** 60 | * Effectively dealloc all previously allocated chunks. Useful for arenas. 61 | * This function is not thread safe, and no previously allocated chunks may be dealloc'd after 62 | * clear. 63 | **/ 64 | DISPENSO_DLL_ACCESS void clear(); 65 | 66 | /** 67 | * Get the total capicity allocated in chunks (how many alloc() could be called without triggering 68 | * allocFunc() if all chunks were available) 69 | **/ 70 | size_t totalChunkCapacity() const { 71 | return (backingAllocs2_.size() + backingAllocs_.size()) * chunksPerAlloc_; 72 | } 73 | /** 74 | * Destruct a PoolAllocator 75 | **/ 76 | DISPENSO_DLL_ACCESS ~PoolAllocatorT(); 77 | 78 | private: 79 | const size_t chunkSize_; 80 | const size_t allocSize_; 81 | const size_t chunksPerAlloc_; 82 | 83 | std::function allocFunc_; 84 | std::function deallocFunc_; 85 | 86 | // Use of a spin lock was found to be faster than std::mutex in benchmarks. 87 | alignas(kCacheLineSize) std::atomic backingAllocLock_{0}; 88 | std::vector backingAllocs_; 89 | std::vector backingAllocs2_; 90 | 91 | std::vector chunks_; 92 | }; 93 | 94 | using PoolAllocator = PoolAllocatorT; 95 | using NoLockPoolAllocator = PoolAllocatorT; 96 | 97 | } // namespace dispenso 98 | -------------------------------------------------------------------------------- /dispenso/utils/graph_dot.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 
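Below is a minimal sketch of the PoolAllocator declared above, backed by malloc/free. The 128-byte chunk size and 4096-byte slab size are arbitrary, and the backing function signatures (void*(size_t) and void(void*)) are assumed from the cudaMalloc/cudaFree example in the header comment rather than quoted from the declaration.

#include <cstdlib>
#include <cstring>

#include <dispenso/pool_allocator.h>

int main() {
  // 128-byte chunks carved out of 4096-byte slabs obtained from malloc.
  dispenso::PoolAllocator pool(
      128,
      4096,
      [](size_t bytes) { return std::malloc(bytes); },
      [](void* p) { std::free(p); });

  // Chunks returned via dealloc go back to the pool and are reused by later allocs.
  char* a = pool.alloc();
  std::memset(a, 0, 128);
  pool.dealloc(a);

  char* b = pool.alloc(); // very likely the same chunk as 'a'
  pool.dealloc(b);

  return 0;
}

The spin lock mentioned in the header keeps alloc/dealloc cheap under contention in the thread-safe PoolAllocator alias; NoLockPoolAllocator drops that protection for single-threaded use.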
6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | namespace detail { 14 | inline std::string getName( 15 | const void* ptr, 16 | const size_t index, 17 | const std::unordered_map* nodeNames) { 18 | const uintptr_t key = reinterpret_cast(ptr); 19 | if (nodeNames) { 20 | auto it = nodeNames->find(key); 21 | if (it != nodeNames->end()) { 22 | return it->second; 23 | } 24 | } 25 | return std::to_string(index); 26 | } 27 | } // namespace detail 28 | 29 | namespace dispenso { 30 | template 31 | void graphsToDot( 32 | const char* filename, 33 | const G& graph, 34 | const std::unordered_map* nodeNames) { 35 | using SubgraphType = typename G::SubgraphType; 36 | using NodeType = typename G::NodeType; 37 | std::ofstream datfile(filename); 38 | datfile << R"dot(digraph { 39 | rankdir = LR 40 | node [shape = rectangle, style = filled, colorscheme=pastel19] 41 | graph [style = filled, color = Gray95] 42 | 43 | subgraph cluster_l { label = "Legend"; style=solid; color=black 44 | empty1 [style = invis, shape=point] 45 | empty2 [style = invis, shape=point] 46 | incomplete [color = 1] 47 | completed [color = 2] 48 | incomplete -> empty1 [label = "normal"] 49 | completed -> empty2 [arrowhead = onormal,label = "bidirectional\lpropagation"] 50 | } 51 | )dot"; 52 | 53 | const size_t numSubgraphs = graph.numSubgraphs(); 54 | for (size_t i = 0; i < numSubgraphs; ++i) { 55 | const SubgraphType& s = graph.subgraph(i); 56 | if (i != 0) { 57 | datfile << " " << "subgraph cluster_" << i << " { label = \"" 58 | << ::detail::getName(&s, i, nodeNames) << "\"\n"; 59 | } 60 | const size_t numNodes = s.numNodes(); 61 | for (size_t j = 0; j < numNodes; ++j) { 62 | const NodeType& node = s.node(j); 63 | datfile << " " << reinterpret_cast(&node) 64 | << " [color = " << (node.isCompleted() ? 2 : 1); 65 | datfile << " label = \"" << ::detail::getName(&node, j, nodeNames) << "\"]\n"; 66 | } 67 | 68 | if (i != 0) { 69 | datfile << " }\n"; 70 | } 71 | } 72 | 73 | graph.forEachNode([&](const NodeType& node) { 74 | node.forEachDependent([&](const dispenso::Node& d) { 75 | datfile << " " << reinterpret_cast(&node) << " -> " 76 | << reinterpret_cast(&d); 77 | 78 | if (std::is_same::value) { 79 | const auto& node1 = static_cast(node); 80 | const auto& node2 = static_cast(d); 81 | datfile << (node1.isSameSet(node2) ? "[arrowhead=onormal]" : ""); 82 | } 83 | datfile << '\n'; 84 | }); 85 | }); 86 | 87 | datfile << "}"; 88 | datfile.close(); 89 | } 90 | } // namespace dispenso 91 | -------------------------------------------------------------------------------- /dispenso/schedulable.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | /** 9 | * @file schedulable.h 10 | * Classes providing simple schedulables that match scheduling interfaces of *TaskSet and ThreadPool 11 | * 12 | **/ 13 | 14 | #pragma once 15 | 16 | #include 17 | #include 18 | 19 | namespace dispenso { 20 | 21 | /** 22 | * A class fullfilling the Schedulable concept that immediately invokes the functor. This can be 23 | * used in place of ThreadPool or TaskSet with Futures at 24 | * construction or through then, or it may be used in TimedTask scheduling for 25 | * short-running tasks. 26 | **/ 27 | class ImmediateInvoker { 28 | public: 29 | /** 30 | * Schedule a functor to be executed. 
It will be invoked immediately. 31 | * 32 | * @param f The functor to be executed. f's signature must match void(). Best 33 | * performance will come from passing lambdas, other concrete functors, or OnceFunction, but 34 | * std::function or similarly type-erased objects will also work. 35 | **/ 36 | template 37 | void schedule(F&& f) const { 38 | f(); 39 | } 40 | 41 | /** 42 | * Schedule a functor to be executed. It is a bit oxymoronical to call this function, since 43 | * ForceQueuingTag will have no effect, and it's use is discouraged. 44 | * 45 | **/ 46 | template 47 | void schedule(F&& f, ForceQueuingTag) const { 48 | f(); 49 | } 50 | }; 51 | 52 | constexpr ImmediateInvoker kImmediateInvoker; 53 | 54 | /** 55 | * A class fullfilling the Schedulable concept that always invokes on a new thread. This can be 56 | * used in place of ThreadPool or TaskSet with Futures at 57 | * construction or through then. 58 | **/ 59 | class NewThreadInvoker { 60 | public: 61 | /** 62 | * Schedule a functor to be executed on a new thread. 63 | * 64 | * @param f The functor to be executed. f's signature must match void(). Best 65 | * performance will come from passing lambdas, other concrete functors, or OnceFunction, but 66 | * std::function or similarly type-erased objects will also work. 67 | **/ 68 | template 69 | void schedule(F&& f) const { 70 | schedule(std::forward(f), ForceQueuingTag()); 71 | } 72 | /** 73 | * Schedule a functor to be executed on a new thread. 74 | * 75 | * @param f The functor to be executed. f's signature must match void(). Best 76 | * performance will come from passing lambdas, other concrete functors, or OnceFunction, but 77 | * std::function or similarly type-erased objects will also work. 78 | **/ 79 | template 80 | void schedule(F&& f, ForceQueuingTag) const { 81 | std::thread thread([f = std::move(f)]() { f(); }); 82 | thread.detach(); 83 | } 84 | 85 | private: 86 | }; 87 | 88 | constexpr NewThreadInvoker kNewThreadInvoker; 89 | 90 | } // namespace dispenso 91 | -------------------------------------------------------------------------------- /tests/rw_lock_test.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 
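Here is a short sketch of the two schedulables declared above. kImmediateInvoker runs the closure inline before schedule() returns; kNewThreadInvoker runs it on a detached thread, so the example waits on a CompletionEvent (from completion_event.h earlier in this tree) before exiting. Pairing the two types this way is just for illustration.

#include <cstdio>

#include <dispenso/completion_event.h>
#include <dispenso/schedulable.h>

int main() {
  // Runs the functor on the calling thread, before schedule() returns.
  dispenso::kImmediateInvoker.schedule([]() { std::printf("ran immediately\n"); });

  // Runs the functor on a freshly spawned, detached thread; signal completion
  // so main does not exit before the work finishes.
  dispenso::CompletionEvent done;
  dispenso::kNewThreadInvoker.schedule([&done]() {
    std::printf("ran on a new thread\n");
    done.notify();
  });
  done.wait();

  return 0;
}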
6 | */ 7 | 8 | #include 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include 16 | 17 | using namespace std::chrono_literals; 18 | 19 | TEST(RWLock, SimpleUncontested) { 20 | dispenso::RWLock mtx; 21 | int foo = 0; 22 | std::unique_lock lk(mtx); 23 | foo = 1; 24 | 25 | lk.unlock(); 26 | 27 | EXPECT_EQ(foo, 1); 28 | } 29 | 30 | TEST(RWLock, BasicWriterTest) { 31 | int count = 0; 32 | dispenso::RWLock mtx; 33 | constexpr int kPerThreadTotal = 100000; 34 | 35 | auto toRun = [&]() { 36 | for (int i = 0; i < kPerThreadTotal; ++i) { 37 | std::unique_lock lk(mtx); 38 | ++count; 39 | } 40 | }; 41 | 42 | std::thread thread0(toRun); 43 | std::thread thread1(toRun); 44 | 45 | thread0.join(); 46 | thread1.join(); 47 | 48 | EXPECT_EQ(count, 2 * kPerThreadTotal); 49 | } 50 | 51 | TEST(RWLock, HighContentionReaderWriterTest) { 52 | int count = 0; 53 | dispenso::RWLock mtx; 54 | constexpr int kPerThreadTotal = 100000; 55 | 56 | auto toRunWriter = [&]() { 57 | for (int i = 0; i < kPerThreadTotal; ++i) { 58 | std::unique_lock lk(mtx); 59 | ++count; 60 | } 61 | }; 62 | 63 | int64_t someVal = 0; 64 | 65 | auto toRunReader = [&]() { 66 | for (int i = 0; i < kPerThreadTotal; ++i) { 67 | std::shared_lock lk(mtx); 68 | someVal += count; 69 | } 70 | }; 71 | 72 | std::thread thread0(toRunWriter); 73 | std::thread thread1(toRunReader); 74 | 75 | thread0.join(); 76 | thread1.join(); 77 | 78 | EXPECT_EQ(count, kPerThreadTotal); 79 | EXPECT_GE(someVal, 0); 80 | } 81 | 82 | TEST(RWLock, ReaderWriterTest) { 83 | int guardedCount = 0; 84 | dispenso::RWLock mtx; 85 | constexpr int kWriterTotal = 100; 86 | constexpr int kReaderTotal = 100000; 87 | 88 | auto toRunWriter = [&]() { 89 | for (int i = 0; i < kWriterTotal; ++i) { 90 | std::unique_lock lk(mtx); 91 | ++guardedCount; 92 | lk.unlock(); 93 | // Just hang out for a while til we write again. 94 | std::this_thread::sleep_for(1ms); 95 | } 96 | }; 97 | 98 | int64_t sum = 0; 99 | 100 | auto toRunReader = [&]() { 101 | for (int i = 0; i < kReaderTotal; ++i) { 102 | std::shared_lock lk(mtx); 103 | sum += guardedCount; 104 | } 105 | }; 106 | 107 | std::thread thread0(toRunWriter); 108 | std::thread thread1(toRunReader); 109 | 110 | thread0.join(); 111 | thread1.join(); 112 | 113 | EXPECT_EQ(guardedCount, kWriterTotal); 114 | EXPECT_GE(sum, 0); 115 | } 116 | 117 | TEST(RWLock, TestAlignment) { 118 | static_assert( 119 | alignof(dispenso::RWLock) >= dispenso::kCacheLineSize, 120 | "Somehow RWLock not aligned to avoid false sharing"); 121 | static_assert( 122 | alignof(dispenso::UnalignedRWLock) < dispenso::kCacheLineSize, 123 | "UnalignedRWLock is overaligned"); 124 | } 125 | -------------------------------------------------------------------------------- /dispenso/tsan_annotations.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | /** 9 | * @file tsan_annotations.h 10 | * This file exposes a set of macros for ignoring tsan errors. These should generally not 11 | * be used just to shut up TSAN, because most of the time, TSAN reports real bugs. They should be 12 | * used only when there is a high level of certainty that TSAN is spitting out a false positive, as 13 | * can occasionally happen with lock-free algorithms. 
14 | * 15 | * When these are required, it is best to keep the scope as small as possible to avoid blinding TSAN 16 | * to real bugs. Note that several libraries already expose macros like these, but we want to 17 | * keep dependencies to a bare minimum. 18 | **/ 19 | 20 | #pragma once 21 | 22 | #include 23 | 24 | #if defined(__SANITIZE_THREAD__) 25 | #define DISPENSO_HAS_TSAN 1 26 | #elif defined(__has_feature) 27 | #if __has_feature(thread_sanitizer) 28 | #define DISPENSO_HAS_TSAN 1 29 | #else 30 | #define DISPENSO_HAS_TSAN 0 31 | #endif // TSAN 32 | #else 33 | #define DISPENSO_HAS_TSAN 0 34 | #endif // feature 35 | 36 | #if DISPENSO_HAS_TSAN 37 | 38 | namespace dispenso { 39 | namespace detail { 40 | 41 | DISPENSO_DLL_ACCESS void annotateIgnoreWritesBegin(const char* f, int l); 42 | DISPENSO_DLL_ACCESS void annotateIgnoreWritesEnd(const char* f, int l); 43 | DISPENSO_DLL_ACCESS void annotateIgnoreReadsBegin(const char* f, int l); 44 | DISPENSO_DLL_ACCESS void annotateIgnoreReadsEnd(const char* f, int l); 45 | DISPENSO_DLL_ACCESS void 46 | annotateNewMemory(const char* f, int l, const volatile void* address, long size); 47 | DISPENSO_DLL_ACCESS void annotateHappensBefore(const char* f, int l, const volatile void* address); 48 | DISPENSO_DLL_ACCESS void annotateHappensAfter(const char* f, int l, const volatile void* address); 49 | 50 | } // namespace detail 51 | } // namespace dispenso 52 | 53 | #define DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN() \ 54 | ::dispenso::detail::annotateIgnoreWritesBegin(__FILE__, __LINE__) 55 | #define DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_END() \ 56 | ::dispenso::detail::annotateIgnoreWritesEnd(__FILE__, __LINE__) 57 | #define DISPENSO_TSAN_ANNOTATE_IGNORE_READS_BEGIN() \ 58 | ::dispenso::detail::annotateIgnoreReadsBegin(__FILE__, __LINE__) 59 | #define DISPENSO_TSAN_ANNOTATE_IGNORE_READS_END() \ 60 | ::dispenso::detail::annotateIgnoreReadsEnd(__FILE__, __LINE__) 61 | #define DISPENSO_TSAN_ANNOTATE_NEW_MEMORY(address, size) \ 62 | ::dispenso::detail::annotateNewMemory(__FILE__, __LINE__, address, size) 63 | #define DISPENSO_TSAN_ANNOTATE_HAPPENS_BEFORE(address) \ 64 | ::dispenso::detail::annotateHappensBefore(__FILE__, __LINE__, address) 65 | #define DISPENSO_TSAN_ANNOTATE_HAPPENS_AFTER(address) \ 66 | ::dispenso::detail::annotateHappensAfter(__FILE__, __LINE__, address) 67 | 68 | #else 69 | 70 | #define DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN() 71 | #define DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_END() 72 | #define DISPENSO_TSAN_ANNOTATE_IGNORE_READS_BEGIN() 73 | #define DISPENSO_TSAN_ANNOTATE_IGNORE_READS_END() 74 | #define DISPENSO_TSAN_ANNOTATE_NEW_MEMORY(address, size) 75 | #define DISPENSO_TSAN_ANNOTATE_HAPPENS_BEFORE(address) 76 | #define DISPENSO_TSAN_ANNOTATE_HAPPENS_AFTER(address) 77 | 78 | #endif // DISPENSO_HAS_TSAN 79 | -------------------------------------------------------------------------------- /dispenso/once_function.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | /** 9 | * @file once_function.h 10 | * A file providing OnceFunction, a class providing void() signature for closure to be called only 11 | * once. It is built to be cheap to create and move. 
12 | **/ 13 | 14 | #pragma once 15 | 16 | #include 17 | 18 | #include 19 | 20 | namespace dispenso { 21 | namespace detail { 22 | template 23 | class FutureBase; 24 | template 25 | class FutureImplBase; 26 | } // namespace detail 27 | 28 | /** 29 | * A class fullfilling the void() signature, and operator() must be called exactly once for valid 30 | * OnceFunctions. This class can be much more efficient than std::function for type 31 | * erasing functors without too much state (currently < ~250 bytes). 32 | * @note The wrapped type-erased functor in OnceFunction is *not* deleted upon destruction, but 33 | * rather when operator() is called. It is the user's responsibility to ensure that operator() is 34 | * called. 35 | * 36 | **/ 37 | class OnceFunction { 38 | public: 39 | /** 40 | * Construct a OnceFunction with invalid state. 41 | **/ 42 | OnceFunction() 43 | #if defined DISPENSO_DEBUG 44 | : onceCallable_(nullptr) 45 | #endif // DISPENSO_DEBUG 46 | { 47 | } 48 | 49 | /** 50 | * Construct a OnceFunction with a valid functor. 51 | * 52 | * @param f A functor with signature void(). Ideally this should be a concrete functor (e.g. from 53 | * lambda), though it will work with e.g. std::function. The downside in the latter case is extra 54 | * overhead for double type erasure. 55 | **/ 56 | template 57 | OnceFunction(F&& f) : onceCallable_(detail::createOnceCallable(std::forward(f))) {} 58 | 59 | OnceFunction(const OnceFunction& other) = delete; 60 | 61 | OnceFunction(OnceFunction&& other) : onceCallable_(other.onceCallable_) { 62 | #if defined DISPENSO_DEBUG 63 | other.onceCallable_ = nullptr; 64 | #endif // DISPENSO_DEBUG 65 | } 66 | 67 | OnceFunction& operator=(OnceFunction&& other) { 68 | onceCallable_ = other.onceCallable_; 69 | #if defined DISPENSO_DEBUG 70 | if (&other != this) { 71 | other.onceCallable_ = nullptr; 72 | } 73 | #endif // DISPENSO_DEBUG 74 | return *this; 75 | } 76 | 77 | /** 78 | * Invoke the type-erased functor. This function must be called exactly once. Fewer will result 79 | * in a leak, while more will invoke on an invalid object. 80 | **/ 81 | void operator()() const { 82 | #if defined DISPENSO_DEBUG 83 | assert(onceCallable_ != nullptr && "Must not use OnceFunction more than once!"); 84 | #endif // DISPENSO_DEBUG 85 | 86 | onceCallable_->run(); 87 | 88 | #if defined DISPENSO_DEBUG 89 | onceCallable_ = nullptr; 90 | #endif // DISPENSO_DEBUG 91 | } 92 | 93 | private: 94 | OnceFunction(detail::OnceCallable* func, bool) : onceCallable_(func) {} 95 | 96 | mutable detail::OnceCallable* onceCallable_; 97 | 98 | template 99 | friend class detail::FutureBase; 100 | template 101 | friend class detail::FutureImplBase; 102 | }; 103 | 104 | } // namespace dispenso 105 | -------------------------------------------------------------------------------- /tests/latch_test.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 
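A minimal sketch of OnceFunction as documented above: wrap a closure, move the wrapper to its final owner, and invoke it exactly once so the type-erased callable is released. The names here are illustrative.

#include <cstdio>
#include <utility>

#include <dispenso/once_function.h>

int main() {
  int calls = 0;

  // Wrap a closure; the type-erased callable is freed when operator() runs,
  // so the wrapper must be invoked exactly once.
  dispenso::OnceFunction once([&calls]() { ++calls; });

  // OnceFunction is move-only; hand it to whatever will run it.
  dispenso::OnceFunction runner(std::move(once));
  runner();

  std::printf("calls = %d\n", calls); // prints 1
  return calls == 1 ? 0 : 1;
}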
6 | */ 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | #include 14 | 15 | using namespace std::chrono_literals; 16 | 17 | TEST(Latch, ArriveAndWait) { 18 | size_t publishData = 0; 19 | dispenso::Latch latch(3); 20 | 21 | std::deque threads; 22 | 23 | for (size_t i = 0; i < 2; ++i) { 24 | threads.emplace_back([&latch, &publishData]() { 25 | latch.arrive_and_wait(); 26 | EXPECT_EQ(publishData, 3); 27 | }); 28 | } 29 | 30 | // Give plenty of time for hijinx if there were any bug. 31 | std::this_thread::sleep_for(10ms); 32 | 33 | publishData = 3; 34 | 35 | // Wait cannot succeed until we also throw our hat in the ring, since we have 3 threads in the 36 | // group, but only two threads waiting to check for a new value of publishData. We do this 37 | // after setting the value of publishData, from only one thread (main thread). After 38 | // arrive_and_wait, wait succeeds, and waiting threads are woken, and they should see the correct 39 | // value of publishData. 40 | latch.arrive_and_wait(); 41 | 42 | for (auto& t : threads) { 43 | t.join(); 44 | } 45 | } 46 | 47 | TEST(Latch, CountDown) { 48 | size_t publishData = 0; 49 | dispenso::Latch latch(3); 50 | 51 | std::deque threads; 52 | 53 | for (size_t i = 0; i < 2; ++i) { 54 | threads.emplace_back([&latch, &publishData]() { 55 | latch.count_down(); 56 | 57 | if (latch.try_wait()) { 58 | EXPECT_EQ(publishData, 3); 59 | } else { 60 | latch.wait(); 61 | EXPECT_EQ(publishData, 3); 62 | } 63 | }); 64 | } 65 | 66 | publishData = 3; 67 | 68 | // Wait cannot succeed until we also throw our hat in the ring, since we have 3 threads in the 69 | // group, but only two threads waiting to check for a new value of publishData. We do this 70 | // after setting the value of publishData, from only one thread (main thread). After count_down, 71 | // wait succeeds, and waiting threads are woken, and they should see the correct value of 72 | // publishData. 73 | latch.count_down(); 74 | 75 | // Wait isn't required here. 76 | 77 | for (auto& t : threads) { 78 | t.join(); 79 | } 80 | } 81 | 82 | TEST(Latch, ArriveAndWaitWithCountDown) { 83 | size_t publishData = 0; 84 | dispenso::Latch latch(3); 85 | 86 | std::deque threads; 87 | 88 | for (size_t i = 0; i < 2; ++i) { 89 | threads.emplace_back([&latch, &publishData]() { 90 | latch.arrive_and_wait(); 91 | EXPECT_EQ(publishData, 3); 92 | }); 93 | } 94 | 95 | publishData = 3; 96 | 97 | // Wait cannot succeed until we also throw our hat in the ring, since we have 3 threads in the 98 | // group, but only two threads waiting to check for a new value of publishData. We do this 99 | // after setting the value of publishData, from only one thread (main thread). After count_down, 100 | // wait succeeds, and waiting threads are woken, and they should see the correct value of 101 | // publishData. 102 | latch.count_down(); 103 | 104 | // Wait isn't required here. 105 | 106 | for (auto& t : threads) { 107 | t.join(); 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /dispenso/third-party/moodycamel/LICENSE.md: -------------------------------------------------------------------------------- 1 | This license file applies to everything in this repository except that which 2 | is explicitly annotated as being written by other authors, i.e. 
the Boost 3 | queue (included in the benchmarks for comparison), Intel's TBB library (ditto), 4 | dlib::pipe (ditto), 5 | the CDSChecker tool (used for verification), the Relacy model checker (ditto), 6 | and Jeff Preshing's semaphore implementation (used in the blocking queue) which 7 | has a zlib license (embedded in lightweightsempahore.h). 8 | 9 | --- 10 | 11 | Simplified BSD License: 12 | 13 | Copyright (c) 2013-2016, Cameron Desrochers. 14 | All rights reserved. 15 | 16 | Redistribution and use in source and binary forms, with or without modification, 17 | are permitted provided that the following conditions are met: 18 | 19 | - Redistributions of source code must retain the above copyright notice, this list of 20 | conditions and the following disclaimer. 21 | - Redistributions in binary form must reproduce the above copyright notice, this list of 22 | conditions and the following disclaimer in the documentation and/or other materials 23 | provided with the distribution. 24 | 25 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY 26 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 27 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 28 | THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 29 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT 30 | OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR 32 | TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 33 | EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 | 35 | --- 36 | 37 | I have also chosen to dual-license under the Boost Software License as an alternative to 38 | the Simplified BSD license above: 39 | 40 | Boost Software License - Version 1.0 - August 17th, 2003 41 | 42 | Permission is hereby granted, free of charge, to any person or organization 43 | obtaining a copy of the software and accompanying documentation covered by 44 | this license (the "Software") to use, reproduce, display, distribute, 45 | execute, and transmit the Software, and to prepare derivative works of the 46 | Software, and to permit third-parties to whom the Software is furnished to 47 | do so, all subject to the following: 48 | 49 | The copyright notices in the Software and this entire statement, including 50 | the above license grant, this restriction and the following disclaimer, 51 | must be included in all copies of the Software, in whole or in part, and 52 | all derivative works of the Software, unless such copies or derivative 53 | works are solely in the form of machine-executable object code generated by 54 | a source language processor. 55 | 56 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 57 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 58 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT 59 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 60 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 61 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 62 | DEALINGS IN THE SOFTWARE. 
63 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to make participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies within all project spaces, and it also applies when 49 | an individual is representing the project or its community in public spaces. 50 | Examples of representing a project or community include using an official 51 | project e-mail address, posting via an official social media account, or acting 52 | as an appointed representative at an online or offline event. Representation of 53 | a project may be further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at . All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 
67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /benchmarks/rw_lock_benchmark.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | // This benchmark relies on shared_lock from C++17 9 | #if __cplusplus >= 201703L 10 | 11 | #include 12 | 13 | #include 14 | 15 | #include 16 | 17 | #include 18 | 19 | #include "thread_benchmark_common.h" 20 | 21 | constexpr size_t kNumValues = 1 << 20; 22 | 23 | // Precondition: Start < writePeriod. Note that this is enforced in BM_serial and BM_parallel 24 | template 25 | int64_t iterate(MtxType& mtx, std::vector& values, int start, int writePeriod) { 26 | int64_t total = 0; 27 | int w = start; 28 | for (auto& p : values) { 29 | if (w++ == writePeriod) { 30 | std::lock_guard lk(mtx); 31 | ++p; 32 | w = 0; 33 | } else { 34 | std::shared_lock lk(mtx); 35 | total += p; 36 | } 37 | } 38 | return total; 39 | } 40 | 41 | struct NopMutex { 42 | void lock() {} 43 | void unlock() {} 44 | void lock_shared() {} 45 | void unlock_shared() {} 46 | }; 47 | 48 | template 49 | void BM_serial(benchmark::State& state) { 50 | int writePeriod = state.range(0); 51 | std::vector values(kNumValues); 52 | int64_t total = 0; 53 | MutexT mtx; 54 | int start = 0; 55 | for (auto UNUSED_VAR : state) { 56 | total += iterate(mtx, values, start++, writePeriod); 57 | if (start == writePeriod) { 58 | start = 0; 59 | } 60 | } 61 | 62 | benchmark::DoNotOptimize(total); 63 | } 64 | 65 | static void CustomArgumentsSerial(benchmark::internal::Benchmark* b) { 66 | for (int j : {2, 8, 32, 128, 512}) { 67 | b->Args({j}); 68 | } 69 | } 70 | 71 | template 72 | void BM_parallel(benchmark::State& state) { 73 | int concurrency = state.range(0); 74 | int writePeriod = state.range(1); 75 | std::vector values(kNumValues); 76 | std::atomic total(0); 77 | MutexT mtx; 78 | int start = 0; 79 | 80 | dispenso::TaskSet tasks(dispenso::globalThreadPool()); 81 | for (auto UNUSED_VAR : state) { 82 | for (int c = 0; c < concurrency; ++c) { 83 | tasks.schedule([&total, start, &mtx, &values, writePeriod]() { 84 | total.fetch_add(iterate(mtx, values, start, writePeriod), std::memory_order_acq_rel); 85 | }); 86 | if (++start == writePeriod) { 87 | start = 0; 88 | } 89 | } 90 | tasks.wait(); 91 | } 92 | 93 | benchmark::DoNotOptimize(total.load(std::memory_order_acquire)); 94 | } 95 | 96 | static void CustomArgumentsParallel(benchmark::internal::Benchmark* b) { 97 | for (int j : {2, 8, 32, 128, 512}) { 98 | for (int s : {1, 2, 4, 8, 16, 32}) { 99 | if (s > static_cast(std::thread::hardware_concurrency())) { 100 | break; 101 | } 102 | b->Args({s, j}); 103 | } 104 | } 105 | } 106 | 107 | BENCHMARK_TEMPLATE(BM_serial, NopMutex)->Apply(CustomArgumentsSerial)->UseRealTime(); 108 | 109 | BENCHMARK_TEMPLATE(BM_serial, std::shared_mutex)->Apply(CustomArgumentsSerial)->UseRealTime(); 110 | 111 | BENCHMARK_TEMPLATE(BM_serial, 
dispenso::RWLock)->Apply(CustomArgumentsSerial)->UseRealTime(); 112 | 113 | BENCHMARK_TEMPLATE(BM_parallel, std::shared_mutex)->Apply(CustomArgumentsParallel)->UseRealTime(); 114 | 115 | BENCHMARK_TEMPLATE(BM_parallel, dispenso::RWLock)->Apply(CustomArgumentsParallel)->UseRealTime(); 116 | 117 | #endif // C++17 118 | -------------------------------------------------------------------------------- /tests/concurrent_object_arena_test.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | TEST(ConcurrentObjectArena, ParallelGrowBy) { 14 | constexpr size_t numTasks = 20; 15 | constexpr size_t numLoops = 100; 16 | constexpr size_t delta = 7; 17 | constexpr size_t bufSize = 16; 18 | 19 | dispenso::ConcurrentObjectArena arena(bufSize); 20 | 21 | dispenso::TaskSet taskSet(dispenso::globalThreadPool()); 22 | 23 | for (size_t ti = 0; ti < numTasks; ++ti) { 24 | taskSet.schedule([=, &arena]() { 25 | for (size_t i = 0; i < numLoops; i++) { 26 | const size_t p = arena.grow_by(delta); 27 | for (size_t j = 0; j < delta; j++) { 28 | arena[p + j] = ti * numLoops * delta + i; 29 | } 30 | } 31 | }); 32 | } 33 | taskSet.wait(); 34 | 35 | EXPECT_EQ(delta * numLoops * numTasks, arena.size()); 36 | EXPECT_EQ(arena.capacity() / arena.numBuffers(), arena.getBufferSize(0)); 37 | 38 | size_t totalSize = 0; 39 | for (size_t i = 0; i < arena.numBuffers(); ++i) { 40 | totalSize += arena.getBufferSize(i); 41 | } 42 | EXPECT_EQ(totalSize, arena.size()); 43 | 44 | for (size_t i = 0; i < numLoops * numTasks; i++) { 45 | const size_t firstElement = arena[i * delta]; 46 | for (size_t j = 1; j < delta; j++) { 47 | EXPECT_EQ(arena[i * delta + j], firstElement); 48 | } 49 | } 50 | } 51 | 52 | TEST(ConcurrentObjectArena, ObjectsConstuction) { 53 | constexpr size_t defaultValue = 17; 54 | constexpr size_t bufSize = 16; 55 | constexpr size_t smallGrow = bufSize / 3; 56 | constexpr size_t bigGrow = bufSize * 3; 57 | 58 | struct TestData { 59 | TestData() : value(defaultValue) {} 60 | size_t value; 61 | }; 62 | 63 | dispenso::ConcurrentObjectArena* arena = 64 | new dispenso::ConcurrentObjectArena(bufSize); 65 | 66 | arena->grow_by(smallGrow); 67 | arena->grow_by(bigGrow); 68 | 69 | const size_t num = arena->size(); 70 | for (size_t i = 0; i < num; ++i) { 71 | EXPECT_EQ((*arena)[i].value, defaultValue); 72 | } 73 | 74 | dispenso::ConcurrentObjectArena copyArena(*arena); 75 | 76 | dispenso::ConcurrentObjectArena copyAssignmentArena(bufSize / 2); 77 | copyAssignmentArena = *arena; 78 | 79 | EXPECT_EQ(copyArena.size(), arena->size()); 80 | EXPECT_EQ(copyAssignmentArena.size(), arena->size()); 81 | 82 | const size_t numBuffers = arena->numBuffers(); 83 | std::vector bufferPtrs(numBuffers); 84 | for (size_t i = 0; i < numBuffers; ++i) { 85 | bufferPtrs[i] = arena->getBuffer(i); 86 | } 87 | 88 | dispenso::ConcurrentObjectArena moveArena(std::move(*arena)); 89 | 90 | EXPECT_EQ(arena->size(), 0); 91 | EXPECT_EQ(arena->numBuffers(), 0); 92 | EXPECT_EQ(arena->capacity(), 0); 93 | 94 | delete arena; 95 | 96 | EXPECT_EQ(copyArena.numBuffers(), numBuffers); 97 | EXPECT_EQ(copyAssignmentArena.numBuffers(), numBuffers); 98 | 99 | for (size_t i = 0; i < num; ++i) { 100 | EXPECT_EQ(copyArena[i].value, defaultValue); 101 | 
EXPECT_EQ(copyAssignmentArena[i].value, defaultValue); 102 | } 103 | 104 | for (size_t i = 0; i < numBuffers; ++i) { 105 | EXPECT_NE(copyArena.getBuffer(i), bufferPtrs[i]); 106 | EXPECT_NE(copyAssignmentArena.getBuffer(i), bufferPtrs[i]); 107 | EXPECT_EQ(moveArena.getBuffer(i), bufferPtrs[i]); 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /dispenso/timed_task.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | namespace dispenso { 14 | 15 | TimedTaskScheduler::TimedTaskScheduler(ThreadPriority prio) : priority_(prio) { 16 | thread_ = std::thread([this, prio]() { 17 | detail::registerFineSchedulerQuanta(); 18 | if (!setCurrentThreadPriority(prio)) { 19 | std::cerr << "Couldn't set thread priority" << std::endl; 20 | } 21 | timeQueueRunLoop(); 22 | }); 23 | } 24 | TimedTaskScheduler::~TimedTaskScheduler() { 25 | { 26 | std::lock_guard lk(queueMutex_); 27 | running_ = false; 28 | } 29 | epoch_.bumpAndWake(); 30 | thread_.join(); 31 | } 32 | 33 | void TimedTaskScheduler::kickOffTask(std::shared_ptr next, double curTime) { 34 | size_t remaining = next->timesToRun.fetch_sub(1, std::memory_order_acq_rel); 35 | if (remaining == 1) { 36 | auto* np = next.get(); 37 | np->func(std::move(next)); 38 | } else if (remaining > 1) { 39 | next->func(next); 40 | 41 | if (next->steady) { 42 | next->nextAbsTime += next->period; 43 | } else { 44 | next->nextAbsTime = curTime + next->period; 45 | } 46 | std::lock_guard lk(queueMutex_); 47 | tasks_.push(std::move(next)); 48 | } 49 | } 50 | 51 | constexpr double kSmallTimeBuffer = 10e-6; 52 | 53 | void TimedTaskScheduler::timeQueueRunLoop() { 54 | #if defined(_WIN32) 55 | constexpr double kSpinYieldBuffer = 1e-3; 56 | constexpr double kSpinBuffer = 100e-6; 57 | #else 58 | constexpr double kSpinYieldBuffer = 500e-6; 59 | constexpr double kSpinBuffer = 50e-6; 60 | #endif // platform 61 | constexpr double kConvertToUs = 1e6; 62 | 63 | uint32_t curEpoch = epoch_.current(); 64 | 65 | while (true) { 66 | { 67 | std::unique_lock lk(queueMutex_); 68 | if (priority_ != getCurrentThreadPriority()) { 69 | setCurrentThreadPriority(priority_); 70 | } 71 | 72 | if (!running_) { 73 | break; 74 | } 75 | if (tasks_.empty()) { 76 | lk.unlock(); 77 | curEpoch = epoch_.wait(curEpoch); 78 | continue; 79 | } 80 | } 81 | double curTime = getTime(); 82 | double timeRemaining; 83 | std::unique_lock lk(queueMutex_); 84 | timeRemaining = tasks_.top()->nextAbsTime - curTime; 85 | if (timeRemaining < kSmallTimeBuffer) { 86 | auto next = tasks_.top(); 87 | tasks_.pop(); 88 | lk.unlock(); 89 | 90 | kickOffTask(std::move(next), curTime); 91 | } else if (timeRemaining < kSpinBuffer) { 92 | continue; 93 | } else if (timeRemaining < kSpinYieldBuffer) { 94 | lk.unlock(); 95 | std::this_thread::yield(); 96 | continue; 97 | } else { 98 | lk.unlock(); 99 | curEpoch = epoch_.waitFor( 100 | curEpoch, static_cast((timeRemaining - kSpinBuffer) * kConvertToUs)); 101 | } 102 | } 103 | } 104 | 105 | void TimedTaskScheduler::addTimedTask(std::shared_ptr task) { 106 | double curTime = getTime(); 107 | double timeRemaining; 108 | timeRemaining = task->nextAbsTime - curTime; 109 | if (timeRemaining < kSmallTimeBuffer) { 110 | kickOffTask(std::move(task), 
curTime); 111 | } else { 112 | std::lock_guard lk(queueMutex_); 113 | tasks_.push(std::move(task)); 114 | } 115 | epoch_.bumpAndWake(); 116 | } 117 | 118 | TimedTaskScheduler& globalTimedTaskScheduler() { 119 | static TimedTaskScheduler scheduler; 120 | return scheduler; 121 | } 122 | 123 | } // namespace dispenso 124 | -------------------------------------------------------------------------------- /tests/completion_event_test.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | #include 14 | 15 | TEST(CompletionEvent, NotifyBeforeWait) { 16 | dispenso::CompletionEvent event; 17 | 18 | event.notify(); 19 | // Should immediately return; 20 | event.wait(); 21 | } 22 | 23 | TEST(CompletionEvent, NotifyBeforeWaitFor) { 24 | dispenso::CompletionEvent event; 25 | 26 | event.notify(); 27 | // Should immediately return; 28 | EXPECT_TRUE(event.waitFor(std::chrono::microseconds(1))); 29 | } 30 | 31 | // In an ideal world, we could expect the following test to loop 10 times or so. In reality, we 32 | // can't make such guarantees when it comes to sleep() and wait() functions. For instance, on Linux 33 | // with 64 mostly-idle cores, 100 out of 100 runs of this test resulted in looping between 8 and 12 34 | // times, even under TSAN. On Mac with 4 less-idle cores, the test would pass about 90 out of 100. 35 | // Inflating the interval to 7 to 13 passed 98 out of 100. In the end, we cannot really count on 36 | // any concrete number of times through the loop (think TSAN, think loaded machine, etc...), and so 37 | // we simply let this test fall back to "will this time out?". 38 | TEST(CompletionEvent, WaitForSomeTime) { 39 | dispenso::CompletionEvent event; 40 | 41 | std::thread t([&event]() { 42 | std::this_thread::sleep_for(std::chrono::milliseconds(20)); 43 | event.notify(); 44 | }); 45 | 46 | while (true) { 47 | if (event.waitFor(std::chrono::milliseconds(2))) { 48 | break; 49 | } 50 | } 51 | 52 | t.join(); 53 | } 54 | 55 | TEST(CompletionEvent, WaitForSomeTimeWithReset) { 56 | dispenso::CompletionEvent event; 57 | std::atomic barrier(0); 58 | 59 | std::thread t([&event, &barrier]() { 60 | std::this_thread::sleep_for(std::chrono::milliseconds(20)); 61 | event.notify(); 62 | 63 | while (!barrier.load(std::memory_order_acquire)) { 64 | } 65 | std::this_thread::sleep_for(std::chrono::milliseconds(20)); 66 | event.notify(); 67 | }); 68 | 69 | while (!(event.waitFor(std::chrono::milliseconds(2)))) { 70 | } 71 | 72 | EXPECT_TRUE(event.waitFor(std::chrono::microseconds(1))) << "This should immediately return true"; 73 | 74 | // No threads waiting, nor notifying, so we can reset. 75 | event.reset(); 76 | 77 | // Trigger the barrier so that the event can be notified. 
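// After reset() the event behaves as though freshly constructed, so the waitFor() loop below
// blocks again until the worker thread issues its second notify().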
78 | barrier.store(1, std::memory_order_release); 79 | 80 | while (true) { 81 | if (event.waitFor(std::chrono::milliseconds(2))) { 82 | break; 83 | } 84 | } 85 | 86 | t.join(); 87 | } 88 | 89 | TEST(CompletionEvent, EffectiveBarrier) { 90 | dispenso::CompletionEvent event; 91 | 92 | std::deque threads; 93 | 94 | std::atomic count(0); 95 | 96 | constexpr int kThreads = 4; 97 | 98 | for (size_t i = 0; i < kThreads; ++i) { 99 | threads.emplace_back([&event, &count]() { 100 | count.fetch_sub(1, std::memory_order_relaxed); 101 | event.wait(); 102 | count.fetch_add(2, std::memory_order_relaxed); 103 | }); 104 | } 105 | 106 | while (count.load(std::memory_order_acquire) > -kThreads) { 107 | } 108 | 109 | // Take a long rest in this thread. This gives us a chance to ensure that the event cannot 110 | // spurious wake, and begin modifying "count". 111 | std::this_thread::sleep_for(std::chrono::milliseconds(20)); 112 | 113 | EXPECT_EQ(-kThreads, count.load(std::memory_order_acquire)); 114 | 115 | event.notify(); 116 | 117 | for (auto& t : threads) { 118 | t.join(); 119 | } 120 | 121 | EXPECT_EQ(kThreads, count.load(std::memory_order_acquire)); 122 | } 123 | -------------------------------------------------------------------------------- /dispenso/pool_allocator.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | #include 9 | 10 | namespace dispenso { 11 | 12 | template 13 | PoolAllocatorT::PoolAllocatorT( 14 | size_t chunkSize, 15 | size_t allocSize, 16 | std::function allocFunc, 17 | std::function deallocFunc) 18 | : chunkSize_(chunkSize), 19 | allocSize_(allocSize), 20 | chunksPerAlloc_(allocSize / chunkSize), 21 | allocFunc_(std::move(allocFunc)), 22 | deallocFunc_(std::move(deallocFunc)) { 23 | // Start off with at least enough space to store at least one set of chunks. 24 | chunks_.reserve(chunksPerAlloc_); 25 | } 26 | 27 | template 28 | char* PoolAllocatorT::alloc() { 29 | while (true) { 30 | uint32_t allocId = 0; 31 | if (kThreadSafe) { 32 | allocId = backingAllocLock_.fetch_or(1, std::memory_order_acquire); 33 | } 34 | 35 | if (allocId == 0) { 36 | if (chunks_.empty()) { 37 | char* buffer; 38 | if (backingAllocs2_.empty()) { 39 | buffer = reinterpret_cast(allocFunc_(allocSize_)); 40 | } else { 41 | buffer = backingAllocs2_.back(); 42 | backingAllocs2_.pop_back(); 43 | } 44 | backingAllocs_.push_back(buffer); 45 | // Push n-1 values into the chunks_ buffer, and then return the nth. 46 | for (size_t i = 0; i < chunksPerAlloc_ - 1; ++i) { 47 | chunks_.push_back(buffer); 48 | buffer += chunkSize_; 49 | } 50 | if (kThreadSafe) { 51 | backingAllocLock_.store(0, std::memory_order_release); 52 | } 53 | return buffer; 54 | } 55 | char* back = chunks_.back(); 56 | chunks_.pop_back(); 57 | if (kThreadSafe) { 58 | backingAllocLock_.store(0, std::memory_order_release); 59 | } 60 | return back; 61 | } else { 62 | std::this_thread::yield(); 63 | } 64 | } 65 | } 66 | 67 | template 68 | void PoolAllocatorT::dealloc(char* ptr) { 69 | // For now do not release any memory back to the deallocFunc until destruction. 70 | // TODO(bbudge): Consider cases where we haven't gotten below some threshold of ready chunks 71 | // in a while. 
In that case, we could begin tracking allocations, and try to assemble entire 72 | // starting allocations, possibly deferring a small amount to each alloc call. This would be 73 | // slower, but would ensure we don't get into a situation where we need a bunch of memory up 74 | // front, and then never again. 75 | 76 | while (true) { 77 | uint32_t allocId = 0; 78 | if (kThreadSafe) { 79 | allocId = backingAllocLock_.fetch_or(1, std::memory_order_acquire); 80 | } 81 | if (allocId == 0) { 82 | chunks_.push_back(ptr); 83 | if (kThreadSafe) { 84 | backingAllocLock_.store(0, std::memory_order_release); 85 | } 86 | break; 87 | } 88 | } 89 | } 90 | 91 | template 92 | void PoolAllocatorT::clear() { 93 | chunks_.clear(); 94 | if (backingAllocs2_.size() < backingAllocs_.size()) { 95 | std::swap(backingAllocs2_, backingAllocs_); 96 | } 97 | for (char* ba : backingAllocs_) { 98 | backingAllocs2_.push_back(ba); 99 | } 100 | backingAllocs_.clear(); 101 | } 102 | 103 | template 104 | PoolAllocatorT::~PoolAllocatorT() { 105 | for (char* backing : backingAllocs_) { 106 | deallocFunc_(backing); 107 | } 108 | for (char* backing : backingAllocs2_) { 109 | deallocFunc_(backing); 110 | } 111 | } 112 | 113 | template class PoolAllocatorT; 114 | template class PoolAllocatorT; 115 | 116 | } // namespace dispenso 117 | -------------------------------------------------------------------------------- /dispenso/small_buffer_allocator.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | namespace dispenso { 14 | namespace detail { 15 | 16 | template 17 | SmallBufferGlobals& getSmallBufferGlobals() { 18 | // controlled leak here 19 | static SmallBufferGlobals* globals = new SmallBufferGlobals(); 20 | return *globals; 21 | } 22 | 23 | char* allocSmallBufferImpl(size_t ordinal) { 24 | switch (ordinal) { 25 | case 0: 26 | return detail::SmallBufferAllocator<4>::alloc(); 27 | case 1: 28 | return detail::SmallBufferAllocator<8>::alloc(); 29 | case 2: 30 | return detail::SmallBufferAllocator<16>::alloc(); 31 | case 3: 32 | return detail::SmallBufferAllocator<32>::alloc(); 33 | case 4: 34 | return detail::SmallBufferAllocator<64>::alloc(); 35 | case 5: 36 | return detail::SmallBufferAllocator<128>::alloc(); 37 | case 6: 38 | return detail::SmallBufferAllocator<256>::alloc(); 39 | default: 40 | assert(false && "Invalid small buffer ordinal requested"); 41 | return nullptr; 42 | } 43 | } 44 | 45 | void deallocSmallBufferImpl(size_t ordinal, void* buf) { 46 | switch (ordinal) { 47 | case 0: 48 | detail::SmallBufferAllocator<4>::dealloc(reinterpret_cast(buf)); 49 | break; 50 | case 1: 51 | detail::SmallBufferAllocator<8>::dealloc(reinterpret_cast(buf)); 52 | break; 53 | case 2: 54 | detail::SmallBufferAllocator<16>::dealloc(reinterpret_cast(buf)); 55 | break; 56 | case 3: 57 | detail::SmallBufferAllocator<32>::dealloc(reinterpret_cast(buf)); 58 | break; 59 | case 4: 60 | detail::SmallBufferAllocator<64>::dealloc(reinterpret_cast(buf)); 61 | break; 62 | case 5: 63 | detail::SmallBufferAllocator<128>::dealloc(reinterpret_cast(buf)); 64 | break; 65 | case 6: 66 | detail::SmallBufferAllocator<256>::dealloc(reinterpret_cast(buf)); 67 | break; 68 | default: 69 | assert(false && "Invalid small buffer ordinal requested"); 70 | } 71 | } 72 | 73 
| size_t approxBytesAllocatedSmallBufferImpl(size_t ordinal) { 74 | switch (ordinal) { 75 | case 0: 76 | return detail::SmallBufferAllocator<4>::bytesAllocated(); 77 | case 1: 78 | return detail::SmallBufferAllocator<8>::bytesAllocated(); 79 | case 2: 80 | return detail::SmallBufferAllocator<16>::bytesAllocated(); 81 | case 3: 82 | return detail::SmallBufferAllocator<32>::bytesAllocated(); 83 | case 4: 84 | return detail::SmallBufferAllocator<64>::bytesAllocated(); 85 | case 5: 86 | return detail::SmallBufferAllocator<128>::bytesAllocated(); 87 | case 6: 88 | return detail::SmallBufferAllocator<256>::bytesAllocated(); 89 | default: 90 | assert(false && "Invalid small buffer ordinal requested"); 91 | return 0; 92 | } 93 | } 94 | 95 | template 96 | SmallBufferAllocator::PerThreadQueuingData::~PerThreadQueuingData() { 97 | enqueue_bulk(buffers_, count_); 98 | 99 | DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN(); 100 | ptoken().~ProducerToken(); 101 | ctoken().~ConsumerToken(); 102 | DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_END(); 103 | } 104 | 105 | template class SmallBufferAllocator<4>; 106 | template class SmallBufferAllocator<8>; 107 | template class SmallBufferAllocator<16>; 108 | template class SmallBufferAllocator<32>; 109 | template class SmallBufferAllocator<64>; 110 | template class SmallBufferAllocator<128>; 111 | template class SmallBufferAllocator<256>; 112 | 113 | } // namespace detail 114 | } // namespace dispenso 115 | -------------------------------------------------------------------------------- /dispenso/rw_lock.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | #include 9 | 10 | namespace dispenso { 11 | 12 | /** 13 | * A reader/writer lock interface compatible with std::shared_mutex (for use with std::unique_lock 14 | * and std::shared_lock). The interface is designed to be very fast in the face of high levels of 15 | * contention for high read traffic and low write traffic. 16 | * 17 | * @note RWLock is not as fully-featured as std::shared_mutex: It does not go to the OS to wait. 18 | * This behavior is good for guarding very fast operations, but less good for guarding very slow 19 | * operations. Additionally, RWLock is not compatible with std::condition_variable, though 20 | * std::condition_variable_any may work (untested). It could be possible to extend RWLock with it's 21 | * own ConditionVariable, make waiting operations sleep in the OS, and also to add timed functions; 22 | * however those may slow things down in the fast case. If some/all of that functionality is 23 | * needed, use std::shared_mutex, or develop a new type. 24 | **/ 25 | class alignas(kCacheLineSize) RWLock : public detail::RWLockImpl { 26 | public: 27 | /** 28 | * Locks for write access 29 | * 30 | * @note It is undefined behavior to recursively lock 31 | **/ 32 | using detail::RWLockImpl::lock; 33 | 34 | /** 35 | * Tries to lock for write access, returns if unable to lock 36 | * 37 | * @return true if lock was acquired, false otherwise 38 | **/ 39 | using detail::RWLockImpl::try_lock; 40 | 41 | /** 42 | * Unlocks write access 43 | * 44 | * @note Must already be locked by the current thread of execution, otherwise, the behavior is 45 | * undefined. 
46 | **/ 47 | using detail::RWLockImpl::unlock; 48 | 49 | /** 50 | * Locks for read access 51 | * 52 | * @note It is undefined behavior to recursively lock 53 | **/ 54 | using detail::RWLockImpl::lock_shared; 55 | 56 | /** 57 | * Tries to lock for read access, returns if unable to lock 58 | * 59 | * @return true if lock was acquired, false otherwise 60 | * 61 | * @note It is undefined behavior to recursively lock 62 | **/ 63 | using detail::RWLockImpl::try_lock_shared; 64 | 65 | /** 66 | * Unlocks read access 67 | * 68 | * @note Must already be locked by the current thread of execution, otherwise, the behavior is 69 | * undefined. 70 | **/ 71 | using detail::RWLockImpl::unlock_shared; 72 | 73 | /** 74 | * Upgrade from a reader lock to a writer lock. lock_upgrade is a power-user interface. There is 75 | * a very good reason why it is not exposed as upgrade_mutex in the standard. To use it safely, 76 | * you *MUST* ensure only one thread can try to lock for write concurrently. If that cannot be 77 | * guaranteed, you should unlock for read, and lock for write instead of using lock_upgrade to 78 | * avoid potential deadlock. 79 | * 80 | * @note Calling this if the writer lock is already held, or if no reader lock is already held is 81 | * undefined behavior. 82 | **/ 83 | using detail::RWLockImpl::lock_upgrade; 84 | 85 | /** 86 | * Downgrade the lock from a writer lock to a reader lock. 87 | * 88 | * @note Calling this if the writer lock is not held results in undefined behavior 89 | **/ 90 | using detail::RWLockImpl::lock_downgrade; 91 | }; 92 | 93 | /** 94 | * An unaligned version of the RWLock. This could be useful if you e.g. want to create an array of 95 | * these to guard a large number of slots, and the likelihood of multiple threads touching any 96 | * region concurrently is low. All other behavior remains the same, so refer to the documentation 97 | * for RWLock. 98 | **/ 99 | class UnalignedRWLock : public detail::RWLockImpl {}; 100 | 101 | } // namespace dispenso 102 | -------------------------------------------------------------------------------- /dispenso/async_request.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | /** 9 | * @file async_request.h 10 | * A file providing AsyncRequest. This is a bit like a lightweight channel for storing updates to 11 | * one object, mostly intended to be used as a single producer, single consumer update mechanism. 12 | **/ 13 | 14 | #pragma once 15 | 16 | #if __cplusplus >= 201703L 17 | #include 18 | #else 19 | #include 20 | #endif // C++17 21 | 22 | #include 23 | 24 | namespace dispenso { 25 | 26 | /** 27 | * A type for making async requests. Although it is safe to use from multiple producers and 28 | * consumers, it is primarily intended to be used from single producer, single consumer. 29 | * 30 | * Typically the consumer will request an update of the value from thread 0, and the producer will 31 | * look whether an update was requested from thread 1. Once the producer determines an update was 32 | * requested (updateRequested() returns true), it calls tryEmplaceUpdate() to update the underlying 33 | * data. 
Then when the consumer on thread 0 next calls getUpdate(), an optional wrapper to the 34 | * updated data is returned, and the AsyncRequest object is reset (it no longer has valid data, and 35 | * no update will have yet been requested for the next update). 36 | **/ 37 | template 38 | class AsyncRequest { 39 | public: 40 | // A lightweight std::optional-like type with a subset of functionality. 41 | #if __cplusplus >= 201703L 42 | using OpResult = std::optional; 43 | #else 44 | using OpResult = detail::OpResult; 45 | #endif // C++17 46 | 47 | /** 48 | * The consumer can call this to request an update to the underlying data. If request has already 49 | * been made or fulfilled, this is a no-op. 50 | **/ 51 | void requestUpdate() { 52 | RequestState state = kNone; 53 | state_.compare_exchange_strong(state, kNeedsUpdate, std::memory_order_acq_rel); 54 | } 55 | 56 | /** 57 | * The producer can check this to determine if an update is needed. 58 | * 59 | * @return true if an update is required, false otherwise. 60 | **/ 61 | bool updateRequested() const { 62 | return state_.load(std::memory_order_acquire) == kNeedsUpdate; 63 | } 64 | 65 | /** 66 | * The producer can try to emplace a new T object in response to a request. 67 | * @param args The arguments to emplace. 68 | * @return true if the underlying data was updated. false if the underlying data is not in need 69 | * of an update. 70 | * @note For cases where calling this superflously could be expensive, it is wise to check 71 | * updateRequested() first. 72 | **/ 73 | template 74 | bool tryEmplaceUpdate(Args&&... args) { 75 | RequestState state = kNeedsUpdate; 76 | if (!state_.compare_exchange_strong(state, kUpdating, std::memory_order_acq_rel)) { 77 | return false; 78 | } 79 | obj_.emplace(std::forward(args)...); 80 | state_.store(kReady, std::memory_order_release); 81 | return true; 82 | } 83 | 84 | /** 85 | * The consumer can attempt to get an update. 86 | * @return An optional wrapper to the underlying data. If no update is ready, nullopt is 87 | * returned. Once an update has been returned, the AsyncRequest object is returned to a state with 88 | * no underlying data. 89 | **/ 90 | OpResult getUpdate() { 91 | if (state_.load(std::memory_order_acquire) == kReady) { 92 | auto obj = std::move(obj_); 93 | state_.store(kNone, std::memory_order_release); 94 | return obj; 95 | } 96 | return {}; 97 | } 98 | 99 | private: 100 | enum RequestState { kNone, kNeedsUpdate, kUpdating, kReady }; 101 | alignas(kCacheLineSize) std::atomic state_ = {kNone}; 102 | OpResult obj_; 103 | }; 104 | 105 | } // namespace dispenso 106 | -------------------------------------------------------------------------------- /dispenso/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
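The single-producer/single-consumer protocol documented for AsyncRequest in async_request.h above
can be summarized with a minimal sketch. This is illustrative only and not part of the library:
the worker thread, loop structure, and value type are assumptions, and it presumes a C++17 build
so that OpResult is std::optional.

  #include <dispenso/async_request.h>

  #include <thread>

  int main() {
    dispenso::AsyncRequest<int> request;

    // Producer: keeps producing values, publishing one only when an update was requested.
    std::thread producer([&request]() {
      int v = 0;
      bool delivered = false;
      while (!delivered) {
        ++v; // produce the next value
        if (request.updateRequested()) {
          delivered = request.tryEmplaceUpdate(v);
        }
      }
    });

    // Consumer: asks for an update, then polls until it arrives.
    request.requestUpdate();
    dispenso::AsyncRequest<int>::OpResult result;
    do {
      result = request.getUpdate();
    } while (!result);
    // *result now holds the published value, and the AsyncRequest is empty again.

    producer.join();
    return 0;
  }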
5 | 6 | cmake_minimum_required(VERSION 3.12) 7 | 8 | file(GLOB_RECURSE SOURCES CONFIGURE_DEPENDS *.cpp) 9 | file(GLOB_RECURSE HEADERS CONFIGURE_DEPENDS *.h) 10 | message("SOURCES: ${SOURCES}") 11 | 12 | if(DISPENSO_SHARED_LIB) 13 | add_compile_definitions(DISPENSO_SHARED_LIB DISPENSO_LIB_EXPORT) 14 | add_library(dispenso SHARED ${SOURCES} ${HEADERS}) 15 | 16 | target_compile_options(dispenso PRIVATE 17 | $<$>:-fvisibility=hidden> 18 | ) 19 | else() 20 | add_library(dispenso STATIC ${SOURCES} ${HEADERS}) 21 | endif() 22 | 23 | target_compile_options(dispenso PRIVATE 24 | $<$:/W3 /WX> 25 | $<$: -Wno-stringop-overflow> 26 | $<$>:-Wall -Wextra -pedantic -Wconversion -Wno-sign-conversion -Werror> 27 | ) 28 | 29 | if(WIN32) 30 | target_compile_definitions(dispenso PUBLIC NOMINMAX) 31 | endif() 32 | 33 | target_include_directories(dispenso 34 | PUBLIC 35 | $ 36 | $ 37 | $ 38 | $ 39 | $ 40 | ) 41 | 42 | set(CMAKE_THREAD_PREFER_PTHREAD TRUE) 43 | set(THREADS_PREFER_PTHREAD_FLAG TRUE) 44 | find_package(Threads REQUIRED) 45 | target_link_libraries(dispenso PUBLIC Threads::Threads) 46 | 47 | check_cxx_source_compiles(" 48 | #include 49 | #include 50 | std::atomic a(0); 51 | std::atomic b(0); 52 | std::atomic c(0); 53 | std::atomic d(0); 54 | int main() { 55 | ++a; 56 | ++b; 57 | ++c; 58 | return ++d; 59 | } 60 | " DISPENSO_HAS_ATOMIC_WITHOUT_LIB) 61 | 62 | if (NOT DISPENSO_HAS_ATOMIC_WITHOUT_LIB) 63 | target_link_libraries(dispenso PUBLIC atomic) 64 | endif() 65 | 66 | if(WIN32) 67 | target_link_libraries(dispenso PUBLIC Synchronization Winmm) 68 | endif() 69 | 70 | if (NOT DISPENSO_STANDALONE) 71 | return() 72 | endif() 73 | 74 | ## Install library ## 75 | 76 | set_target_properties(dispenso 77 | PROPERTIES VERSION ${PROJECT_VERSION} SOVERSION ${PROJECT_VERSION_MAJOR}) 78 | 79 | install(TARGETS dispenso 80 | EXPORT ${PROJECT_NAME}_Exports 81 | LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} 82 | NAMELINK_SKIP 83 | # on Windows put the dlls into bin 84 | RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} 85 | # ... 
and the import lib into the devel package 86 | ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} 87 | ) 88 | 89 | install(EXPORT ${PROJECT_NAME}_Exports 90 | DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}-${PROJECT_VERSION} 91 | NAMESPACE Dispenso:: 92 | ) 93 | 94 | install(TARGETS dispenso 95 | LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} 96 | NAMELINK_ONLY 97 | RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} 98 | ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} 99 | ) 100 | 101 | ## Install headers ## 102 | 103 | install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR} 104 | DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} 105 | FILES_MATCHING 106 | PATTERN *.h 107 | ) 108 | 109 | ## Generate and install CMake target exports ## 110 | 111 | include(CMakePackageConfigHelpers) 112 | 113 | configure_package_config_file( 114 | "${PROJECT_SOURCE_DIR}/cmake/${PROJECT_NAME}Config.cmake.in" 115 | "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake" 116 | INSTALL_DESTINATION 117 | ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}-${PROJECT_VERSION} 118 | ) 119 | 120 | write_basic_package_version_file( 121 | "${PROJECT_NAME}ConfigVersion.cmake" 122 | VERSION ${PROJECT_VERSION} 123 | COMPATIBILITY SameMajorVersion 124 | ) 125 | 126 | install(FILES 127 | ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake 128 | ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake 129 | DESTINATION 130 | ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}-${PROJECT_VERSION} 131 | ) 132 | -------------------------------------------------------------------------------- /tests/pool_allocator_test.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | #include 9 | 10 | #include 11 | 12 | #include 13 | 14 | TEST(PoolAllocator, SimpleMallocFree) { 15 | dispenso::PoolAllocator allocator(64, 256, ::malloc, ::free); 16 | 17 | char* buf = allocator.alloc(); 18 | 19 | *buf = 'a'; 20 | 21 | allocator.dealloc(buf); 22 | } 23 | 24 | TEST(PoolAllocator, TrackAllocations) { 25 | std::map allocMap; 26 | 27 | auto allocFunc = [&allocMap](size_t len) -> void* { 28 | char* ret = reinterpret_cast(::malloc(len)); 29 | allocMap.emplace(ret, len); 30 | return ret; 31 | }; 32 | 33 | auto deallocFunc = [&allocMap](void* ptr) { 34 | EXPECT_EQ(1, allocMap.erase(reinterpret_cast(ptr))); 35 | ::free(ptr); 36 | }; 37 | 38 | // Check to make sure that the ptr returned by the allocator below is in one of the buffers 39 | // generated by allocFunc. We do this by examining the closest buffer (via lower_bound) in the 40 | // map, and then verify that that buffer contains ptr. 
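// (Implementation note for the lambda below: it calls allocMap.upper_bound(ptr) and then steps
// back one entry, which selects the tracked allocation with the greatest start address <= ptr;
// the two EXPECTs then confirm that ptr falls within [start, start + length) of that allocation.)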
41 | auto checkInValidRange = [&allocMap](char* ptr) { 42 | auto it = allocMap.upper_bound(ptr); 43 | --it; 44 | EXPECT_GE(ptr, it->first); 45 | EXPECT_LT(ptr, it->first + it->second); 46 | return ptr; 47 | }; 48 | 49 | { 50 | dispenso::PoolAllocator allocator(64, 256, allocFunc, deallocFunc); 51 | 52 | char* bufs[5]; 53 | 54 | bufs[0] = checkInValidRange(allocator.alloc()); 55 | 56 | EXPECT_EQ(1, allocMap.size()); 57 | 58 | bufs[1] = checkInValidRange(allocator.alloc()); 59 | 60 | EXPECT_EQ(1, allocMap.size()); 61 | 62 | allocator.dealloc(bufs[0]); 63 | 64 | EXPECT_EQ(1, allocMap.size()); 65 | 66 | bufs[0] = checkInValidRange(allocator.alloc()); 67 | 68 | EXPECT_EQ(1, allocMap.size()); 69 | 70 | bufs[2] = checkInValidRange(allocator.alloc()); 71 | 72 | EXPECT_EQ(1, allocMap.size()); 73 | 74 | bufs[3] = checkInValidRange(allocator.alloc()); 75 | 76 | EXPECT_EQ(1, allocMap.size()); 77 | 78 | bufs[4] = checkInValidRange(allocator.alloc()); 79 | 80 | EXPECT_EQ(2, allocMap.size()); 81 | 82 | allocator.dealloc(bufs[4]); 83 | EXPECT_LE(2, allocMap.size()); 84 | } 85 | 86 | EXPECT_EQ(allocMap.size(), 0); 87 | } 88 | 89 | TEST(PoolAllocator, SimpleThreaded) { 90 | constexpr size_t kNumThreads = 8; 91 | 92 | dispenso::PoolAllocator allocator(64, 256, ::malloc, ::free); 93 | 94 | std::deque threads; 95 | 96 | for (size_t i = 0; i < kNumThreads; ++i) { 97 | threads.emplace_back([&allocator, tid = i]() { 98 | constexpr size_t kNumBufs = 8; 99 | char* bufs[kNumBufs]; 100 | 101 | for (size_t i = 0; i < 1000; ++i) { 102 | for (size_t j = 0; j < kNumBufs; ++j) { 103 | bufs[j] = allocator.alloc(); 104 | *bufs[j] = static_cast(tid); 105 | } 106 | for (size_t j = 0; j < kNumBufs; ++j) { 107 | EXPECT_EQ(*bufs[j], tid); 108 | allocator.dealloc(bufs[j]); 109 | } 110 | } 111 | }); 112 | } 113 | 114 | for (auto& t : threads) { 115 | t.join(); 116 | } 117 | } 118 | 119 | TEST(PoolAllocator, Arena) { 120 | dispenso::PoolAllocator allocator(64, 256, ::malloc, ::free); 121 | 122 | std::vector vec(2000); 123 | for (char*& c : vec) { 124 | c = allocator.alloc(); 125 | std::fill_n(c, 64, 0x7f); 126 | } 127 | 128 | for (char* c : vec) { 129 | EXPECT_TRUE(std::all_of(c, c + 64, [](char v) { return v == 0x7f; })); 130 | } 131 | 132 | allocator.clear(); 133 | vec.resize(128); 134 | for (char*& c : vec) { 135 | c = allocator.alloc(); 136 | std::fill_n(c, 64, 0x22); 137 | } 138 | 139 | for (char* c : vec) { 140 | EXPECT_TRUE(std::all_of(c, c + 64, [](char v) { return v == 0x22; })); 141 | } 142 | 143 | allocator.clear(); 144 | vec.resize(48); 145 | for (char*& c : vec) { 146 | c = allocator.alloc(); 147 | std::fill_n(c, 64, 0x11); 148 | } 149 | 150 | for (char* c : vec) { 151 | EXPECT_TRUE(std::all_of(c, c + 64, [](char v) { return v == 0x11; })); 152 | } 153 | } 154 | -------------------------------------------------------------------------------- /tests/priority_test.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include 18 | 19 | // NOTE: This isn't suitable for an automated unit test for multiple reasons. With OS 20 | // scheduling we have some amount of nondeterminism. Additionally, many (most?) 
machines will not 21 | // have permissions for kHigh and kRealtime priorities depending on OS and policies. 22 | // 23 | // On Linux, with permissions appropriate, this test passes about 14/15 times. On the single 24 | // failure, I see an average sleep error like this: 25 | // Expected: (info[2].error()) >= (info[3].error()), actual: 5.25784e-05 vs 5.41972e-05 26 | // or about 50ish microseconds average error for both kHigh and kRealtime priorities. 27 | 28 | using namespace std::chrono_literals; 29 | 30 | struct ThreadInfo { 31 | uint64_t count = 0; 32 | double sleepErrorSum = 0.0; 33 | bool prioOk = false; 34 | 35 | double error() const { 36 | return sleepErrorSum / static_cast(count); 37 | } 38 | }; 39 | 40 | void run( 41 | size_t index, 42 | ThreadInfo& info, 43 | dispenso::CompletionEvent& notifier, 44 | dispenso::Latch& started) { 45 | switch (index) { 46 | case 0: 47 | info.prioOk = dispenso::setCurrentThreadPriority(dispenso::ThreadPriority::kLow); 48 | break; 49 | case 1: 50 | info.prioOk = dispenso::setCurrentThreadPriority(dispenso::ThreadPriority::kNormal); 51 | break; 52 | case 2: 53 | info.prioOk = dispenso::setCurrentThreadPriority(dispenso::ThreadPriority::kHigh); 54 | break; 55 | case 3: 56 | info.prioOk = dispenso::setCurrentThreadPriority(dispenso::ThreadPriority::kRealtime); 57 | break; 58 | default: 59 | info.prioOk = true; 60 | break; 61 | } 62 | 63 | // Ensure all threads reach this point before we begin, so that we don't let the first threads 64 | // make progress before the system is bogged down. 65 | started.arrive_and_wait(); 66 | 67 | // Keep other threads busy. If cores are idle, the result will be a crapshoot. 68 | if (index > 3) { 69 | while (!notifier.completed()) { 70 | ++info.count; 71 | #if defined(DISPENSO_HAS_TSAN) 72 | // In TSAN atomics are implemented via reader/writer locks, and I believe these are not 73 | // guaranteeing progress. We need to take some time out from the tight loop calling 74 | // notifier.completed() in order to allow the atomic write to succeed. 75 | std::this_thread::yield(); 76 | #endif // TSAN 77 | } 78 | return; 79 | } 80 | 81 | while (true) { 82 | double start = dispenso::getTime(); 83 | if (!notifier.waitFor(1ms)) { 84 | double end = dispenso::getTime(); 85 | ++info.count; 86 | info.sleepErrorSum += std::abs((end - start) - 1e-3); 87 | } else { 88 | break; 89 | } 90 | } 91 | } 92 | 93 | TEST(Priorty, PriorityGetsCycles) { 94 | dispenso::ParForOptions options; 95 | options.wait = false; 96 | 97 | int overloadConcurrency = 2 * std::thread::hardware_concurrency(); 98 | 99 | if (sizeof(void*) == 4) { 100 | overloadConcurrency = std::min(overloadConcurrency, 62); 101 | } 102 | 103 | dispenso::ThreadPool pool(std::max(10, overloadConcurrency)); 104 | 105 | std::vector info(pool.numThreads()); 106 | 107 | dispenso::CompletionEvent stop; 108 | dispenso::Latch started(static_cast(pool.numThreads())); 109 | 110 | dispenso::TaskSet tasks(pool); 111 | dispenso::parallel_for( 112 | tasks, 113 | 0, 114 | pool.numThreads(), 115 | [&info, &stop, &started](size_t index) { run(index, info[index], stop, started); }, 116 | options); 117 | 118 | // Let threads wake about 5000 times. 119 | std::this_thread::sleep_for(5s); 120 | 121 | stop.notify(); 122 | 123 | tasks.wait(); 124 | 125 | for (auto& i : info) { 126 | EXPECT_TRUE(i.prioOk) << "Failed for " << &i - info.data(); 127 | } 128 | 129 | #if !defined(DISPENSO_HAS_TSAN) 130 | // TSAN messes with scheduling enough that all bets are off. 
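// info[0..3] hold the kLow, kNormal, kHigh, and kRealtime threads respectively; a higher
// priority should wake more promptly from each 1ms waitFor(), so its average sleep error
// should be no larger than that of any lower-priority thread.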
131 | EXPECT_GE(info[0].error(), info[1].error()); 132 | EXPECT_GE(info[1].error(), info[2].error()); 133 | EXPECT_GE(info[2].error(), info[3].error()); 134 | #endif // TSAN 135 | } 136 | -------------------------------------------------------------------------------- /benchmarks/once_function_benchmark.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | 14 | #include "benchmark_common.h" 15 | 16 | constexpr size_t kSmallSize = 24; 17 | constexpr size_t kMediumSize = 120; 18 | constexpr size_t kLargeSize = 248; 19 | // 1000 is larger than our largest optimized chunk size, so we may expect to see performance falloff 20 | // here. 21 | constexpr size_t kExtraLargeSize = 1000; 22 | 23 | template 24 | void runMoveLoop(benchmark::State& state, Func f) { 25 | for (auto UNUSED_VAR : state) { 26 | ExeType t(f); 27 | ExeType o; 28 | for (int i = 0; i < 10; ++i) { 29 | o = std::move(t); 30 | t = std::move(o); 31 | } 32 | t(); 33 | } 34 | } 35 | 36 | template 37 | class FuncConsumer { 38 | public: 39 | void add(Func&& f) { 40 | funcs_.emplace_back(std::move(f)); 41 | } 42 | 43 | void consumeAll() { 44 | while (!funcs_.empty()) { 45 | Func f = std::move(funcs_.front()); 46 | funcs_.pop_front(); 47 | f(); 48 | } 49 | } 50 | 51 | private: 52 | std::deque funcs_; 53 | }; 54 | 55 | template 56 | struct Foo { 57 | Foo() { 58 | buf[0] = 0; 59 | benchmark::ClobberMemory(); 60 | } 61 | 62 | Foo(Foo&& f) { 63 | std::memcpy(buf, f.buf, kSize); 64 | } 65 | 66 | Foo(const Foo& f) { 67 | std::memcpy(buf, f.buf, kSize); 68 | } 69 | 70 | void operator()() { 71 | benchmark::DoNotOptimize(++buf[0]); 72 | } 73 | 74 | uint32_t buf[kSize / 4]; 75 | }; 76 | 77 | template 78 | void onceCall(F&& f) { 79 | F lf = std::move(f); 80 | lf(); 81 | } 82 | 83 | template 84 | void BM_move_std_function(benchmark::State& state) { 85 | runMoveLoop>(state, Foo()); 86 | } 87 | 88 | template 89 | void BM_move_once_function(benchmark::State& state) { 90 | runMoveLoop(state, Foo()); 91 | } 92 | 93 | constexpr int kMediumLoopLen = 200; 94 | 95 | template 96 | void BM_queue_inline_function(benchmark::State& state) { 97 | FuncConsumer> consumer; 98 | for (auto UNUSED_VAR : state) { 99 | for (int i = 0; i < kMediumLoopLen; ++i) { 100 | consumer.add(Foo()); 101 | } 102 | consumer.consumeAll(); 103 | } 104 | } 105 | 106 | template 107 | void BM_queue_std_function(benchmark::State& state) { 108 | FuncConsumer> consumer; 109 | for (auto UNUSED_VAR : state) { 110 | for (int i = 0; i < kMediumLoopLen; ++i) { 111 | consumer.add(Foo()); 112 | } 113 | consumer.consumeAll(); 114 | } 115 | } 116 | 117 | template 118 | void BM_queue_once_function(benchmark::State& state) { 119 | FuncConsumer consumer; 120 | for (auto UNUSED_VAR : state) { 121 | for (int i = 0; i < kMediumLoopLen; ++i) { 122 | consumer.add(Foo()); 123 | } 124 | consumer.consumeAll(); 125 | } 126 | } 127 | 128 | BENCHMARK_TEMPLATE(BM_move_std_function, kSmallSize); 129 | BENCHMARK_TEMPLATE(BM_move_once_function, kSmallSize); 130 | 131 | BENCHMARK_TEMPLATE(BM_move_std_function, kMediumSize); 132 | BENCHMARK_TEMPLATE(BM_move_once_function, kMediumSize); 133 | 134 | BENCHMARK_TEMPLATE(BM_move_std_function, kLargeSize); 135 | BENCHMARK_TEMPLATE(BM_move_once_function, kLargeSize); 136 | 137 
| BENCHMARK_TEMPLATE(BM_move_std_function, kExtraLargeSize); 138 | BENCHMARK_TEMPLATE(BM_move_once_function, kExtraLargeSize); 139 | 140 | BENCHMARK_TEMPLATE(BM_queue_inline_function, kSmallSize); 141 | BENCHMARK_TEMPLATE(BM_queue_std_function, kSmallSize); 142 | BENCHMARK_TEMPLATE(BM_queue_once_function, kSmallSize); 143 | 144 | BENCHMARK_TEMPLATE(BM_queue_inline_function, kMediumSize); 145 | BENCHMARK_TEMPLATE(BM_queue_std_function, kMediumSize); 146 | BENCHMARK_TEMPLATE(BM_queue_once_function, kMediumSize); 147 | 148 | BENCHMARK_TEMPLATE(BM_queue_inline_function, kLargeSize); 149 | BENCHMARK_TEMPLATE(BM_queue_std_function, kLargeSize); 150 | BENCHMARK_TEMPLATE(BM_queue_once_function, kLargeSize); 151 | 152 | BENCHMARK_TEMPLATE(BM_queue_inline_function, kExtraLargeSize); 153 | BENCHMARK_TEMPLATE(BM_queue_std_function, kExtraLargeSize); 154 | BENCHMARK_TEMPLATE(BM_queue_once_function, kExtraLargeSize); 155 | 156 | BENCHMARK_MAIN(); 157 | -------------------------------------------------------------------------------- /dispenso/resource_pool.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | /** 9 | * @file resource_pool.h 10 | * A file providing ResourcePool. This is syntactic sugar over what is essentially a set of 11 | * semaphore guarded resources. 12 | **/ 13 | 14 | #pragma once 15 | 16 | #include 17 | #include 18 | #include 19 | 20 | namespace dispenso { 21 | 22 | template 23 | class ResourcePool; 24 | 25 | /** 26 | * A RIAA wrapper for a user's type that can manage accessibility and ensures the resource will go 27 | * back into the ResourcePool upon destruction. 28 | **/ 29 | template 30 | class Resource { 31 | public: 32 | Resource(Resource&& other) : resource_(other.resource_), pool_(other.pool_) { 33 | other.resource_ = nullptr; 34 | } 35 | 36 | Resource& operator=(Resource&& other) { 37 | if (&other != this) { 38 | recycle(); 39 | resource_ = other.resource_; 40 | pool_ = other.pool_; 41 | other.resource_ = nullptr; 42 | } 43 | return *this; 44 | } 45 | 46 | /** 47 | * Access the underlying resource object. 48 | * 49 | * @return a reference to the resource. 50 | **/ 51 | T& get() { 52 | return *resource_; 53 | } 54 | 55 | ~Resource() { 56 | recycle(); 57 | } 58 | 59 | private: 60 | Resource(T* res, ResourcePool* pool) : resource_(res), pool_(pool) {} 61 | 62 | void recycle(); 63 | 64 | T* resource_; 65 | ResourcePool* pool_; 66 | 67 | friend class ResourcePool; 68 | }; 69 | 70 | /** 71 | * A pool of resources that can be accessed from multiple threads. This is akin to a set of 72 | * resources and a semaphore ensuring enough resources exist. 73 | **/ 74 | template 75 | class ResourcePool { 76 | public: 77 | /** 78 | * Construct a ResourcePool. 79 | * 80 | * @param size The number of T objects in the pool. 81 | * @param init A functor with signature T() which can be called to initialize the pool's 82 | * resources. 83 | **/ 84 | template 85 | ResourcePool(size_t size, const F& init) 86 | : pool_(size), 87 | backingResources_( 88 | reinterpret_cast( 89 | detail::alignedMalloc(size * detail::alignToCacheLine(sizeof(T))))), 90 | size_(size) { 91 | char* buf = backingResources_; 92 | 93 | // There are three reasons we create our own buffer and use placement new: 94 | // 1. 
We want to be able to handle non-movable non-copyable objects 95 | // * Note that we could do this with std::deque 96 | // 2. We want to minimize memory allocations, since that can be a common point of contention in 97 | // multithreaded programs. 98 | // 3. We can easily ensure that the objects are cache aligned to help avoid false sharing. 99 | 100 | for (size_t i = 0; i < size; ++i) { 101 | pool_.enqueue(new (buf) T(init())); 102 | buf += detail::alignToCacheLine(sizeof(T)); 103 | } 104 | } 105 | 106 | /** 107 | * Acquire a resource from the pool. This function may block until a resource becomes available. 108 | * 109 | * @return a Resource-wrapped resource. 110 | **/ 111 | Resource acquire() { 112 | T* t; 113 | DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN(); 114 | pool_.wait_dequeue(t); 115 | DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_END(); 116 | return Resource(t, this); 117 | } 118 | 119 | /** 120 | * Destruct the ResourcePool. The user must ensure that all resources are returned to the pool 121 | * prior to destroying the pool. 122 | **/ 123 | ~ResourcePool() { 124 | assert(pool_.size_approx() == size_); 125 | for (size_t i = 0; i < size_; ++i) { 126 | T* t; 127 | DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN(); 128 | pool_.wait_dequeue(t); 129 | DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_END(); 130 | t->~T(); 131 | } 132 | detail::alignedFree(backingResources_); 133 | } 134 | 135 | private: 136 | void recycle(T* t) { 137 | DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_BEGIN(); 138 | pool_.enqueue(t); 139 | DISPENSO_TSAN_ANNOTATE_IGNORE_WRITES_END(); 140 | } 141 | 142 | moodycamel::BlockingConcurrentQueue pool_; 143 | char* backingResources_; 144 | size_t size_; 145 | 146 | friend class Resource; 147 | }; 148 | 149 | template 150 | void Resource::recycle() { 151 | if (resource_) { 152 | pool_->recycle(resource_); 153 | } 154 | } 155 | 156 | } // namespace dispenso 157 | -------------------------------------------------------------------------------- /dispenso/detail/rw_lock_impl.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | #include 9 | 10 | namespace dispenso { 11 | namespace detail { 12 | class RWLockImpl { 13 | public: 14 | /** 15 | * Locks for write access 16 | * 17 | * @note It is undefined behavior to recursively lock 18 | **/ 19 | void lock(); 20 | 21 | /** 22 | * Tries to lock for write access, returns if unable to lock 23 | * 24 | * @return true if lock was acquired, false otherwise 25 | **/ 26 | bool try_lock(); 27 | 28 | /** 29 | * Unlocks write access 30 | * 31 | * @note Must already be locked by the current thread of execution, otherwise, the behavior is 32 | * undefined. 33 | **/ 34 | void unlock(); 35 | 36 | /** 37 | * Locks for read access 38 | * 39 | * @note It is undefined behavior to recursively lock 40 | **/ 41 | void lock_shared(); 42 | 43 | /** 44 | * Tries to lock for read access, returns if unable to lock 45 | * 46 | * @return true if lock was acquired, false otherwise 47 | * 48 | * @note It is undefined behavior to recursively lock 49 | **/ 50 | bool try_lock_shared(); 51 | 52 | /** 53 | * Unlocks read access 54 | * 55 | * @note Must already be locked by the current thread of execution, otherwise, the behavior is 56 | * undefined. 57 | **/ 58 | void unlock_shared(); 59 | 60 | /** 61 | * Upgrade from a reader lock to a writer lock. 
lock_upgrade is a power-user interface. There is 62 | * a very good reason why it is not exposed as upgrade_mutex in the standard. To use it safely, 63 | * you *MUST* ensure only one thread can try to lock for write concurrently. If that cannot be 64 | * guaranteed, you should unlock for read, and lock for write instead of using lock_upgrade to 65 | * avoid potential deadlock. 66 | * 67 | * @note Calling this if the writer lock is already held, or if no reader lock is already held is 68 | * undefined behavior. 69 | **/ 70 | void lock_upgrade(); 71 | 72 | /** 73 | * Downgrade the lock from a writer lock to a reader lock. 74 | * 75 | * @note Calling this if the writer lock is not held results in undefined behavior 76 | **/ 77 | void lock_downgrade(); 78 | 79 | private: 80 | static constexpr uint32_t kWriteBit = 0x80000000; 81 | static constexpr uint32_t kReaderBits = 0x7fffffff; 82 | std::atomic lock_{0}; 83 | }; 84 | 85 | inline void RWLockImpl::lock() { 86 | uint32_t val = lock_.fetch_or(kWriteBit, std::memory_order_acq_rel); 87 | while (val & kWriteBit) { 88 | val = lock_.fetch_or(kWriteBit, std::memory_order_acq_rel); 89 | } 90 | // We've claimed single write ownership now. We need to drain off readers 91 | while (val != kWriteBit) { 92 | val = lock_.load(std::memory_order_acquire); 93 | } 94 | } 95 | 96 | inline bool RWLockImpl::try_lock() { 97 | uint32_t val = lock_.fetch_or(kWriteBit, std::memory_order_acq_rel); 98 | return !(val & kWriteBit); 99 | } 100 | 101 | inline void RWLockImpl::unlock() { 102 | lock_.fetch_and(kReaderBits, std::memory_order_acq_rel); 103 | } 104 | 105 | inline void RWLockImpl::lock_shared() { 106 | uint32_t val = lock_.fetch_add(1, std::memory_order_acq_rel); 107 | while (val & kWriteBit) { 108 | val = lock_.fetch_sub(1, std::memory_order_acq_rel); 109 | while (val & kWriteBit) { 110 | val = lock_.load(std::memory_order_acquire); 111 | } 112 | 113 | val = lock_.fetch_add(1, std::memory_order_acq_rel); 114 | } 115 | } 116 | 117 | inline bool RWLockImpl::try_lock_shared() { 118 | uint32_t val = lock_.fetch_add(1, std::memory_order_acq_rel); 119 | if (val & kWriteBit) { 120 | lock_.fetch_sub(1, std::memory_order_acq_rel); 121 | return false; 122 | } 123 | return true; 124 | } 125 | 126 | inline void RWLockImpl::unlock_shared() { 127 | lock_.fetch_sub(1, std::memory_order_acq_rel); 128 | } 129 | 130 | inline void RWLockImpl::lock_upgrade() { 131 | uint32_t val = lock_.fetch_or(kWriteBit, std::memory_order_acq_rel); 132 | while (val & kWriteBit) { 133 | val = lock_.fetch_or(kWriteBit, std::memory_order_acq_rel); 134 | } 135 | // We've claimed single write ownership now. We need to drain off readers, including ourself 136 | lock_.fetch_sub(1, std::memory_order_acq_rel); 137 | while (val != kWriteBit) { 138 | val = lock_.load(std::memory_order_acquire); 139 | } 140 | } 141 | 142 | inline void RWLockImpl::lock_downgrade() { 143 | // Get reader ownership first 144 | lock_.fetch_add(1, std::memory_order_acq_rel); 145 | unlock(); 146 | } 147 | } // namespace detail 148 | } // namespace dispenso 149 | -------------------------------------------------------------------------------- /dispenso/small_buffer_allocator.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 
6 | */ 7 | 8 | /** 9 | * @file small_buffer_allocator.h 10 | * A file providing SmallBufferAllocator. This allocator can allocate and deallocate chunks of a 11 | * set size in a way that is efficient and scales quite well across many threads. 12 | **/ 13 | 14 | #pragma once 15 | 16 | #include 17 | #include 18 | 19 | namespace dispenso { 20 | 21 | /** 22 | * Set a standard for the maximum chunk size for use within dispenso. The reason for this limit is 23 | * that there are diminishing returns after a certain size, and each new pool has it's own memory 24 | * overhead. 25 | **/ 26 | constexpr size_t kMaxSmallBufferSize = 256; 27 | 28 | namespace detail { 29 | 30 | DISPENSO_DLL_ACCESS char* allocSmallBufferImpl(size_t ordinal); 31 | DISPENSO_DLL_ACCESS void deallocSmallBufferImpl(size_t ordinal, void* buf); 32 | 33 | DISPENSO_DLL_ACCESS size_t approxBytesAllocatedSmallBufferImpl(size_t ordinal); 34 | 35 | // This has the effect of selecting actual block sizes starting with 4 bytes. Smaller requests 36 | // (e.g. 1 byte, 2 bytes) will still utilize 4-byte blocks. Choice of 4 bytes as the smallest 37 | // mainly aligns to sizeof(ptr) on 32-bit platforms, where we'd expect most common use cases to be 38 | // no smaller than one pointer. Retaining 4-byte buckets on 64-bit platforms doesn't cost much 39 | // (tiny startup/teardown cost, and trivial amount of memory) when not using 4-byte or smaller 40 | // allocations, and makes the code simpler. 41 | constexpr size_t getOrdinal(size_t blockSize) { 42 | return std::max(0, log2const(blockSize) - 2); 43 | } 44 | 45 | template 46 | inline std::enable_if_t<(kBlockSize <= kMaxSmallBufferSize), char*> allocSmallOrLarge() { 47 | #if defined(DISPENSO_NO_SMALL_BUFFER_ALLOCATOR) 48 | return reinterpret_cast(alignedMalloc(kBlockSize, kBlockSize)); 49 | #else 50 | return allocSmallBufferImpl(getOrdinal(kBlockSize)); 51 | #endif // DISPENSO_NO_SMALL_BUFFER_ALLOCATOR 52 | } 53 | 54 | template 55 | inline std::enable_if_t<(kBlockSize > kMaxSmallBufferSize), char*> allocSmallOrLarge() { 56 | return reinterpret_cast(alignedMalloc(kBlockSize, kBlockSize)); 57 | } 58 | 59 | template 60 | inline std::enable_if_t<(kBlockSize <= kMaxSmallBufferSize), void> deallocSmallOrLarge(void* buf) { 61 | #if defined(DISPENSO_NO_SMALL_BUFFER_ALLOCATOR) 62 | alignedFree(buf); 63 | #else 64 | deallocSmallBufferImpl(getOrdinal(kBlockSize), buf); 65 | #endif // DISPENSO_NO_SMALL_BUFFER_ALLOCATOR 66 | } 67 | 68 | template 69 | inline std::enable_if_t<(kBlockSize > kMaxSmallBufferSize), void> deallocSmallOrLarge(void* buf) { 70 | alignedFree(buf); 71 | } 72 | 73 | } // namespace detail 74 | 75 | /** 76 | * Allocate a small buffer from a small buffer pool. 77 | * 78 | * @tparam kBlockSize The size of the block to allocate. Must be a power of two, and must be less 79 | * than or equal to kMaxSmallBufferSize. 80 | * @return The pointer to the allocated block of memory. 81 | * @note: The returned buffer must be returned to the pool via deallocSmallBuffer templatized on the 82 | * same block size. If kBlockSize > kMaxSmallBufferSize, this function falls back on alignedMalloc. 83 | * If DISPENSO_NO_SMALL_BUFFER_ALLOCATOR is defined, we will always fall back on 84 | * alignedMalloc/alignedFree. 85 | **/ 86 | template 87 | inline char* allocSmallBuffer() { 88 | return detail::allocSmallOrLarge(); 89 | } 90 | /** 91 | * Free a small buffer from a small buffer pool. 92 | * 93 | * @tparam kBlockSize The size of the block to allocate. 
Must be a power of two, and must be less 94 | * than or equal to kMaxSmallBufferSize. 95 | * @param buf the pointer to block of memory to return to the pool. Must have been allocated with 96 | * allocSmallBuffer templatized on the same block size. 97 | * @note: If kBlockSize > kMaxSmallBufferSize, this function falls back on alignedFree. 98 | **/ 99 | template 100 | inline void deallocSmallBuffer(void* buf) { 101 | detail::deallocSmallOrLarge(buf); 102 | } 103 | 104 | /** 105 | * Get the approximate bytes allocated for a single small buffer pool (associated with 106 | *kBlockSize). This function is not highly performant and locks, and should only be used for 107 | *diagnostics (e.g. tests). 108 | * 109 | * @tparam kBlockSize The block size for the pool to query. 110 | **/ 111 | template 112 | size_t approxBytesAllocatedSmallBuffer() { 113 | return detail::approxBytesAllocatedSmallBufferImpl(detail::getOrdinal(kBlockSize)); 114 | } 115 | 116 | } // namespace dispenso 117 | -------------------------------------------------------------------------------- /tests/once_function_test.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | #include 9 | 10 | #include 11 | 12 | #include 13 | 14 | constexpr size_t kExtraSmall = 8; 15 | constexpr size_t kSmall = 24; 16 | constexpr size_t kMedium = 120; 17 | constexpr size_t kLarge = 248; 18 | constexpr size_t kExtraLarge = 10000; 19 | 20 | using dispenso::OnceFunction; 21 | 22 | TEST(OnceFunction, Empty) { 23 | OnceFunction f([]() {}); 24 | f(); 25 | } 26 | 27 | TEST(OnceFunction, MoveConstructor) { 28 | OnceFunction f([]() {}); 29 | OnceFunction g(std::move(f)); 30 | g(); 31 | } 32 | 33 | TEST(OnceFunction, MoveOperator) { 34 | OnceFunction f([]() {}); 35 | OnceFunction g; 36 | g = std::move(f); 37 | g(); 38 | } 39 | 40 | template 41 | void testSize() { 42 | constexpr size_t kNumElts = kSize - sizeof(int*); 43 | struct Foo { 44 | void operator()() { 45 | int s = 0; 46 | for (uint8_t b : buf) { 47 | s += b; 48 | } 49 | *sum = s; 50 | } 51 | uint8_t buf[kNumElts]; 52 | int* sum; 53 | } foo; 54 | for (size_t i = 0; i < kNumElts; ++i) { 55 | foo.buf[i] = static_cast(i); 56 | } 57 | int answer; 58 | foo.sum = &answer; 59 | OnceFunction f(foo); 60 | OnceFunction g(foo); 61 | g(); 62 | f(); 63 | int expected = 0; 64 | for (size_t i = 0; i < kNumElts; ++i) { 65 | expected += static_cast(i & 255); 66 | } 67 | EXPECT_EQ(answer, expected); 68 | } 69 | 70 | template <> 71 | void testSize<8>() { 72 | struct Foo { 73 | void operator()() { 74 | int s = 0; 75 | *sum = s; 76 | } 77 | int* sum; 78 | } foo; 79 | int answer; 80 | foo.sum = &answer; 81 | OnceFunction f(foo); 82 | OnceFunction g(foo); 83 | g(); 84 | f(); 85 | int expected = 0; 86 | EXPECT_EQ(answer, expected); 87 | } 88 | 89 | TEST(OnceFunction, ExtraSmall) { 90 | testSize(); 91 | } 92 | 93 | TEST(OnceFunction, Small) { 94 | testSize(); 95 | } 96 | 97 | TEST(OnceFunction, Medium) { 98 | testSize(); 99 | } 100 | 101 | TEST(OnceFunction, Large) { 102 | testSize(); 103 | } 104 | 105 | TEST(OnceFunction, ExtraLarge) { 106 | testSize(); 107 | } 108 | 109 | TEST(OnceFunction, MoveWithResult) { 110 | int result = 5; 111 | OnceFunction f([&result]() { result = 17; }); 112 | EXPECT_EQ(result, 5); 113 | OnceFunction g(std::move(f)); 114 | EXPECT_EQ(result, 5); 115 | g(); 
116 | EXPECT_EQ(result, 17); 117 | } 118 | 119 | template 120 | void ensureDestructor() { 121 | int value = 0; 122 | struct FooWithDestructor { 123 | void operator()() { 124 | ++*value; 125 | } 126 | ~FooWithDestructor() { 127 | ++*value; 128 | } 129 | uint8_t buf[kNumElts]; 130 | int* value; 131 | } foo; 132 | 133 | foo.value = &value; 134 | 135 | OnceFunction f(foo); 136 | f(); 137 | EXPECT_EQ(value, 2); 138 | } 139 | 140 | TEST(OnceFunction, EnsureDestructionExtraSmall) { 141 | ensureDestructor(); 142 | } 143 | 144 | TEST(OnceFunction, EnsureDestructionSmall) { 145 | ensureDestructor(); 146 | } 147 | 148 | TEST(OnceFunction, EnsureDestructionMedium) { 149 | ensureDestructor(); 150 | } 151 | 152 | TEST(OnceFunction, EnsureDestructionLarge) { 153 | ensureDestructor(); 154 | } 155 | 156 | TEST(OnceFunction, EnsureDestructionExtraLarge) { 157 | ensureDestructor(); 158 | } 159 | 160 | template 161 | struct EnsureAlign { 162 | void operator()() { 163 | uintptr_t bloc = reinterpret_cast(&b); 164 | EXPECT_EQ(0, bloc & (alignment - 1)) << "broken for alignment: " << alignment; 165 | } 166 | 167 | alignas(alignment) char b = 0; 168 | }; 169 | 170 | TEST(OnceFunction, EnsureAlignment1) { 171 | EnsureAlign<1> e; 172 | OnceFunction f(e); 173 | f(); 174 | } 175 | 176 | TEST(OnceFunction, EnsureAlignment2) { 177 | EnsureAlign<2> e; 178 | OnceFunction f(e); 179 | f(); 180 | } 181 | TEST(OnceFunction, EnsureAlignment4) { 182 | EnsureAlign<4> e; 183 | OnceFunction f(e); 184 | f(); 185 | } 186 | TEST(OnceFunction, EnsureAlignment8) { 187 | EnsureAlign<8> e; 188 | OnceFunction f(e); 189 | f(); 190 | } 191 | TEST(OnceFunction, EnsureAlignment16) { 192 | EnsureAlign<16> e; 193 | OnceFunction f(e); 194 | f(); 195 | } 196 | TEST(OnceFunction, EnsureAlignment32) { 197 | EnsureAlign<32> e; 198 | OnceFunction f(e); 199 | f(); 200 | } 201 | TEST(OnceFunction, EnsureAlignment64) { 202 | EnsureAlign<64> e; 203 | OnceFunction f(e); 204 | f(); 205 | } 206 | TEST(OnceFunction, EnsureAlignment128) { 207 | EnsureAlign<128> e; 208 | OnceFunction f(e); 209 | f(); 210 | } 211 | TEST(OnceFunction, EnsureAlignment256) { 212 | EnsureAlign<256> e; 213 | OnceFunction f(e); 214 | f(); 215 | } 216 | -------------------------------------------------------------------------------- /dispenso/detail/task_set_impl.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | /** 9 | * @file task_set.h 10 | * A file providing TaskSet and ConcurrentTaskSet. These interfaces allow the user to 11 | * submit/schedule multiple closures and then wait on them. 
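 *
 * A minimal usage sketch of TaskSet (illustrative only; doWork stands in for arbitrary user
 * code; the same pattern appears in tests such as concurrent_object_arena_test.cpp):
 *
 *   dispenso::TaskSet tasks(dispenso::globalThreadPool());
 *   for (int i = 0; i < 8; ++i) {
 *     tasks.schedule([i]() { doWork(i); });
 *   }
 *   tasks.wait(); // blocks until every scheduled closure has finished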
12 | **/ 13 | 14 | #pragma once 15 | 16 | #include 17 | 18 | namespace dispenso { 19 | 20 | class TaskSetBase; 21 | 22 | namespace detail { 23 | template 24 | class FutureBase; 25 | 26 | class LimitGatedScheduler; 27 | 28 | DISPENSO_DLL_ACCESS void pushThreadTaskSet(TaskSetBase* tasks); 29 | DISPENSO_DLL_ACCESS void popThreadTaskSet(); 30 | 31 | } // namespace detail 32 | 33 | DISPENSO_DLL_ACCESS TaskSetBase* parentTaskSet(); 34 | 35 | class TaskSetBase { 36 | public: 37 | TaskSetBase( 38 | ThreadPool& p, 39 | ParentCascadeCancel registerForParentCancel = ParentCascadeCancel::kOff, 40 | ssize_t stealingLoadMultiplier = 4) 41 | : pool_(p), taskSetLoadFactor_(stealingLoadMultiplier * p.numThreads()) { 42 | #if defined DISPENSO_DEBUG 43 | assert(stealingLoadMultiplier > 0); 44 | pool_.outstandingTaskSets_.fetch_add(1, std::memory_order_acquire); 45 | #endif 46 | 47 | parent_ = (registerForParentCancel == ParentCascadeCancel::kOn) ? parentTaskSet() : nullptr; 48 | 49 | if (parent_) { 50 | parent_->registerChild(this); 51 | if (parent_->canceled()) { 52 | canceled_.store(true, std::memory_order_release); 53 | } 54 | } 55 | } 56 | 57 | TaskSetBase(TaskSetBase&& other) = delete; 58 | TaskSetBase& operator=(TaskSetBase&& other) = delete; 59 | 60 | ssize_t numPoolThreads() const { 61 | return pool_.numThreads(); 62 | } 63 | 64 | ThreadPool& pool() { 65 | return pool_; 66 | } 67 | 68 | void cancel() { 69 | canceled_.store(true, std::memory_order_release); 70 | cancelChildren(); 71 | } 72 | 73 | bool canceled() const { 74 | return canceled_.load(std::memory_order_acquire); 75 | } 76 | 77 | ~TaskSetBase() { 78 | #if defined DISPENSO_DEBUG 79 | pool_.outstandingTaskSets_.fetch_sub(1, std::memory_order_release); 80 | #endif 81 | 82 | if (parent_) { 83 | parent_->unregisterChild(this); 84 | } 85 | } 86 | 87 | protected: 88 | template 89 | auto packageTask(F&& f) { 90 | outstandingTaskCount_.fetch_add(1, std::memory_order_acquire); 91 | return [this, f = std::move(f)]() mutable { 92 | detail::pushThreadTaskSet(this); 93 | if (!canceled_.load(std::memory_order_acquire)) { 94 | #if defined(__cpp_exceptions) 95 | try { 96 | f(); 97 | } catch (...) 
{ 98 | trySetCurrentException(); 99 | } 100 | #else 101 | f(); 102 | #endif // __cpp_exceptions 103 | } 104 | detail::popThreadTaskSet(); 105 | outstandingTaskCount_.fetch_sub(1, std::memory_order_release); 106 | }; 107 | } 108 | 109 | DISPENSO_DLL_ACCESS void trySetCurrentException(); 110 | bool testAndResetException(); 111 | 112 | void registerChild(TaskSetBase* child) { 113 | std::lock_guard<std::mutex> lk(mtx_); 114 | 115 | child->prev_ = tail_; 116 | child->next_ = nullptr; 117 | if (tail_) { 118 | tail_->next_ = child; 119 | tail_ = child; 120 | } else { 121 | head_ = tail_ = child; 122 | } 123 | } 124 | 125 | void unregisterChild(TaskSetBase* child) { 126 | std::lock_guard<std::mutex> lk(mtx_); 127 | 128 | if (child->prev_) { 129 | child->prev_->next_ = child->next_; 130 | } else { 131 | // We're head 132 | assert(child == head_); 133 | head_ = child->next_; 134 | } 135 | if (child->next_) { 136 | child->next_->prev_ = child->prev_; 137 | } else { 138 | // We're tail 139 | assert(child == tail_); 140 | tail_ = child->prev_; 141 | } 142 | } 143 | 144 | void cancelChildren() { 145 | std::lock_guard<std::mutex> lk(mtx_); 146 | 147 | auto* node = head_; 148 | while (node) { 149 | node->cancel(); 150 | node = node->next_; 151 | } 152 | } 153 | 154 | alignas(kCacheLineSize) std::atomic<ssize_t> outstandingTaskCount_{0}; 155 | alignas(kCacheLineSize) ThreadPool& pool_; 156 | alignas(kCacheLineSize) std::atomic<bool> canceled_{false}; 157 | const ssize_t taskSetLoadFactor_; 158 | #if defined(__cpp_exceptions) 159 | enum ExceptionState { kUnset, kSetting, kSet }; 160 | std::atomic<ExceptionState> guardException_{kUnset}; 161 | std::exception_ptr exception_; 162 | #endif // __cpp_exceptions 163 | 164 | TaskSetBase* parent_; 165 | 166 | // This mutex guards modifications/use of the intrusive linked list between head_ and tail_ 167 | std::mutex mtx_; 168 | TaskSetBase* head_{nullptr}; 169 | TaskSetBase* tail_{nullptr}; 170 | 171 | // prev_ and next_ are links in our *parent's* intrusive linked list. 172 | TaskSetBase* prev_; 173 | TaskSetBase* next_; 174 | }; 175 | 176 | } // namespace dispenso 177 | -------------------------------------------------------------------------------- /dispenso/timing.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree.
6 | */ 7 | 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | #if defined(_MSC_VER) 14 | #include 15 | #endif // _MSC_VER 16 | 17 | #if defined(_WIN32) 18 | #include 19 | #endif // _WIN32 20 | 21 | #if defined(__MACH__) 22 | #include 23 | #include 24 | #endif // __MACH__ 25 | 26 | namespace dispenso { 27 | namespace { 28 | #if defined(__x86_64__) || defined(_M_AMD64) 29 | #define DISPENSO_HAS_TIMESTAMP 30 | #if defined(_MSC_VER) 31 | inline uint64_t rdtscp() { 32 | uint32_t ui; 33 | return __rdtscp(&ui); 34 | } 35 | 36 | #else 37 | inline uint64_t rdtscp() { 38 | uint32_t lo, hi; 39 | __asm__ volatile("rdtscp" 40 | : /* outputs */ "=a"(lo), "=d"(hi) 41 | : /* no inputs */ 42 | : /* clobbers */ "%rcx"); 43 | return (uint64_t)lo | (((uint64_t)hi) << 32); 44 | } 45 | #endif // OS 46 | #elif (defined(__GNUC__) || defined(__clang__)) && defined(__aarch64__) 47 | #define DISPENSO_HAS_TIMESTAMP 48 | uint64_t rdtscp(void) { 49 | uint64_t val; 50 | __asm__ volatile("mrs %0, cntvct_el0" : "=r"(val)); 51 | return val; 52 | } 53 | #endif // ARCH 54 | } // namespace 55 | 56 | #if defined(DISPENSO_HAS_TIMESTAMP) 57 | 58 | #if !defined(__aarch64__) 59 | 60 | static bool snapFreq(double& firstApprox) { 61 | switch (static_cast(firstApprox)) { 62 | case 0: 63 | if (std::abs(int(firstApprox * 10.0)) <= 1) { 64 | firstApprox = 0.0; 65 | return true; 66 | } 67 | break; 68 | case 9: 69 | if (std::abs(int(firstApprox * 10.0) - 99) <= 1) { 70 | firstApprox = 10.0; 71 | 72 | return true; 73 | } 74 | break; 75 | case 3: 76 | if (std::abs(int(firstApprox * 10.0) - 33) <= 1) { 77 | firstApprox = 3.0 + 1.0 / 3.0; 78 | return true; 79 | } 80 | break; 81 | case 6: 82 | if (std::abs(int(firstApprox * 10.0) - 66) <= 1) { 83 | firstApprox = 6.0 + 2.0 / 3.0; 84 | return true; 85 | } 86 | break; 87 | } 88 | return false; 89 | } 90 | 91 | static double fallbackTicksPerSecond() { 92 | using namespace std::chrono_literals; 93 | constexpr double kChronoOverheadBias = 250e-9; 94 | 95 | auto baseStart = std::chrono::high_resolution_clock::now(); 96 | auto start = rdtscp(); 97 | std::this_thread::sleep_for(50ms); 98 | auto end = rdtscp(); 99 | auto baseEnd = std::chrono::high_resolution_clock::now(); 100 | 101 | auto base = std::chrono::duration(baseEnd - baseStart).count() - kChronoOverheadBias; 102 | double firstApprox = (static_cast(end - start)) / base; 103 | 104 | // Try to refine the approximation. In some circumstances we can "snap" the frequency to a very 105 | // good guess that is off by less than one part in thousands. Accuracy should already be quite 106 | // good in any case, but this allows us to improve in some cases. 
107 | 108 | // Get first 3 digits 109 | firstApprox *= 1e-7; 110 | 111 | int firstInt = static_cast(firstApprox); 112 | firstApprox -= firstInt; 113 | 114 | firstApprox *= 10.0; 115 | 116 | if (!snapFreq(firstApprox)) { 117 | int secondInt = static_cast(firstApprox); 118 | firstApprox -= secondInt; 119 | firstApprox *= 10.0; 120 | snapFreq(firstApprox); 121 | firstApprox *= 0.1; 122 | firstApprox += secondInt; 123 | } 124 | 125 | firstApprox *= 0.1; 126 | 127 | firstApprox += firstInt; 128 | firstApprox *= 1e7; 129 | return firstApprox; 130 | } 131 | #endif // !__aarch64__ 132 | 133 | #if defined(__aarch64__) 134 | static double ticksPerSecond() { 135 | uint64_t val; 136 | __asm__ volatile("mrs %0, cntfrq_el0" : "=r"(val)); 137 | return static_cast(val); 138 | } 139 | #elif defined(__MACH__) 140 | static double ticksPerSecond() { 141 | mach_timebase_info_data_t info; 142 | if (mach_timebase_info(&info) != KERN_SUCCESS) { 143 | return fallbackTicksPerSecond(); 144 | } 145 | return 1e9 * static_cast(info.denom) / static_cast(info.numer); 146 | } 147 | #else 148 | double ticksPerSecond() { 149 | return fallbackTicksPerSecond(); 150 | } 151 | #endif 152 | 153 | double getTime() { 154 | static double secondsPerTick = 1.0 / ticksPerSecond(); 155 | static double startTime = static_cast(rdtscp()) * secondsPerTick; 156 | 157 | double t = static_cast(rdtscp()) * secondsPerTick; 158 | return t - startTime; 159 | } 160 | #else 161 | double getTime() { 162 | static auto startTime = std::chrono::high_resolution_clock::now(); 163 | auto cur = std::chrono::high_resolution_clock::now(); 164 | 165 | return std::chrono::duration(cur - startTime).count(); 166 | } 167 | #endif // DISPENSO_HAS_TIMESTAMP 168 | 169 | namespace { 170 | // This should ensure that we initialize the time before main. 171 | double g_dummyTime = getTime(); 172 | } // namespace 173 | 174 | } // namespace dispenso 175 | -------------------------------------------------------------------------------- /dispenso/detail/concurrent_vector_impl2.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 
6 | */ 7 | 8 | // This file intended for textual inclusion into concurrent_vector.h only 9 | 10 | namespace cv { 11 | 12 | template 13 | DISPENSO_INLINE ConVecIterBase::ConVecIterBase(const VecT* vec, cv::BucketInfo info) 14 | : vb_(reinterpret_cast(vec) | info.bucket), 15 | bucketStart_(vec->buffers_[info.bucket].load(std::memory_order_relaxed)), 16 | bucketPtr_(bucketStart_ + info.bucketIndex), 17 | bucketEnd_(bucketStart_ + info.bucketCapacity) {} 18 | 19 | template 20 | DISPENSO_INLINE ConcurrentVectorIterator& 21 | ConcurrentVectorIterator::operator++() { 22 | ++bucketPtr_; 23 | if (bucketPtr_ == bucketEnd_) { 24 | auto len = bucketEnd_ - bucketStart_; 25 | ++vb_; 26 | auto vb = getVecAndBucket(); 27 | len <<= int{vb.bucket > 1}; 28 | bucketPtr_ = bucketStart_ = vb.vec->buffers_[vb.bucket].load(std::memory_order_relaxed); 29 | bucketEnd_ = bucketPtr_ + len; 30 | } 31 | return *this; 32 | } 33 | 34 | template 35 | DISPENSO_INLINE ConcurrentVectorIterator& 36 | ConcurrentVectorIterator::operator--() { 37 | --bucketPtr_; 38 | if (bucketPtr_ < bucketStart_) { 39 | auto vb = getVecAndBucket(); 40 | if (vb.bucket) { 41 | auto len = bucketEnd_ - bucketStart_; 42 | --vb_; 43 | len >>= int{vb.bucket > 1}; 44 | bucketStart_ = vb.vec->buffers_[vb.bucket - 1].load(std::memory_order_relaxed); 45 | bucketPtr_ = bucketStart_ + len; 46 | bucketEnd_ = bucketPtr_; 47 | --bucketPtr_; 48 | } 49 | } 50 | return *this; 51 | } 52 | 53 | template 54 | DISPENSO_INLINE typename ConcurrentVectorIterator::reference 55 | ConcurrentVectorIterator::operator*() const { 56 | return *bucketPtr_; 57 | } 58 | template 59 | DISPENSO_INLINE typename ConcurrentVectorIterator::pointer 60 | ConcurrentVectorIterator::operator->() const { 61 | return &operator*(); 62 | } 63 | 64 | template 65 | DISPENSO_INLINE typename ConcurrentVectorIterator::reference 66 | ConcurrentVectorIterator::operator[](difference_type n) const { 67 | T* nPtr = bucketPtr_ + n; 68 | if (nPtr >= bucketStart_ && nPtr < bucketEnd_) { 69 | return *nPtr; 70 | } 71 | 72 | auto vb = getVecAndBucket(); 73 | 74 | // Reconstruct index 75 | ssize_t oldIndex = bucketPtr_ - bucketStart_; 76 | oldIndex += (bool)vb.bucket * (bucketEnd_ - bucketStart_); 77 | auto binfo = vb.vec->bucketAndSubIndexForIndex(oldIndex + n); 78 | return *(vb.vec->buffers_[binfo.bucket].load(std::memory_order_relaxed) + binfo.bucketIndex); 79 | } 80 | 81 | template 82 | DISPENSO_INLINE ConcurrentVectorIterator& 83 | ConcurrentVectorIterator::operator+=(difference_type n) { 84 | T* nPtr = bucketPtr_ + n; 85 | if (nPtr >= bucketStart_ && nPtr < bucketEnd_) { 86 | bucketPtr_ = nPtr; 87 | return *this; 88 | } 89 | 90 | auto vb = getVecAndBucket(); 91 | 92 | // Reconstruct index 93 | ssize_t oldIndex = bucketPtr_ - bucketStart_; 94 | oldIndex += (bool)vb.bucket * (bucketEnd_ - bucketStart_); 95 | auto binfo = vb.vec->bucketAndSubIndexForIndex(oldIndex + n); 96 | bucketStart_ = vb.vec->buffers_[binfo.bucket].load(std::memory_order_relaxed); 97 | bucketEnd_ = bucketStart_ + binfo.bucketCapacity; 98 | bucketPtr_ = bucketStart_ + binfo.bucketIndex; 99 | vb_ = reinterpret_cast(vb.vec) | binfo.bucket; 100 | return *this; 101 | } 102 | 103 | template 104 | DISPENSO_INLINE ConcurrentVectorIterator 105 | ConcurrentVectorIterator::operator+(difference_type n) const { 106 | T* nPtr = bucketPtr_ + n; 107 | if (nPtr >= bucketStart_ && nPtr < bucketEnd_) { 108 | return {vb_, bucketStart_, nPtr, bucketEnd_}; 109 | } 110 | 111 | auto vb = getVecAndBucket(); 112 | // Reconstruct index 113 | ssize_t oldIndex = 
bucketPtr_ - bucketStart_; 114 | oldIndex += (bool)vb.bucket * (bucketEnd_ - bucketStart_); 115 | auto binfo = vb.vec->bucketAndSubIndexForIndex(oldIndex + n); 116 | return {vb.vec, binfo}; 117 | } 118 | 119 | template 120 | DISPENSO_INLINE typename CompactCVecIterator::reference 121 | CompactCVecIterator::operator*() const { 122 | return const_cast(*vec_)[index_]; 123 | } 124 | 125 | template 126 | DISPENSO_INLINE typename CompactCVecIterator::pointer 127 | CompactCVecIterator::operator->() const { 128 | return &operator*(); 129 | } 130 | 131 | template 132 | DISPENSO_INLINE typename CompactCVecIterator::reference 133 | CompactCVecIterator::operator[](ssize_t n) const { 134 | return const_cast(*vec_)[index_ + n]; 135 | } 136 | } // namespace cv 137 | -------------------------------------------------------------------------------- /dispenso/task_set.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | #include "task_set.h" 9 | 10 | #include 11 | 12 | namespace dispenso { 13 | 14 | namespace detail { 15 | // 64 depth is pretty ridiculous, but try not to step on anyone's feet. 16 | constexpr int32_t kMaxTasksStackSize = 64; 17 | 18 | DISPENSO_THREAD_LOCAL TaskSetBase* g_taskStack[kMaxTasksStackSize]; 19 | DISPENSO_THREAD_LOCAL int32_t g_taskStackSize = 0; 20 | 21 | void pushThreadTaskSet(TaskSetBase* t) { 22 | #ifndef NDEBUG 23 | if (g_taskStackSize < 0 || g_taskStackSize >= kMaxTasksStackSize) { 24 | fprintf(stderr, "TaskSet parent stack index is invalid when pushing: %d\n", g_taskStackSize); 25 | std::abort(); 26 | } 27 | #endif // NDEBUG 28 | g_taskStack[g_taskStackSize++] = t; 29 | } 30 | void popThreadTaskSet() { 31 | #ifndef NDEBUG 32 | if (g_taskStackSize <= 0) { 33 | fprintf(stderr, "TaskSet parent stack index is invalid when popping: %d\n", g_taskStackSize); 34 | std::abort(); 35 | } 36 | #endif // NDEBUG 37 | --g_taskStackSize; 38 | } 39 | } // namespace detail 40 | 41 | TaskSetBase* parentTaskSet() { 42 | using namespace detail; 43 | 44 | #ifndef NDEBUG 45 | if (g_taskStackSize < 0 || g_taskStackSize >= kMaxTasksStackSize) { 46 | fprintf(stderr, "TaskSet parent stack index is invalid when accessing: %d\n", g_taskStackSize); 47 | std::abort(); 48 | } 49 | #endif // NDEBUG 50 | 51 | return g_taskStackSize ? g_taskStack[g_taskStackSize - 1] : nullptr; 52 | } 53 | 54 | void TaskSetBase::trySetCurrentException() { 55 | #if defined(__cpp_exceptions) 56 | auto status = kUnset; 57 | if (guardException_.compare_exchange_strong(status, kSetting, std::memory_order_acq_rel)) { 58 | exception_ = std::current_exception(); 59 | guardException_.store(kSet, std::memory_order_release); 60 | canceled_.store(true, std::memory_order_release); 61 | } 62 | #endif // __cpp_exceptions 63 | } 64 | 65 | inline bool TaskSetBase::testAndResetException() { 66 | #if defined(__cpp_exceptions) 67 | if (guardException_.load(std::memory_order_acquire) == kSet) { 68 | auto exception = std::move(exception_); 69 | guardException_.store(kUnset, std::memory_order_release); 70 | std::rethrow_exception(exception); 71 | } 72 | #endif // __cpp_exceptions 73 | return canceled_.load(std::memory_order_acquire); 74 | } 75 | 76 | bool ConcurrentTaskSet::wait() { 77 | // Steal work until our set is unblocked. 
Note that this is not the 78 | // fastest possible way to unblock the current set, but it will alleviate 79 | // deadlock, and should provide decent throughput for all waiters. 80 | 81 | // The deadlock scenario mentioned goes as follows: N threads in the 82 | // ThreadPool. Each thread is running code that is using TaskSets. No 83 | // progress could be made without stealing. 84 | while (outstandingTaskCount_.load(std::memory_order_acquire)) { 85 | if (!pool_.tryExecuteNext()) { 86 | std::this_thread::yield(); 87 | } 88 | } 89 | 90 | return testAndResetException(); 91 | } 92 | 93 | bool ConcurrentTaskSet::tryWait(size_t maxToExecute) { 94 | while (outstandingTaskCount_.load(std::memory_order_acquire) && maxToExecute--) { 95 | if (!pool_.tryExecuteNext()) { 96 | break; 97 | } 98 | } 99 | 100 | // Must check completion prior to checking exceptions, otherwise there could be a case where 101 | // exceptions are checked, then an exception is propagated, and then we return whether all items 102 | // have been completed, thus dropping the exception. 103 | if (outstandingTaskCount_.load(std::memory_order_acquire)) { 104 | return false; 105 | } 106 | 107 | return !testAndResetException(); 108 | } 109 | 110 | moodycamel::ProducerToken TaskSet::makeToken(moodycamel::ConcurrentQueue& pool) { 111 | return moodycamel::ProducerToken(pool); 112 | } 113 | 114 | bool TaskSet::wait() { 115 | // Steal work until our set is unblocked. 116 | // The deadlock scenario mentioned goes as follows: N threads in the 117 | // ThreadPool. Each thread is running code that is using TaskSets. No 118 | // progress could be made without stealing. 119 | while (pool_.tryExecuteNextFromProducerToken(token_)) { 120 | } 121 | 122 | while (outstandingTaskCount_.load(std::memory_order_acquire)) { 123 | if (!pool_.tryExecuteNext()) { 124 | std::this_thread::yield(); 125 | } 126 | } 127 | 128 | return testAndResetException(); 129 | } 130 | 131 | bool TaskSet::tryWait(size_t maxToExecute) { 132 | ssize_t maxToExe = static_cast(maxToExecute); 133 | while (outstandingTaskCount_.load(std::memory_order_acquire) && maxToExe--) { 134 | if (!pool_.tryExecuteNextFromProducerToken(token_)) { 135 | break; 136 | } 137 | } 138 | 139 | // Must check completion prior to checking exceptions, otherwise there could be a case where 140 | // exceptions are checked, then an exception is propagated, and then we return whether all items 141 | // have been completed, thus dropping the exception. 142 | 143 | maxToExe = std::max(0, maxToExe); 144 | 145 | while (outstandingTaskCount_.load(std::memory_order_acquire) && maxToExe--) { 146 | if (!pool_.tryExecuteNext()) { 147 | std::this_thread::yield(); 148 | } 149 | } 150 | 151 | if (outstandingTaskCount_.load(std::memory_order_acquire)) { 152 | return false; 153 | } 154 | 155 | return !testAndResetException(); 156 | } 157 | 158 | } // namespace dispenso 159 | -------------------------------------------------------------------------------- /benchmarks/for_latency_benchmark.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 
6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | #if defined(_OPENMP) 13 | #include 14 | #endif 15 | 16 | #include 17 | #include 18 | 19 | #if !defined(BENCHMARK_WITHOUT_TBB) 20 | #include "tbb/blocked_range.h" 21 | #include "tbb/parallel_for.h" 22 | #include "tbb/task_scheduler_init.h" 23 | #endif // !BENCHMARK_WITHOUT_TBB 24 | 25 | #include "thread_benchmark_common.h" 26 | 27 | namespace { 28 | 29 | using namespace std::chrono_literals; 30 | 31 | uint32_t kSeed(8); 32 | constexpr int kSize = 50000; 33 | constexpr auto kSleep = 30ms; 34 | } // namespace 35 | 36 | // Adapted from Google gtest examples 37 | // Returns true iff n is a prime number. 38 | bool isPrime(int n) { 39 | // Trivial case 1: small numbers 40 | if (n <= 1) 41 | return false; 42 | 43 | // Trivial case 2: even numbers 44 | if (n % 2 == 0) 45 | return n == 2; 46 | 47 | // Now, we have that n is odd and n >= 3. 48 | 49 | // Try to divide n by every odd number i, starting from 3 50 | for (int i = 3;; i += 2) { 51 | // We only have to try i up to the squre root of n 52 | if (i > n / i) 53 | break; 54 | 55 | // Now, we have i <= n/i < n. 56 | // If n is divisible by i, n is not prime. 57 | if (n % i == 0) 58 | return false; 59 | } 60 | 61 | // n has no integer factor in the range (1, n), and thus is prime. 62 | return true; 63 | } 64 | 65 | const std::vector& getInputs(int numElements) { 66 | static std::unordered_map> vecs; 67 | auto it = vecs.find(numElements); 68 | if (it != vecs.end()) { 69 | return it->second; 70 | } 71 | 72 | std::mt19937_64 gen64(kSeed); 73 | std::uniform_int_distribution<> distribution(100000, 1000000); 74 | std::vector values; 75 | values.reserve(numElements); 76 | for (int i = 0; i < numElements; ++i) { 77 | values.push_back(distribution(gen64)); 78 | } 79 | auto res = vecs.emplace(numElements, std::move(values)); 80 | assert(res.second); 81 | return res.first->second; 82 | } 83 | 84 | void BM_serial(benchmark::State& state) { 85 | std::vector output(kSize, 0); 86 | auto& input = getInputs(kSize); 87 | 88 | std::vector times; 89 | times.reserve(1000); 90 | 91 | for (auto UNUSED_VAR : state) { 92 | std::this_thread::sleep_for(kSleep); 93 | times.push_back(dispenso::getTime()); 94 | for (size_t i = 0; i < kSize; ++i) { 95 | output[i] = isPrime(input[i]); 96 | } 97 | times.back() = dispenso::getTime() - times.back(); 98 | } 99 | 100 | doStats(times, state); 101 | } 102 | 103 | void BM_dispenso(benchmark::State& state) { 104 | const int numThreads = state.range(0) - 1; 105 | 106 | std::vector output(kSize, 0); 107 | dispenso::resizeGlobalThreadPool(numThreads); 108 | 109 | std::vector times; 110 | times.reserve(1000); 111 | 112 | auto& input = getInputs(kSize); 113 | for (auto UNUSED_VAR : state) { 114 | std::this_thread::sleep_for(kSleep); 115 | times.push_back(dispenso::getTime()); 116 | dispenso::parallel_for( 117 | dispenso::makeChunkedRange(0, kSize), [&input, &output](size_t i, size_t e) { 118 | for (; i != e; ++i) { 119 | output[i] = isPrime(input[i]); 120 | } 121 | }); 122 | times.back() = dispenso::getTime() - times.back(); 123 | } 124 | 125 | doStats(times, state); 126 | } 127 | 128 | #if defined(_OPENMP) 129 | void BM_omp(benchmark::State& state) { 130 | const int numThreads = state.range(0); 131 | 132 | std::vector output(kSize, 0); 133 | omp_set_num_threads(numThreads); 134 | 135 | std::vector times; 136 | times.reserve(1000); 137 | 138 | auto& input = getInputs(kSize); 139 | for (auto UNUSED_VAR : state) { 140 | std::this_thread::sleep_for(kSleep); 141 | 
times.push_back(dispenso::getTime()); 142 | #pragma omp parallel for 143 | for (int i = 0; i < kSize; ++i) { 144 | output[i] = isPrime(input[i]); 145 | } 146 | times.back() = dispenso::getTime() - times.back(); 147 | } 148 | doStats(times, state); 149 | } 150 | #endif /*defined(_OPENMP)*/ 151 | 152 | #if !defined(BENCHMARK_WITHOUT_TBB) 153 | void BM_tbb(benchmark::State& state) { 154 | const int numThreads = state.range(0); 155 | 156 | std::vector output(kSize, 0); 157 | 158 | tbb::task_scheduler_init initsched(numThreads); 159 | 160 | std::vector times; 161 | times.reserve(1000); 162 | 163 | auto& input = getInputs(kSize); 164 | for (auto UNUSED_VAR : state) { 165 | std::this_thread::sleep_for(kSleep); 166 | times.push_back(dispenso::getTime()); 167 | tbb::parallel_for( 168 | tbb::blocked_range(0, kSize), 169 | [&input, &output](const tbb::blocked_range& r) { 170 | for (size_t i = r.begin(); i < r.end(); ++i) { 171 | output[i] = isPrime(input[i]); 172 | } 173 | }); 174 | times.back() = dispenso::getTime() - times.back(); 175 | } 176 | doStats(times, state); 177 | } 178 | #endif // !BENCHMARK_WITHOUT_TBB 179 | 180 | static void CustomArguments(benchmark::internal::Benchmark* b) { 181 | for (int i : pow2HalfStepThreads()) { 182 | b->Arg(i); 183 | } 184 | } 185 | 186 | BENCHMARK(BM_serial)->UseRealTime(); 187 | 188 | #if defined(_OPENMP) 189 | BENCHMARK(BM_omp)->Apply(CustomArguments)->UseRealTime(); 190 | #endif // OPENMP 191 | #if !defined(BENCHMARK_WITHOUT_TBB) 192 | BENCHMARK(BM_tbb)->Apply(CustomArguments)->UseRealTime(); 193 | #endif // !BENCHMARK_WITHOUT_TBB 194 | 195 | BENCHMARK(BM_dispenso)->Apply(CustomArguments)->UseRealTime(); 196 | 197 | BENCHMARK_MAIN(); 198 | -------------------------------------------------------------------------------- /dispenso/pipeline.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | /** 9 | * @file pipeline.h 10 | * A file providing utilities for parallel pipelining of work. 11 | **/ 12 | 13 | #pragma once 14 | 15 | #include 16 | 17 | #include 18 | 19 | namespace dispenso { 20 | 21 | /** 22 | * OpResult is like a poor-man's std::optional for those who wish to use dispenso pipeline filtering 23 | * in C++14. In C++17 and beyond, it is recommended to use std::optional instead. OpResult has 24 | * implicit construct from T, just like std::optional, and move/copy constructors and operators, 25 | * bool conversion, and value() function, but otherwise provides less functionality than 26 | * std::optional. 27 | **/ 28 | template 29 | using OpResult = detail::OpResult; 30 | 31 | /** 32 | * A simple constant representing maximum parallelism for a stage. This number has no particular 33 | * significance, and is simply here for convenience. 34 | **/ 35 | constexpr ssize_t kStageNoLimit = std::numeric_limits::max(); 36 | 37 | /** 38 | * Create a stage for use in the pipeline function. 39 | * 40 | * @param f A function-like object that can accept the result of the previous stage (if any), and 41 | * which produces the output for the next stage (if any). 42 | * @param limit How many threads may concurrently run work for this stage. Values larger than the 43 | * number of threads in the associated thread pool of the used ConcurrentTaskSet will be capped to 44 | * the size of the pool. 
45 | * @return A stage object suitable for pipelining. 46 | **/ 47 | template 48 | auto stage(F&& f, ssize_t limit) { 49 | return detail::Stage(std::forward(f), limit); 50 | } 51 | 52 | /** 53 | * Pipeline work in stages. Pipelines allow stages to specify parallelism limits by using the 54 | * stage function, or a function-like object can simply be passed directly, indicating 55 | * a serial stage. Even if stages are serial, there can be parallelism between stages, so in a 3 56 | * stage serial pipeline, the expected runtime is the max of the 3 stages runtimes (note that this 57 | * is in the absence of pipeline overheads and with an infinitely long workstream. In practice 58 | * speedup is somewhat less). This function will block until the entire pipeline has completed. 59 | * 60 | * @param pool The ThreadPool to run the work in. This inherently determines the upper bound for 61 | * parallelism of the pipeline. 62 | * @param sIn The stages to run. The first stage must be a Generator stage, the last must be a Sink 63 | * stage, and intermediate stages are Transform stages. 64 | * - If there is only one stage, it takes no 65 | * arguments, but returns a bool indicating completion (false means the pipeline is complete). 66 | * - Otherwise, the Generator stage takes no arguments and must return an OpResult or std::optional 67 | * value, and an invalid/nullopt result indicates that the Generator is done (no more values 68 | * forthcoming). 69 | * - Transform stages should accept the output of the prior stage (or output.value() in the case of 70 | * OpResult or std::optional), and should return either a value or an OpResult or std::optional 71 | * value if the Transform is capable of filtering results. Invalid/nullopt OpResult or std::optional 72 | * values indicate that the value should be filtered, and not passed on to the next stage. 73 | * - The Sink stage should accept the output of the prior stage, just as a Transform stage does, but 74 | * does not return any value (or at least the pipeline will ignore it). 75 | **/ 76 | template 77 | void pipeline(ThreadPool& pool, Stages&&... sIn) { 78 | ConcurrentTaskSet tasks(pool); 79 | auto pipes = detail::makePipes(tasks, std::forward(sIn)...); 80 | pipes.execute(); 81 | pipes.wait(); 82 | } 83 | 84 | /** 85 | * Pipeline work in stages. Pipelines allow stages to specify parallelism limits by using the 86 | * stage function, or a function-like object can simply be passed directly, indicating 87 | * a serial stage. Even if stages are serial, there can be parallelism between stages, so in a 3 88 | * stage serial pipeline, the expected runtime is the max of the 3 stages runtimes (note that this 89 | * is in the absence of pipeline overheads and with an infinitely long workstream. In practice 90 | * speedup is somewhat less). Work will be run on dispenso's global thread pool. This function will 91 | * block until the entire pipeline has completed. 92 | * 93 | * @param sIn The stages to run. The first stage must be a Generator stage, the last must be a Sink 94 | * stage, and intermediate stages are Transform stages. 95 | * - If there is only one stage, it takes no 96 | * arguments, but returns a bool indicating completion (false means the pipeline is complete). 97 | * - Otherwise, the Generator stage takes no arguments and must return an OpResult or std::optional 98 | * value, and an invalid/nullopt result indicates that the Generator is done (no more values 99 | * forthcoming). 
100 | * - Transform stages should accept the output of the prior stage (or output.value() in the case of 101 | * OpResult or std::optional), and should return either a value or an OpResult or std::optional 102 | * value if the Transform is capable of filtering results. Invalid/nullopt OpResult or std::optional 103 | * values indicate that the value should be filtered, and not passed on to the next stage. 104 | * - The Sink stage should accept the output of the prior stage, just as a Transform stage does, but 105 | * does not return any value (or at least the pipeline will ignore it). 106 | **/ 107 | template 108 | void pipeline(Stages&&... sIn) { 109 | pipeline(globalThreadPool(), std::forward(sIn)...); 110 | } 111 | 112 | } // namespace dispenso 113 | -------------------------------------------------------------------------------- /dispenso/priority.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 6 | */ 7 | 8 | #include 9 | 10 | #if (defined(__unix__) || defined(unix)) && !defined(USG) 11 | #include 12 | #endif 13 | 14 | #if defined(__linux__) 15 | #include 16 | #include 17 | #include 18 | #elif defined(__MACH__) 19 | #include 20 | #include 21 | #include 22 | #elif defined(_WIN32) 23 | #include 24 | #elif defined(BSD) 25 | #include 26 | #include 27 | #endif 28 | 29 | namespace dispenso { 30 | 31 | namespace { 32 | DISPENSO_THREAD_LOCAL ThreadPriority g_threadPriority = ThreadPriority::kNormal; 33 | } // namespace 34 | 35 | ThreadPriority getCurrentThreadPriority() { 36 | return g_threadPriority; 37 | } 38 | 39 | #ifdef __MACH__ 40 | bool setCurrentThreadPriority(ThreadPriority prio) { 41 | mach_port_t threadport = pthread_mach_thread_np(pthread_self()); 42 | if (prio == ThreadPriority::kRealtime) { 43 | mach_timebase_info_data_t info; 44 | mach_timebase_info(&info); 45 | double msToAbsTime = ((double)info.denom / (double)info.numer) * 1000000.0; 46 | thread_time_constraint_policy_data_t time_constraints; 47 | time_constraints.period = 0; 48 | time_constraints.computation = static_cast(1.0 * msToAbsTime); 49 | time_constraints.constraint = static_cast(10.0 * msToAbsTime); 50 | time_constraints.preemptible = 0; 51 | 52 | if (thread_policy_set( 53 | threadport, 54 | THREAD_TIME_CONSTRAINT_POLICY, 55 | (thread_policy_t)&time_constraints, 56 | THREAD_TIME_CONSTRAINT_POLICY_COUNT) != KERN_SUCCESS) { 57 | return false; 58 | } 59 | } 60 | 61 | // https://fergofrog.com/code/cbowser/xnu/osfmk/kern/sched.h.html#_M/MAXPRI_USER 62 | struct thread_precedence_policy ttcpolicy; 63 | 64 | switch (prio) { 65 | case ThreadPriority::kLow: 66 | ttcpolicy.importance = 20; 67 | break; 68 | case ThreadPriority::kNormal: 69 | ttcpolicy.importance = 37; 70 | break; 71 | case ThreadPriority::kHigh: // fallthrough 72 | case ThreadPriority::kRealtime: 73 | ttcpolicy.importance = 63; 74 | break; 75 | } 76 | 77 | if (thread_policy_set( 78 | threadport, 79 | THREAD_PRECEDENCE_POLICY, 80 | (thread_policy_t)&ttcpolicy, 81 | THREAD_PRECEDENCE_POLICY_COUNT) != KERN_SUCCESS) { 82 | return false; 83 | } 84 | 85 | g_threadPriority = prio; 86 | return true; 87 | } 88 | #elif defined(_WIN32) 89 | // https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-setthreadpriority 90 | bool setCurrentThreadPriority(ThreadPriority prio) { 91 | if (prio == 
ThreadPriority::kRealtime) { 92 | if (!SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS)) { 93 | return false; 94 | } 95 | } 96 | 97 | if (prio == ThreadPriority::kHigh) { 98 | // Best effort 99 | SetPriorityClass(GetCurrentProcess(), HIGH_PRIORITY_CLASS); 100 | } 101 | 102 | bool success = false; 103 | switch (prio) { 104 | case ThreadPriority::kLow: 105 | success = SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_LOWEST); 106 | break; 107 | case ThreadPriority::kNormal: 108 | success = SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_NORMAL); 109 | break; 110 | case ThreadPriority::kHigh: // fallthrough 111 | case ThreadPriority::kRealtime: 112 | success = SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_HIGHEST); 113 | break; 114 | } 115 | 116 | if (!success) { 117 | return false; 118 | } 119 | 120 | g_threadPriority = prio; 121 | return true; 122 | } 123 | #elif defined(__linux__) 124 | bool setCurrentThreadPriority(ThreadPriority prio) { 125 | if (prio == ThreadPriority::kRealtime) { 126 | struct sched_param param; 127 | param.sched_priority = 99; 128 | if (pthread_setschedparam(pthread_self(), SCHED_FIFO, ¶m)) { 129 | return false; 130 | } 131 | } 132 | 133 | switch (prio) { 134 | case ThreadPriority::kLow: 135 | errno = 0; 136 | (void)!nice(10); 137 | break; 138 | case ThreadPriority::kNormal: 139 | errno = 0; 140 | (void)!nice(0); 141 | break; 142 | case ThreadPriority::kHigh: // fallthrough 143 | case ThreadPriority::kRealtime: { 144 | struct rlimit rlim; 145 | getrlimit(RLIMIT_NICE, &rlim); 146 | if (rlim.rlim_max <= 20) { 147 | return false; 148 | } 149 | rlim.rlim_cur = rlim.rlim_max; 150 | setrlimit(RLIMIT_NICE, &rlim); 151 | errno = 0; 152 | (void)!nice(static_cast(20 - rlim.rlim_max)); 153 | } 154 | } 155 | if (errno != 0) { 156 | return false; 157 | } 158 | g_threadPriority = prio; 159 | return true; 160 | } 161 | #elif defined(__FreeBSD__) 162 | // TODO: Find someone who has a FreeBSD system to test this code. 163 | bool setCurrentThreadPriority(ThreadPriority prio) { 164 | struct rtprio rtp; 165 | 166 | if (prio == ThreadPriority::kRealtime) { 167 | rtp.type = RTP_PRIO_REALTIME; 168 | rtp.prio = 10; 169 | if (rtprio_thread(RTP_SET, 0, &rtp)) { 170 | return false; 171 | } 172 | } else { 173 | rtp.type = RTP_PRIO_NORMAL; 174 | switch (prio) { 175 | case ThreadPriority::kLow: 176 | rtp.prio = 31; 177 | break; 178 | case ThreadPriority::kNormal: 179 | rtp.prio = 15; 180 | break; 181 | case ThreadPriority::kHigh: // fallthrough 182 | case ThreadPriority::kRealtime: 183 | rtp.prio = 0; 184 | break; 185 | } 186 | if (rtprio_thread(RTP_SET, 0, &rtp)) { 187 | return false; 188 | } 189 | } 190 | g_threadPriority = prio; 191 | return true; 192 | } 193 | #else 194 | bool setCurrentThreadPriority(ThreadPriority prio) { 195 | return false; 196 | } 197 | 198 | #endif // platform 199 | 200 | } // namespace dispenso 201 | -------------------------------------------------------------------------------- /benchmarks/trivial_compute_benchmark.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 
6 | */ 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | #if defined(_OPENMP) 14 | #include 15 | #endif 16 | 17 | #if !defined(BENCHMARK_WITHOUT_TBB) 18 | #include "tbb/blocked_range.h" 19 | #include "tbb/parallel_reduce.h" 20 | #include "tbb/task_scheduler_init.h" 21 | #endif // !BENCHMARK_WITHOUT_TBB 22 | 23 | #include "thread_benchmark_common.h" 24 | 25 | static constexpr int kSmallSize = 100; 26 | static constexpr int kMediumSize = 1000000; 27 | static constexpr int kLargeSize = 100000000; 28 | 29 | uint32_t getInputs(int num_elements) { 30 | srand(num_elements); 31 | return rand() & 127; 32 | } 33 | 34 | inline uint64_t calculate(uint64_t input, uint64_t index, size_t foo) { 35 | return std::cos( 36 | std::log( 37 | std::sin(std::exp(std::sqrt(static_cast((input ^ index) - 3 * foo * input)))))); 38 | } 39 | 40 | void checkResults(uint32_t input, uint64_t actual, int foo, size_t num_elements) { 41 | if (!foo) 42 | return; 43 | if (input != getInputs(num_elements)) { 44 | std::cerr << "Failed to recover input!" << std::endl; 45 | abort(); 46 | } 47 | uint64_t expected = 0; 48 | for (size_t i = 0; i < num_elements; ++i) { 49 | expected += calculate(input, i, foo); 50 | } 51 | if (expected != actual) { 52 | std::cerr << "FAIL! " << expected << " vs " << actual << std::endl; 53 | abort(); 54 | } 55 | } 56 | 57 | template 58 | void BM_serial(benchmark::State& state) { 59 | auto input = getInputs(num_elements); 60 | uint64_t sum = 0; 61 | int foo = 0; 62 | for (auto UNUSED_VAR : state) { 63 | sum = 0; 64 | ++foo; 65 | for (size_t i = 0; i < num_elements; ++i) { 66 | sum += calculate(input, i, foo); 67 | } 68 | } 69 | checkResults(input, sum, foo, num_elements); 70 | } 71 | 72 | void BM_dispenso(benchmark::State& state) { 73 | const int num_threads = state.range(0) - 1; 74 | const int num_elements = state.range(1); 75 | 76 | dispenso::ThreadPool pool(num_threads); 77 | 78 | uint64_t sum = 0; 79 | int foo = 0; 80 | 81 | dispenso::ParForOptions options; 82 | options.minItemsPerChunk = 4000; 83 | 84 | auto input = getInputs(num_elements); 85 | for (auto UNUSED_VAR : state) { 86 | dispenso::TaskSet tasks(pool); 87 | 88 | std::vector sums; 89 | sums.reserve(num_threads + 1); 90 | ++foo; 91 | dispenso::parallel_for( 92 | tasks, 93 | sums, 94 | []() { return uint64_t{0}; }, 95 | dispenso::makeChunkedRange(0, num_elements, dispenso::ParForChunking::kStatic), 96 | [input, foo](uint64_t& lsumStore, size_t i, size_t end) { 97 | uint64_t lsum = 0; 98 | for (; i != end; ++i) { 99 | lsum += calculate(input, i, foo); 100 | } 101 | lsumStore += lsum; 102 | }, 103 | options); 104 | sum = 0; 105 | for (auto s : sums) { 106 | sum += s; 107 | } 108 | } 109 | 110 | checkResults(input, sum, foo, num_elements); 111 | } 112 | 113 | #if defined(_OPENMP) 114 | void BM_omp(benchmark::State& state) { 115 | const int num_threads = state.range(0); 116 | const int num_elements = state.range(1); 117 | 118 | omp_set_num_threads(num_threads); 119 | 120 | uint64_t sum = 0; 121 | 122 | int foo = 0; 123 | 124 | auto input = getInputs(num_elements); 125 | for (auto UNUSED_VAR : state) { 126 | sum = 0; 127 | ++foo; 128 | #pragma omp parallel for reduction(+ : sum) 129 | for (int i = 0; i < num_elements; ++i) { 130 | sum += calculate(input, i, foo); 131 | } 132 | } 133 | checkResults(input, sum, foo, num_elements); 134 | } 135 | #endif /* defined(_OPENMP)*/ 136 | 137 | #if !defined(BENCHMARK_WITHOUT_TBB) 138 | void BM_tbb(benchmark::State& state) { 139 | const int num_threads = state.range(0); 140 | const int num_elements 
= state.range(1); 141 | 142 | uint64_t sum = 0; 143 | 144 | int foo = 0; 145 | 146 | auto input = getInputs(num_elements); 147 | for (auto UNUSED_VAR : state) { 148 | tbb::task_scheduler_init initsched(num_threads); 149 | ++foo; 150 | sum = tbb::parallel_reduce( 151 | tbb::blocked_range(0, num_elements), 152 | uint64_t{0}, 153 | [input, foo](const tbb::blocked_range& r, uint64_t init) -> uint64_t { 154 | for (size_t a = r.begin(); a != r.end(); ++a) 155 | init += calculate(input, a, foo); 156 | return init; 157 | }, 158 | [](uint64_t x, uint64_t y) -> uint64_t { return x + y; }); 159 | } 160 | checkResults(input, sum, foo, num_elements); 161 | } 162 | #endif // !BENCHMARK_WITHOUT_TBB 163 | 164 | void BM_async(benchmark::State& state) { 165 | const int num_threads = state.range(0); 166 | const int num_elements = state.range(1); 167 | uint64_t sum = 0; 168 | int foo = 0; 169 | 170 | auto input = getInputs(num_elements); 171 | for (auto UNUSED_VAR : state) { 172 | std::vector sums; 173 | ++foo; 174 | 175 | size_t chunkSize = (num_elements + num_threads - 1) / num_threads; 176 | 177 | std::vector> futures; 178 | 179 | for (int i = 0; i < num_elements; i += chunkSize) { 180 | futures.push_back( 181 | std::async([input, foo, i, end = std::min(num_elements, i + chunkSize)]() mutable { 182 | uint64_t lsum = 0; 183 | for (; i != end; ++i) { 184 | lsum += calculate(input, i, foo); 185 | } 186 | return lsum; 187 | })); 188 | } 189 | sum = 0; 190 | for (auto& s : futures) { 191 | sum += s.get(); 192 | } 193 | } 194 | 195 | checkResults(input, sum, foo, num_elements); 196 | } 197 | 198 | static void CustomArguments(benchmark::internal::Benchmark* b) { 199 | for (int j : {kSmallSize, kMediumSize, kLargeSize}) { 200 | for (int i : pow2HalfStepThreads()) { 201 | b->Args({i, j}); 202 | } 203 | } 204 | } 205 | 206 | BENCHMARK_TEMPLATE(BM_serial, kSmallSize); 207 | BENCHMARK_TEMPLATE(BM_serial, kMediumSize); 208 | BENCHMARK_TEMPLATE(BM_serial, kLargeSize); 209 | 210 | #if defined(_OPENMP) 211 | BENCHMARK(BM_omp)->Apply(CustomArguments)->UseRealTime(); 212 | #endif // OPENMP 213 | #if !defined(BENCHMARK_WITHOUT_TBB) 214 | BENCHMARK(BM_tbb)->Apply(CustomArguments)->UseRealTime(); 215 | #endif // !BENCHMARK_WITHOUT_TBB 216 | BENCHMARK(BM_async)->Apply(CustomArguments)->UseRealTime(); 217 | BENCHMARK(BM_dispenso)->Apply(CustomArguments)->UseRealTime(); 218 | 219 | BENCHMARK_MAIN(); 220 | -------------------------------------------------------------------------------- /dispenso/graph.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) Meta Platforms, Inc. and affiliates. 3 | * 4 | * This source code is licensed under the MIT license found in the 5 | * LICENSE file in the root directory of this source tree. 
6 | */ 7 | 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | namespace { 14 | constexpr size_t kToDelete = std::numeric_limits::max(); 15 | 16 | void set_union( 17 | std::vector& s1, 18 | const std::vector& s2) { 19 | std::vector tmp(s1); 20 | s1.clear(); 21 | std::set_union(tmp.cbegin(), tmp.cend(), s2.cbegin(), s2.cend(), std::back_inserter(s1)); 22 | } 23 | 24 | void set_insert(std::vector& s, const dispenso::BiPropNode* node) { 25 | auto it = std::upper_bound(s.begin(), s.end(), node); 26 | if (it == s.begin() || *(it - 1) != node) { 27 | s.insert(it, node); 28 | } 29 | } 30 | } // anonymous namespace 31 | 32 | namespace dispenso { 33 | 34 | void BiPropNode::biPropDependsOnOneNode(BiPropNode& node) { 35 | Node::dependsOnOneNode(node); 36 | if (node.biPropSet_ == nullptr && biPropSet_ == nullptr) { 37 | biPropSet_ = std::make_shared>(); 38 | set_insert(*biPropSet_, this); 39 | set_insert(*biPropSet_, &node); 40 | node.biPropSet_ = biPropSet_; 41 | } else if (node.biPropSet_ != nullptr && biPropSet_ != nullptr) { 42 | set_union(*biPropSet_, *node.biPropSet_); 43 | node.biPropSet_ = biPropSet_; 44 | } else if (biPropSet_ == nullptr) { 45 | biPropSet_ = node.biPropSet_; 46 | set_insert(*biPropSet_, this); 47 | } else { 48 | node.biPropSet_ = biPropSet_; 49 | set_insert(*biPropSet_, &node); 50 | } 51 | } 52 | 53 | template 54 | void SubgraphT::clear() { 55 | decrementDependentCounters(); 56 | const size_t numGraphPredecessors = markNodesWithPredicessors(); 57 | if (numGraphPredecessors != 0) { 58 | removePredecessorDependencies(numGraphPredecessors); 59 | } 60 | destroyNodes(); 61 | } 62 | 63 | template 64 | void SubgraphT::destroyNodes() { 65 | for (NodeType* n : nodes_) { 66 | n->~NodeType(); 67 | } 68 | allocator_->clear(); 69 | nodes_.clear(); 70 | } 71 | 72 | template 73 | SubgraphT::~SubgraphT() { 74 | for (NodeType* n : nodes_) { 75 | n->~NodeType(); 76 | } 77 | } 78 | 79 | template 80 | void SubgraphT::decrementDependentCounters() { 81 | for (N* node : nodes_) { 82 | for (Node* const dependent : node->dependents_) { 83 | dependent->numPredecessors_--; 84 | } 85 | removeNodeFromBiPropSet(node); 86 | } 87 | } 88 | 89 | template 90 | size_t SubgraphT::markNodesWithPredicessors() { 91 | size_t numGraphPredecessors = 0; 92 | for (N* node : nodes_) { 93 | if (node->numPredecessors_ != 0) { 94 | numGraphPredecessors += node->numPredecessors_; 95 | node->numPredecessors_ = kToDelete; 96 | } 97 | } 98 | return numGraphPredecessors; 99 | } 100 | 101 | template 102 | void SubgraphT::removePredecessorDependencies(size_t numGraphPredecessors) { 103 | for (SubgraphT& subgraph : graph_->subgraphs_) { 104 | if (&subgraph == this) { 105 | continue; 106 | } 107 | for (N* node : subgraph.nodes_) { 108 | std::vector& dependents = node->dependents_; 109 | size_t num = dependents.size(); 110 | for (size_t i = 0; i < num;) { 111 | if (dependents[i]->numPredecessors_ == kToDelete) { 112 | dependents[i] = dependents[num - 1]; 113 | --num; 114 | if (--numGraphPredecessors == 0) { 115 | dependents.resize(num); 116 | return; 117 | } 118 | } else { 119 | i++; 120 | } 121 | } 122 | dependents.resize(num); 123 | } 124 | } 125 | } 126 | 127 | namespace { 128 | constexpr size_t kMaxCache = 8; 129 | // Don't cache too-large allocators. This way we will have at most 8*(2**16) = 512K outstanding 130 | // nodes worth of memory per node type. 131 | // TODO(bbudge): Make these caching values macro configurable for lightweight platforms. 
132 | constexpr size_t kMaxChunkCapacity = 1 << 16; 133 | 134 | using AlignedNodePoolPtr = 135 | std::unique_ptr>; 136 | 137 | std::vector g_sgcache[2]; 138 | std::mutex g_sgcacheMtx; 139 | 140 | template 141 | constexpr size_t kCacheIndex = size_t{std::is_same::value}; 142 | 143 | } // namespace 144 | 145 | template 146 | typename SubgraphT::PoolPtr SubgraphT::getAllocator() { 147 | AlignedNodePoolPtr ptr; 148 | 149 | auto& cache = g_sgcache[kCacheIndex]; 150 | 151 | { 152 | std::lock_guard lk(g_sgcacheMtx); 153 | if (cache.empty()) { 154 | void* alloc = 155 | detail::alignedMalloc(sizeof(NoLockPoolAllocator), alignof(NoLockPoolAllocator)); 156 | auto* pool = new (alloc) 157 | NoLockPoolAllocator(sizeof(NodeType), 128 * sizeof(NodeType), ::malloc, ::free); 158 | ptr.reset(pool); 159 | } else { 160 | ptr = std::move(cache.back()); 161 | ptr->clear(); 162 | cache.pop_back(); 163 | } 164 | } 165 | return PoolPtr(ptr.release(), releaseAllocator); 166 | } 167 | 168 | template 169 | void SubgraphT::releaseAllocator(NoLockPoolAllocator* ptr) { 170 | if (!ptr) { 171 | return; 172 | } 173 | if (ptr->totalChunkCapacity() < kMaxChunkCapacity) { 174 | auto& cache = g_sgcache[kCacheIndex]; 175 | { 176 | std::lock_guard lk(g_sgcacheMtx); 177 | if (cache.size() < kMaxCache) { 178 | cache.emplace_back(ptr); 179 | return; 180 | } 181 | } 182 | } 183 | detail::AlignedFreeDeleter()(ptr); 184 | } 185 | 186 | template 187 | GraphT::GraphT(GraphT&& other) : subgraphs_(std::move(other.subgraphs_)) { 188 | for (SubgraphT& subgraph : subgraphs_) { 189 | subgraph.graph_ = this; 190 | } 191 | } 192 | 193 | template 194 | GraphT& GraphT::operator=(GraphT&& other) noexcept { 195 | subgraphs_ = std::move(other.subgraphs_); 196 | for (SubgraphT& subgraph : subgraphs_) { 197 | subgraph.graph_ = this; 198 | } 199 | return *this; 200 | } 201 | 202 | template 203 | SubgraphT& GraphT::addSubgraph() { 204 | subgraphs_.push_back(SubgraphType(this)); 205 | return subgraphs_.back(); 206 | } 207 | 208 | template class DISPENSO_DLL_ACCESS SubgraphT; 209 | template class DISPENSO_DLL_ACCESS SubgraphT; 210 | template class DISPENSO_DLL_ACCESS GraphT; 211 | template class DISPENSO_DLL_ACCESS GraphT; 212 | } // namespace dispenso 213 | --------------------------------------------------------------------------------
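The doc comments in dispenso/pipeline.h above describe a Generator -> Transform -> Sink pipeline in which stages wrapped with stage() may run concurrently up to a limit, and stages returning std::optional (or OpResult) can filter values out of the stream. The following is a minimal sketch of how that API could be exercised; the integer range, the even-number filter, and the squaring transform are illustrative choices and not code from this repository, and the <dispenso/pipeline.h> include path assumes the usual dispenso/ install prefix.

#include <cstdint>
#include <iostream>
#include <optional>

#include <dispenso/pipeline.h>

int main() {
  // Generator (serial): produce the integers [0, 100). Returning nullopt
  // signals that no more values are forthcoming.
  int next = 0;
  auto generator = [&next]() -> std::optional<int> {
    if (next >= 100) {
      return std::nullopt;
    }
    return next++;
  };

  // Transform (parallel): square each value, filtering out odd inputs to
  // demonstrate the optional-based filtering described in the docs. Wrapped
  // with stage() so it may run on many threads (capped to the pool size).
  auto square = [](int v) -> std::optional<int64_t> {
    if (v % 2) {
      return std::nullopt;  // filtered; never reaches the sink
    }
    return int64_t{v} * v;
  };

  // Sink (serial): accumulate the results. Passed directly rather than via
  // stage(), so it runs serially and `sum` needs no synchronization.
  int64_t sum = 0;
  auto sink = [&sum](int64_t v) { sum += v; };

  // Runs on dispenso's global thread pool and blocks until the stream drains.
  dispenso::pipeline(generator, dispenso::stage(square, dispenso::kStageNoLimit), sink);

  std::cout << "sum of squares of even numbers < 100: " << sum << "\n";
  return 0;
}

Because only the squaring stage is declared parallel, the serial generator and sink act as ordered endpoints, which is the pattern the pipeline() documentation above recommends when endpoint state is not thread-safe.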