├── .gitignore ├── CMakeLists.txt ├── README.md ├── benchmark ├── CMakeLists.txt ├── all_ptrs.hpp ├── bench_atomic_shared_ptrs.cpp ├── bench_shared_ptrs.cpp └── external │ ├── anthonywilliams │ ├── README.md │ └── atomic_shared_ptr │ └── vtyulb │ ├── README.md │ └── atomic_shared_ptr.h ├── include └── parlay │ ├── atomic_shared_ptr.hpp │ ├── basic_atomic_shared_ptr.hpp │ ├── details │ ├── atomic_details.hpp │ ├── hazard_pointers.hpp │ └── wait_free_counter.hpp │ ├── fast_shared_ptr.hpp │ └── shared_ptr.hpp └── test ├── CMakeLists.txt ├── atomic_sp_tests.hpp ├── test_atomic_shared_ptr_custom.cpp ├── test_basic_atomic_shared_ptr.cpp ├── test_basic_shared_ptr.cpp └── test_shared_ptr.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | # Build 2 | /build 3 | 4 | # IDE files 5 | .idea/ 6 | *.swp 7 | 8 | # Prerequisites 9 | *.d 10 | 11 | # Compiled Object files 12 | *.slo 13 | *.lo 14 | *.o 15 | *.obj 16 | 17 | # Precompiled Headers 18 | *.gch 19 | *.pch 20 | 21 | # Compiled Dynamic libraries 22 | *.so 23 | *.dylib 24 | *.dll 25 | 26 | # Compiled Static libraries 27 | *.lai 28 | *.la 29 | *.a 30 | *.lib 31 | 32 | # Executables 33 | *.exe 34 | *.out 35 | *.app 36 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.5) 2 | project(PARLAY_ATOMIC_SHARED_PTR VERSION 2.0 3 | DESCRIPTION "A lock-free atomic shared pointer for C++" 4 | LANGUAGES CXX) 5 | 6 | include(FetchContent) 7 | 8 | # Set a default build type 9 | if(NOT CMAKE_BUILD_TYPE) 10 | set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel." 
FORCE) 11 | endif(NOT CMAKE_BUILD_TYPE) 12 | 13 | message(STATUS "--------------- General configuration -------------") 14 | message(STATUS "CMake Generator: ${CMAKE_GENERATOR}") 15 | message(STATUS "Compiler: ${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}") 16 | message(STATUS "Build type: ${CMAKE_BUILD_TYPE}") 17 | message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}") 18 | message(STATUS "CMAKE_CXX_FLAGS_DEBUG: ${CMAKE_CXX_FLAGS_DEBUG}") 19 | message(STATUS "CMAKE_CXX_FLAGS_RELEASE: ${CMAKE_CXX_FLAGS_RELEASE}") 20 | message(STATUS "CMAKE_CXX_FLAGS_RELWITHDEBINFO: ${CMAKE_CXX_FLAGS_RELWITHDEBINFO}") 21 | message(STATUS "CMAKE_EXE_LINKER_FLAGS ${CMAKE_EXE_LINKER_FLAGS}") 22 | message(STATUS "CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}" ) 23 | 24 | # Make sure -fno-omit-frame-pointer is set for profiling 25 | set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fno-omit-frame-pointer") 26 | 27 | if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") 28 | set(LINUX TRUE) 29 | endif() 30 | 31 | # ------------------------------------------------------------------- 32 | # Library definition 33 | 34 | add_library(parlay_atomic_shared_ptr INTERFACE) 35 | set(PARLAY_ATOMIC_SHARED_PTR_INCLUDE_DIR "${PROJECT_SOURCE_DIR}/include") 36 | target_include_directories(parlay_atomic_shared_ptr INTERFACE ${PARLAY_ATOMIC_SHARED_PTR_INCLUDE_DIR}) 37 | target_compile_features(parlay_atomic_shared_ptr INTERFACE cxx_std_20) 38 | 39 | # Link against system threads 40 | find_package(Threads REQUIRED) 41 | target_link_libraries(parlay_atomic_shared_ptr INTERFACE Threads::Threads) 42 | 43 | # We use boost::atomic for DWCAS 44 | find_package(Boost COMPONENTS atomic) 45 | target_link_libraries(parlay_atomic_shared_ptr INTERFACE Boost::atomic) 46 | 47 | # We use Folly utilities such as heavy/light fences and compare against their atomic_shared_ptr 48 | find_package(folly REQUIRED) 49 | target_link_libraries(parlay_atomic_shared_ptr INTERFACE folly glog dl double-conversion fmt gflags) 50 | 51 | # We use Parlay for its pool allocator 52 | find_package(Parlay 2.2.1 REQUIRED) 53 | target_link_libraries(parlay_atomic_shared_ptr INTERFACE Parlay::parlay) 54 | 55 | # Configure just::threads (Anthony William's commercial library with a lock-free atomic_shared_ptr) 56 | # if available. We will perform benchmarks against it if it is installed, otherwise we skip it 57 | find_library(JUST_THREADS_LIB NAMES justthread) 58 | find_path(JUST_THREADS_INCLUDE NAMES jss/experimental_concurrency.hpp PATH_SUFFIXES justthread) 59 | if(NOT JUST_THREADS_LIB) 60 | message(STATUS "Could not find just::threads library. Benchmarks against it will be omitted") 61 | elseif(NOT JUST_THREADS_INCLUDE) 62 | message(STATUS "Could not find just::threads includes. 
Benchmarks against it will be omitted") 63 | else() 64 | message(STATUS "just::threads found -- lib: " ${JUST_THREADS_LIB} " include: " ${JUST_THREADS_INCLUDE}) 65 | set(JUST_THREADS_AVAILABLE TRUE) 66 | add_library(jss INTERFACE) 67 | target_include_directories(jss INTERFACE ${JUST_THREADS_INCLUDE}) 68 | target_link_libraries(jss INTERFACE ${JUST_THREADS_LIB} atomic) 69 | target_compile_definitions(parlay_atomic_shared_ptr INTERFACE -DJUST_THREADS_AVAILABLE) 70 | target_link_libraries(parlay_atomic_shared_ptr INTERFACE jss) 71 | endif() 72 | 73 | # ------------------------------------------------------------------- 74 | # Unit tests 75 | 76 | message(STATUS "---------------------------------- Unit Tests ----------------------------------") 77 | 78 | 79 | # Set CMake options for GoogleTest 80 | set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) 81 | set(INSTALL_GTEST OFF CACHE BOOL "" FORCE) 82 | set(BUILD_GMOCK OFF CACHE BOOL "" FORCE) 83 | set(gtest_disable_pthreads ON CACHE BOOL "" FORCE) 84 | 85 | # Download and configure GoogleTest 86 | include(FetchContent) 87 | FetchContent_Declare(googletest 88 | GIT_REPOSITORY https://github.com/google/googletest.git 89 | GIT_TAG main 90 | ) 91 | FetchContent_GetProperties(googletest) 92 | if(NOT googletest_POPULATED) 93 | message(STATUS "testing: Configuring GoogleTest") 94 | FetchContent_Populate(googletest) 95 | set(CMAKE_SUPPRESS_DEVELOPER_WARNINGS 1 CACHE BOOL "") 96 | add_subdirectory(${googletest_SOURCE_DIR} 97 | ${googletest_BINARY_DIR} 98 | EXCLUDE_FROM_ALL) 99 | endif() 100 | 101 | # Include test targets 102 | message(STATUS "testing: Enabled") 103 | include(CTest) 104 | add_subdirectory(test) 105 | 106 | # ------------------------------------------------------------------- 107 | # Benchmarks 108 | 109 | message(STATUS "---------------------------------- Benchmarks ----------------------------------") 110 | 111 | # Benchmark should not run its own unit tests 112 | set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "" FORCE) 113 | set(BENCHMARK_ENABLE_GTEST_TESTS OFF CACHE BOOL "" FORCE) 114 | 115 | # Download Benchmark library 116 | include(FetchContent) 117 | FetchContent_Declare(benchmark 118 | GIT_REPOSITORY https://github.com/google/benchmark.git 119 | GIT_TAG main 120 | ) 121 | FetchContent_GetProperties(benchmark) 122 | if(NOT benchmark_POPULATED) 123 | message(STATUS "benchmarks: Configuring Google Benchmark") 124 | FetchContent_Populate(benchmark) 125 | set(CMAKE_SUPPRESS_DEVELOPER_WARNINGS 1 CACHE BOOL "") 126 | add_subdirectory(${benchmark_SOURCE_DIR} 127 | ${benchmark_BINARY_DIR} 128 | EXCLUDE_FROM_ALL) 129 | endif() 130 | 131 | add_subdirectory(benchmark) 132 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # A Lock-Free Atomic Shared Pointer 2 | 3 | This repository contains my lock-free **atomic_shared_ptr** implementation that I discussed at CppCon 2023 in 4 | "*Lock-free Atomic Shared Pointers Without a Split Reference Count? It Can Be Done!*". Its still a proof of concept 5 | and not quite ready for production, but it is reasonably featureful. 6 | 7 | ## Dependencies 8 | 9 | The library currently depends on [Folly](https://github.com/facebook/folly) (for asymmetric fences and F14FastSet) and 10 | [ParlayLib](https://github.com/cmuparlay/parlaylib) (for its allocator). The library itself is completely header only. 
11 | 
12 | 
13 | ## Usage
14 | 
15 | ```c++
16 | #include <parlay/atomic_shared_ptr.hpp>
17 | 
18 | parlay::atomic_shared_ptr<int> asp{parlay::make_shared<int>(42)};
19 | 
20 | parlay::shared_ptr<int> sp = asp.load(); // {42} has a reference count of 2
21 | sp = parlay::make_shared<int>(1);        // {42} has a reference count of 1
22 | asp.store(sp);                           // {42} is destroyed because the last owner is gone
23 | ```
24 | 
25 | ## Learning
26 | 
27 | If you'd like to learn about the algorithm, you should watch my CppCon talk if you have not already. If you'd like to
28 | see the code, there is a "basic" implementation in [basic_atomic_shared_ptr.hpp](./include/parlay/basic_atomic_shared_ptr.hpp).
29 | It is unoptimized and missing many features, but it is designed to be as readable as possible, to demonstrate the algorithm
30 | in a simple and understandable way. It uses [Folly's Hazard Pointers](https://github.com/facebook/folly/blob/main/folly/synchronization/Hazptr.h)
31 | to protect the control block, which adds some additional overhead.
32 | 
33 | The more optimized and feature-complete implementations can be found in
34 | [atomic_shared_ptr.hpp](./include/parlay/atomic_shared_ptr.hpp) and [shared_ptr.hpp](./include/parlay/shared_ptr.hpp).
35 | They use a custom-implemented Hazard Pointer library, [hazard_pointers.hpp](./include/parlay/details/hazard_pointers.hpp).
36 | 
37 | 
38 | ## Benchmarks
39 | 
40 | There are some basic latency benchmarks in the [benchmark](./benchmark) directory. The throughput benchmarks from my
41 | CppCon talk can be found in the **new_sps** branch of [this repository](https://github.com/cmuparlay/concurrent_deferred_rc/tree/new_sps)
42 | (it was much easier to integrate *this* library into my existing benchmarks from a previous project than to do it
43 | the other way around, sorry!)
44 | 
45 | 
46 | ## Deamortized Reclamation (Experimental Feature!)
47 | 
48 | One drawback of Hazard-Pointer-based cleanup schemes is that while they produce high throughput, they are not ideal for
49 | latency-critical applications, since every once in a while a thread has to perform a garbage collection operation
50 | that can take tens of microseconds, while an ordinary store operation takes only tens of nanoseconds. This
51 | spike in latency is undesirable in certain domains.
52 | 
53 | To address this, the library comes with an **experimental** feature, deamortized reclamation. Instead
54 | of accumulating garbage and then performing cleanup once every 1000 retires or so, it incrementally scans the
55 | Hazard Pointers, one per retire, and deletes one or two ready-to-reclaim objects each time. This effectively spreads out (formally,
56 | deamortizes) the cleanup operation so that it does not introduce large latency spikes. As a tradeoff, load latency
57 | increases by about 25% in uncontended benchmarks (from 16ns to 20ns), but the benefit is reducing the tail latency of
58 | a store operation from 14 microseconds to just over 100 nanoseconds.
59 | 
60 | More thorough benchmarks are needed to determine the implications of this technique, and there is plenty of room left
61 | to optimize it.
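The feature is switched on at runtime; the latency benchmarks in this repository call `parlay::enable_deamortized_reclamation()` before exercising the pointers. Below is a minimal sketch, assuming (as the benchmark code suggests) that this is a process-wide toggle flipped once before the atomic pointers are used:

```c++
#include <parlay/atomic_shared_ptr.hpp>

int main() {
  // Opt in to incremental (deamortized) reclamation. Assumed here to be a global,
  // call-once switch, as used at the top of benchmark/bench_atomic_shared_ptrs.cpp.
  parlay::enable_deamortized_reclamation();

  parlay::atomic_shared_ptr<int> asp{parlay::make_shared<int>(42)};

  // Retire work is now spread across stores instead of happening in occasional bursts.
  for (int i = 0; i < 1000; i++) {
    asp.store(parlay::make_shared<int>(i));
  }
  return *asp.load() == 999 ? 0 : 1;
}
```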
62 | 63 | 64 | ## Work in Progress 65 | 66 | **Implementation** 67 | - Support for `atomic_weak_ptr` 68 | - Support for aliased `shared_ptr` 69 | 70 | **Cleanup** 71 | - Port the throughput benchmarks into this repository 72 | - Drop the dependence on Folly when/if we get asymmetric fences in the standard library 73 | - Drop the dependence on ParlayLib if we can fix the allocator problem (essentially, jemalloc **hates** deferred 74 | reclamation and it performs terribly, so I need to use my own allocator for now instead). 75 | - Put some CI in this repository 76 | -------------------------------------------------------------------------------- /benchmark/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | function(add_google_benchmark NAME) 2 | add_executable(bench_${NAME} bench_${NAME}.cpp) 3 | target_link_libraries(bench_${NAME} PRIVATE parlay_atomic_shared_ptr benchmark_main) 4 | endfunction() 5 | 6 | function(add_benchmark NAME) 7 | add_executable(bench_${NAME} bench_${NAME}.cpp) 8 | target_link_libraries(bench_${NAME} PRIVATE parlay_atomic_shared_ptr) 9 | endfunction() 10 | 11 | add_google_benchmark(shared_ptrs) 12 | add_google_benchmark(atomic_shared_ptrs) 13 | 14 | -------------------------------------------------------------------------------- /benchmark/all_ptrs.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | 7 | #include "external/anthonywilliams/atomic_shared_ptr" 8 | #include "external/vtyulb/atomic_shared_ptr.h" 9 | 10 | #ifdef JUST_THREADS_AVAILABLE 11 | #include 12 | #endif 13 | 14 | #include "parlay/atomic_shared_ptr.hpp" 15 | 16 | #include "parlay/basic_atomic_shared_ptr.hpp" 17 | 18 | #ifdef __cpp_lib_atomic_shared_ptr 19 | 20 | // C++ standard library atomic support for shared ptrs 21 | template 22 | using StlAtomicSharedPtr = std::atomic>; 23 | 24 | #else 25 | 26 | // Use free functions if std::atomic is not available. Much worse. 
27 | template 28 | struct StlAtomicSharedPtr { 29 | StlAtomicSharedPtr() = default; 30 | explicit(false) StlAtomicSharedPtr(std::shared_ptr other) : sp(std::move(other)) { } // NOLINT 31 | std::shared_ptr load() { return std::atomic_load(&sp); } 32 | void store(std::shared_ptr r) { std::atomic_store(&sp, std::move(r)); } 33 | bool compare_exchange_strong(std::shared_ptr& expected, std::shared_ptr desired) { 34 | return atomic_compare_exchange_strong(&sp, &expected, std::move(desired)); 35 | } 36 | bool compare_exchange_weak(std::shared_ptr& expected, std::shared_ptr desired) { 37 | return atomic_compare_exchange_weak(&sp, &expected, std::move(desired)); 38 | } 39 | std::shared_ptr sp; 40 | }; 41 | 42 | #endif -------------------------------------------------------------------------------- /benchmark/bench_atomic_shared_ptrs.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | #include "all_ptrs.hpp" 13 | 14 | 15 | constexpr auto compute_low = [](std::vector& v) -> double { 16 | std::nth_element(v.begin(), v.begin() + v.size()/100, v.end()); 17 | return v[v.size()/100]; 18 | }; 19 | 20 | constexpr auto compute_med = [](std::vector& v) -> double { 21 | std::nth_element(v.begin(), v.begin() + v.size()/2, v.end()); 22 | return v[v.size()/100]; 23 | }; 24 | 25 | constexpr auto compute_high = [](std::vector& v) -> double { 26 | std::nth_element(v.begin(), v.begin() + v.size()*99/100, v.end()); 27 | return v[v.size()*99/100]; 28 | }; 29 | 30 | constexpr auto compute_tail = [](std::vector& v) -> double { 31 | std::nth_element(v.begin(), v.begin() + v.size()*9995/10000, v.end()); 32 | return v[v.size()*9995/10000]; 33 | }; 34 | 35 | template typename AtomicSharedPtr, template typename SharedPtr> 36 | static void bench_load(benchmark::State& state) { 37 | parlay::enable_deamortized_reclamation(); 38 | 39 | int n_threads = state.range(0); // Spawn n-1 contending threads 40 | 41 | AtomicSharedPtr src; 42 | src.store(SharedPtr(new int(42))); 43 | 44 | std::vector enemies; 45 | enemies.reserve(n_threads-1); 46 | for (int i = 0; i < n_threads - 1; i++) { 47 | enemies.emplace_back([mine = SharedPtr(new int(i+1)), &src](std::stop_token stoken) { 48 | while (!stoken.stop_requested()) { 49 | src.store(mine); // Stores a copy so we're not spamming retires 50 | } 51 | }); 52 | } 53 | 54 | std::vector all_times; 55 | 56 | for (auto _ : state) { 57 | auto start = std::chrono::high_resolution_clock::now(); 58 | auto result = src.load(); 59 | auto finish = std::chrono::high_resolution_clock::now(); 60 | 61 | auto elapsed_time = std::chrono::duration_cast>(finish - start); 62 | state.SetIterationTime(elapsed_time.count()); 63 | all_times.push_back(elapsed_time.count()); 64 | } 65 | 66 | for (auto& t : enemies) { 67 | t.request_stop(); 68 | t.join(); 69 | } 70 | 71 | state.counters["1%"] = compute_low(all_times); 72 | state.counters["50%"] = compute_med(all_times); 73 | state.counters["99%"] = compute_high(all_times); 74 | state.counters["99.95%"] = compute_tail(all_times); 75 | } 76 | 77 | template typename AtomicSharedPtr, template typename SharedPtr> 78 | static void bench_store_delete(benchmark::State& state) { 79 | 80 | int n_threads = state.range(0); // Spawn n-1 contending threads 81 | 82 | AtomicSharedPtr src; 83 | src.store(SharedPtr(new int(42))); 84 | 85 | std::vector enemies; 86 | enemies.reserve(n_threads-1); 87 | for (int i = 0; i < n_threads - 1; 
i++) { 88 | enemies.emplace_back([mine = SharedPtr(new int(i+1)), &src](std::stop_token stoken) { 89 | while (!stoken.stop_requested()) { 90 | src.store(mine); // Stores a copy so we're not spamming retires 91 | } 92 | }); 93 | } 94 | 95 | std::vector all_times; 96 | 97 | // These stores all overwrite the only copy of the pointer, so it will trigger destruction 98 | // of the managed object. This benchmark therefore also measures the cost of destruction. 99 | for (auto _ : state) { 100 | auto new_sp = SharedPtr(new int(rand())); 101 | auto start = std::chrono::high_resolution_clock::now(); 102 | src.store(std::move(new_sp)); 103 | auto finish = std::chrono::high_resolution_clock::now(); 104 | 105 | auto elapsed_time = std::chrono::duration_cast>(finish - start); 106 | state.SetIterationTime(elapsed_time.count()); 107 | all_times.push_back(elapsed_time.count()); 108 | } 109 | 110 | for (auto& t : enemies) { 111 | t.request_stop(); 112 | t.join(); 113 | } 114 | 115 | state.counters["1%"] = compute_low(all_times); 116 | state.counters["50%"] = compute_med(all_times); 117 | state.counters["99%"] = compute_high(all_times); 118 | state.counters["99.95%"] = compute_tail(all_times); 119 | } 120 | 121 | template typename AtomicSharedPtr, template typename SharedPtr> 122 | static void bench_store_copy(benchmark::State& state) { 123 | 124 | int n_threads = state.range(0); // Spawn n-1 contending threads 125 | 126 | AtomicSharedPtr src; 127 | src.store(SharedPtr(new int(42))); 128 | 129 | auto my_sp = SharedPtr(new int(42)); 130 | 131 | std::vector enemies; 132 | enemies.reserve(n_threads-1); 133 | for (int i = 0; i < n_threads - 1; i++) { 134 | enemies.emplace_back([mine = SharedPtr(new int(i+1)), &src](std::stop_token stoken) { 135 | while (!stoken.stop_requested()) { 136 | src.store(mine); // Stores a copy so we're not spamming retires 137 | } 138 | }); 139 | } 140 | 141 | std::vector all_times; 142 | 143 | // In this version, we keep a copy of our own pointer and store a copy of it, so it will 144 | // never be destroyed. This version is therefore only testing the efficiency of store 145 | // without also testing the efficiency destruction. 
146 | for (auto _ : state) { 147 | auto new_sp = my_sp; 148 | auto start = std::chrono::high_resolution_clock::now(); 149 | src.store(std::move(new_sp)); 150 | auto finish = std::chrono::high_resolution_clock::now(); 151 | 152 | auto elapsed_time = std::chrono::duration_cast>(finish - start); 153 | state.SetIterationTime(elapsed_time.count()); 154 | all_times.push_back(elapsed_time.count()); 155 | } 156 | 157 | for (auto& t : enemies) { 158 | t.request_stop(); 159 | t.join(); 160 | } 161 | 162 | state.counters["1%"] = compute_low(all_times); 163 | state.counters["50%"] = compute_med(all_times); 164 | state.counters["99%"] = compute_high(all_times); 165 | state.counters["99.95%"] = compute_tail(all_times); 166 | } 167 | 168 | 169 | 170 | #define SETUP_BENCHMARK(ptr_name, bench_name, bench) \ 171 | BENCHMARK(bench) \ 172 | ->Name(ptr_name "::" bench_name) \ 173 | ->UseManualTime() \ 174 | ->RangeMultiplier(2)->Range(1, 64); 175 | 176 | #define BENCH_PTR(name, atomic_sp, sp) \ 177 | SETUP_BENCHMARK(name, "load", (bench_load)); \ 178 | SETUP_BENCHMARK(name, "store", (bench_store_copy)); \ 179 | SETUP_BENCHMARK(name, "store-del", (bench_store_delete)); 180 | 181 | 182 | BENCH_PTR("STL", StlAtomicSharedPtr, std::shared_ptr); 183 | BENCH_PTR("Folly", folly::atomic_shared_ptr, std::shared_ptr); 184 | BENCH_PTR("Mine", parlay::atomic_shared_ptr, parlay::shared_ptr); 185 | BENCH_PTR("JSS-Free", jss::atomic_shared_ptr, jss::shared_ptr); 186 | BENCH_PTR("Vtyulb", LFStructs::AtomicSharedPtr, LFStructs::SharedPtr); 187 | BENCH_PTR("Mine-basic", parlay::basic::atomic_shared_ptr, parlay::basic::shared_ptr); 188 | 189 | #ifdef JUST_THREADS_AVAILABLE 190 | BENCH_PTR("JSS", std::experimental::atomic_shared_ptr, std::experimental::shared_ptr); 191 | #endif 192 | -------------------------------------------------------------------------------- /benchmark/bench_shared_ptrs.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | 11 | #include 12 | 13 | #include "external/anthonywilliams/atomic_shared_ptr" 14 | #include "external/vtyulb/atomic_shared_ptr.h" 15 | 16 | #ifdef JUST_THREADS_AVAILABLE 17 | #include 18 | #endif 19 | 20 | #include "parlay/atomic_shared_ptr.hpp" 21 | 22 | template typename SharedPtr, typename T, typename... Args> 23 | auto dispatch_make_shared(Args... 
args) { 24 | if constexpr (std::is_same_v, std::shared_ptr>) { 25 | return std::make_shared(std::forward(args)...); 26 | } 27 | else if constexpr (std::is_same_v, parlay::shared_ptr>) { 28 | return parlay::make_shared(std::forward(args)...); 29 | } 30 | else if constexpr (std::is_same_v, jss::shared_ptr>) { 31 | return jss::make_shared(std::forward(args)...); 32 | } 33 | else if constexpr (std::is_same_v, LFStructs::SharedPtr>) { 34 | // No make_shared in Vtyulb's SharedPtr type 35 | return LFStructs::SharedPtr(new T(std::forward(args)...)); 36 | } 37 | #if defined(JUST_THREADS_AVAILABLE) 38 | else if constexpr (std::is_same_v, std::experimental::shared_ptr>) { 39 | return std::experimental::make_shared(std::forward(args)...); 40 | } 41 | #endif 42 | } 43 | 44 | 45 | template typename SharedPtr> 46 | static void bench_new(benchmark::State& state) { 47 | std::unique_ptr[]> sps = std::make_unique_for_overwrite[]>(1000000); 48 | size_t i = 0; 49 | for (auto _ : state) { 50 | new (&sps[i++]) SharedPtr{new int(42)}; 51 | } 52 | } 53 | 54 | template typename SharedPtr> 55 | static void bench_make(benchmark::State& state) { 56 | std::unique_ptr[]> sps = std::make_unique_for_overwrite[]>(1000000); 57 | size_t i = 0; 58 | for (auto _ : state) { 59 | new (&sps[i++]) SharedPtr{dispatch_make_shared(42)}; 60 | } 61 | } 62 | 63 | template typename SharedPtr> 64 | static void bench_copy(benchmark::State& state) { 65 | SharedPtr sp{new int(42)}; 66 | std::unique_ptr[]> sps = std::make_unique_for_overwrite[]>(1000000); 67 | size_t i = 0; 68 | for (auto _ : state) { 69 | new (&sps[i++]) SharedPtr{sp}; 70 | } 71 | } 72 | 73 | template typename SharedPtr> 74 | static void bench_decrement(benchmark::State& state) { 75 | SharedPtr sp{new int(42)}; 76 | std::unique_ptr[]> sps = std::make_unique_for_overwrite[]>(1000000); 77 | for (size_t i = 0; i < 1000000; i++) { 78 | new (&sps[i++]) SharedPtr{sp}; 79 | } 80 | size_t i = 0; 81 | for (auto _ : state) { 82 | sps[i++].~SharedPtr(); 83 | } 84 | sps.release(); 85 | } 86 | 87 | template typename SharedPtr> 88 | static void bench_destroy(benchmark::State& state) { 89 | std::unique_ptr[]> sps = std::make_unique_for_overwrite[]>(1000000); 90 | for (size_t i = 0; i < 1000000; i++) { 91 | new (&sps[i++]) SharedPtr{new int(i)}; 92 | } 93 | size_t i = 0; 94 | for (auto _ : state) { 95 | sps[i++].~SharedPtr(); 96 | } 97 | sps.release(); 98 | } 99 | 100 | #define SETUP_BENCHMARK(ptr_name, bench_name, bench) \ 101 | BENCHMARK(bench) \ 102 | ->Name(ptr_name "::" bench_name) \ 103 | ->UseRealTime() \ 104 | ->Iterations(1000000); 105 | 106 | #define BENCH_PTR(name, sp) \ 107 | SETUP_BENCHMARK(name, "new", (bench_new)); \ 108 | SETUP_BENCHMARK(name, "make", (bench_make)); \ 109 | SETUP_BENCHMARK(name, "decrement", (bench_make)); \ 110 | SETUP_BENCHMARK(name, "destroy", (bench_make)); 111 | 112 | 113 | BENCH_PTR("STL", std::shared_ptr); 114 | BENCH_PTR("Mine", parlay::shared_ptr); 115 | BENCH_PTR("JSS-Free", jss::shared_ptr); 116 | BENCH_PTR("Vtyulb", LFStructs::SharedPtr); 117 | 118 | #ifdef JUST_THREADS_AVAILABLE 119 | BENCH_PTR("JSS", std::experimental::shared_ptr); 120 | #endif 121 | 122 | -------------------------------------------------------------------------------- /benchmark/external/anthonywilliams/README.md: -------------------------------------------------------------------------------- 1 | # README # 2 | 3 | This is an implementation of a lock-free atomic_shared_ptr class template as described in N4162 (http://isocpp.org/files/papers/N4162.pdf). 
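The header defines `jss::shared_ptr`, `jss::weak_ptr`, `jss::make_shared` and a `jss::atomic_shared_ptr<T>` with `load`, `store`, `exchange` and `compare_exchange_*`. A minimal usage sketch (illustrative only; the include path is the one used by `benchmark/all_ptrs.hpp`, adjust it to wherever the header lives in your tree):

```c++
#include <cassert>

#include "external/anthonywilliams/atomic_shared_ptr"  // path as included from benchmark/all_ptrs.hpp

int main() {
  jss::atomic_shared_ptr<int> asp{jss::make_shared<int>(1)};

  jss::shared_ptr<int> expected = asp.load();            // lock-free load
  jss::shared_ptr<int> desired = jss::make_shared<int>(2);

  // Strong CAS succeeds here because no other thread has replaced the value in between.
  bool ok = asp.compare_exchange_strong(expected, desired);
  assert(ok && *asp.load() == 2);

  asp.store(jss::shared_ptr<int>());                     // store a null pointer; the old object is released
  return 0;
}
```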
4 | 5 | It is provided as a single header file released under the BSD license. 6 | -------------------------------------------------------------------------------- /benchmark/external/anthonywilliams/atomic_shared_ptr: -------------------------------------------------------------------------------- 1 | // //-*-C++-*- 2 | // Implementation of atomic_shared_ptr as per N4162 3 | // (http://isocpp.org/files/papers/N4162.pdf) 4 | // 5 | // Copyright (c) 2014, Just Software Solutions Ltd 6 | // All rights reserved. 7 | // 8 | // Redistribution and use in source and binary forms, with or 9 | // without modification, are permitted provided that the 10 | // following conditions are met: 11 | // 12 | // 1. Redistributions of source code must retain the above 13 | // copyright notice, this list of conditions and the following 14 | // disclaimer. 15 | // 16 | // 2. Redistributions in binary form must reproduce the above 17 | // copyright notice, this list of conditions and the following 18 | // disclaimer in the documentation and/or other materials 19 | // provided with the distribution. 20 | // 21 | // 3. Neither the name of the copyright holder nor the names of 22 | // its contributors may be used to endorse or promote products 23 | // derived from this software without specific prior written 24 | // permission. 25 | // 26 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND 27 | // CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 28 | // INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 29 | // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 30 | // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 31 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 32 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 33 | // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 34 | // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 35 | // HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 36 | // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 37 | // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 38 | // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
39 | 40 | #ifndef _JSS_ATOMIC_SHARED_PTR 41 | #define _JSS_ATOMIC_SHARED_PTR 42 | 43 | #include 44 | #include 45 | 46 | #include 47 | 48 | namespace jss{ 49 | template class shared_ptr; 50 | template class weak_ptr; 51 | 52 | struct shared_ptr_data_block_base{}; 53 | 54 | template 55 | struct shared_ptr_deleter_base{ 56 | D d; 57 | 58 | shared_ptr_deleter_base(D& d_): 59 | d(d_) 60 | {} 61 | 62 | template 63 | void do_delete(P p) 64 | { 65 | d(p); 66 | } 67 | }; 68 | 69 | template<> 70 | struct shared_ptr_deleter_base{ 71 | template 72 | void do_delete(T* p) 73 | { 74 | delete p; 75 | } 76 | }; 77 | 78 | struct shared_ptr_header_block_base{ 79 | struct counter{ 80 | unsigned external_counters; 81 | int count; 82 | 83 | counter() noexcept: 84 | external_counters(0), 85 | count(1) 86 | {} 87 | }; 88 | 89 | static unsigned const cast_pointer_count=3; 90 | struct ptr_extension_block{ 91 | std::atomic cast_pointers[cast_pointer_count]; 92 | std::atomic cp_extension; 93 | 94 | ptr_extension_block(): 95 | cp_extension(0) 96 | { 97 | for(unsigned i=0;iget_ptr_index(p)+cast_pointer_count; 126 | } 127 | 128 | void* get_pointer(unsigned index) 129 | { 130 | return (indexget_pointer(index-cast_pointer_count); 133 | } 134 | 135 | ~ptr_extension_block() 136 | { 137 | delete cp_extension.load(); 138 | } 139 | 140 | }; 141 | 142 | std::atomic count; 143 | std::atomic weak_count; 144 | ptr_extension_block cp_extension; 145 | 146 | unsigned use_count() 147 | { 148 | counter c=count.load(std::memory_order_relaxed); 149 | return c.count+(c.external_counters?1:0); 150 | } 151 | 152 | unsigned get_ptr_index(void* p) 153 | { 154 | return cp_extension.get_ptr_index(p); 155 | } 156 | 157 | virtual ~shared_ptr_header_block_base() 158 | {} 159 | 160 | template 161 | T* get_ptr(unsigned index) 162 | { 163 | return static_cast(cp_extension.get_pointer(index)); 164 | } 165 | 166 | shared_ptr_header_block_base(): 167 | count(counter()),weak_count(1) 168 | {} 169 | 170 | virtual void do_delete()=0; 171 | 172 | void delete_object() 173 | { 174 | do_delete(); 175 | dec_weak_count(); 176 | } 177 | 178 | void dec_weak_count() 179 | { 180 | if(weak_count.fetch_add(-1)==1){ 181 | delete this; 182 | } 183 | } 184 | 185 | void inc_weak_count() 186 | { 187 | ++weak_count; 188 | } 189 | 190 | void dec_count() 191 | { 192 | counter old=count.load(std::memory_order_relaxed); 193 | for(;;){ 194 | counter new_count=old; 195 | --new_count.count; 196 | if(count.compare_exchange_weak(old,new_count)) 197 | break; 198 | } 199 | if((old.count==1) && !old.external_counters){ 200 | delete_object(); 201 | } 202 | } 203 | 204 | bool shared_from_weak() 205 | { 206 | counter old=count.load(std::memory_order_relaxed); 207 | while(old.count||old.external_counters){ 208 | counter new_count=old; 209 | ++new_count.count; 210 | if(count.compare_exchange_weak(old,new_count)) 211 | return true; 212 | } 213 | return false; 214 | } 215 | 216 | void inc_count() 217 | { 218 | counter old=count.load(std::memory_order_relaxed); 219 | for(;;){ 220 | counter new_count=old; 221 | ++new_count.count; 222 | if(count.compare_exchange_weak(old,new_count)) 223 | break; 224 | } 225 | } 226 | 227 | void add_external_counters(unsigned external_count) 228 | { 229 | counter old=count.load(std::memory_order_relaxed); 230 | for(;;){ 231 | counter new_count=old; 232 | new_count.external_counters+=external_count; 233 | if(count.compare_exchange_weak(old,new_count)) 234 | break; 235 | } 236 | } 237 | 238 | void remove_external_counter() 239 | { 240 | counter 
old=count.load(std::memory_order_relaxed); 241 | for(;;){ 242 | counter new_count=old; 243 | --new_count.external_counters; 244 | if(count.compare_exchange_weak(old,new_count)) 245 | break; 246 | } 247 | if(!old.count && (old.external_counters==1)){ 248 | delete_object(); 249 | } 250 | } 251 | 252 | }; 253 | 254 | template 255 | struct shared_ptr_header_block: 256 | shared_ptr_header_block_base{}; 257 | 258 | template 259 | struct shared_ptr_header_separate: 260 | public shared_ptr_header_block

, 261 | private shared_ptr_deleter_base{ 262 | P const ptr; 263 | 264 | void* get_base_ptr() 265 | { 266 | return ptr; 267 | } 268 | 269 | shared_ptr_header_separate(P p): 270 | ptr(p) 271 | {} 272 | 273 | template 274 | shared_ptr_header_separate(P p,D2& d): 275 | shared_ptr_deleter_base(d),ptr(p) 276 | {} 277 | 278 | void do_delete() 279 | { 280 | shared_ptr_deleter_base::do_delete(ptr); 281 | } 282 | }; 283 | 284 | template 285 | struct shared_ptr_header_combined: 286 | public shared_ptr_header_block{ 287 | typedef typename std::aligned_storage::type storage_type; 288 | storage_type storage; 289 | 290 | T* value(){ 291 | return static_cast(get_base_ptr()); 292 | } 293 | 294 | void* get_base_ptr() 295 | { 296 | return &storage; 297 | } 298 | 299 | template 300 | shared_ptr_header_combined(Args&& ... args) 301 | { 302 | new(get_base_ptr()) T(static_cast(args)...); 303 | } 304 | 305 | void do_delete() 306 | { 307 | value()->~T(); 308 | } 309 | }; 310 | 311 | template 312 | shared_ptr make_shared(Args&& ... args); 313 | 314 | template class shared_ptr { 315 | private: 316 | T* ptr; 317 | shared_ptr_header_block_base* header; 318 | 319 | template 320 | friend class atomic_shared_ptr; 321 | template 322 | friend class shared_ptr; 323 | template 324 | friend class weak_ptr; 325 | 326 | template 327 | friend shared_ptr make_shared(Args&& ... args); 328 | 329 | shared_ptr(shared_ptr_header_block_base* header_,unsigned index): 330 | ptr(header_?header_->get_ptr(index):nullptr),header(header_) 331 | { 332 | if(header){ 333 | header->inc_count(); 334 | } 335 | } 336 | 337 | shared_ptr(shared_ptr_header_block_base* header_,T* ptr_): 338 | ptr(ptr_),header(header_) 339 | { 340 | if(header && !header->shared_from_weak()){ 341 | ptr=nullptr; 342 | header=nullptr; 343 | } 344 | } 345 | 346 | shared_ptr(shared_ptr_header_combined* header_): 347 | ptr(header_->value()),header(header_) 348 | {} 349 | 350 | void clear() 351 | { 352 | header=nullptr; 353 | ptr=nullptr; 354 | } 355 | 356 | public: 357 | typedef T element_type; 358 | // 20.8.2.2.1, constructors: 359 | constexpr shared_ptr() noexcept: 360 | ptr(nullptr),header(nullptr) 361 | {} 362 | 363 | template explicit shared_ptr(Y* p) 364 | try: 365 | ptr(p), 366 | header(new shared_ptr_header_separate(p)) 367 | {} 368 | catch(...){ 369 | delete p; 370 | } 371 | 372 | 373 | template shared_ptr(Y* p, D d) 374 | try: 375 | ptr(p),header(new shared_ptr_header_separate(p,d)) 376 | {} 377 | catch(...){ 378 | d(p); 379 | } 380 | 381 | template shared_ptr(std::nullptr_t p, D d) 382 | try: 383 | ptr(p), 384 | header(new shared_ptr_header_separate(p,d)) 385 | {} 386 | catch(...){ 387 | d(p); 388 | } 389 | 390 | template shared_ptr(Y* p, D d, A a); 391 | template shared_ptr(std::nullptr_t p, D d, A a); 392 | 393 | template shared_ptr(const shared_ptr& r, T* p) noexcept: 394 | ptr(p),header(r.header) 395 | { 396 | if(header) 397 | header->inc_count(); 398 | } 399 | 400 | shared_ptr(const shared_ptr& r) noexcept: 401 | ptr(r.ptr),header(r.header) 402 | { 403 | if(header) 404 | header->inc_count(); 405 | } 406 | 407 | template shared_ptr(const shared_ptr& r) noexcept: 408 | ptr(r.ptr),header(r.header) 409 | { 410 | if(header) 411 | header->inc_count(); 412 | } 413 | 414 | shared_ptr(shared_ptr&& r) noexcept: 415 | ptr(r.ptr),header(r.header) 416 | { 417 | r.clear(); 418 | } 419 | 420 | template shared_ptr(shared_ptr&& r) noexcept: 421 | ptr(r.ptr),header(r.header) 422 | { 423 | r.clear(); 424 | } 425 | 426 | template explicit shared_ptr(const weak_ptr& r); 427 | 428 | 
template shared_ptr(std::unique_ptr&& r): 429 | ptr(r.get()), 430 | header( 431 | r.get()? 432 | new shared_ptr_header_separate(r.get(),r.get_deleter()):nullptr) 433 | { 434 | r.release(); 435 | } 436 | constexpr shared_ptr(std::nullptr_t) : shared_ptr() { } 437 | // 20.8.2.2.2, destructor: 438 | ~shared_ptr() 439 | { 440 | if(header){ 441 | header->dec_count(); 442 | } 443 | } 444 | 445 | // 20.8.2.2.3, assignment: 446 | shared_ptr& operator=(const shared_ptr& r) noexcept 447 | { 448 | if(&r!=this){ 449 | shared_ptr temp(r); 450 | swap(temp); 451 | } 452 | return *this; 453 | } 454 | template shared_ptr& operator=(const shared_ptr& r) noexcept 455 | { 456 | shared_ptr temp(r); 457 | swap(temp); 458 | return *this; 459 | } 460 | 461 | shared_ptr& operator=(shared_ptr&& r) noexcept 462 | { 463 | swap(r); 464 | r.reset(); 465 | return *this; 466 | } 467 | 468 | template shared_ptr& operator=(shared_ptr&& r) noexcept 469 | { 470 | shared_ptr temp(static_cast&&>(r)); 471 | swap(temp); 472 | return *this; 473 | } 474 | 475 | template shared_ptr& operator=(std::unique_ptr&& r) 476 | { 477 | shared_ptr temp(static_cast&&>(r)); 478 | swap(temp); 479 | return *this; 480 | } 481 | // 20.8.2.2.4, modifiers: 482 | void swap(shared_ptr& r) noexcept 483 | { 484 | std::swap(ptr,r.ptr); 485 | std::swap(header,r.header); 486 | } 487 | void reset() noexcept 488 | { 489 | if(header){ 490 | header->dec_count(); 491 | } 492 | clear(); 493 | } 494 | 495 | template void reset(Y* p) 496 | { 497 | shared_ptr temp(p); 498 | swap(temp); 499 | } 500 | 501 | template void reset(Y* p, D d) 502 | { 503 | shared_ptr temp(p,d); 504 | swap(temp); 505 | } 506 | 507 | template void reset(Y* p, D d, A a); 508 | // 20.8.2.2.5, observers: 509 | T* get() const noexcept 510 | { 511 | return ptr; 512 | } 513 | 514 | T& operator*() const noexcept 515 | { 516 | return *ptr; 517 | } 518 | 519 | T* operator->() const noexcept 520 | { 521 | return ptr; 522 | } 523 | 524 | long use_count() const noexcept 525 | { 526 | return header?header->use_count():0; 527 | } 528 | 529 | bool unique() const noexcept 530 | { 531 | return use_count()==1; 532 | } 533 | 534 | explicit operator bool() const noexcept 535 | { 536 | return ptr; 537 | } 538 | template bool owner_before(shared_ptr const& b) const; 539 | template bool owner_before(weak_ptr const& b) const; 540 | 541 | friend inline bool operator==(shared_ptr const& lhs,shared_ptr const& rhs) 542 | { 543 | return lhs.ptr==rhs.ptr; 544 | } 545 | 546 | friend inline bool operator!=(shared_ptr const& lhs,shared_ptr const& rhs) 547 | { 548 | return !(lhs==rhs); 549 | } 550 | 551 | }; 552 | 553 | template 554 | shared_ptr make_shared(Args&& ... 
args){ 555 | return shared_ptr( 556 | new shared_ptr_header_combined( 557 | static_cast(args)...)); 558 | } 559 | 560 | template class weak_ptr { 561 | T* ptr; 562 | shared_ptr_header_block_base* header; 563 | 564 | void clear() 565 | { 566 | header=nullptr; 567 | ptr=nullptr; 568 | } 569 | 570 | public: 571 | typedef T element_type; 572 | // 20.8.2.3.1, constructors 573 | constexpr weak_ptr() noexcept: 574 | header(nullptr) 575 | {} 576 | 577 | template weak_ptr(shared_ptr const& r) noexcept: 578 | ptr(r.ptr),header(r.header) 579 | { 580 | if(header) 581 | header->inc_weak_count(); 582 | } 583 | 584 | weak_ptr(weak_ptr const& r) noexcept: 585 | ptr(r.ptr),header(r.header) 586 | { 587 | if(header) 588 | header->inc_weak_count(); 589 | } 590 | template weak_ptr(weak_ptr const& r) noexcept: 591 | ptr(r.ptr),header(r.header) 592 | { 593 | if(header) 594 | header->inc_weak_count(); 595 | } 596 | weak_ptr(weak_ptr&& r) noexcept: 597 | ptr(r.ptr),header(r.header) 598 | { 599 | r.clear(); 600 | } 601 | template weak_ptr(weak_ptr&& r) noexcept: 602 | ptr(r.ptr),header(r.header) 603 | { 604 | r.clear(); 605 | } 606 | // 20.8.2.3.2, destructor 607 | ~weak_ptr() 608 | { 609 | if(header) 610 | header->dec_weak_count(); 611 | } 612 | 613 | // 20.8.2.3.3, assignment 614 | weak_ptr& operator=(weak_ptr const& r) noexcept 615 | { 616 | if(&r!=this){ 617 | weak_ptr temp(r); 618 | swap(temp); 619 | } 620 | return *this; 621 | } 622 | 623 | template weak_ptr& operator=(weak_ptr const& r) noexcept 624 | { 625 | weak_ptr temp(r); 626 | swap(temp); 627 | return *this; 628 | } 629 | 630 | template weak_ptr& operator=(shared_ptr const& r) noexcept 631 | { 632 | weak_ptr temp(r); 633 | swap(temp); 634 | return *this; 635 | } 636 | 637 | weak_ptr& operator=(weak_ptr&& r) noexcept 638 | { 639 | swap(r); 640 | r.reset(); 641 | return *this; 642 | } 643 | 644 | template weak_ptr& operator=(weak_ptr&& r) noexcept 645 | { 646 | weak_ptr temp(static_cast&&>(r)); 647 | swap(temp); 648 | return *this; 649 | } 650 | 651 | // 20.8.2.3.4, modifiers 652 | void swap(weak_ptr& r) noexcept 653 | { 654 | std::swap(r.header,header); 655 | std::swap(r.ptr,ptr); 656 | } 657 | 658 | void reset() noexcept 659 | { 660 | if(header) 661 | header->dec_weak_count(); 662 | clear(); 663 | } 664 | 665 | // 20.8.2.3.5, observers 666 | long use_count() const noexcept 667 | { 668 | return header?header->use_count():0; 669 | } 670 | 671 | bool expired() const noexcept 672 | { 673 | return !use_count(); 674 | } 675 | 676 | shared_ptr lock() const noexcept 677 | { 678 | return shared_ptr(header,ptr); 679 | } 680 | 681 | template bool owner_before(shared_ptr const& b) const; 682 | template bool owner_before(weak_ptr const& b) const; 683 | }; 684 | 685 | #ifdef _MSC_VER 686 | #define JSS_ASP_ALIGN_TO(alignment) __declspec(align(alignment)) 687 | #ifdef _WIN64 688 | #define JSS_ASP_BITFIELD_SIZE 32 689 | #else 690 | #define JSS_ASP_BITFIELD_SIZE 16 691 | #endif 692 | #else 693 | #define JSS_ASP_ALIGN_TO(alignment) __attribute__((aligned(alignment))) 694 | #ifdef __LP64__ 695 | #define JSS_ASP_BITFIELD_SIZE 32 696 | #else 697 | #define JSS_ASP_BITFIELD_SIZE 16 698 | #endif 699 | #endif 700 | 701 | template 702 | class atomic_shared_ptr 703 | { 704 | template 705 | friend class atomic_shared_ptr; 706 | 707 | struct counted_ptr{ 708 | unsigned access_count:JSS_ASP_BITFIELD_SIZE; 709 | unsigned index:JSS_ASP_BITFIELD_SIZE; 710 | shared_ptr_header_block_base* ptr; 711 | 712 | counted_ptr() noexcept: 713 | access_count(0),index(0),ptr(nullptr) 714 | {} 715 | 
716 | counted_ptr(shared_ptr_header_block_base* ptr_,unsigned index_): 717 | access_count(0),index(index_),ptr(ptr_) 718 | {} 719 | }; 720 | 721 | // Switched boost::atomic to boost::atomic for portable DWCAS support 722 | mutable JSS_ASP_ALIGN_TO(sizeof(counted_ptr)) boost::atomic p; 723 | 724 | struct local_access{ 725 | boost::atomic& p; 726 | counted_ptr val; 727 | 728 | void acquire(boost::memory_order order){ 729 | if(!val.ptr) 730 | return; 731 | for(;;){ 732 | counted_ptr newval=val; 733 | ++newval.access_count; 734 | if(p.compare_exchange_weak(val,newval,order)) 735 | break; 736 | } 737 | ++val.access_count; 738 | } 739 | 740 | local_access( 741 | boost::atomic& p_, 742 | boost::memory_order order=boost::memory_order_relaxed): 743 | p(p_),val(p.load(order)) 744 | { 745 | acquire(order); 746 | } 747 | 748 | ~local_access() 749 | { 750 | release(); 751 | } 752 | 753 | void release(){ 754 | if(!val.ptr) 755 | return; 756 | counted_ptr target=val; 757 | do{ 758 | counted_ptr newval=target; 759 | --newval.access_count; 760 | if(p.compare_exchange_weak(target,newval)) 761 | break; 762 | }while(target.ptr==val.ptr); 763 | if(target.ptr!=val.ptr){ 764 | val.ptr->remove_external_counter(); 765 | } 766 | } 767 | 768 | void refresh(counted_ptr newval,boost::memory_order order){ 769 | if(newval.ptr==val.ptr) 770 | return; 771 | release(); 772 | val=newval; 773 | acquire(order); 774 | } 775 | 776 | shared_ptr_header_block_base* get_ptr() 777 | { 778 | return val.ptr; 779 | } 780 | 781 | shared_ptr get_shared_ptr() 782 | { 783 | return shared_ptr(val.ptr,val.index); 784 | } 785 | 786 | }; 787 | 788 | 789 | public: 790 | 791 | bool is_lock_free() const noexcept 792 | { 793 | return p.is_lock_free(); 794 | } 795 | 796 | void store( 797 | shared_ptr newptr, 798 | boost::memory_order order= boost::memory_order_seq_cst) /*noexcept*/ 799 | { 800 | unsigned index=0; 801 | if(newptr.header){ 802 | index=newptr.header->get_ptr_index(newptr.ptr); 803 | } 804 | counted_ptr old=p.exchange(counted_ptr(newptr.header,index),order); 805 | if(old.ptr){ 806 | old.ptr->add_external_counters(old.access_count); 807 | old.ptr->dec_count(); 808 | } 809 | newptr.clear(); 810 | } 811 | 812 | shared_ptr load( 813 | boost::memory_order order= boost::memory_order_seq_cst) const noexcept 814 | { 815 | local_access guard(p,order); 816 | return guard.get_shared_ptr(); 817 | } 818 | 819 | operator shared_ptr() const noexcept { 820 | return load(); 821 | } 822 | 823 | shared_ptr exchange( 824 | shared_ptr newptr, 825 | boost::memory_order order= boost::memory_order_seq_cst) /*noexcept*/ 826 | { 827 | counted_ptr newval( 828 | newptr.header, 829 | newptr.header?newptr.header->get_ptr_index(newptr.ptr):0); 830 | counted_ptr old=p.exchange(newval,order); 831 | shared_ptr res(old.ptr,old.index); 832 | if(old.ptr){ 833 | old.ptr->add_external_counters(old.access_count); 834 | old.ptr->dec_count(); 835 | } 836 | newptr.clear(); 837 | return res; 838 | } 839 | 840 | bool compare_exchange_weak( 841 | shared_ptr & expected, shared_ptr newptr, 842 | boost::memory_order success_order=boost::memory_order_seq_cst, 843 | boost::memory_order failure_order=boost::memory_order_seq_cst) /*noexcept*/ 844 | { 845 | local_access guard(p); 846 | if(guard.get_ptr()!=expected.header){ 847 | expected=guard.get_shared_ptr(); 848 | return false; 849 | } 850 | 851 | counted_ptr expectedval( 852 | expected.header, 853 | expected.header?expected.header->get_ptr_index(expected.ptr):0); 854 | 855 | if(guard.val.index!=expectedval.index){ 856 | 
expected=guard.get_shared_ptr(); 857 | return false; 858 | } 859 | 860 | counted_ptr oldval(guard.val); 861 | counted_ptr newval( 862 | newptr.header, 863 | newptr.header?newptr.header->get_ptr_index(newptr.ptr):0); 864 | if((oldval.ptr==newval.ptr) && (oldval.index==newval.index)){ 865 | return true; 866 | } 867 | if(p.compare_exchange_weak(oldval,newval,success_order,failure_order)){ 868 | if(oldval.ptr){ 869 | oldval.ptr->add_external_counters(oldval.access_count); 870 | oldval.ptr->dec_count(); 871 | } 872 | newptr.clear(); 873 | return true; 874 | } 875 | else{ 876 | guard.refresh(oldval,failure_order); 877 | expected=guard.get_shared_ptr(); 878 | return false; 879 | } 880 | } 881 | 882 | bool compare_exchange_strong( 883 | shared_ptr &expected,shared_ptr newptr, 884 | boost::memory_order success_order=boost::memory_order_seq_cst, 885 | boost::memory_order failure_order=boost::memory_order_seq_cst) noexcept 886 | { 887 | shared_ptr local_expected=expected; 888 | do{ 889 | if(compare_exchange_weak(expected,newptr,success_order,failure_order)) 890 | return true; 891 | } 892 | while(expected==local_expected); 893 | return false; 894 | } 895 | 896 | atomic_shared_ptr() noexcept = default; 897 | atomic_shared_ptr( shared_ptr val) /*noexcept*/: 898 | p(counted_ptr(val.header,val.header?val.header->get_ptr_index(val.ptr):0)) 899 | { 900 | val.header=nullptr; 901 | val.ptr=nullptr; 902 | } 903 | 904 | ~atomic_shared_ptr() 905 | { 906 | counted_ptr old=p.load(boost::memory_order_relaxed); 907 | if(old.ptr) 908 | old.ptr->dec_count(); 909 | } 910 | 911 | atomic_shared_ptr(const atomic_shared_ptr&) = delete; 912 | atomic_shared_ptr& operator=(const atomic_shared_ptr&) = delete; 913 | shared_ptr operator=(shared_ptr newval) noexcept 914 | { 915 | store(static_cast&&>(newval)); 916 | return newval; 917 | } 918 | 919 | }; 920 | 921 | } 922 | 923 | #endif 924 | -------------------------------------------------------------------------------- /benchmark/external/vtyulb/README.md: -------------------------------------------------------------------------------- 1 | # AtomicSharedPtr 2 | Lock-Free implementation of std::atomic<std::shared_ptr> & several Lock-Free data structures based on it 3 | 4 | # Motivation 5 | This project was created as a proof-of-concept for std::atomic<std::shared_ptr>. 6 | In [proposal N4058](http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2014/n4058.pdf) Herb Sutter 7 | suggested atomic specialization for shared_ptr. Such specialization gives ability to write Lock-Free algorithms 8 | easily by avoiding [ABA-problem](https://en.wikipedia.org/wiki/ABA_problem). Some languages (java) use 9 | garbage collection, so they never get same pointers, other languages are just too slow to get any 10 | advantage from Lock-Free implementations. However, in C++ we have a lot of problems out of nowhere with 11 | memory handling. All these problems can be evaded with AtomicSharedPtr, which can update it's value 12 | in Lock-Free style and you will never receive same pointers which can break your program. 13 | 14 | Current std::atomic<std::shared_ptr> is implemented by using some mutexes (I checked libcxx-10.0.0). It is fast but it gives up 15 | all Lock-Free guarantees this way. I was not satisfied it, so I just implemented Lock-Free version. 16 | It is possible to create AtomicSharedPtr compatible with std::shared_ptr by using same control block, 17 | but mine implementation uses it's own block, because it's a lot easier. 
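Concretely, usage looks roughly like this (a minimal sketch written against the `AtomicSharedPtr`/`SharedPtr` interface defined in atomic_shared_ptr.h; the include path depends on where the header sits in your tree):

```c++
#include "atomic_shared_ptr.h"   // vendored in this repository as benchmark/external/vtyulb/atomic_shared_ptr.h

int main() {
    LFStructs::AtomicSharedPtr<int> asp(new int(1));      // wraps the raw pointer in a new control block

    LFStructs::SharedPtr<int> sp = asp.load();            // independent reference, no lifetime tie to asp
    {
        auto fast = asp.getFast();                        // cheaper read, but must not outlive asp
        int v = *fast.get();
        (void)v;
    }

    asp.store(new int(2));                                // old object is freed once sp also goes away
    bool swapped = asp.compareExchange(sp.get(), LFStructs::SharedPtr<int>(new int(3)));
    (void)swapped;                                        // false here: sp no longer matches the stored value
    return 0;
}
```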
18 | 
19 | # Other implementations
20 | I currently know about 2 different std::atomic&lt;std::shared_ptr&gt; implementations.
21 | The first one is https://github.com/anthonywilliams/atomic_shared_ptr
22 | It uses 128-bit Compare-And-Swap (CAS) operations. Not every platform supports such operations,
23 | and they are too slow.
24 | 
25 | The second implementation is from facebook, [folly/concurrency](https://github.com/facebook/folly/blob/master/folly/concurrency/AtomicSharedPtr.h).
26 | It uses a simple hack: inside the 64-bit pointer there is a 16-bit reference counter (refcount).
27 | You can do this as long as your addresses fit in 48 bits. I don't think there are any
28 | major problems with facebook's implementation except this hack, and the fact that
29 | I understood nothing in their code except the big comment at the start of the main header.
30 | So it was just easier to figure out all the details by writing my own implementation
31 | (I was mistaken, of course).
32 | 
33 | The same hack is used by me. The first 48 bits in my packed structure are a pointer, the last 16 bits
34 | are a refcount. This way, using fetch_add(1), I can increase the local refcount and get the pointer
35 | to the control block atomically. A global refcount inside the control block is required anyway,
36 | because there can be several atomic pointers for the same control block.
37 | 
38 | # Project structure
39 | - AtomicSharedPtr, SharedPtr, ControlBlock and FastSharedPtr
40 | - LFStack, LFQueue, LFMap, LFMapAvl
41 | - FastLogger
42 | 
43 | LFMap is based on a randomized [treap](https://en.wikipedia.org/wiki/Treap).
44 | LFMapAvl is an [AVL tree](https://en.wikipedia.org/wiki/AVL_tree).
45 | 
46 | AtomicSharedPtr::getFast() -> FastSharedPtr:
47 | - Destruction of the AtomicSharedPtr during the lifetime of the FastSharedPtr is undefined behaviour
48 | - Read is a one-time fetch_add
49 | - Destruction is one compare_exchange if nothing changed, one if the AtomicSharedPtr
50 | changed. One or more compare_exchanges might be required under active contention
51 | 
52 | AtomicSharedPtr::get() -> SharedPtr:
53 | - No lifetime dependencies
54 | - Read is 2 fetch_add + 1 compare_exchange. One or more CAS might be required under active contention
55 | - Destruction is 1 fetch_sub
56 | - Data pointer access is free
57 | - Copying is 1 fetch_add
58 | 
59 | AtomicSharedPtr::compareExchange():
60 | - This is actually a strong version
61 | - 1 AtomicSharedPtr::getFast() + zero or more {fetch_add + CAS + fetch_sub} + one or more CAS
62 | 
63 | I suggest you look at the [queue](https://github.com/vtyulb/AtomicSharedPtr/blob/master/src/lfqueue.h) and
64 | [stack](https://github.com/vtyulb/AtomicSharedPtr/blob/master/src/lfstack.h) code - life becomes
65 | a lot easier when you don't have to worry about memory.
66 | 
67 | # ABA problem and chain reaction at destruction
68 | AtomicSharedPtr is not affected by the ABA problem in any scenario. You can push the same control block
69 | to the pointer over and over again; nothing bad will happen.
70 | 
71 | However, if you write your own Lock-Free structs based on AtomicSharedPtr you can encounter the chain reaction problem.
72 | For example, if you have a stack with 1000000 elements and you destroy its top, the top will destroy the next
73 | pointer, the next pointer will destroy the next one, and so on. My implementation uses deferred destruction, which is a little slower,
74 | but it won't crash because of stack overflow. There will be a visible lag when the whole chain is destructed,
75 | whereas there won't be any lag with a mutexed std::stack.
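The deferral queues each release in a thread-local buffer and lets the outermost call drain that buffer iteratively, so a long chain never deepens the call stack. A self-contained sketch of the pattern (simplified; the real logic lives in SharedPtr's destructor in atomic_shared_ptr.h and works on control blocks rather than this toy Node type):

```c++
#include <atomic>
#include <vector>

// Illustration of the deferred-destruction idea, not the project's actual code.
struct Node {
  std::atomic<int> refCount{1};
  Node* next = nullptr;          // releasing this node triggers a release of the next one
};

void release(Node* n) {
  thread_local std::vector<Node*> pending;   // releases queued on this thread
  thread_local bool draining = false;

  pending.push_back(n);
  if (draining) return;          // an outer release() on this thread will process the queue

  draining = true;
  while (!pending.empty()) {     // iterative drain: chain length never grows the call stack
    Node* cur = pending.back();
    pending.pop_back();
    if (cur != nullptr && cur->refCount.fetch_sub(1) == 1) {
      release(cur->next);        // re-enters, but only appends to `pending` and returns
      delete cur;
    }
  }
  draining = false;
}

int main() {
  // Build a long chain and drop the head: no stack overflow, just one visible pause.
  Node* head = nullptr;
  for (int i = 0; i < 1000000; i++) {
    Node* n = new Node;
    n->next = head;
    head = n;
  }
  release(head);
}
```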
76 | 77 | # Proof-Of-Work 78 | Code passes thread, memory and address sanitizers while under stress test for 10+ minutes. 79 | There might be a false positive on std::map in memory sanitizer due to some external bug: 80 | [stackoverflow](https://stackoverflow.com/questions/60097307/memory-sanitizer-reports-use-of-uninitialized-value-in-global-object-constructio), 81 | [godbolt](https://godbolt.org/z/pZj6Lm). 82 | Implementation was not tested in any big production yet and not recommended for production use. 83 | 84 | # Build 85 | ``` 86 | git clone https://github.com/vtyulb/AtomicSharedPtr/ 87 | cd AtomicSharedPtr 88 | mkdir build && cd build 89 | cmake -DENABLE_FAST_LOGGING=ON -DCMAKE_BUILD_TYPE=Release -DTSAN=OFF -DMSAN=OFF -DASAN=OFF .. 90 | make 91 | ./AtomicSharedPtr 92 | ``` 93 | 94 | # Speed 95 | This is sample output with Core i7-6700hq processor. First column is number of operations push/pop divided around 50/50 by rand. 96 | All other columns are time in milliseconds which took the test to finish. LF structs are based on AtomicSharedPtr. 97 | Lockable structs use std::queue/std::stack/std::map and a mutex for synchronizations. Initial map size is 10000, 98 | and most of map operations are reads. Lesser is better. 99 | 100 | There are a lot of optimizations still pending. 101 | ``` 102 | vlad@vtyulb-thinkpad ~/AtomicSharedPtr/build (git)-[master] % ./AtomicSharedPtr 103 | running AtomicSharedPtr load/store test... 104 | running simple LFMap test... 105 | running simple LFMapAvl test... 106 | 107 | running correctness LFMap test... 108 | 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 109 | 110 | running correctness LFMapAvl test... 111 | 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100% 112 | 113 | running LFMap stress test... 114 | 1 2 3 4 5 6 7 8 115 | 500000 325 395 383 470 448 562 545 579 116 | 1000000 716 837 782 793 854 794 774 1015 117 | 1500000 674 927 1094 1164 1173 1287 1279 1358 118 | 2000000 1004 1230 1200 1747 1839 1813 1643 1923 119 | 120 | running LFMapAvl stress test... 121 | 1 2 3 4 5 6 7 8 122 | 500000 139 210 235 272 253 239 241 266 123 | 1000000 336 465 442 468 440 450 441 458 124 | 1500000 466 597 579 669 642 658 639 660 125 | 2000000 539 763 825 885 848 848 861 885 126 | 127 | running lockable map stress test 128 | 1 2 3 4 5 6 7 8 129 | 500000 71 340 205 242 246 321 340 312 130 | 1000000 98 545 396 496 627 686 712 722 131 | 1500000 211 1030 705 824 774 875 986 928 132 | 2000000 189 1013 807 1140 1201 1216 1312 1254 133 | 134 | 135 | running simple LFQueue test... 136 | 137 | running LFQueue stress test... 138 | 1 2 3 4 5 6 7 8 139 | 500000 250 551 381 330 340 348 374 433 140 | 1000000 646 1035 805 738 696 709 803 905 141 | 1500000 839 1411 1100 1291 1359 1377 1344 1305 142 | 2000000 1026 1911 1667 1924 1404 1510 1505 1750 143 | 144 | running lockable queue stress test... 145 | 1 2 3 4 5 6 7 8 146 | 500000 24 110 81 107 116 138 148 159 147 | 1000000 50 233 164 221 239 282 304 309 148 | 1500000 76 329 246 331 362 417 447 468 149 | 2000000 97 456 334 436 482 564 597 633 150 | 151 | running simple LFStack test... 152 | 153 | running LFStack stress test... 154 | 1 2 3 4 5 6 7 8 155 | 500000 132 295 343 476 442 547 655 814 156 | 1000000 356 651 720 957 1001 1139 1296 1709 157 | 1500000 427 891 1086 1382 1446 1746 1950 2520 158 | 2000000 735 1306 1479 1948 1875 2189 2737 3425 159 | 160 | running lockable stack stress test... 
161 | 1 2 3 4 5 6 7 8 162 | 500000 23 110 75 106 113 134 150 154 163 | 1000000 49 229 150 219 231 272 294 309 164 | 1500000 67 332 238 336 343 414 448 470 165 | 2000000 96 445 316 432 462 548 592 623 166 | 167 | ./AtomicSharedPtr 485,72s user 178,46s system 400% cpu 2:44,99 total 168 | ``` 169 | 170 | # Debugging with FastLogger 171 | FastLogger is very completed but highly specialized tool. It captures events in a thread_local 172 | ring buffer, which you can view on segfault or abortion, thus understanding what happend. 173 | rdtsc is used to +- synchronize time. I wasted something like 20+ hours on single bug, and 174 | then I wrote FastLogger. After several more hours bug was fixed. 175 | 176 | Due to no active synchronization (except rdtsc call) FastLogger is quite fast. 177 | If you run FAST_LOG() 2 times in a row, you would be able to see that it took around 178 | 30-50 clock cycles between log entries. Atomic operations take 700-1600 cycles, so 179 | FastLogger's impacts measurement result quite a little. Logs to debug your 180 | crashing once-per-day algorithm are invaluable. It is also very interesting to see 181 | how processor cores bounce across your tasks. 182 | 183 | On next motivational screen you can see, that local refcount was moved to global and dropped after CAS. Then thread woke 184 | only to see, that it can't decrease local refcount anymore despite the same pointer address (internal ABA problem, already fixed). 185 |


188 | 189 | Second bug with [heap-use-after-free](https://raw.githubusercontent.com/vtyulb/AtomicSharedPtr/master/resources/00007fffec016880_sample_race_at_destruction) 190 | This one is easier. Thread went to sleep right after CAS (operation 12) at address 00007fffec016880. 191 | It did not increase refcount for threads from which it stole local refcount. Then foreign threads 192 | destroyed their objects decreasing refcount (operation 51) leading to object destruction (operation 100). 193 | Then thread finally woke up just to panic as it wanted to use destroyed object. 194 | 195 | # Other things 196 | I also recommend reading simple wait-free queue algorithm by Alex Kogan and Erez Petrank in article 197 | [Wait-Free Queues With Multiple Enqueuers and Dequeuers](http://www.cs.technion.ac.il/~erez/Papers/wfquque-ppopp.pdf). 198 | It looks like their algorithm is not possible to implement without proper garbage collection 199 | (they used java). It even looks that I can't implement it with any available hacks for now. 200 | Some ideas were taken from that algorithm. Continous global refcount updating looks very 201 | much alike thread helping tasks from wait-free queue. 202 | -------------------------------------------------------------------------------- /benchmark/external/vtyulb/atomic_shared_ptr.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | namespace LFStructs { 11 | 12 | const int MAGIC_LEN = 16; 13 | const size_t MAGIC_MASK = 0x0000'0000'0000'FFFF; 14 | const int CACHE_LINE_SIZE = 128; 15 | 16 | template 17 | struct alignas(CACHE_LINE_SIZE) ControlBlock { 18 | explicit ControlBlock() = delete; 19 | explicit ControlBlock(T *data) 20 | : data(data) 21 | , refCount(1) 22 | { 23 | assert(reinterpret_cast(data) <= 0x0000'FFFF'FFFF'FFFF); 24 | } 25 | 26 | T *data; 27 | std::atomic refCount; 28 | }; 29 | 30 | 31 | template 32 | class SharedPtr { 33 | public: 34 | SharedPtr(): controlBlock(nullptr) {} 35 | explicit SharedPtr(T *data) 36 | : controlBlock(new ControlBlock(data)) 37 | { 38 | 39 | } 40 | explicit SharedPtr(ControlBlock *controlBlock): controlBlock(controlBlock) {} 41 | SharedPtr(const SharedPtr &other) { 42 | controlBlock = other.controlBlock; 43 | if (controlBlock != nullptr) { 44 | int before = controlBlock->refCount.fetch_add(1); 45 | assert(before); 46 | } 47 | }; 48 | SharedPtr(SharedPtr &&other) noexcept { 49 | controlBlock = other.controlBlock; 50 | other.controlBlock = nullptr; 51 | }; 52 | SharedPtr& operator=(const SharedPtr &other) { 53 | auto old = controlBlock; 54 | controlBlock = other.controlBlock; 55 | if (controlBlock != nullptr) { 56 | int before = controlBlock->refCount.fetch_add(1); 57 | assert(before); 58 | } 59 | unref(old); 60 | return *this; 61 | }; 62 | SharedPtr& operator=(SharedPtr &&other) { 63 | if (controlBlock != other.controlBlock) { 64 | auto old = controlBlock; 65 | controlBlock = other.controlBlock; 66 | other.controlBlock = nullptr; 67 | unref(old); 68 | } 69 | return *this; 70 | } 71 | 72 | SharedPtr& operator=(std::nullptr_t) { 73 | auto old = std::exchange(controlBlock, nullptr); 74 | unref(old); 75 | return *this; 76 | } 77 | 78 | ~SharedPtr() { 79 | thread_local std::vector*> destructionQueue; 80 | thread_local bool destructionInProgress = false; 81 | 82 | destructionQueue.push_back(controlBlock); 83 | if (!destructionInProgress) { 84 | destructionInProgress = true; 85 | while 
(destructionQueue.size()) { 86 | ControlBlock *blockToUnref = destructionQueue.back(); 87 | destructionQueue.pop_back(); 88 | unref(blockToUnref); 89 | } 90 | destructionInProgress = false; 91 | } 92 | } 93 | 94 | SharedPtr copy() { return SharedPtr(*this); } 95 | T* get() const { return controlBlock ? controlBlock->data : nullptr; } 96 | T* operator->() const { return controlBlock->data; } 97 | 98 | /* implicit */ explicit(false) operator bool() const noexcept { return get() != nullptr; } // NOLINT 99 | 100 | private: 101 | void unref(ControlBlock *blockToUnref) { 102 | if (blockToUnref) { 103 | int before = blockToUnref->refCount.fetch_sub(1); 104 | assert(before); 105 | if (before == 1) { 106 | delete blockToUnref->data; 107 | delete blockToUnref; 108 | } 109 | } 110 | } 111 | 112 | template friend class AtomicSharedPtr; 113 | ControlBlock *controlBlock; 114 | }; 115 | 116 | 117 | template 118 | class alignas(CACHE_LINE_SIZE) FastSharedPtr { 119 | public: 120 | FastSharedPtr(const FastSharedPtr &other) = delete; 121 | FastSharedPtr(FastSharedPtr &&other) 122 | : knownValue(other.knownValue) 123 | , foreignPackedPtr(other.foreignPackedPtr) 124 | , data(other.data) 125 | { 126 | other.foreignPackedPtr = nullptr; 127 | }; 128 | FastSharedPtr& operator=(FastSharedPtr &&other) { 129 | destroy(); 130 | knownValue = other.knownValue; 131 | foreignPackedPtr = other.foreignPackedPtr; 132 | data = other.data; 133 | other.foreignPackedPtr = nullptr; 134 | return *this; 135 | } 136 | ~FastSharedPtr() { 137 | destroy(); 138 | }; 139 | 140 | ControlBlock* getControlBlock() { return reinterpret_cast*>(knownValue >> MAGIC_LEN); } 141 | T* get() { return data; } 142 | T* operator->(){ return data; } 143 | private: 144 | void destroy() { 145 | if (foreignPackedPtr != nullptr) { 146 | size_t expected = knownValue; 147 | while (!foreignPackedPtr->compare_exchange_weak(expected, expected - 1)) { 148 | if (((expected >> MAGIC_LEN) != (knownValue >> MAGIC_LEN)) || !(expected & MAGIC_MASK)) { 149 | ControlBlock *block = reinterpret_cast*>(knownValue >> MAGIC_LEN); 150 | size_t before = block->refCount.fetch_sub(1); 151 | if (before == 1) { 152 | delete data; 153 | delete block; 154 | } 155 | break; 156 | } 157 | } 158 | } 159 | } 160 | FastSharedPtr(std::atomic *packedPtr) 161 | : knownValue(packedPtr->fetch_add(1) + 1) 162 | , foreignPackedPtr(packedPtr) 163 | , data(getControlBlock()->data) 164 | { 165 | auto block = getControlBlock(); 166 | int diff = knownValue & MAGIC_MASK; 167 | while (diff > 1000 && block == getControlBlock()) { 168 | block->refCount.fetch_add(diff); 169 | if (packedPtr->compare_exchange_strong(knownValue, knownValue - diff)) { 170 | foreignPackedPtr = nullptr; 171 | break; 172 | } 173 | block->refCount.fetch_sub(diff); 174 | diff = knownValue & MAGIC_MASK; 175 | } 176 | }; 177 | 178 | size_t knownValue; 179 | std::atomic *foreignPackedPtr; 180 | T *data; 181 | 182 | template friend class AtomicSharedPtr; 183 | }; 184 | 185 | 186 | template 187 | class alignas(CACHE_LINE_SIZE) AtomicSharedPtr { 188 | public: 189 | AtomicSharedPtr(T *data = nullptr); 190 | ~AtomicSharedPtr(); 191 | 192 | AtomicSharedPtr(const AtomicSharedPtr &other) = delete; 193 | AtomicSharedPtr(AtomicSharedPtr &&other) = delete; 194 | AtomicSharedPtr& operator=(const AtomicSharedPtr &other) = delete; 195 | AtomicSharedPtr& operator=(AtomicSharedPtr &&other) = delete; 196 | 197 | SharedPtr load(); 198 | FastSharedPtr getFast(); 199 | 200 | bool compareExchange(T *expected, SharedPtr &&newOne); // this actually is 
strong version 201 | 202 | bool compare_exchange_strong(SharedPtr& expected, SharedPtr desired); // this actually is strong version 203 | bool compare_exchange_weak(SharedPtr& expected, SharedPtr desired); 204 | 205 | void store(T *data); 206 | void store(SharedPtr data); 207 | 208 | private: 209 | void destroyOldControlBlock(size_t oldPackedPtr); 210 | 211 | /* first 48 bit - pointer to control block 212 | * last 16 bit - local refcount if anyone is accessing control block 213 | * through current AtomicSharedPtr instance right now */ 214 | std::atomic packedPtr; 215 | static_assert(sizeof(T*) == sizeof(size_t)); 216 | }; 217 | 218 | template 219 | AtomicSharedPtr::AtomicSharedPtr(T *data) { 220 | auto block = new ControlBlock(data); 221 | packedPtr.store(reinterpret_cast(block) << MAGIC_LEN); 222 | } 223 | 224 | template 225 | SharedPtr AtomicSharedPtr::load() { 226 | // taking copy and notifying about read in progress 227 | size_t packedPtrCopy = packedPtr.fetch_add(1); 228 | auto block = reinterpret_cast*>(packedPtrCopy >> MAGIC_LEN); 229 | int before = block->refCount.fetch_add(1); 230 | assert(before); 231 | // copy is completed 232 | 233 | // notifying about completed copy 234 | size_t expected = packedPtrCopy + 1; 235 | while (true) { 236 | assert((expected & MAGIC_MASK) > 0); 237 | size_t expCopy = expected; 238 | if (packedPtr.compare_exchange_weak(expected, expected - 1)) { 239 | break; 240 | } 241 | 242 | // if control block pointer just changed, then 243 | // handling object's refcount is not our responsibility 244 | if (((expected >> MAGIC_LEN) != (packedPtrCopy >> MAGIC_LEN)) || 245 | ((expected & MAGIC_MASK) == 0)) // >20 hours wasted here 246 | { 247 | int before = block->refCount.fetch_sub(1); 248 | assert(before); 249 | break; 250 | } 251 | 252 | if ((expected & MAGIC_MASK) == 0) { 253 | abort(); 254 | break; 255 | } 256 | } 257 | // notification finished 258 | 259 | return SharedPtr(block); 260 | } 261 | 262 | template 263 | FastSharedPtr AtomicSharedPtr::getFast() { 264 | return FastSharedPtr(&packedPtr); 265 | } 266 | 267 | template 268 | AtomicSharedPtr::~AtomicSharedPtr() { 269 | thread_local std::vector destructionQueue; 270 | thread_local bool destructionInProgress = false; 271 | 272 | size_t packedPtrCopy = packedPtr.load(); 273 | auto block = reinterpret_cast*>(packedPtrCopy >> MAGIC_LEN); 274 | size_t diff = packedPtrCopy & MAGIC_MASK; 275 | if (diff != 0) { 276 | block->refCount.fetch_add(diff); 277 | } 278 | 279 | destructionQueue.push_back(packedPtrCopy); 280 | if (!destructionInProgress) { 281 | destructionInProgress = true; 282 | while (destructionQueue.size()) { 283 | size_t controlBlockToDestroy = destructionQueue.back(); 284 | destructionQueue.pop_back(); 285 | destroyOldControlBlock(controlBlockToDestroy); 286 | } 287 | destructionInProgress = false; 288 | } 289 | } 290 | 291 | template 292 | void AtomicSharedPtr::store(T *data) { 293 | store(SharedPtr(data)); 294 | } 295 | 296 | template 297 | void AtomicSharedPtr::store(SharedPtr data) { 298 | while (true) { 299 | auto holder = this->getFast(); 300 | if (compareExchange(holder.get(), std::move(data))) { 301 | break; 302 | } 303 | } 304 | } 305 | 306 | template 307 | bool AtomicSharedPtr::compareExchange(T *expected, SharedPtr &&newOne) { 308 | if (expected == newOne.get()) { 309 | return true; 310 | } 311 | auto holder = this->getFast(); 312 | if (holder.get() == expected) { 313 | size_t holdedPtr = reinterpret_cast(holder.getControlBlock()); 314 | size_t desiredPackedPtr = 
reinterpret_cast(newOne.controlBlock) << MAGIC_LEN; 315 | size_t expectedPackedPtr = holdedPtr << MAGIC_LEN; 316 | while (holdedPtr == (expectedPackedPtr >> MAGIC_LEN)) { 317 | if (expectedPackedPtr & MAGIC_MASK) { 318 | int diff = expectedPackedPtr & MAGIC_MASK; 319 | holder.getControlBlock()->refCount.fetch_add(diff); 320 | if (!packedPtr.compare_exchange_weak(expectedPackedPtr, expectedPackedPtr & ~MAGIC_MASK)) { 321 | holder.getControlBlock()->refCount.fetch_sub(diff); 322 | } 323 | continue; 324 | } 325 | assert((expectedPackedPtr >> MAGIC_LEN) != (desiredPackedPtr >> MAGIC_LEN)); 326 | if (packedPtr.compare_exchange_weak(expectedPackedPtr, desiredPackedPtr)) { 327 | newOne.controlBlock = nullptr; 328 | assert((expectedPackedPtr >> MAGIC_LEN) == holdedPtr); 329 | destroyOldControlBlock(expectedPackedPtr); 330 | return true; 331 | } 332 | } 333 | } 334 | 335 | return false; 336 | } 337 | 338 | template 339 | bool AtomicSharedPtr::compare_exchange_strong(SharedPtr& expected, SharedPtr desired) { 340 | return compareExchange(expected.get(), std::move(desired)); 341 | } 342 | 343 | template 344 | bool AtomicSharedPtr::compare_exchange_weak(SharedPtr& expected, SharedPtr desired) { 345 | return compareExchange(expected.get(), std::move(desired)); 346 | } 347 | 348 | template 349 | void AtomicSharedPtr::destroyOldControlBlock(size_t oldPackedPtr) { 350 | auto block = reinterpret_cast*>(oldPackedPtr >> MAGIC_LEN); 351 | auto refCountBefore = block->refCount.fetch_sub(1); 352 | assert(refCountBefore); 353 | if (refCountBefore == 1) { 354 | delete block->data; 355 | delete block; 356 | } 357 | } 358 | 359 | } // namespace LFStructs 360 | -------------------------------------------------------------------------------- /include/parlay/atomic_shared_ptr.hpp: -------------------------------------------------------------------------------- 1 | // A lock-free atomic shared pointer for modern C++. Not fully 2 | // feature complete yet. Currently, support is missing for: 3 | // - atomic_weak_ptr 4 | // - aliased shared pointers 5 | 6 | #pragma once 7 | 8 | #include 9 | 10 | #include "details/atomic_details.hpp" 11 | #include "details/hazard_pointers.hpp" 12 | 13 | #include "shared_ptr.hpp" 14 | 15 | namespace parlay { 16 | 17 | // Turn on deamortized reclamation. This substantially improves the worst-case store 18 | // latency by spreading out reclamation over time instead of doing it in bulk, in 19 | // exchange for a slight increase in load latency. 20 | inline void enable_deamortized_reclamation() { 21 | // Experimental feature. Still a work-in-progress! 
22 | get_hazard_list().enable_deamortized_reclamation(); 23 | } 24 | 25 | template 26 | class atomic_shared_ptr { 27 | 28 | using shared_ptr_type = shared_ptr; 29 | using control_block_type = details::control_block_base; 30 | 31 | public: 32 | 33 | constexpr atomic_shared_ptr() noexcept = default; 34 | constexpr explicit(false) atomic_shared_ptr(std::nullptr_t) noexcept // NOLINT(google-explicit-constructor) 35 | : control_block{nullptr} { } 36 | 37 | explicit(false) atomic_shared_ptr(shared_ptr_type desired) { // NOLINT(google-explicit-constructor) 38 | auto [ptr_, control_block_] = desired.release_internals(); 39 | control_block.store(control_block_, std::memory_order_relaxed); 40 | } 41 | 42 | atomic_shared_ptr(const atomic_shared_ptr&) = delete; 43 | atomic_shared_ptr& operator=(const atomic_shared_ptr&) = delete; 44 | 45 | ~atomic_shared_ptr() { store(nullptr); } 46 | 47 | bool is_lock_free() const noexcept { 48 | return control_block.is_lock_free(); 49 | } 50 | 51 | constexpr static bool is_always_lock_free = std::atomic::is_always_lock_free; 52 | 53 | [[nodiscard]] shared_ptr_type load([[maybe_unused]] std::memory_order order = std::memory_order_seq_cst) const { 54 | control_block_type* current_control_block = nullptr; 55 | 56 | auto& hazptr = get_hazard_list(); 57 | 58 | while (true) { 59 | current_control_block = hazptr.protect(control_block); 60 | if (current_control_block == nullptr || current_control_block->increment_strong_count_if_nonzero()) break; 61 | } 62 | 63 | return make_shared_from_ctrl_block(current_control_block); 64 | } 65 | 66 | void store(shared_ptr_type desired, std::memory_order order = std::memory_order_seq_cst) { 67 | auto [ptr_, control_block_] = desired.release_internals(); 68 | auto old_control_block = control_block.exchange(control_block_, order); 69 | if (old_control_block) { 70 | old_control_block->decrement_strong_count(); 71 | } 72 | } 73 | 74 | shared_ptr_type exchange(shared_ptr_type desired, std::memory_order order = std::memory_order_seq_cst) noexcept { 75 | auto [ptr_, control_block_] = desired.release_internals(); 76 | auto old_control_block = control_block.exchange(control_block_, order); 77 | return make_shared_from_ctrl_block(old_control_block); 78 | } 79 | 80 | bool compare_exchange_weak(shared_ptr_type& expected, shared_ptr_type&& desired, 81 | std::memory_order success, std::memory_order failure) { 82 | 83 | auto expected_ctrl_block = expected.control_block; 84 | auto desired_ctrl_block = desired.control_block; 85 | 86 | if (control_block.compare_exchange_weak(expected_ctrl_block, desired_ctrl_block, success, failure)) { 87 | if (expected_ctrl_block) { 88 | expected_ctrl_block->decrement_strong_count(); 89 | } 90 | desired.release_internals(); 91 | return true; 92 | } 93 | else { 94 | expected = load(); // It's possible that expected ABAs and stays the same on failure, hence 95 | return false; // why this algorithm can not be used to implement compare_exchange_strong 96 | } 97 | } 98 | 99 | bool compare_exchange_strong(shared_ptr_type& expected, shared_ptr_type&& desired, 100 | std::memory_order success, std::memory_order failure) { 101 | 102 | auto expected_ctrl_block = expected.control_block; 103 | 104 | // If expected changes then we have completed the operation (unsuccessfully), we only 105 | // have to loop in case expected ABAs or the weak operation fails spuriously. 
106 | do { 107 | if (compare_exchange_weak(expected, std::move(desired), success, failure)) { 108 | return true; 109 | } 110 | } while (expected_ctrl_block == expected.control_block); 111 | 112 | return false; 113 | } 114 | 115 | bool compare_exchange_weak(shared_ptr_type& expected, const shared_ptr_type& desired, 116 | std::memory_order success, std::memory_order failure) { 117 | 118 | // This version is not very efficient and should be avoided. It's just here to provide the complete 119 | // API of atomic. The issue with it is that if the compare_exchange fails, the reference 120 | // count of desired is incremented and decremented for no reason. On the other hand, the rvalue 121 | // version doesn't modify the reference count of desired at all. 122 | 123 | return compare_exchange_weak(expected, shared_ptr_type{desired}, success, failure); 124 | } 125 | 126 | 127 | bool compare_exchange_strong(shared_ptr_type& expected, const shared_ptr_type& desired, 128 | std::memory_order success, std::memory_order failure) { 129 | 130 | // This version is not very efficient and should be avoided. It's just here to provide the complete 131 | // API of atomic. The issue with it is that if the compare_exchange fails, the reference 132 | // count of desired is incremented and decremented for no reason. On the other hand, the rvalue 133 | // version doesn't modify the reference count of desired at all. 134 | 135 | return compare_exchange_strong(expected, shared_ptr_type{desired}, success, failure); 136 | } 137 | 138 | 139 | bool compare_exchange_strong(shared_ptr_type& expected, const shared_ptr_type& desired, std::memory_order order = std::memory_order_seq_cst) { 140 | return compare_exchange_strong(expected, desired, order, details::default_failure_memory_order(order)); 141 | } 142 | 143 | bool compare_exchange_weak(shared_ptr_type& expected, const shared_ptr_type& desired, std::memory_order order = std::memory_order_seq_cst) { 144 | return compare_exchange_weak(expected, desired, order, details::default_failure_memory_order(order)); 145 | } 146 | 147 | bool compare_exchange_strong(shared_ptr_type& expected, shared_ptr_type&& desired, std::memory_order order = std::memory_order_seq_cst) { 148 | return compare_exchange_strong(expected, std::move(desired), order, details::default_failure_memory_order(order)); 149 | } 150 | 151 | bool compare_exchange_weak(shared_ptr_type& expected, shared_ptr_type&& desired, std::memory_order order = std::memory_order_seq_cst) { 152 | return compare_exchange_weak(expected, std::move(desired), order, details::default_failure_memory_order(order)); 153 | } 154 | 155 | private: 156 | 157 | static shared_ptr_type make_shared_from_ctrl_block(control_block_type* control_block_) { 158 | if (control_block_) { 159 | T* ptr = static_cast(control_block_->get_ptr()); 160 | return shared_ptr_type{ptr, control_block_}; 161 | } 162 | else { 163 | return shared_ptr_type{nullptr}; 164 | } 165 | } 166 | 167 | mutable std::atomic control_block; 168 | }; 169 | 170 | }; 171 | 172 | -------------------------------------------------------------------------------- /include/parlay/basic_atomic_shared_ptr.hpp: -------------------------------------------------------------------------------- 1 | // A bare minimal atomic_shared_ptr implementation that exists to teach the main algorithm. 2 | // Not efficient and not feature complete. Just for demonstration!! 
3 | // 4 | // In particular, the following are absent: 5 | // - No make_shared, 6 | // - No custom deleters/allocators, 7 | // - No weak_ptr, 8 | // - No alias pointers, 9 | // - No enable_shared_from_this 10 | // - No memory orders. All seq_cst. 11 | // 12 | // See atomic_shared_ptr_custom.hpp, shared_ptr.hpp for a feature-complete and optimized implementation! 13 | // 14 | 15 | #pragma once 16 | 17 | #include 18 | #include 19 | 20 | #include 21 | 22 | namespace parlay { 23 | 24 | namespace basic { 25 | 26 | template 27 | class shared_ptr; 28 | 29 | template 30 | class atomic_shared_ptr; 31 | 32 | } 33 | 34 | namespace details { 35 | 36 | template 37 | struct basic_control_block : public folly::hazptr_obj_base> { 38 | 39 | template 40 | friend class basic::atomic_shared_ptr; 41 | 42 | template 43 | explicit basic_control_block(T* ptr_) noexcept : ref_count(1), ptr(ptr_) { } 44 | 45 | basic_control_block(const basic_control_block &) = delete; 46 | basic_control_block& operator=(const basic_control_block&) = delete; 47 | 48 | ~basic_control_block() = default; 49 | 50 | // Increment the reference count. The reference count must not be zero 51 | void increment_count() noexcept { 52 | ref_count.fetch_add(1); 53 | } 54 | 55 | // Increment the reference count if it is not zero. 56 | bool increment_if_nonzero() noexcept { 57 | auto cnt = ref_count.load(); 58 | while (cnt > 0 && !ref_count.compare_exchange_weak(cnt, cnt + 1)) { } 59 | return cnt > 0; 60 | } 61 | 62 | // Release a reference to the object. 63 | void decrement_count() noexcept { 64 | if (ref_count.fetch_sub(1) == 1) { 65 | delete ptr; 66 | this->retire(); 67 | } 68 | } 69 | 70 | std::atomic ref_count; 71 | T* ptr; 72 | }; 73 | 74 | } 75 | 76 | namespace basic { 77 | 78 | 79 | template 80 | class shared_ptr { 81 | 82 | template 83 | friend class atomic_shared_ptr; 84 | 85 | // Private constructor used by atomic_shared_ptr::load 86 | explicit shared_ptr(details::basic_control_block* control_block_) : control_block(control_block_) {} 87 | 88 | public: 89 | 90 | using element_type = T; 91 | 92 | // Decrement the reference count on destruction. 93 | ~shared_ptr() noexcept { 94 | decrement(); 95 | } 96 | 97 | constexpr shared_ptr() noexcept = default; 98 | 99 | constexpr explicit(false) shared_ptr(std::nullptr_t) noexcept {} // NOLINT(google-explicit-constructor) 100 | 101 | explicit shared_ptr(T* p) { 102 | std::unique_ptr up(p); // Hold inside a unique_ptr so that p is deleted if the allocation throws 103 | control_block = new details::basic_control_block(p); 104 | up.release(); 105 | } 106 | 107 | shared_ptr(const shared_ptr& other) noexcept : control_block(other.control_block) { 108 | increment(); 109 | } 110 | 111 | shared_ptr(shared_ptr&& other) noexcept : control_block(std::exchange(other.control_block, nullptr)) { } 112 | 113 | shared_ptr& operator=(const shared_ptr& other) noexcept { 114 | shared_ptr(other).swap(*this); 115 | return *this; 116 | } 117 | 118 | shared_ptr& operator=(shared_ptr&& other) noexcept { 119 | shared_ptr(std::move(other)).swap(*this); 120 | return *this; 121 | } 122 | 123 | void swap(shared_ptr& other) noexcept { 124 | std::swap(control_block, other.control_block); 125 | } 126 | 127 | void reset() noexcept { 128 | shared_ptr().swap(*this); 129 | } 130 | 131 | void reset(std::nullptr_t) noexcept { 132 | shared_ptr().swap(*this); 133 | } 134 | 135 | void reset(T* p) { 136 | shared_ptr(p).swap(*this); 137 | } 138 | 139 | T* get() noexcept { 140 | return control_block ? 
control_block->ptr : nullptr; 141 | } 142 | 143 | const T* get() const noexcept { 144 | return control_block ? control_block->ptr : nullptr; 145 | } 146 | 147 | [[nodiscard]] T& operator*() noexcept requires (!std::is_void_v) { 148 | return *(this->get()); 149 | } 150 | 151 | [[nodiscard]] const T& operator*() const noexcept requires (!std::is_void_v) { 152 | return *(this->get()); 153 | } 154 | 155 | [[nodiscard]] T* operator->() const noexcept { 156 | return this->get(); 157 | } 158 | 159 | explicit operator bool() const noexcept { 160 | return this->get() != nullptr; 161 | } 162 | 163 | [[nodiscard]] long use_count() const noexcept { 164 | return control_block ? control_block->ref_count.load() : 0; 165 | } 166 | 167 | private: 168 | 169 | void increment() noexcept { 170 | if (control_block) { 171 | control_block->increment_count(); 172 | } 173 | } 174 | 175 | void decrement() noexcept { 176 | if (control_block) { 177 | control_block->decrement_count(); 178 | } 179 | } 180 | 181 | details::basic_control_block* control_block{nullptr}; 182 | }; 183 | 184 | template 185 | auto operator<=>(const shared_ptr& left, const shared_ptr& right) noexcept { 186 | return left.get() <=> right.get(); 187 | } 188 | 189 | template 190 | auto operator<=>(const shared_ptr& left, std::nullptr_t) noexcept { 191 | return left.get() <=> static_cast::element_type*>(nullptr); 192 | } 193 | 194 | template 195 | auto operator<=>(std::nullptr_t, const shared_ptr& right) noexcept { 196 | return static_cast::element_type*>(nullptr) <=> right.get(); 197 | } 198 | 199 | template 200 | auto operator==(const shared_ptr& left, const shared_ptr& right) noexcept { 201 | return left.get() == right.get(); 202 | } 203 | 204 | template 205 | auto operator==(const shared_ptr& left, std::nullptr_t) noexcept { 206 | return left.get() == static_cast::element_type*>(nullptr); 207 | } 208 | 209 | template 210 | auto operator==(std::nullptr_t, const shared_ptr& right) noexcept { 211 | return static_cast::element_type*>(nullptr) == right.get(); 212 | } 213 | 214 | template 215 | class atomic_shared_ptr { 216 | using shared_ptr_type = shared_ptr; 217 | using control_block_type = details::basic_control_block; 218 | 219 | public: 220 | 221 | constexpr atomic_shared_ptr() noexcept = default; 222 | constexpr explicit(false) atomic_shared_ptr(std::nullptr_t) noexcept // NOLINT(google-explicit-constructor) 223 | : control_block{nullptr} { } 224 | 225 | atomic_shared_ptr(shared_ptr_type desired) { // NOLINT(google-explicit-constructor) 226 | control_block.store(std::exchange(desired.control_block, nullptr)); 227 | } 228 | 229 | atomic_shared_ptr(const atomic_shared_ptr&) = delete; 230 | atomic_shared_ptr& operator=(const atomic_shared_ptr&) = delete; 231 | 232 | ~atomic_shared_ptr() { store(nullptr); } 233 | 234 | bool is_lock_free() const noexcept { 235 | return control_block.is_lock_free(); 236 | } 237 | 238 | constexpr static bool is_always_lock_free = std::atomic::is_always_lock_free; 239 | 240 | [[nodiscard]] shared_ptr_type load() const { 241 | 242 | folly::hazptr_holder hp = folly::make_hazard_pointer(); 243 | control_block_type* current_control_block = nullptr; 244 | 245 | do { 246 | current_control_block = hp.protect(control_block); 247 | } while (current_control_block != nullptr && !current_control_block->increment_if_nonzero()); 248 | 249 | return shared_ptr(current_control_block); 250 | } 251 | 252 | void store(shared_ptr_type desired) { 253 | auto new_control_block = std::exchange(desired.control_block, nullptr); 254 | auto 
old_control_block = control_block.exchange(new_control_block); 255 | if (old_control_block) { 256 | old_control_block->decrement_count(); 257 | } 258 | } 259 | 260 | shared_ptr_type exchange(shared_ptr_type desired) noexcept { 261 | auto new_control_block = std::exchange(desired.control_block, nullptr); 262 | auto old_control_block = control_block.exchange(new_control_block); 263 | return shared_ptr_type(old_control_block); 264 | } 265 | 266 | bool compare_exchange_weak(shared_ptr_type& expected, shared_ptr_type desired) { 267 | auto expected_ctrl_block = expected.control_block; 268 | auto desired_ctrl_block = desired.control_block; 269 | 270 | if (control_block.compare_exchange_weak(expected_ctrl_block, desired_ctrl_block)) { 271 | if (expected_ctrl_block) { 272 | expected_ctrl_block->decrement_count(); 273 | } 274 | desired.control_block = nullptr; 275 | return true; 276 | } 277 | else { 278 | expected = load(); // It's possible that expected ABAs and stays the same on failure, hence 279 | return false; // why this algorithm can not be used to implement compare_exchange_strong 280 | } 281 | } 282 | 283 | bool compare_exchange_strong(shared_ptr_type& expected, shared_ptr_type desired) { 284 | auto expected_ctrl_block = expected.control_block; 285 | 286 | // If expected changes then we have completed the operation (unsuccessfully), we only 287 | // have to loop in case expected ABAs or the weak operation fails spuriously. 288 | do { 289 | if (compare_exchange_weak(expected, desired)) { 290 | return true; 291 | } 292 | } while (expected_ctrl_block == expected.control_block); 293 | 294 | return false; 295 | } 296 | 297 | private: 298 | mutable std::atomic control_block; 299 | }; 300 | 301 | } // namespace basic 302 | } // namespace parlay 303 | -------------------------------------------------------------------------------- /include/parlay/details/atomic_details.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace parlay { 6 | namespace details { 7 | 8 | inline std::memory_order default_failure_memory_order(std::memory_order successMode) { 9 | switch (successMode) { 10 | case std::memory_order_acq_rel: 11 | return std::memory_order_acquire; 12 | case std::memory_order_release: 13 | return std::memory_order_relaxed; 14 | case std::memory_order_relaxed: 15 | case std::memory_order_consume: 16 | case std::memory_order_acquire: 17 | case std::memory_order_seq_cst: 18 | return successMode; 19 | } 20 | return successMode; 21 | } 22 | 23 | } // namespace details 24 | } // namespace parlay 25 | -------------------------------------------------------------------------------- /include/parlay/details/hazard_pointers.hpp: -------------------------------------------------------------------------------- 1 | 2 | #pragma once 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | 17 | #include 18 | 19 | namespace parlay { 20 | 21 | #ifdef __cpp_lib_hardware_interference_size 22 | 23 | #pragma GCC diagnostic push 24 | #pragma GCC diagnostic ignored "-Winterference-size" 25 | 26 | inline constexpr std::size_t CACHE_LINE_ALIGNMENT = 2 * std::hardware_destructive_interference_size; 27 | 28 | #pragma GCC diagnostic pop 29 | 30 | #else 31 | inline constexpr std::size_t CACHE_LINE_ALIGNMENT = 128; 32 | #endif 33 | 34 | enum class ReclamationMethod { 35 | amortized_reclamation, // Reclamation happens in bulk in the retiring thread 36 | 
deamortized_reclamation // Reclamation happens spread out over the retiring thread 37 | }; 38 | 39 | template 40 | concept GarbageCollectible = requires(T* t, T* tp) { 41 | { t->get_next() } -> std::convertible_to; // The object should expose an intrusive next ptr 42 | { t->set_next(tp) }; 43 | { t->destroy() }; // The object should be destructible on demand 44 | }; 45 | 46 | template requires GarbageCollectible 47 | class HazardPointers; 48 | 49 | template 50 | extern inline HazardPointers& get_hazard_list(); 51 | 52 | // A simple and efficient implementation of Hazard Pointer deferred reclamation 53 | // 54 | // Each live thread owns *exactly one* Hazard Pointer, which is sufficient for most 55 | // (but not all) algorithms that use them. In particular, it is sufficient for lock- 56 | // free atomic shared ptrs. This makes it much simpler and slightly more efficient 57 | // than a general-purpose Hazard Pointer implementation, like the one in Folly, which 58 | // supports each thread having an arbitrary number of Hazard Pointers. 59 | // 60 | // Each thread keeps a local retired list of objects that are pending deletion. 61 | // This means that a stalled thread can delay the destruction of its retired objects 62 | // indefinitely, however, since each thread is only allowed to protect a single object 63 | // at a time, it is guaranteed that there are at most O(P^2) total unreclaimed objects 64 | // at any given point, so the memory usage is theoretically bounded. 65 | // 66 | template requires GarbageCollectible 67 | class HazardPointers { 68 | 69 | // After this many retires, a thread will attempt to clean up the contents of 70 | // its local retired list, deleting any retired objects that are not protected. 71 | constexpr static std::size_t cleanup_threshold = 2000; 72 | 73 | using garbage_type = GarbageType; 74 | using protected_set_type = folly::F14FastSet; 75 | 76 | // The retired list is an intrusive linked list of retired blocks. It takes advantage 77 | // of the available managed object pointer in the control block to store the next pointers. 78 | // (since, after retirement, it is guaranteed that the object has been freed, and thus 79 | // the managed object pointer is no longer used. Furthermore, it does not have to be 80 | // kept as null since threads never read the pointer unless they own a reference count.) 
81 | // 82 | struct RetiredList { 83 | 84 | constexpr RetiredList() noexcept = default; 85 | 86 | explicit RetiredList(garbage_type* head_) : head(head_) { } 87 | 88 | RetiredList& operator=(garbage_type* head_) { 89 | assert(head == nullptr); 90 | head = head_; 91 | } 92 | 93 | RetiredList(const RetiredList&) = delete; 94 | 95 | RetiredList(RetiredList&& other) noexcept : head(std::exchange(other.head, nullptr)) { } 96 | 97 | ~RetiredList() { 98 | cleanup([](auto&&) { return false; }); 99 | } 100 | 101 | void push(garbage_type* p) noexcept { 102 | p->set_next(std::exchange(head, p)); 103 | if (p->get_next() == nullptr) [[unlikely]] tail = p; 104 | } 105 | 106 | void append(RetiredList&& other) { 107 | if (head == nullptr) { 108 | head = std::exchange(other.head, nullptr); 109 | tail = std::exchange(other.tail, nullptr); 110 | } 111 | else if (other.head != nullptr) { 112 | assert(tail != nullptr); 113 | tail->set_next(std::exchange(other.head, nullptr)); 114 | tail = std::exchange(other.tail, nullptr); 115 | } 116 | } 117 | 118 | void swap(RetiredList& other) { 119 | std::swap(head, other.head); 120 | std::swap(tail, other.tail); 121 | } 122 | 123 | // For each element x currently in the retired list, if is_protected(x) == false, 124 | // then x->destroy() and remove x from the retired list. Otherwise, keep x on 125 | // the retired list for the next cleanup. 126 | template 127 | void cleanup(F&& is_protected) { 128 | 129 | while (head && !is_protected(head)) { 130 | garbage_type* old = std::exchange(head, head->get_next()); 131 | old->destroy(); 132 | } 133 | 134 | if (head) { 135 | garbage_type* prev = head; 136 | garbage_type* current = head->get_next(); 137 | while (current) { 138 | if (!is_protected(current)) { 139 | garbage_type* old = std::exchange(current, current->get_next()); 140 | old->destroy(); 141 | prev->set_next(current); 142 | } else { 143 | prev = std::exchange(current, current->get_next()); 144 | } 145 | } 146 | tail = prev; 147 | } 148 | else { 149 | tail = nullptr; 150 | } 151 | } 152 | 153 | // Cleanup *at most* n retired objects. For up to n elements x currently in the retired list, 154 | // if is_protected(x) == false, then x->destroy() and remove x from the retired list. Otherwise, 155 | // move x onto the "into" list. 156 | template 157 | void eject_and_move(std::size_t n, RetiredList& into, F&& is_protected) { 158 | for (; head && n > 0; --n) { 159 | garbage_type* current = std::exchange(head, head->get_next()); 160 | if (is_protected(current)) { 161 | into.push(current); 162 | } 163 | else { 164 | current->destroy(); 165 | } 166 | } 167 | if (head == nullptr) { 168 | tail = nullptr; 169 | } 170 | } 171 | 172 | garbage_type* head{nullptr}; 173 | garbage_type* tail{nullptr}; 174 | }; 175 | 176 | struct DeamortizedReclaimer; 177 | 178 | // Each thread owns a hazard entry slot which contains a single hazard pointer 179 | // (called protected_pointer) and the thread's local retired list. 180 | // 181 | // The slots are linked together to form a linked list so that threads can scan 182 | // for the set of currently protected pointers. 183 | // 184 | struct alignas(CACHE_LINE_ALIGNMENT) HazardSlot { 185 | explicit HazardSlot(bool in_use_) : in_use(in_use_) {} 186 | 187 | // The *actual* "Hazard Pointer" that protects the object that it points to. 188 | // Other threads scan for the set of all such pointers before they clean up. 
189 | std::atomic protected_ptr{nullptr}; 190 | 191 | // Link together all existing slots into a big global linked list 192 | std::atomic next{nullptr}; 193 | 194 | // (Intrusive) linked list of retired objects. Does not allocate memory since it 195 | // just uses the next pointer from inside the retired block. 196 | RetiredList retired_list{}; 197 | 198 | // Count the number of retires since the last cleanup. When this value exceeds 199 | // cleanup_threshold, we will perform cleanup. 200 | unsigned num_retires_since_cleanup{0}; 201 | 202 | // True if this hazard pointer slow is owned by a thread. 203 | std::atomic in_use; 204 | 205 | // Set of protected objects used by cleanup(). Re-used between cleanups so that 206 | // we don't have to allocate new memory unless the table gets full, which would 207 | // only happen if the user spawns substantially more threads than were active 208 | // during the previous call to cleanup(). Therefore cleanup is always lock free 209 | // unless the number of threads has doubled since last time. 210 | protected_set_type protected_set{2 * std::thread::hardware_concurrency()}; 211 | 212 | std::unique_ptr deamortized_reclaimer{nullptr}; 213 | }; 214 | 215 | // Find an available hazard slot, or allocate a new one if none available. 216 | HazardSlot *get_slot() { 217 | auto current = list_head; 218 | while (true) { 219 | if (!current->in_use.load() && !current->in_use.exchange(true)) { 220 | return current; 221 | } 222 | if (current->next.load() == nullptr) { 223 | auto my_slot = new HazardSlot{true}; 224 | if (mode == ReclamationMethod::deamortized_reclamation) { 225 | my_slot->deamortized_reclaimer = std::make_unique(*my_slot, list_head); 226 | } 227 | HazardSlot* next = nullptr; 228 | while (!current->next.compare_exchange_weak(next, my_slot)) { 229 | current = next; 230 | next = nullptr; 231 | } 232 | return my_slot; 233 | } else { 234 | current = current->next.load(); 235 | } 236 | } 237 | } 238 | 239 | // Give a slot back to the world so another thread can re-use it 240 | void relinquish_slot(HazardSlot* slot) { 241 | slot->in_use.store(false); 242 | } 243 | 244 | // A HazardSlotOwner owns exactly one HazardSlot entry in the global linked list 245 | // of HazardSlots. On creation, it acquires a free slot from the list, or appends 246 | // a new slot if all of them are in use. On destruction, it makes the slot available 247 | // for another thread to pick up. 248 | struct HazardSlotOwner { 249 | explicit HazardSlotOwner(HazardPointers& list_) : list(list_), my_slot(list.get_slot()) {} 250 | 251 | ~HazardSlotOwner() { 252 | list.relinquish_slot(my_slot); 253 | } 254 | 255 | private: 256 | HazardPointers& list; 257 | public: 258 | HazardSlot* const my_slot; 259 | }; 260 | 261 | public: 262 | 263 | // Pre-populate the slot list with P slots, one for each hardware thread 264 | HazardPointers() : list_head(new HazardSlot{false}) { 265 | auto current = list_head; 266 | for (unsigned i = 1; i < std::thread::hardware_concurrency(); i++) { 267 | current->next = new HazardSlot{false}; 268 | current = current->next; 269 | } 270 | } 271 | 272 | ~HazardPointers() { 273 | auto current = list_head; 274 | while (current) { 275 | auto old = std::exchange(current, current->next.load()); 276 | delete old; 277 | } 278 | } 279 | 280 | // Protect the object pointed to by the pointer currently stored at src. 
281 | // 282 | // The second argument allows the protected pointer to be deduced from 283 | // the value stored at src, for example, if src stores a pair containing 284 | // the pointer to protect and some other value. In this case, the value of 285 | // f(ptr) is protected instead, but the full value *ptr is still returned. 286 | template typename Atomic, typename U, typename F> 287 | U protect(const Atomic &src, F &&f) { 288 | static_assert(std::is_convertible_v, garbage_type*>); 289 | auto &slot = local_slot.my_slot->protected_ptr; 290 | 291 | U result = src.load(std::memory_order_acquire); 292 | 293 | while (true) { 294 | auto ptr_to_protect = f(result); 295 | if (ptr_to_protect == nullptr) { 296 | return result; 297 | } 298 | PARLAY_PREFETCH(ptr_to_protect, 0, 0); 299 | slot.store(ptr_to_protect, protection_order); 300 | folly::asymmetric_thread_fence_light(std::memory_order_seq_cst); /* Fast-side fence */ 301 | 302 | U current_value = src.load(std::memory_order_acquire); 303 | if (current_value == result) [[likely]] { 304 | return result; 305 | } else { 306 | result = std::move(current_value); 307 | } 308 | } 309 | } 310 | 311 | // Protect the object pointed to by the pointer currently stored at src. 312 | template typename Atomic, typename U> 313 | U protect(const Atomic &src) { 314 | return protect(src, [](auto &&x) { return std::forward(x); }); 315 | } 316 | 317 | // Unprotect the currently protected object 318 | void release() { 319 | local_slot.my_slot->protected_ptr.store(nullptr, std::memory_order_release); 320 | } 321 | 322 | // Retire the given object 323 | // 324 | // The object managed by p must have reference count zero. 325 | void retire(garbage_type* p) noexcept { 326 | HazardSlot& my_slot = *local_slot.my_slot; 327 | my_slot.retired_list.push(p); 328 | 329 | if (mode == ReclamationMethod::deamortized_reclamation) { 330 | assert(my_slot.deamortized_reclaimer != nullptr); 331 | my_slot.deamortized_reclaimer->do_reclamation_work(); 332 | } 333 | else if (++my_slot.num_retires_since_cleanup >= cleanup_threshold) [[unlikely]] { 334 | cleanup(my_slot); 335 | } 336 | } 337 | 338 | void enable_deamortized_reclamation() { 339 | assert(mode == ReclamationMethod::amortized_reclamation); 340 | for_each_slot([&](HazardSlot& slot) { 341 | slot.deamortized_reclaimer = std::make_unique(slot, list_head); 342 | }); 343 | mode = ReclamationMethod::deamortized_reclamation; 344 | protection_order = std::memory_order_seq_cst; 345 | } 346 | 347 | private: 348 | 349 | struct DeamortizedReclaimer { 350 | 351 | explicit DeamortizedReclaimer(HazardSlot& slot_, HazardSlot* const head_) : my_slot(slot_), head_slot(head_) { } 352 | 353 | void do_reclamation_work() { 354 | num_retires++; 355 | 356 | if (current_slot == nullptr) { 357 | if (num_retires < 2 * num_hazard_ptrs) { 358 | // Need to batch 2P retires before scanning hazard pointers to ensure 359 | // that we eject at least P blocks to make it worth the work. 360 | return; 361 | } 362 | // There are at least 2*num_hazard_pointers objects awaiting reclamation 363 | num_retires = 0; 364 | num_hazard_ptrs = std::exchange(next_num_hazard_ptrs, 0); 365 | current_slot = head_slot; 366 | protected_set.swap(next_protected_set); 367 | next_protected_set.clear(); // The only not-O(1) operation, but its fast 368 | 369 | eligible.append(std::move(next_eligible)); 370 | next_eligible.swap(my_slot.retired_list); 371 | } 372 | 373 | // Eject up to two elements from the eligible set. 
It has to be two because we waited until 374 | // we had 2 * num_hazard_ptrs eligible objects, so we want that to be processed by the time 375 | // we get through the hazard-pointer list again. 376 | eligible.eject_and_move(2, my_slot.retired_list, [&](auto p) { return protected_set.count(p) > 0; }); 377 | 378 | next_num_hazard_ptrs++; 379 | next_protected_set.insert(current_slot->protected_ptr.load()); 380 | current_slot = current_slot->next; 381 | } 382 | 383 | HazardSlot& my_slot; 384 | HazardSlot* const head_slot; 385 | HazardSlot* current_slot{nullptr}; 386 | 387 | protected_set_type protected_set{2*std::thread::hardware_concurrency()}; 388 | protected_set_type next_protected_set{2*std::thread::hardware_concurrency()}; 389 | 390 | RetiredList eligible{}; 391 | RetiredList next_eligible{}; 392 | 393 | // A local estimate of the number of active hazard pointers 394 | unsigned int num_hazard_ptrs{std::thread::hardware_concurrency()}; 395 | unsigned int next_num_hazard_ptrs{std::thread::hardware_concurrency()}; 396 | 397 | unsigned int num_retires{0}; 398 | }; 399 | 400 | template 401 | void for_each_slot(F&& f) noexcept(std::is_nothrow_invocable_v) { 402 | auto current = list_head; 403 | while (current) { 404 | f(*current); 405 | current = current->next.load(); 406 | } 407 | } 408 | 409 | // Apply the function f to all currently announced hazard pointers 410 | template 411 | void scan_hazard_pointers(F&& f) noexcept(std::is_nothrow_invocable_v) { 412 | for_each_slot([&, f = std::forward(f)](HazardSlot& slot) { 413 | auto p = slot.protected_ptr.load(); 414 | if (p) { 415 | f(p); 416 | } 417 | }); 418 | } 419 | 420 | PARLAY_NOINLINE void cleanup(HazardSlot& slot) { 421 | slot.num_retires_since_cleanup = 0; 422 | folly::asymmetric_thread_fence_heavy(std::memory_order_seq_cst); 423 | scan_hazard_pointers([&](auto p) { slot.protected_set.insert(p); }); 424 | slot.retired_list.cleanup([&](auto p) { return slot.protected_set.count(p) > 0; }); 425 | slot.protected_set.clear(); // Does not free memory, only clears contents 426 | } 427 | 428 | ReclamationMethod mode{ReclamationMethod::amortized_reclamation}; 429 | std::memory_order protection_order{std::memory_order_relaxed}; 430 | HazardSlot* const list_head; 431 | 432 | static inline const thread_local HazardSlotOwner local_slot{get_hazard_list()}; 433 | }; 434 | 435 | 436 | // Global singleton containing the list of hazard pointers. We store it in raw 437 | // storage so that it is never destructed. 438 | // 439 | // (a detached thread might grab a HazardSlot entry and not relinquish it until 440 | // static destruction, at which point this global static would have already been 441 | // destroyed. We avoid that using this pattern.) 442 | // 443 | // This does technically mean that we leak the HazardSlots, but that is 444 | // a price we are willing to pay. 445 | template 446 | HazardPointers& get_hazard_list() { 447 | alignas(HazardPointers) static char buffer[sizeof(HazardPointers)]; 448 | static auto* list = new (&buffer) HazardPointers{}; 449 | return *list; 450 | } 451 | 452 | } // namespace parlay 453 | -------------------------------------------------------------------------------- /include/parlay/details/wait_free_counter.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | // A wait-free atomic counter that supports increment and decrement, 7 | // such that attempting to increment the counter from zero fails and 8 | // does not perform the increment. 
9 | // 10 | // Useful for incrementing reference counting, where the underlying 11 | // managed memory is freed when the counter hits zero, so that other 12 | // racing threads can not increment the counter back up from zero 13 | // 14 | // Note: The counter steals the top two bits of the integer for book- 15 | // keeping purposes. Hence the maximum representable value in the 16 | // counter is 2^(8*sizeof(T)-2) - 1 17 | template 18 | struct WaitFreeCounter { 19 | static_assert(std::is_integral_v && std::is_unsigned_v); 20 | 21 | WaitFreeCounter() noexcept : x(1) {} 22 | explicit WaitFreeCounter(T desired) noexcept : x(desired) {} 23 | 24 | [[nodiscard]] bool is_lock_free() const { return x.is_lock_free(); } 25 | static constexpr bool is_always_lock_free = std::atomic::is_always_lock_free; 26 | [[nodiscard]] constexpr T max_value() const { return zero_pending_flag - 1; } 27 | 28 | WaitFreeCounter& operator=(const WaitFreeCounter&) = delete; 29 | 30 | explicit operator T() const noexcept { return load(); } 31 | 32 | T load(std::memory_order order = std::memory_order_seq_cst) const noexcept { 33 | auto val = x.load(order); 34 | if (val == 0 && x.compare_exchange_strong(val, zero_flag | zero_pending_flag)) [[unlikely]] return 0; 35 | return (val & zero_flag) ? 0 : val; 36 | } 37 | 38 | // Increment the counter by arg. Returns false on failure, i.e., if the counter 39 | // was previously zero. Otherwise returns true. 40 | T increment(T arg, std::memory_order order = std::memory_order_seq_cst) noexcept { 41 | auto val = x.fetch_add(arg, order); 42 | return (val & zero_flag) == 0; 43 | } 44 | 45 | // Decrement the counter by arg. Returns true if this operation was responsible 46 | // for decrementing the counter to zero. Otherwise, returns false. 47 | bool decrement(T arg, std::memory_order order = std::memory_order_seq_cst) noexcept { 48 | if (x.fetch_sub(arg, order) == arg) { 49 | T expected = 0; 50 | if (x.compare_exchange_strong(expected, zero_flag)) [[likely]] 51 | return true; 52 | else if ((expected & zero_pending_flag) && (x.exchange(zero_flag) & zero_pending_flag)) 53 | return true; 54 | } 55 | return false; 56 | } 57 | 58 | private: 59 | static constexpr inline T zero_flag = T(1) << (sizeof(T)*8) - 1; 60 | static constexpr inline T zero_pending_flag = T(1) << (sizeof(T)*8) - 2; 61 | mutable std::atomic x; 62 | }; 63 | -------------------------------------------------------------------------------- /include/parlay/fast_shared_ptr.hpp: -------------------------------------------------------------------------------- 1 | // A faster shared_ptr that omits most of the bells and whistles in order 2 | // to make the control block smaller and remove all type erasure. 3 | // 4 | // In particular, the following are absent: 5 | // - No make_shared, 6 | // - No custom deleters/allocators, 7 | // - No weak_ptr, 8 | // - No alias pointers, 9 | // - No enable_shared_from_this 10 | // 11 | // The benefit is that the control block is only 16 bytes at minimum 12 | // because it has no weak ref count and no vtable pointer. 13 | // 14 | // See shared_ptr.hpp for a feature-complete implementation! 
15 | // 16 | #pragma once 17 | 18 | #include 19 | #include 20 | #include 21 | 22 | #include "details/hazard_pointers.hpp" 23 | #include "details/wait_free_counter.hpp" 24 | 25 | #include 26 | 27 | namespace parlay { 28 | 29 | template 30 | class atomic_shared_ptr; 31 | 32 | template 33 | class shared_ptr; 34 | 35 | namespace details { 36 | 37 | 38 | using ref_cnt_type = uint32_t; 39 | 40 | 41 | // Minimal, optimized control block. No alias support, or custom deleter, or custom allocator. 42 | template 43 | struct fast_control_block { 44 | 45 | static void* operator new(std::size_t sz) { 46 | assert(sz == 1); 47 | return parlay::type_allocator::alloc(); 48 | } 49 | 50 | static void operator delete(void* ptr) { 51 | parlay::type_allocator::free(static_cast(ptr)); 52 | } 53 | 54 | struct inline_tag {}; 55 | 56 | template 57 | friend class atomic_shared_ptr; 58 | 59 | fast_control_block(T* ptr_) : strong_count(1), ptr(ptr_), inline_alloc(false) { } // NOLINT 60 | 61 | template 62 | fast_control_block(inline_tag, Args&&... args) // NOLINT 63 | : strong_count(1), object(std::forward(args)...), inline_alloc(true) { } 64 | 65 | 66 | fast_control_block(const fast_control_block &) = delete; 67 | fast_control_block& operator=(const fast_control_block&) = delete; 68 | 69 | ~fast_control_block() { } 70 | 71 | // Destroy the managed object. Called when the strong count hits zero 72 | void dispose() noexcept { 73 | if (inline_alloc) { 74 | object.~T(); 75 | } 76 | else { 77 | delete ptr; 78 | } 79 | } 80 | 81 | // Destroy the control block. dispose() must have been called prior to 82 | // calling destroy. Called when the weak count hits zero. 83 | void destroy() noexcept { 84 | delete this; 85 | } 86 | 87 | // Increment the strong reference count. The strong reference count must not be zero 88 | void increment_strong_count() noexcept { 89 | assert(strong_count.load(std::memory_order_relaxed) > 0); 90 | [[maybe_unused]] auto success = strong_count.increment(1, std::memory_order_relaxed); 91 | assert(success); 92 | } 93 | 94 | // Increment the strong reference count if it is not zero. Return true if successful, 95 | // otherwise return false indicating that the strong reference count is zero. 
96 | bool increment_strong_count_if_nonzero() noexcept { 97 | return strong_count.increment(1, std::memory_order_relaxed); 98 | } 99 | 100 | void decrement_strong_count() noexcept { 101 | if (strong_count.decrement(1, std::memory_order_release)) { 102 | std::atomic_thread_fence(std::memory_order_acquire); 103 | dispose(); 104 | get_hazard_list().retire(this); 105 | } 106 | } 107 | 108 | fast_control_block* get_next() const noexcept { return next_; } 109 | void set_next(fast_control_block* next) noexcept { next_ = next; } 110 | 111 | T* get_ptr() const noexcept { 112 | if (inline_alloc) return const_cast(std::addressof(object)); 113 | else return const_cast(ptr); 114 | } 115 | 116 | auto get_use_count() const noexcept { return strong_count.load(std::memory_order_relaxed); } 117 | 118 | private: 119 | 120 | WaitFreeCounter strong_count; 121 | const bool inline_alloc; 122 | 123 | union { 124 | std::monostate empty; 125 | fast_control_block* next_; // Intrusive ptr used for garbage collection by Hazard pointers 126 | T* ptr; // Pointer to the managed object while it is alive 127 | T object; 128 | }; 129 | 130 | }; 131 | 132 | static_assert(sizeof(fast_control_block) == 16); 133 | static_assert(sizeof(fast_control_block) == 16); 134 | 135 | // Base class for shared_ptr and weak_ptr 136 | template 137 | class smart_ptr_base { 138 | 139 | template 140 | friend class atomic_shared_ptr; 141 | 142 | public: 143 | using element_type = T; 144 | 145 | [[nodiscard]] long use_count() const noexcept { 146 | return control_block ? control_block->get_use_count() : 0; 147 | } 148 | 149 | // Comparator for sorting shared pointers. Ordering is based on the address of the control blocks. 150 | template 151 | [[nodiscard]] bool owner_before(const smart_ptr_base& other) const noexcept { 152 | return control_block < other.control_block; 153 | } 154 | 155 | smart_ptr_base& operator=(const smart_ptr_base&) = delete; 156 | 157 | [[nodiscard]] element_type* get() const noexcept { 158 | return control_block ? 
control_block->get_ptr() : nullptr; 159 | } 160 | 161 | protected: 162 | 163 | constexpr smart_ptr_base() noexcept = default; 164 | 165 | smart_ptr_base(fast_control_block* control_block_) noexcept 166 | : control_block(control_block_) { 167 | 168 | } 169 | 170 | 171 | explicit smart_ptr_base(const smart_ptr_base& other) noexcept 172 | : control_block(other.control_block) { 173 | 174 | } 175 | 176 | 177 | explicit smart_ptr_base(smart_ptr_base&& other) noexcept 178 | : control_block(std::exchange(other.control_block, nullptr)) { 179 | 180 | } 181 | 182 | ~smart_ptr_base() = default; 183 | 184 | void swap_ptrs(smart_ptr_base& other) noexcept { 185 | //std::swap(ptr, other.ptr); 186 | std::swap(control_block, other.control_block); 187 | } 188 | 189 | void increment_strong() const noexcept { 190 | if (control_block) { 191 | control_block->increment_strong_count(); 192 | } 193 | } 194 | 195 | [[nodiscard]] bool increment_if_nonzero() const noexcept { 196 | return control_block && control_block->increment_strong_count_if_nonzero(); 197 | } 198 | 199 | void decrement_strong() noexcept { 200 | if (control_block) { 201 | control_block->decrement_strong_count(); 202 | } 203 | } 204 | 205 | //element_type* ptr{nullptr}; 206 | fast_control_block* control_block{nullptr}; 207 | }; 208 | 209 | } // namespace details 210 | 211 | template 212 | class shared_ptr : public details::smart_ptr_base { 213 | 214 | using base = details::smart_ptr_base; 215 | 216 | template 217 | friend class atomic_shared_ptr; 218 | 219 | template 220 | friend class shared_ptr; 221 | 222 | // Private constructor used by atomic_shared_ptr::load and weak_ptr::lock 223 | shared_ptr([[maybe_unused]] T* ptr_, details::fast_control_block* control_block_) : base(control_block_) { 224 | assert(ptr_ == control_block_->get_ptr() && "This shared_ptr does not support alias pointers."); 225 | } 226 | 227 | public: 228 | using typename base::element_type; 229 | 230 | // Decrement the reference count on destruction. Resource cleanup is all 231 | // handled internally by the control block (including deleting itself!) 
232 | ~shared_ptr() noexcept { 233 | this->decrement_strong(); 234 | } 235 | 236 | // ========================================================================================== 237 | // INITIALIZING AND NULL CONSTRUCTORS 238 | // ========================================================================================== 239 | 240 | constexpr shared_ptr() noexcept = default; 241 | 242 | constexpr explicit(false) shared_ptr(std::nullptr_t) noexcept {} // NOLINT(google-explicit-constructor) 243 | 244 | explicit shared_ptr(T* p) { 245 | std::unique_ptr up(p); // Hold inside a unique_ptr so that p is deleted if the allocation throws 246 | auto control_block = new details::fast_control_block(p); 247 | this->set_ptrs_and_esft(up.release(), control_block); 248 | } 249 | 250 | 251 | // ========================================================================================== 252 | // COPY CONSTRUCTORS 253 | // ========================================================================================== 254 | 255 | shared_ptr(const shared_ptr& other) noexcept : base(other) { 256 | this->increment_strong(); 257 | } 258 | 259 | // ========================================================================================== 260 | // MOVE CONSTRUCTORS 261 | // ========================================================================================== 262 | 263 | shared_ptr(shared_ptr&& other) noexcept : base(std::exchange(other.control_block, nullptr)) { } 264 | 265 | // ========================================================================================== 266 | // ASSIGNMENT OPERATORS 267 | // ========================================================================================== 268 | 269 | shared_ptr& operator=(const shared_ptr& other) noexcept { 270 | shared_ptr(other).swap(*this); 271 | return *this; 272 | } 273 | 274 | shared_ptr& operator=(shared_ptr&& other) noexcept { 275 | shared_ptr(std::move(other)).swap(*this); 276 | return *this; 277 | } 278 | 279 | // ========================================================================================== 280 | // SWAP, RESET 281 | // ========================================================================================== 282 | 283 | void swap(shared_ptr& other) noexcept { 284 | this->swap_ptrs(other); 285 | } 286 | 287 | void reset() noexcept { 288 | shared_ptr().swap(*this); 289 | } 290 | 291 | void reset(std::nullptr_t) noexcept { 292 | shared_ptr().swap(*this); 293 | } 294 | 295 | void reset(T* p) { 296 | shared_ptr(p).swap(*this); 297 | } 298 | 299 | // ========================================================================================== 300 | // ACCESS, DEREFERENCE 301 | // ========================================================================================== 302 | 303 | [[nodiscard]] T& operator*() const noexcept requires (!std::is_void_v) { 304 | return *(this->get()); 305 | } 306 | 307 | [[nodiscard]] T* operator->() const noexcept { 308 | return this->get(); 309 | } 310 | 311 | explicit operator bool() const noexcept { 312 | return this->get() != nullptr; 313 | } 314 | 315 | // ========================================================================================== 316 | // FACTORIES 317 | // ========================================================================================== 318 | 319 | template 320 | // requires std::constructible_from 321 | friend shared_ptr make_shared(Args&&... 
args); 322 | 323 | private: 324 | 325 | void set_ptrs_and_esft(T* ptr_, details::fast_control_block* control_block_) { 326 | //this->ptr = ptr_; 327 | this->control_block = control_block_; 328 | } 329 | 330 | // Release the ptr and control_block to the caller. Does not modify the reference count, 331 | // so the caller is responsible for taking over the reference count owned by this copy 332 | std::pair*> release_internals() noexcept { 333 | auto p = this->control_block ? this->control_block->get_ptr() : nullptr; 334 | return std::make_pair(p, std::exchange(this->control_block, nullptr)); 335 | } 336 | 337 | }; 338 | 339 | // ========================================================================================== 340 | // IMPLEMENTATIONS OF PREDECLARED FRIEND FUNCTIONS 341 | // ========================================================================================== 342 | 343 | template 344 | Deleter* get_deleter(const shared_ptr& sp) noexcept { 345 | if (sp.control_block) { 346 | return static_cast(sp.control_block.get_deleter(typeid(Deleter))); 347 | } 348 | return nullptr; 349 | } 350 | 351 | template 352 | [[nodiscard]] shared_ptr make_shared(Args&&... args) { 353 | const auto control_block = new details::fast_control_block(typename details::fast_control_block::inline_tag{}, std::forward(args)...); 354 | assert(control_block != nullptr); 355 | assert(control_block->get_ptr() != nullptr); 356 | shared_ptr result(control_block->get_ptr(), control_block); 357 | return result; 358 | } 359 | 360 | 361 | // ========================================================================================== 362 | // COMPARISON 363 | // ========================================================================================== 364 | 365 | template 366 | auto operator<=>(const shared_ptr& left, const shared_ptr& right) noexcept { 367 | return left.get() <=> right.get(); 368 | } 369 | 370 | template 371 | auto operator<=>(const shared_ptr& left, std::nullptr_t) noexcept { 372 | return left.get() <=> static_cast::element_type*>(nullptr); 373 | } 374 | 375 | template 376 | auto operator<=>(std::nullptr_t, const shared_ptr& right) noexcept { 377 | return static_cast::element_type*>(nullptr) <=> right.get(); 378 | } 379 | 380 | template 381 | auto operator==(const shared_ptr& left, const shared_ptr& right) noexcept { 382 | return left.get() == right.get(); 383 | } 384 | 385 | template 386 | auto operator==(const shared_ptr& left, std::nullptr_t) noexcept { 387 | return left.get() == static_cast::element_type*>(nullptr); 388 | } 389 | 390 | template 391 | auto operator==(std::nullptr_t, const shared_ptr& right) noexcept { 392 | return static_cast::element_type*>(nullptr) == right.get(); 393 | } 394 | 395 | 396 | } // namespace parlay 397 | -------------------------------------------------------------------------------- /include/parlay/shared_ptr.hpp: -------------------------------------------------------------------------------- 1 | // A custom modernized C++20 shared_ptr and weak_ptr implementation used by the atomic_shared_ptr. 2 | // 3 | // It tries to closely match the standard library std::shared_ptr as much as possible. Most of the 4 | // code roughly follows the same implementation strategies as libstdc++, libc++, and Microsoft STL. 5 | // 6 | // The main difference is using Hazard Pointer deferred reclaimation on the control block to 7 | // allow atomic_shared_ptr to be lock free and not require a split reference count. 8 | // 9 | // No support for std::shared_ptr, i.e., shared pointers of arrays. 
Everything else should 10 | // be supported, including custom deleters, allocators, weak_ptrs, enable_shared_from_this, etc. 11 | // 12 | 13 | #pragma once 14 | 15 | #include 16 | #include 17 | #include 18 | 19 | #include "details/hazard_pointers.hpp" 20 | #include "details/wait_free_counter.hpp" 21 | 22 | #include 23 | 24 | namespace parlay { 25 | 26 | template 27 | class atomic_shared_ptr; 28 | 29 | template 30 | class shared_ptr; 31 | 32 | template 33 | class weak_ptr; 34 | 35 | template 36 | class enable_shared_from_this; 37 | 38 | template 39 | Deleter* get_deleter(const shared_ptr&) noexcept; 40 | 41 | namespace details { 42 | 43 | // Very useful explanation from Raymond Chen's blog: 44 | // https://devblogs.microsoft.com/oldnewthing/20230816-00/?p=108608 45 | template 46 | concept SupportsESFT = requires() { 47 | typename T::esft_detector; // Class should derive from ESFT 48 | requires std::same_as>; 49 | requires std::convertible_to*>; // Inheritance is unambiguous 50 | }; 51 | 52 | using ref_cnt_type = uint32_t; 53 | 54 | 55 | // Base class of all control blocks used by smart pointers. This base class is agnostic 56 | // to the type of the managed object, so all type-specific operations are implemented 57 | // by virtual functions in the derived classes. 58 | struct control_block_base { 59 | 60 | template 61 | friend class atomic_shared_ptr; 62 | 63 | explicit control_block_base() noexcept : strong_count(1), weak_count(1) { } 64 | 65 | control_block_base(const control_block_base &) = delete; 66 | control_block_base& operator=(const control_block_base&) = delete; 67 | 68 | virtual ~control_block_base() = default; 69 | 70 | // Destroy the managed object. Called when the strong count hits zero 71 | virtual void dispose() noexcept = 0; 72 | 73 | // Destroy the control block. dispose() must have been called prior to 74 | // calling destroy. Called when the weak count hits zero. 75 | virtual void destroy() noexcept = 0; 76 | 77 | // Delay the destroy using hazard pointers in case there are in in-flight increments. 78 | void retire() noexcept { 79 | // Defer destruction of the control block using hazard pointers 80 | get_hazard_list().retire(this); 81 | } 82 | 83 | // Return the custom deleter for this object if the deleter has the type, 84 | // indicated by the argument, otherwise return nullptr 85 | virtual void* get_deleter(std::type_info&) const noexcept { return nullptr; } 86 | 87 | // Increment the strong reference count. The strong reference count must not be zero 88 | void increment_strong_count() noexcept { 89 | assert(strong_count.load(std::memory_order_relaxed) > 0); 90 | [[maybe_unused]] auto success = strong_count.increment(1, std::memory_order_relaxed); 91 | assert(success); 92 | } 93 | 94 | // Increment the strong reference count if it is not zero. Return true if successful, 95 | // otherwise return false indicating that the strong reference count is zero. 96 | bool increment_strong_count_if_nonzero() noexcept { 97 | return strong_count.increment(1, std::memory_order_relaxed); 98 | } 99 | 100 | // Release a strong reference to the object. If the strong reference count hits zero, 101 | // the object is disposed and the weak reference count is decremented. If the weak 102 | // reference count also reaches zero, the object is immediately destroyed. 
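// [Sketch for illustration; not part of the original header] decrement_strong_count() below uses
// the release-decrement + acquire-fence idiom. A minimal stand-alone version of the same idiom,
// for a hypothetical intrusively counted `node` type (assumes <atomic> and <cstdint>):
//
//   struct node {
//     std::atomic<uint32_t> refs{1};
//     // ... payload ...
//   };
//
//   void release(node* n) {
//     if (n->refs.fetch_sub(1, std::memory_order_release) == 1) {
//       std::atomic_thread_fence(std::memory_order_acquire);  // pairs with all earlier release decrements
//       delete n;                                             // no other owner can still reach n
//     }
//   }
//
// Only the thread performing the final decrement pays for the acquire fence, which is why this
// can be cheaper than decrementing with memory_order_acq_rel on every release.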
103 | void decrement_strong_count() noexcept { 104 | 105 | // A decrement-release + an acquire fence is recommended by Boost's documentation: 106 | // https://www.boost.org/doc/libs/1_57_0/doc/html/atomic/usage_examples.html 107 | // Alternatively, an acquire-release decrement would work, but might be less efficient 108 | // since the acquire is only relevant if the decrement zeros the counter. 109 | if (strong_count.decrement(1, std::memory_order_release)) { 110 | std::atomic_thread_fence(std::memory_order_acquire); 111 | 112 | // The strong reference count has hit zero, so the managed object can be disposed of. 113 | dispose(); 114 | decrement_weak_count(); 115 | } 116 | } 117 | 118 | // Increment the weak reference count. 119 | void increment_weak_count() noexcept { 120 | weak_count.fetch_add(1, std::memory_order_relaxed); 121 | } 122 | 123 | // Release weak references to the object. If this causes the weak reference count 124 | // to hit zero, the control block is ready to be destroyed. 125 | void decrement_weak_count() noexcept { 126 | if (weak_count.fetch_sub(1, std::memory_order_release) == 1) { 127 | retire(); 128 | } 129 | } 130 | 131 | [[nodiscard]] virtual control_block_base* get_next() const noexcept = 0; 132 | virtual void set_next(control_block_base* next) noexcept = 0; 133 | 134 | [[nodiscard]] virtual void* get_ptr() const noexcept = 0; 135 | 136 | auto get_use_count() const noexcept { return strong_count.load(std::memory_order_relaxed); } 137 | auto get_weak_count() const noexcept { return weak_count.load(std::memory_order_relaxed); } 138 | 139 | private: 140 | WaitFreeCounter strong_count; 141 | std::atomic weak_count; 142 | }; 143 | 144 | 145 | // Diambiguate make_shared and make_shared_for_overwrite 146 | struct for_overwrite_tag {}; 147 | 148 | // Shared base class for control blocks that store the object directly inside 149 | template 150 | struct control_block_inplace_base : public control_block_base { 151 | 152 | control_block_inplace_base() : control_block_base(), empty{} { } 153 | 154 | T* get() const noexcept { return const_cast(std::addressof(object)); } 155 | 156 | void* get_ptr() const noexcept override { 157 | return static_cast(get()); 158 | } 159 | 160 | // Expose intrusive pointers used by Hazard Pointers 161 | [[nodiscard]] control_block_base* get_next() const noexcept override { return next_; } 162 | void set_next(control_block_base* next) noexcept override { next_ = next; } 163 | 164 | ~control_block_inplace_base() override { } 165 | 166 | 167 | union { 168 | std::monostate empty{}; 169 | T object; // Since the object is inside a union, we get precise control over its lifetime 170 | control_block_base* next_; // Intrusive ptr used for garbage collection by Hazard Pointers 171 | }; 172 | }; 173 | 174 | 175 | template 176 | struct control_block_inplace final : public control_block_inplace_base { 177 | 178 | // TODO: Don't hardcode an allocator override here. Should just 179 | // use allocate_shared and pass in an appropriate allocator. 
180 | static void* operator new(std::size_t sz) { 181 | assert(sz == sizeof(control_block_inplace)); 182 | return parlay::type_allocator::alloc(); 183 | } 184 | 185 | static void operator delete(void* ptr) { 186 | parlay::type_allocator::free(static_cast(ptr)); 187 | } 188 | 189 | explicit control_block_inplace(for_overwrite_tag) { 190 | ::new(static_cast(this->get())) T; // Default initialization when using make_shared_for_overwrite 191 | } 192 | 193 | template 194 | requires (!(std::is_same_v || ...)) 195 | explicit control_block_inplace(Args&&... args) { 196 | ::new(static_cast(this->get())) T(std::forward(args)...); 197 | } 198 | 199 | void dispose() noexcept override { 200 | this->get()->~T(); 201 | } 202 | 203 | void destroy() noexcept override { 204 | delete this; 205 | } 206 | }; 207 | 208 | template 209 | struct control_block_inplace_allocator final : public control_block_inplace_base { 210 | 211 | using cb_allocator_t = typename std::allocator_traits::template rebind_alloc; 212 | using object_allocator_t = typename std::allocator_traits::template rebind_alloc>; 213 | 214 | control_block_inplace_allocator(Allocator, for_overwrite_tag) { 215 | ::new(static_cast(this->get())) T; // Default initialization when using make_shared_for_overwrite 216 | // Unfortunately not possible via the allocator since the C++ 217 | // standard forgot about this case, apparently. 218 | } 219 | 220 | template 221 | requires (!(std::is_same_v && ...)) 222 | explicit control_block_inplace_allocator(Allocator alloc_, Args&&... args) : alloc(alloc_) { 223 | std::allocator_traits::construct(alloc, this->get(), std::forward(args)...); 224 | } 225 | 226 | ~control_block_inplace_allocator() noexcept = default; 227 | 228 | void dispose() noexcept override { 229 | std::allocator_traits::destroy(alloc, this->get()); 230 | } 231 | 232 | void destroy() noexcept override { 233 | cb_allocator_t a{alloc}; 234 | this->~control_block_inplace_allocator(); 235 | std::allocator_traits::deallocate(a, this, 1); 236 | } 237 | 238 | [[no_unique_address]] object_allocator_t alloc; 239 | }; 240 | 241 | 242 | // A control block pointing to a dynamically allocated object without a custom allocator or custom deleter 243 | template 244 | struct control_block_with_ptr : public control_block_base { 245 | 246 | using base = control_block_base; 247 | 248 | explicit control_block_with_ptr(T* ptr_) : ptr(ptr_) { } 249 | 250 | void dispose() noexcept override { 251 | delete get(); 252 | } 253 | 254 | void destroy() noexcept override { 255 | delete this; 256 | } 257 | 258 | void* get_ptr() const noexcept override { 259 | return static_cast(get()); 260 | } 261 | 262 | T* get() const noexcept { 263 | return const_cast(ptr); 264 | } 265 | 266 | // Expose intrusive pointers used by Hazard Pointers 267 | [[nodiscard]] control_block_base* get_next() const noexcept override { return next_; } 268 | void set_next(control_block_base* next) noexcept override { next_ = next; } 269 | 270 | union { 271 | control_block_base* next_; // Intrusive ptr used for garbage collection by Hazard pointers 272 | T* ptr; // Pointer to the managed object while it is alive 273 | }; 274 | }; 275 | 276 | // A control block pointing to a dynamically allocated object with a custom deleter 277 | template 278 | struct control_block_with_deleter : public control_block_with_ptr { 279 | 280 | using base = control_block_with_ptr; 281 | 282 | control_block_with_deleter(T* ptr_, Deleter deleter_) : base(ptr_), deleter(std::move(deleter_)) { } 283 | 284 | 
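// [Sketch for illustration; not part of the original header] How a user-supplied deleter ends up
// in a control_block_with_deleter. The lambda and the file name "data.txt" are hypothetical; the
// shared_ptr(U*, Deleter) constructor defined later in this header allocates the block
// (assumes <cstdio>):
//
//   auto close_file = [](std::FILE* f) { if (f != nullptr) std::fclose(f); };
//   parlay::shared_ptr<std::FILE> fp(std::fopen("data.txt", "r"), close_file);
//   // Allocates a control_block_with_deleter<std::FILE, decltype(close_file)>. When the last
//   // strong reference drops, dispose() invokes close_file(ptr), and the deleter can be
//   // recovered through parlay::get_deleter<decltype(close_file)>(fp) while fp is alive.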
~control_block_with_deleter() noexcept override = default; 285 | 286 | // Get a pointer to the custom deleter if it is of the request type indicated by the argument 287 | [[nodiscard]] void* get_deleter(const std::type_info& type) const noexcept override { 288 | if (type == typeid(Deleter)) { 289 | return const_cast(std::addressof(deleter)); 290 | } 291 | else { 292 | return nullptr; 293 | } 294 | } 295 | 296 | // Dispose of the managed object using the provided custom deleter 297 | void dispose() noexcept override { 298 | deleter(this->ptr); 299 | } 300 | 301 | [[no_unique_address]] Deleter deleter; 302 | }; 303 | 304 | 305 | // A control block pointing to a dynamically allocated object with a custom deleter and custom allocator 306 | template 307 | struct control_block_with_allocator final : public control_block_with_deleter { 308 | 309 | using base = control_block_with_deleter; 310 | using allocator_t = typename std::allocator_traits::template rebind_alloc; 311 | 312 | control_block_with_allocator(T* ptr_, Deleter deleter_, const Allocator& alloc_) : 313 | base(ptr_, std::move(deleter_)), alloc(alloc_) { } 314 | 315 | ~control_block_with_allocator() noexcept override = default; 316 | 317 | // Deallocate the control block using the provided custom allocator 318 | void destroy() noexcept override { 319 | allocator_t a{alloc}; // We must copy the allocator otherwise it gets destroyed 320 | this->~control_block_with_allocator(); // on the next line, then we can't use it on the final line 321 | std::allocator_traits::deallocate(a, this, 1); 322 | } 323 | 324 | [[no_unique_address]] allocator_t alloc; 325 | }; 326 | 327 | 328 | // Base class for shared_ptr and weak_ptr 329 | template 330 | class smart_ptr_base { 331 | 332 | template 333 | friend class atomic_shared_ptr; 334 | 335 | public: 336 | using element_type = T; 337 | 338 | [[nodiscard]] long use_count() const noexcept { 339 | return control_block ? control_block->get_use_count() : 0; 340 | } 341 | 342 | // Comparator for sorting shared pointers. Ordering is based on the address of the control blocks. 
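// [Sketch for illustration; not part of the original header] Why owner_before() compares control
// blocks rather than stored pointers: with the aliasing constructor defined further below, two
// shared_ptrs can point at different sub-objects while sharing a single control block
// (assumes <cassert> and <utility>):
//
//   auto owner = parlay::make_shared<std::pair<int, int>>(1, 2);
//   parlay::shared_ptr<int> first(owner, &owner->first);
//   parlay::shared_ptr<int> second(owner, &owner->second);
//   assert(first.get() != second.get());                                // different pointees...
//   assert(!first.owner_before(second) && !second.owner_before(first)); // ...but equivalent owners,
//                                                                       // so owner-based containers
//                                                                       // treat them as one key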
343 | template 344 | [[nodiscard]] bool owner_before(const smart_ptr_base& other) const noexcept { 345 | return control_block < other.control_block; 346 | } 347 | 348 | smart_ptr_base& operator=(const smart_ptr_base&) = delete; 349 | 350 | [[nodiscard]] element_type* get() const noexcept { 351 | return ptr; 352 | } 353 | 354 | protected: 355 | 356 | constexpr smart_ptr_base() noexcept = default; 357 | 358 | smart_ptr_base(element_type* ptr_, control_block_base* control_block_) noexcept 359 | : ptr(ptr_), control_block(control_block_) { 360 | assert(control_block != nullptr || ptr == nullptr); // Can't have non-null ptr and null control_block 361 | } 362 | 363 | template 364 | requires std::convertible_to 365 | explicit smart_ptr_base(const smart_ptr_base& other) noexcept 366 | : ptr(other.ptr), control_block(other.control_block) { 367 | assert(control_block != nullptr || ptr == nullptr); // Can't have non-null ptr and null control_block 368 | } 369 | 370 | template 371 | requires std::convertible_to 372 | explicit smart_ptr_base(smart_ptr_base&& other) noexcept 373 | : ptr(std::exchange(other.ptr, nullptr)), control_block(std::exchange(other.control_block, nullptr)) { 374 | assert(control_block != nullptr || ptr == nullptr); // Can't have non-null ptr and null control_block 375 | } 376 | 377 | ~smart_ptr_base() = default; 378 | 379 | void swap_ptrs(smart_ptr_base& other) noexcept { 380 | std::swap(ptr, other.ptr); 381 | std::swap(control_block, other.control_block); 382 | } 383 | 384 | void increment_strong() const noexcept { 385 | if (control_block) { 386 | control_block->increment_strong_count(); 387 | } 388 | } 389 | 390 | [[nodiscard]] bool increment_if_nonzero() const noexcept { 391 | return control_block && control_block->increment_strong_count_if_nonzero(); 392 | } 393 | 394 | void decrement_strong() noexcept { 395 | if (control_block) { 396 | control_block->decrement_strong_count(); 397 | } 398 | } 399 | 400 | void increment_weak() const noexcept { 401 | if (control_block) { 402 | control_block->increment_weak_count(); 403 | } 404 | } 405 | 406 | void decrement_weak() noexcept { 407 | if (control_block) { 408 | control_block->decrement_weak_count(); 409 | } 410 | } 411 | 412 | template 413 | friend Deleter* ::parlay::get_deleter(const shared_ptr&) noexcept; 414 | 415 | element_type* ptr{nullptr}; 416 | control_block_base* control_block{nullptr}; 417 | }; 418 | 419 | } // namespace details 420 | 421 | template 422 | class shared_ptr : public details::smart_ptr_base { 423 | 424 | using base = details::smart_ptr_base; 425 | 426 | template 427 | friend class atomic_shared_ptr; 428 | 429 | template 430 | friend class shared_ptr; 431 | 432 | template 433 | friend class weak_ptr; 434 | 435 | // Private constructor used by atomic_shared_ptr::load and weak_ptr::lock 436 | shared_ptr(T* ptr_, details::control_block_base* control_block_) : base(ptr_, control_block_) { } 437 | 438 | public: 439 | using typename base::element_type; 440 | using weak_type = weak_ptr; 441 | 442 | // Decrement the reference count on destruction. Resource cleanup is all 443 | // handled internally by the control block (including deleting itself!) 
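// [Sketch for illustration; not part of the original header] The two-phase cleanup triggered by
// the destructor below: dispose() destroys the managed object when the strong count hits zero,
// while the control block itself is only retired once the weak count also hits zero:
//
//   auto sp = parlay::make_shared<int>(42);   // strong = 1, weak = 1 (self-reference held by the strong refs)
//   parlay::weak_ptr<int> wp = sp;            // strong = 1, weak = 2
//   sp.reset();                               // strong = 0 -> dispose() destroys the int;
//                                             // weak = 1, so the control block survives
//   assert(wp.expired());                     // lock() would now return a null shared_ptr
//   wp = {};                                  // weak = 0 -> retire()/destroy() reclaims the block
//                                             // (deferred through the hazard-pointer list)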
444 | ~shared_ptr() noexcept { 445 | this->decrement_strong(); 446 | } 447 | 448 | // ========================================================================================== 449 | // INITIALIZING AND NULL CONSTRUCTORS 450 | // ========================================================================================== 451 | 452 | constexpr shared_ptr() noexcept = default; 453 | 454 | constexpr explicit(false) shared_ptr(std::nullptr_t) noexcept {} // NOLINT(google-explicit-constructor) 455 | 456 | template 457 | requires std::convertible_to 458 | explicit shared_ptr(U* p) { 459 | std::unique_ptr up(p); // Hold inside a unique_ptr so that p is deleted if the allocation throws 460 | auto control_block = new details::control_block_with_ptr(p); 461 | this->set_ptrs_and_esft(up.release(), control_block); 462 | } 463 | 464 | template 465 | requires std::convertible_to && std::copy_constructible && std::invocable 466 | shared_ptr(U* p, Deleter deleter) { 467 | std::unique_ptr up(p, deleter); 468 | auto control_block = new details::control_block_with_deleter(p, std::move(deleter)); 469 | this->set_ptrs_and_esft(up.release(), control_block); 470 | } 471 | 472 | template 473 | requires std::convertible_to && std::copy_constructible && std::invocable 474 | shared_ptr(U* p, Deleter deleter, Allocator alloc) { 475 | using cb_alloc_t = typename std::allocator_traits::template rebind_alloc>; 476 | 477 | std::unique_ptr up(p, deleter); 478 | cb_alloc_t a{alloc}; 479 | auto control_block = std::allocator_traits::allocate(a, 1); 480 | std::allocator_traits::construct(a, control_block, p, std::move(deleter), a); 481 | this->set_ptrs_and_esft(up.release(), control_block); 482 | } 483 | 484 | template 485 | requires std::convertible_to && std::copy_constructible && std::invocable 486 | shared_ptr(std::nullptr_t, Deleter deleter) { 487 | std::unique_ptr up(nullptr, deleter); 488 | auto control_block = new details::control_block_with_deleter(nullptr, std::move(deleter)); 489 | this->set_ptrs_and_esft(nullptr, control_block); 490 | } 491 | 492 | template 493 | requires std::convertible_to && std::copy_constructible && std::invocable 494 | shared_ptr(std::nullptr_t, Deleter deleter, Allocator alloc) { 495 | using cb_alloc_t = typename std::allocator_traits::template rebind_alloc>; 496 | 497 | std::unique_ptr up(nullptr, deleter); 498 | cb_alloc_t a{alloc}; 499 | auto control_block = std::allocator_traits::allocate(a, 1); 500 | std::allocator_traits::construct(a, control_block, nullptr, std::move(deleter), a); 501 | this->set_ptrs_and_esft(up.release(), control_block); 502 | } 503 | 504 | // ========================================================================================== 505 | // ALIASING CONSTRUCTORS 506 | // ========================================================================================== 507 | 508 | template 509 | shared_ptr(const shared_ptr& other, element_type* p) noexcept : base(p, other.control_block) { 510 | this->increment_strong(); 511 | } 512 | 513 | template 514 | shared_ptr(shared_ptr&& other, element_type* p) noexcept : base(p, other.control_block) { 515 | other.ptr = nullptr; 516 | other.control_block = nullptr; 517 | } 518 | 519 | // ========================================================================================== 520 | // COPY CONSTRUCTORS 521 | // ========================================================================================== 522 | 523 | shared_ptr(const shared_ptr& other) noexcept : base(other) { 524 | this->increment_strong(); 525 | } 526 | 527 | template 
528 | requires std::convertible_to 529 | explicit(false) shared_ptr(const shared_ptr& other) noexcept { // NOLINT(google-explicit-constructor) 530 | other.increment_strong(); 531 | this->set_ptrs_and_esft(other.ptr, other.control_block); 532 | } 533 | 534 | // ========================================================================================== 535 | // MOVE CONSTRUCTORS 536 | // ========================================================================================== 537 | 538 | shared_ptr(shared_ptr&& other) noexcept { 539 | this->set_ptrs_and_esft(other.ptr, other.control_block); 540 | other.ptr = nullptr; 541 | other.control_block = nullptr; 542 | } 543 | 544 | template 545 | requires std::convertible_to 546 | explicit(false) shared_ptr(shared_ptr&& other) noexcept { // NOLINT(google-explicit-constructor) 547 | this->set_ptrs_and_esft(other.ptr, other.control_block); 548 | other.ptr = nullptr; 549 | other.control_block = nullptr; 550 | } 551 | 552 | // ========================================================================================== 553 | // CONVERTING CONSTRUCTORS 554 | // ========================================================================================== 555 | 556 | template 557 | requires std::convertible_to 558 | explicit(false) shared_ptr(const weak_ptr& other) { // NOLINT(google-explicit-constructor) 559 | if (other.increment_if_nonzero()) { 560 | this->set_ptrs_and_esft(other.ptr, other.control_block); 561 | } 562 | else { 563 | throw std::bad_weak_ptr(); 564 | } 565 | } 566 | 567 | template 568 | requires std::convertible_to && std::convertible_to::pointer, T*> 569 | explicit(false) shared_ptr(std::unique_ptr&& other) { // NOLINT(google-explicit-constructor) 570 | using ptr_type = typename std::unique_ptr::pointer; 571 | 572 | if (other) { 573 | // [https://en.cppreference.com/w/cpp/memory/shared_ptr/shared_ptr] 574 | // If Deleter is a reference type, it is equivalent to shared_ptr(r.release(), std::ref(r.get_deleter()). 
575 | // Otherwise, it is equivalent to shared_ptr(r.release(), std::move(r.get_deleter())) 576 | if constexpr (std::is_reference_v) { 577 | auto control_block = new details::control_block_with_deleter 578 | (other.get(), std::ref(other.get_deleter())); 579 | this->set_ptrs_and_esft(other.release(), control_block); 580 | } 581 | else { 582 | auto control_block = new details::control_block_with_deleter 583 | (other.get(), std::move(other.get_deleter())); 584 | this->set_ptrs_and_esft(other.release(), control_block); 585 | } 586 | } 587 | } 588 | 589 | // ========================================================================================== 590 | // ASSIGNMENT OPERATORS 591 | // ========================================================================================== 592 | 593 | shared_ptr& operator=(const shared_ptr& other) noexcept { 594 | shared_ptr(other).swap(*this); 595 | return *this; 596 | } 597 | 598 | template 599 | requires std::convertible_to 600 | shared_ptr& operator=(const shared_ptr& other) noexcept { 601 | shared_ptr(other).swap(*this); 602 | return *this; 603 | } 604 | 605 | shared_ptr& operator=(shared_ptr&& other) noexcept { 606 | shared_ptr(std::move(other)).swap(*this); 607 | return *this; 608 | } 609 | 610 | template 611 | requires std::convertible_to 612 | shared_ptr& operator=(shared_ptr&& other) noexcept { 613 | shared_ptr(std::move(other)).swap(*this); 614 | return *this; 615 | } 616 | 617 | template 618 | requires std::convertible_to && std::convertible_to::pointer, T*> 619 | shared_ptr& operator=(std::unique_ptr&& other) { 620 | shared_ptr(std::move(other)).swap(*this); 621 | return *this; 622 | } 623 | 624 | // ========================================================================================== 625 | // SWAP, RESET 626 | // ========================================================================================== 627 | 628 | void swap(shared_ptr& other) noexcept { 629 | this->swap_ptrs(other); 630 | } 631 | 632 | void reset() noexcept { 633 | shared_ptr().swap(*this); 634 | } 635 | 636 | void reset(std::nullptr_t) noexcept { 637 | shared_ptr().swap(*this); 638 | } 639 | 640 | template 641 | requires std::copy_constructible && std::invocable 642 | void reset(std::nullptr_t, Deleter deleter) { 643 | shared_ptr(nullptr, deleter).swap(*this); 644 | } 645 | 646 | template 647 | requires std::copy_constructible && std::invocable 648 | void reset(std::nullptr_t, Deleter deleter, Allocator alloc) { 649 | shared_ptr(nullptr, deleter, alloc).swap(*this); 650 | } 651 | 652 | template 653 | requires std::convertible_to 654 | void reset(U* p) { 655 | shared_ptr(p).swap(*this); 656 | } 657 | 658 | template 659 | requires std::convertible_to && std::copy_constructible && std::invocable 660 | void reset(U* p, Deleter deleter) { 661 | shared_ptr(p, deleter).swap(*this); 662 | } 663 | 664 | template 665 | requires std::convertible_to && std::copy_constructible && std::invocable 666 | void reset(U* p, Deleter deleter, Allocator alloc) { 667 | shared_ptr(p, deleter, alloc).swap(*this); 668 | } 669 | 670 | // ========================================================================================== 671 | // ACCESS, DEREFERENCE 672 | // ========================================================================================== 673 | 674 | [[nodiscard]] T& operator*() const noexcept requires (!std::is_void_v) { 675 | return *(this->get()); 676 | } 677 | 678 | [[nodiscard]] T* operator->() const noexcept { 679 | return this->get(); 680 | } 681 | 682 | explicit operator bool() 
const noexcept { 683 | return this->get() != nullptr; 684 | } 685 | 686 | // ========================================================================================== 687 | // FACTORIES 688 | // ========================================================================================== 689 | 690 | template 691 | // requires std::constructible_from 692 | friend shared_ptr make_shared(Args&&... args); 693 | 694 | template 695 | requires std::constructible_from 696 | friend shared_ptr make_shared_for_overwrite(); 697 | 698 | template 699 | requires std::constructible_from 700 | friend shared_ptr allocate_shared(const Allocator& allocator, Args&&... args); 701 | 702 | template 703 | requires std::constructible_from 704 | friend shared_ptr allocate_shared_for_overwrite(const Allocator& allocator); 705 | 706 | private: 707 | 708 | template 709 | void set_ptrs_and_esft(U* ptr_, details::control_block_base* control_block_) { 710 | static_assert(std::convertible_to); 711 | 712 | this->ptr = ptr_; 713 | this->control_block = control_block_; 714 | 715 | if constexpr(details::SupportsESFT) { 716 | if (this->ptr && this->ptr->weak_this.expired()) { 717 | this->ptr->weak_this = shared_ptr>(*this, const_cast*>(this->ptr)); 718 | } 719 | } 720 | } 721 | 722 | // Release the ptr and control_block to the caller. Does not modify the reference count, 723 | // so the caller is responsible for taking over the reference count owned by this copy 724 | std::pair release_internals() noexcept { 725 | return std::make_pair(std::exchange(this->ptr, nullptr), std::exchange(this->control_block, nullptr)); 726 | } 727 | 728 | }; 729 | 730 | // ========================================================================================== 731 | // IMPLEMENTATIONS OF PREDECLARED FRIEND FUNCTIONS 732 | // ========================================================================================== 733 | 734 | template 735 | Deleter* get_deleter(const shared_ptr& sp) noexcept { 736 | if (sp.control_block) { 737 | return static_cast(sp.control_block.get_deleter(typeid(Deleter))); 738 | } 739 | return nullptr; 740 | } 741 | 742 | template 743 | [[nodiscard]] shared_ptr make_shared(Args&&... args) { 744 | const auto control_block = new details::control_block_inplace(std::forward(args)...); 745 | shared_ptr result(control_block->get(), control_block); 746 | return result; 747 | } 748 | 749 | template 750 | [[nodiscard]] shared_ptr make_shared_for_overwrite() { 751 | const auto control_block = new details::control_block_inplace(details::for_overwrite_tag{}); 752 | shared_ptr result; 753 | result.set_ptrs_and_esft(control_block.get(), control_block); 754 | return result; 755 | } 756 | 757 | template 758 | [[nodiscard]] shared_ptr allocate_shared(const Allocator& allocator, Args&&... 
args) { 759 | using control_block_type = details::control_block_inplace_allocator, Allocator>; 760 | using allocator_type = typename std::allocator_traits::template rebind_alloc; 761 | 762 | allocator_type a{allocator}; 763 | const auto control_block = std::allocator_traits::allocate(a, 1); 764 | std::allocator_traits::construct(a, control_block, a, std::forward(args)...); 765 | shared_ptr result; 766 | result.set_ptrs_and_esft(control_block.get(), control_block); 767 | return result; 768 | } 769 | 770 | template 771 | [[nodiscard]] shared_ptr allocate_shared_for_overwrite(const Allocator& allocator) { 772 | using control_block_type = details::control_block_inplace_allocator, Allocator>; 773 | using allocator_type = typename std::allocator_traits::template rebind_alloc; 774 | 775 | allocator_type a{allocator}; 776 | const auto control_block = std::allocator_traits::allocate(a, 1); 777 | std::allocator_traits::construct(a, control_block, a, details::for_overwrite_tag{}); 778 | shared_ptr result; 779 | result.set_ptrs_and_esft(control_block.get(), control_block); 780 | return result; 781 | } 782 | 783 | // ========================================================================================== 784 | // COMPARISON 785 | // ========================================================================================== 786 | 787 | template 788 | auto operator<=>(const shared_ptr& left, const shared_ptr& right) noexcept { 789 | return left.get() <=> right.get(); 790 | } 791 | 792 | template 793 | auto operator<=>(const shared_ptr& left, std::nullptr_t) noexcept { 794 | return left.get() <=> static_cast::element_type*>(nullptr); 795 | } 796 | 797 | template 798 | auto operator<=>(std::nullptr_t, const shared_ptr& right) noexcept { 799 | return static_cast::element_type*>(nullptr) <=> right.get(); 800 | } 801 | 802 | template 803 | auto operator==(const shared_ptr& left, const shared_ptr& right) noexcept { 804 | return left.get() == right.get(); 805 | } 806 | 807 | template 808 | auto operator==(const shared_ptr& left, std::nullptr_t) noexcept { 809 | return left.get() == static_cast::element_type*>(nullptr); 810 | } 811 | 812 | template 813 | auto operator==(std::nullptr_t, const shared_ptr& right) noexcept { 814 | return static_cast::element_type*>(nullptr) == right.get(); 815 | } 816 | 817 | template 818 | class weak_ptr : public details::smart_ptr_base { 819 | 820 | using base = details::smart_ptr_base; 821 | 822 | public: 823 | 824 | // ========================================================================================== 825 | // CONSTRUCTORS 826 | // ========================================================================================== 827 | 828 | constexpr weak_ptr() noexcept = default; 829 | 830 | weak_ptr(const weak_ptr& other) noexcept : base(other) { } 831 | 832 | template 833 | requires std::convertible_to 834 | explicit(false) weak_ptr(const shared_ptr& other) noexcept // NOLINT(google-explicit-constructor) 835 | : base(other) { 836 | this->increment_weak(); 837 | } 838 | 839 | template 840 | requires std::convertible_to && std::convertible_to 841 | explicit(false) weak_ptr(const weak_ptr& other) noexcept // NOLINT(google-explicit-constructor) 842 | : base(other) { 843 | this->increment_weak(); 844 | } 845 | 846 | template 847 | requires std::convertible_to 848 | explicit(false) weak_ptr(const weak_ptr& other) noexcept // NOLINT(google-explicit-constructor) 849 | : base{} { 850 | 851 | // This case is subtle. 
If T2 virtually inherits T, then it might require RTTI to 852 | // convert from T2* to T*. If other.ptr is expired, the vtable may have been 853 | // destroyed, which is very bad. Furthermore, other.ptr could expire concurrently 854 | // at any point by another thread, so we can not just check. So, we increment the 855 | // strong ref count to prevent other from being destroyed while we copy. 856 | if (other.control_block) { 857 | this->control_block = other.control_block; 858 | this->control_block->increment_weak_count(); 859 | 860 | if (this->increment_if_nonzero()) { 861 | this->ptr = other.ptr; // Now that we own a strong ref, it is safe to copy the ptr 862 | this->control_block->decrement_strong_count(); 863 | } 864 | } 865 | } 866 | 867 | weak_ptr(weak_ptr&& other) noexcept : base(std::move(other)) { } 868 | 869 | template 870 | requires std::convertible_to && std::convertible_to 871 | explicit(false) weak_ptr(weak_ptr&& other) noexcept // NOLINT(google-explicit-constructor) 872 | : base(std::move(other)) { } 873 | 874 | template 875 | requires std::convertible_to 876 | explicit(false) weak_ptr(weak_ptr&& other) noexcept : base{} { // NOLINT(google-explicit-constructor) 877 | this->control_block = std::exchange(other.control_block, nullptr); 878 | 879 | // See comment in copy constructor. Same subtlety applies. 880 | if (this->increment_if_nonzero()) { 881 | this->ptr = other.ptr; 882 | this->control_block->decrement_strong_count(); 883 | } 884 | 885 | other.ptr = nullptr; 886 | } 887 | 888 | ~weak_ptr() { 889 | this->decrement_weak(); 890 | } 891 | 892 | // ========================================================================================== 893 | // ASSIGNMENT OPERATORS 894 | // ========================================================================================== 895 | 896 | weak_ptr& operator=(const weak_ptr& other) noexcept { 897 | weak_ptr(other).swap(*this); 898 | return *this; 899 | } 900 | 901 | template 902 | requires std::convertible_to 903 | weak_ptr& operator=(const weak_ptr& other) noexcept { 904 | weak_ptr(other).swap(*this); 905 | return *this; 906 | } 907 | 908 | weak_ptr& operator=(weak_ptr&& other) noexcept { 909 | weak_ptr(std::move(other)).swap(*this); 910 | return *this; 911 | } 912 | 913 | template 914 | requires std::convertible_to 915 | weak_ptr& operator=(weak_ptr&& other) noexcept { 916 | weak_ptr(std::move(other)).swap(*this); 917 | return *this; 918 | } 919 | 920 | template 921 | requires std::convertible_to 922 | weak_ptr& operator=(const shared_ptr& other) noexcept { 923 | weak_ptr(other).swap(*this); 924 | return *this; 925 | } 926 | 927 | void swap(weak_ptr& other) noexcept { 928 | this->swap_ptrs(other); 929 | } 930 | 931 | [[nodiscard]] bool expired() const noexcept { 932 | return this->use_count() == 0; 933 | } 934 | 935 | [[nodiscard]] shared_ptr lock() const noexcept { 936 | if (this->increment_if_nonzero()) { 937 | return shared_ptr{this->ptr, this->control_block}; 938 | } 939 | return {nullptr}; 940 | } 941 | 942 | }; 943 | 944 | 945 | // ========================================================================================== 946 | // shared_from_this 947 | // ========================================================================================== 948 | 949 | template 950 | class enable_shared_from_this { 951 | protected: 952 | constexpr enable_shared_from_this() noexcept : weak_this{} {} 953 | 954 | enable_shared_from_this(enable_shared_from_this const&) noexcept : weak_this{} {} 955 | 956 | enable_shared_from_this& 
operator=(enable_shared_from_this const&) noexcept { return *this; } 957 | 958 | ~enable_shared_from_this() = default; 959 | 960 | public: 961 | using esft_detector = enable_shared_from_this; 962 | 963 | [[nodiscard]] weak_ptr weak_from_this() { 964 | return weak_this; 965 | } 966 | 967 | [[nodiscard]] weak_ptr weak_from_this() const { 968 | return weak_this; 969 | } 970 | 971 | [[nodiscard]] shared_ptr shared_from_this() { 972 | return shared_ptr{weak_this}; 973 | } 974 | 975 | [[nodiscard]] shared_ptr shared_from_this() const { 976 | return shared_ptr{weak_this}; 977 | } 978 | 979 | mutable weak_ptr weak_this; 980 | }; 981 | 982 | } // namespace parlay 983 | -------------------------------------------------------------------------------- /test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | function(add_my_test TARGET) 3 | add_executable(${TARGET} ${TARGET}.cpp) 4 | target_link_libraries(${TARGET} PRIVATE parlay_atomic_shared_ptr gtest_main) 5 | target_compile_options(${TARGET} PRIVATE -mcx16) 6 | 7 | if(NOT MSVC) 8 | target_compile_options(${TARGET} PRIVATE -Wall -Wextra -Wfatal-errors) 9 | if (CMAKE_BUILD_TYPE STREQUAL "Debug") 10 | target_compile_options(${TARGET} PRIVATE -fsanitize=address,undefined -fno-omit-frame-pointer) 11 | target_link_options(${TARGET} PRIVATE -fsanitize=address,undefined -fno-omit-frame-pointer) 12 | endif (CMAKE_BUILD_TYPE STREQUAL "Debug") 13 | endif() 14 | 15 | add_test(${TARGET} ${TARGET}) 16 | message(STATUS "Added test target " ${TARGET}) 17 | endfunction() 18 | 19 | add_my_test(test_basic_shared_ptr) 20 | add_my_test(test_basic_atomic_shared_ptr) 21 | 22 | add_my_test(test_shared_ptr) 23 | add_my_test(test_atomic_shared_ptr_custom) 24 | -------------------------------------------------------------------------------- /test/atomic_sp_tests.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | TEST(TestAtomicSharedPtr, TestConstructionEmpty) { 9 | atomic_shared_ptr p; 10 | 11 | auto s = p.load(); 12 | ASSERT_FALSE(s); 13 | ASSERT_EQ(s, nullptr); 14 | } 15 | 16 | TEST(TestAtomicSharedPtr, TestConstructionValue) { 17 | shared_ptr s{new int(5)}; 18 | atomic_shared_ptr p{std::move(s)}; 19 | 20 | auto s2 = p.load(); 21 | ASSERT_EQ(s2.use_count(), 2); 22 | ASSERT_EQ(*s2, 5); 23 | } 24 | 25 | TEST(TestAtomicSharedPtr, TestStoreCopy) { 26 | atomic_shared_ptr p; 27 | 28 | shared_ptr s{new int(5)}; 29 | ASSERT_EQ(s.use_count(), 1); 30 | p.store(s); 31 | ASSERT_EQ(s.use_count(), 2); 32 | 33 | auto s2 = p.load(); 34 | ASSERT_EQ(s2.use_count(), 3); 35 | ASSERT_EQ(*s2, 5); 36 | } 37 | 38 | TEST(TestAtomicSharedPtr, TestStoreMove) { 39 | atomic_shared_ptr p; 40 | 41 | shared_ptr s{new int(5)}; 42 | auto s2 = s; 43 | ASSERT_EQ(s.use_count(), 2); 44 | 45 | p.store(std::move(s2)); 46 | ASSERT_FALSE(s2); 47 | ASSERT_EQ(s2, nullptr); 48 | ASSERT_EQ(s.use_count(), 2); 49 | } 50 | 51 | TEST(TestAtomicSharedPtr, TestLoad) { 52 | shared_ptr s{new int(5)}; 53 | atomic_shared_ptr p{std::move(s)}; 54 | ASSERT_FALSE(s); 55 | ASSERT_EQ(s, nullptr); 56 | 57 | shared_ptr l = p.load(); 58 | ASSERT_EQ(*l, 5); 59 | ASSERT_EQ(l.use_count(), 2); 60 | } 61 | 62 | TEST(TestAtomicSharedPtr, TestExchange) { 63 | shared_ptr s{new int(5)}; 64 | atomic_shared_ptr p{std::move(s)}; 65 | ASSERT_FALSE(s); 66 | ASSERT_EQ(s, nullptr); 67 | 68 | shared_ptr s2{new int(42)}; 69 | shared_ptr s3 = p.exchange(std::move(s2)); 70 | 71 | 
ASSERT_EQ(*s3, 5); 72 | ASSERT_EQ(s3.use_count(), 1); 73 | 74 | shared_ptr l = p.load(); 75 | ASSERT_EQ(*l, 42); 76 | ASSERT_EQ(l.use_count(), 2); 77 | } 78 | 79 | TEST(TestAtomicSharedPtr, TestCompareExchangeWeakTrue) { 80 | shared_ptr s{new int(5)}; 81 | atomic_shared_ptr p{s}; 82 | ASSERT_TRUE(s); 83 | ASSERT_EQ(s.use_count(), 2); 84 | 85 | shared_ptr s2{new int(42)}; 86 | bool result = p.compare_exchange_weak(s, std::move(s2)); 87 | ASSERT_TRUE(result); 88 | ASSERT_FALSE(s2); 89 | ASSERT_EQ(s2, nullptr); 90 | 91 | shared_ptr l = p.load(); 92 | ASSERT_EQ(*l, 42); 93 | ASSERT_EQ(l.use_count(), 2); 94 | } 95 | 96 | TEST(TestAtomicSharedPtr, TestCompareExchangeWeakFalse) { 97 | shared_ptr s{new int(5)}; 98 | atomic_shared_ptr p{s}; 99 | ASSERT_TRUE(s); 100 | ASSERT_EQ(s.use_count(), 2); 101 | 102 | shared_ptr s2{new int(42)}; 103 | shared_ptr s3{new int(5)}; 104 | bool result = p.compare_exchange_weak(s3, std::move(s2)); 105 | ASSERT_FALSE(result); 106 | 107 | shared_ptr l = p.load(); 108 | ASSERT_EQ(*l, 5); 109 | ASSERT_EQ(l.use_count(), 4); 110 | } 111 | 112 | TEST(TestAtomicSharedPtr, TestCompareExchangeStrongTrue) { 113 | shared_ptr s{new int(5)}; 114 | atomic_shared_ptr p{s}; 115 | ASSERT_TRUE(s); 116 | ASSERT_EQ(s.use_count(), 2); 117 | 118 | shared_ptr s2{new int(42)}; 119 | bool result = p.compare_exchange_strong(s, std::move(s2)); 120 | ASSERT_TRUE(result); 121 | ASSERT_FALSE(s2); 122 | ASSERT_EQ(s2, nullptr); 123 | 124 | shared_ptr l = p.load(); 125 | ASSERT_EQ(*l, 42); 126 | ASSERT_EQ(l.use_count(), 2); 127 | } 128 | 129 | TEST(TestAtomicSharedPtr, TestCompareExchangeStrongFalse) { 130 | shared_ptr s{new int(5)}; 131 | atomic_shared_ptr p{s}; 132 | ASSERT_TRUE(s); 133 | ASSERT_EQ(s.use_count(), 2); 134 | 135 | shared_ptr s2{new int(42)}; 136 | shared_ptr s3{new int(5)}; 137 | bool result = p.compare_exchange_strong(s3, std::move(s2)); 138 | ASSERT_FALSE(result); 139 | 140 | shared_ptr l = p.load(); 141 | ASSERT_EQ(*l, 5); 142 | ASSERT_EQ(l.use_count(), 4); 143 | } 144 | 145 | TEST(TestAtomicSharedPtr, TestConcurrentStoreLoads) { 146 | 147 | constexpr std::size_t N = 64; // Number of threads 148 | constexpr int M = 10000; // Number of operations 149 | 150 | atomic_shared_ptr s; 151 | std::latch go{N}; 152 | 153 | std::vector consumers; 154 | consumers.reserve(N/2); 155 | std::vector consumer_sums(N/2); 156 | for (size_t i = 0; i < N/2; i++) { 157 | consumers.emplace_back([i, &s, &consumer_sums, &go] { 158 | go.arrive_and_wait(); 159 | long long int local_sum = 0; 160 | for(int j = 0; j < M; j++) { 161 | auto p = s.load(); 162 | if (p) { 163 | local_sum += *p; 164 | } 165 | } 166 | consumer_sums[i] = local_sum; 167 | }); 168 | } 169 | 170 | std::vector producers; 171 | producers.reserve(N/2); 172 | for (size_t i = 0; i < N/2; i++) { 173 | producers.emplace_back([&s, &go] { 174 | go.arrive_and_wait(); 175 | for(int j = 0; j < M; j++) { 176 | s.store(shared_ptr(new int(j))); 177 | } 178 | }); 179 | } 180 | } 181 | 182 | TEST(TestAtomicSharedPtr, TestConcurrentExchange) { 183 | 184 | constexpr std::size_t N = 64; // Number of threads 185 | constexpr int M = 10000; // Number of operations 186 | 187 | atomic_shared_ptr s(shared_ptr(new int(0))); 188 | std::latch go{N}; 189 | 190 | std::vector local_sums_produced(N); 191 | std::vector local_sums_consumed(N); 192 | 193 | { 194 | std::vector threads; 195 | threads.reserve(N); 196 | 197 | for (size_t i = 0; i < N; i++) { 198 | threads.emplace_back([i, &s, &go, &local_sums_produced, &local_sums_consumed] { 199 | go.arrive_and_wait(); 200 | 
long long int local_sum_produced = 0, local_sum_consumed = 0; 201 | for(int j = 0; j < M; j++) { 202 | shared_ptr new_sp(new int(std::rand())); 203 | local_sum_produced += *new_sp; 204 | shared_ptr old_sp = s.exchange(std::move(new_sp)); 205 | ASSERT_TRUE(old_sp); 206 | local_sum_consumed += *old_sp; 207 | } 208 | local_sums_produced[i] = local_sum_produced; 209 | local_sums_consumed[i] = local_sum_consumed; 210 | }); 211 | } 212 | } // wait for threads to join 213 | 214 | long long int total_produced = std::accumulate(local_sums_produced.begin(), local_sums_produced.end(), 0LL); 215 | long long int total_consumed = std::accumulate(local_sums_consumed.begin(), local_sums_consumed.end(), 0LL) + *(s.load()); 216 | 217 | ASSERT_EQ(total_produced, total_consumed); 218 | } 219 | -------------------------------------------------------------------------------- /test/test_atomic_shared_ptr_custom.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | #include "gtest/gtest.h" 10 | 11 | #include 12 | 13 | using parlay::atomic_shared_ptr; 14 | using parlay::shared_ptr; 15 | 16 | #include "atomic_sp_tests.hpp" 17 | -------------------------------------------------------------------------------- /test/test_basic_atomic_shared_ptr.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "gtest/gtest.h" 9 | 10 | #include 11 | 12 | using parlay::basic::atomic_shared_ptr; 13 | using parlay::basic::shared_ptr; 14 | 15 | #include "atomic_sp_tests.hpp" 16 | -------------------------------------------------------------------------------- /test/test_basic_shared_ptr.cpp: -------------------------------------------------------------------------------- 1 | // Tests for the basic shared_ptr. 2 | // 3 | 4 | #include "gtest/gtest.h" 5 | 6 | #include 7 | 8 | 9 | TEST(TestSharedPtr, TestDefaultConstruction) { 10 | parlay::basic::shared_ptr s; 11 | } 12 | 13 | 14 | TEST(TestSharedPtr, TestMoveConstructor) { 15 | parlay::basic::shared_ptr src(new int(1729)); 16 | 17 | ASSERT_TRUE(src); 18 | ASSERT_EQ(*src, 1729); 19 | 20 | parlay::basic::shared_ptr dest(std::move(src)); 21 | 22 | ASSERT_FALSE(src); 23 | ASSERT_TRUE(dest); 24 | ASSERT_EQ(*dest, 1729); 25 | } 26 | 27 | TEST(TestSharedPtr, TestMoveAssign) { 28 | parlay::basic::shared_ptr src(new int(123)); 29 | parlay::basic::shared_ptr dest(new int(888)); 30 | 31 | ASSERT_TRUE(src); 32 | ASSERT_EQ(*src, 123); 33 | 34 | ASSERT_TRUE(dest); 35 | ASSERT_EQ(*dest, 888); 36 | 37 | dest = std::move(src); 38 | 39 | ASSERT_FALSE(src); 40 | ASSERT_TRUE(dest); 41 | ASSERT_EQ(*dest, 123); 42 | } 43 | -------------------------------------------------------------------------------- /test/test_shared_ptr.cpp: -------------------------------------------------------------------------------- 1 | // Tests for the custom shared_ptr. 2 | // 3 | // Some of these were stolen from the Microsoft STL Github. 
4 | 5 | #include "gtest/gtest.h" 6 | 7 | #include <parlay/shared_ptr.hpp> 8 | 9 | 10 | class Base { 11 | public: 12 | Base() {} 13 | virtual ~Base() {} 14 | 15 | virtual std::string str() const { 16 | return "Base"; 17 | } 18 | 19 | private: 20 | Base(const Base&); 21 | Base& operator=(const Base&); 22 | }; 23 | 24 | class Derived : public Base { 25 | public: 26 | virtual std::string str() const { 27 | return "Derived"; 28 | } 29 | }; 30 | 31 | 32 | 33 | 34 | TEST(TestSharedPtr, TestDefaultConstruction) { 35 | parlay::shared_ptr<int> s; 36 | } 37 | 38 | 39 | TEST(TestSharedPtr, TestMoveConstructor) { 40 | parlay::shared_ptr<int> src(new int(1729)); 41 | 42 | ASSERT_TRUE(src); 43 | ASSERT_EQ(*src, 1729); 44 | 45 | parlay::shared_ptr<int> dest(std::move(src)); 46 | 47 | ASSERT_FALSE(src); 48 | ASSERT_TRUE(dest); 49 | ASSERT_EQ(*dest, 1729); 50 | } 51 | 52 | TEST(TestSharedPtr, TestMoveAssign) { 53 | parlay::shared_ptr<int> src(new int(123)); 54 | parlay::shared_ptr<int> dest(new int(888)); 55 | 56 | ASSERT_TRUE(src); 57 | ASSERT_EQ(*src, 123); 58 | 59 | ASSERT_TRUE(dest); 60 | ASSERT_EQ(*dest, 888); 61 | 62 | dest = std::move(src); 63 | 64 | ASSERT_FALSE(src); 65 | ASSERT_TRUE(dest); 66 | ASSERT_EQ(*dest, 123); 67 | } 68 | 69 | TEST(TestSharedPtr, TestAliasMoveConstructor) { 70 | parlay::shared_ptr<Derived> src(new Derived); 71 | 72 | ASSERT_TRUE(src); 73 | ASSERT_EQ(src->str(), "Derived"); 74 | 75 | parlay::shared_ptr<Base> dest(std::move(src)); 76 | 77 | ASSERT_FALSE(src); 78 | ASSERT_TRUE(dest); 79 | ASSERT_EQ(dest->str(), "Derived"); 80 | } 81 | 82 | TEST(TestSharedPtr, TestMakeShared) { 83 | parlay::shared_ptr<int> p = parlay::make_shared<int>(42); 84 | } 85 | 86 | TEST(TestSharedPtr, TestMakeSharedNonTrivial) { 87 | parlay::shared_ptr<std::string> s = parlay::make_shared<std::string>(1000, 'b'); 88 | } --------------------------------------------------------------------------------
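The tests above drive the public surface of parlay::atomic_shared_ptr and parlay::shared_ptr (load, store, exchange, compare_exchange_weak/strong). As a closing illustration, here is a sketch under assumed names of the publish-an-immutable-snapshot pattern that this interface supports; the Config struct, its fields, and the helper functions are hypothetical and are not code from this repository:

#include <parlay/atomic_shared_ptr.hpp>
#include <utility>

struct Config { int num_workers = 0; int queue_depth = 0; };

parlay::atomic_shared_ptr<Config> g_config;  // starts out null

// Readers take a cheap, consistent snapshot; it remains valid even if a writer
// publishes a new Config concurrently.
int current_workers() {
  parlay::shared_ptr<Config> snapshot = g_config.load();
  return snapshot ? snapshot->num_workers : 0;
}

// Writers never mutate the published object; they build a fresh one and swap it in atomically.
void set_workers(int n) {
  auto fresh = parlay::make_shared<Config>();
  fresh->num_workers = n;
  fresh->queue_depth = 128;
  g_config.store(std::move(fresh));
}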