├── .gitignore ├── shared.hpp ├── cow_ownership_flags ├── thread_unsafe_flag.hpp ├── mutex_flag.hpp ├── seq_cst_atomics_flag.hpp └── manually_ordered_atomics_flag.hpp ├── copy_on_write_ptr.hpp ├── bench_results ├── thread_unsafe-vs-mutex.txt ├── thread_unsafe-vs-seq_cst_atomics.txt ├── thread_unsafe_cow-vs-shared_ptr.txt └── thread_unsafe-vs-manually_ordered_atomics.txt ├── README.md ├── bench_vs_shared_ptr.cpp ├── bench_unsafe_vs_other.cpp └── LICENSE.lesser /.gitignore: -------------------------------------------------------------------------------- 1 | a.out 2 | *.bin 3 | -------------------------------------------------------------------------------- /shared.hpp: -------------------------------------------------------------------------------- 1 | /* This file is part of copy_on_write_ptr. 2 | 3 | copy_on_write_ptr is free software: you can redistribute it and/or modify 4 | it under the terms of the GNU Lesser General Public License as published by 5 | the Free Software Foundation, either version 3 of the License, or 6 | (at your option) any later version. 7 | 8 | copy_on_write_ptr is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU Lesser General Public License for more details. 12 | 13 | You should have received a copy of the GNU Lesser General Public License 14 | along with copy_on_write_ptr. If not, see . 
#ifndef SHARED_H
#define SHARED_H

#include <chrono>
#include <cstddef>

// These shared facilities are used by all my copy-on-write benchmarking programs
namespace Shared {

    // Clock used for every measurement. A monotonic clock is used so that a
    // wall-clock adjustment happening during a run cannot distort (or even
    // make negative) the measured interval.
    using Clock = std::chrono::steady_clock;

    // Default result type of time_it().
    // NOTE(review): the template arguments of this alias were lost when the
    // file was extracted; floating-point seconds are assumed, which matches
    // the "10.0881 s" style of the recorded benchmark outputs. Please confirm.
    using Duration = std::chrono::duration<double>;

    // Generic timer for performance measurement purposes.
    //
    // Calls `operation()` exactly `amount` times in a row and returns the
    // total time spent in the loop (not the per-call time).
    //
    //   DurationType  std::chrono duration type of the result (default: Duration)
    //   operation     any callable taking no argument; its result is ignored
    //   amount        number of consecutive calls; 0 is allowed and only
    //                 measures the overhead of taking two timestamps
    //
    // NOTE(review): the original template header was lost in extraction. The
    // result type is listed first so that both `time_it(op, n)` and
    // `time_it<SomeDuration>(op, n)` compile; confirm against callers.
    template <typename DurationType = Duration, typename Callable>
    DurationType time_it(Callable && operation,
                         const std::size_t amount) {
        const Clock::time_point start_time = Clock::now();
        for(std::size_t i = 0; i < amount; ++i) {
            operation();
        }
        const Clock::time_point end_time = Clock::now();

        // duration_cast also supports integer result types coarser than the
        // clock's native tick, which an implicit conversion would reject.
        return std::chrono::duration_cast<DurationType>(end_time - start_time);
    }

    // Define the data type used by the test, and a typical value of it
    using Data = int;
    const Data typical_value = 42;

}

#endif

// --------------------------------------------------------------------------------
// /cow_ownership_flags/thread_unsafe_flag.hpp:
// --------------------------------------------------------------------------------
// This file is part of copy_on_write_ptr.
//
// copy_on_write_ptr is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// copy_on_write_ptr is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with copy_on_write_ptr.  If not, see <http://www.gnu.org/licenses/>.
#ifndef THREAD_UNSAFE_FLAG_H
#define THREAD_UNSAFE_FLAG_H

// NOTE(review): the name of the header included here was lost when the file
// was extracted. The class below needs nothing from the standard library;
// <utility> is assumed -- please confirm against the repository.
#include <utility>

namespace cow_ownership_flags {

    // This implementation of the copy-on-write ownership does not attempt to achieve thread safety.
    // It is a plain boolean: "true" means that the holder considers the memory block it points to
    // as its own, "false" means that a private copy must be made before writing.
    class thread_unsafe_flag {
      public:

        // Construct our ownership flag from an initial value
        thread_unsafe_flag(bool initially_owned) : m_owned{initially_owned} { }

        // Move-construct the flag from a flag rvalue. The source keeps its value.
        thread_unsafe_flag(thread_unsafe_flag && other) noexcept : m_owned{other.m_owned} { }

        // There's nothing special about deleting an ownership flag.
        ~thread_unsafe_flag() = default;

        // Move-assign the flag. Without thread safety, this is equivalent to move-construction.
        // Returns *this, as every assignment operator must: the function is declared as
        // returning a reference, so leaving it without a return statement is undefined behaviour.
        thread_unsafe_flag & operator=(thread_unsafe_flag && other) noexcept {
            set_ownership(other.m_owned);
            return *this;
        }

        // Ownership flags are not copyable. Proper CoW semantics would require clearing them upon
        // copy, which is at odds with normal copy semantics. It's better to throw a compiler error
        // in this case, and let the user write more explicit code.
        thread_unsafe_flag(const thread_unsafe_flag &) = delete;
        thread_unsafe_flag & operator=(const thread_unsafe_flag &) = delete;

        // Authoritatively mark the active memory block as owned/not owned by the active thread
        void set_ownership(bool owned) {
            m_owned = owned;
        }

        // Acquire ownership of the active memory block, using the provided resource acquisition
        // routine, if that's not done already: `acquire()` is called only when the flag is
        // currently false, and the flag is set to true once it has returned. If `acquire()`
        // throws, the flag stays false.
        // Disregard the possibility that other threads may be doing the same thing.
        template <typename Callable>
        void acquire_ownership_once(Callable && acquire) {
            if(!m_owned) {
                acquire();
                m_owned = true;
            }
        }

      private:

        bool m_owned;
    };

}

#endif

// --------------------------------------------------------------------------------
// /cow_ownership_flags/mutex_flag.hpp:
// --------------------------------------------------------------------------------
/* This file is part of copy_on_write_ptr.

   copy_on_write_ptr is free software: you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   copy_on_write_ptr is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with copy_on_write_ptr.  If not, see <http://www.gnu.org/licenses/>. */

#ifndef MUTEX_FLAG_H
#define MUTEX_FLAG_H

#include <mutex>

namespace cow_ownership_flags {

    // This implementation of the copy-on-write ownership flag uses a mutex to achieve thread safety.
    // Every access to the flag made through set_ownership() or acquire_ownership_once() holds
    // m_ownership_mutex for its whole duration.
    class mutex_flag {
      public:

        // Ownership flags may be initialized to a certain value without synchronization, as at
        // construction time only one thread has access to the active ownership flag.
        mutex_flag(bool initially_owned) : m_owned{initially_owned} { }

        // When we move-construct from an ownership flag rvalue, we may assume that no other thread
        // has access to either that rvalue or the active flag, and avoid using synchronization.
        // The new flag gets a fresh mutex of its own; only the boolean is taken from `other`.
        mutex_flag(mutex_flag && other) noexcept : m_owned{other.m_owned} { }

        // There's nothing special about deleting an ownership flag.
        ~mutex_flag() = default;

        // When we move-assign an ownership flag rvalue, no other thread has access to that rvalue,
        // so we can access it without read synchronization.
        // But the active flag may be shared with other threads, so we need write synchronization.
        // Not noexcept: locking a std::mutex is allowed to throw std::system_error.
        // Returns *this; the function is declared as returning a reference, so leaving it
        // without a return statement is undefined behaviour.
        mutex_flag & operator=(mutex_flag && other) {
            set_ownership(other.m_owned);
            return *this;
        }

        // Ownership flags are not copyable. Proper CoW semantics would require clearing them upon
        // copy, which is at odds with normal copy semantics. It's better to throw a compiler error
        // in this case, and let the user write more explicit code.
        mutex_flag(const mutex_flag &) = delete;
        mutex_flag & operator=(const mutex_flag &) = delete;

        // Authoritatively mark the active memory block as owned/not owned by the active thread
        void set_ownership(bool owned) {
            std::lock_guard<std::mutex> lock(m_ownership_mutex);
            m_owned = owned;
        }

        // Acquire ownership of the active memory block, using the provided resource acquisition
        // routine, if that's not done already. Other threads should block during this process:
        // `acquire()` runs while the mutex is held, so it must not call back into this flag
        // (std::mutex is not recursive). If `acquire()` throws, the flag stays false and the
        // lock is released by the guard.
        template <typename Callable>
        void acquire_ownership_once(Callable && acquire) {
            std::lock_guard<std::mutex> lock(m_ownership_mutex);
            if(!m_owned) {
                acquire();
                m_owned = true;
            }
        }

      private:

        std::mutex m_ownership_mutex;
        bool m_owned;
    };

}

#endif

// --------------------------------------------------------------------------------
// /copy_on_write_ptr.hpp:
// --------------------------------------------------------------------------------
// This file is part of copy_on_write_ptr.
//
// copy_on_write_ptr is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// (license header of /copy_on_write_ptr.hpp, continued)
//
// copy_on_write_ptr is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with copy_on_write_ptr.  If not, see <http://www.gnu.org/licenses/>.

#ifndef COW_PTR_H
#define COW_PTR_H

#include <memory>
#include <utility>

// The cow_ptr class implements copy-on-write semantics on top of std::shared_ptr
//
//   T              type of the pointed-to data; must be copy-constructible and assignable
//   OwnershipFlag  policy class holding the "do I own my data block?" bit. It must be
//                  constructible from bool, move-constructible, move-assignable, and provide
//                  set_ownership(bool) and acquire_ownership_once(callable).
//
// NOTE(review): the template header was lost when the file was extracted. This header includes
// no ownership flag implementation, so OwnershipFlag is assumed to have no default; confirm.
//
// NOTE(review): copying never touches the source's flag, so a pointer that owns its data keeps
// writing in place after it has been copied, and such writes are visible through the copies.
template <typename T,
          typename OwnershipFlag>
class copy_on_write_ptr {
  public:
    // === BASIC CLASS LIFECYCLE ===

    // Construct a cow_ptr from a raw pointer, acquire ownership.
    copy_on_write_ptr(T * ptr) :
        m_payload{ptr},
        m_ownership{true}
    { }

    // TODO: As a performance optimization, allow direct construction from a shared_ptr

    // Move-construct from a copy_on_write_ptr: take over both its data block and its ownership
    // status, exactly like move-assignment does.
    // The payload is moved rather than copied (no reference count traffic), and the flag is
    // transferred rather than forced to "owned": claiming ownership when moving from a
    // non-owning copy would let later writes modify data that other pointers still share.
    copy_on_write_ptr(copy_on_write_ptr && cptr) :
        m_payload{std::move(cptr.m_payload)},
        m_ownership{std::move(cptr.m_ownership)}
    { }

    // Copy-construct from a copy_on_write_ptr, DO NOT acquire ownership.
    copy_on_write_ptr(const copy_on_write_ptr & cptr) :
        m_payload{cptr.m_payload},
        m_ownership{false}
    { }

    // All our data members can take care of themselves on their own.
    ~copy_on_write_ptr() = default;

    // Moving a copy_on_write_ptr transfers ownership of the underlying data : nothing special
    copy_on_write_ptr & operator=(copy_on_write_ptr && cptr) = default;

    // Copying a copy_on_write_ptr DOES NOT transfer ownership of the underlying content, so we
    // need to reset our ownership bit in this scenario. The bit is cleared before the payload
    // is replaced, so that the new payload is never seen together with a stale "owned" flag.
    // Self-assignment is a no-op: an owner assigned to itself remains the owner of its data.
    copy_on_write_ptr & operator=(const copy_on_write_ptr & cptr) {
        if(this != &cptr) {
            m_ownership.set_ownership(false);
            m_payload = cptr.m_payload;
        }
        return *this;
    }


    // === DATA ACCESS ===

    // Reading from copy-on-write data does not require ownership.
    // CAUTION: Be careful with references to non-const CoW data, as writes may invalidate them.
    const T & read() const { return *m_payload; }

    // Writing to copy-on-write data requires ownership, which must be acquired as needed.
    void write(const T & value) {
        copy_if_not_owner();
        *m_payload = value;
    }

    // Same as above for temporaries: the value is moved into the data block, not copied.
    void write(T && value) {
        copy_if_not_owner();
        *m_payload = std::move(value);
    }

  private:
    std::shared_ptr<T> m_payload;
    OwnershipFlag m_ownership;

    // If we are not the owner of the payload object, make a private copy of it
    void copy_if_not_owner() {
        m_ownership.acquire_ownership_once([this](){
            m_payload = std::make_shared<T>(*m_payload);
        });
    }
};

#endif

// --------------------------------------------------------------------------------
// /bench_results/thread_unsafe-vs-mutex.txt:
// --------------------------------------------------------------------------------
// === MICROBENCHMARK : THREAD-UNSAFE COW POINTER VS MUTEX-PROTECTED VERSION ===
//
// hadrien@pc-grasland:~/Bureau/Programmation/TestCoW$ g++ -O0 -std=c++11 bench_unsafe_vs_other.cpp -o bench_unsafe_vs_other.bin
// [...]
5 | 6 | Creating 100000000 pointers from raw pointers 7 | With a thread-unsafe implementation, this operation takes 10.0881 s 8 | With the tested implementation, it takes 10.5088 s (1.0417x slower) 9 | 10 | Creating AND move-constructing 2500000000 pointers 11 | With a thread-unsafe implementation, this operation takes 381.738 s 12 | With the tested implementation, it takes 403.743 s (1.05764x slower) 13 | 14 | Copy-constructing 1000000000 pointers 15 | With a thread-unsafe implementation, this operation takes 48.0419 s 16 | With the tested implementation, it takes 54.8719 s (1.14217x slower) 17 | 18 | Copy-constructing AND move-assigning 5000000000 pointers 19 | With a thread-unsafe implementation, this operation takes 480.287 s 20 | With the tested implementation, it takes 596.449 s (1.24186x slower) 21 | 22 | Copy-assigning 64000000 pointers 23 | With a thread-unsafe implementation, this operation takes 1.06318 s 24 | With the tested implementation, it takes 2.27327 s (2.13819x slower) 25 | 26 | Reading from 5000000000 pointers 27 | With a thread-unsafe implementation, this operation takes 35.1963 s 28 | With the tested implementation, it takes 35.4005 s (1.0058x slower) 29 | 30 | Performing 1920000000 pointer copies AND cold writes 31 | With a thread-unsafe implementation, this operation takes 603.847 s 32 | With the tested implementation, it takes 696.199 s (1.15294x slower) 33 | 34 | Performing 1920000000 warm pointer writes 35 | With a thread-unsafe implementation, this operation takes 21.2827 s 36 | With the tested implementation, it takes 61.7025 s (2.89919x slower) 37 | 38 | 39 | === RESULTS ANALYSIS === 40 | 41 | For details on the methodology being used here, please refer to the comparison between thread-unsafe 42 | copy-on-write and raw shared_ptrs. 
43 | 44 | Move-construction: 45 | 46 | thread-unsafe creation takes 101ns 47 | mutex-based creation takes 105ns 48 | thread-unsafe creation and move-construction takes 153ns 49 | mutex-based creation and move-construction takes 161ns 50 | 51 | therefore, 52 | 53 | thread-unsafe move-construction takes 52ns 54 | mutex-based move-construction takes 56ns 55 | 56 | hence the latter is 1.1x slower 57 | 58 | Move-assignment: 59 | 60 | thread-unsafe copy-construction takes 48.0ns 61 | mutex-based copy-construction takes 54.9ns 62 | thread-unsafe copy-construction and move-assignment takes 96.1ns 63 | mutex-based copy-construction and move-assignment takes 119ns 64 | 65 | therefore, 66 | 67 | thread-unsafe move-assignment takes 48.1ns 68 | mutex-based move-assignment takes 64ns 69 | 70 | hence the latter is 1.3x slower 71 | 72 | Cold writes: 73 | 74 | thread-unsafe copy-assignment takes 16.6ns 75 | mutex-based copy-assignment takes 35.5ns 76 | thread-unsafe copy-assignment and cold write takes 315ns 77 | mutex-based copy-assignment and cold write takes 363ns 78 | 79 | therefore, 80 | 81 | thread-unsafe cold write takes 298ns 82 | mutex-based cold write takes 327ns 83 | 84 | hence the latter is 1.1x slower 85 | 86 | 87 | === CONCLUSIONS === 88 | 89 | In terms of elementary operations, before compiler optimization kicks in... 90 | * Creation from a raw pointer is 1.0x slower 91 | * Move-constructing is 1.1x slower 92 | * Copy-constructing is 1.1x slower 93 | * Moving is 1.3x slower 94 | * Copying is 2.1x slower 95 | * Reading is 1.0x slower 96 | * Cold-writing is 1.1x slower 97 | * Warm-writing is 2.9x slower 98 | 99 | The extra overhead upon copy assignment and warm writes is perhaps problematic, so we would like to use a cheaper 100 | synchronization primitive than a mutex there. 
101 | -------------------------------------------------------------------------------- /bench_results/thread_unsafe-vs-seq_cst_atomics.txt: -------------------------------------------------------------------------------- 1 | === MICROBENCHMARK : THREAD-UNSAFE COW POINTER VS SEQUENTIALLY CONSISTENT ATOMICS === 2 | 3 | hadrien@pc-grasland:~/Bureau/Programmation/TestCoW$ g++ -O0 -std=c++11 bench_unsafe_vs_other.cpp -o bench_unsafe_vs_other.bin 4 | [...] 5 | 6 | Creating 100000000 pointers from raw pointers 7 | With a thread-unsafe implementation, this operation takes 9.82543 s 8 | With the tested implementation, it takes 10.6042 s (1.07926x slower) 9 | 10 | Creating AND move-constructing 2500000000 pointers 11 | With a thread-unsafe implementation, this operation takes 405.151 s 12 | With the tested implementation, it takes 420.437 s (1.03773x slower) 13 | 14 | Copy-constructing 1000000000 pointers 15 | With a thread-unsafe implementation, this operation takes 47.3745 s 16 | With the tested implementation, it takes 56.2335 s (1.187x slower) 17 | 18 | Copy-constructing AND move-assigning 5000000000 pointers 19 | With a thread-unsafe implementation, this operation takes 460.931 s 20 | With the tested implementation, it takes 655.856 s (1.42289x slower) 21 | 22 | Copy-assigning 64000000 pointers 23 | With a thread-unsafe implementation, this operation takes 1.03478 s 24 | With the tested implementation, it takes 2.62275 s (2.5346x slower) 25 | 26 | Reading from 5000000000 pointers 27 | With a thread-unsafe implementation, this operation takes 35.1397 s 28 | With the tested implementation, it takes 35.1514 s (1.00033x slower) 29 | 30 | Performing 1920000000 pointer copies AND cold writes 31 | With a thread-unsafe implementation, this operation takes 626.467 s 32 | With the tested implementation, it takes 751.766 s (1.20001x slower) 33 | 34 | Performing 1920000000 warm pointer writes 35 | With a thread-unsafe implementation, this operation takes 21.1624 s 36 | With the 
tested implementation, it takes 58.2328 s (2.75171x slower) 37 | 38 | 39 | === RESULTS ANALYSIS === 40 | 41 | For details on the methodology being used here, please refer to the comparison between thread-unsafe 42 | copy-on-write and raw shared_ptrs. 43 | 44 | Move-construction: 45 | 46 | thread-unsafe creation takes 98.3ns 47 | seq_cst-based creation takes 106ns 48 | thread-unsafe creation and move-construction takes 162ns 49 | seq_cst-based creation and move-construction takes 168ns 50 | 51 | therefore, 52 | 53 | thread-unsafe move-construction takes 64ns 54 | seq_cst-based move-construction takes 62ns 55 | 56 | hence the latter is 1.0x slower 57 | 58 | Move-assignment: 59 | 60 | thread-unsafe copy-construction takes 47.4ns 61 | seq_cst-based copy-construction takes 56.3ns 62 | thread-unsafe copy-construction and move-assignment takes 92.2ns 63 | seq_cst-based copy-construction and move-assignment takes 132ns 64 | 65 | therefore, 66 | 67 | thread-unsafe move-assignment takes 45ns 68 | seq_cst-based move-assignment takes 75ns 69 | 70 | hence the latter is 1.7x slower 71 | 72 | Cold writes: 73 | 74 | thread-unsafe copy-assignment takes 16.2ns 75 | seq_cst-based copy-assignment takes 41.0ns 76 | thread-unsafe copy-assignment and cold write takes 326ns 77 | seq_cst-based copy-assignment and cold write takes 392ns 78 | 79 | therefore, 80 | 81 | thread-unsafe cold write takes 310ns 82 | seq_cst-based cold write takes 351ns 83 | 84 | hence the latter is 1.1x slower 85 | 86 | 87 | === CONCLUSIONS === 88 | 89 | In terms of elementary operations, before compiler optimization kicks in... 
90 | * Creation from a raw pointer is 1.1x slower => Comparable to mutex 91 | * Move-constructing is 1.0x slower => Comparable to mutex 92 | * Copy-constructing is 1.2x slower => Comparable to mutex 93 | * Moving is 1.7x slower => Slightly slower (1.3x for mutex) 94 | * Copying is 2.5x slower => Slightly slower (2.3x for mutex) 95 | * Reading is 1.0x slower => Comparable to mutex 96 | * Cold-writing is 1.1x slower => Comparable to mutex 97 | * Warm-writing is 2.8x slower => Comparable to mutex 98 | 99 | Sequentially consistent atomics bring no performance benefits with respect to mutexes, and are in some cases slightly 100 | slower. They do not appear to be worth the massive code complexity that they bring in this use case. 101 | -------------------------------------------------------------------------------- /bench_results/thread_unsafe_cow-vs-shared_ptr.txt: -------------------------------------------------------------------------------- 1 | === MICROBENCHMARK : THREAD-UNSAFE COW POINTER VS RAW SHARED_PTR === 2 | 3 | hadrien@pc-grasland:~/Bureau/Programmation/TestCoW$ g++ -O0 -std=c++11 bench_vs_shared_ptr.cpp -o bench_vs_shared_ptr.bin && ./bench_vs_shared_ptr.bin 4 | [...] 
5 | 6 | Creating 100000000 pointers from raw pointers 7 | With a raw shared_ptr, this operation takes 9.05853 s 8 | With cow_ptr, it takes 9.70459 s (1.07132x slower) 9 | 10 | Creating AND move-constructing 2500000000 pointers 11 | With a raw shared_ptr, this operation takes 285.148 s 12 | With cow_ptr, it takes 404.977 s (1.42023x slower) 13 | 14 | Copy-constructing 1000000000 pointers 15 | With a raw shared_ptr, this operation takes 48.1197 s 16 | With cow_ptr, it takes 52.9928 s (1.10127x slower) 17 | 18 | Copy-constructing AND move-assigning 5000000000 pointers 19 | With a raw shared_ptr, this operation takes 418.364 s 20 | With cow_ptr, it takes 491.536 s (1.1749x slower) 21 | 22 | Copy-assigning 64000000 pointers 23 | With a raw shared_ptr, this operation takes 0.750994 s 24 | With cow_ptr, it takes 1.04189 s (1.38734x slower) 25 | 26 | Reading from 5000000000 pointers 27 | With a raw shared_ptr, this operation takes 21.9751 s 28 | With cow_ptr, it takes 35.1145 s (1.59792x slower) 29 | 30 | Performing 1920000000 pointer copies AND cold writes 31 | With a raw shared_ptr, this operation takes 26.1059 s 32 | With cow_ptr, it takes 619.9 s (23.7456x slower) 33 | 34 | Performing 1920000000 warm pointer writes 35 | With a raw shared_ptr, this operation takes 8.97253 s 36 | With cow_ptr, it takes 21.1923 s (2.36191x slower) 37 | 38 | 39 | === RESULTS ANALYSIS === 40 | 41 | When interpreting the results of this test, one should be mindful of three things: 42 | - The amount of operations is not constant, but optimized per-test to get a measurement uncertainty of a few percents 43 | - The benchmark is built at -O0 optimization, and may not follow -O3 performance (which cannot be microbenchmarked) 44 | - Some operations are composite, i.e. made of multiple inner operations that must be separated. 
45 | 46 | Moves are an example of a composite operation: it is quite hard to build a stateless and lightweight benchmark which 47 | measures the performance of moving a piece of data back and forth between two locations. Instead, what is done is to 48 | measure the overhead of creating + moving a piece of data, then subtract the overhead of data creation alone from it. 49 | 50 | This is how it is done for move-construction... 51 | 52 | shared_ptr creation takes 90.6ns 53 | copy_on_write_ptr creation takes 97.0ns 54 | shared_ptr creation and move-construction takes 114ns 55 | copy_on_write_ptr creation and move-construction takes 162ns 56 | 57 | therefore, 58 | 59 | shared_ptr move-construction takes 23ns 60 | copy_on_write_ptr move-construction takes 65ns 61 | 62 | hence the latter is 2.8x slower 63 | 64 | ...and for move-assignment: 65 | 66 | shared_ptr copy-construction takes 48.1ns 67 | copy_on_write_ptr copy-construction takes 53.0ns 68 | shared_ptr copy-construction and move-assignment takes 83.7ns 69 | copy_on_write_ptr copy-construction and move-assignment takes 98.3ns 70 | 71 | therefore, 72 | 73 | shared_ptr move-assignment takes 35.6ns 74 | copy_on_write_ptr move-assignment takes 45.3ns 75 | 76 | hence the latter is 1.3x slower 77 | 78 | Finally, by the very nature of the copy-on-write abstraction, cold writes may also only be measured in a composite way: 79 | 80 | shared_ptr copy-assignment takes 11.7ns 81 | copy_on_write_ptr copy-assignment takes 16.3ns 82 | shared_ptr copy-assignment and cold write takes 13.6ns 83 | copy_on_write_ptr copy-assignment and cold write takes 323ns 84 | 85 | therefore, 86 | 87 | shared_ptr cold write takes 1.9ns 88 | copy_on_write_ptr cold write takes 307ns 89 | 90 | hence the latter is 161x slower 91 | 92 | 93 | === CONCLUSIONS === 94 | 95 | In terms of elementary operations, before compiler optimization kicks in... 
96 | * Creation from a raw pointer is 1.1x slower 97 | * Move-constructing is 2.8x slower 98 | * Copy-constructing is 1.1x slower 99 | * Moving is 1.3x slower 100 | * Copying is 1.4x slower 101 | * Reading is 1.6x slower 102 | * Cold-writing is 161x slower => EXPECTED: Dynamic memory allocation overhead. 103 | * Warm-writing is 2.4x slower 104 | 105 | This sets some expectations on how much performance may be expected from thread-safe copy-on-write implementations, 106 | when measured in the same way. It also highlights the well-known fact that for scenarios where writes are infrequent, 107 | copy-on-write of large objects remains quite efficient as a memory usage optimization. 108 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # About the `copy_on_write_ptr` project 2 | 3 | ## Introduction 4 | 5 | The idea behind `copy_on_write_ptr` is to provide users with a relatively straightforward way to use `std::shared_ptr` 6 | with copy-on-write (CoW) semantics. 7 | 8 | In CoW semantics, large pieces of data may be cheaply "copied" by reference as long as they are not written to, whereas 9 | writing triggers a lazy deep copy of the underlying data block. Effectively, copy-on-write allows a client to have 10 | something which offers the memory efficiency benefits of an `std::shared_ptr`, but can gracefully degrade into 11 | an `std::unique_ptr` to a private mutable data block as needed. 12 | 13 | There is a widespread belief in the C++ community that the innovations brought forth by C++11, in particular move 14 | semantics, have rendered copy-on-write obsolete. This belief has led, for example, a similar effort to be rejected by 15 | the Boost community. However, this is not entirely accurate. 
C++11 has not rendered copy-on-write obsolete, it has 16 | simply proposed a better solution to a *subset* of the problems which required CoW usage in the past. 17 | 18 | Copy-on-write semantics remain appropriate in scenarios where... 19 | 20 | - Multiple threads need access to a large piece of data, behaving *as if* they owned a private copy of it. 21 | - It is not known in advance whether threads will need to mutate their "cheap copy" of the data. 22 | - The probability of data mutation is low enough for the memory efficiency gains to offset the CPU efficiency losses. 23 | 24 | 25 | ## Copy-on-write in a single-threaded world 26 | 27 | Writing to copy-on-write data relies on an underlying notion of data ownership: 28 | 29 | - If the active pointer has ownership of the data block it points to, it can perform the write directly 30 | - If it does not have ownership, it must create a new data block (which it will own) and write there 31 | 32 | In a single-threaded world, this may be implemented simply using a boolean dirty flag which tells whether a 33 | `copy_on_write_ptr` owns the data it points to. This flag is tested on every write, and a lazy copy will occur when a 34 | write is attempted as this flag is `false`, setting the flag to `true` along the way. 35 | 36 | 37 | ## Copy-on-write in a multithreaded world 38 | 39 | If thread safety is desired, then copy-on-write gets more complicated, because we need to handle two data races: 40 | 41 | 1. Two threads attempt to write new values into CoW data at the same time, potentially causing multiple lazy copies. 42 | 2. A thread attempts to assign a new value to the pointer while another is performing a write to the contained data. 43 | 44 | Another data race that cannot be avoided (in a library-based copy-on-write implementation at least) is that writing to 45 | CoW data potentially invalidates the address of that data. 
Therefore, client threads should be very careful when holding 46 | long-lived references to CoW data that may be asynchronously written to. This is a general concern when using container 47 | libraries, though, so threaded code authors should be aware of it. 48 | 49 | It should be noted that both of the data races above generally indicate an error within the client code, thus opting not 50 | to handle them would be a reasonable option. But if they are to be handled well, thread synchronization must be used. 51 | 52 | 53 | ## Exploring the design tradeoff 54 | 55 | To explore the design space for copy-on-write implementations, I decided to decouple the data ownership handling 56 | mechanism from the high-level CoW interface that is provided by `copy_on_write_ptr`. In this repository, you will find 57 | multiple implementations of this mechanism: 58 | 59 | - A thread-unsafe implementation using a simple boolean flag 60 | - An implementation using mutex synchronization to prevent concurrent ownership flag assignment and lazy copies 61 | - An implementation using atomics-based synchronization instead of mutexes, at a cost of some design complexity 62 | - An implementation using explicit memory ordering to try to accelerate atomics, at the cost of further complexity 63 | 64 | I initially tried to use `std::once_flag` as a copy-on-write ownership flag implementation, however its non-readable, 65 | non-writable, non-moveable and non-copyable semantics turned out to be too limiting for my needs. 66 | 67 | These implementations may easily be compared from a performance and design complexity point of view: the thread-unsafe 68 | implementation can serve as a baseline for the best performance that one may expect from copy-on-write semantics, under 69 | disciplined single-threaded use, whereas the synchronized implementations represent different points on the thread-safe 70 | design continuum between maximal performance and minimal design complexity. 
71 | 72 | You will find the results of this comparison in the `bench_results/` subdirectory. 73 | -------------------------------------------------------------------------------- /bench_results/thread_unsafe-vs-manually_ordered_atomics.txt: -------------------------------------------------------------------------------- 1 | === MICROBENCHMARK : THREAD-UNSAFE COW POINTER VS MANUALLY ORDERED ATOMICS === 2 | 3 | hadrien@pc-grasland:~/Bureau/Programmation/TestCoW$ g++ -O0 -std=c++11 bench_unsafe_vs_other.cpp -o bench_unsafe_vs_other.bin 4 | [...] 5 | 6 | Creating 100000000 pointers from raw pointers 7 | With a thread-unsafe implementation, this operation takes 9.60142 s 8 | With the tested implementation, it takes 10.4198 s (1.08524x slower) 9 | 10 | Creating AND move-constructing 2500000000 pointers 11 | With a thread-unsafe implementation, this operation takes 394.183 s 12 | With the tested implementation, it takes 401.842 s (1.01943x slower) 13 | 14 | Copy-constructing 1000000000 pointers 15 | With a thread-unsafe implementation, this operation takes 48.1861 s 16 | With the tested implementation, it takes 56.815 s (1.17907x slower) 17 | 18 | Copy-constructing AND move-assigning 5000000000 pointers 19 | With a thread-unsafe implementation, this operation takes 478.704 s 20 | With the tested implementation, it takes 600.547 s (1.25453x slower) 21 | 22 | Copy-assigning 64000000 pointers 23 | With a thread-unsafe implementation, this operation takes 1.05171 s 24 | With the tested implementation, it takes 2.07142 s (1.96958x slower) 25 | 26 | Reading from 5000000000 pointers 27 | With a thread-unsafe implementation, this operation takes 35.1065 s 28 | With the tested implementation, it takes 35.2061 s (1.00284x slower) 29 | 30 | Performing 1920000000 pointer copies AND cold writes 31 | With a thread-unsafe implementation, this operation takes 604.841 s 32 | With the tested implementation, it takes 698.893 s (1.1555x slower) 33 | 34 | Performing 1920000000 warm pointer 
writes 35 | With a thread-unsafe implementation, this operation takes 21.4445 s 36 | With the tested implementation, it takes 41.5779 s (1.93886x slower) 37 | 38 | 39 | === RESULTS ANALYSIS === 40 | 41 | For details on the methodology being used here, please refer to the comparison between thread-unsafe 42 | copy-on-write and raw shared_ptrs. 43 | 44 | Move-construction: 45 | 46 | thread-unsafe creation takes 96.0ns 47 | atomics-based creation takes 104ns 48 | thread-unsafe creation and move-construction takes 158ns 49 | atomics-based creation and move-construction takes 161ns 50 | 51 | therefore, 52 | 53 | thread-unsafe move-construction takes 62ns 54 | atomics-based move-construction takes 57ns 55 | 56 | hence the latter is 0.9x slower 57 | 58 | Move-assignment: 59 | 60 | thread-unsafe copy-construction takes 48.2ns 61 | atomics-based copy-construction takes 56.8ns 62 | thread-unsafe copy-construction and move-assignment takes 95.7ns 63 | atomics-based copy-construction and move-assignment takes 120ns 64 | 65 | therefore, 66 | 67 | thread-unsafe move-assignment takes 47.5ns 68 | atomics-based move-assignment takes 63ns 69 | 70 | hence the latter is 1.3x slower 71 | 72 | Cold writes: 73 | 74 | thread-unsafe copy-assignment takes 16.4ns 75 | atomics-based copy-assignment takes 32.4ns 76 | thread-unsafe copy-assignment and cold write takes 315ns 77 | atomics-based copy-assignment and cold write takes 364ns 78 | 79 | therefore, 80 | 81 | thread-unsafe cold write takes 299ns 82 | atomics-based cold write takes 332ns 83 | 84 | hence the latter is 1.1x slower 85 | 86 | 87 | === CONCLUSIONS === 88 | 89 | In terms of elementary operations, before compiler optimization kicks in... 
90 | * Creation from a raw pointer is 1.1x slower => Comparable to mutex 91 | * Move-constructing is 0.9x slower => Slightly faster (1.1x for mutex) 92 | * Copy-constructing is 1.2x slower => Comparable to mutex 93 | * Moving is 1.3x slower => Comparable to mutex 94 | * Copying is 2.0x slower => Slightly faster (2.3x for mutex) 95 | * Reading is 1.0x slower => Comparable to mutex 96 | * Cold-writing is 1.1x slower => Comparable to mutex 97 | * Warm-writing is 1.9x slower => Much faster (2.9x for mutex) 98 | 99 | Unlike sequentially consistent atomics, manually ordered atomics emerge as an attractive alternative to mutexes. 100 | They are always at least as fast as mutexes, and exhibit a nice speed advantage in the two areas where mutexes shine 101 | least, copy assignment (which requires synchronization) and warm writes (which are the mutex's weakest point). 102 | 103 | Whether they are worth it or not depends on one's hunger for speed: atomics are tricky, manually ordering them is 104 | trickier, so this code is much more likely to exhibit bugs, and to gain more of them over years of maintenance. However, I would 105 | say that this primitive might convince a copy-on-write nonbeliever that synchronization across threads can be done 106 | reasonably cheaply. 107 | -------------------------------------------------------------------------------- /cow_ownership_flags/seq_cst_atomics_flag.hpp: -------------------------------------------------------------------------------- 1 | /* This file is part of copy_on_write_ptr. 2 | 3 | copy_on_write_ptr is free software: you can redistribute it and/or modify 4 | it under the terms of the GNU Lesser General Public License as published by 5 | the Free Software Foundation, either version 3 of the License, or 6 | (at your option) any later version. 
#ifndef SEQ_CST_ATOMICS_FLAG_H
#define SEQ_CST_ATOMICS_FLAG_H

#include <atomic>
#include <cstdint>

namespace cow_ownership_flags {

    // This implementation of the copy-on-write ownership flag uses sequentially consistent atomics
    // to achieve thread safety.
    //
    // The flag is a small state machine stored in a single atomic integer:
    //
    //     NotOwner --(acquire_ownership_once)--> AcquiringOwnership --> Owner
    //
    // While the flag is in the AcquiringOwnership state, every other operation that wants to
    // modify or observe the final state busy-waits (spins) until the acquisition has completed.
    class seq_cst_atomics_flag {
      public:

        // Ownership flags may be initialized to a certain value without synchronization, as at
        // construction time only one thread has access to the active ownership flag.
        seq_cst_atomics_flag(bool initially_owned) :
            m_ownership_status{to_ownership_status(initially_owned)}
        { }


        // When we move-construct from an ownership flag rvalue, we may assume that no other thread
        // has access to either that rvalue or the active flag, and avoid using synchronization.
        seq_cst_atomics_flag(seq_cst_atomics_flag && other) noexcept :
            m_ownership_status{other.unsynchronized_status()}
        { }


        // There's nothing special about deleting an ownership flag.
        ~seq_cst_atomics_flag() = default;


        // When we move-assign an ownership flag rvalue, no other thread has access to that rvalue,
        // so we can access it without read synchronization.
        // But the active flag may be shared with other threads, so we need write synchronization.
        //
        // Returns a reference to the assigned-to flag. (The original implementation fell off the
        // end of this non-void function, which is undefined behaviour.)
        seq_cst_atomics_flag & operator=(seq_cst_atomics_flag && other) noexcept {
            set_ownership_status(other.unsynchronized_status());
            return *this;
        }


        // Ownership flags are not copyable. Proper CoW semantics would require clearing them upon
        // copy, which is at odds with normal copy semantics. It's better to throw a compiler error
        // in this case, and let the user write more explicit code.
        seq_cst_atomics_flag(const seq_cst_atomics_flag &) = delete;
        seq_cst_atomics_flag & operator=(const seq_cst_atomics_flag &) = delete;


        // Authoritatively mark the active memory block as owned/not owned by the active thread.
        // Blocks (spins) while another thread is in the middle of acquiring ownership.
        void set_ownership(bool owned) {
            set_ownership_status(to_ownership_status(owned));
        }


        // Acquire ownership of the active memory block, using the provided resource acquisition
        // routine, if that's not done already. Other threads should block during this process.
        //
        // acquisition_routine is called with no arguments, at most once per call, and only by the
        // thread that wins the NotOwner -> AcquiringOwnership transition.
        //
        // If acquisition_routine throws, the flag is reset to NotOwner before the exception is
        // propagated, so that waiting threads are released and a later call may retry instead of
        // spinning forever on a flag stuck in the AcquiringOwnership state.
        template <typename Callable>
        void acquire_ownership_once(Callable && acquisition_routine) {
            for(;;) {
                // Try to switch the ownership status from NotOwner to AcquiringOwnership
                // and tell previous ownership status
                OwnershipStatusType previous_ownership = NotOwner;
                if(m_ownership_status.compare_exchange_strong(previous_ownership, AcquiringOwnership)) {
                    // We won the race: acquire resource ownership
                    try {
                        acquisition_routine();
                    } catch(...) {
                        // Roll back, otherwise every other thread would wait forever
                        m_ownership_status.store(NotOwner);
                        throw;
                    }
                    m_ownership_status.store(Owner);
                    return;
                }

                // Nothing to do, we already own the resource
                if(previous_ownership == Owner) return;

                // Another thread is acquiring ownership: wait until it is done, then re-examine
                // the flag. Re-examining (rather than waiting for Owner specifically) keeps us
                // from hanging if the acquisition failed or if the flag was reset in the meantime.
                while(m_ownership_status.load() == AcquiringOwnership) { }
            }
        }


      private:

        // Integer representation of the flag state, and its possible values
        using OwnershipStatusType = std::uint_fast8_t;
        enum OwnershipStatus : OwnershipStatusType { NotOwner, AcquiringOwnership, Owner };
        std::atomic<OwnershipStatusType> m_ownership_status;

        // Translate the boolean public interface into the internal state representation
        static OwnershipStatusType to_ownership_status(bool is_owned) {
            return (is_owned ? Owner : NotOwner);
        }

        // Read the state without any ordering guarantee. Only valid when the caller knows that no
        // other thread may be accessing this flag (e.g. when it is an rvalue being moved from).
        OwnershipStatusType unsynchronized_status() {
            return m_ownership_status.load(std::memory_order_relaxed);
        }

        // Replace the current state with desired_ownership, never overwriting an in-progress
        // AcquiringOwnership state.
        void set_ownership_status(const OwnershipStatusType desired_ownership) {
            OwnershipStatusType current_ownership = m_ownership_status.load();

            do {
                // Wait for any resource ownership acquisition operation to complete
                while(current_ownership == AcquiringOwnership) {
                    current_ownership = m_ownership_status.load();
                }

                // Once that is done, try to swap in the new resource ownership status.
                // On failure, current_ownership is refreshed with the observed value.
            } while(!m_ownership_status.compare_exchange_weak(current_ownership, desired_ownership));
        }

    };

}

#endif
#ifndef MANUALLY_ORDERED_ATOMICS_FLAG_H
#define MANUALLY_ORDERED_ATOMICS_FLAG_H

#include <atomic>
#include <cstdint>

namespace cow_ownership_flags {

    // This implementation of the copy-on-write ownership flag uses manually ordered atomics to
    // achieve thread safety.
    //
    // The flag is a small state machine stored in a single atomic integer:
    //
    //     NotOwner --(acquire_ownership_once)--> AcquiringOwnership --> Owner
    //
    // The thread that performs the acquisition publishes its work with a release store, and
    // every thread that observes the resulting state does so with an acquire load.
    class manually_ordered_atomics_flag {
      public:

        // Ownership flags may be initialized to a certain value without synchronization, as at
        // construction time only one thread has access to the active ownership flag.
        manually_ordered_atomics_flag(bool initially_owned) :
            m_ownership_status{to_ownership_status(initially_owned)}
        { }


        // When we move-construct from an ownership flag rvalue, we may assume that no other thread
        // has access to either that rvalue or the active flag, and avoid using synchronization.
        manually_ordered_atomics_flag(manually_ordered_atomics_flag && other) noexcept :
            m_ownership_status{other.unsynchronized_status()}
        { }


        // There's nothing special about deleting an ownership flag.
        ~manually_ordered_atomics_flag() = default;


        // When we move-assign an ownership flag rvalue, no other thread has access to that rvalue,
        // so we can access it without read synchronization.
        // But the active flag may be shared with other threads, so we need write synchronization.
        //
        // Returns a reference to the assigned-to flag. (The original implementation fell off the
        // end of this non-void function, which is undefined behaviour.)
        manually_ordered_atomics_flag & operator=(manually_ordered_atomics_flag && other) noexcept {
            set_ownership_status(other.unsynchronized_status());
            return *this;
        }


        // Ownership flags are not copyable. Proper CoW semantics would require clearing them upon
        // copy, which is at odds with normal copy semantics. It's better to throw a compiler error
        // in this case, and let the user write more explicit code.
        manually_ordered_atomics_flag(const manually_ordered_atomics_flag &) = delete;
        manually_ordered_atomics_flag & operator=(const manually_ordered_atomics_flag &) = delete;


        // Authoritatively mark the active memory block as owned/not owned by the active thread.
        // Blocks (spins) while another thread is in the middle of acquiring ownership.
        void set_ownership(bool owned) {
            set_ownership_status(to_ownership_status(owned));
        }


        // Acquire ownership of the active memory block, using the provided resource acquisition
        // routine, if that's not done already. Other threads should block during this process.
        //
        // acquisition_routine is called with no arguments, at most once per call, and only by the
        // thread that wins the NotOwner -> AcquiringOwnership transition.
        //
        // If acquisition_routine throws, the flag is reset to NotOwner before the exception is
        // propagated, so that waiting threads are released and a later call may retry instead of
        // spinning forever on a flag stuck in the AcquiringOwnership state.
        template <typename Callable>
        void acquire_ownership_once(Callable && acquisition_routine) {
            for(;;) {
                // Try to switch the ownership status from NotOwner to AcquiringOwnership
                // and tell previous ownership status
                OwnershipStatusType previous_ownership = NotOwner;
                if(m_ownership_status.compare_exchange_strong(previous_ownership,
                                                              AcquiringOwnership,
                                                              std::memory_order_acq_rel,
                                                              std::memory_order_acquire)) {
                    // We won the race: acquire resource ownership
                    try {
                        acquisition_routine();
                    } catch(...) {
                        // Roll back, otherwise every other thread would wait forever
                        m_ownership_status.store(NotOwner,
                                                 std::memory_order_release);
                        throw;
                    }
                    m_ownership_status.store(Owner,
                                             std::memory_order_release);
                    return;
                }

                // Nothing to do, we already own the resource. The failed compare-exchange loaded
                // the Owner state with acquire ordering, which pairs with the release store above.
                if(previous_ownership == Owner) return;

                // Another thread is acquiring ownership: wait until it is done, then re-examine
                // the flag. Re-examining (rather than waiting for Owner specifically) keeps us
                // from hanging if the acquisition failed or if the flag was reset in the meantime.
                while(m_ownership_status.load(std::memory_order_acquire) == AcquiringOwnership) { }
            }
        }


      private:

        // Integer representation of the flag state, and its possible values
        using OwnershipStatusType = std::uint_fast8_t;
        enum OwnershipStatus : OwnershipStatusType { NotOwner, AcquiringOwnership, Owner };
        std::atomic<OwnershipStatusType> m_ownership_status;

        // Translate the boolean public interface into the internal state representation
        static OwnershipStatusType to_ownership_status(bool is_owned) {
            return (is_owned ? Owner : NotOwner);
        }

        // Read the state without any ordering guarantee. Only valid when the caller knows that no
        // other thread may be accessing this flag (e.g. when it is an rvalue being moved from).
        OwnershipStatusType unsynchronized_status() {
            return m_ownership_status.load(std::memory_order_relaxed);
        }

        // Replace the current state with desired_ownership, never overwriting an in-progress
        // AcquiringOwnership state.
        //
        // The loads use acquire ordering rather than the original memory_order_consume: the
        // loaded value is only compared, never dereferenced, so there is no dependency chain for
        // consume to order, and consume is a discouraged ordering that is documented as being
        // implemented as acquire in practice.
        void set_ownership_status(const OwnershipStatusType desired_ownership) {
            OwnershipStatusType current_ownership = m_ownership_status.load(std::memory_order_acquire);

            do {
                // Wait for any resource ownership acquisition operation to complete
                while(current_ownership == AcquiringOwnership) {
                    current_ownership = m_ownership_status.load(std::memory_order_acquire);
                }

                // Once that is done, try to swap in the new resource ownership status.
                // On failure, current_ownership is refreshed with the observed value.
            } while(!m_ownership_status.compare_exchange_weak(current_ownership,
                                                              desired_ownership,
                                                              std::memory_order_acq_rel,
                                                              std::memory_order_acquire));
        }

    };

}

#endif
*/ 15 | 16 | #include 17 | #include 18 | 19 | #include "copy_on_write_ptr.hpp" 20 | #include "cow_ownership_flags/thread_unsafe_flag.hpp" 21 | #include "shared.hpp" 22 | 23 | // === FORWARD DECLARATIONS === 24 | 25 | // Import shared definitions 26 | using namespace Shared; 27 | 28 | // Specialization of time_it for my comparison purposes 29 | template 31 | void compare_it(Callable1 && shptr_operation, 32 | Callable2 && cowptr_operation, 33 | const std::size_t amount) { 34 | const auto shptr_duration = Shared::time_it(shptr_operation, amount); 35 | std::cout << "With a raw shared_ptr, this operation takes " 36 | << shptr_duration.count() << " s" 37 | << std::endl; 38 | 39 | const auto cowptr_duration = Shared::time_it(cowptr_operation, amount); 40 | std::cout << "With cow_ptr, it takes " 41 | << cowptr_duration.count() << " s (" 42 | << cowptr_duration.count() / shptr_duration.count() << "x slower)" 43 | << std::endl; 44 | } 45 | 46 | // === PERFORMANCE TEST BODY === 47 | 48 | int main() { 49 | 50 | // === PART 0 : TEST-WIDE DEFINITIONS === 51 | 52 | // Define our smart pointer types 53 | using SharedPointer = std::shared_ptr; 54 | using COWPointer = copy_on_write_ptr; 55 | 56 | // Say hi :) 57 | std::cout << std::endl << "=== Microbenchmarking cow_ptr ===" << std::endl; 58 | 59 | // === PART 1 : CREATION FROM RAW POINTER === 60 | 61 | const size_t creation_amount = 1000 * 1000 * 100; 62 | std::cout << std::endl << "Creating " << creation_amount << " pointers from raw pointers" << std::endl; 63 | { 64 | compare_it( 65 | [&](){ 66 | SharedPointer ptr{new Data{typical_value}}; 67 | }, 68 | [&](){ 69 | COWPointer ptr{new Data{typical_value}}; 70 | }, 71 | creation_amount 72 | ); 73 | } 74 | 75 | // === PART 2 : CREATION + MOVE-CONSTRUCTION === (NOTE: Cannot test move construction alone easily) 76 | 77 | const size_t move_amount = 25 * creation_amount; 78 | std::cout << std::endl << "Creating AND move-constructing " << move_amount << " pointers" << std::endl; 79 | { 80 
| compare_it( 81 | [&](){ 82 | SharedPointer source{new Data{typical_value}}; 83 | const SharedPointer dest{std::move(source)}; 84 | }, 85 | [&](){ 86 | COWPointer source{new Data{typical_value}}; 87 | const COWPointer dest{std::move(source)}; 88 | }, 89 | move_amount 90 | ); 91 | } 92 | 93 | // === PART 3 : COPY CONSTRUCTION === 94 | 95 | const size_t copy_amount = 1000 * 1000 * 1000; 96 | std::cout << std::endl << "Copy-constructing " << copy_amount << " pointers" << std::endl; 97 | { 98 | const SharedPointer source_shptr{std::make_shared(typical_value)}; 99 | const COWPointer source_cowptr{new Data{typical_value}}; 100 | 101 | compare_it( 102 | [&](){ 103 | SharedPointer copy{source_shptr}; 104 | }, 105 | [&](){ 106 | COWPointer copy{source_cowptr}; 107 | }, 108 | copy_amount 109 | ); 110 | } 111 | 112 | // === PART 4 : COPY CONSTRUCTION + MOVE-ASSIGNMENT === (NOTE: Cannot test move assignment alone easily) 113 | 114 | const size_t copy_move_amount = 5 * copy_amount; 115 | std::cout << std::endl << "Copy-constructing AND move-assigning " << copy_move_amount << " pointers" << std::endl; 116 | { 117 | const SharedPointer source_shptr{std::make_shared(typical_value)}; 118 | const COWPointer source_cowptr{new Data{typical_value}}; 119 | 120 | SharedPointer dest_shptr{source_shptr}; 121 | COWPointer dest_cowptr{source_cowptr}; 122 | 123 | compare_it( 124 | [&](){ 125 | SharedPointer copy{source_shptr}; 126 | dest_shptr = std::move(copy); 127 | }, 128 | [&](){ 129 | COWPointer copy{source_cowptr}; 130 | dest_cowptr = std::move(copy); 131 | }, 132 | copy_move_amount 133 | ); 134 | } 135 | 136 | // === PART 5 : COPY ASSIGNMENT === 137 | 138 | const size_t copy_assign_amount = 1000 * 1000 * 64; 139 | std::cout << std::endl << "Copy-assigning " << copy_assign_amount << " pointers" << std::endl; 140 | { 141 | const SharedPointer source_shptr{std::make_shared(typical_value)}; 142 | const COWPointer source_cowptr{new Data{typical_value}}; 143 | 144 | SharedPointer 
dest_shptr{source_shptr}; 145 | COWPointer dest_cowptr{source_cowptr}; 146 | 147 | compare_it( 148 | [&](){ 149 | dest_shptr = source_shptr; 150 | }, 151 | [&](){ 152 | dest_cowptr = source_cowptr; 153 | }, 154 | copy_assign_amount 155 | ); 156 | } 157 | 158 | // === PART 6 : READ DATA === 159 | 160 | const size_t read_amount = 1000ULL * 1000ULL * 1000ULL * 5ULL; 161 | std::cout << std::endl << "Reading from " << read_amount << " pointers" << std::endl; 162 | { 163 | const SharedPointer source_shptr{std::make_shared(typical_value)}; 164 | const COWPointer source_cowptr{new Data{typical_value}}; 165 | 166 | compare_it( 167 | [&](){ 168 | const Data & read = *source_shptr; 169 | }, 170 | [&](){ 171 | const Data & read = source_cowptr.read(); 172 | }, 173 | read_amount 174 | ); 175 | } 176 | 177 | // === PART 7 : COPY ASSIGNMENT + COLD WRITES === (NOTE: A pure cold write would require breaking encapsulation) 178 | 179 | const size_t cold_write_amount = 30 * copy_assign_amount; 180 | std::cout << std::endl << "Performing " << cold_write_amount << " pointer copies AND cold writes" << std::endl; 181 | { 182 | const SharedPointer source_shptr{std::make_shared(typical_value)}; 183 | const COWPointer source_cowptr{new Data{typical_value}}; 184 | 185 | SharedPointer dest_shptr{source_shptr}; 186 | COWPointer dest_cowptr{source_cowptr}; 187 | 188 | compare_it( 189 | [&](){ 190 | dest_shptr = source_shptr; 191 | *dest_shptr = typical_value; 192 | }, 193 | [&](){ 194 | dest_cowptr = source_cowptr; 195 | dest_cowptr.write(typical_value); 196 | }, 197 | cold_write_amount 198 | ); 199 | } 200 | 201 | // === PART 8 : WARM WRITES === 202 | 203 | const size_t warm_write_amount = cold_write_amount; 204 | std::cout << std::endl << "Performing " << warm_write_amount << " warm pointer writes" << std::endl; 205 | { 206 | SharedPointer shptr{std::make_shared(typical_value)}; 207 | COWPointer cowptr{new Data{typical_value}}; 208 | 209 | compare_it( 210 | [&](){ 211 | *shptr = typical_value; 
212 | }, 213 | [&](){ 214 | cowptr.write(typical_value); 215 | }, 216 | warm_write_amount 217 | ); 218 | } 219 | 220 | // === TEST FINALIZATION === 221 | 222 | std::cout << std::endl; 223 | return 0; 224 | 225 | } 226 | -------------------------------------------------------------------------------- /bench_unsafe_vs_other.cpp: -------------------------------------------------------------------------------- 1 | /* This file is part of copy_on_write_ptr. 2 | 3 | copy_on_write_ptr is free software: you can redistribute it and/or modify 4 | it under the terms of the GNU Lesser General Public License as published by 5 | the Free Software Foundation, either version 3 of the License, or 6 | (at your option) any later version. 7 | 8 | copy_on_write_ptr is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU Lesser General Public License for more details. 12 | 13 | You should have received a copy of the GNU Lesser General Public License 14 | along with copy_on_write_ptr. If not, see . 
*/ 15 | 16 | #include 17 | 18 | #include "copy_on_write_ptr.hpp" 19 | #include "cow_ownership_flags/thread_unsafe_flag.hpp" 20 | #include "cow_ownership_flags/manually_ordered_atomics_flag.hpp" 21 | #include "shared.hpp" 22 | 23 | // === FORWARD DECLARATIONS === 24 | 25 | // Import shared definitions 26 | using namespace Shared; 27 | 28 | // Specialization of time_it for my comparison purposes 29 | template 31 | void compare_it(Callable1 && unsafe_operation, 32 | Callable2 && tested_operation, 33 | const std::size_t amount) { 34 | const auto unsafe_duration = Shared::time_it(unsafe_operation, amount); 35 | std::cout << "With a thread-unsafe implementation, this operation takes " 36 | << unsafe_duration.count() << " s" 37 | << std::endl; 38 | 39 | const auto tested_duration = Shared::time_it(tested_operation, amount); 40 | std::cout << "With the tested implementation, it takes " 41 | << tested_duration.count() << " s (" 42 | << tested_duration.count() / unsafe_duration.count() << "x slower)" 43 | << std::endl; 44 | } 45 | 46 | // === PERFORMANCE TEST BODY === 47 | 48 | int main() { 49 | 50 | // === PART 0 : TEST-WIDE DEFINITIONS === 51 | 52 | // Define our smart pointer types 53 | using UnsafePointer = copy_on_write_ptr; 54 | using TestedPointer = copy_on_write_ptr; 55 | 56 | // Say hi :) 57 | std::cout << std::endl << "=== Microbenchmarking cow_ptr ===" << std::endl; 58 | 59 | // === PART 1 : CREATION FROM RAW POINTER === 60 | 61 | const size_t creation_amount = 1000 * 1000 * 100; 62 | std::cout << std::endl << "Creating " << creation_amount << " pointers from raw pointers" << std::endl; 63 | { 64 | compare_it( 65 | [&](){ 66 | UnsafePointer ptr{new Data{typical_value}}; 67 | }, 68 | [&](){ 69 | TestedPointer ptr{new Data{typical_value}}; 70 | }, 71 | creation_amount 72 | ); 73 | } 74 | 75 | // === PART 2 : CREATION + MOVE-CONSTRUCTION === (NOTE: Cannot test move construction alone easily) 76 | 77 | const size_t move_amount = 25 * creation_amount; 78 | std::cout << 
std::endl << "Creating AND move-constructing " << move_amount << " pointers" << std::endl; 79 | { 80 | compare_it( 81 | [&](){ 82 | UnsafePointer source{new Data{typical_value}}; 83 | const UnsafePointer dest{std::move(source)}; 84 | }, 85 | [&](){ 86 | TestedPointer source{new Data{typical_value}}; 87 | const TestedPointer dest{std::move(source)}; 88 | }, 89 | move_amount 90 | ); 91 | } 92 | 93 | // === PART 3 : COPY CONSTRUCTION === 94 | 95 | const size_t copy_amount = 1000 * 1000 * 1000; 96 | std::cout << std::endl << "Copy-constructing " << copy_amount << " pointers" << std::endl; 97 | { 98 | const UnsafePointer source_unsafe{new Data{typical_value}}; 99 | const TestedPointer source_tested{new Data{typical_value}}; 100 | 101 | compare_it( 102 | [&](){ 103 | UnsafePointer copy{source_unsafe}; 104 | }, 105 | [&](){ 106 | TestedPointer copy{source_tested}; 107 | }, 108 | copy_amount 109 | ); 110 | } 111 | 112 | // === PART 4 : COPY CONSTRUCTION + MOVE-ASSIGNMENT === (NOTE: Cannot test move assignment alone easily) 113 | 114 | const size_t copy_move_amount = 5 * copy_amount; 115 | std::cout << std::endl << "Copy-constructing AND move-assigning " << copy_move_amount << " pointers" << std::endl; 116 | { 117 | const UnsafePointer source_unsafe{new Data{typical_value}}; 118 | const TestedPointer source_tested{new Data{typical_value}}; 119 | 120 | UnsafePointer dest_unsafe{source_unsafe}; 121 | TestedPointer dest_tested{source_tested}; 122 | 123 | compare_it( 124 | [&](){ 125 | UnsafePointer copy{source_unsafe}; 126 | dest_unsafe = std::move(copy); 127 | }, 128 | [&](){ 129 | TestedPointer copy{source_tested}; 130 | dest_tested = std::move(copy); 131 | }, 132 | copy_move_amount 133 | ); 134 | } 135 | 136 | // === PART 5 : COPY ASSIGNMENT === 137 | 138 | const size_t copy_assign_amount = 1000 * 1000 * 64; 139 | std::cout << std::endl << "Copy-assigning " << copy_assign_amount << " pointers" << std::endl; 140 | { 141 | const UnsafePointer source_unsafe{new 
Data{typical_value}}; 142 | const TestedPointer source_tested{new Data{typical_value}}; 143 | 144 | UnsafePointer dest_unsafe{source_unsafe}; 145 | TestedPointer dest_tested{source_tested}; 146 | 147 | compare_it( 148 | [&](){ 149 | dest_unsafe = source_unsafe; 150 | }, 151 | [&](){ 152 | dest_tested = source_tested; 153 | }, 154 | copy_assign_amount 155 | ); 156 | } 157 | 158 | // === PART 6 : READ DATA === 159 | 160 | const size_t read_amount = 1000ULL * 1000ULL * 1000ULL * 5ULL; 161 | std::cout << std::endl << "Reading from " << read_amount << " pointers" << std::endl; 162 | { 163 | const UnsafePointer source_unsafe{new Data{typical_value}}; 164 | const TestedPointer source_tested{new Data{typical_value}}; 165 | 166 | compare_it( 167 | [&](){ 168 | const Data & read = source_unsafe.read(); 169 | }, 170 | [&](){ 171 | const Data & read = source_tested.read(); 172 | }, 173 | read_amount 174 | ); 175 | } 176 | 177 | // === PART 7 : COPY ASSIGNMENT + COLD WRITES === (NOTE: A pure cold write would require breaking encapsulation) 178 | 179 | const size_t cold_write_amount = 30 * copy_assign_amount; 180 | std::cout << std::endl << "Performing " << cold_write_amount << " pointer copies AND cold writes" << std::endl; 181 | { 182 | const UnsafePointer source_unsafe{new Data{typical_value}}; 183 | const TestedPointer source_tested{new Data{typical_value}}; 184 | 185 | UnsafePointer dest_unsafe{source_unsafe}; 186 | TestedPointer dest_tested{source_tested}; 187 | 188 | compare_it( 189 | [&](){ 190 | dest_unsafe = source_unsafe; 191 | dest_unsafe.write(typical_value); 192 | }, 193 | [&](){ 194 | dest_tested = source_tested; 195 | dest_tested.write(typical_value); 196 | }, 197 | cold_write_amount 198 | ); 199 | } 200 | 201 | // === PART 8 : WARM WRITES === 202 | 203 | const size_t warm_write_amount = cold_write_amount; 204 | std::cout << std::endl << "Performing " << warm_write_amount << " warm pointer writes" << std::endl; 205 | { 206 | UnsafePointer unsafe{new 
Data{typical_value}}; 207 | TestedPointer tested{new Data{typical_value}}; 208 | 209 | compare_it( 210 | [&](){ 211 | unsafe.write(typical_value); 212 | }, 213 | [&](){ 214 | tested.write(typical_value); 215 | }, 216 | warm_write_amount 217 | ); 218 | } 219 | 220 | // === TEST FINALIZATION === 221 | 222 | std::cout << std::endl; 223 | return 0; 224 | 225 | } 226 | -------------------------------------------------------------------------------- /LICENSE.lesser: -------------------------------------------------------------------------------- 1 | 2 | 3 | GNU LESSER GENERAL PUBLIC LICENSE 4 | Version 3, 29 June 2007 5 | 6 | Copyright (C) 2007 Free Software Foundation, Inc. 7 | Everyone is permitted to copy and distribute verbatim copies 8 | of this license document, but changing it is not allowed. 9 | 10 | 11 | This version of the GNU Lesser General Public License incorporates 12 | the terms and conditions of version 3 of the GNU General Public 13 | License, supplemented by the additional permissions listed below. 14 | 15 | 0. Additional Definitions. 16 | 17 | As used herein, "this License" refers to version 3 of the GNU Lesser 18 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 19 | General Public License. 20 | 21 | "The Library" refers to a covered work governed by this License, 22 | other than an Application or a Combined Work as defined below. 23 | 24 | An "Application" is any work that makes use of an interface provided 25 | by the Library, but which is not otherwise based on the Library. 26 | Defining a subclass of a class defined by the Library is deemed a mode 27 | of using an interface provided by the Library. 28 | 29 | A "Combined Work" is a work produced by combining or linking an 30 | Application with the Library. The particular version of the Library 31 | with which the Combined Work was made is also called the "Linked 32 | Version". 
33 | 34 | The "Minimal Corresponding Source" for a Combined Work means the 35 | Corresponding Source for the Combined Work, excluding any source code 36 | for portions of the Combined Work that, considered in isolation, are 37 | based on the Application, and not on the Linked Version. 38 | 39 | The "Corresponding Application Code" for a Combined Work means the 40 | object code and/or source code for the Application, including any data 41 | and utility programs needed for reproducing the Combined Work from the 42 | Application, but excluding the System Libraries of the Combined Work. 43 | 44 | 1. Exception to Section 3 of the GNU GPL. 45 | 46 | You may convey a covered work under sections 3 and 4 of this License 47 | without being bound by section 3 of the GNU GPL. 48 | 49 | 2. Conveying Modified Versions. 50 | 51 | If you modify a copy of the Library, and, in your modifications, a 52 | facility refers to a function or data to be supplied by an Application 53 | that uses the facility (other than as an argument passed when the 54 | facility is invoked), then you may convey a copy of the modified 55 | version: 56 | 57 | a) under this License, provided that you make a good faith effort to 58 | ensure that, in the event an Application does not supply the 59 | function or data, the facility still operates, and performs 60 | whatever part of its purpose remains meaningful, or 61 | 62 | b) under the GNU GPL, with none of the additional permissions of 63 | this License applicable to that copy. 64 | 65 | 3. Object Code Incorporating Material from Library Header Files. 66 | 67 | The object code form of an Application may incorporate material from 68 | a header file that is part of the Library. 
You may convey such object 69 | code under terms of your choice, provided that, if the incorporated 70 | material is not limited to numerical parameters, data structure 71 | layouts and accessors, or small macros, inline functions and templates 72 | (ten or fewer lines in length), you do both of the following: 73 | 74 | a) Give prominent notice with each copy of the object code that the 75 | Library is used in it and that the Library and its use are 76 | covered by this License. 77 | 78 | b) Accompany the object code with a copy of the GNU GPL and this license 79 | document. 80 | 81 | 4. Combined Works. 82 | 83 | You may convey a Combined Work under terms of your choice that, 84 | taken together, effectively do not restrict modification of the 85 | portions of the Library contained in the Combined Work and reverse 86 | engineering for debugging such modifications, if you also do each of 87 | the following: 88 | 89 | a) Give prominent notice with each copy of the Combined Work that 90 | the Library is used in it and that the Library and its use are 91 | covered by this License. 92 | 93 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 94 | document. 95 | 96 | c) For a Combined Work that displays copyright notices during 97 | execution, include the copyright notice for the Library among 98 | these notices, as well as a reference directing the user to the 99 | copies of the GNU GPL and this license document. 100 | 101 | d) Do one of the following: 102 | 103 | 0) Convey the Minimal Corresponding Source under the terms of this 104 | License, and the Corresponding Application Code in a form 105 | suitable for, and under terms that permit, the user to 106 | recombine or relink the Application with a modified version of 107 | the Linked Version to produce a modified Combined Work, in the 108 | manner specified by section 6 of the GNU GPL for conveying 109 | Corresponding Source. 
110 | 111 | 1) Use a suitable shared library mechanism for linking with the 112 | Library. A suitable mechanism is one that (a) uses at run time 113 | a copy of the Library already present on the user's computer 114 | system, and (b) will operate properly with a modified version 115 | of the Library that is interface-compatible with the Linked 116 | Version. 117 | 118 | e) Provide Installation Information, but only if you would otherwise 119 | be required to provide such information under section 6 of the 120 | GNU GPL, and only to the extent that such information is 121 | necessary to install and execute a modified version of the 122 | Combined Work produced by recombining or relinking the 123 | Application with a modified version of the Linked Version. (If 124 | you use option 4d0, the Installation Information must accompany 125 | the Minimal Corresponding Source and Corresponding Application 126 | Code. If you use option 4d1, you must provide the Installation 127 | Information in the manner specified by section 6 of the GNU GPL 128 | for conveying Corresponding Source.) 129 | 130 | 5. Combined Libraries. 131 | 132 | You may place library facilities that are a work based on the 133 | Library side by side in a single library together with other library 134 | facilities that are not Applications and are not covered by this 135 | License, and convey such a combined library under terms of your 136 | choice, if you do both of the following: 137 | 138 | a) Accompany the combined library with a copy of the same work based 139 | on the Library, uncombined with any other library facilities, 140 | conveyed under the terms of this License. 141 | 142 | b) Give prominent notice with the combined library that part of it 143 | is a work based on the Library, and explaining where to find the 144 | accompanying uncombined form of the same work. 145 | 146 | 6. Revised Versions of the GNU Lesser General Public License. 
147 | 148 | The Free Software Foundation may publish revised and/or new versions 149 | of the GNU Lesser General Public License from time to time. Such new 150 | versions will be similar in spirit to the present version, but may 151 | differ in detail to address new problems or concerns. 152 | 153 | Each version is given a distinguishing version number. If the 154 | Library as you received it specifies that a certain numbered version 155 | of the GNU Lesser General Public License "or any later version" 156 | applies to it, you have the option of following the terms and 157 | conditions either of that published version or of any later version 158 | published by the Free Software Foundation. If the Library as you 159 | received it does not specify a version number of the GNU Lesser 160 | General Public License, you may choose any version of the GNU Lesser 161 | General Public License ever published by the Free Software Foundation. 162 | 163 | If the Library as you received it specifies that a proxy can decide 164 | whether future versions of the GNU Lesser General Public License shall 165 | apply, that proxy's public statement of acceptance of any version is 166 | permanent authorization for you to choose that version for the 167 | Library. 168 | 169 | 170 | --------------------------------------------------------------------------------