├── .gitignore
├── shared.hpp
├── cow_ownership_flags
├── thread_unsafe_flag.hpp
├── mutex_flag.hpp
├── seq_cst_atomics_flag.hpp
└── manually_ordered_atomics_flag.hpp
├── copy_on_write_ptr.hpp
├── bench_results
├── thread_unsafe-vs-mutex.txt
├── thread_unsafe-vs-seq_cst_atomics.txt
├── thread_unsafe_cow-vs-shared_ptr.txt
└── thread_unsafe-vs-manually_ordered_atomics.txt
├── README.md
├── bench_vs_shared_ptr.cpp
├── bench_unsafe_vs_other.cpp
└── LICENSE.lesser
/.gitignore:
--------------------------------------------------------------------------------
1 | a.out
2 | *.bin
3 |
--------------------------------------------------------------------------------
/shared.hpp:
--------------------------------------------------------------------------------
1 | /* This file is part of copy_on_write_ptr.
2 |
3 | copy_on_write_ptr is free software: you can redistribute it and/or modify
4 | it under the terms of the GNU Lesser General Public License as published by
5 | the Free Software Foundation, either version 3 of the License, or
6 | (at your option) any later version.
7 |
8 | copy_on_write_ptr is distributed in the hope that it will be useful,
9 | but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | GNU Lesser General Public License for more details.
12 |
13 | You should have received a copy of the GNU Lesser General Public License
14 | along with copy_on_write_ptr. If not, see <http://www.gnu.org/licenses/>. */
15 |
16 | #ifndef SHARED_H
17 | #define SHARED_H
18 |
19 | #include <chrono>
20 |
// Facilities shared by all the copy-on-write benchmarking programs.
//
// NOTE(review): the template argument lists of this listing were lost in extraction
// (`std::chrono::duration;`, a bare `template`, `std::chrono::time_point`). They are
// reconstructed below from the way the names are used; confirm against the upstream file.
namespace Shared {

    // Clock used to measure elapsed time. A steady (monotonic) clock is used because
    // interval measurements must not be disturbed by wall-clock adjustments.
    using Clock = std::chrono::steady_clock;

    // Default result type of time_it(): a duration in seconds with a floating-point count.
    // NOTE(review): the representation type was stripped from the listing; `double` is assumed.
    using Duration = std::chrono::duration<double>;

    // Generic timer for performance measurement purposes.
    //
    // Calls `operation()` exactly `amount` times in a row and returns the total time spent,
    // converted to DurationType. With `amount == 0` the operation is never called and only
    // the overhead of two clock readings is measured.
    //
    // DurationType comes first so that it can be chosen explicitly while Callable is still
    // deduced, e.g. `time_it<std::chrono::milliseconds>(op, n)`.
    template <typename DurationType = Duration, typename Callable>
    DurationType time_it(Callable && operation,
                         const std::size_t amount) {
        const Clock::time_point start_time = Clock::now();
        for(std::size_t i = 0; i < amount; ++i) {
            operation();
        }
        const Clock::time_point end_time = Clock::now();

        // duration_cast also handles integer-based DurationTypes, for which the implicit
        // conversion from the clock's native duration would not compile.
        return std::chrono::duration_cast<DurationType>(end_time - start_time);
    }

    // Define the data type used by the test, and a typical value of it
    using Data = int;
    const Data typical_value = 42;

}
47 |
48 | #endif
49 |
--------------------------------------------------------------------------------
/cow_ownership_flags/thread_unsafe_flag.hpp:
--------------------------------------------------------------------------------
1 | /* This file is part of copy_on_write_ptr.
2 |
3 | copy_on_write_ptr is free software: you can redistribute it and/or modify
4 | it under the terms of the GNU Lesser General Public License as published by
5 | the Free Software Foundation, either version 3 of the License, or
6 | (at your option) any later version.
7 |
8 | copy_on_write_ptr is distributed in the hope that it will be useful,
9 | but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | GNU Lesser General Public License for more details.
12 |
13 | You should have received a copy of the GNU Lesser General Public License
14 | along with copy_on_write_ptr. If not, see <http://www.gnu.org/licenses/>. */
15 |
16 | #ifndef THREAD_UNSAFE_FLAG_H
17 | #define THREAD_UNSAFE_FLAG_H
18 |
19 | #include
20 |
21 | namespace cow_ownership_flags {
22 |
// Copy-on-write ownership flag which does not attempt to achieve thread safety: the
// ownership state is a plain bool, read and written without any synchronization.
class thread_unsafe_flag {
public:

    // Construct our ownership flag from an initial value
    thread_unsafe_flag(bool initially_owned) : m_owned{initially_owned} { }

    // Move-construct the flag from a flag rvalue. The new flag takes the ownership state of
    // `other`; `other` itself is left unchanged.
    thread_unsafe_flag(thread_unsafe_flag && other) noexcept : m_owned{other.m_owned} { }

    // There's nothing special about deleting an ownership flag.
    ~thread_unsafe_flag() = default;

    // Move-assign the flag. Without thread safety, this is equivalent to move-construction.
    // Returns a reference to this flag, as every assignment operator must: the function is
    // declared to return a reference, so flowing off its end would be undefined behaviour.
    thread_unsafe_flag & operator=(thread_unsafe_flag && other) noexcept {
        set_ownership(other.m_owned);
        return *this;
    }

    // Ownership flags are not copyable. Proper CoW semantics would require clearing them upon
    // copy, which is at odds with normal copy semantics. It's better to throw a compiler error
    // in this case, and let the user write more explicit code.
    thread_unsafe_flag(const thread_unsafe_flag &) = delete;
    thread_unsafe_flag & operator=(const thread_unsafe_flag &) = delete;

    // Authoritatively mark the active memory block as owned/not owned by the active thread
    void set_ownership(bool owned) {
        m_owned = owned;
    }

    // Acquire ownership of the active memory block, using the provided resource acquisition
    // routine, if that's not done already: `acquire()` is called only when the flag is not
    // set, and the flag is set once it returns.
    // Disregard the possibility that other threads may be doing the same thing.
    template <typename Callable>
    void acquire_ownership_once(Callable && acquire) {
        if(!m_owned) {
            acquire();
            m_owned = true;
        }
    }

private:

    // True when the memory block is considered owned
    bool m_owned;
};
67 |
68 | }
69 |
70 | #endif
71 |
--------------------------------------------------------------------------------
/cow_ownership_flags/mutex_flag.hpp:
--------------------------------------------------------------------------------
1 | /* This file is part of copy_on_write_ptr.
2 |
3 | copy_on_write_ptr is free software: you can redistribute it and/or modify
4 | it under the terms of the GNU Lesser General Public License as published by
5 | the Free Software Foundation, either version 3 of the License, or
6 | (at your option) any later version.
7 |
8 | copy_on_write_ptr is distributed in the hope that it will be useful,
9 | but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | GNU Lesser General Public License for more details.
12 |
13 | You should have received a copy of the GNU Lesser General Public License
14 | along with copy_on_write_ptr. If not, see <http://www.gnu.org/licenses/>. */
15 |
16 | #ifndef MUTEX_FLAG_H
17 | #define MUTEX_FLAG_H
18 |
19 | #include <mutex>
20 |
21 | namespace cow_ownership_flags {
22 |
// This implementation of the copy-on-write ownership flag uses a mutex to achieve thread safety:
// writes to the flag and the test-acquire-set sequence run while holding m_ownership_mutex.
class mutex_flag {
public:

    // Ownership flags may be initialized to a certain value without synchronization, as at
    // construction time only one thread has access to the active ownership flag.
    mutex_flag(bool initially_owned) : m_owned{initially_owned} { }

    // When we move-construct from an ownership flag rvalue, we may assume that no other thread
    // has access to either that rvalue or the active flag, and avoid using synchronization.
    // The new flag gets a fresh mutex of its own; only the ownership state is taken over.
    mutex_flag(mutex_flag && other) noexcept : m_owned{other.m_owned} { }

    // There's nothing special about deleting an ownership flag.
    ~mutex_flag() = default;

    // When we move-assign an ownership flag rvalue, no other thread has access to that rvalue,
    // so we can access it without read synchronization.
    // But the active flag may be shared with other threads, so we need write synchronization.
    // Returns a reference to this flag: the function is declared to return a reference, so
    // flowing off its end would be undefined behaviour.
    mutex_flag & operator=(mutex_flag && other) {
        set_ownership(other.m_owned);
        return *this;
    }

    // Ownership flags are not copyable. Proper CoW semantics would require clearing them upon
    // copy, which is at odds with normal copy semantics. It's better to throw a compiler error
    // in this case, and let the user write more explicit code.
    mutex_flag(const mutex_flag &) = delete;
    mutex_flag & operator=(const mutex_flag &) = delete;

    // Authoritatively mark the active memory block as owned/not owned by the active thread
    void set_ownership(bool owned) {
        std::lock_guard<std::mutex> lock(m_ownership_mutex);
        m_owned = owned;
    }

    // Acquire ownership of the active memory block, using the provided resource acquisition
    // routine, if that's not done already. Other threads should block during this process:
    // the mutex is held while the flag is tested, while `acquire()` runs and while the flag
    // is set. Because the mutex is not recursive, `acquire()` must not call back into this flag.
    template <typename Callable>
    void acquire_ownership_once(Callable && acquire) {
        std::lock_guard<std::mutex> lock(m_ownership_mutex);
        if(!m_owned) {
            acquire();
            m_owned = true;
        }
    }

private:

    // Guards writes to m_owned and the acquisition sequence
    std::mutex m_ownership_mutex;
    // True when the memory block is considered owned
    bool m_owned;
};
73 |
74 | }
75 |
76 | #endif
77 |
--------------------------------------------------------------------------------
/copy_on_write_ptr.hpp:
--------------------------------------------------------------------------------
1 | /* This file is part of copy_on_write_ptr.
2 |
3 | copy_on_write_ptr is free software: you can redistribute it and/or modify
4 | it under the terms of the GNU Lesser General Public License as published by
5 | the Free Software Foundation, either version 3 of the License, or
6 | (at your option) any later version.
7 |
8 | copy_on_write_ptr is distributed in the hope that it will be useful,
9 | but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | GNU Lesser General Public License for more details.
12 |
13 | You should have received a copy of the GNU Lesser General Public License
14 | along with copy_on_write_ptr. If not, see <http://www.gnu.org/licenses/>. */
15 |
16 | #ifndef COW_PTR_H
17 | #define COW_PTR_H
18 |
19 | #include <memory>
20 | #include <utility>
// The copy_on_write_ptr class implements copy-on-write semantics on top of std::shared_ptr.
//
// T is the payload type; it must be copy-constructible (for the lazy copy) and assignable
// (for write()). OwnershipFlag is the ownership-tracking policy: it must be constructible from
// a bool, move-assignable, and provide set_ownership(bool) and acquire_ownership_once(callable).
//
// NOTE(review): the template parameter list was lost in extraction and is reconstructed here
// without default arguments; confirm against the upstream file.
// NOTE(review): copying does not clear the ownership flag of the *source* pointer, so a later
// write through the source is performed in place and is visible through its copies.
template <typename T, typename OwnershipFlag>
class copy_on_write_ptr {
public:
    // === BASIC CLASS LIFECYCLE ===

    // Construct a cow_ptr from a raw pointer, acquire ownership.
    // The payload is handed to a std::shared_ptr, which takes care of deleting it.
    copy_on_write_ptr(T * ptr) :
        m_payload{ptr},
        m_ownership{true}
    { }

    // TODO: As a performance optimization, allow direct construction from a shared_ptr

    // Move-construct from a copy_on_write_ptr, acquire ownership.
    // The payload pointer is moved rather than copied: this avoids a reference count
    // round-trip and leaves `cptr` empty, exactly as the defaulted move-assignment does.
    copy_on_write_ptr(copy_on_write_ptr && cptr) :
        m_payload{std::move(cptr.m_payload)},
        m_ownership{true}
    { }

    // Copy-construct from a copy_on_write_ptr, DO NOT acquire ownership.
    // Both pointers share the payload until this one is written to.
    copy_on_write_ptr(const copy_on_write_ptr & cptr) :
        m_payload{cptr.m_payload},
        m_ownership{false}
    { }

    // All our data members can take care of themselves on their own.
    ~copy_on_write_ptr() = default;

    // Moving a copy_on_write_ptr transfers ownership of the underlying data : nothing special
    copy_on_write_ptr & operator=(copy_on_write_ptr && cptr) = default;

    // Copying a copy_on_write_ptr DOES NOT transfer ownership of the underlying content, so we
    // need to reset our ownership bit in this scenario.
    // Returns a reference to this pointer: the function is declared to return a reference, so
    // flowing off its end would be undefined behaviour.
    copy_on_write_ptr & operator=(const copy_on_write_ptr & cptr) {
        m_ownership.set_ownership(false);
        m_payload = cptr.m_payload;
        return *this;
    }


    // === DATA ACCESS ===

    // Reading from copy-on-write data does not require ownership.
    // CAUTION: Be careful with references to non-const CoW data, as writes may invalidate them.
    const T & read() const { return *m_payload; }

    // Writing to copy-on-write data requires ownership, which must be acquired as needed.
    // This overload copy-assigns `value` into the payload.
    void write(const T & value) {
        copy_if_not_owner();
        *m_payload = value;
    }

    // Rvalue overload: move-assigns `value` into the payload. A named rvalue reference is an
    // lvalue, so without std::move this would silently fall back to a copy.
    void write(T && value) {
        copy_if_not_owner();
        *m_payload = std::move(value);
    }

private:
    // Payload, possibly shared with other copy_on_write_ptr instances
    std::shared_ptr<T> m_payload;
    // Tells whether the payload may be written to in place
    OwnershipFlag m_ownership;

    // If we are not the owner of the payload object, make a private copy of it
    void copy_if_not_owner() {
        m_ownership.acquire_ownership_once([this](){
            m_payload = std::make_shared<T>(*m_payload);
        });
    }
};
90 |
91 | #endif
92 |
--------------------------------------------------------------------------------
/bench_results/thread_unsafe-vs-mutex.txt:
--------------------------------------------------------------------------------
1 | === MICROBENCHMARK : THREAD-UNSAFE COW POINTER VS MUTEX-PROTECTED VERSION ===
2 |
3 | hadrien@pc-grasland:~/Bureau/Programmation/TestCoW$ g++ -O0 -std=c++11 bench_unsafe_vs_other.cpp -o bench_unsafe_vs_other.bin
4 | [...]
5 |
6 | Creating 100000000 pointers from raw pointers
7 | With a thread-unsafe implementation, this operation takes 10.0881 s
8 | With the tested implementation, it takes 10.5088 s (1.0417x slower)
9 |
10 | Creating AND move-constructing 2500000000 pointers
11 | With a thread-unsafe implementation, this operation takes 381.738 s
12 | With the tested implementation, it takes 403.743 s (1.05764x slower)
13 |
14 | Copy-constructing 1000000000 pointers
15 | With a thread-unsafe implementation, this operation takes 48.0419 s
16 | With the tested implementation, it takes 54.8719 s (1.14217x slower)
17 |
18 | Copy-constructing AND move-assigning 5000000000 pointers
19 | With a thread-unsafe implementation, this operation takes 480.287 s
20 | With the tested implementation, it takes 596.449 s (1.24186x slower)
21 |
22 | Copy-assigning 64000000 pointers
23 | With a thread-unsafe implementation, this operation takes 1.06318 s
24 | With the tested implementation, it takes 2.27327 s (2.13819x slower)
25 |
26 | Reading from 5000000000 pointers
27 | With a thread-unsafe implementation, this operation takes 35.1963 s
28 | With the tested implementation, it takes 35.4005 s (1.0058x slower)
29 |
30 | Performing 1920000000 pointer copies AND cold writes
31 | With a thread-unsafe implementation, this operation takes 603.847 s
32 | With the tested implementation, it takes 696.199 s (1.15294x slower)
33 |
34 | Performing 1920000000 warm pointer writes
35 | With a thread-unsafe implementation, this operation takes 21.2827 s
36 | With the tested implementation, it takes 61.7025 s (2.89919x slower)
37 |
38 |
39 | === RESULTS ANALYSIS ===
40 |
41 | For details on the methodology being used here, please refer to the comparison between thread-unsafe
42 | copy-on-write and raw shared_ptrs.
43 |
44 | Move-construction:
45 |
46 | thread-unsafe creation takes 101ns
47 | mutex-based creation takes 105ns
48 | thread-unsafe creation and move-construction takes 153ns
49 | mutex-based creation and move-construction takes 161ns
50 |
51 | therefore,
52 |
53 | thread-unsafe move-construction takes 52ns
54 | mutex-based move-construction takes 56ns
55 |
56 | hence the latter is 1.1x slower
57 |
58 | Move-assignment:
59 |
60 | thread-unsafe copy-construction takes 48.0ns
61 | mutex-based copy-construction takes 54.9ns
62 | thread-unsafe copy-construction and move-assignment takes 96.1ns
63 | mutex-based copy-construction and move-assignment takes 119ns
64 |
65 | therefore,
66 |
67 | thread-unsafe move-assignment takes 48.1ns
68 | mutex-based move-assignment takes 64ns
69 |
70 | hence the latter is 1.3x slower
71 |
72 | Cold writes:
73 |
74 | thread-unsafe copy-assignment takes 16.6ns
75 | mutex-based copy-assignment takes 35.5ns
76 | thread-unsafe copy-assignment and cold write takes 315ns
77 | mutex-based copy-assignment and cold write takes 363ns
78 |
79 | therefore,
80 |
81 | thread-unsafe cold write takes 298ns
82 | mutex-based cold write takes 327ns
83 |
84 | hence the latter is 1.1x slower
85 |
86 |
87 | === CONCLUSIONS ===
88 |
89 | In terms of elementary operations, before compiler optimization kicks in...
90 | * Creation from a raw pointer is 1.0x slower
91 | * Move-constructing is 1.1x slower
92 | * Copy-constructing is 1.1x slower
93 | * Moving is 1.3x slower
94 | * Copying is 2.1x slower
95 | * Reading is 1.0x slower
96 | * Cold-writing is 1.1x slower
97 | * Warm-writing is 2.9x slower
98 |
99 | The extra overhead upon copy assignment and warm writes is perhaps problematic, so we would like to use a cheaper
100 | synchronization primitive than a mutex there.
101 |
--------------------------------------------------------------------------------
/bench_results/thread_unsafe-vs-seq_cst_atomics.txt:
--------------------------------------------------------------------------------
1 | === MICROBENCHMARK : THREAD-UNSAFE COW POINTER VS SEQUENTIALLY CONSISTENT ATOMICS ===
2 |
3 | hadrien@pc-grasland:~/Bureau/Programmation/TestCoW$ g++ -O0 -std=c++11 bench_unsafe_vs_other.cpp -o bench_unsafe_vs_other.bin
4 | [...]
5 |
6 | Creating 100000000 pointers from raw pointers
7 | With a thread-unsafe implementation, this operation takes 9.82543 s
8 | With the tested implementation, it takes 10.6042 s (1.07926x slower)
9 |
10 | Creating AND move-constructing 2500000000 pointers
11 | With a thread-unsafe implementation, this operation takes 405.151 s
12 | With the tested implementation, it takes 420.437 s (1.03773x slower)
13 |
14 | Copy-constructing 1000000000 pointers
15 | With a thread-unsafe implementation, this operation takes 47.3745 s
16 | With the tested implementation, it takes 56.2335 s (1.187x slower)
17 |
18 | Copy-constructing AND move-assigning 5000000000 pointers
19 | With a thread-unsafe implementation, this operation takes 460.931 s
20 | With the tested implementation, it takes 655.856 s (1.42289x slower)
21 |
22 | Copy-assigning 64000000 pointers
23 | With a thread-unsafe implementation, this operation takes 1.03478 s
24 | With the tested implementation, it takes 2.62275 s (2.5346x slower)
25 |
26 | Reading from 5000000000 pointers
27 | With a thread-unsafe implementation, this operation takes 35.1397 s
28 | With the tested implementation, it takes 35.1514 s (1.00033x slower)
29 |
30 | Performing 1920000000 pointer copies AND cold writes
31 | With a thread-unsafe implementation, this operation takes 626.467 s
32 | With the tested implementation, it takes 751.766 s (1.20001x slower)
33 |
34 | Performing 1920000000 warm pointer writes
35 | With a thread-unsafe implementation, this operation takes 21.1624 s
36 | With the tested implementation, it takes 58.2328 s (2.75171x slower)
37 |
38 |
39 | === RESULTS ANALYSIS ===
40 |
41 | For details on the methodology being used here, please refer to the comparison between thread-unsafe
42 | copy-on-write and raw shared_ptrs.
43 |
44 | Move-construction:
45 |
46 | thread-unsafe creation takes 98.3ns
47 | seq_cst-based creation takes 106ns
48 | thread-unsafe creation and move-construction takes 162ns
49 | seq_cst-based creation and move-construction takes 168ns
50 |
51 | therefore,
52 |
53 | thread-unsafe move-construction takes 64ns
54 | seq_cst-based move-construction takes 62ns
55 |
56 | hence the latter is 1.0x slower
57 |
58 | Move-assignment:
59 |
60 | thread-unsafe copy-construction takes 47.4ns
61 | seq_cst-based copy-construction takes 56.3ns
62 | thread-unsafe copy-construction and move-assignment takes 92.2ns
63 | seq_cst-based copy-construction and move-assignment takes 132ns
64 |
65 | therefore,
66 |
67 | thread-unsafe move-assignment takes 45ns
68 | seq_cst-based move-assignment takes 75ns
69 |
70 | hence the latter is 1.7x slower
71 |
72 | Cold writes:
73 |
74 | thread-unsafe copy-assignment takes 16.2ns
75 | seq_cst-based copy-assignment takes 41.0ns
76 | thread-unsafe copy-assignment and cold write takes 326ns
77 | seq_cst-based copy-assignment and cold write takes 392ns
78 |
79 | therefore,
80 |
81 | thread-unsafe cold write takes 310ns
82 | seq_cst-based cold write takes 351ns
83 |
84 | hence the latter is 1.1x slower
85 |
86 |
87 | === CONCLUSIONS ===
88 |
89 | In terms of elementary operations, before compiler optimization kicks in...
90 | * Creation from a raw pointer is 1.1x slower => Comparable to mutex
91 | * Move-constructing is 1.0x slower => Comparable to mutex
92 | * Copy-constructing is 1.2x slower => Comparable to mutex
93 | * Moving is 1.7x slower => Slightly slower (1.3x for mutex)
94 | * Copying is 2.5x slower => Slightly slower (2.1x for mutex)
95 | * Reading is 1.0x slower => Comparable to mutex
96 | * Cold-writing is 1.1x slower => Comparable to mutex
97 | * Warm-writing is 2.8x slower => Comparable to mutex
98 |
99 | Sequentially consistent atomics bring no performance benefits with respect to mutexes, and are in some cases slightly
100 | slower. They do not appear to be worth the massive code complexity that they bring in this use case.
101 |
--------------------------------------------------------------------------------
/bench_results/thread_unsafe_cow-vs-shared_ptr.txt:
--------------------------------------------------------------------------------
1 | === MICROBENCHMARK : THREAD-UNSAFE COW POINTER VS RAW SHARED_PTR ===
2 |
3 | hadrien@pc-grasland:~/Bureau/Programmation/TestCoW$ g++ -O0 -std=c++11 bench_vs_shared_ptr.cpp -o bench_vs_shared_ptr.bin && ./bench_vs_shared_ptr.bin
4 | [...]
5 |
6 | Creating 100000000 pointers from raw pointers
7 | With a raw shared_ptr, this operation takes 9.05853 s
8 | With cow_ptr, it takes 9.70459 s (1.07132x slower)
9 |
10 | Creating AND move-constructing 2500000000 pointers
11 | With a raw shared_ptr, this operation takes 285.148 s
12 | With cow_ptr, it takes 404.977 s (1.42023x slower)
13 |
14 | Copy-constructing 1000000000 pointers
15 | With a raw shared_ptr, this operation takes 48.1197 s
16 | With cow_ptr, it takes 52.9928 s (1.10127x slower)
17 |
18 | Copy-constructing AND move-assigning 5000000000 pointers
19 | With a raw shared_ptr, this operation takes 418.364 s
20 | With cow_ptr, it takes 491.536 s (1.1749x slower)
21 |
22 | Copy-assigning 64000000 pointers
23 | With a raw shared_ptr, this operation takes 0.750994 s
24 | With cow_ptr, it takes 1.04189 s (1.38734x slower)
25 |
26 | Reading from 5000000000 pointers
27 | With a raw shared_ptr, this operation takes 21.9751 s
28 | With cow_ptr, it takes 35.1145 s (1.59792x slower)
29 |
30 | Performing 1920000000 pointer copies AND cold writes
31 | With a raw shared_ptr, this operation takes 26.1059 s
32 | With cow_ptr, it takes 619.9 s (23.7456x slower)
33 |
34 | Performing 1920000000 warm pointer writes
35 | With a raw shared_ptr, this operation takes 8.97253 s
36 | With cow_ptr, it takes 21.1923 s (2.36191x slower)
37 |
38 |
39 | === RESULTS ANALYSIS ===
40 |
41 | When interpreting the results of this test, one should be mindful of three things:
42 | - The number of operations is not constant, but optimized per-test to get a measurement uncertainty of a few percent
43 | - The benchmark is built at -O0 optimization, and may not follow -O3 performance (which cannot be microbenchmarked)
44 | - Some operations are composite, i.e. made of multiple inner operations that must be separated.
45 |
46 | Moves are an example of a composite operation: it is quite hard to build a stateless and lightweight benchmark which
47 | measures the performance of moving a piece of data back and forth between two locations. Instead, what is done is to
48 | measure the overhead of creating + moving a piece of data, then subtract the overhead of data creation alone from it.
49 |
50 | This is how it is done for move-construction...
51 |
52 | shared_ptr creation takes 90.6ns
53 | copy_on_write_ptr creation takes 97.0ns
54 | shared_ptr creation and move-construction takes 114ns
55 | copy_on_write_ptr creation and move-construction takes 162ns
56 |
57 | therefore,
58 |
59 | shared_ptr move-construction takes 23ns
60 | copy_on_write_ptr move-construction takes 65ns
61 |
62 | hence the latter is 2.8x slower
63 |
64 | ...and for move-assignment:
65 |
66 | shared_ptr copy-construction takes 48.1ns
67 | copy_on_write_ptr copy-construction takes 53.0ns
68 | shared_ptr copy-construction and move-assignment takes 83.7ns
69 | copy_on_write_ptr copy-construction and move-assignment takes 98.3ns
70 |
71 | therefore,
72 |
73 | shared_ptr move-assignment takes 35.6ns
74 | copy_on_write_ptr move-assignment takes 45.3ns
75 |
76 | hence the latter is 1.3x slower
77 |
78 | Finally, by the very nature of the copy-on-write abstraction, cold writes may also only be measured in a composite way:
79 |
80 | shared_ptr copy-assignment takes 11.7ns
81 | copy_on_write_ptr copy-assignment takes 16.3ns
82 | shared_ptr copy-assignment and cold write takes 13.6ns
83 | copy_on_write_ptr copy-assignment and cold write takes 323ns
84 |
85 | therefore,
86 |
87 | shared_ptr cold write takes 1.9ns
88 | copy_on_write_ptr cold write takes 307ns
89 |
90 | hence the latter is 161x slower
91 |
92 |
93 | === CONCLUSIONS ===
94 |
95 | In terms of elementary operations, before compiler optimization kicks in...
96 | * Creation from a raw pointer is 1.1x slower
97 | * Move-constructing is 2.8x slower
98 | * Copy-constructing is 1.1x slower
99 | * Moving is 1.3x slower
100 | * Copying is 1.4x slower
101 | * Reading is 1.6x slower
102 | * Cold-writing is 161x slower => EXPECTED: Dynamic memory allocation overhead.
103 | * Warm-writing is 2.4x slower
104 |
105 | This sets some expectations on how much performance may be expected from thread-safe copy-on-write implementations,
106 | when measured in the same way. It also highlights the well-known fact that for scenarios where writes are infrequent,
107 | copy-on-write of large objects remains quite efficient as a memory usage optimization.
108 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # About the `copy_on_write_ptr` project
2 |
3 | ## Introduction
4 |
5 | The idea behind `copy_on_write_ptr` is to provide users with a relatively straightforward way to use `std::shared_ptr`
6 | with copy-on-write (CoW) semantics.
7 |
8 | In CoW semantics, large pieces of data may be cheaply "copied" by reference as long as they are not written to, whereas
9 | writing triggers a lazy deep copy of the underlying data block. Effectively, copy-on-write allows a client to have
10 | something which offers the memory efficiency benefits of an `std::shared_ptr`, but can gracefully degrade into
11 | an `std::unique_ptr` to a private mutable data block as needed.
12 |
13 | There is a widespread belief in the C++ community that the innovations brought forth by C++11, in particular move
14 | semantics, have rendered copy-on-write obsolete. This belief has led, for example, to a similar effort being rejected by
15 | the Boost community. However, this is not entirely accurate. C++11 has not rendered copy-on-write obsolete, it has
16 | simply proposed a better solution to a *subset* of the problems which required CoW usage in the past.
17 |
18 | Copy-on-write semantics remain appropriate in scenarios where...
19 |
20 | - Multiple threads need access to a large piece of data, behaving *as if* they owned a private copy of it.
21 | - It is not known in advance whether threads will need to mutate their "cheap copy" of the data.
22 | - The probability of data mutation is low enough for the memory efficiency gains to offset the CPU efficiency losses.
23 |
24 |
25 | ## Copy-on-write in a single-threaded world
26 |
27 | Writing to copy-on-write data relies on an underlying notion of data ownership:
28 |
29 | - If the active pointer has ownership of the data block it points to, it can perform the write directly
30 | - If it does not have ownership, it must create a new data block (which it will own) and write there
31 |
32 | In a single-threaded world, this may be implemented simply using a boolean dirty flag which tells whether a
33 | `copy_on_write_ptr` owns the data it points to. This flag is tested on every write, and a lazy copy will occur when a
34 | write is attempted while this flag is `false`, setting the flag to `true` along the way.
35 |
36 |
37 | ## Copy-on-write in a multithreaded world
38 |
39 | If thread safety is desired, then copy-on-write gets more complicated, because we need to handle two data races:
40 |
41 | 1. Two threads attempt to write new values into CoW data at the same time, potentially causing multiple lazy copies.
42 | 2. A thread attempts to assign a new value to the pointer while another is performing a write to the contained data.
43 |
44 | Another data race that cannot be avoided (in a library-based copy-on-write implementation at least) is that writing to
45 | CoW data potentially invalidates the address of that data. Therefore, client threads should be very careful when holding
46 | long-lived references to CoW data that may be asynchronously written to. This is a general concern when using container
47 | libraries, though, so threaded code authors should be aware of it.
48 |
49 | It should be noted that both of the data races above generally indicate an error within the client code, thus opting not
50 | to handle them would be a reasonable option. But if they are to be handled well, thread synchronization must be used.
51 |
52 |
53 | ## Exploring the design tradeoff
54 |
55 | To explore the design space for copy-on-write implementations, I decided to decouple the data ownership handling
56 | mechanism from the high-level CoW interface that is provided by `copy_on_write_ptr`. In this repository, you will find
57 | multiple implementations of this mechanism:
58 |
59 | - A thread-unsafe implementation using a simple boolean flag
60 | - An implementation using mutex synchronization to prevent concurrent ownership flag assignment and lazy copies
61 | - An implementation using atomics-based synchronization instead of mutexes, at a cost of some design complexity
62 | - An implementation using explicit memory ordering to try to accelerate atomics, at the cost of further complexity
63 |
64 | I initially tried to use `std::once_flag` as a copy-on-write ownership flag implementation, however its non-readable,
65 | non-writable, non-moveable and non-copyable semantics turned out to be too limiting for my needs.
66 |
67 | These implementations may easily be compared from a performance and design complexity point of view: the thread-unsafe
68 | implementation can serve as a baseline for the best performance that one may expect from copy-on-write semantics, under
69 | disciplined single-threaded use, whereas the synchronized implementations represent different points on the thread-safe
70 | design continuum between maximal performance and minimal design complexity.
71 |
72 | You will find the results of this comparison in the `bench_results/` subdirectory.
73 |
--------------------------------------------------------------------------------
/bench_results/thread_unsafe-vs-manually_ordered_atomics.txt:
--------------------------------------------------------------------------------
1 | === MICROBENCHMARK : THREAD-UNSAFE COW POINTER VS MANUALLY ORDERED ATOMICS ===
2 |
3 | hadrien@pc-grasland:~/Bureau/Programmation/TestCoW$ g++ -O0 -std=c++11 bench_unsafe_vs_other.cpp -o bench_unsafe_vs_other.bin
4 | [...]
5 |
6 | Creating 100000000 pointers from raw pointers
7 | With a thread-unsafe implementation, this operation takes 9.60142 s
8 | With the tested implementation, it takes 10.4198 s (1.08524x slower)
9 |
10 | Creating AND move-constructing 2500000000 pointers
11 | With a thread-unsafe implementation, this operation takes 394.183 s
12 | With the tested implementation, it takes 401.842 s (1.01943x slower)
13 |
14 | Copy-constructing 1000000000 pointers
15 | With a thread-unsafe implementation, this operation takes 48.1861 s
16 | With the tested implementation, it takes 56.815 s (1.17907x slower)
17 |
18 | Copy-constructing AND move-assigning 5000000000 pointers
19 | With a thread-unsafe implementation, this operation takes 478.704 s
20 | With the tested implementation, it takes 600.547 s (1.25453x slower)
21 |
22 | Copy-assigning 64000000 pointers
23 | With a thread-unsafe implementation, this operation takes 1.05171 s
24 | With the tested implementation, it takes 2.07142 s (1.96958x slower)
25 |
26 | Reading from 5000000000 pointers
27 | With a thread-unsafe implementation, this operation takes 35.1065 s
28 | With the tested implementation, it takes 35.2061 s (1.00284x slower)
29 |
30 | Performing 1920000000 pointer copies AND cold writes
31 | With a thread-unsafe implementation, this operation takes 604.841 s
32 | With the tested implementation, it takes 698.893 s (1.1555x slower)
33 |
34 | Performing 1920000000 warm pointer writes
35 | With a thread-unsafe implementation, this operation takes 21.4445 s
36 | With the tested implementation, it takes 41.5779 s (1.93886x slower)
37 |
38 |
39 | === RESULTS ANALYSIS ===
40 |
41 | For details on the methodology being used here, please refer to the comparison between thread-unsafe
42 | copy-on-write and raw shared_ptrs.
43 |
44 | Move-construction:
45 |
46 | thread-unsafe creation takes 96.0ns
47 | atomics-based creation takes 104ns
48 | thread-unsafe creation and move-construction takes 158ns
49 | atomics-based creation and move-construction takes 161ns
50 |
51 | therefore,
52 |
53 | thread-unsafe move-construction takes 62ns
54 | atomics-based move-construction takes 57ns
55 |
56 | hence the latter is 0.9x slower
57 |
58 | Move-assignment:
59 |
60 | thread-unsafe copy-construction takes 48.2ns
61 | atomics-based copy-construction takes 56.8ns
62 | thread-unsafe copy-construction and move-assignment takes 95.7ns
63 | atomics-based copy-construction and move-assignment takes 120ns
64 |
65 | therefore,
66 |
67 | thread-unsafe move-assignment takes 47.5ns
68 | atomics-based move-assignment takes 63ns
69 |
70 | hence the latter is 1.3x slower
71 |
72 | Cold writes:
73 |
74 | thread-unsafe copy-assignment takes 16.4ns
75 | atomics-based copy-assignment takes 32.4ns
76 | thread-unsafe copy-assignment and cold write takes 315ns
77 | atomics-based copy-assignment and cold write takes 364ns
78 |
79 | therefore,
80 |
81 | thread-unsafe cold write takes 299ns
82 | atomics-based cold write takes 332ns
83 |
84 | hence the latter is 1.1x slower
85 |
86 |
87 | === CONCLUSIONS ===
88 |
89 | In terms of elementary operations, before compiler optimization kicks in...
90 | * Creation from a raw pointer is 1.1x slower => Comparable to mutex
91 | * Move-constructing is 0.9x slower => Slightly faster (1.1x for mutex)
92 | * Copy-constructing is 1.2x slower => Comparable to mutex
93 | * Moving is 1.3x slower => Comparable to mutex
94 | * Copying is 2.0x slower => Slightly faster (2.3x for mutex)
95 | * Reading is 1.0x slower => Comparable to mutex
96 | * Cold-writing is 1.1x slower => Comparable to mutex
97 | * Warm-writing is 1.9x slower => Much faster (2.9x for mutex)
98 |
99 | Unlike sequentially consistent atomics, manually ordered atomics emerge as an attractive alternative to mutexes.
100 | They are always at least as fast as mutexes, and exhibit a nice speed advantage in the two areas where mutexes shine
101 | least, copy assignment (which requires synchronization) and warm writes (which are the mutexes' weakest point).
102 |
103 | Whether they are worth it or not depends on one's hunger for speed: atomics are tricky, manually ordering them is
104 | trickier, so this code is much more likely to exhibit bugs, and to gain some across maintenance years. However, I would
105 | say that this primitive might convince a copy-on-write nonbeliever that synchronization across threads can be done
106 | reasonably cheaply.
107 |
--------------------------------------------------------------------------------
/cow_ownership_flags/seq_cst_atomics_flag.hpp:
--------------------------------------------------------------------------------
1 | /* This file is part of copy_on_write_ptr.
2 |
3 | copy_on_write_ptr is free software: you can redistribute it and/or modify
4 | it under the terms of the GNU Lesser General Public License as published by
5 | the Free Software Foundation, either version 3 of the License, or
6 | (at your option) any later version.
7 |
8 | copy_on_write_ptr is distributed in the hope that it will be useful,
9 | but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | GNU Lesser General Public License for more details.
12 |
13 | You should have received a copy of the GNU Lesser General Public License
14 | along with copy_on_write_ptr. If not, see <http://www.gnu.org/licenses/>. */
15 |
16 | #ifndef SEQ_CST_ATOMICS_FLAG_H
17 | #define SEQ_CST_ATOMICS_FLAG_H
18 |
19 | #include
20 | #include
21 |
22 | namespace cow_ownership_flags {
23 |
// This implementation of the copy-on-write ownership flag uses sequentially consistent atomics
// to achieve thread safety.
//
// The flag is a three-state machine held in a single atomic integer:
//
//     NotOwner --(acquire_ownership_once)--> AcquiringOwnership --(routine done)--> Owner
//
// Every atomic operation uses the default std::memory_order_seq_cst, except for
// unsynchronized_status(), which is only called on objects no other thread can reach.
class seq_cst_atomics_flag {
public:

    // Ownership flags may be initialized to a certain value without synchronization, as at
    // construction time only one thread has access to the active ownership flag.
    // (Kept implicit so that existing callers relying on bool conversion still compile.)
    seq_cst_atomics_flag(bool initially_owned) :
        m_ownership_status{to_ownership_status(initially_owned)}
    { }


    // When we move-construct from an ownership flag rvalue, we may assume that no other thread
    // has access to either that rvalue or the active flag, and avoid using synchronization.
    seq_cst_atomics_flag(seq_cst_atomics_flag && other) noexcept :
        m_ownership_status{other.unsynchronized_status()}
    { }


    // There's nothing special about deleting an ownership flag.
    ~seq_cst_atomics_flag() = default;


    // When we move-assign an ownership flag rvalue, no other thread has access to that rvalue,
    // so we can access it without read synchronization.
    // But the active flag may be shared with other threads, so we need write synchronization.
    // Returns a reference to the assigned-to flag, as every assignment operator must: flowing
    // off the end of a non-void function is undefined behavior.
    seq_cst_atomics_flag & operator=(seq_cst_atomics_flag && other) noexcept {
        set_ownership_status(other.unsynchronized_status());
        return *this;
    }


    // Ownership flags are not copyable. Proper CoW semantics would require clearing them upon
    // copy, which is at odds with normal copy semantics. It's better to throw a compiler error
    // in this case, and let the user write more explicit code.
    seq_cst_atomics_flag(const seq_cst_atomics_flag &) = delete;
    seq_cst_atomics_flag & operator=(const seq_cst_atomics_flag &) = delete;


    // Authoritatively mark the active memory block as owned/not owned by the active thread.
    // Blocks (spins) while another thread is in the middle of acquiring ownership.
    void set_ownership(bool owned) {
        set_ownership_status(to_ownership_status(owned));
    }


    // Acquire ownership of the active memory block, using the provided resource acquisition
    // routine, if that's not done already. Other threads should block during this process.
    //
    // acquisition_routine : callable taking no arguments. It is invoked at most once per call,
    //                       and only by the thread that moved the flag out of NotOwner.
    //
    // If acquisition_routine throws, the flag is rolled back to NotOwner before the exception
    // propagates, so that later calls (from any thread) can retry instead of spinning forever
    // on a flag stuck in AcquiringOwnership.
    template <typename Callable>
    void acquire_ownership_once(Callable && acquisition_routine) {
        for(;;) {
            // Try to switch the ownership status from NotOwner to AcquiringOwnership.
            // On failure, compare_exchange_strong stores the status it observed.
            OwnershipStatusType observed_ownership = NotOwner;
            if(m_ownership_status.compare_exchange_strong(observed_ownership,
                                                          AcquiringOwnership)) {
                // We won the race: acquire resource ownership
                try {
                    acquisition_routine();
                } catch(...) {
                    // Undo the transition so that nobody waits on a failed acquisition
                    m_ownership_status.store(NotOwner);
                    throw;
                }
                m_ownership_status.store(Owner);
                return;
            }

            // Nothing to do, the resource is already owned
            if(observed_ownership == Owner) return;

            // Another thread is acquiring ownership: wait until it either succeeds or fails,
            // then re-evaluate the flag from the top.
            while(m_ownership_status.load() == AcquiringOwnership) { }
        }
    }


private:

    using OwnershipStatusType = std::uint_fast8_t;
    enum OwnershipStatus : OwnershipStatusType { NotOwner, AcquiringOwnership, Owner };
    std::atomic<OwnershipStatusType> m_ownership_status;

    // Map the boolean used by the public interface to the internal status encoding.
    static OwnershipStatusType to_ownership_status(bool is_owned) noexcept {
        return (is_owned ? Owner : NotOwner);
    }

    // Read the status with relaxed ordering. Only valid when no other thread can access the
    // flag (move sources), which is why no synchronization is requested.
    OwnershipStatusType unsynchronized_status() const noexcept {
        return m_ownership_status.load(std::memory_order_relaxed);
    }

    // Atomically replace the status with desired_ownership, never overwriting an in-flight
    // AcquiringOwnership state.
    void set_ownership_status(const OwnershipStatusType desired_ownership) noexcept {
        OwnershipStatusType current_ownership = m_ownership_status.load();

        do {
            // Wait for any resource ownership acquisition operation to complete
            while(current_ownership == AcquiringOwnership) {
                current_ownership = m_ownership_status.load();
            }

            // Once that is done, try to swap in the new resource ownership status.
            // A failed exchange refreshes current_ownership, so the wait above is re-checked.
        } while(!m_ownership_status.compare_exchange_weak(current_ownership, desired_ownership));
    }

};
122 |
123 | }
124 |
125 | #endif
126 |
--------------------------------------------------------------------------------
/cow_ownership_flags/manually_ordered_atomics_flag.hpp:
--------------------------------------------------------------------------------
1 | /* This file is part of copy_on_write_ptr.
2 |
3 | copy_on_write_ptr is free software: you can redistribute it and/or modify
4 | it under the terms of the GNU Lesser General Public License as published by
5 | the Free Software Foundation, either version 3 of the License, or
6 | (at your option) any later version.
7 |
8 | copy_on_write_ptr is distributed in the hope that it will be useful,
9 | but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | GNU Lesser General Public License for more details.
12 |
13 | You should have received a copy of the GNU Lesser General Public License
14 | along with copy_on_write_ptr. If not, see <http://www.gnu.org/licenses/>. */
15 |
16 | #ifndef MANUALLY_ORDERED_ATOMICS_FLAG_H
17 | #define MANUALLY_ORDERED_ATOMICS_FLAG_H
18 |
19 | #include
20 | #include
21 |
22 | namespace cow_ownership_flags {
23 |
// This implementation of the copy-on-write ownership flag uses manually ordered atomics to
// achieve thread safety.
//
// The flag is a three-state machine held in a single atomic integer:
//
//     NotOwner --(acquire_ownership_once)--> AcquiringOwnership --(routine done)--> Owner
//
// Memory orderings are chosen per operation: read-modify-write operations use acq_rel,
// publishing stores use release, and observing loads use acquire. Loads that the original
// code requested as memory_order_consume are requested as acquire instead: consume has been
// discouraged since C++17, and acquire is at least as strong, so this cannot weaken any
// guarantee the code relied on.
class manually_ordered_atomics_flag {
public:

    // Ownership flags may be initialized to a certain value without synchronization, as at
    // construction time only one thread has access to the active ownership flag.
    // (Kept implicit so that existing callers relying on bool conversion still compile.)
    manually_ordered_atomics_flag(bool initially_owned) :
        m_ownership_status{to_ownership_status(initially_owned)}
    { }


    // When we move-construct from an ownership flag rvalue, we may assume that no other thread
    // has access to either that rvalue or the active flag, and avoid using synchronization.
    manually_ordered_atomics_flag(manually_ordered_atomics_flag && other) noexcept :
        m_ownership_status{other.unsynchronized_status()}
    { }


    // There's nothing special about deleting an ownership flag.
    ~manually_ordered_atomics_flag() = default;


    // When we move-assign an ownership flag rvalue, no other thread has access to that rvalue,
    // so we can access it without read synchronization.
    // But the active flag may be shared with other threads, so we need write synchronization.
    // Returns a reference to the assigned-to flag, as every assignment operator must: flowing
    // off the end of a non-void function is undefined behavior.
    manually_ordered_atomics_flag & operator=(manually_ordered_atomics_flag && other) noexcept {
        set_ownership_status(other.unsynchronized_status());
        return *this;
    }


    // Ownership flags are not copyable. Proper CoW semantics would require clearing them upon
    // copy, which is at odds with normal copy semantics. It's better to throw a compiler error
    // in this case, and let the user write more explicit code.
    manually_ordered_atomics_flag(const manually_ordered_atomics_flag &) = delete;
    manually_ordered_atomics_flag & operator=(const manually_ordered_atomics_flag &) = delete;


    // Authoritatively mark the active memory block as owned/not owned by the active thread.
    // Blocks (spins) while another thread is in the middle of acquiring ownership.
    void set_ownership(bool owned) {
        set_ownership_status(to_ownership_status(owned));
    }


    // Acquire ownership of the active memory block, using the provided resource acquisition
    // routine, if that's not done already. Other threads should block during this process.
    //
    // acquisition_routine : callable taking no arguments. It is invoked at most once per call,
    //                       and only by the thread that moved the flag out of NotOwner.
    //
    // If acquisition_routine throws, the flag is rolled back to NotOwner before the exception
    // propagates, so that later calls (from any thread) can retry instead of spinning forever
    // on a flag stuck in AcquiringOwnership.
    template <typename Callable>
    void acquire_ownership_once(Callable && acquisition_routine) {
        for(;;) {
            // Try to switch the ownership status from NotOwner to AcquiringOwnership.
            // On failure, compare_exchange_strong stores the status it observed.
            OwnershipStatusType observed_ownership = NotOwner;
            if(m_ownership_status.compare_exchange_strong(observed_ownership,
                                                          AcquiringOwnership,
                                                          std::memory_order_acq_rel,
                                                          std::memory_order_acquire)) {
                // We won the race: acquire resource ownership
                try {
                    acquisition_routine();
                } catch(...) {
                    // Undo the transition so that nobody waits on a failed acquisition
                    m_ownership_status.store(NotOwner,
                                             std::memory_order_release);
                    throw;
                }

                // Release ordering publishes the routine's writes to threads that later
                // observe Owner through an acquire operation.
                m_ownership_status.store(Owner,
                                         std::memory_order_release);
                return;
            }

            // Nothing to do, the resource is already owned
            if(observed_ownership == Owner) return;

            // Another thread is acquiring ownership: wait until it either succeeds or fails,
            // then re-evaluate the flag from the top.
            while(m_ownership_status.load(std::memory_order_acquire) == AcquiringOwnership) { }
        }
    }


private:

    using OwnershipStatusType = std::uint_fast8_t;
    enum OwnershipStatus : OwnershipStatusType { NotOwner, AcquiringOwnership, Owner };
    std::atomic<OwnershipStatusType> m_ownership_status;

    // Map the boolean used by the public interface to the internal status encoding.
    static OwnershipStatusType to_ownership_status(bool is_owned) noexcept {
        return (is_owned ? Owner : NotOwner);
    }

    // Read the status with relaxed ordering. Only valid when no other thread can access the
    // flag (move sources), which is why no synchronization is requested.
    OwnershipStatusType unsynchronized_status() const noexcept {
        return m_ownership_status.load(std::memory_order_relaxed);
    }

    // Atomically replace the status with desired_ownership, never overwriting an in-flight
    // AcquiringOwnership state.
    void set_ownership_status(const OwnershipStatusType desired_ownership) noexcept {
        OwnershipStatusType current_ownership = m_ownership_status.load(std::memory_order_acquire);

        do {
            // Wait for any resource ownership acquisition operation to complete
            while(current_ownership == AcquiringOwnership) {
                current_ownership = m_ownership_status.load(std::memory_order_acquire);
            }

            // Once that is done, try to swap in the new resource ownership status.
            // A failed exchange refreshes current_ownership, so the wait above is re-checked.
        } while(!m_ownership_status.compare_exchange_weak(current_ownership,
                                                          desired_ownership,
                                                          std::memory_order_acq_rel,
                                                          std::memory_order_acquire));
    }

};
129 |
130 | }
131 |
132 | #endif
133 |
--------------------------------------------------------------------------------
/bench_vs_shared_ptr.cpp:
--------------------------------------------------------------------------------
1 | /* This file is part of copy_on_write_ptr.
2 |
3 | copy_on_write_ptr is free software: you can redistribute it and/or modify
4 | it under the terms of the GNU Lesser General Public License as published by
5 | the Free Software Foundation, either version 3 of the License, or
6 | (at your option) any later version.
7 |
8 | copy_on_write_ptr is distributed in the hope that it will be useful,
9 | but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | GNU Lesser General Public License for more details.
12 |
13 | You should have received a copy of the GNU Lesser General Public License
14 | along with copy_on_write_ptr. If not, see <http://www.gnu.org/licenses/>. */
15 |
16 | #include
17 | #include
18 |
19 | #include "copy_on_write_ptr.hpp"
20 | #include "cow_ownership_flags/thread_unsafe_flag.hpp"
21 | #include "shared.hpp"
22 |
23 | // === FORWARD DECLARATIONS ===
24 |
25 | // Import shared definitions
26 | using namespace Shared;
27 |
28 | // Specialization of time_it for my comparison purposes
29 | template
31 | void compare_it(Callable1 && shptr_operation,
32 | Callable2 && cowptr_operation,
33 | const std::size_t amount) {
34 | const auto shptr_duration = Shared::time_it(shptr_operation, amount);
35 | std::cout << "With a raw shared_ptr, this operation takes "
36 | << shptr_duration.count() << " s"
37 | << std::endl;
38 |
39 | const auto cowptr_duration = Shared::time_it(cowptr_operation, amount);
40 | std::cout << "With cow_ptr, it takes "
41 | << cowptr_duration.count() << " s ("
42 | << cowptr_duration.count() / shptr_duration.count() << "x slower)"
43 | << std::endl;
44 | }
45 |
46 | // === PERFORMANCE TEST BODY ===
47 | // Runs eight timed comparisons (PART 1 to 8): each prints a header line, then hands compare_it a shared_ptr lambda, a cow_ptr lambda and an iteration count.
48 | int main() {
49 | // NOTE(review): Data and typical_value are not declared in this file; presumably they come from shared.hpp through `using namespace Shared` — confirm.
50 | // === PART 0 : TEST-WIDE DEFINITIONS ===
51 |
52 | // Define our smart pointer types
53 | using SharedPointer = std::shared_ptr; // NOTE(review): template arguments appear lost in extraction; presumably std::shared_ptr<Data> — confirm
54 | using COWPointer = copy_on_write_ptr; // NOTE(review): template arguments appear lost in extraction; the element type and ownership flag cannot be told from here
55 |
56 | // Say hi :)
57 | std::cout << std::endl << "=== Microbenchmarking cow_ptr ===" << std::endl;
58 |
59 | // === PART 1 : CREATION FROM RAW POINTER ===
60 |
61 | const size_t creation_amount = 1000 * 1000 * 100; // 100 million iterations
62 | std::cout << std::endl << "Creating " << creation_amount << " pointers from raw pointers" << std::endl;
63 | {
64 | compare_it(
65 | [&](){
66 | SharedPointer ptr{new Data{typical_value}}; // one heap allocation per iteration; ptr is destroyed when the lambda returns
67 | },
68 | [&](){
69 | COWPointer ptr{new Data{typical_value}}; // same workload through the copy-on-write pointer
70 | },
71 | creation_amount
72 | );
73 | }
74 |
75 | // === PART 2 : CREATION + MOVE-CONSTRUCTION === (NOTE: Cannot test move construction alone easily)
76 |
77 | const size_t move_amount = 25 * creation_amount; // 2.5 billion iterations
78 | std::cout << std::endl << "Creating AND move-constructing " << move_amount << " pointers" << std::endl;
79 | {
80 | compare_it(
81 | [&](){
82 | SharedPointer source{new Data{typical_value}};
83 | const SharedPointer dest{std::move(source)}; // the measured cost therefore includes the creation from PART 1
84 | },
85 | [&](){
86 | COWPointer source{new Data{typical_value}};
87 | const COWPointer dest{std::move(source)};
88 | },
89 | move_amount
90 | );
91 | }
92 |
93 | // === PART 3 : COPY CONSTRUCTION ===
94 |
95 | const size_t copy_amount = 1000 * 1000 * 1000; // evaluated in int arithmetic; 10^9 still fits in a 32-bit int
96 | std::cout << std::endl << "Copy-constructing " << copy_amount << " pointers" << std::endl;
97 | {
98 | const SharedPointer source_shptr{std::make_shared(typical_value)}; // sources are built once, outside the timed lambdas
99 | const COWPointer source_cowptr{new Data{typical_value}};
100 |
101 | compare_it(
102 | [&](){
103 | SharedPointer copy{source_shptr};
104 | },
105 | [&](){
106 | COWPointer copy{source_cowptr};
107 | },
108 | copy_amount
109 | );
110 | }
111 |
112 | // === PART 4 : COPY CONSTRUCTION + MOVE-ASSIGNMENT === (NOTE: Cannot test move assignment alone easily)
113 |
114 | const size_t copy_move_amount = 5 * copy_amount; // 5 billion iterations
115 | std::cout << std::endl << "Copy-constructing AND move-assigning " << copy_move_amount << " pointers" << std::endl;
116 | {
117 | const SharedPointer source_shptr{std::make_shared(typical_value)};
118 | const COWPointer source_cowptr{new Data{typical_value}};
119 |
120 | SharedPointer dest_shptr{source_shptr}; // destinations live across iterations and are overwritten each time
121 | COWPointer dest_cowptr{source_cowptr};
122 |
123 | compare_it(
124 | [&](){
125 | SharedPointer copy{source_shptr};
126 | dest_shptr = std::move(copy); // the measured cost includes the copy construction from PART 3
127 | },
128 | [&](){
129 | COWPointer copy{source_cowptr};
130 | dest_cowptr = std::move(copy);
131 | },
132 | copy_move_amount
133 | );
134 | }
135 |
136 | // === PART 5 : COPY ASSIGNMENT ===
137 |
138 | const size_t copy_assign_amount = 1000 * 1000 * 64; // 64 million iterations
139 | std::cout << std::endl << "Copy-assigning " << copy_assign_amount << " pointers" << std::endl;
140 | {
141 | const SharedPointer source_shptr{std::make_shared(typical_value)};
142 | const COWPointer source_cowptr{new Data{typical_value}};
143 |
144 | SharedPointer dest_shptr{source_shptr};
145 | COWPointer dest_cowptr{source_cowptr};
146 |
147 | compare_it(
148 | [&](){
149 | dest_shptr = source_shptr; // the same source is assigned to the same destination on every iteration
150 | },
151 | [&](){
152 | dest_cowptr = source_cowptr;
153 | },
154 | copy_assign_amount
155 | );
156 | }
157 |
158 | // === PART 6 : READ DATA ===
159 |
160 | const size_t read_amount = 1000ULL * 1000ULL * 1000ULL * 5ULL; // ULL literals: 5 * 10^9 does not fit in a 32-bit int
161 | std::cout << std::endl << "Reading from " << read_amount << " pointers" << std::endl;
162 | {
163 | const SharedPointer source_shptr{std::make_shared(typical_value)};
164 | const COWPointer source_cowptr{new Data{typical_value}};
165 |
166 | compare_it(
167 | [&](){
168 | const Data & read = *source_shptr; // NOTE(review): the reference is never used; an optimizing build may drop this access entirely
169 | },
170 | [&](){
171 | const Data & read = source_cowptr.read(); // NOTE(review): same caveat as above
172 | },
173 | read_amount
174 | );
175 | }
176 |
177 | // === PART 7 : COPY ASSIGNMENT + COLD WRITES === (NOTE: A pure cold write would require breaking encapsulation)
178 |
179 | const size_t cold_write_amount = 30 * copy_assign_amount; // 1.92 billion iterations
180 | std::cout << std::endl << "Performing " << cold_write_amount << " pointer copies AND cold writes" << std::endl;
181 | {
182 | const SharedPointer source_shptr{std::make_shared(typical_value)};
183 | const COWPointer source_cowptr{new Data{typical_value}};
184 |
185 | SharedPointer dest_shptr{source_shptr};
186 | COWPointer dest_cowptr{source_cowptr};
187 |
188 | compare_it(
189 | [&](){
190 | dest_shptr = source_shptr; // re-share before each write, mirroring the cow_ptr lambda below
191 | *dest_shptr = typical_value; // shared_ptr has no copy-on-write: this writes to the object shared with source_shptr
192 | },
193 | [&](){
194 | dest_cowptr = source_cowptr; // re-assigning from the source before each write is what makes the write "cold"
195 | dest_cowptr.write(typical_value);
196 | },
197 | cold_write_amount
198 | );
199 | }
200 |
201 | // === PART 8 : WARM WRITES ===
202 |
203 | const size_t warm_write_amount = cold_write_amount; // same iteration count as PART 7
204 | std::cout << std::endl << "Performing " << warm_write_amount << " warm pointer writes" << std::endl;
205 | {
206 | SharedPointer shptr{std::make_shared(typical_value)};
207 | COWPointer cowptr{new Data{typical_value}};
208 |
209 | compare_it(
210 | [&](){
211 | *shptr = typical_value; // repeated write to a pointer that is never re-assigned in between
212 | },
213 | [&](){
214 | cowptr.write(typical_value);
215 | },
216 | warm_write_amount
217 | );
218 | }
219 |
220 | // === TEST FINALIZATION ===
221 |
222 | std::cout << std::endl; // trailing blank line, flushed
223 | return 0;
224 |
225 | }
226 |
--------------------------------------------------------------------------------
/bench_unsafe_vs_other.cpp:
--------------------------------------------------------------------------------
1 | /* This file is part of copy_on_write_ptr.
2 |
3 | copy_on_write_ptr is free software: you can redistribute it and/or modify
4 | it under the terms of the GNU Lesser General Public License as published by
5 | the Free Software Foundation, either version 3 of the License, or
6 | (at your option) any later version.
7 |
8 | copy_on_write_ptr is distributed in the hope that it will be useful,
9 | but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | GNU Lesser General Public License for more details.
12 |
13 | You should have received a copy of the GNU Lesser General Public License
14 | along with copy_on_write_ptr. If not, see <http://www.gnu.org/licenses/>. */
15 |
16 | #include
17 |
18 | #include "copy_on_write_ptr.hpp"
19 | #include "cow_ownership_flags/thread_unsafe_flag.hpp"
20 | #include "cow_ownership_flags/manually_ordered_atomics_flag.hpp"
21 | #include "shared.hpp"
22 |
23 | // === FORWARD DECLARATIONS ===
24 |
25 | // Import shared definitions
26 | using namespace Shared;
27 |
28 | // Specialization of time_it for my comparison purposes
29 | template
31 | void compare_it(Callable1 && unsafe_operation,
32 | Callable2 && tested_operation,
33 | const std::size_t amount) {
34 | const auto unsafe_duration = Shared::time_it(unsafe_operation, amount);
35 | std::cout << "With a thread-unsafe implementation, this operation takes "
36 | << unsafe_duration.count() << " s"
37 | << std::endl;
38 |
39 | const auto tested_duration = Shared::time_it(tested_operation, amount);
40 | std::cout << "With the tested implementation, it takes "
41 | << tested_duration.count() << " s ("
42 | << tested_duration.count() / unsafe_duration.count() << "x slower)"
43 | << std::endl;
44 | }
45 |
46 | // === PERFORMANCE TEST BODY ===
47 | // Runs eight timed comparisons (PART 1 to 8): each prints a header line, then hands compare_it a thread-unsafe lambda, a tested lambda and an iteration count.
48 | int main() {
49 | // NOTE(review): Data and typical_value are not declared in this file; presumably they come from shared.hpp through `using namespace Shared` — confirm.
50 | // === PART 0 : TEST-WIDE DEFINITIONS ===
51 |
52 | // Define our smart pointer types
53 | using UnsafePointer = copy_on_write_ptr; // NOTE(review): template arguments appear lost in extraction; presumably selects the flag from thread_unsafe_flag.hpp — confirm
54 | using TestedPointer = copy_on_write_ptr; // NOTE(review): template arguments appear lost in extraction; presumably selects the flag from manually_ordered_atomics_flag.hpp — confirm
55 |
56 | // Say hi :)
57 | std::cout << std::endl << "=== Microbenchmarking cow_ptr ===" << std::endl;
58 |
59 | // === PART 1 : CREATION FROM RAW POINTER ===
60 |
61 | const size_t creation_amount = 1000 * 1000 * 100; // 100 million iterations
62 | std::cout << std::endl << "Creating " << creation_amount << " pointers from raw pointers" << std::endl;
63 | {
64 | compare_it(
65 | [&](){
66 | UnsafePointer ptr{new Data{typical_value}}; // one heap allocation per iteration; ptr is destroyed when the lambda returns
67 | },
68 | [&](){
69 | TestedPointer ptr{new Data{typical_value}}; // same workload through the pointer type under test
70 | },
71 | creation_amount
72 | );
73 | }
74 |
75 | // === PART 2 : CREATION + MOVE-CONSTRUCTION === (NOTE: Cannot test move construction alone easily)
76 |
77 | const size_t move_amount = 25 * creation_amount; // 2.5 billion iterations
78 | std::cout << std::endl << "Creating AND move-constructing " << move_amount << " pointers" << std::endl;
79 | {
80 | compare_it(
81 | [&](){
82 | UnsafePointer source{new Data{typical_value}};
83 | const UnsafePointer dest{std::move(source)}; // the measured cost therefore includes the creation from PART 1
84 | },
85 | [&](){
86 | TestedPointer source{new Data{typical_value}};
87 | const TestedPointer dest{std::move(source)};
88 | },
89 | move_amount
90 | );
91 | }
92 |
93 | // === PART 3 : COPY CONSTRUCTION ===
94 |
95 | const size_t copy_amount = 1000 * 1000 * 1000; // evaluated in int arithmetic; 10^9 still fits in a 32-bit int
96 | std::cout << std::endl << "Copy-constructing " << copy_amount << " pointers" << std::endl;
97 | {
98 | const UnsafePointer source_unsafe{new Data{typical_value}}; // sources are built once, outside the timed lambdas
99 | const TestedPointer source_tested{new Data{typical_value}};
100 |
101 | compare_it(
102 | [&](){
103 | UnsafePointer copy{source_unsafe};
104 | },
105 | [&](){
106 | TestedPointer copy{source_tested};
107 | },
108 | copy_amount
109 | );
110 | }
111 |
112 | // === PART 4 : COPY CONSTRUCTION + MOVE-ASSIGNMENT === (NOTE: Cannot test move assignment alone easily)
113 |
114 | const size_t copy_move_amount = 5 * copy_amount; // 5 billion iterations
115 | std::cout << std::endl << "Copy-constructing AND move-assigning " << copy_move_amount << " pointers" << std::endl;
116 | {
117 | const UnsafePointer source_unsafe{new Data{typical_value}};
118 | const TestedPointer source_tested{new Data{typical_value}};
119 |
120 | UnsafePointer dest_unsafe{source_unsafe}; // destinations live across iterations and are overwritten each time
121 | TestedPointer dest_tested{source_tested};
122 |
123 | compare_it(
124 | [&](){
125 | UnsafePointer copy{source_unsafe};
126 | dest_unsafe = std::move(copy); // the measured cost includes the copy construction from PART 3
127 | },
128 | [&](){
129 | TestedPointer copy{source_tested};
130 | dest_tested = std::move(copy);
131 | },
132 | copy_move_amount
133 | );
134 | }
135 |
136 | // === PART 5 : COPY ASSIGNMENT ===
137 |
138 | const size_t copy_assign_amount = 1000 * 1000 * 64; // 64 million iterations
139 | std::cout << std::endl << "Copy-assigning " << copy_assign_amount << " pointers" << std::endl;
140 | {
141 | const UnsafePointer source_unsafe{new Data{typical_value}};
142 | const TestedPointer source_tested{new Data{typical_value}};
143 |
144 | UnsafePointer dest_unsafe{source_unsafe};
145 | TestedPointer dest_tested{source_tested};
146 |
147 | compare_it(
148 | [&](){
149 | dest_unsafe = source_unsafe; // the same source is assigned to the same destination on every iteration
150 | },
151 | [&](){
152 | dest_tested = source_tested;
153 | },
154 | copy_assign_amount
155 | );
156 | }
157 |
158 | // === PART 6 : READ DATA ===
159 |
160 | const size_t read_amount = 1000ULL * 1000ULL * 1000ULL * 5ULL; // ULL literals: 5 * 10^9 does not fit in a 32-bit int
161 | std::cout << std::endl << "Reading from " << read_amount << " pointers" << std::endl;
162 | {
163 | const UnsafePointer source_unsafe{new Data{typical_value}};
164 | const TestedPointer source_tested{new Data{typical_value}};
165 |
166 | compare_it(
167 | [&](){
168 | const Data & read = source_unsafe.read(); // NOTE(review): the reference is never used; an optimizing build may drop this access entirely
169 | },
170 | [&](){
171 | const Data & read = source_tested.read(); // NOTE(review): same caveat as above
172 | },
173 | read_amount
174 | );
175 | }
176 |
177 | // === PART 7 : COPY ASSIGNMENT + COLD WRITES === (NOTE: A pure cold write would require breaking encapsulation)
178 |
179 | const size_t cold_write_amount = 30 * copy_assign_amount; // 1.92 billion iterations
180 | std::cout << std::endl << "Performing " << cold_write_amount << " pointer copies AND cold writes" << std::endl;
181 | {
182 | const UnsafePointer source_unsafe{new Data{typical_value}};
183 | const TestedPointer source_tested{new Data{typical_value}};
184 |
185 | UnsafePointer dest_unsafe{source_unsafe};
186 | TestedPointer dest_tested{source_tested};
187 |
188 | compare_it(
189 | [&](){
190 | dest_unsafe = source_unsafe; // re-assigning from the source before each write is what makes the write "cold"
191 | dest_unsafe.write(typical_value);
192 | },
193 | [&](){
194 | dest_tested = source_tested;
195 | dest_tested.write(typical_value);
196 | },
197 | cold_write_amount
198 | );
199 | }
200 |
201 | // === PART 8 : WARM WRITES ===
202 |
203 | const size_t warm_write_amount = cold_write_amount; // same iteration count as PART 7
204 | std::cout << std::endl << "Performing " << warm_write_amount << " warm pointer writes" << std::endl;
205 | {
206 | UnsafePointer unsafe{new Data{typical_value}};
207 | TestedPointer tested{new Data{typical_value}};
208 |
209 | compare_it(
210 | [&](){
211 | unsafe.write(typical_value); // repeated write to a pointer that is never re-assigned in between
212 | },
213 | [&](){
214 | tested.write(typical_value);
215 | },
216 | warm_write_amount
217 | );
218 | }
219 |
220 | // === TEST FINALIZATION ===
221 |
222 | std::cout << std::endl; // trailing blank line, flushed
223 | return 0;
224 |
225 | }
226 |
--------------------------------------------------------------------------------
/LICENSE.lesser:
--------------------------------------------------------------------------------
1 |
2 |
3 | GNU LESSER GENERAL PUBLIC LICENSE
4 | Version 3, 29 June 2007
5 |
6 | Copyright (C) 2007 Free Software Foundation, Inc.
7 | Everyone is permitted to copy and distribute verbatim copies
8 | of this license document, but changing it is not allowed.
9 |
10 |
11 | This version of the GNU Lesser General Public License incorporates
12 | the terms and conditions of version 3 of the GNU General Public
13 | License, supplemented by the additional permissions listed below.
14 |
15 | 0. Additional Definitions.
16 |
17 | As used herein, "this License" refers to version 3 of the GNU Lesser
18 | General Public License, and the "GNU GPL" refers to version 3 of the GNU
19 | General Public License.
20 |
21 | "The Library" refers to a covered work governed by this License,
22 | other than an Application or a Combined Work as defined below.
23 |
24 | An "Application" is any work that makes use of an interface provided
25 | by the Library, but which is not otherwise based on the Library.
26 | Defining a subclass of a class defined by the Library is deemed a mode
27 | of using an interface provided by the Library.
28 |
29 | A "Combined Work" is a work produced by combining or linking an
30 | Application with the Library. The particular version of the Library
31 | with which the Combined Work was made is also called the "Linked
32 | Version".
33 |
34 | The "Minimal Corresponding Source" for a Combined Work means the
35 | Corresponding Source for the Combined Work, excluding any source code
36 | for portions of the Combined Work that, considered in isolation, are
37 | based on the Application, and not on the Linked Version.
38 |
39 | The "Corresponding Application Code" for a Combined Work means the
40 | object code and/or source code for the Application, including any data
41 | and utility programs needed for reproducing the Combined Work from the
42 | Application, but excluding the System Libraries of the Combined Work.
43 |
44 | 1. Exception to Section 3 of the GNU GPL.
45 |
46 | You may convey a covered work under sections 3 and 4 of this License
47 | without being bound by section 3 of the GNU GPL.
48 |
49 | 2. Conveying Modified Versions.
50 |
51 | If you modify a copy of the Library, and, in your modifications, a
52 | facility refers to a function or data to be supplied by an Application
53 | that uses the facility (other than as an argument passed when the
54 | facility is invoked), then you may convey a copy of the modified
55 | version:
56 |
57 | a) under this License, provided that you make a good faith effort to
58 | ensure that, in the event an Application does not supply the
59 | function or data, the facility still operates, and performs
60 | whatever part of its purpose remains meaningful, or
61 |
62 | b) under the GNU GPL, with none of the additional permissions of
63 | this License applicable to that copy.
64 |
65 | 3. Object Code Incorporating Material from Library Header Files.
66 |
67 | The object code form of an Application may incorporate material from
68 | a header file that is part of the Library. You may convey such object
69 | code under terms of your choice, provided that, if the incorporated
70 | material is not limited to numerical parameters, data structure
71 | layouts and accessors, or small macros, inline functions and templates
72 | (ten or fewer lines in length), you do both of the following:
73 |
74 | a) Give prominent notice with each copy of the object code that the
75 | Library is used in it and that the Library and its use are
76 | covered by this License.
77 |
78 | b) Accompany the object code with a copy of the GNU GPL and this license
79 | document.
80 |
81 | 4. Combined Works.
82 |
83 | You may convey a Combined Work under terms of your choice that,
84 | taken together, effectively do not restrict modification of the
85 | portions of the Library contained in the Combined Work and reverse
86 | engineering for debugging such modifications, if you also do each of
87 | the following:
88 |
89 | a) Give prominent notice with each copy of the Combined Work that
90 | the Library is used in it and that the Library and its use are
91 | covered by this License.
92 |
93 | b) Accompany the Combined Work with a copy of the GNU GPL and this license
94 | document.
95 |
96 | c) For a Combined Work that displays copyright notices during
97 | execution, include the copyright notice for the Library among
98 | these notices, as well as a reference directing the user to the
99 | copies of the GNU GPL and this license document.
100 |
101 | d) Do one of the following:
102 |
103 | 0) Convey the Minimal Corresponding Source under the terms of this
104 | License, and the Corresponding Application Code in a form
105 | suitable for, and under terms that permit, the user to
106 | recombine or relink the Application with a modified version of
107 | the Linked Version to produce a modified Combined Work, in the
108 | manner specified by section 6 of the GNU GPL for conveying
109 | Corresponding Source.
110 |
111 | 1) Use a suitable shared library mechanism for linking with the
112 | Library. A suitable mechanism is one that (a) uses at run time
113 | a copy of the Library already present on the user's computer
114 | system, and (b) will operate properly with a modified version
115 | of the Library that is interface-compatible with the Linked
116 | Version.
117 |
118 | e) Provide Installation Information, but only if you would otherwise
119 | be required to provide such information under section 6 of the
120 | GNU GPL, and only to the extent that such information is
121 | necessary to install and execute a modified version of the
122 | Combined Work produced by recombining or relinking the
123 | Application with a modified version of the Linked Version. (If
124 | you use option 4d0, the Installation Information must accompany
125 | the Minimal Corresponding Source and Corresponding Application
126 | Code. If you use option 4d1, you must provide the Installation
127 | Information in the manner specified by section 6 of the GNU GPL
128 | for conveying Corresponding Source.)
129 |
130 | 5. Combined Libraries.
131 |
132 | You may place library facilities that are a work based on the
133 | Library side by side in a single library together with other library
134 | facilities that are not Applications and are not covered by this
135 | License, and convey such a combined library under terms of your
136 | choice, if you do both of the following:
137 |
138 | a) Accompany the combined library with a copy of the same work based
139 | on the Library, uncombined with any other library facilities,
140 | conveyed under the terms of this License.
141 |
142 | b) Give prominent notice with the combined library that part of it
143 | is a work based on the Library, and explaining where to find the
144 | accompanying uncombined form of the same work.
145 |
146 | 6. Revised Versions of the GNU Lesser General Public License.
147 |
148 | The Free Software Foundation may publish revised and/or new versions
149 | of the GNU Lesser General Public License from time to time. Such new
150 | versions will be similar in spirit to the present version, but may
151 | differ in detail to address new problems or concerns.
152 |
153 | Each version is given a distinguishing version number. If the
154 | Library as you received it specifies that a certain numbered version
155 | of the GNU Lesser General Public License "or any later version"
156 | applies to it, you have the option of following the terms and
157 | conditions either of that published version or of any later version
158 | published by the Free Software Foundation. If the Library as you
159 | received it does not specify a version number of the GNU Lesser
160 | General Public License, you may choose any version of the GNU Lesser
161 | General Public License ever published by the Free Software Foundation.
162 |
163 | If the Library as you received it specifies that a proxy can decide
164 | whether future versions of the GNU Lesser General Public License shall
165 | apply, that proxy's public statement of acceptance of any version is
166 | permanent authorization for you to choose that version for the
167 | Library.
168 |
169 |
170 |
--------------------------------------------------------------------------------