├── results ├── g++-O1.png ├── g++-O2.png ├── g++-O3.png ├── g++-Os.png ├── clang++-O1.png ├── clang++-O2.png ├── clang++-O3.png └── clang++-Os.png ├── plot.gp ├── bench.sh ├── README.md └── array.cpp /results/g++-O1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphitemaster/vector_benchmark/HEAD/results/g++-O1.png -------------------------------------------------------------------------------- /results/g++-O2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphitemaster/vector_benchmark/HEAD/results/g++-O2.png -------------------------------------------------------------------------------- /results/g++-O3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphitemaster/vector_benchmark/HEAD/results/g++-O3.png -------------------------------------------------------------------------------- /results/g++-Os.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphitemaster/vector_benchmark/HEAD/results/g++-Os.png -------------------------------------------------------------------------------- /results/clang++-O1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphitemaster/vector_benchmark/HEAD/results/clang++-O1.png -------------------------------------------------------------------------------- /results/clang++-O2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphitemaster/vector_benchmark/HEAD/results/clang++-O2.png -------------------------------------------------------------------------------- /results/clang++-O3.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/graphitemaster/vector_benchmark/HEAD/results/clang++-O3.png -------------------------------------------------------------------------------- /results/clang++-Os.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/graphitemaster/vector_benchmark/HEAD/results/clang++-Os.png -------------------------------------------------------------------------------- /plot.gp: -------------------------------------------------------------------------------- 1 | set terminal png size 512,256 enhanced 2 | set output 'results.png' 3 | # set lmargin 1 4 | # set rmargin 0 5 | # set tmargin 0 6 | # set bmargin 0 7 | set yrange [0:2] 8 | set ylabel "Time (seconds)" 9 | set style data histogram 10 | set style histogram cluster gap 1 11 | set style fill solid 12 | set boxwidth 1 13 | set xtics format "" 14 | set xtic scale 0.9 15 | set grid ytics 16 | 17 | set title "std::vector vs Array" 18 | plot "results.dat" using 2:xtic(1) title "pod", \ 19 | "results.dat" using 3 title "trivial", \ 20 | "results.dat" using 4 title "non-trivial" -------------------------------------------------------------------------------- /bench.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | COMPILERS=('g++' 'clang++') 4 | OPTIONS='-std=c++20 -fno-exceptions -fno-rtti -march=native' 5 | OPTIMIZATIONS=('-O1' '-O2' '-O3' '-Os') 6 | FILE='array.cpp' 7 | 8 | rm results.dat 9 | for compiler in ${COMPILERS[@]}; do 10 | for optimization in ${OPTIMIZATIONS[@]}; do 11 | echo "# $compiler $optimization" > results.dat 12 | $compiler $FILE $OPTIONS $optimization -DOPTION=\""$compiler $optimization\"" -o bench.bin 13 | ./bench.bin >> results.dat 14 | rm bench.bin 15 | gnuplot plot.gp 16 | mv results.png results/${compiler}${optimization}.png 17 | done 18 | done 19 | rm results.dat -------------------------------------------------------------------------------- 
/README.md:
--------------------------------------------------------------------------------
 1 | # std::vector replacement benchmark
 2 |
 3 | ## Dependencies
 4 | You'll need `gnuplot` and `bash` to run `./bench.sh`. In addition to that, you'll
 5 | need to have `gcc` and `clang` installed with support for `-std=c++20`.
 6 |
 7 | ## Running
 8 | To run the benchmark, just execute `./bench.sh`. This compiles and tests `std::vector` against a trivial replacement in `array.cpp` with both `gcc` and `clang` at multiple optimization levels, and produces bar-graph results in the `results/` directory.
 9 |
10 | ## Results
11 | A trivial dynamic array implementation consistently outperforms `std::vector` at all optimization levels for all types (plain old data, trivial, and non-trivial), as can be seen below.
12 |
13 | > The trivial array is approximately 2x faster on average.
14 |
15 | Not shown here is what happens with optimizations disabled, i.e. `-O0`, which is common for debug builds. I chose not to show that because `std::vector` takes ~30 SECONDS in these tests when optimizations are disabled. MSVC is also not benchmarked here.
// README.md, remainder of the "Results" section (kept from the listing):
//
//   ### GCC
//   ![](./results/g++-O1.png)
//   ![](./results/g++-O2.png)
//   ![](./results/g++-O3.png)
//   ![](./results/g++-Os.png)
//
//   ### CLANG
//   ![](./results/clang++-O1.png)
//   ![](./results/clang++-O2.png)
//   ![](./results/clang++-O3.png)
//   ![](./results/clang++-Os.png)
//
// ---------------------------------------------------------------------------
// array.cpp
// ---------------------------------------------------------------------------
#include <stddef.h> // size_t
#include <stdlib.h> // malloc, realloc, free
#include <stdio.h>  // printf
#include <string.h> // memset
#include <time.h>   // clock_t, clock

#include <utility>     // std::move, std::exchange, std::forward
#include <type_traits> // std::is_trivially_{copyable,constructible,destructible}_v
#include <vector>      // std::vector

#if defined(_MSC_VER)
#define force_inline __forceinline
#else
#define force_inline __attribute__((always_inline)) inline
#endif

// custom placement-new so it can be forced inlined
struct placement_new {};
force_inline constexpr void* operator new(size_t, void* ptr, placement_new) noexcept { return ptr; }

// Minimal growable array backed by malloc/realloc/free.
//
// Every operation that may allocate reports failure through its bool return
// value (false on allocation failure or size overflow) and leaves the
// existing contents untouched in that case. The type is move-constructible
// only; declaring the move constructor suppresses the implicit copy
// operations.
//
// Caveat: push_back/emplace_back read their argument after ensure() may have
// released the old buffer, so the argument must not refer to an element of
// this array.
template<typename T>
struct Array {
  using value_type = T;

  constexpr Array() = default;

  // Destroys all elements (in reverse order) and releases the buffer.
  ~Array() { clear(); free(m_data); }

  // Steals the buffer of `array`, leaving it empty with no storage.
  Array(Array&& array) noexcept
    : m_data{std::exchange(array.m_data, nullptr)}
    , m_size{std::exchange(array.m_size, 0)}
    , m_capacity{std::exchange(array.m_capacity, 0)}
  {}

  // Constructs a new element in place at the end from `args`.
  template<typename... Ts>
  force_inline bool emplace_back(Ts&&... args) noexcept {
    if (!ensure(size() + 1)) return false;
    new (m_data + m_size, placement_new{}) T{std::forward<Ts>(args)...};
    m_size++;
    return true;
  }

  // Appends `value` by move.
  force_inline bool push_back(T&& value) noexcept {
    if (!ensure(size() + 1)) return false;
    new (m_data + m_size, placement_new{}) T{std::move(value)};
    m_size++;
    return true;
  }

  // Appends a copy of `value`.
  force_inline bool push_back(const T& value) noexcept {
    if (!ensure(size() + 1)) return false;
    new (m_data + m_size, placement_new{}) T{value};
    m_size++;
    return true;
  }

  // Destroys every element; the capacity is kept.
  force_inline void clear() noexcept {
    // Rely on unsigned underflow to walk in reverse order for calling destructors
    if constexpr (!std::is_trivially_destructible_v<T>) {
      if (m_size) for (size_t i = m_size - 1; i < m_size; i--) m_data[i].~T();
    }
    m_size = 0;
  }

  // Sets the element count to `size`.
  // Shrinking destroys the elements in [size, m_size) in reverse order.
  // Growing zero-fills new elements when T is trivially constructible and
  // default-constructs them otherwise. Returns false if growing fails.
  bool resize(size_t size) noexcept {
    if (size <= m_size) {
      if constexpr (!std::is_trivially_destructible_v<T>) {
        // Count down from one-past-the-end so index `size` itself is
        // destroyed and size == 0 cannot wrap around.
        for (size_t i = m_size; i > size; i--) m_data[i - 1].~T();
      }
    } else {
      if (!ensure(size)) return false;
      if constexpr (std::is_trivially_constructible_v<T>) {
        memset(m_data + m_size, 0, (size - m_size) * sizeof(T));
      } else {
        for (size_t i = m_size; i < size; i++) new (m_data + i, placement_new{}) T;
      }
    }
    m_size = size;
    return true;
  }

  force_inline T* begin() noexcept { return m_data; }
  force_inline const T* begin() const noexcept { return m_data; }
  force_inline T* end() noexcept { return m_data + m_size; }
  force_inline const T* end() const noexcept { return m_data + m_size; }
  // Unchecked element access.
  force_inline const T& operator[](size_t index) const noexcept { return m_data[index]; }
  force_inline T& operator[](size_t index) noexcept { return m_data[index]; }
  force_inline bool empty() const noexcept { return m_size == 0; }
  force_inline size_t size() const noexcept { return m_size; }
  force_inline size_t capacity() const noexcept { return m_capacity; }
  force_inline const T* data() const noexcept { return m_data; }
  force_inline T* data() noexcept { return m_data; }

private:
  // Makes room for at least `size` elements, growing by roughly 1.5x.
  // Returns false (leaving the array unchanged) on byte-size overflow or
  // allocation failure.
  bool ensure(size_t size) noexcept {
    if (size <= m_capacity) return true;
    size_t new_capacity = m_capacity;
    while (new_capacity < size) new_capacity = ((new_capacity + 1) * 3) / 2;
    if (sizeof(T) > static_cast<size_t>(-1) / new_capacity) return false; // sizeof(T) * new_capacity overflow.
    void* data = nullptr;
    if constexpr (std::is_trivially_copyable_v<T>) {
      // Trivially copyable elements may be relocated bytewise by realloc.
      data = realloc(m_data, sizeof(T) * new_capacity);
    } else {
      data = malloc(sizeof(T) * new_capacity);
    }
    if (!data) return false; // out of memory
    if constexpr (!std::is_trivially_copyable_v<T>) {
      if (m_size) {
        auto store = reinterpret_cast<T*>(data);
        for (auto item = begin(), last = end(); item != last; ++item) {
          // The new buffer is raw memory: move-construct into it. Assigning
          // would operate on an object whose lifetime never began.
          new (store++, placement_new{}) T{std::move(*item)};
          item->~T();
        }
      }
      free(m_data);
    }
    m_data = reinterpret_cast<T*>(data);
    m_capacity = new_capacity;
    return true;
  }

  T* m_data = nullptr;
  size_t m_size = 0;
  size_t m_capacity = 0;
};

// Simple benchmark timer.
// Records two clock() samples; elapsed() converts their difference to
// seconds. start() and stop() must both be called before elapsed().
struct Timer {
  clock_t t0, t1;
  void start() noexcept { t0 = clock(); }
  void stop() noexcept { t1 = clock(); }
  double elapsed() const noexcept {
    return static_cast<double>(t1 - t0) / CLOCKS_PER_SEC;
  }
};

// Some types to test the Array implementation with.
127 | struct NonTrivial { 128 | NonTrivial() {} 129 | NonTrivial(NonTrivial&&) {} 130 | ~NonTrivial() {} 131 | NonTrivial(const NonTrivial&) {} 132 | NonTrivial& operator=(NonTrivial&&) { return *this; } 133 | NonTrivial& operator=(const NonTrivial&) { return *this; } 134 | char buffer[128] = {0}; 135 | }; 136 | struct Trivial { float x = 0.0f, y = 1.0f, z = 2.0f; }; 137 | using POD = size_t; 138 | 139 | static constexpr const auto ITERATIONS = 5'000'000; 140 | 141 | template 142 | T test() noexcept { 143 | srand(0xdeadbeef); // constant seed to be fair. 144 | T array; 145 | Timer timer; 146 | timer.start(); 147 | typename T::value_type to_copy{}; 148 | for (size_t i = 0; i < ITERATIONS; i++) { 149 | array.push_back(to_copy); 150 | typename T::value_type to_move{}; 151 | array.push_back(std::move(to_move)); 152 | array.emplace_back(); // to_forward 153 | } 154 | timer.stop(); 155 | printf("%f\t", timer.elapsed()); 156 | return array; 157 | } 158 | 159 | #ifndef OPTION 160 | #define OPTION "" 161 | #endif 162 | 163 | int main() { 164 | printf("\"array %s\" ", OPTION); 165 | auto x0 = test>(); 166 | auto x1 = test>(); 167 | auto x2 = test>(); 168 | printf("\n"); 169 | printf("\"vector %s\" ", OPTION); 170 | auto y0 = test>(); 171 | auto y1 = test>(); 172 | auto y2 = test>(); 173 | printf("\n"); 174 | } 175 | --------------------------------------------------------------------------------