├── .clang-format ├── LICENSE ├── README.md ├── fastmod ├── .bench0.cpp ├── .bench1.cpp ├── .bench2.cpp ├── .bench3.cpp ├── .bench4.cpp ├── .bench5.cpp └── bench.cpp ├── flags ├── notes.txt └── vector ├── .bench0.cpp ├── .bench1.cpp ├── .bench2.cpp └── bench.cpp /.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: LLVM 2 | ColumnLimit: 80 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # cppcon2015-tuning-cxx 2 | Repository for the CppCon 2015 Tuning C++ talk. 
3 | -------------------------------------------------------------------------------- /fastmod/.bench0.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | static void bench_(benchmark::State &state) { 4 | while (state.KeepRunning()) { 5 | } 6 | } 7 | BENCHMARK(bench_); 8 | 9 | BENCHMARK_MAIN(); 10 | -------------------------------------------------------------------------------- /fastmod/.bench1.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | static void generate_arg_pairs(benchmark::internal::Benchmark *b) { 5 | for (int i = 1 << 4; i <= 1 << 10; i <<= 2) 6 | for (int j : {32, 128, 224}) 7 | b = b->ArgPair(i, j); 8 | } 9 | 10 | static void bench_fastmod(benchmark::State &state) { 11 | const int size = state.range_x(); 12 | assert(size >= 16 && "Only support 16 integers at a time!"); 13 | const int ceil = state.range_y(); 14 | std::vector input, output; 15 | input.resize(size, 0); 16 | output.resize(size, 0); 17 | 18 | while (state.KeepRunning()) 19 | for (int i = 0; i < size; ++i) 20 | output[i] = input[i] >= ceil ? 
input[i] % ceil : input[i]; 21 | } 22 | BENCHMARK(bench_fastmod)->Apply(generate_arg_pairs); 23 | 24 | BENCHMARK_MAIN(); 25 | -------------------------------------------------------------------------------- /fastmod/.bench2.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | static void 6 | generate_arg_pairs(benchmark::internal::Benchmark *b) { 7 | for (int i = 1 << 4; i <= 1 << 10; i <<= 2) 8 | for (int j : {32, 128, 224}) 9 | b = b->ArgPair(i, j); 10 | } 11 | 12 | static void bench_fastmod(benchmark::State &state) { 13 | const int size = state.range_x(); 14 | const int ceil = state.range_y(); 15 | std::vector input, output; 16 | input.resize(size, 0); 17 | output.resize(size, 0); 18 | 19 | std::mt19937 rng; 20 | rng.seed(std::random_device()()); 21 | std::uniform_int_distribution dist(0, 255); 22 | for (int &i : input) 23 | i = dist(rng); 24 | 25 | assert(size >= 16 && 26 | "Only support 16 integers at a time!"); 27 | 28 | while (state.KeepRunning()) 29 | for (int i = 0; i < size; ++i) 30 | output[i] = 31 | input[i] >= ceil ? 
input[i] % ceil : input[i]; 32 | } 33 | BENCHMARK(bench_fastmod)->Apply(generate_arg_pairs); 34 | 35 | BENCHMARK_MAIN(); 36 | -------------------------------------------------------------------------------- /fastmod/.bench3.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | static void 6 | generate_arg_pairs(benchmark::internal::Benchmark *b) { 7 | for (int i = 1 << 4; i <= 1 << 10; i <<= 2) 8 | for (int j : {32, 128, 224}) 9 | b = b->ArgPair(i, j); 10 | } 11 | 12 | static void bench_fastmod(benchmark::State &state) { 13 | const int size = state.range_x(); 14 | const int ceil = state.range_y(); 15 | std::vector input, output; 16 | input.resize(size, 0); 17 | output.resize(size, 0); 18 | 19 | std::mt19937 rng; 20 | rng.seed(std::random_device()()); 21 | std::uniform_int_distribution dist(0, 255); 22 | for (int &i : input) 23 | i = dist(rng); 24 | 25 | assert(size >= 16 && 26 | "Only support 16 integers at a time!"); 27 | 28 | while (state.KeepRunning()) 29 | for (int i = 0; i < size; i += 4) { 30 | #define mod(i) \ 31 | output[i] = input[i] >= ceil ? 
input[i] % ceil : input[i] 32 | mod(i + 0); 33 | mod(i + 1); 34 | mod(i + 2); 35 | mod(i + 3); 36 | } 37 | } 38 | BENCHMARK(bench_fastmod)->Apply(generate_arg_pairs); 39 | 40 | BENCHMARK_MAIN(); 41 | -------------------------------------------------------------------------------- /fastmod/.bench4.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #define UNLIKELY(x) __builtin_expect((x), 0) 6 | 7 | static void 8 | generate_arg_pairs(benchmark::internal::Benchmark *b) { 9 | for (int i = 1 << 4; i <= 1 << 10; i <<= 2) 10 | for (int j : {32, 128, 224}) 11 | b = b->ArgPair(i, j); 12 | } 13 | 14 | static void bench_fastmod(benchmark::State &state) { 15 | const int size = state.range_x(); 16 | const int ceil = state.range_y(); 17 | std::vector input, output; 18 | input.resize(size, 0); 19 | output.resize(size, 0); 20 | 21 | std::mt19937 rng; 22 | rng.seed(std::random_device()()); 23 | std::uniform_int_distribution dist(0, 24 | ceil + ceil / 8); 25 | for (int &i : input) 26 | i = dist(rng); 27 | 28 | assert(size >= 16 && 29 | "Only support 16 integers at a time!"); 30 | 31 | while (state.KeepRunning()) 32 | for (int i = 0; i < size; i += 4) { 33 | #define mod(i) \ 34 | output[i] = UNLIKELY(input[i] >= ceil) ? 
input[i] % ceil \ 35 | : input[i] 36 | mod(i + 0); 37 | mod(i + 1); 38 | mod(i + 2); 39 | mod(i + 3); 40 | } 41 | } 42 | BENCHMARK(bench_fastmod)->Apply(generate_arg_pairs); 43 | 44 | BENCHMARK_MAIN(); 45 | -------------------------------------------------------------------------------- /fastmod/.bench5.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | static void 6 | generate_arg_pairs(benchmark::internal::Benchmark *b) { 7 | for (int i = 1 << 4; i <= 1 << 10; i <<= 2) 8 | for (int j : {32, 128, 224}) 9 | b = b->ArgPair(i, j); 10 | } 11 | 12 | static void bench_mod(benchmark::State &state) { 13 | const int size = state.range_x(); 14 | const int ceil = state.range_y(); 15 | std::vector input, output; 16 | input.resize(size, 0); 17 | output.resize(size, 0); 18 | 19 | std::mt19937 rng; 20 | rng.seed(std::random_device()()); 21 | std::uniform_int_distribution dist(0, 255); 22 | for (int &i : input) 23 | i = dist(rng); 24 | 25 | assert(size >= 16 && 26 | "Only support 16 integers at a time!"); 27 | 28 | while (state.KeepRunning()) 29 | for (int i = 0; i < size; ++i) 30 | output[i] = input[i] % ceil; 31 | } 32 | BENCHMARK(bench_mod)->Apply(generate_arg_pairs); 33 | 34 | static void bench_fastmod(benchmark::State &state) { 35 | const int size = state.range_x(); 36 | const int ceil = state.range_y(); 37 | std::vector input, output; 38 | input.resize(size, 0); 39 | output.resize(size, 0); 40 | 41 | std::mt19937 rng; 42 | rng.seed(std::random_device()()); 43 | std::uniform_int_distribution dist(0, 255); 44 | for (int &i : input) 45 | i = dist(rng); 46 | 47 | assert(size >= 16 && 48 | "Only support 16 integers at a time!"); 49 | 50 | while (state.KeepRunning()) 51 | for (int i = 0; i < size; ++i) 52 | output[i] = 53 | input[i] >= ceil ? 
input[i] % ceil : input[i]; 54 | } 55 | BENCHMARK(bench_fastmod)->Apply(generate_arg_pairs); 56 | 57 | BENCHMARK_MAIN(); 58 | -------------------------------------------------------------------------------- /fastmod/bench.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | static void bench_(benchmark::State &state) { 4 | while (state.KeepRunning()) { 5 | } 6 | } 7 | BENCHMARK(bench_); 8 | 9 | BENCHMARK_MAIN(); 10 | -------------------------------------------------------------------------------- /flags: -------------------------------------------------------------------------------- 1 | -O3 2 | -std=c++14 3 | -stdlib=libc++ 4 | -lc++abi 5 | -Wl,-rpath=/home/chandlerc/lib64 6 | -fno-exceptions 7 | -fno-rtti 8 | -Wall 9 | -pedantic 10 | -Werror 11 | -isystem /home/chandlerc/include 12 | -pthreads 13 | -fno-omit-frame-pointer 14 | -------------------------------------------------------------------------------- /notes.txt: -------------------------------------------------------------------------------- 1 | - benchmark push_back, creating the vector inside the benchmark loop 2 | 3 | - build, showing flags, remember to link the benchmark library. run it. 4 | 5 | - We're benchmarking a lot more than push_back 6 | 7 | - pull out a create benchmark, unsurprisingly that part is free 8 | 9 | - what else is there? oh, we're growing the vector. let's pull that apart with reserve. add a reserve benchmark and explicitly reserve in the push_back 10 | 11 | - observe that pushback is now 10x faster!!! how can this be? comment out reserve and reproduce. wtf? 12 | 13 | - when you (inevitably) hit something that doesn't make sense, you need to go in with a profiler to understand 14 | 15 | -- let's introduce perf -- MUST go back to the version without reserve! 
16 | 17 | - run it under perf stat, and explain what perf is doing 18 | 19 | - run it under perf record to collect a specific profile rather than just statistics 20 | 21 | - run perf report and show the lack of context 22 | 23 | - run it under perf record -g, and run perf report to show call graph 24 | 25 | - point out that the call graph is terrible, explain that it can't unwind the stack 26 | 27 | - add -fno-omit-frame-pointer to flags file and recompile 28 | 29 | - run it under perf record -g, and run perf report to actually show call graph, but show really confusing callee orientation and relative %s 30 | 31 | - run perf report -g 'graph,0.5,caller' to get the inverted and absolute call graph view 32 | 33 | - annotate a function in perf report, show control flow indicators, etc 34 | 35 | -- back to benchmarking 36 | 37 | - show in the annotate view the empty loop, that's why it's fast, the compiler deleted all the code! 38 | 39 | - we need to block the optimizer from deleting our code 40 | 41 | - introduce an escape and a clobber function with inline asm 42 | 43 | - mention that we want to escape as little as possible 44 | 45 | - show the profile of the correctly benchmarked push_back, and kibitz about how terrible the optimizer is 46 | 47 | -- switch to benchmarking the "fastmod" 48 | 49 | - walk through new benchmark, run it, note no performance difference with different mods 50 | - profile and look to see that there are no samples on the modulo branch, code shows we fill with zeros 51 | - std::mt19937, seed, uniform_int_distribution, generate 52 | 53 | - note unrolling done by LLVM -- 2 is good, 4 is great? 54 | - show the jumping from block to block, lots of icache for a few insts 55 | 56 | - add an UNLIKELY macro, do bool conversion, explain expect 57 | - show LLVM collecting the cold modulo code below the hot loop trace 58 | - even with unlikely, not all rosy 59 | 60 | - but why on earth are we doing this? we should just use mod 61 | - no, we should *measure* mod! 
62 | - wait, what?!?!?! its sometimes slower, sometimes faster? 63 | 64 | - note that when its faster... 65 | -------------------------------------------------------------------------------- /vector/.bench0.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | static void bench_(benchmark::State &state) { 5 | while (state.KeepRunning()) { 6 | } 7 | } 8 | BENCHMARK(bench_); 9 | 10 | BENCHMARK_MAIN(); 11 | -------------------------------------------------------------------------------- /vector/.bench1.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | static void bench_create(benchmark::State &state) { 5 | while (state.KeepRunning()) { 6 | std::vector v; 7 | (void)v; 8 | } 9 | } 10 | BENCHMARK(bench_create); 11 | 12 | static void bench_reserve(benchmark::State &state) { 13 | while (state.KeepRunning()) { 14 | std::vector v; 15 | v.reserve(1); 16 | } 17 | } 18 | BENCHMARK(bench_reserve); 19 | 20 | static void bench_push_back(benchmark::State &state) { 21 | while (state.KeepRunning()) { 22 | std::vector v; 23 | v.reserve(1); 24 | v.push_back(1); 25 | } 26 | } 27 | BENCHMARK(bench_push_back); 28 | 29 | BENCHMARK_MAIN(); 30 | -------------------------------------------------------------------------------- /vector/.bench2.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | static void escape(void *v) { 5 | asm volatile("" : : "g"(v) : "memory"); 6 | } 7 | 8 | static void clobber() { asm volatile("" : : : "memory"); } 9 | 10 | static void bench_create(benchmark::State &state) { 11 | while (state.KeepRunning()) { 12 | std::vector v; 13 | escape(&v); 14 | } 15 | } 16 | BENCHMARK(bench_create); 17 | 18 | static void bench_reserve(benchmark::State &state) { 19 | while (state.KeepRunning()) { 20 | std::vector v; 21 | v.reserve(1); 22 | escape(v.data()); 23 | } 24 | } 25 | 
BENCHMARK(bench_reserve); 26 | 27 | static void bench_push_back(benchmark::State &state) { 28 | while (state.KeepRunning()) { 29 | std::vector v; 30 | v.reserve(1); 31 | escape(v.data()); 32 | v.push_back(42); 33 | clobber(); 34 | } 35 | } 36 | BENCHMARK(bench_push_back); 37 | 38 | BENCHMARK_MAIN(); 39 | -------------------------------------------------------------------------------- /vector/bench.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | static void bench_(benchmark::State &state) { 5 | while (state.KeepRunning()) { 6 | } 7 | } 8 | BENCHMARK(bench_); 9 | 10 | BENCHMARK_MAIN(); 11 | --------------------------------------------------------------------------------