├── .gitmodules ├── .travis-cmake-install.sh ├── .travis.yml ├── CMakeLists.txt ├── LICENSE ├── README.md ├── TODO.txt ├── c.vs.cxx ├── CMakeLists.txt ├── README └── cxxstring.vs.cstring.bench.cpp ├── c ├── CMakeLists.txt ├── algorithm.bench.cpp ├── math.bench.cpp └── string.bench.cpp ├── compiler.vs.programmer ├── CMakeLists.txt └── memory.bench.cpp ├── config.bat ├── cxx ├── CMakeLists.txt ├── README ├── accessors.bench.cpp ├── algorithm.bench.cpp ├── farequote.csv ├── getline.cpp ├── max.cpp ├── mutators.bench.cpp ├── size_metric.bench.cpp ├── stable_sort.cpp ├── str.cpp ├── str.s ├── string.bench.cpp ├── stringstream.bench.cpp ├── umap.cpp └── umap2.cpp ├── docs ├── Bibliography.bib ├── Makefile ├── abstract-cppcon.tex ├── acmcopyright.sty ├── frequency-scaling.md ├── graphviz.sty ├── sig-alternate.cls ├── slides │ ├── AppStartupCompilerOptimizationsAndTechniquesForEmbeddedSystems.pdf │ ├── Compiler-optimization-and-swe-technique-for-improving-app-launch-time-sonoma-sept-5-2023.pdf │ ├── CppConCodesizeCompilerOptimizationAndTechniques.pdf │ ├── Makefile │ ├── clang.result │ ├── compiler-flags-for-perf-and-codesize-llvm-bangalore-meetup-sept-22.pdf │ ├── dac-todo │ ├── gcc-1.result │ ├── gcc.result │ ├── msvc.l1l2.result │ ├── slide-DAC-2017.pdf │ ├── slide-DAC-2017.pptx │ ├── slide-acm-2021-DAC-2017.pptx │ ├── slide-acm-2021.pdf │ ├── slide-cppnow.pdf │ ├── slide-cppnow.pptx │ └── slide-meetup.pdf ├── std-benchmark.tex └── string-find-vs-cstring-strstr.ods ├── include ├── rng_utils.h ├── test_configs.h └── test_utils.h └── test ├── CMakeLists.txt └── test_random.cpp /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "benchmark"] 2 | path = benchmark 3 | url = https://github.com/google/benchmark.git 4 | -------------------------------------------------------------------------------- /.travis-cmake-install.sh: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env bash 2 | 3 | # Install a newer CMake version 4 | curl -sSL https://cmake.org/files/v3.6/cmake-3.6.1-Linux-x86_64.sh -o install-cmake.sh 5 | chmod +x install-cmake.sh 6 | sudo ./install-cmake.sh --prefix=/usr/local --skip-license 7 | 8 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: generic 2 | sudo: true 3 | dist: trusty 4 | 5 | branches: 6 | only: 7 | - master 8 | 9 | before_script: 10 | - cat /proc/cpuinfo 11 | - set -x 12 | - source .travis-cmake-install.sh 13 | - set +x 14 | - mkdir build 15 | - cd build 16 | - cmake -DCMAKE_C_COMPILER=${C_COMPILER} -DCMAKE_CXX_COMPILER=${COMPILER} -DCMAKE_VERBOSE_MAKEFILE=ON -DTRAVIS_BUILD:BOOL=ON .. 17 | 18 | script: 19 | - make -j4 20 | - ctest -L c --verbose 21 | #- ctest -L cxx --verbose 22 | - ctest -L c.vs.cxx --verbose 23 | 24 | compiler: 25 | - gcc 26 | 27 | cache: 28 | apt: true 29 | 30 | matrix: 31 | include: 32 | - compiler: gcc 33 | addons: 34 | apt: 35 | sources: 36 | - ubuntu-toolchain-r-test 37 | packages: 38 | - g++-6 39 | env: 40 | - COMPILER=g++-6 C_COMPILER=gcc-6 BUILD_TYPE=Release 41 | 42 | # matrix: 43 | # include: 44 | # - env: CXX=g++-5 CC=gcc-5 45 | # addons: 46 | # apt: 47 | # packages: 48 | # - g++-5 49 | # sources: &sources 50 | # - ubuntu-toolchain-r-test 51 | # - llvm-toolchain-precise 52 | # - env: CXX=g++-4.9 CC=gcc-4.9 53 | # addons: 54 | # apt: 55 | # packages: 56 | # - g++-4.9 57 | # sources: *sources 58 | # - env: CXX=clang++-3.8 CC=clang-3.8 59 | # addons: 60 | # apt: 61 | # packages: 62 | # - clang-3.8 63 | # sources: *sources 64 | # sources: *sources 65 | 66 | 67 | # http://stackoverflow.com/a/35113462 68 | # *sources is a "backreference" to the previous &sources at line 44 -- 69 | # the whole tree from under the property marked by &sources is 70 | # copied under the property marked by *sources 71 | 72 | 
-------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8) 2 | project(std_benchmark) 3 | 4 | if(CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR AND NOT MSVC_IDE) 5 | message(FATAL_ERROR "In-source builds are not allowed. 6 | Please remove/comment this line to build in-source") 7 | endif() 8 | 9 | set(GOOGLE_BENCHMARK_SRC ${CMAKE_SOURCE_DIR}/benchmark) 10 | set(BENCHMARK_INSTALL ${CMAKE_BINARY_DIR}/benchmark) 11 | set(STD_BENCHMARK_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/include) 12 | 13 | if ("${CMAKE_BUILD_TYPE}" STREQUAL "Debug") 14 | set(COMPILE_FLAGS 15 | "-std=c++11 -g3 -I${GOOGLE_BENCHMARK_SRC}/include -I${STD_BENCHMARK_INCLUDE_DIR}") 16 | else() 17 | # Google benchmark is built as Debug by default, so any non-Debug build is forced to Release. 18 | set(CMAKE_BUILD_TYPE "Release") 19 | set(COMPILE_FLAGS 20 | "-std=c++11 -I${GOOGLE_BENCHMARK_SRC}/include -I${STD_BENCHMARK_INCLUDE_DIR}") 21 | endif() 22 | 23 | if (BENCHMARK_USE_LIBCXX) 24 | if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") 25 | set(COMPILE_FLAGS "${COMPILE_FLAGS} -stdlib=libc++") 26 | else() 27 | message(FATAL_ERROR "-DBENCHMARK_USE_LIBCXX:BOOL=ON is not supported for compiler") # FATAL_ERROR aborts configuration; a bare FATAL is not a message mode and was only printed as text 28 | endif() 29 | endif(BENCHMARK_USE_LIBCXX) 30 | 31 | if (MSVC) 32 | add_compile_options("$<$<CONFIG:Debug>:/MDd>") # debug CRT only for Debug configurations 33 | add_compile_options("$<$<CONFIG:Release>:/O2>") # optimize only for Release configurations 34 | endif() 35 | 36 | # -fno-omit-frame-pointer 37 | 38 | set(LINK_FLAGS 39 | # -nodefaultlibs 40 | "-L${BENCHMARK_INSTALL}/src/") 41 | 42 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COMPILE_FLAGS}") 43 | set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${LINK_FLAGS}") 44 | 45 | include_directories(${PROJECT_SOURCE_DIR}/include) 46 | add_subdirectory(benchmark) 47 | 48 | # Set these after adding benchmark because google-benchmark 49 | # fails with C++ standard greater than 2003 50 | #set(CMAKE_CXX_STANDARD 14) 51 | set(CMAKE_CXX_EXTENSIONS OFF) 52 | 53 | option(ENABLE_C_BENCHMARKS
"Compile C benchmarks" ON) 54 | option(ENABLE_CXX_BENCHMARKS "Compile C++ benchmarks" ON) 55 | option(ENABLE_C_VS_CXX_BENCHMARKS "Compile C vs C++ benchmarks" ON) 56 | option(ENABLE_COMPILER_VS_PROGRAMMER "Compile compiler vs programmer benchmarks" ON) # help text previously duplicated the C vs C++ option 57 | 58 | enable_testing() 59 | 60 | if (TRAVIS_BUILD) 61 | add_definitions(-DENABLE_TRAVIS_BUILD) 62 | endif() 63 | 64 | if(ENABLE_C_BENCHMARKS) 65 | add_subdirectory(c) 66 | endif() 67 | 68 | if(ENABLE_CXX_BENCHMARKS) 69 | add_subdirectory(cxx) 70 | endif() 71 | 72 | if(ENABLE_C_VS_CXX_BENCHMARKS) 73 | add_subdirectory(c.vs.cxx) 74 | endif() 75 | 76 | if(ENABLE_COMPILER_VS_PROGRAMMER) 77 | add_subdirectory(compiler.vs.programmer) 78 | endif() 79 | 80 | # Only for testing utility functions written for the std-benchmark. 81 | add_subdirectory(test EXCLUDE_FROM_ALL) 82 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 
23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. 
For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. 
If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. 
You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. 
(Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | A benchmark for c/c++ standard library. 2 | 3 | [![Build 4 | Status](https://travis-ci.org/hiraditya/std-benchmark.svg?branch=master)](https://travis-ci.org/hiraditya/std-benchmark) 5 | 6 | **DEPENDENCIES** 7 | - CMake 2.8 or higher 8 | - Since this is a standard library benchmark, you need a compiler toolchain to benchmark. 9 | - A C++ compiler with C++11 support 10 | - google-benchmark (added as a submodule) 11 | To download the google-benchmark submodule run the following command in the project root. 12 | `git submodule update --recursive --remote` 13 | 14 | **BUILDING ON LINUX, MAC** 15 | ```sh 16 | mkdir $SRC/build 17 | cd $SRC/build 18 | cmake .. 19 | cmake --build . 20 | cmake --build . 
--config Release 21 | ``` 22 | 23 | When you are compiling with the clang compiler along with libcxx enabled (-DBENCHMARK_USE_LIBCXX=ON), 24 | you might get a cmake error like this: 25 | ```sh 26 | CMake Error at benchmark/CMakeLists.txt:171 (message): 27 | Failed to determine the source files for the regular expression backend 28 | -- Configuring incomplete, errors occurred! 29 | ``` 30 | 31 | In that case, 32 | ```sh 33 | export LD_LIBRARY_PATH=path-to-clang-install/lib 34 | ``` 35 | 36 | **BUILDING WITH VISUAL STUDIO** 37 | ```sh 38 | mkdir $SRC/build 39 | cd $SRC/build 40 | C:\Program Files (x86)\Microsoft Visual Studio <version>\VC\vcvarsall.bat amd64 41 | cmake -G"Visual Studio 15 2017 Win64" .. 42 | cmake --build . --config release 43 | ``` 44 | 45 | Remember to use a generator for Win64, otherwise 32-bit binaries will be built. 46 | For more help see config.bat in the top level directory. It has some useful tricks 47 | which allow you to use git-bash for building and running the tests. 48 | 49 | If there are errors then build the project by opening the visual studio IDE (devenv) 50 | and then try to build one component at a time. 51 | 52 | **TESTING** 53 | 54 | To test the project run ctest. Running ctest will run the google-benchmark harness tests 55 | as well, so if you are only interested in running the `c` and `cxx` std-benchmark tests use: 56 | ```sh 57 | ctest -L c --verbose 58 | ctest -L cxx --verbose 59 | ctest -C Release -L c.vs.cxx --verbose 60 | ``` 61 | 62 | NB: On Windows the tests might not run because ctest adds each test under specific 63 | configurations. 
So you have to pass the appropriate configuration using the `-C <config>` option, e.g., 64 | ```sh 65 | ctest -C Release -L c --verbose 66 | ctest -C Release -L cxx --verbose 67 | ctest -C Release -L c.vs.cxx --verbose 68 | ``` 69 | 70 | -------------------------------------------------------------------------------- /TODO.txt: -------------------------------------------------------------------------------- 1 | Graph L1-L3 data sizes 2 | Unit in graphs 3 | Compare with Mingw on windows with latest gcc to show performance against MSVC 4 | Illustrate example of uniform int distribution (MSVC does not allow uniform_int_distribution) 5 | shared_ptr inlining hot functions 8993759ae957800a061e60d1809482efb03e3ba3 6 | http://llbit.se/?p=369 7 | 8 | -------------------------------------------------------------------------------- /c.vs.cxx/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | file(GLOB BENCHMARK_TESTS "*.bench.cpp") 2 | foreach(test_path ${BENCHMARK_TESTS}) 3 | get_filename_component(test_file "${test_path}" NAME) 4 | set(target ${test_file}.out) 5 | #EXCLUDE_FROM_ALL 6 | add_executable(${target} ${test_file}) 7 | # shlwapi for MSVC 8 | #target_link_libraries(${target} benchmark -pthread shlwapi) 9 | target_link_libraries(${target} benchmark -pthread) 10 | # TODO: Rename test targets different from executables. 11 | add_test (NAME ${target} COMMAND ${target} --benchmark_format=csv) 12 | set_tests_properties(${target} PROPERTIES LABELS "c.vs.cxx") 13 | endforeach() 14 | -------------------------------------------------------------------------------- /c.vs.cxx/README: -------------------------------------------------------------------------------- 1 | This directory has benchmarks which compare the standard c implementation of 2 | an algorithm vs. the standard c++ implementation of the same algorithm. 
The idea is 3 | to first construct equivalent data structures in both C and C++ respectively, 4 | and then call corresponding functions (e.g., string::find vs. strstr) to 5 | measure performance. 6 | 7 | Caveats: 8 | - Effort has been made to discard any unwanted side-effects like malloc etc. 9 | - The c library functions are called from the C++ programs 10 | - c_alloc is a very primitive wrapper to hide malloc and free, it is intended 11 | to be very simple and used as such. 12 | -------------------------------------------------------------------------------- /c.vs.cxx/cxxstring.vs.cstring.bench.cpp: -------------------------------------------------------------------------------- 1 | //===-- cxxstring.vs.cstring.bench.cpp ------------------------------------===// 2 | // Bechmark the c++ string::find function against the c-strstr function. 3 | // 4 | //===----------------------------------------------------------------------===// 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "test_utils.h" 13 | #include "benchmark/benchmark.h" 14 | 15 | constexpr std::size_t MAX_STRING_LEN = 8 << 14; 16 | constexpr std::size_t MIN_STRING_LEN = 16; 17 | const int alignment_shift = 0; 18 | 19 | // C++ Strings 20 | 21 | // Benchmark when there is no match. 22 | static void BM_CXXStringFindNoMatch(benchmark::State &state) { 23 | std::string s1(state.range(0), '-'); 24 | std::string s2(8, '*'); 25 | while (state.KeepRunning()) 26 | benchmark::DoNotOptimize(s1.find(s2, alignment_shift)); 27 | } 28 | BENCHMARK(BM_CXXStringFindNoMatch)->Range(MIN_STRING_LEN, MAX_STRING_LEN); 29 | 30 | // Benchmark when the string matches first time. 
31 | static void BM_CXXStringFindAllMatch(benchmark::State &state) { 32 | std::string s1(MAX_STRING_LEN, '-'); 33 | std::string s2(state.range(0), '-'); 34 | while (state.KeepRunning()) 35 | benchmark::DoNotOptimize(s1.find(s2, alignment_shift)); 36 | } 37 | BENCHMARK(BM_CXXStringFindAllMatch)->Range(MIN_STRING_LEN, MAX_STRING_LEN); 38 | 39 | // Benchmark when the string matches somewhere in the end. 40 | static void BM_CXXStringFindMatch1(benchmark::State &state) { 41 | std::string s1(MAX_STRING_LEN / 2, '*'); 42 | s1 += std::string(state.range(0), '-'); 43 | std::string s2(state.range(0), '-'); 44 | while (state.KeepRunning()) 45 | benchmark::DoNotOptimize(s1.find(s2, alignment_shift)); 46 | } 47 | BENCHMARK(BM_CXXStringFindMatch1)->Range(MIN_STRING_LEN, MAX_STRING_LEN / 4); 48 | 49 | // Benchmark when the string matches somewhere from middle to the end. 50 | static void BM_CXXStringFindMatch2(benchmark::State &state) { 51 | std::string s1(MAX_STRING_LEN / 2, '*'); 52 | s1 += std::string(state.range(0), '-'); 53 | s1 += std::string(state.range(0), '*'); 54 | std::string s2(state.range(0), '-'); 55 | while (state.KeepRunning()) 56 | benchmark::DoNotOptimize(s1.find(s2, alignment_shift)); 57 | } 58 | 59 | BENCHMARK(BM_CXXStringFindMatch2)->Range(MIN_STRING_LEN, MAX_STRING_LEN / 4); 60 | 61 | static void BM_CXXStringRegression(benchmark::State &state) { 62 | std::string s1 = "aabbaabbaaxd adbffdadgaxaabbbddhatyaaaabbbaabbaabbcsy"; 63 | std::string s2 = "aabbaabbc"; 64 | while (state.KeepRunning()) 65 | benchmark::DoNotOptimize(s1.find(s2, alignment_shift)); 66 | } 67 | 68 | BENCHMARK(BM_CXXStringRegression); 69 | 70 | // Benchmark memchr vs. 
traits_type::find 71 | static void BM_CXXStringmemchr(benchmark::State &state) { 72 | const unsigned N = state.range(0); 73 | std::string s1(N, '*'); 74 | s1[N-1] = '-'; 75 | while (state.KeepRunning()) 76 | benchmark::DoNotOptimize(std::string::traits_type::find(s1.c_str(), s1.size(), '-')); 77 | } 78 | 79 | BENCHMARK(BM_CXXStringmemchr)->Range(MIN_STRING_LEN, MAX_STRING_LEN); 80 | 81 | // Benchmark memchr vs. traits_type::find 82 | static void BM_CStringmemchr(benchmark::State &state) { 83 | const unsigned N = state.range(0); 84 | c_alloc s1(N); 85 | memset(s1, '*', N); 86 | s1[N-1] = '-'; 87 | while (state.KeepRunning()) 88 | benchmark::DoNotOptimize(memchr(s1, '-', N)); 89 | } 90 | 91 | BENCHMARK(BM_CStringmemchr)->Range(MIN_STRING_LEN, MAX_STRING_LEN); 92 | 93 | // CStrings 94 | 95 | static void BM_CStringFindNoMatch(benchmark::State &state) { 96 | const unsigned N = state.range(0); 97 | c_alloc s1(N); 98 | c_alloc s2(8); 99 | memset(s1, '-', N); 100 | memset(s2, '*', 8); 101 | while (state.KeepRunning()) 102 | benchmark::DoNotOptimize(strstr(s1, s2)); 103 | } 104 | BENCHMARK(BM_CStringFindNoMatch)->Range(MIN_STRING_LEN, MAX_STRING_LEN); 105 | 106 | // Benchmark when the string matches first time. 107 | static void BM_CStringFindAllMatch(benchmark::State &state) { 108 | const unsigned N = state.range(0); 109 | c_alloc s1(MAX_STRING_LEN); 110 | c_alloc s2(N); 111 | memset(s1, '-', MAX_STRING_LEN); 112 | memset(s2, '-', N); 113 | while (state.KeepRunning()) 114 | benchmark::DoNotOptimize(strstr(s1, s2)); 115 | } 116 | BENCHMARK(BM_CStringFindAllMatch)->Range(MIN_STRING_LEN, MAX_STRING_LEN); 117 | 118 | // Benchmark when the string matches somewhere in the end. 
119 | static void BM_CStringFindMatch1(benchmark::State &state) { 120 | /*std::string s1(MAX_STRING_LEN / 2, '*'); 121 | s1 += std::string(state.range(0), '-'); 122 | std::string s2(state.range(0), '-');*/ 123 | 124 | const unsigned N = state.range(0); 125 | c_alloc s1(MAX_STRING_LEN/2+N); 126 | c_alloc s2(N); 127 | memset(s1, '*', MAX_STRING_LEN/2); 128 | memset(s1 + MAX_STRING_LEN/2, '-', N); 129 | memset(s2, '-', N); 130 | while (state.KeepRunning()) 131 | benchmark::DoNotOptimize(strstr(s1, s2)); 132 | } 133 | BENCHMARK(BM_CStringFindMatch1)->Range(MIN_STRING_LEN, MAX_STRING_LEN / 4); 134 | 135 | // Benchmark when the string matches somewhere from middle to the end. 136 | static void BM_CStringFindMatch2(benchmark::State &state) { 137 | /*std::string s1(MAX_STRING_LEN / 2, '*'); 138 | s1 += std::string(state.range(0), '-'); 139 | s1 += std::string(state.range(0), '*'); 140 | std::string s2(state.range(0), '-');*/ 141 | 142 | const unsigned N = state.range(0); 143 | c_alloc s1(MAX_STRING_LEN/2 + 2*N); 144 | c_alloc s2(N); 145 | memset(s1, '*', MAX_STRING_LEN/2); 146 | memset(s1 + MAX_STRING_LEN/2, '-', N); 147 | memset(s1 + MAX_STRING_LEN/2 + N, '*', N); 148 | memset(s2, '-', N); 149 | while (state.KeepRunning()) 150 | benchmark::DoNotOptimize(strstr(s1, s2)); 151 | } 152 | 153 | BENCHMARK(BM_CStringFindMatch2)->Range(MIN_STRING_LEN, MAX_STRING_LEN / 4); 154 | 155 | static void BM_CStringRegression(benchmark::State &state) { 156 | const char* s1 = "aabbaabbaaxd adbffdadgaxaabbbddhatyaaaabbbaabbaabbcsy"; 157 | const char* s2 = "aabbaabbc"; 158 | while (state.KeepRunning()) 159 | benchmark::DoNotOptimize(strstr(s1, s2)); 160 | } 161 | 162 | BENCHMARK(BM_CStringRegression); 163 | 164 | BENCHMARK_MAIN() 165 | -------------------------------------------------------------------------------- /c/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | file(GLOB BENCHMARK_TESTS "*.bench.cpp") 2 | foreach(test_path 
${BENCHMARK_TESTS}) 3 | get_filename_component(test_file "${test_path}" NAME) 4 | set(target ${test_file}.c.out) 5 | #EXCLUDE_FROM_ALL 6 | add_executable(${target} ${test_file}) 7 | target_link_libraries(${target} benchmark -pthread) 8 | add_test (NAME ${target} COMMAND ${target} --benchmark_format=csv) 9 | set_tests_properties(${target} PROPERTIES LABELS "c") 10 | endforeach() 11 | -------------------------------------------------------------------------------- /c/algorithm.bench.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "benchmark/benchmark.h" 5 | #include "test_configs.h" 6 | #include "test_utils.h" 7 | 8 | // qsort 9 | template 10 | void BM_sort(benchmark::State& state) { 11 | int N = state.range(0); 12 | c_alloc a(N); 13 | fill_seq(a, a+N); 14 | while (state.KeepRunning()) { 15 | // searching for all the elements. 16 | for (int i = 0; i < N; ++i) 17 | qsort(a.get(), N, sizeof (T), compare); 18 | } 19 | state.SetComplexityN(N); 20 | } 21 | 22 | // Linear search on a sequence 23 | template 24 | void BM_search_linear(benchmark::State& state) { 25 | int N = state.range(0); 26 | c_alloc a(N); 27 | fill_seq(a, a+N); 28 | while (state.KeepRunning()) { 29 | // searching for all the elements. 30 | for (int i = 0; i < N; ++i) { 31 | int j = 0; 32 | while (j < N) 33 | if (a[j++] == i) 34 | break; 35 | benchmark::DoNotOptimize(j); 36 | assert(j == i); // j is the i-th element in a 37 | } 38 | } 39 | state.SetComplexityN(N); 40 | } 41 | 42 | // Binary search on a sequence 43 | template 44 | void BM_search_binary(benchmark::State& state) { 45 | int N = state.range(0); 46 | c_alloc a(N); 47 | fill_seq(a, a+N); 48 | while (state.KeepRunning()) { 49 | // searching for all the elements. 
50 | for (int i = 0; i < N; ++i) { 51 | T *p = (T*) bsearch(&i, a, N, sizeof (T), compare); 52 | benchmark::DoNotOptimize(p); 53 | assert(*p == i); // j is the i-th element in a 54 | } 55 | } 56 | state.SetComplexityN(N); 57 | } 58 | 59 | //static const int MSize = L1; 60 | COMPLEXITY_BENCHMARK_GEN(BM_search_linear, int, MSize); 61 | COMPLEXITY_BENCHMARK_GEN(BM_search_linear, char, MSize); 62 | COMPLEXITY_BENCHMARK_GEN(BM_search_binary, int, MSize); 63 | COMPLEXITY_BENCHMARK_GEN(BM_search_binary, char, MSize); 64 | COMPLEXITY_BENCHMARK_GEN(BM_sort, int, MSize); 65 | COMPLEXITY_BENCHMARK_GEN(BM_sort, char, MSize); 66 | BENCHMARK_MAIN() 67 | -------------------------------------------------------------------------------- /c/math.bench.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "benchmark/benchmark.h" 5 | #include "test_configs.h" 6 | #include "test_utils.h" 7 | 8 | // Values between -1 and +1 9 | #define BM_atrig(Name) template \ 10 | void BM_##Name(benchmark::State& state) {\ 11 | int N = state.range(0);\ 12 | c_alloc a(N);\ 13 | fill_random(a.get(), a.get()+N, RAND_MAX);\ 14 | while (state.KeepRunning()) {\ 15 | for (int i = 0; i < N; ++i) {\ 16 | T p = Name(a[i]);\ 17 | benchmark::DoNotOptimize(p);\ 18 | }\ 19 | }\ 20 | state.SetComplexityN(N);\ 21 | } 22 | 23 | // Trigonometric functions 24 | BM_unary_real(sin) 25 | BM_unary_real(cos) 26 | BM_unary_real(tan) 27 | BM_unary_real(sinh) 28 | BM_unary_real(cosh) 29 | BM_unary_real(tanh) 30 | BM_unary_real(atan) 31 | BM_atrig(asin) 32 | BM_atrig(acos) 33 | 34 | // Exponential/Logarithmic functions 35 | BM_unary_real(exp) 36 | BM_binary_real(pow) 37 | BM_unary_real(sqrt) 38 | BM_unary_real(log) 39 | BM_unary_real(log10) 40 | BM_binary_real(ldexp) 41 | 42 | static uint32_t digits10(uint64_t v) { 43 | uint32_t result = 0; 44 | do { 45 | ++result; 46 | v /= 10; 47 | } while (v); 48 | return result; 49 | } 50 | 51 | static uint32_t 
digits10_fast(uint64_t v) { 52 | uint32_t result = 1; 53 | for (;;) { 54 | if (v < 10) return result; 55 | if (v < 100) return result + 1; 56 | if (v < 1000) return result + 2; 57 | if (v < 10000) return result + 3; // Skip ahead by 4 orders of magnitude 58 | v /= 10000; 59 | result += 4; 60 | } 61 | } 62 | 63 | BM_unary_real(digits10) 64 | BM_unary_real(digits10_fast) 65 | 66 | /*void BM_digits10(benchmark::State& state) { 67 | int N = state.range(0); 68 | c_alloc a(N); 69 | fill_random(a.get(), a.get()+N, RAND_MAX); 70 | int i = 0; 71 | while (state.KeepRunning()) { 72 | uint32_t p = digits10(a[i]); 73 | benchmark::DoNotOptimize(p); 74 | if (i++ == N) // re-initialize 75 | i = 0; 76 | } 77 | state.SetComplexityN(N); 78 | }*/ 79 | 80 | //static const int MSize = L1; 81 | #define COMPLEXITY_BENCHMARK_GEN_T(T) \ 82 | COMPLEXITY_BENCHMARK_GEN(BM_sin, T, MSize);\ 83 | COMPLEXITY_BENCHMARK_GEN(BM_cos, T, MSize);\ 84 | COMPLEXITY_BENCHMARK_GEN(BM_tan, T, MSize);\ 85 | COMPLEXITY_BENCHMARK_GEN(BM_sinh, T, MSize);\ 86 | COMPLEXITY_BENCHMARK_GEN(BM_cosh, T, MSize);\ 87 | COMPLEXITY_BENCHMARK_GEN(BM_tanh, T, MSize);\ 88 | COMPLEXITY_BENCHMARK_GEN(BM_asin, T, MSize);\ 89 | COMPLEXITY_BENCHMARK_GEN(BM_acos, T, MSize);\ 90 | COMPLEXITY_BENCHMARK_GEN(BM_atan, T, MSize);\ 91 | COMPLEXITY_BENCHMARK_GEN(BM_exp, T, MSize);\ 92 | COMPLEXITY_BENCHMARK_GEN(BM_pow, T, MSize);\ 93 | COMPLEXITY_BENCHMARK_GEN(BM_sqrt, T, MSize);\ 94 | COMPLEXITY_BENCHMARK_GEN(BM_log, T, MSize);\ 95 | COMPLEXITY_BENCHMARK_GEN(BM_log10, T, MSize);\ 96 | COMPLEXITY_BENCHMARK_GEN(BM_ldexp, T, MSize); 97 | 98 | COMPLEXITY_BENCHMARK_GEN_T(float) 99 | COMPLEXITY_BENCHMARK_GEN_T(double) 100 | 101 | COMPLEXITY_BENCHMARK_GEN(BM_digits10, int, MSize); 102 | COMPLEXITY_BENCHMARK_GEN(BM_digits10_fast, int, MSize); 103 | COMPLEXITY_BENCHMARK_GEN(BM_log10, int, MSize); 104 | 105 | BENCHMARK_MAIN() 106 | -------------------------------------------------------------------------------- /c/string.bench.cpp: 
-------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "benchmark/benchmark.h" 4 | #include "test_configs.h" 5 | #include "test_utils.h" 6 | 7 | // Linear search on a sequence 8 | static void BM_strstr(benchmark::State& state) { 9 | int N = state.range(0); 10 | c_alloc s1(N); 11 | c_alloc s2(N/16); 12 | fill_random_chars(s1, s1+N, true); 13 | fill_random_chars(s2, s2+N/16, false); 14 | while (state.KeepRunning()) { 15 | // searching for all the elements. 16 | benchmark::DoNotOptimize(strstr(s1, s2)); 17 | } 18 | state.SetComplexityN(N); 19 | } 20 | 21 | // catenate a character to a string 22 | static void BM_strcat(benchmark::State& state) { 23 | int N = state.range(0); 24 | int s2_sz = 2; 25 | if (N < s2_sz) 26 | return; // invalid test configuration 27 | c_alloc s1(N); 28 | c_alloc s2(s2_sz); 29 | s1[0] = '\0'; 30 | int s1_sz = 0; 31 | fill_random_chars(s2, s2+s2_sz, false); 32 | while (state.KeepRunning()) { 33 | if ((s1_sz + s2_sz) >= N) { 34 | // reset s1 to prevent memory corruption 35 | s1[0] = '\0'; 36 | s1_sz = 0; 37 | } 38 | benchmark::DoNotOptimize(strcat(s1, s2)); 39 | s1_sz += s2_sz; 40 | } 41 | state.SetComplexityN(N); 42 | } 43 | 44 | // Linear search on a sequence 45 | static void BM_strchr(benchmark::State& state) { 46 | int N = state.range(0); 47 | c_alloc s1(N); 48 | char s2 = '1'; 49 | fill_random_chars(s1, s1+N, true); 50 | while (state.KeepRunning()) { 51 | // searching for all the elements. 
52 | benchmark::DoNotOptimize(strchr(s1, s2)); 53 | } 54 | state.SetComplexityN(N); 55 | } 56 | 57 | static void BM_strcmp(benchmark::State& state) { 58 | int N = state.range(0); 59 | int s2_sz = N/16; 60 | c_alloc s1(N); 61 | c_alloc s2(s2_sz); 62 | fill_random_chars(s1, s1+N, true); 63 | fill_random_chars(s2, s2+s2_sz, false); 64 | int i = 0; 65 | while (state.KeepRunning()) { 66 | // strcmp at varying positions 67 | benchmark::DoNotOptimize(strcmp(s1+(i++%N), s2)); 68 | } 69 | state.SetComplexityN(N); 70 | } 71 | 72 | static void BM_strcpy(benchmark::State& state) { 73 | int N = state.range(0); 74 | int s2_sz = N/16; 75 | c_alloc s1(N); 76 | c_alloc s2(s2_sz); 77 | fill_random_chars(s1, s1+N, true); 78 | fill_random_chars(s2, s2+s2_sz, false); 79 | int i = 0; 80 | while (state.KeepRunning()) { 81 | // strcpy at varying positions. 82 | const int offset = (i++%N)/2; 83 | benchmark::DoNotOptimize(strcpy(s1 + offset, s2)); 84 | } 85 | state.SetComplexityN(N); 86 | } 87 | 88 | static void BM_strlen(benchmark::State& state) { 89 | int N = state.range(0); 90 | int s2_sz = N/16; 91 | c_alloc s1(N); 92 | c_alloc s2(s2_sz); 93 | fill_random_chars(s1, s1+N, true); 94 | fill_random_chars(s2, s2+s2_sz, false); 95 | int i = 0; 96 | while (state.KeepRunning()) { 97 | // strlen at varying positions. 98 | benchmark::DoNotOptimize(strlen(s1+i++)); 99 | if (i == N) // Reinitialize to avoid segfault. 
100 | i = 0; 101 | } 102 | state.SetComplexityN(N); 103 | } 104 | 105 | //static const int MSize = L1; 106 | COMPLEXITY_BENCHMARK(BM_strstr, MSize); 107 | COMPLEXITY_BENCHMARK(BM_strcat, MSize); 108 | COMPLEXITY_BENCHMARK(BM_strchr, MSize); 109 | COMPLEXITY_BENCHMARK(BM_strcmp, MSize); 110 | COMPLEXITY_BENCHMARK(BM_strcpy, MSize); 111 | COMPLEXITY_BENCHMARK(BM_strlen, MSize); 112 | BENCHMARK_MAIN() 113 | -------------------------------------------------------------------------------- /compiler.vs.programmer/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | file(GLOB BENCHMARK_TESTS "*.bench.cpp") 2 | foreach(test_path ${BENCHMARK_TESTS}) 3 | get_filename_component(test_file "${test_path}" NAME) 4 | set(target ${test_file}.out) 5 | #EXCLUDE_FROM_ALL 6 | add_executable(${target} ${test_file}) 7 | # shlwapi for MSVC 8 | #target_link_libraries(${target} benchmark -pthread shlwapi) 9 | target_link_libraries(${target} benchmark -pthread) 10 | # TODO: Rename test targets different from executables. 
11 | add_test (NAME ${target} COMMAND ${target} --benchmark_format=csv) 12 | set_tests_properties(${target} PROPERTIES LABELS "compiler.vs.programmer") 13 | endforeach() 14 | -------------------------------------------------------------------------------- /compiler.vs.programmer/memory.bench.cpp: -------------------------------------------------------------------------------- 1 | #include "benchmark/benchmark.h" 2 | #include "test_configs.h" 3 | #include "test_utils.h" 4 | 5 | #include 6 | 7 | ATTR_NOINLINE 8 | const char* assign(const char *beg, const char *end, char *dest) { 9 | while (beg != end) 10 | *dest++ = *beg++; 11 | return beg; 12 | } 13 | 14 | ATTR_NOINLINE 15 | const char* assign_res(const char * __restrict beg, 16 | const char * __restrict end, 17 | char *__restrict dest) { 18 | while (beg != end) 19 | *dest++ = *beg++; 20 | return beg; 21 | } 22 | 23 | void BM_prog_memcpy(benchmark::State& state) { 24 | const unsigned N = state.range(0); 25 | c_alloc s1(N); 26 | c_alloc s2(N); 27 | memset(s1, '*', N); 28 | while (state.KeepRunning()) { 29 | benchmark::DoNotOptimize(assign(s1.get(), s1.get()+N, s2.get())); 30 | } 31 | state.SetComplexityN(N); 32 | } 33 | 34 | void BM_compiler_memcpy(benchmark::State& state) { 35 | const unsigned N = state.range(0); 36 | c_alloc s1(N); 37 | c_alloc s2(N); 38 | memset(s1, '*', N); 39 | while (state.KeepRunning()) { 40 | benchmark::DoNotOptimize(assign_res(s1.get(), s1.get()+N, s2.get())); 41 | } 42 | state.SetComplexityN(N); 43 | } 44 | 45 | void BM_memcpy(benchmark::State& state) { 46 | const unsigned N = state.range(0); 47 | c_alloc s1(N); 48 | c_alloc s2(N); 49 | memset(s1, '*', N); 50 | while (state.KeepRunning()) { 51 | benchmark::DoNotOptimize(memcpy(s2.get(), s1.get(), N)); 52 | } 53 | state.SetComplexityN(N); 54 | } 55 | 56 | //static const int MSize = L1; 57 | 58 | BASIC_BENCHMARK_TEST(BM_prog_memcpy); 59 | BASIC_BENCHMARK_TEST(BM_compiler_memcpy); 60 | BASIC_BENCHMARK_TEST(BM_memcpy); 61 | 62 | 
BENCHMARK_MAIN() 63 | 64 | -------------------------------------------------------------------------------- /config.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | :: %HOMEDRIVE% 4 | :: cd %HOMEPATH% 5 | 6 | echo "Calling vcvars" 7 | 8 | :: Initialize environment variables by calling vcvars. Your compiler may have this 9 | :: batch file somewhere else. Please change this appropriately. 10 | call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary/Build/vcvars64.bat" 11 | :: READ README.md for more info 12 | 13 | ::call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary/Build/vcvars32.bat" 14 | 15 | 16 | echo Use full exe names when running under bash, e.g. "msbuild.exe" 17 | echo Loading bash, you may now use git and msbuild in the same console \o/. 18 | 19 | :: "C:\ProgramData\Microsoft\Windows\Start Menu\Programs\Git\Git Bash" --login -i 20 | 21 | :: Invoke git-bash from this environment such that git-bash inherits it. 22 | "C:\Program Files\Git\git-bash.exe" 23 | -------------------------------------------------------------------------------- /cxx/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | file(GLOB BENCHMARK_TESTS "*.bench.cpp") 2 | foreach(test_path ${BENCHMARK_TESTS}) 3 | get_filename_component(test_file "${test_path}" NAME) 4 | set(target ${test_file}.out) 5 | #EXCLUDE_FROM_ALL 6 | add_executable(${target} ${test_file}) 7 | # shlwapi for MSVC 8 | #target_link_libraries(${target} benchmark -pthread shlwapi) 9 | target_link_libraries(${target} benchmark -pthread) 10 | # TODO: Rename test targets different from executables. 
11 | add_test (NAME ${target} COMMAND ${target} --benchmark_format=csv) 12 | set_tests_properties(${target} PROPERTIES LABELS "cxx") 13 | endforeach() 14 | -------------------------------------------------------------------------------- /cxx/README: -------------------------------------------------------------------------------- 1 | Benchmarking C++ standard library 2 | 3 | To define your own data type for benchmarking: 4 | Define the class in a header e.g., $PROJECT_HOME/include/test_utils.h 5 | The class should have a T(0) otherwise you have to define init. 6 | The class should have operator++ defined otherwise you have to define increment. 7 | 8 | Define the following if it cannot use the default: 9 | hasher, init>, increment>, 10 | 11 | Call the macro COMPLEXITY_BENCHMARK_GEN_T(T) in all the files. 12 | -------------------------------------------------------------------------------- /cxx/accessors.bench.cpp: -------------------------------------------------------------------------------- 1 | #include "benchmark/benchmark.h" 2 | #include "test_configs.h" 3 | #include "test_utils.h" 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | // TODO: get, operator[], count, equal_range, erase, lower_bound, upper_bound 15 | 16 | // See http://info.prelert.com/blog/speed-is-not-the-only-consideration-with-stddeque 17 | // for comparison of vector vs. deque. 
18 | 19 | template 20 | void BM_advance(benchmark::State& state) { 21 | int N = state.range(0); 22 | V v(N); 23 | fill_seq(v); 24 | while (state.KeepRunning()) { 25 | for (int i = 0; i < N; ++i) { 26 | auto it = v.begin(); 27 | std::advance(it, i); 28 | benchmark::DoNotOptimize(it); 29 | } 30 | } 31 | state.SetComplexityN(N); 32 | } 33 | 34 | template 35 | void BM_access(benchmark::State& state) { 36 | int N = state.range(0); 37 | V v(N); 38 | fill_seq(v); 39 | while (state.KeepRunning()) { 40 | for (int i = 0; i < N; ++i) { 41 | auto it = v.begin(); 42 | std::advance(it, i); 43 | benchmark::DoNotOptimize(*it); 44 | } 45 | } 46 | state.SetComplexityN(N); 47 | } 48 | 49 | template 50 | void BM_at(benchmark::State& state) { 51 | int N = state.range(0); 52 | V v(N); 53 | fill_seq(v); 54 | while (state.KeepRunning()) { 55 | for (int i = 0; i < N; ++i) 56 | benchmark::DoNotOptimize(v.at(i)); 57 | } 58 | state.SetComplexityN(N); 59 | } 60 | 61 | // Look up N random keys (temp) in a container pre-filled with N separately generated random elements; only the find calls are inside the timed loop 62 | template 63 | void BM_assoc_find_random(benchmark::State& state) { 64 | int N = state.range(0); 65 | using CVT = typename V::value_type; 66 | using VT = typename remove_const::type; 67 | using KT = typename std::remove_const::type; 68 | std::vector temp(N); 69 | fill_random(temp, N); 70 | V v; 71 | random_device r; 72 | for (int i = 0; i < N; ++i) 73 | v.insert(get_rand(r, RAND_MAX)); 74 | while (state.KeepRunning()) { 75 | for (int i = 0; i < N; ++i) 76 | benchmark::DoNotOptimize(v.find(temp[i])); 77 | } 78 | state.SetComplexityN(N); 79 | } 80 | 81 | template 82 | void BM_assoc_find_seq(benchmark::State& state) { 83 | int N = state.range(0); 84 | using CVT = typename V::value_type; 85 | using VT = typename remove_const::type; 86 | using KT = typename std::remove_const::type; 87 | std::vector temp(N); 88 | fill_seq(temp); 89 | V v; 90 | for (int i = 0; i < N; ++i) 91 | v.insert(temp[i]); 92 | while (state.KeepRunning()) { 93 | for (int i = 0; i < N; ++i) { 94 | auto it = v.find(i); 95 |
benchmark::DoNotOptimize(it); 96 | assert (it != v.end()); 97 | } 98 | } 99 | state.SetComplexityN(N); 100 | } 101 | 102 | //static const int MSize = L1; 103 | 104 | #define BENCH_STD_MAP(T) SINGLE_ARG(std::map) 105 | #define BENCH_STD_UNORDERED_MAP(T) SINGLE_ARG(std::unordered_map) 106 | 107 | #define COMPLEXITY_BENCHMARK_GEN_T(T) \ 108 | COMPLEXITY_BENCHMARK_GEN(BM_advance, std::vector, MSize);\ 109 | COMPLEXITY_BENCHMARK_GEN(BM_advance, std::list, MSize);\ 110 | COMPLEXITY_BENCHMARK_GEN(BM_advance, std::deque, MSize);\ 111 | COMPLEXITY_BENCHMARK_GEN(BM_access, std::vector, MSize);\ 112 | COMPLEXITY_BENCHMARK_GEN(BM_access, std::list, MSize);\ 113 | COMPLEXITY_BENCHMARK_GEN(BM_access, std::deque, MSize);\ 114 | COMPLEXITY_BENCHMARK_GEN(BM_at, std::vector, MSize);\ 115 | COMPLEXITY_BENCHMARK_GEN(BM_at, std::deque, MSize);\ 116 | COMPLEXITY_BENCHMARK_GEN(BM_assoc_find_random, std::set, MSize);\ 117 | COMPLEXITY_BENCHMARK_GEN(BM_assoc_find_random, std::unordered_set, MSize);\ 118 | \ 119 | COMPLEXITY_BENCHMARK_GEN(BM_assoc_find_random, BENCH_STD_MAP(T), MSize);\ 120 | COMPLEXITY_BENCHMARK_GEN(BM_assoc_find_random, BENCH_STD_UNORDERED_MAP(T), MSize);\ 121 | COMPLEXITY_BENCHMARK_GEN(BM_assoc_find_seq, std::set, MSize);\ 122 | COMPLEXITY_BENCHMARK_GEN(BM_assoc_find_seq, std::unordered_set, MSize);\ 123 | COMPLEXITY_BENCHMARK_GEN(BM_assoc_find_seq, BENCH_STD_MAP(T), MSize);\ 124 | COMPLEXITY_BENCHMARK_GEN(BM_assoc_find_seq, BENCH_STD_UNORDERED_MAP(T), MSize); 125 | 126 | COMPLEXITY_BENCHMARK_GEN_T(int) 127 | COMPLEXITY_BENCHMARK_GEN_T(aggregate) 128 | 129 | BENCHMARK_MAIN() 130 | -------------------------------------------------------------------------------- /cxx/algorithm.bench.cpp: -------------------------------------------------------------------------------- 1 | #include "benchmark/benchmark.h" 2 | #include "test_configs.h" 3 | #include "test_utils.h" 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | // TODO: reverse, 
rotate, shuffle, lower_bound, upper_bound 13 | // uninitialized_fill, uninitialized_copy 14 | // change data type (int, double, class { int, int }, and compare 15 | // algorithms. 16 | 17 | #ifndef NDEBUG 18 | #include 19 | template 20 | std::ostream& operator<<(std::ostream& os,const std::vector& v) 21 | { 22 | for(typename std::vector::const_iterator i = v.begin(); i!= v.end(); ++i) 23 | os<<*i<<' '; 24 | os<< "\n"; 25 | return os; 26 | } 27 | #endif 28 | 29 | 30 | #define START_TIMER auto start = std::chrono::high_resolution_clock::now(); 31 | #define STOP_TIMER auto end = std::chrono::high_resolution_clock::now();\ 32 | auto elapsed_seconds =\ 33 | std::chrono::duration_cast>(\ 34 | end - start);\ 35 | state.SetIterationTime(elapsed_seconds.count()); 36 | 37 | template 38 | void BM_sort_std_common(benchmark::State& state) { 39 | int N = state.range(0); 40 | V v(N); 41 | fill_random(v); 42 | using T = typename V::value_type; 43 | while (state.KeepRunning()) { 44 | std::vector vec(v.begin(), v.end()); 45 | START_TIMER 46 | std::sort(vec.begin(), vec.end()); 47 | STOP_TIMER 48 | } 49 | state.SetComplexityN(N); 50 | } 51 | 52 | template 53 | void BM_sort_std_list_with_vector(benchmark::State& state) { 54 | int N = state.range(0); 55 | V v(N); 56 | fill_random(v); 57 | using T = typename V::value_type; 58 | // Copy the contents into a vector 59 | while (state.KeepRunning()) { 60 | std::vector vec(v.begin(), v.end()); 61 | // Sort the vector 62 | std::sort(vec.begin(), vec.end()); 63 | // Put the item back in the list 64 | v.assign(vec.begin(), vec.end()); 65 | } 66 | state.SetComplexityN(N); 67 | } 68 | 69 | // Sort (a sequence in ascending order) in ascending order. 
70 | template 71 | void BM_sort_std_ascending(benchmark::State& state) { 72 | int N = state.range(0); 73 | using T = typename V::value_type; 74 | V v(N); 75 | fill_seq(v); 76 | while (state.KeepRunning()) { 77 | std::vector vec(v.begin(), v.end()); 78 | START_TIMER 79 | std::sort(vec.begin(), vec.end(), std::less()); 80 | STOP_TIMER 81 | } 82 | state.SetComplexityN(N); 83 | } 84 | 85 | // Sort (a sequence in ascending order) in descending order. 86 | template 87 | void BM_sort_std_descending(benchmark::State& state) { 88 | int N = state.range(0); 89 | using T = typename V::value_type; 90 | V v(N); 91 | fill_seq(v); 92 | while (state.KeepRunning()) { 93 | std::vector vec(v.begin(), v.end()); 94 | START_TIMER 95 | std::sort(vec.begin(), vec.end(), std::greater()); 96 | STOP_TIMER 97 | } 98 | state.SetComplexityN(N); 99 | } 100 | 101 | template 102 | void BM_sort_std_worst_quick(benchmark::State& state) { 103 | int N = state.range(0); 104 | using T = typename V::value_type; 105 | V v; 106 | make_killer(N, v); 107 | while (state.KeepRunning()) { 108 | std::vector vec(v.begin(), v.end()); 109 | START_TIMER 110 | std::sort(vec.begin(), vec.end()); 111 | STOP_TIMER 112 | } 113 | state.SetComplexityN(N); 114 | } 115 | 116 | template 117 | void BM_sort_stable(benchmark::State& state) { 118 | int N = state.range(0); 119 | V v(N); 120 | fill_random(v); 121 | while (state.KeepRunning()) { 122 | std::stable_sort(v.begin(), v.end()); 123 | } 124 | state.SetComplexityN(N); 125 | } 126 | 127 | 128 | template 129 | void BM_search_linear(benchmark::State& state) { 130 | int N = state.range(0); 131 | V v(N); 132 | fill_seq(v); 133 | while (state.KeepRunning()) { 134 | // searching for all the elements. 
135 | for (int i = 0; i < N; ++i) { 136 | auto j = std::find(v.begin(), v.end(), i); 137 | benchmark::DoNotOptimize(j); 138 | assert(std::distance(v.begin(), j) == i); // j is the i-th element in v 139 | } 140 | } 141 | state.SetComplexityN(N); 142 | } 143 | 144 | template 145 | void BM_search_binary(benchmark::State& state) { 146 | int N = state.range(0); 147 | V v(N); 148 | fill_seq(v); 149 | while (state.KeepRunning()) { 150 | // searching for all the elements. 151 | for (int i = 0; i < N; ++i) { 152 | auto j = std::lower_bound(v.begin(), v.end(), i); 153 | benchmark::DoNotOptimize(j); 154 | assert(std::distance(v.begin(), j) == i); // j is the i-th element in v 155 | } 156 | } 157 | state.SetComplexityN(N); 158 | } 159 | 160 | //static const int MSize = L1; 161 | 162 | #define COMPLEXITY_BENCHMARK_GEN_T(T) \ 163 | COMPLEXITY_BENCHMARK_GEN(BM_search_linear, std::vector, MSize); \ 164 | COMPLEXITY_BENCHMARK_GEN(BM_search_linear, std::list, MSize); \ 165 | COMPLEXITY_BENCHMARK_GEN(BM_search_linear, std::deque, MSize);\ 166 | COMPLEXITY_BENCHMARK_GEN(BM_search_binary, std::vector, MSize);\ 167 | COMPLEXITY_BENCHMARK_GEN(BM_search_binary, std::list, MSize);\ 168 | COMPLEXITY_BENCHMARK_GEN(BM_search_binary, std::deque, MSize);\ 169 | COMPLEXITY_BENCHMARK_GEN(BM_sort_std_common, std::vector, MSize);\ 170 | COMPLEXITY_BENCHMARK_GEN(BM_sort_std_ascending, std::vector, MSize);\ 171 | COMPLEXITY_BENCHMARK_GEN(BM_sort_std_descending, std::vector, MSize);\ 172 | COMPLEXITY_BENCHMARK_GEN(BM_sort_stable, std::vector, MSize); 173 | 174 | COMPLEXITY_BENCHMARK_GEN_T(int) 175 | //COMPLEXITY_BENCHMARK_GEN_T(double) 176 | COMPLEXITY_BENCHMARK_GEN_T(aggregate) 177 | 178 | COMPLEXITY_BENCHMARK_GEN(BM_sort_std_list_with_vector, std::list, MSize); 179 | COMPLEXITY_BENCHMARK_GEN(BM_sort_std_list_with_vector, std::list, MSize); 180 | COMPLEXITY_BENCHMARK_GEN(BM_sort_std_worst_quick, std::vector, MSize); 181 | 182 | BENCHMARK_MAIN() 183 | 
-------------------------------------------------------------------------------- /cxx/getline.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | // Adapted from http://info.prelert.com/blog/stdgetline-is-the-poor-relation 8 | // Reads farequote.csv into vec one line at a time with std::getline. 9 | void populateVec_getline(std::vector &vec) 10 | { 11 | vec.clear(); 12 | std::ifstream dataStrm("farequote.csv"); 13 | std::string line; 14 | while (std::getline(dataStrm, line)) 15 | { 16 | vec.push_back(std::move(line)); 17 | } 18 | } 19 | // Appends the '\n'-separated pieces of [begin, end) to vec; continueLast carries an unterminated last piece over to the next call. 20 | void splitUp(const char *begin, 21 | const char *end, 22 | std::vector &vec, 23 | bool &continueLast) 24 | { 25 | while (begin != end) 26 | { 27 | const char *eol = std::find(begin, end, '\n'); 28 | std::size_t len = eol - begin; 29 | if (continueLast) 30 | { 31 | vec.back().append(begin, len); 32 | } 33 | else 34 | { 35 | vec.push_back(std::string(begin, len)); 36 | } 37 | if (eol == end) 38 | { 39 | continueLast = true; 40 | break; 41 | } 42 | continueLast = false; 43 | begin = eol + 1; 44 | } 45 | } 46 | // Reads farequote.csv in bufSize-byte chunks and splits each chunk into lines with splitUp. 47 | void populateVec_split(std::vector &vec) 48 | { 49 | vec.clear(); 50 | std::ifstream dataStrm("farequote.csv"); 51 | static const std::streamsize bufSize(256); 52 | std::vector workBuf(bufSize); 53 | bool continueLast(false); 54 | while (dataStrm.read(&workBuf[0], bufSize)) 55 | { 56 | splitUp(&workBuf[0], &workBuf[0] + dataStrm.gcount(), vec, continueLast); // end pointer by arithmetic: after a full read gcount() == bufSize, and workBuf[bufSize] would index out of range 57 | } 58 | if (!dataStrm.bad()) // gcount() can't be relied on if stream is bad 59 | { 60 | splitUp(&workBuf[0], &workBuf[0] + dataStrm.gcount(), vec, continueLast); // final short chunk; same one-past-the-end pointer form as above 61 | } 62 | } 63 | 64 | int main(int, char **) 65 | { 66 | { 67 | std::vector fileContent; 68 | for (std::size_t count = 0; count < 1000; ++count) 69 | { 70 | populateVec_getline(fileContent); 71 | } 72 | std::cout << fileContent.size() << std::endl; 73 | } 74 | { 75 | std::vector fileContent; 76 | for (std::size_t count = 0; count < 1000; ++count) 77 | { 78 |
populateVec_split(fileContent); 79 | } 80 | std::cout << fileContent.size() << std::endl; 81 | } 82 | return 0; 83 | } 84 | -------------------------------------------------------------------------------- /cxx/max.cpp: -------------------------------------------------------------------------------- 1 | #include // std::max 2 | #include 3 | #include 4 | #include // std::fmax 5 | #include 6 | #include 7 | 8 | #ifndef FUNC 9 | #error Define FUNC to be either max or fmax! 10 | #endif 11 | 12 | int 13 | main(int argc, char *argv[]) { 14 | size_t static const numEntries = 30000; 15 | size_t static const runs = 10; 16 | 17 | if (argc != 2) { 18 | std::cout << "error: Please pass an integer to the executable" 19 | << std::endl; 20 | std::exit(1); 21 | } 22 | // Make sure the loop cannot be optimised away 23 | int const arg = std::stoi(argv[1]); 24 | 25 | for (size_t k = 0; k < runs; ++k) { 26 | double res = 0; 27 | srand(arg + k); 28 | std::array a; 29 | for (size_t i=0; i < numEntries; ++i) { 30 | a[i] = (double)rand() / RAND_MAX; 31 | } 32 | 33 | clock_t const begin = clock(); 34 | double y = 0; 35 | for (size_t i = k; i < numEntries - runs + k; ++i) { 36 | double x = 0; 37 | for (size_t j = 0; j <= i; j+=arg) 38 | x = std::FUNC(x, a[j]); 39 | y += x; 40 | } 41 | clock_t const end = clock(); 42 | 43 | double const elapsed_secs = double(end - begin) / CLOCKS_PER_SEC; 44 | std::cout << elapsed_secs << " (result = " << y << ")" << std::endl; 45 | } 46 | } 47 | 48 | -------------------------------------------------------------------------------- /cxx/mutators.bench.cpp: -------------------------------------------------------------------------------- 1 | #include "benchmark/benchmark.h" 2 | #include "test_configs.h" 3 | #include "test_utils.h" 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | template 16 | void BM_write_seq(benchmark::State& state) { 17 | int N = state.range(0); 18 | V v(N); 
19 | while (state.KeepRunning()) { 20 | fill_seq(v); 21 | benchmark::DoNotOptimize(v); 22 | } 23 | state.SetComplexityN(N); 24 | } 25 | 26 | template 27 | void BM_push_back(benchmark::State& state) { 28 | int N = state.range(0); 29 | V v; 30 | while (state.KeepRunning()) { 31 | for (int i = 0; i < N; ++i) 32 | v.push_back(i); 33 | } 34 | state.SetComplexityN(N); 35 | } 36 | 37 | template 38 | void BM_push_back_resize(benchmark::State& state) { 39 | int N = state.range(0); 40 | V v(N); 41 | while (state.KeepRunning()) { 42 | for (int i = 0; i < N; ++i) 43 | v.push_back(i); 44 | } 45 | state.SetComplexityN(N); 46 | } 47 | 48 | template 49 | void BM_push_back_vector_reserve(benchmark::State& state) { 50 | int N = state.range(0); 51 | V v; 52 | v.reserve(N); 53 | while (state.KeepRunning()) { 54 | for (int i = 0; i < N; ++i) 55 | v.push_back(i); 56 | } 57 | state.SetComplexityN(N); 58 | } 59 | 60 | template 61 | void BM_insert_begin(benchmark::State& state) { 62 | int N = state.range(0); 63 | V v(N, 1); 64 | auto val = *v.begin(); 65 | while (state.KeepRunning()) { 66 | v.insert(v.begin(), val); 67 | } 68 | state.SetComplexityN(N); 69 | } 70 | 71 | // Disable this for now for travis CI to pass until I figure out 72 | // a way to use latest compiler on travis CI. 
73 | #if 0 && __cplusplus >= 201402L 74 | template 75 | void BM_insert_middle(benchmark::State& state) { 76 | int N = state.range(0); 77 | using namespace std; 78 | using v_iterator = typename iterator_traits::iterator_category; 79 | V v(N, 1); 80 | auto val = *v.begin(); 81 | auto pos = std::next(v.begin(), N/2); 82 | while (state.KeepRunning()) { 83 | if constexpr(is_same::value) 84 | pos = std::next(v.begin(), N/2); 85 | v.insert(pos, val); 86 | } 87 | state.SetComplexityN(N); 88 | } 89 | #endif 90 | // NOTE(review): the disabled block above uses `if constexpr` (C++17) behind a C++14 (201402L) guard. 91 | // Insert random elements: one value per timed iteration, taken from a pre-generated pool of N*1000 random values 92 | template 93 | void BM_assoc_insert_random(benchmark::State& state) { 94 | int N = state.range(0); 95 | using CVT = typename V::value_type; 96 | using VT = typename remove_const::type; 97 | std::vector temp(N*1000); 98 | fill_random(temp); 99 | V v; 100 | auto it = temp.begin(); 101 | while (state.KeepRunning()) { 102 | v.insert(*it++); 103 | if (it == temp.end()) // FIXME: After temp.end insert will just return. 104 | assert(0);//it = temp.begin(); NOTE(review): with NDEBUG defined this assert compiles away and `it` is advanced past temp.end() 105 | } 106 | state.SetComplexityN(N); 107 | } 108 | 109 | // Insert sequential elements (pool produced by fill_seq), wrapping back to the start of the pool when it is exhausted 110 | template 111 | void BM_assoc_insert_seq(benchmark::State& state) { 112 | int N = state.range(0); 113 | using CVT = typename V::value_type; 114 | using VT = typename remove_const::type; 115 | std::vector temp(N); 116 | fill_seq(temp); 117 | V v; 118 | auto it = temp.begin(); 119 | while (state.KeepRunning()) { 120 | v.insert(*it++); 121 | if (it == temp.end()) // FIXME: After temp.end insert will just return. 122 | it = temp.begin(); 123 | } 124 | state.SetComplexityN(N); 125 | } 126 | 127 | // Insert same element over and over.
128 | template 129 | void BM_assoc_insert(benchmark::State& state) { 130 | int N = state.range(0); 131 | using CVT = typename V::value_type; 132 | using VT = typename remove_const::type; 133 | random_device r; 134 | VT temp = get_rand(r, N); 135 | V v; 136 | while (state.KeepRunning()) { 137 | v.insert(temp); 138 | } 139 | state.SetComplexityN(N); 140 | } 141 | 142 | /*/ Base case. 143 | template 144 | void 145 | check () 146 | { 147 | } 148 | 149 | template 150 | void 151 | check () 152 | { 153 | COMPLEXITY_BENCHMARK_GEN(T, container, MSize); 154 | // Check rest of the properties. 155 | check(); 156 | } 157 | 158 | // All the sequence testing functors which take one argument. 159 | #define CONTAINERS std::vector, std::list 160 | 161 | check ();*/ 162 | 163 | 164 | //std::forward_list does not have push_back :( 165 | //static const int MSize = L1; 166 | 167 | #define BENCH_STD_MAP(T) SINGLE_ARG(std::map) 168 | #define BENCH_STD_UNORDERED_MAP(T) SINGLE_ARG(std::unordered_map) 169 | 170 | #define COMPLEXITY_BENCHMARK_GEN_T(T) \ 171 | COMPLEXITY_BENCHMARK_GEN(BM_write_seq, std::vector, MSize);\ 172 | COMPLEXITY_BENCHMARK_GEN(BM_write_seq, std::list, MSize);\ 173 | COMPLEXITY_BENCHMARK_GEN(BM_write_seq, std::deque, MSize);\ 174 | COMPLEXITY_BENCHMARK_GEN(BM_push_back, std::vector, MSize);\ 175 | COMPLEXITY_BENCHMARK_GEN(BM_push_back, std::list, MSize);\ 176 | COMPLEXITY_BENCHMARK_GEN(BM_push_back, std::deque, MSize);\ 177 | COMPLEXITY_BENCHMARK_GEN(BM_push_back_resize, std::vector, MSize);\ 178 | COMPLEXITY_BENCHMARK_GEN(BM_push_back_resize, std::list, MSize);\ 179 | COMPLEXITY_BENCHMARK_GEN(BM_push_back_resize, std::deque, MSize);\ 180 | COMPLEXITY_BENCHMARK_GEN(BM_push_back_vector_reserve, std::vector, MSize);\ 181 | COMPLEXITY_BENCHMARK_GEN(BM_insert_begin, std::vector, MSize);\ 182 | COMPLEXITY_BENCHMARK_GEN(BM_insert_begin, std::list, MSize);\ 183 | COMPLEXITY_BENCHMARK_GEN(BM_insert_begin, std::deque, MSize);\ 184 | \ 185 | 
COMPLEXITY_BENCHMARK_GEN(BM_assoc_insert, std::set, MSize);\ 186 | COMPLEXITY_BENCHMARK_GEN(BM_assoc_insert_random, std::set, MSize);\ 187 | COMPLEXITY_BENCHMARK_GEN(BM_assoc_insert_seq, std::set, MSize);\ 188 | COMPLEXITY_BENCHMARK_GEN(BM_assoc_insert, std::unordered_set, MSize);\ 189 | COMPLEXITY_BENCHMARK_GEN(BM_assoc_insert_random, std::unordered_set, MSize);\ 190 | COMPLEXITY_BENCHMARK_GEN(BM_assoc_insert_seq, std::unordered_set, MSize);\ 191 | \ 192 | COMPLEXITY_BENCHMARK_GEN(BM_assoc_insert, BENCH_STD_MAP(T), MSize);\ 193 | COMPLEXITY_BENCHMARK_GEN(BM_assoc_insert_random, BENCH_STD_MAP(T), MSize);\ 194 | COMPLEXITY_BENCHMARK_GEN(BM_assoc_insert_seq, BENCH_STD_MAP(T), MSize);\ 195 | COMPLEXITY_BENCHMARK_GEN(BM_assoc_insert, BENCH_STD_UNORDERED_MAP(T), MSize);\ 196 | COMPLEXITY_BENCHMARK_GEN(BM_assoc_insert_random, BENCH_STD_UNORDERED_MAP(T), MSize);\ 197 | COMPLEXITY_BENCHMARK_GEN(BM_assoc_insert_seq, BENCH_STD_UNORDERED_MAP(T), MSize); 198 | 199 | COMPLEXITY_BENCHMARK_GEN_T(int) 200 | COMPLEXITY_BENCHMARK_GEN_T(aggregate) 201 | 202 | #if 0 && __cplusplus >= 201402L 203 | #define COMPLEXITY_BENCHMARK_GEN_T_14(T) \ 204 | COMPLEXITY_BENCHMARK_GEN(BM_insert_middle, std::vector, MSize);\ 205 | COMPLEXITY_BENCHMARK_GEN(BM_insert_middle, std::list, MSize);\ 206 | COMPLEXITY_BENCHMARK_GEN(BM_insert_middle, std::deque, MSize); 207 | COMPLEXITY_BENCHMARK_GEN_T_14(int) 208 | COMPLEXITY_BENCHMARK_GEN_T_14(aggregate) 209 | #endif 210 | 211 | BENCHMARK_MAIN() 212 | -------------------------------------------------------------------------------- /cxx/size_metric.bench.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #define GETNAME(T) #T 11 | 12 | template 13 | void print_sizeof(const T& t, const char* N) { 14 | std::cout << "\nSizeof, " << N << ", " << sizeof(t); 15 | } 16 | 17 | int main() { 18 | print_sizeof(std::deque(), 
GETNAME(std::deque())); 19 | print_sizeof(std::list(), GETNAME(std::list())); 20 | print_sizeof(std::vector(), GETNAME(std::vector())); 21 | print_sizeof(std::set(), GETNAME(std::set())); 22 | print_sizeof(std::unordered_set(), GETNAME(std::unordered_set())); 23 | print_sizeof(std::map(), GETNAME((std::map()))); 24 | print_sizeof(std::unordered_map(), GETNAME((std::unordered_map()))); 25 | return 0; 26 | } 27 | -------------------------------------------------------------------------------- /cxx/stable_sort.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | static int cnt = 0; 6 | 7 | bool cmpLess(int a, int b) 8 | { 9 | ++cnt; 10 | return a < b; 11 | } 12 | 13 | bool cmpGt(int a, int b) 14 | { 15 | ++cnt; 16 | return a > b; 17 | } 18 | 19 | int main() 20 | { 21 | int A[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20 }; 22 | 23 | const int N = sizeof(A) / sizeof(int); 24 | const int logN = 3; // ln(N) rounded up 25 | 26 | std::stable_sort(A, A + N, cmpLess); 27 | printf("cmpLess=%d\n", cnt); 28 | assert(cnt <= N * logN * logN); 29 | 30 | //reset cnt 31 | cnt = 0; 32 | 33 | std::stable_sort(A, A + N, cmpGt); 34 | printf("cmpGt=%d\n", cnt); 35 | assert(cnt <= N * logN * logN); 36 | } 37 | 38 | -------------------------------------------------------------------------------- /cxx/str.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | void foo(); 3 | 4 | // Test to find if the destructor of std::string is optimized away or not. 5 | // Currently g++ does not optimize the first one and clang++ does not optimize 6 | // the second one. 7 | // You have to compile with -O3 str.cpp -S -o - -fno-exceptions 8 | // Enable -stdlib=libc++ for clang++ 9 | // g++ version: 10 | // commit 7f22a985951e8c33ee6ecb98219fec1f048f5ef1 11 | // Author: gccadmin 12 | // Date: Fri Apr 21 00:16:27 2017 +0000 13 | // 14 | // Daily bump. 
15 | // 16 | // clang++ version (llvm-project) SHA: 17 | // commit fdda8da9f92db73bf1058ee2bead192fb9f1321b 18 | // Author: Eric Fiselier 19 | // Date: Fri Apr 21 01:48:02 2017 +0000 20 | // 21 | // XFAIL Windows test failures under test/libcxx 22 | // 23 | // This patch XFAIL's a number of tests under test/libcxx when on Windows. 24 | // These failures need more investigation or patches to either Clang or libc++ 25 | // but for now we don't want them to prevent the bot from going green. 26 | 27 | 28 | // http://info.prelert.com/blog/cpp-stdstring-implementations 29 | // print sizeof std::string empty. Discuss empty base optimization 30 | // after reading the code. 31 | 32 | int t1() { 33 | std::string s("a"); 34 | foo(); 35 | return 0; 36 | } 37 | 38 | int t2() { 39 | std::string s("a"); 40 | s+='a'; 41 | return 0; 42 | } 43 | 44 | // g++ -O3 -S -o a.s ../a.cpp -fdump-tree-all-all 45 | // g++ does not inline the string::find function 46 | int foo1(const std::string &s1, const std::string &s2, int i) { 47 | bool ret = false; 48 | for (int k = 0; k < i; ++k) 49 | ret |= s1.find(s2) == k; 50 | return ret; 51 | } 52 | -------------------------------------------------------------------------------- /cxx/str.s: -------------------------------------------------------------------------------- 1 | 2 | str.obj: file format pe-x86-64 3 | 4 | compiled with /Ob2 /Fo str.cpp 5 | 6 | Disassembly of section .text$mn: 7 | 8 | 0000000000000000 : 9 | 0: 48 83 ec 58 sub $0x58,%rsp 10 | 4: 48 8b 05 00 00 00 00 mov 0x0(%rip),%rax # b 11 | b: 48 33 c4 xor %rsp,%rax 12 | e: 48 89 44 24 48 mov %rax,0x48(%rsp) 13 | 13: 48 8d 15 00 00 00 00 lea 0x0(%rip),%rdx # 1a 14 | 1a: 48 8d 4c 24 28 lea 0x28(%rsp),%rcx 15 | 1f: e8 00 00 00 00 callq 24 16 | 24: e8 00 00 00 00 callq 29 17 | 29: c7 44 24 20 00 00 00 movl $0x0,0x20(%rsp) 18 | 30: 00 19 | 31: 48 8d 4c 24 28 lea 0x28(%rsp),%rcx 20 | 36: e8 00 00 00 00 callq 3b 21 | 3b: 8b 44 24 20 mov 0x20(%rsp),%eax 22 | 3f: 48 8b 4c 24 48 mov 
0x48(%rsp),%rcx 23 | 44: 48 33 cc xor %rsp,%rcx 24 | 47: e8 00 00 00 00 callq 4c 25 | 4c: 48 83 c4 58 add $0x58,%rsp 26 | 50: c3 retq 27 | 51: cc int3 28 | 52: cc int3 29 | 53: cc int3 30 | 54: cc int3 31 | 55: cc int3 32 | 56: cc int3 33 | 57: cc int3 34 | 58: cc int3 35 | 59: cc int3 36 | 5a: cc int3 37 | 5b: cc int3 38 | 5c: cc int3 39 | 5d: cc int3 40 | 5e: cc int3 41 | 5f: cc int3 42 | 43 | 0000000000000060 : 44 | 60: 48 83 ec 58 sub $0x58,%rsp 45 | 64: 48 8b 05 00 00 00 00 mov 0x0(%rip),%rax # 6b 46 | 6b: 48 33 c4 xor %rsp,%rax 47 | 6e: 48 89 44 24 48 mov %rax,0x48(%rsp) 48 | 73: 48 8d 15 00 00 00 00 lea 0x0(%rip),%rdx # 7a 49 | 7a: 48 8d 4c 24 28 lea 0x28(%rsp),%rcx 50 | 7f: e8 00 00 00 00 callq 84 51 | 84: b2 61 mov $0x61,%dl 52 | 86: 48 8d 4c 24 28 lea 0x28(%rsp),%rcx 53 | 8b: e8 00 00 00 00 callq 90 54 | 90: c7 44 24 20 00 00 00 movl $0x0,0x20(%rsp) 55 | 97: 00 56 | 98: 48 8d 4c 24 28 lea 0x28(%rsp),%rcx 57 | 9d: e8 00 00 00 00 callq a2 58 | a2: 8b 44 24 20 mov 0x20(%rsp),%eax 59 | a6: 48 8b 4c 24 48 mov 0x48(%rsp),%rcx 60 | ab: 48 33 cc xor %rsp,%rcx 61 | ae: e8 00 00 00 00 callq b3 62 | b3: 48 83 c4 58 add $0x58,%rsp 63 | b7: c3 retq 64 | b8: cc int3 65 | b9: cc int3 66 | ba: cc int3 67 | bb: cc int3 68 | bc: cc int3 69 | bd: cc int3 70 | be: cc int3 71 | bf: cc int3 72 | 73 | 00000000000000c0 : 74 | c0: 44 89 44 24 18 mov %r8d,0x18(%rsp) 75 | c5: 48 89 54 24 10 mov %rdx,0x10(%rsp) 76 | ca: 48 89 4c 24 08 mov %rcx,0x8(%rsp) 77 | cf: 48 83 ec 38 sub $0x38,%rsp 78 | d3: c6 44 24 20 00 movb $0x0,0x20(%rsp) 79 | d8: c7 44 24 24 00 00 00 movl $0x0,0x24(%rsp) 80 | df: 00 81 | e0: eb 0a jmp ec 82 | e2: 8b 44 24 24 mov 0x24(%rsp),%eax 83 | e6: ff c0 inc %eax 84 | e8: 89 44 24 24 mov %eax,0x24(%rsp) 85 | ec: 8b 44 24 50 mov 0x50(%rsp),%eax 86 | f0: 39 44 24 24 cmp %eax,0x24(%rsp) 87 | f4: 7d 3d jge 133 88 | f6: 45 33 c0 xor %r8d,%r8d 89 | f9: 48 8b 54 24 48 mov 0x48(%rsp),%rdx 90 | fe: 48 8b 4c 24 40 mov 0x40(%rsp),%rcx 91 | 103: e8 00 00 00 00 callq 108 92 | 
108: 48 63 4c 24 24 movslq 0x24(%rsp),%rcx 93 | 10d: 48 3b c1 cmp %rcx,%rax 94 | 110: 75 0a jne 11c 95 | 112: c7 44 24 28 01 00 00 movl $0x1,0x28(%rsp) 96 | 119: 00 97 | 11a: eb 08 jmp 124 98 | 11c: c7 44 24 28 00 00 00 movl $0x0,0x28(%rsp) 99 | 123: 00 100 | 124: 0f b6 44 24 20 movzbl 0x20(%rsp),%eax 101 | 129: 0b 44 24 28 or 0x28(%rsp),%eax 102 | 12d: 88 44 24 20 mov %al,0x20(%rsp) 103 | 131: eb af jmp e2 104 | 133: 0f b6 44 24 20 movzbl 0x20(%rsp),%eax 105 | 138: 48 83 c4 38 add $0x38,%rsp 106 | 13c: c3 retq 107 | 108 | Disassembly of section .text$mn: 109 | 110 | 0000000000000000 : 111 | 0: 4c 89 4c 24 20 mov %r9,0x20(%rsp) 112 | 5: 4c 89 44 24 18 mov %r8,0x18(%rsp) 113 | a: 48 89 54 24 10 mov %rdx,0x10(%rsp) 114 | f: 48 89 4c 24 08 mov %rcx,0x8(%rsp) 115 | 14: 48 83 ec 38 sub $0x38,%rsp 116 | 18: 48 8b 44 24 48 mov 0x48(%rsp),%rax 117 | 1d: 48 39 44 24 60 cmp %rax,0x60(%rsp) 118 | 22: 77 17 ja 3b 119 | 24: 48 8b 44 24 60 mov 0x60(%rsp),%rax 120 | 29: 48 8b 4c 24 48 mov 0x48(%rsp),%rcx 121 | 2e: 48 2b c8 sub %rax,%rcx 122 | 31: 48 8b c1 mov %rcx,%rax 123 | 34: 48 39 44 24 50 cmp %rax,0x50(%rsp) 124 | 39: 76 0c jbe 47 125 | 3b: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax 126 | 42: e9 be 00 00 00 jmpq 105 127 | 47: 48 83 7c 24 60 00 cmpq $0x0,0x60(%rsp) 128 | 4d: 75 0a jne 59 129 | 4f: 48 8b 44 24 50 mov 0x50(%rsp),%rax 130 | 54: e9 ac 00 00 00 jmpq 105 131 | 59: 48 8b 44 24 60 mov 0x60(%rsp),%rax 132 | 5e: 48 8b 4c 24 48 mov 0x48(%rsp),%rcx 133 | 63: 48 2b c8 sub %rax,%rcx 134 | 66: 48 8b c1 mov %rcx,%rax 135 | 69: 48 8b 4c 24 40 mov 0x40(%rsp),%rcx 136 | 6e: 48 8d 44 01 01 lea 0x1(%rcx,%rax,1),%rax 137 | 73: 48 89 44 24 28 mov %rax,0x28(%rsp) 138 | 78: 48 8b 44 24 50 mov 0x50(%rsp),%rax 139 | 7d: 48 8b 4c 24 40 mov 0x40(%rsp),%rcx 140 | 82: 48 03 c8 add %rax,%rcx 141 | 85: 48 8b c1 mov %rcx,%rax 142 | 88: 48 89 44 24 20 mov %rax,0x20(%rsp) 143 | 8d: eb 0d jmp 9c 144 | 8f: 48 8b 44 24 20 mov 0x20(%rsp),%rax 145 | 94: 48 ff c0 inc %rax 146 | 97: 48 89 44 24 
20 mov %rax,0x20(%rsp) 147 | 9c: 48 8b 44 24 20 mov 0x20(%rsp),%rax 148 | a1: 48 8b 4c 24 28 mov 0x28(%rsp),%rcx 149 | a6: 48 2b c8 sub %rax,%rcx 150 | a9: 48 8b c1 mov %rcx,%rax 151 | ac: 48 8b 4c 24 58 mov 0x58(%rsp),%rcx 152 | b1: 0f be 09 movsbl (%rcx),%ecx 153 | b4: 4c 8b c0 mov %rax,%r8 154 | b7: 8b d1 mov %ecx,%edx 155 | b9: 48 8b 4c 24 20 mov 0x20(%rsp),%rcx 156 | be: e8 00 00 00 00 callq c3 157 | c3: 48 89 44 24 20 mov %rax,0x20(%rsp) 158 | c8: 48 83 7c 24 20 00 cmpq $0x0,0x20(%rsp) 159 | ce: 75 09 jne d9 160 | d0: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax 161 | d7: eb 2c jmp 105 162 | d9: 4c 8b 44 24 60 mov 0x60(%rsp),%r8 163 | de: 48 8b 54 24 58 mov 0x58(%rsp),%rdx 164 | e3: 48 8b 4c 24 20 mov 0x20(%rsp),%rcx 165 | e8: e8 00 00 00 00 callq ed 166 | ed: 85 c0 test %eax,%eax 167 | ef: 75 12 jne 103 168 | f1: 48 8b 44 24 40 mov 0x40(%rsp),%rax 169 | f6: 48 8b 4c 24 20 mov 0x20(%rsp),%rcx 170 | fb: 48 2b c8 sub %rax,%rcx 171 | fe: 48 8b c1 mov %rcx,%rax 172 | 101: eb 02 jmp 105 173 | 103: eb 8a jmp 8f 174 | 105: 48 83 c4 38 add $0x38,%rsp 175 | 109: c3 retq 176 | 177 | Disassembly of section .text$mn: 178 | 179 | 0000000000000000 : 180 | 0: 48 89 54 24 10 mov %rdx,0x10(%rsp) 181 | 5: 48 89 4c 24 08 mov %rcx,0x8(%rsp) 182 | a: 48 83 ec 48 sub $0x48,%rsp 183 | e: 48 8b 44 24 50 mov 0x50(%rsp),%rax 184 | 13: 48 89 44 24 28 mov %rax,0x28(%rsp) 185 | 18: 48 8b 44 24 28 mov 0x28(%rsp),%rax 186 | 1d: 48 89 44 24 20 mov %rax,0x20(%rsp) 187 | 22: 48 8b 44 24 20 mov 0x20(%rsp),%rax 188 | 27: 48 c7 40 10 00 00 00 movq $0x0,0x10(%rax) 189 | 2e: 00 190 | 2f: 48 8b 44 24 20 mov 0x20(%rsp),%rax 191 | 34: 48 c7 40 18 00 00 00 movq $0x0,0x18(%rax) 192 | 3b: 00 193 | 3c: 48 8b 4c 24 50 mov 0x50(%rsp),%rcx 194 | 41: e8 00 00 00 00 callq 46 195 | 46: 48 8b 4c 24 58 mov 0x58(%rsp),%rcx 196 | 4b: e8 00 00 00 00 callq 50 197 | 50: 48 89 44 24 30 mov %rax,0x30(%rsp) 198 | 55: 4c 8b 44 24 30 mov 0x30(%rsp),%r8 199 | 5a: 48 8b 54 24 58 mov 0x58(%rsp),%rdx 200 | 5f: 48 8b 4c 
24 50 mov 0x50(%rsp),%rcx 201 | 64: e8 00 00 00 00 callq 69 202 | 69: 48 8b 44 24 50 mov 0x50(%rsp),%rax 203 | 6e: 48 83 c4 48 add $0x48,%rsp 204 | 72: c3 retq 205 | 206 | Disassembly of section .text$mn: 207 | 208 | 0000000000000000 : 209 | 0: 44 88 44 24 18 mov %r8b,0x18(%rsp) 210 | 5: 48 89 54 24 10 mov %rdx,0x10(%rsp) 211 | a: 48 89 4c 24 08 mov %rcx,0x8(%rsp) 212 | f: 48 83 ec 48 sub $0x48,%rsp 213 | 13: 48 c7 44 24 20 00 00 movq $0x0,0x20(%rsp) 214 | 1a: 00 00 215 | 1c: 48 83 7c 24 50 00 cmpq $0x0,0x50(%rsp) 216 | 22: 75 0a jne 2e 217 | 24: 48 8b 44 24 20 mov 0x20(%rsp),%rax 218 | 29: e9 c5 00 00 00 jmpq f3 219 | 2e: 33 d2 xor %edx,%edx 220 | 30: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax 221 | 37: 48 f7 74 24 58 divq 0x58(%rsp) 222 | 3c: 48 3b 44 24 50 cmp 0x50(%rsp),%rax 223 | 41: 73 05 jae 48 224 | 43: e8 00 00 00 00 callq 48 225 | 48: 48 8b 44 24 50 mov 0x50(%rsp),%rax 226 | 4d: 48 0f af 44 24 58 imul 0x58(%rsp),%rax 227 | 53: 48 89 44 24 28 mov %rax,0x28(%rsp) 228 | 58: 0f b6 44 24 60 movzbl 0x60(%rsp),%eax 229 | 5d: 85 c0 test %eax,%eax 230 | 5f: 74 71 je d2 231 | 61: 48 81 7c 24 28 00 10 cmpq $0x1000,0x28(%rsp) 232 | 68: 00 00 233 | 6a: 72 66 jb d2 234 | 6c: 48 8b 44 24 28 mov 0x28(%rsp),%rax 235 | 71: 48 83 c0 27 add $0x27,%rax 236 | 75: 48 89 44 24 38 mov %rax,0x38(%rsp) 237 | 7a: 48 8b 44 24 28 mov 0x28(%rsp),%rax 238 | 7f: 48 39 44 24 38 cmp %rax,0x38(%rsp) 239 | 84: 77 05 ja 8b 240 | 86: e8 00 00 00 00 callq 8b 241 | 8b: 48 8b 4c 24 38 mov 0x38(%rsp),%rcx 242 | 90: e8 00 00 00 00 callq 95 243 | 95: 48 89 44 24 30 mov %rax,0x30(%rsp) 244 | 9a: 48 83 7c 24 30 00 cmpq $0x0,0x30(%rsp) 245 | a0: 75 05 jne a7 246 | a2: e8 00 00 00 00 callq a7 247 | a7: 48 8b 44 24 30 mov 0x30(%rsp),%rax 248 | ac: 48 83 c0 27 add $0x27,%rax 249 | b0: 48 83 e0 e0 and $0xffffffffffffffe0,%rax 250 | b4: 48 89 44 24 20 mov %rax,0x20(%rsp) 251 | b9: b8 08 00 00 00 mov $0x8,%eax 252 | be: 48 6b c0 ff imul $0xffffffffffffffff,%rax,%rax 253 | c2: 48 8b 4c 24 20 mov 
0x20(%rsp),%rcx 254 | c7: 48 8b 54 24 30 mov 0x30(%rsp),%rdx 255 | cc: 48 89 14 01 mov %rdx,(%rcx,%rax,1) 256 | d0: eb 1c jmp ee 257 | d2: 48 8b 4c 24 28 mov 0x28(%rsp),%rcx 258 | d7: e8 00 00 00 00 callq dc 259 | dc: 48 89 44 24 20 mov %rax,0x20(%rsp) 260 | e1: 48 83 7c 24 20 00 cmpq $0x0,0x20(%rsp) 261 | e7: 75 05 jne ee 262 | e9: e8 00 00 00 00 callq ee 263 | ee: 48 8b 44 24 20 mov 0x20(%rsp),%rax 264 | f3: 48 83 c4 48 add $0x48,%rsp 265 | f7: c3 retq 266 | 267 | Disassembly of section .text$mn: 268 | 269 | 0000000000000000 : 270 | 0: 48 89 54 24 10 mov %rdx,0x10(%rsp) 271 | 5: 48 89 4c 24 08 mov %rcx,0x8(%rsp) 272 | a: 48 81 ec c8 00 00 00 sub $0xc8,%rsp 273 | 11: 48 c7 84 24 a8 00 00 movq $0xfffffffffffffffe,0xa8(%rsp) 274 | 18: 00 fe ff ff ff 275 | 1d: 48 c7 44 24 58 ff ff movq $0xffffffffffffffff,0x58(%rsp) 276 | 24: ff ff 277 | 26: 48 83 7c 24 58 01 cmpq $0x1,0x58(%rsp) 278 | 2c: 77 0b ja 39 279 | 2e: 48 c7 44 24 60 01 00 movq $0x1,0x60(%rsp) 280 | 35: 00 00 281 | 37: eb 0d jmp 46 282 | 39: 48 8b 44 24 58 mov 0x58(%rsp),%rax 283 | 3e: 48 ff c8 dec %rax 284 | 41: 48 89 44 24 60 mov %rax,0x60(%rsp) 285 | 46: 48 8b 44 24 60 mov 0x60(%rsp),%rax 286 | 4b: 48 89 44 24 48 mov %rax,0x48(%rsp) 287 | 50: 48 8b 44 24 48 mov 0x48(%rsp),%rax 288 | 55: 48 39 84 24 d8 00 00 cmp %rax,0xd8(%rsp) 289 | 5c: 00 290 | 5d: 76 05 jbe 64 291 | 5f: e8 00 00 00 00 callq 64 292 | 64: 48 8b 84 24 d0 00 00 mov 0xd0(%rsp),%rax 293 | 6b: 00 294 | 6c: 48 89 44 24 20 mov %rax,0x20(%rsp) 295 | 71: 48 8b 44 24 20 mov 0x20(%rsp),%rax 296 | 76: 48 8b 40 18 mov 0x18(%rax),%rax 297 | 7a: 48 89 44 24 30 mov %rax,0x30(%rsp) 298 | 7f: 48 8b 84 24 d8 00 00 mov 0xd8(%rsp),%rax 299 | 86: 00 300 | 87: 48 83 c8 0f or $0xf,%rax 301 | 8b: 48 89 44 24 28 mov %rax,0x28(%rsp) 302 | 90: 48 8b 44 24 28 mov 0x28(%rsp),%rax 303 | 95: 48 39 44 24 48 cmp %rax,0x48(%rsp) 304 | 9a: 73 0f jae ab 305 | 9c: 48 8b 84 24 d8 00 00 mov 0xd8(%rsp),%rax 306 | a3: 00 307 | a4: 48 89 44 24 28 mov %rax,0x28(%rsp) 308 | a9: eb 
7b jmp 126 309 | ab: 33 d2 xor %edx,%edx 310 | ad: 48 8b 44 24 30 mov 0x30(%rsp),%rax 311 | b2: b9 02 00 00 00 mov $0x2,%ecx 312 | b7: 48 f7 f1 div %rcx 313 | ba: 48 89 44 24 68 mov %rax,0x68(%rsp) 314 | bf: 33 d2 xor %edx,%edx 315 | c1: 48 8b 44 24 28 mov 0x28(%rsp),%rax 316 | c6: b9 03 00 00 00 mov $0x3,%ecx 317 | cb: 48 f7 f1 div %rcx 318 | ce: 48 8b 4c 24 68 mov 0x68(%rsp),%rcx 319 | d3: 48 3b c8 cmp %rax,%rcx 320 | d6: 77 02 ja da 321 | d8: eb 4c jmp 126 322 | da: 33 d2 xor %edx,%edx 323 | dc: 48 8b 44 24 30 mov 0x30(%rsp),%rax 324 | e1: b9 02 00 00 00 mov $0x2,%ecx 325 | e6: 48 f7 f1 div %rcx 326 | e9: 48 8b 4c 24 48 mov 0x48(%rsp),%rcx 327 | ee: 48 2b c8 sub %rax,%rcx 328 | f1: 48 8b c1 mov %rcx,%rax 329 | f4: 48 39 44 24 30 cmp %rax,0x30(%rsp) 330 | f9: 77 21 ja 11c 331 | fb: 33 d2 xor %edx,%edx 332 | fd: 48 8b 44 24 30 mov 0x30(%rsp),%rax 333 | 102: b9 02 00 00 00 mov $0x2,%ecx 334 | 107: 48 f7 f1 div %rcx 335 | 10a: 48 8b 4c 24 30 mov 0x30(%rsp),%rcx 336 | 10f: 48 03 c8 add %rax,%rcx 337 | 112: 48 8b c1 mov %rcx,%rax 338 | 115: 48 89 44 24 28 mov %rax,0x28(%rsp) 339 | 11a: eb 0a jmp 126 340 | 11c: 48 8b 44 24 48 mov 0x48(%rsp),%rax 341 | 121: 48 89 44 24 28 mov %rax,0x28(%rsp) 342 | 126: 48 8b 84 24 d0 00 00 mov 0xd0(%rsp),%rax 343 | 12d: 00 344 | 12e: 48 89 84 24 b0 00 00 mov %rax,0xb0(%rsp) 345 | 135: 00 346 | 136: 48 8b 44 24 28 mov 0x28(%rsp),%rax 347 | 13b: 48 ff c0 inc %rax 348 | 13e: 41 b0 01 mov $0x1,%r8b 349 | 141: ba 01 00 00 00 mov $0x1,%edx 350 | 146: 48 8b c8 mov %rax,%rcx 351 | 149: e8 00 00 00 00 callq 14e 352 | 14e: 48 89 44 24 40 mov %rax,0x40(%rsp) 353 | 153: eb 00 jmp 155 <$LN16> 354 | 355 | 0000000000000155 <$LN16>: 356 | 155: 48 8b 44 24 20 mov 0x20(%rsp),%rax 357 | 15a: 48 83 78 18 10 cmpq $0x10,0x18(%rax) 358 | 15f: 72 0a jb 16b <$LN16+0x16> 359 | 161: c7 44 24 38 01 00 00 movl $0x1,0x38(%rsp) 360 | 168: 00 361 | 169: eb 08 jmp 173 <$LN16+0x1e> 362 | 16b: c7 44 24 38 00 00 00 movl $0x0,0x38(%rsp) 363 | 172: 00 364 | 173: 0f b6 44 24 
38 movzbl 0x38(%rsp),%eax 365 | 178: 85 c0 test %eax,%eax 366 | 17a: 74 7f je 1fb <$LN16+0xa6> 367 | 17c: 48 8b 44 24 20 mov 0x20(%rsp),%rax 368 | 181: 48 8b 00 mov (%rax),%rax 369 | 184: 48 89 44 24 78 mov %rax,0x78(%rsp) 370 | 189: 48 8b 44 24 20 mov 0x20(%rsp),%rax 371 | 18e: 48 8b 40 10 mov 0x10(%rax),%rax 372 | 192: 48 ff c0 inc %rax 373 | 195: 48 89 44 24 70 mov %rax,0x70(%rsp) 374 | 19a: 48 8b 44 24 40 mov 0x40(%rsp),%rax 375 | 19f: 48 89 84 24 80 00 00 mov %rax,0x80(%rsp) 376 | 1a6: 00 377 | 1a7: 4c 8b 44 24 70 mov 0x70(%rsp),%r8 378 | 1ac: 48 8b 54 24 78 mov 0x78(%rsp),%rdx 379 | 1b1: 48 8b 8c 24 80 00 00 mov 0x80(%rsp),%rcx 380 | 1b8: 00 381 | 1b9: e8 00 00 00 00 callq 1be <$LN16+0x69> 382 | 1be: 48 8b 44 24 20 mov 0x20(%rsp),%rax 383 | 1c3: 48 8b 00 mov (%rax),%rax 384 | 1c6: 48 89 84 24 88 00 00 mov %rax,0x88(%rsp) 385 | 1cd: 00 386 | 1ce: 48 8b 44 24 30 mov 0x30(%rsp),%rax 387 | 1d3: 48 ff c0 inc %rax 388 | 1d6: 41 b8 01 00 00 00 mov $0x1,%r8d 389 | 1dc: 48 8b d0 mov %rax,%rdx 390 | 1df: 48 8b 8c 24 88 00 00 mov 0x88(%rsp),%rcx 391 | 1e6: 00 392 | 1e7: e8 00 00 00 00 callq 1ec <$LN16+0x97> 393 | 1ec: 48 8b 44 24 20 mov 0x20(%rsp),%rax 394 | 1f1: 48 8b 4c 24 40 mov 0x40(%rsp),%rcx 395 | 1f6: 48 89 08 mov %rcx,(%rax) 396 | 1f9: eb 78 jmp 273 <$LN16+0x11e> 397 | 1fb: 48 8b 44 24 20 mov 0x20(%rsp),%rax 398 | 200: 48 8b 40 10 mov 0x10(%rax),%rax 399 | 204: 48 ff c0 inc %rax 400 | 207: 48 89 84 24 90 00 00 mov %rax,0x90(%rsp) 401 | 20e: 00 402 | 20f: 48 8b 44 24 40 mov 0x40(%rsp),%rax 403 | 214: 48 89 84 24 98 00 00 mov %rax,0x98(%rsp) 404 | 21b: 00 405 | 21c: 48 8b 44 24 20 mov 0x20(%rsp),%rax 406 | 221: 4c 8b 84 24 90 00 00 mov 0x90(%rsp),%r8 407 | 228: 00 408 | 229: 48 8b d0 mov %rax,%rdx 409 | 22c: 48 8b 8c 24 98 00 00 mov 0x98(%rsp),%rcx 410 | 233: 00 411 | 234: e8 00 00 00 00 callq 239 <$LN16+0xe4> 412 | 239: 48 8b 44 24 20 mov 0x20(%rsp),%rax 413 | 23e: 48 89 44 24 50 mov %rax,0x50(%rsp) 414 | 243: 48 83 7c 24 50 00 cmpq $0x0,0x50(%rsp) 415 | 249: 74 
1c je 267 <$LN16+0x112> 416 | 24b: 48 8b 44 24 50 mov 0x50(%rsp),%rax 417 | 250: 48 8b 4c 24 40 mov 0x40(%rsp),%rcx 418 | 255: 48 89 08 mov %rcx,(%rax) 419 | 258: 48 8b 44 24 50 mov 0x50(%rsp),%rax 420 | 25d: 48 89 84 24 a0 00 00 mov %rax,0xa0(%rsp) 421 | 264: 00 422 | 265: eb 0c jmp 273 <$LN16+0x11e> 423 | 267: 48 c7 84 24 a0 00 00 movq $0x0,0xa0(%rsp) 424 | 26e: 00 00 00 00 00 425 | 273: 48 8b 44 24 20 mov 0x20(%rsp),%rax 426 | 278: 48 8b 4c 24 28 mov 0x28(%rsp),%rcx 427 | 27d: 48 89 48 18 mov %rcx,0x18(%rax) 428 | 281: 48 81 c4 c8 00 00 00 add $0xc8,%rsp 429 | 288: c3 retq 430 | 431 | Disassembly of section .text$x: 432 | 433 | 0000000000000000 : 434 | 0: 48 89 54 24 10 mov %rdx,0x10(%rsp) 435 | 5: 55 push %rbp 436 | 6: 48 83 ec 20 sub $0x20,%rsp 437 | a: 48 8b ea mov %rdx,%rbp 438 | 439 | 000000000000000d <__catch$?_Copy@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAAX_K@Z$0>: 440 | d: 48 8b 85 d8 00 00 00 mov 0xd8(%rbp),%rax 441 | 14: 48 89 45 28 mov %rax,0x28(%rbp) 442 | 18: 48 8b 45 28 mov 0x28(%rbp),%rax 443 | 1c: 48 ff c0 inc %rax 444 | 1f: 41 b0 01 mov $0x1,%r8b 445 | 22: ba 01 00 00 00 mov $0x1,%edx 446 | 27: 48 8b c8 mov %rax,%rcx 447 | 2a: e8 00 00 00 00 callq 2f <__catch$?_Copy@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAAX_K@Z$0+0x22> 448 | 2f: 48 89 45 40 mov %rax,0x40(%rbp) 449 | 33: 48 8d 05 00 00 00 00 lea 0x0(%rip),%rax # 3a <__catch$?_Copy@?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@QEAAX_K@Z$0+0x2d> 450 | 3a: 48 83 c4 20 add $0x20,%rsp 451 | 3e: 5d pop %rbp 452 | 3f: c3 retq 453 | 40: cc int3 454 | 455 | Disassembly of section .text$mn: 456 | 457 | 0000000000000000 : 458 | 0: 4c 89 44 24 18 mov %r8,0x18(%rsp) 459 | 5: 48 89 54 24 10 mov %rdx,0x10(%rsp) 460 | a: 48 89 4c 24 08 mov %rcx,0x8(%rsp) 461 | f: 48 83 ec 48 sub $0x48,%rsp 462 | 13: 33 d2 xor %edx,%edx 463 | 15: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax 464 | 1c: 48 f7 74 24 60 divq 0x60(%rsp) 465 | 21: 48 39 44 24 58 cmp 
%rax,0x58(%rsp) 466 | 26: 76 05 jbe 2d 467 | 28: e8 00 00 00 00 callq 2d 468 | 2d: 48 8b 44 24 58 mov 0x58(%rsp),%rax 469 | 32: 48 0f af 44 24 60 imul 0x60(%rsp),%rax 470 | 38: 48 89 44 24 30 mov %rax,0x30(%rsp) 471 | 3d: 48 81 7c 24 30 00 10 cmpq $0x1000,0x30(%rsp) 472 | 44: 00 00 473 | 46: 0f 82 89 00 00 00 jb d5 474 | 4c: 48 8b 44 24 50 mov 0x50(%rsp),%rax 475 | 51: 48 89 44 24 20 mov %rax,0x20(%rsp) 476 | 56: 48 8b 44 24 20 mov 0x20(%rsp),%rax 477 | 5b: 48 83 e0 1f and $0x1f,%rax 478 | 5f: 48 85 c0 test %rax,%rax 479 | 62: 74 05 je 69 480 | 64: e8 00 00 00 00 callq 69 481 | 69: 48 8b 44 24 20 mov 0x20(%rsp),%rax 482 | 6e: 48 83 e8 08 sub $0x8,%rax 483 | 72: 48 89 44 24 38 mov %rax,0x38(%rsp) 484 | 77: 48 8b 44 24 38 mov 0x38(%rsp),%rax 485 | 7c: 48 8b 00 mov (%rax),%rax 486 | 7f: 48 89 44 24 28 mov %rax,0x28(%rsp) 487 | 84: 48 8b 44 24 20 mov 0x20(%rsp),%rax 488 | 89: 48 39 44 24 28 cmp %rax,0x28(%rsp) 489 | 8e: 72 05 jb 95 490 | 90: e8 00 00 00 00 callq 95 491 | 95: 48 8b 44 24 28 mov 0x28(%rsp),%rax 492 | 9a: 48 8b 4c 24 20 mov 0x20(%rsp),%rcx 493 | 9f: 48 2b c8 sub %rax,%rcx 494 | a2: 48 8b c1 mov %rcx,%rax 495 | a5: 48 83 f8 08 cmp $0x8,%rax 496 | a9: 73 05 jae b0 497 | ab: e8 00 00 00 00 callq b0 498 | b0: 48 8b 44 24 28 mov 0x28(%rsp),%rax 499 | b5: 48 8b 4c 24 20 mov 0x20(%rsp),%rcx 500 | ba: 48 2b c8 sub %rax,%rcx 501 | bd: 48 8b c1 mov %rcx,%rax 502 | c0: 48 83 f8 27 cmp $0x27,%rax 503 | c4: 76 05 jbe cb 504 | c6: e8 00 00 00 00 callq cb 505 | cb: 48 8b 44 24 28 mov 0x28(%rsp),%rax 506 | d0: 48 89 44 24 50 mov %rax,0x50(%rsp) 507 | d5: 48 8b 4c 24 50 mov 0x50(%rsp),%rcx 508 | da: e8 00 00 00 00 callq df 509 | df: 48 83 c4 48 add $0x48,%rsp 510 | e3: c3 retq 511 | 512 | Disassembly of section .text$mn: 513 | 514 | 0000000000000000 : 515 | 0: 48 89 54 24 10 mov %rdx,0x10(%rsp) 516 | 5: 48 89 4c 24 08 mov %rcx,0x8(%rsp) 517 | a: 48 83 ec 48 sub $0x48,%rsp 518 | e: 48 83 7c 24 58 00 cmpq $0x0,0x58(%rsp) 519 | 14: 75 4e jne 64 520 | 16: 48 8b 44 24 50 mov 
0x50(%rsp),%rax 521 | 1b: 48 89 44 24 28 mov %rax,0x28(%rsp) 522 | 20: c6 44 24 20 00 movb $0x0,0x20(%rsp) 523 | 25: 48 8b 44 24 28 mov 0x28(%rsp),%rax 524 | 2a: 48 c7 40 10 00 00 00 movq $0x0,0x10(%rax) 525 | 31: 00 526 | 32: 33 c0 xor %eax,%eax 527 | 34: 48 6b c0 01 imul $0x1,%rax,%rax 528 | 38: 48 89 44 24 30 mov %rax,0x30(%rsp) 529 | 3d: 48 8b 4c 24 28 mov 0x28(%rsp),%rcx 530 | 42: e8 00 00 00 00 callq 47 531 | 47: 48 8b 4c 24 30 mov 0x30(%rsp),%rcx 532 | 4c: 48 03 c1 add %rcx,%rax 533 | 4f: 48 89 44 24 38 mov %rax,0x38(%rsp) 534 | 54: 48 8b 44 24 38 mov 0x38(%rsp),%rax 535 | 59: 0f b6 4c 24 20 movzbl 0x20(%rsp),%ecx 536 | 5e: 88 08 mov %cl,(%rax) 537 | 60: 32 c0 xor %al,%al 538 | 62: eb 21 jmp 85 539 | 64: 48 8b 44 24 50 mov 0x50(%rsp),%rax 540 | 69: 48 8b 4c 24 58 mov 0x58(%rsp),%rcx 541 | 6e: 48 39 48 18 cmp %rcx,0x18(%rax) 542 | 72: 73 0f jae 83 543 | 74: 48 8b 54 24 58 mov 0x58(%rsp),%rdx 544 | 79: 48 8b 4c 24 50 mov 0x50(%rsp),%rcx 545 | 7e: e8 00 00 00 00 callq 83 546 | 83: b0 01 mov $0x1,%al 547 | 85: 48 83 c4 48 add $0x48,%rsp 548 | 89: c3 retq 549 | 550 | Disassembly of section .text$mn: 551 | 552 | 0000000000000000 : 553 | 0: 48 89 4c 24 08 mov %rcx,0x8(%rsp) 554 | 5: 48 83 ec 28 sub $0x28,%rsp 555 | 9: 48 8b 44 24 30 mov 0x30(%rsp),%rax 556 | e: 48 83 78 18 10 cmpq $0x10,0x18(%rax) 557 | 13: 72 09 jb 1e 558 | 15: c7 04 24 01 00 00 00 movl $0x1,(%rsp) 559 | 1c: eb 07 jmp 25 560 | 1e: c7 04 24 00 00 00 00 movl $0x0,(%rsp) 561 | 25: 0f b6 04 24 movzbl (%rsp),%eax 562 | 29: 85 c0 test %eax,%eax 563 | 2b: 74 19 je 46 564 | 2d: 48 8b 44 24 10 mov 0x10(%rsp),%rax 565 | 32: 48 89 44 24 08 mov %rax,0x8(%rsp) 566 | 37: 48 8b 44 24 30 mov 0x30(%rsp),%rax 567 | 3c: 48 8b 00 mov (%rax),%rax 568 | 3f: 48 89 44 24 08 mov %rax,0x8(%rsp) 569 | 44: eb 0a jmp 50 570 | 46: 48 8b 44 24 30 mov 0x30(%rsp),%rax 571 | 4b: 48 89 44 24 08 mov %rax,0x8(%rsp) 572 | 50: 48 8b 44 24 08 mov 0x8(%rsp),%rax 573 | 55: 48 83 c4 28 add $0x28,%rsp 574 | 59: c3 retq 575 | 576 | 
Disassembly of section .text$mn: 577 | 578 | 0000000000000000 : 579 | 0: 48 89 4c 24 08 mov %rcx,0x8(%rsp) 580 | 5: 48 83 ec 28 sub $0x28,%rsp 581 | 9: 48 8b 44 24 30 mov 0x30(%rsp),%rax 582 | e: 48 83 78 18 10 cmpq $0x10,0x18(%rax) 583 | 13: 72 09 jb 1e 584 | 15: c7 04 24 01 00 00 00 movl $0x1,(%rsp) 585 | 1c: eb 07 jmp 25 586 | 1e: c7 04 24 00 00 00 00 movl $0x0,(%rsp) 587 | 25: 0f b6 04 24 movzbl (%rsp),%eax 588 | 29: 85 c0 test %eax,%eax 589 | 2b: 74 19 je 46 590 | 2d: 48 8b 44 24 10 mov 0x10(%rsp),%rax 591 | 32: 48 89 44 24 08 mov %rax,0x8(%rsp) 592 | 37: 48 8b 44 24 30 mov 0x30(%rsp),%rax 593 | 3c: 48 8b 00 mov (%rax),%rax 594 | 3f: 48 89 44 24 08 mov %rax,0x8(%rsp) 595 | 44: eb 0a jmp 50 596 | 46: 48 8b 44 24 30 mov 0x30(%rsp),%rax 597 | 4b: 48 89 44 24 08 mov %rax,0x8(%rsp) 598 | 50: 48 8b 44 24 08 mov 0x8(%rsp),%rax 599 | 55: 48 83 c4 28 add $0x28,%rsp 600 | 59: c3 retq 601 | 602 | Disassembly of section .text$mn: 603 | 604 | 0000000000000000 : 605 | 0: 48 89 4c 24 08 mov %rcx,0x8(%rsp) 606 | 5: 48 83 ec 58 sub $0x58,%rsp 607 | 9: 48 8b 44 24 60 mov 0x60(%rsp),%rax 608 | e: 48 89 44 24 28 mov %rax,0x28(%rsp) 609 | 13: 48 8b 44 24 28 mov 0x28(%rsp),%rax 610 | 18: 48 83 78 18 10 cmpq $0x10,0x18(%rax) 611 | 1d: 72 0a jb 29 612 | 1f: c7 44 24 24 01 00 00 movl $0x1,0x24(%rsp) 613 | 26: 00 614 | 27: eb 08 jmp 31 615 | 29: c7 44 24 24 00 00 00 movl $0x0,0x24(%rsp) 616 | 30: 00 617 | 31: 0f b6 44 24 24 movzbl 0x24(%rsp),%eax 618 | 36: 85 c0 test %eax,%eax 619 | 38: 74 3d je 77 620 | 3a: 48 8b 44 24 28 mov 0x28(%rsp),%rax 621 | 3f: 48 8b 00 mov (%rax),%rax 622 | 42: 48 89 44 24 38 mov %rax,0x38(%rsp) 623 | 47: 48 8b 44 24 60 mov 0x60(%rsp),%rax 624 | 4c: 48 89 44 24 40 mov %rax,0x40(%rsp) 625 | 51: 48 8b 44 24 28 mov 0x28(%rsp),%rax 626 | 56: 48 8b 40 18 mov 0x18(%rax),%rax 627 | 5a: 48 ff c0 inc %rax 628 | 5d: 48 89 44 24 30 mov %rax,0x30(%rsp) 629 | 62: 41 b8 01 00 00 00 mov $0x1,%r8d 630 | 68: 48 8b 54 24 30 mov 0x30(%rsp),%rdx 631 | 6d: 48 8b 4c 24 38 mov 
0x38(%rsp),%rcx 632 | 72: e8 00 00 00 00 callq 77 633 | 77: 48 8b 44 24 28 mov 0x28(%rsp),%rax 634 | 7c: 48 c7 40 10 00 00 00 movq $0x0,0x10(%rax) 635 | 83: 00 636 | 84: 48 8b 44 24 28 mov 0x28(%rsp),%rax 637 | 89: 48 c7 40 18 0f 00 00 movq $0xf,0x18(%rax) 638 | 90: 00 639 | 91: c6 44 24 20 00 movb $0x0,0x20(%rsp) 640 | 96: b8 01 00 00 00 mov $0x1,%eax 641 | 9b: 48 6b c0 00 imul $0x0,%rax,%rax 642 | 9f: 48 8b 4c 24 28 mov 0x28(%rsp),%rcx 643 | a4: 0f b6 54 24 20 movzbl 0x20(%rsp),%edx 644 | a9: 88 14 01 mov %dl,(%rcx,%rax,1) 645 | ac: 48 83 c4 58 add $0x58,%rsp 646 | b0: c3 retq 647 | 648 | Disassembly of section .text$mn: 649 | 650 | 0000000000000000 : 651 | 0: 48 89 4c 24 08 mov %rcx,0x8(%rsp) 652 | 5: 48 83 ec 18 sub $0x18,%rsp 653 | 9: 48 8b 44 24 20 mov 0x20(%rsp),%rax 654 | e: 48 89 44 24 08 mov %rax,0x8(%rsp) 655 | 13: 48 8b 44 24 08 mov 0x8(%rsp),%rax 656 | 18: 48 c7 40 10 00 00 00 movq $0x0,0x10(%rax) 657 | 1f: 00 658 | 20: 48 8b 44 24 08 mov 0x8(%rsp),%rax 659 | 25: 48 c7 40 18 0f 00 00 movq $0xf,0x18(%rax) 660 | 2c: 00 661 | 2d: c6 04 24 00 movb $0x0,(%rsp) 662 | 31: b8 01 00 00 00 mov $0x1,%eax 663 | 36: 48 6b c0 00 imul $0x0,%rax,%rax 664 | 3a: 48 8b 4c 24 08 mov 0x8(%rsp),%rcx 665 | 3f: 0f b6 14 24 movzbl (%rsp),%edx 666 | 43: 88 14 01 mov %dl,(%rcx,%rax,1) 667 | 46: 48 83 c4 18 add $0x18,%rsp 668 | 4a: c3 retq 669 | 670 | Disassembly of section .text$mn: 671 | 672 | 0000000000000000 : 673 | 0: 48 83 ec 28 sub $0x28,%rsp 674 | 4: 48 8d 0d 00 00 00 00 lea 0x0(%rip),%rcx # b 675 | b: e8 00 00 00 00 callq 10 676 | 10: 48 83 c4 28 add $0x28,%rsp 677 | 14: c3 retq 678 | 679 | Disassembly of section .text$mn: 680 | 681 | 0000000000000000 : 682 | 0: 48 83 ec 28 sub $0x28,%rsp 683 | 4: 48 8d 0d 00 00 00 00 lea 0x0(%rip),%rcx # b 684 | b: e8 00 00 00 00 callq 10 685 | 10: 48 83 c4 28 add $0x28,%rsp 686 | 14: c3 retq 687 | 688 | Disassembly of section .text$mn: 689 | 690 | 0000000000000000 : 691 | 0: 4c 89 4c 24 20 mov %r9,0x20(%rsp) 692 | 5: 4c 89 44 24 18 mov 
%r8,0x18(%rsp) 693 | a: 48 89 54 24 10 mov %rdx,0x10(%rsp) 694 | f: 48 89 4c 24 08 mov %rcx,0x8(%rsp) 695 | 14: 48 83 ec 78 sub $0x78,%rsp 696 | 18: 48 8b 84 24 88 00 00 mov 0x88(%rsp),%rax 697 | 1f: 00 698 | 20: 48 8b 8c 24 90 00 00 mov 0x90(%rsp),%rcx 699 | 27: 00 700 | 28: 48 39 48 10 cmp %rcx,0x10(%rax) 701 | 2c: 73 05 jae 33 702 | 2e: e8 00 00 00 00 callq 33 703 | 33: 48 8b 84 24 98 00 00 mov 0x98(%rsp),%rax 704 | 3a: 00 705 | 3b: 48 89 44 24 30 mov %rax,0x30(%rsp) 706 | 40: 48 8b 84 24 88 00 00 mov 0x88(%rsp),%rax 707 | 47: 00 708 | 48: 48 8b 8c 24 90 00 00 mov 0x90(%rsp),%rcx 709 | 4f: 00 710 | 50: 48 8b 40 10 mov 0x10(%rax),%rax 711 | 54: 48 2b c1 sub %rcx,%rax 712 | 57: 48 89 44 24 28 mov %rax,0x28(%rsp) 713 | 5c: 48 8b 44 24 30 mov 0x30(%rsp),%rax 714 | 61: 48 39 44 24 28 cmp %rax,0x28(%rsp) 715 | 66: 73 0c jae 74 716 | 68: 48 8d 44 24 28 lea 0x28(%rsp),%rax 717 | 6d: 48 89 44 24 38 mov %rax,0x38(%rsp) 718 | 72: eb 0a jmp 7e 719 | 74: 48 8d 44 24 30 lea 0x30(%rsp),%rax 720 | 79: 48 89 44 24 38 mov %rax,0x38(%rsp) 721 | 7e: 48 8b 44 24 38 mov 0x38(%rsp),%rax 722 | 83: 48 89 44 24 48 mov %rax,0x48(%rsp) 723 | 88: 48 8b 44 24 48 mov 0x48(%rsp),%rax 724 | 8d: 48 8b 00 mov (%rax),%rax 725 | 90: 48 89 84 24 98 00 00 mov %rax,0x98(%rsp) 726 | 97: 00 727 | 98: 48 8b 84 24 88 00 00 mov 0x88(%rsp),%rax 728 | 9f: 00 729 | a0: 48 39 84 24 80 00 00 cmp %rax,0x80(%rsp) 730 | a7: 00 731 | a8: 75 42 jne ec 732 | aa: 48 8b 84 24 98 00 00 mov 0x98(%rsp),%rax 733 | b1: 00 734 | b2: 48 8b 8c 24 90 00 00 mov 0x90(%rsp),%rcx 735 | b9: 00 736 | ba: 48 03 c8 add %rax,%rcx 737 | bd: 48 8b c1 mov %rcx,%rax 738 | c0: 48 8b d0 mov %rax,%rdx 739 | c3: 48 8b 8c 24 80 00 00 mov 0x80(%rsp),%rcx 740 | ca: 00 741 | cb: e8 00 00 00 00 callq d0 742 | d0: 4c 8b 84 24 90 00 00 mov 0x90(%rsp),%r8 743 | d7: 00 744 | d8: 33 d2 xor %edx,%edx 745 | da: 48 8b 8c 24 80 00 00 mov 0x80(%rsp),%rcx 746 | e1: 00 747 | e2: e8 00 00 00 00 callq e7 748 | e7: e9 b8 00 00 00 jmpq 1a4 749 | ec: 48 8b 94 24 98 
00 00 mov 0x98(%rsp),%rdx 750 | f3: 00 751 | f4: 48 8b 8c 24 80 00 00 mov 0x80(%rsp),%rcx 752 | fb: 00 753 | fc: e8 00 00 00 00 callq 101 754 | 101: 0f b6 c0 movzbl %al,%eax 755 | 104: 85 c0 test %eax,%eax 756 | 106: 0f 84 98 00 00 00 je 1a4 757 | 10c: 48 8b 84 24 88 00 00 mov 0x88(%rsp),%rax 758 | 113: 00 759 | 114: 48 8b c8 mov %rax,%rcx 760 | 117: e8 00 00 00 00 callq 11c 761 | 11c: 48 89 44 24 50 mov %rax,0x50(%rsp) 762 | 121: 48 8b 84 24 80 00 00 mov 0x80(%rsp),%rax 763 | 128: 00 764 | 129: 48 8b c8 mov %rax,%rcx 765 | 12c: e8 00 00 00 00 callq 131 766 | 131: 48 89 44 24 58 mov %rax,0x58(%rsp) 767 | 136: 48 8b 84 24 90 00 00 mov 0x90(%rsp),%rax 768 | 13d: 00 769 | 13e: 48 8b 4c 24 50 mov 0x50(%rsp),%rcx 770 | 143: 48 03 c8 add %rax,%rcx 771 | 146: 48 8b c1 mov %rcx,%rax 772 | 149: 4c 8b 84 24 98 00 00 mov 0x98(%rsp),%r8 773 | 150: 00 774 | 151: 48 8b d0 mov %rax,%rdx 775 | 154: 48 8b 4c 24 58 mov 0x58(%rsp),%rcx 776 | 159: e8 00 00 00 00 callq 15e 777 | 15e: 48 8b 84 24 80 00 00 mov 0x80(%rsp),%rax 778 | 165: 00 779 | 166: 48 89 44 24 40 mov %rax,0x40(%rsp) 780 | 16b: c6 44 24 20 00 movb $0x0,0x20(%rsp) 781 | 170: 48 8b 44 24 40 mov 0x40(%rsp),%rax 782 | 175: 48 8b 8c 24 98 00 00 mov 0x98(%rsp),%rcx 783 | 17c: 00 784 | 17d: 48 89 48 10 mov %rcx,0x10(%rax) 785 | 181: 48 8b 4c 24 40 mov 0x40(%rsp),%rcx 786 | 186: e8 00 00 00 00 callq 18b 787 | 18b: 48 03 84 24 98 00 00 add 0x98(%rsp),%rax 788 | 192: 00 789 | 193: 48 89 44 24 60 mov %rax,0x60(%rsp) 790 | 198: 48 8b 44 24 60 mov 0x60(%rsp),%rax 791 | 19d: 0f b6 4c 24 20 movzbl 0x20(%rsp),%ecx 792 | 1a2: 88 08 mov %cl,(%rax) 793 | 1a4: 48 8b 84 24 80 00 00 mov 0x80(%rsp),%rax 794 | 1ab: 00 795 | 1ac: 48 83 c4 78 add $0x78,%rsp 796 | 1b0: c3 retq 797 | 798 | Disassembly of section .text$mn: 799 | 800 | 0000000000000000 : 801 | 0: 4c 89 44 24 18 mov %r8,0x18(%rsp) 802 | 5: 48 89 54 24 10 mov %rdx,0x10(%rsp) 803 | a: 48 89 4c 24 08 mov %rcx,0x8(%rsp) 804 | f: 48 83 ec 68 sub $0x68,%rsp 805 | 13: 48 8b 44 24 70 mov 
0x70(%rsp),%rax 806 | 18: 48 89 44 24 28 mov %rax,0x28(%rsp) 807 | 1d: 48 8b 4c 24 28 mov 0x28(%rsp),%rcx 808 | 22: e8 00 00 00 00 callq 27 809 | 27: 48 89 44 24 30 mov %rax,0x30(%rsp) 810 | 2c: 48 8b 44 24 78 mov 0x78(%rsp),%rax 811 | 31: 48 39 44 24 30 cmp %rax,0x30(%rsp) 812 | 36: 77 25 ja 5d 813 | 38: 48 8b 44 24 28 mov 0x28(%rsp),%rax 814 | 3d: 48 8b 40 10 mov 0x10(%rax),%rax 815 | 41: 48 8b 4c 24 30 mov 0x30(%rsp),%rcx 816 | 46: 48 03 c8 add %rax,%rcx 817 | 49: 48 8b c1 mov %rcx,%rax 818 | 4c: 48 39 44 24 78 cmp %rax,0x78(%rsp) 819 | 51: 73 0a jae 5d 820 | 53: c7 44 24 24 01 00 00 movl $0x1,0x24(%rsp) 821 | 5a: 00 822 | 5b: eb 08 jmp 65 823 | 5d: c7 44 24 24 00 00 00 movl $0x0,0x24(%rsp) 824 | 64: 00 825 | 65: 0f b6 44 24 24 movzbl 0x24(%rsp),%eax 826 | 6a: 85 c0 test %eax,%eax 827 | 6c: 74 41 je af 828 | 6e: 48 8b 44 24 70 mov 0x70(%rsp),%rax 829 | 73: 48 8b c8 mov %rax,%rcx 830 | 76: e8 00 00 00 00 callq 7b 831 | 7b: 48 89 44 24 40 mov %rax,0x40(%rsp) 832 | 80: 48 8b 44 24 40 mov 0x40(%rsp),%rax 833 | 85: 48 8b 4c 24 78 mov 0x78(%rsp),%rcx 834 | 8a: 48 2b c8 sub %rax,%rcx 835 | 8d: 48 8b c1 mov %rcx,%rax 836 | 90: 4c 8b 8c 24 80 00 00 mov 0x80(%rsp),%r9 837 | 97: 00 838 | 98: 4c 8b c0 mov %rax,%r8 839 | 9b: 48 8b 54 24 70 mov 0x70(%rsp),%rdx 840 | a0: 48 8b 4c 24 70 mov 0x70(%rsp),%rcx 841 | a5: e8 00 00 00 00 callq aa 842 | aa: e9 8a 00 00 00 jmpq 139 843 | af: 48 8b 94 24 80 00 00 mov 0x80(%rsp),%rdx 844 | b6: 00 845 | b7: 48 8b 4c 24 70 mov 0x70(%rsp),%rcx 846 | bc: e8 00 00 00 00 callq c1 847 | c1: 0f b6 c0 movzbl %al,%eax 848 | c4: 85 c0 test %eax,%eax 849 | c6: 74 6c je 134 850 | c8: 48 8b 44 24 70 mov 0x70(%rsp),%rax 851 | cd: 48 8b c8 mov %rax,%rcx 852 | d0: e8 00 00 00 00 callq d5 853 | d5: 48 89 44 24 48 mov %rax,0x48(%rsp) 854 | da: 4c 8b 84 24 80 00 00 mov 0x80(%rsp),%r8 855 | e1: 00 856 | e2: 48 8b 54 24 78 mov 0x78(%rsp),%rdx 857 | e7: 48 8b 4c 24 48 mov 0x48(%rsp),%rcx 858 | ec: e8 00 00 00 00 callq f1 859 | f1: 48 8b 44 24 70 mov 
0x70(%rsp),%rax 860 | f6: 48 89 44 24 38 mov %rax,0x38(%rsp) 861 | fb: c6 44 24 20 00 movb $0x0,0x20(%rsp) 862 | 100: 48 8b 44 24 38 mov 0x38(%rsp),%rax 863 | 105: 48 8b 8c 24 80 00 00 mov 0x80(%rsp),%rcx 864 | 10c: 00 865 | 10d: 48 89 48 10 mov %rcx,0x10(%rax) 866 | 111: 48 8b 4c 24 38 mov 0x38(%rsp),%rcx 867 | 116: e8 00 00 00 00 callq 11b 868 | 11b: 48 03 84 24 80 00 00 add 0x80(%rsp),%rax 869 | 122: 00 870 | 123: 48 89 44 24 50 mov %rax,0x50(%rsp) 871 | 128: 48 8b 44 24 50 mov 0x50(%rsp),%rax 872 | 12d: 0f b6 4c 24 20 movzbl 0x20(%rsp),%ecx 873 | 132: 88 08 mov %cl,(%rax) 874 | 134: 48 8b 44 24 70 mov 0x70(%rsp),%rax 875 | 139: 48 83 c4 68 add $0x68,%rsp 876 | 13d: c3 retq 877 | 878 | Disassembly of section .text$mn: 879 | 880 | 0000000000000000 : 881 | 0: 4c 89 44 24 18 mov %r8,0x18(%rsp) 882 | 5: 48 89 54 24 10 mov %rdx,0x10(%rsp) 883 | a: 48 89 4c 24 08 mov %rcx,0x8(%rsp) 884 | f: 48 83 ec 68 sub $0x68,%rsp 885 | 13: 48 8b 44 24 70 mov 0x70(%rsp),%rax 886 | 18: 48 8b 4c 24 78 mov 0x78(%rsp),%rcx 887 | 1d: 48 39 48 10 cmp %rcx,0x10(%rax) 888 | 21: 73 05 jae 28 889 | 23: e8 00 00 00 00 callq 28 890 | 28: 48 8b 44 24 70 mov 0x70(%rsp),%rax 891 | 2d: 48 8b 4c 24 78 mov 0x78(%rsp),%rcx 892 | 32: 48 8b 40 10 mov 0x10(%rax),%rax 893 | 36: 48 2b c1 sub %rcx,%rax 894 | 39: 48 3b 84 24 80 00 00 cmp 0x80(%rsp),%rax 895 | 40: 00 896 | 41: 77 42 ja 85 897 | 43: 48 8b 44 24 70 mov 0x70(%rsp),%rax 898 | 48: 48 89 44 24 30 mov %rax,0x30(%rsp) 899 | 4d: c6 44 24 20 00 movb $0x0,0x20(%rsp) 900 | 52: 48 8b 44 24 30 mov 0x30(%rsp),%rax 901 | 57: 48 8b 4c 24 78 mov 0x78(%rsp),%rcx 902 | 5c: 48 89 48 10 mov %rcx,0x10(%rax) 903 | 60: 48 8b 4c 24 30 mov 0x30(%rsp),%rcx 904 | 65: e8 00 00 00 00 callq 6a 905 | 6a: 48 03 44 24 78 add 0x78(%rsp),%rax 906 | 6f: 48 89 44 24 48 mov %rax,0x48(%rsp) 907 | 74: 48 8b 44 24 48 mov 0x48(%rsp),%rax 908 | 79: 0f b6 4c 24 20 movzbl 0x20(%rsp),%ecx 909 | 7e: 88 08 mov %cl,(%rax) 910 | 80: e9 af 00 00 00 jmpq 134 911 | 85: 48 83 bc 24 80 00 00 cmpq 
$0x0,0x80(%rsp) 912 | 8c: 00 00 913 | 8e: 0f 84 a0 00 00 00 je 134 914 | 94: 48 8b 44 24 70 mov 0x70(%rsp),%rax 915 | 99: 48 8b c8 mov %rax,%rcx 916 | 9c: e8 00 00 00 00 callq a1 917 | a1: 48 03 44 24 78 add 0x78(%rsp),%rax 918 | a6: 48 89 44 24 38 mov %rax,0x38(%rsp) 919 | ab: 48 8b 44 24 70 mov 0x70(%rsp),%rax 920 | b0: 48 8b 8c 24 80 00 00 mov 0x80(%rsp),%rcx 921 | b7: 00 922 | b8: 48 8b 40 10 mov 0x10(%rax),%rax 923 | bc: 48 2b c1 sub %rcx,%rax 924 | bf: 48 89 44 24 28 mov %rax,0x28(%rsp) 925 | c4: 48 8b 44 24 78 mov 0x78(%rsp),%rax 926 | c9: 48 8b 4c 24 28 mov 0x28(%rsp),%rcx 927 | ce: 48 2b c8 sub %rax,%rcx 928 | d1: 48 8b c1 mov %rcx,%rax 929 | d4: 48 8b 8c 24 80 00 00 mov 0x80(%rsp),%rcx 930 | db: 00 931 | dc: 48 8b 54 24 38 mov 0x38(%rsp),%rdx 932 | e1: 48 03 d1 add %rcx,%rdx 933 | e4: 48 8b ca mov %rdx,%rcx 934 | e7: 4c 8b c0 mov %rax,%r8 935 | ea: 48 8b d1 mov %rcx,%rdx 936 | ed: 48 8b 4c 24 38 mov 0x38(%rsp),%rcx 937 | f2: e8 00 00 00 00 callq f7 938 | f7: 48 8b 44 24 70 mov 0x70(%rsp),%rax 939 | fc: 48 89 44 24 40 mov %rax,0x40(%rsp) 940 | 101: c6 44 24 21 00 movb $0x0,0x21(%rsp) 941 | 106: 48 8b 44 24 40 mov 0x40(%rsp),%rax 942 | 10b: 48 8b 4c 24 28 mov 0x28(%rsp),%rcx 943 | 110: 48 89 48 10 mov %rcx,0x10(%rax) 944 | 114: 48 8b 4c 24 40 mov 0x40(%rsp),%rcx 945 | 119: e8 00 00 00 00 callq 11e 946 | 11e: 48 03 44 24 28 add 0x28(%rsp),%rax 947 | 123: 48 89 44 24 50 mov %rax,0x50(%rsp) 948 | 128: 48 8b 44 24 50 mov 0x50(%rsp),%rax 949 | 12d: 0f b6 4c 24 21 movzbl 0x21(%rsp),%ecx 950 | 132: 88 08 mov %cl,(%rax) 951 | 134: 48 8b 44 24 70 mov 0x70(%rsp),%rax 952 | 139: 48 83 c4 68 add $0x68,%rsp 953 | 13d: c3 retq 954 | 955 | Disassembly of section .text$mn: 956 | 957 | 0000000000000000 : 958 | 0: 48 89 54 24 10 mov %rdx,0x10(%rsp) 959 | 5: 48 89 4c 24 08 mov %rcx,0x8(%rsp) 960 | a: 48 83 ec 48 sub $0x48,%rsp 961 | e: 48 8b 44 24 50 mov 0x50(%rsp),%rax 962 | 13: 48 8b 4c 24 58 mov 0x58(%rsp),%rcx 963 | 18: 48 39 48 10 cmp %rcx,0x10(%rax) 964 | 1c: 73 05 jae 
23 965 | 1e: e8 00 00 00 00 callq 23 966 | 23: 48 8b 44 24 50 mov 0x50(%rsp),%rax 967 | 28: 48 89 44 24 28 mov %rax,0x28(%rsp) 968 | 2d: c6 44 24 20 00 movb $0x0,0x20(%rsp) 969 | 32: 48 8b 44 24 28 mov 0x28(%rsp),%rax 970 | 37: 48 8b 4c 24 58 mov 0x58(%rsp),%rcx 971 | 3c: 48 89 48 10 mov %rcx,0x10(%rax) 972 | 40: 48 8b 4c 24 28 mov 0x28(%rsp),%rcx 973 | 45: e8 00 00 00 00 callq 4a 974 | 4a: 48 03 44 24 58 add 0x58(%rsp),%rax 975 | 4f: 48 89 44 24 30 mov %rax,0x30(%rsp) 976 | 54: 48 8b 44 24 30 mov 0x30(%rsp),%rax 977 | 59: 0f b6 4c 24 20 movzbl 0x20(%rsp),%ecx 978 | 5e: 88 08 mov %cl,(%rax) 979 | 60: 48 8b 44 24 50 mov 0x50(%rsp),%rax 980 | 65: 48 83 c4 48 add $0x48,%rsp 981 | 69: c3 retq 982 | 983 | Disassembly of section .text$mn: 984 | 985 | 0000000000000000 : 986 | 0: 4c 89 44 24 18 mov %r8,0x18(%rsp) 987 | 5: 48 89 54 24 10 mov %rdx,0x10(%rsp) 988 | a: 48 89 4c 24 08 mov %rcx,0x8(%rsp) 989 | f: 48 83 ec 48 sub $0x48,%rsp 990 | 13: 48 8b 44 24 58 mov 0x58(%rsp),%rax 991 | 18: 48 8b c8 mov %rax,%rcx 992 | 1b: e8 00 00 00 00 callq 20 993 | 20: 48 89 44 24 30 mov %rax,0x30(%rsp) 994 | 25: 48 8b 44 24 50 mov 0x50(%rsp),%rax 995 | 2a: 48 8b c8 mov %rax,%rcx 996 | 2d: e8 00 00 00 00 callq 32 997 | 32: 48 89 44 24 38 mov %rax,0x38(%rsp) 998 | 37: 48 8b 44 24 58 mov 0x58(%rsp),%rax 999 | 3c: 48 8b 40 10 mov 0x10(%rax),%rax 1000 | 40: 48 89 44 24 20 mov %rax,0x20(%rsp) 1001 | 45: 4c 8b 4c 24 30 mov 0x30(%rsp),%r9 1002 | 4a: 4c 8b 44 24 60 mov 0x60(%rsp),%r8 1003 | 4f: 48 8b 44 24 50 mov 0x50(%rsp),%rax 1004 | 54: 48 8b 50 10 mov 0x10(%rax),%rdx 1005 | 58: 48 8b 4c 24 38 mov 0x38(%rsp),%rcx 1006 | 5d: e8 00 00 00 00 callq 62 1007 | 62: 48 83 c4 48 add $0x48,%rsp 1008 | 66: c3 retq 1009 | 1010 | Disassembly of section .text$mn: 1011 | 1012 | 0000000000000000 : 1013 | 0: 88 54 24 10 mov %dl,0x10(%rsp) 1014 | 4: 48 89 4c 24 08 mov %rcx,0x8(%rsp) 1015 | 9: 48 83 ec 58 sub $0x58,%rsp 1016 | d: 48 8b 44 24 60 mov 0x60(%rsp),%rax 1017 | 12: 48 89 44 24 30 mov %rax,0x30(%rsp) 
1018 | 17: 48 8b 44 24 30 mov 0x30(%rsp),%rax 1019 | 1c: 48 83 c0 10 add $0x10,%rax 1020 | 20: 48 89 44 24 28 mov %rax,0x28(%rsp) 1021 | 25: 48 8b 44 24 28 mov 0x28(%rsp),%rax 1022 | 2a: 48 8b 4c 24 30 mov 0x30(%rsp),%rcx 1023 | 2f: 48 8b 49 18 mov 0x18(%rcx),%rcx 1024 | 33: 48 39 08 cmp %rcx,(%rax) 1025 | 36: 75 18 jne 50 1026 | 38: 48 8b 44 24 28 mov 0x28(%rsp),%rax 1027 | 3d: 48 8b 00 mov (%rax),%rax 1028 | 40: 48 ff c0 inc %rax 1029 | 43: 48 8b d0 mov %rax,%rdx 1030 | 46: 48 8b 4c 24 60 mov 0x60(%rsp),%rcx 1031 | 4b: e8 00 00 00 00 callq 50 1032 | 50: 48 8b 4c 24 30 mov 0x30(%rsp),%rcx 1033 | 55: e8 00 00 00 00 callq 5a 1034 | 5a: 48 89 44 24 38 mov %rax,0x38(%rsp) 1035 | 5f: 48 8b 44 24 28 mov 0x28(%rsp),%rax 1036 | 64: 48 8b 00 mov (%rax),%rax 1037 | 67: 48 8b 4c 24 38 mov 0x38(%rsp),%rcx 1038 | 6c: 48 03 c8 add %rax,%rcx 1039 | 6f: 48 8b c1 mov %rcx,%rax 1040 | 72: 48 89 44 24 40 mov %rax,0x40(%rsp) 1041 | 77: 48 8b 44 24 40 mov 0x40(%rsp),%rax 1042 | 7c: 0f b6 4c 24 68 movzbl 0x68(%rsp),%ecx 1043 | 81: 88 08 mov %cl,(%rax) 1044 | 83: 48 8b 44 24 28 mov 0x28(%rsp),%rax 1045 | 88: 48 8b 00 mov (%rax),%rax 1046 | 8b: 48 ff c0 inc %rax 1047 | 8e: 48 8b 4c 24 28 mov 0x28(%rsp),%rcx 1048 | 93: 48 89 01 mov %rax,(%rcx) 1049 | 96: c6 44 24 20 00 movb $0x0,0x20(%rsp) 1050 | 9b: 48 8b 44 24 28 mov 0x28(%rsp),%rax 1051 | a0: 48 8b 00 mov (%rax),%rax 1052 | a3: 48 8b 4c 24 38 mov 0x38(%rsp),%rcx 1053 | a8: 48 03 c8 add %rax,%rcx 1054 | ab: 48 8b c1 mov %rcx,%rax 1055 | ae: 48 89 44 24 48 mov %rax,0x48(%rsp) 1056 | b3: 48 8b 44 24 48 mov 0x48(%rsp),%rax 1057 | b8: 0f b6 4c 24 20 movzbl 0x20(%rsp),%ecx 1058 | bd: 88 08 mov %cl,(%rax) 1059 | bf: 48 83 c4 58 add $0x58,%rsp 1060 | c3: c3 retq 1061 | -------------------------------------------------------------------------------- /cxx/string.bench.cpp: -------------------------------------------------------------------------------- 1 | #include "benchmark/benchmark.h" 2 | #include "test_configs.h" 3 | #include "test_utils.h" 
4 | 5 | #include 6 | #include 7 | 8 | static inline void BM_find_util(benchmark::State& state, const std::string &s1, 9 | const std::string &s2, std::string::size_type ex) { 10 | std::string::size_type pos = std::string::npos; 11 | while (state.KeepRunning()) { 12 | pos = s1.find(s2); 13 | benchmark::DoNotOptimize(pos); 14 | } 15 | assert (pos == ex); 16 | } 17 | 18 | static void BM_find(benchmark::State& state) { 19 | int N = state.range(0); 20 | int ss2_sz = N/16; 21 | std::string s1(N, 0); 22 | std::string s2(ss2_sz, 0); 23 | fill_random_chars(s1.begin(), s1.end(), true); 24 | fill_random_chars(s2.begin(), s2.end(), false); 25 | BM_find_util(state, s1, s2, std::string::npos); 26 | state.SetComplexityN(N); 27 | } 28 | 29 | // Benchmark when there is no match. 30 | static void BM_find_no_match(benchmark::State& state) { 31 | int N = state.range(0); 32 | std::string s1(N, '-'); 33 | std::string s2(N/8, '*'); 34 | BM_find_util(state, s1, s2, std::string::npos); 35 | state.SetComplexityN(N); 36 | } 37 | 38 | // Benchmark when the string matches first time. 39 | static void BM_find_all_match(benchmark::State& state) { 40 | int N = state.range(0); 41 | std::string s1(N, '-'); 42 | std::string s2(N, '-'); 43 | BM_find_util(state, s1, s2, 0); 44 | state.SetComplexityN(N); 45 | } 46 | 47 | // Benchmark when the string matches somewhere in the end. 48 | static void BM_find_match1(benchmark::State& state) { 49 | int N = state.range(0); 50 | std::string s1(N, '*'); 51 | s1 += std::string(N/4, '-'); 52 | std::string s2(N/4, '-'); 53 | BM_find_util(state, s1, s2, N); 54 | state.SetComplexityN(N); 55 | } 56 | 57 | // Benchmark when the string matches somewhere from middle to the end. 
58 | static void BM_find_match2(benchmark::State& state) { 59 | int N = state.range(0); 60 | std::string s1(N, '*'); 61 | s1 += std::string(N/2, '-'); 62 | s1 += std::string(N/2, '*'); 63 | std::string s2(N/4, '-'); 64 | BM_find_util(state, s1, s2, N); 65 | state.SetComplexityN(N); 66 | } 67 | 68 | static void BM_strcat(benchmark::State& state) { 69 | int N = state.range(0); 70 | int s2_sz = N/16; 71 | std::string s1(N, 0); 72 | std::string s2(s2_sz, 0); 73 | fill_random_chars(s1.begin(), s1.end(), true); 74 | fill_random_chars(s2.begin(), s2.end(), false); 75 | int s1_sz = 1; 76 | while (state.KeepRunning()) { 77 | benchmark::DoNotOptimize(s1.append(s2)); 78 | s1_sz += s2_sz; 79 | if (s1_sz >= N) { 80 | // 81 | } 82 | } 83 | state.SetComplexityN(N); 84 | } 85 | 86 | //static const int MSize = L1; 87 | COMPLEXITY_BENCHMARK(BM_find, MSize); 88 | COMPLEXITY_BENCHMARK(BM_find_no_match, MSize); 89 | COMPLEXITY_BENCHMARK(BM_find_all_match, MSize); 90 | COMPLEXITY_BENCHMARK(BM_find_match1, MSize); 91 | COMPLEXITY_BENCHMARK(BM_find_match2, MSize); 92 | COMPLEXITY_BENCHMARK(BM_strcat, MSize); 93 | BENCHMARK_MAIN() 94 | 95 | -------------------------------------------------------------------------------- /cxx/stringstream.bench.cpp: -------------------------------------------------------------------------------- 1 | #include "benchmark/benchmark.h" 2 | #include "test_configs.h" 3 | 4 | #include 5 | double ATTR_NOINLINE istream_numbers() { 6 | const char *a[] = { 7 | "-6 69 -71 2.4882e-02 -100 101 -2.00005 5000000 -50000000", 8 | "-25 71 7 -9.3262e+01 -100 101 -2.00005 5000000 -50000000", 9 | "-14 53 46 -6.7026e-02 -100 101 -2.00005 5000000 -50000000" 10 | }; 11 | 12 | int a1, a2, a3, a4, a5, a6, a7; 13 | double f1 = 0.0, f2 = 0.0, q = 0.0; 14 | for (int i=0; i < 3; i++) { 15 | std::istringstream s(a[i]); 16 | s >> a1 17 | >> a2 18 | >> a3 19 | >> f1 20 | >> a4 21 | >> a5 22 | >> f2 23 | >> a6 24 | >> a7; 25 | q += (a1 + a2 + a3 + a4 + a5 + a6 + a7 + f1 + f2)/1000000; 26 | } 
27 | return q; 28 | } 29 | 30 | static void BM_Istream_numbers(benchmark::State &state) { 31 | double i = 0; 32 | while (state.KeepRunning()) 33 | benchmark::DoNotOptimize(i += istream_numbers()); 34 | } 35 | 36 | BASIC_BENCHMARK_TEST(BM_Istream_numbers); 37 | 38 | BENCHMARK_MAIN() 39 | -------------------------------------------------------------------------------- /cxx/umap.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include // Requires a C++11 compiler. 3 | #include 4 | #include 5 | #include // gettimeofday 6 | #include // strtol 7 | 8 | double elapsed(const timeval& tstart, const timeval& tstop) 9 | { 10 | return 11 | static_cast(tstop.tv_sec - tstart.tv_sec) + 12 | static_cast(tstop.tv_usec - tstart.tv_usec)*1.e-6; 13 | } 14 | 15 | int main(int argc, char **argv) 16 | { 17 | // Timing objects 18 | timeval tstart, tstop; 19 | 20 | // Possibly read number of trials from command line 21 | unsigned n_trials = static_cast(1e9); 22 | if (argc > 1) 23 | { 24 | double nt; 25 | std::stringstream ss; 26 | ss << argv[1]; 27 | ss >> nt; 28 | n_trials = static_cast(nt); 29 | } 30 | std::cout << "Performing " << n_trials << " trials." << std::endl; 31 | 32 | // Possibly read number of map entries from command line 33 | unsigned N = 16; 34 | if (argc > 2) 35 | N = strtol(argv[2], NULL, 10); 36 | std::cout << "Containers have " << N << " entries." << std::endl; 37 | 38 | 39 | // Typedefs 40 | typedef std::map MapType; 41 | typedef std::unordered_map UnorderedMapType; 42 | 43 | // typedef std::multimap MapType; 44 | // typedef std::unordered_multimap UnorderedMapType; 45 | 46 | // The test objects 47 | MapType m; 48 | UnorderedMapType um; 49 | 50 | for (unsigned i=0; ifirst; 65 | } 66 | gettimeofday (&tstop, NULL); 67 | std::cout << "map::find() with a single key known to be in the map: " << elapsed(tstart, tstop) << std::endl; 68 | } 69 | 70 | // 2.) 
71 | { 72 | gettimeofday (&tstart, NULL); 73 | // We use 'dummy' to prevent clang from completely optimizing out the call to find(). 74 | unsigned dummy = 0; 75 | for (unsigned trial=0; trialfirst; 79 | } 80 | gettimeofday (&tstop, NULL); 81 | std::cout << "unordered_map::find() with a single key known to be in the map: " << elapsed(tstart, tstop) << std::endl; 82 | } 83 | 84 | return 0; 85 | } 86 | 87 | -------------------------------------------------------------------------------- /cxx/umap2.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include // Requires a C++11 compiler. 3 | #include 4 | #include 5 | #include // gettimeofday 6 | #include // strtol 7 | 8 | int identity(int); 9 | 10 | double elapsed(const timeval& tstart, const timeval& tstop) 11 | { 12 | return 13 | static_cast(tstop.tv_sec - tstart.tv_sec) + 14 | static_cast(tstop.tv_usec - tstart.tv_usec)*1.e-6; 15 | } 16 | 17 | int main(int argc, char **argv) 18 | { 19 | // Timing objects 20 | timeval tstart, tstop; 21 | 22 | // Possibly read number of trials from command line 23 | unsigned n_trials = static_cast(1e9); 24 | if (argc > 1) 25 | { 26 | double nt; 27 | std::stringstream ss; 28 | ss << argv[1]; 29 | ss >> nt; 30 | n_trials = static_cast(nt); 31 | } 32 | std::cout << "Performing " << n_trials << " trials." << std::endl; 33 | 34 | // Possibly read number of map entries from command line 35 | unsigned N = 16; 36 | if (argc > 2) 37 | N = strtol(argv[2], NULL, 10); 38 | std::cout << "Containers have " << N << " entries." 
<< std::endl; 39 | 40 | 41 | // Typedefs 42 | typedef std::map MapType; 43 | typedef std::unordered_map UnorderedMapType; 44 | 45 | // typedef std::multimap MapType; 46 | // typedef std::unordered_multimap UnorderedMapType; 47 | 48 | // The test objects 49 | MapType m; 50 | UnorderedMapType um; 51 | 52 | for (unsigned i=0; ifirst; 68 | } 69 | gettimeofday (&tstop, NULL); 70 | std::cout << "map::find() with a single key known to be in the map: " << elapsed(tstart, tstop) << std::endl; 71 | } 72 | */ 73 | 74 | // 2.) 75 | { 76 | gettimeofday (&tstart, NULL); 77 | // We use 'dummy' to prevent clang from completely optimizing out the call to find(). 78 | volatile unsigned dummy = 0; 79 | const int key2 = N / 2; 80 | for (unsigned trial=0; trialfirst; 85 | } 86 | gettimeofday (&tstop, NULL); 87 | std::cout << "unordered_map::find() with a single key known to be in the map: " << elapsed(tstart, tstop) << std::endl; 88 | std::cout << "Found " << dummy << std::endl; 89 | } 90 | 91 | return 0; 92 | } 93 | -------------------------------------------------------------------------------- /docs/Bibliography.bib: -------------------------------------------------------------------------------- 1 | @article{stroustrup2012, 2 | title={Software development for infrastructure}, 3 | author={Stroustrup, Bjarne}, 4 | journal={IEEE Computer}, 5 | volume={45}, 6 | number={1}, 7 | pages={47--58}, 8 | year={2012} 9 | } 10 | 11 | @book{clrs, 12 | title={Introduction to algorithms}, 13 | author={Thomas H.. 
Cormen and Leiserson, Charles Eric and Rivest, Ronald L and Stein, Clifford}, 14 | volume={6}, 15 | year={2001}, 16 | publisher={MIT press Cambridge} 17 | } 18 | 19 | @online{googlebench, 20 | title={google-benchmark: A microbenchmark support library}, 21 | year={2014}, 22 | publisher={https://github.com/google/benchmark} 23 | } 24 | 25 | @article{c++fcd, 26 | title={C++ standard}, 27 | } 28 | 29 | @article{c++perftr, 30 | title={Technical report on C++ performance}, 31 | author={Goldthwaite, Lois}, 32 | journal={ISO/IEC PDTR}, 33 | volume={18015}, 34 | url={http://www.open-std.org/jtc1/sc22/wg21/docs/TR18015.pdf}, 35 | year={2006} 36 | } 37 | 38 | @inproceedings{muller, 39 | title={Abstraction benchmarks and performance of C++ applications}, 40 | author={M{\"u}ller, Matthias}, 41 | booktitle={Proceedings of the Fourth International Conference on Supercomputing in Nuclear Applications}, 42 | year={2000}, 43 | organization={Citeseer} 44 | } 45 | 46 | @article{veldhuizen1997scientific, 47 | title={Scientific computing: C++ versus Fortran}, 48 | author={Veldhuizen, Todd}, 49 | journal={DOCTOR DOBBS JOURNAL}, 50 | volume={22}, 51 | pages={34--41}, 52 | year={1997}, 53 | publisher={M AND T PUBLISHING INC} 54 | } 55 | Run-time automatic instantiation of algorithms using C++ templates 56 | by T. Quintino, H. Deconinck 57 | International Journal of Computational Science and Engineering (IJCSE), Vol. 4, No. 
4, 2009 58 | 59 | Tests: 60 | http://www.open-std.org/jtc1/sc22/wg21/docs/ 61 | http://www.open-std.org/jtc1/sc22/wg21/docs/D_5.cpp -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | paper = std-benchmark 2 | slides = slides 3 | $(paper).pdf: $(paper).tex Bibliography.bib 4 | pdflatex -shell-escape $(paper) 5 | bibtex $(paper) 6 | pdflatex -shell-escape $(paper) 7 | pdflatex -shell-escape $(paper) 8 | 9 | abstract: 10 | pdflatex -shell-escape abstract-cppcon 11 | 12 | $(slides).pdf: $(slides).tex 13 | pdflatex -shell-escape $(slides) 14 | 15 | latexmk: $(paper).tex Bibliography.bib 16 | latexmk -pdf -pvc $< -pdflatex="pdflatex --shell-escape %O %S" 17 | 18 | clean: 19 | rm -rf *.aux *.bbl *.blg *.log *.out *.pdf *.dot 20 | 21 | all: $(paper).pdf 22 | 23 | push: 24 | git pull --rebase origin master 25 | git push origin master 26 | 27 | dependences: 28 | apt-get install texlive-latex-extra texlive-fonts-recommended texlive-latex-base 29 | -------------------------------------------------------------------------------- /docs/abstract-cppcon.tex: -------------------------------------------------------------------------------- 1 | \documentclass[10pt]{article} 2 | \usepackage[margin=0.5in]{geometry} 3 | \usepackage{hyperref} 4 | \usepackage{comment} 5 | \usepackage{amsmath} 6 | \usepackage{graphicx} 7 | \usepackage{amssymb} 8 | \usepackage{graphviz} 9 | \usepackage{auto-pst-pdf} 10 | \usepackage{etoolbox} 11 | \usepackage{flushend} 12 | \usepackage{needspace} 13 | %\usepackage{authblk} 14 | 15 | \begin{document} 16 | \title{Performance analysis and optimization of system libraries} 17 | 18 | \date{} 19 | \maketitle 20 | 21 | \section*{Summary} 22 | C/C$++$ programs are widely used in performance critical applications, as such, 23 | they are expected to be very efficient. 
However, experimental results show 24 | opportunities for improvements in some of the most commonly used data structures 25 | and algorithms. 26 | 27 | We will present the performance analysis work on widely used system libraries 28 | like libc$++$, libstdc$++$, zlib, libziparchive (Android core), the improvements 29 | we did to these libraries and to the GCC and LLVM compilers to optimize 30 | them. This includes our contributions to standard library algorithms like 31 | string::find, libc$++$::basic\_streambuf::xsgetn, and libc$++$::locale. We 32 | improved these suboptimal algorithms, particularly string::find which improved 33 | by more than 10x. Similarly, we enabled the inlining of constructor and 34 | destructor of libc$++$::string. We also improved some of the hottest algorithms 35 | in zlib and libziparchive by several factors. We will highlight useful 36 | optimization tricks that we used as part of optimizing these libraries. 37 | 38 | We will present a systematic analysis of C++ standard libraries which enabled us 39 | to expose differences in their design as well as their dynamic behavior. We will 40 | present a comparative analysis of libc$++$ vs. libstdc$++$ vs. Microsoft's C++ 41 | standard library on commonly used data structures and algorithms based on our 42 | std-benchmark (https://github.com/hiraditya/std-benchmark), that we started 43 | developing to help analyze standard C$++$ libraries. We will discuss the 44 | performance issues with libc$++$::stringstream and libc$++$::sort that we are 45 | currently working on. We will also present the lessons learned as a result of 46 | analyzing C$++$ standard libraries, for example: 47 | \begin{enumerate} 48 | \item Iterator based algorithms can lose information and hence, can result in 49 | suboptimal performance. 
This is exemplified in the implementation of 50 | std::rotate where we can just exchange a few pointers and avoid several useless 51 | copies should the underlying container be a doubly linked list, e.g., 52 | std::list. 53 | \item The C++ programming language has a limitation that the constructor and 54 | destructor cannot be const qualified, which could have facilitated useful 55 | compiler optimizations like removing the destructor of a const std::string 56 | when the string is small enough to be kept on the stack. This problem was 57 | pointed out a long time ago but there seems to have been no follow-up 58 | (wg21/docs/papers/1995/N0798.htm). 59 | \end{enumerate} 60 | 61 | Keywords: C++, performance analysis, benchmarking libraries, compiler 62 | optimization, GCC, LLVM, libstdc$++$, libc$++$ 63 | \\ 64 | \\ 65 | Reference to previous talks: \url{http://sched.co/A8J7}, \url{http://sched.co/8Yzk} 66 | \end{document} 67 | -------------------------------------------------------------------------------- /docs/acmcopyright.sty: -------------------------------------------------------------------------------- 1 | %% 2 | %% This is file `acmcopyright.sty', 3 | %% generated with the docstrip utility. 4 | %% 5 | %% The original source files were: 6 | %% 7 | %% acmcopyright.dtx (with options: `style') 8 | %% 9 | %% IMPORTANT NOTICE: 10 | %% 11 | %% For the copyright see the source file. 12 | %% 13 | %% Any modified versions of this file must be renamed 14 | %% with new filenames distinct from acmcopyright.sty. 15 | %% 16 | %% For distribution of the original source see the terms 17 | %% for copying and modification in the file acmcopyright.dtx. 18 | %% 19 | %% This generated file may be distributed as long as the 20 | %% original source files, as listed above, are part of the 21 | %% same distribution. (The sources need not necessarily be 22 | %% in the same archive or directory.)
23 | %% \CharacterTable 24 | %% {Upper-case \A\B\C\D\E\F\G\H\I\J\K\L\M\N\O\P\Q\R\S\T\U\V\W\X\Y\Z 25 | %% Lower-case \a\b\c\d\e\f\g\h\i\j\k\l\m\n\o\p\q\r\s\t\u\v\w\x\y\z 26 | %% Digits \0\1\2\3\4\5\6\7\8\9 27 | %% Exclamation \! Double quote \" Hash (number) \# 28 | %% Dollar \$ Percent \% Ampersand \& 29 | %% Acute accent \' Left paren \( Right paren \) 30 | %% Asterisk \* Plus \+ Comma \, 31 | %% Minus \- Point \. Solidus \/ 32 | %% Colon \: Semicolon \; Less than \< 33 | %% Equals \= Greater than \> Question mark \? 34 | %% Commercial at \@ Left bracket \[ Backslash \\ 35 | %% Right bracket \] Circumflex \^ Underscore \_ 36 | %% Grave accent \` Left brace \{ Vertical bar \| 37 | %% Right brace \} Tilde \~} 38 | \NeedsTeXFormat{LaTeX2e} 39 | \ProvidesPackage{acmcopyright} 40 | [2014/06/29 v1.2 Copyright statemens for ACM classes] 41 | \newif\if@printcopyright 42 | \@printcopyrighttrue 43 | \newif\if@printpermission 44 | \@printpermissiontrue 45 | \newif\if@acmowned 46 | \@acmownedtrue 47 | \RequirePackage{xkeyval} 48 | \define@choicekey*{ACM@}{acmcopyrightmode}[% 49 | \acm@copyrightinput\acm@copyrightmode]{none,acmcopyright,acmlicensed,% 50 | rightsretained,usgov,usgovmixed,cagov,cagovmixed,% 51 | licensedusgovmixed,licensedcagovmixed,othergov,licensedothergov}{% 52 | \@printpermissiontrue 53 | \@printcopyrighttrue 54 | \@acmownedtrue 55 | \ifnum\acm@copyrightmode=0\relax % none 56 | \@printpermissionfalse 57 | \@printcopyrightfalse 58 | \@acmownedfalse 59 | \fi 60 | \ifnum\acm@copyrightmode=2\relax % acmlicensed 61 | \@acmownedfalse 62 | \fi 63 | \ifnum\acm@copyrightmode=3\relax % rightsretained 64 | \@acmownedfalse 65 | \fi 66 | \ifnum\acm@copyrightmode=4\relax % usgov 67 | \@printpermissiontrue 68 | \@printcopyrightfalse 69 | \@acmownedfalse 70 | \fi 71 | \ifnum\acm@copyrightmode=6\relax % cagov 72 | \@acmownedfalse 73 | \fi 74 | \ifnum\acm@copyrightmode=8\relax % licensedusgovmixed 75 | \@acmownedfalse 76 | \fi 77 | \ifnum\acm@copyrightmode=9\relax % 
licensedcagovmixed 78 | \@acmownedfalse 79 | \fi 80 | \ifnum\acm@copyrightmode=10\relax % othergov 81 | \@acmownedtrue 82 | \fi 83 | \ifnum\acm@copyrightmode=11\relax % licensedothergov 84 | \@acmownedfalse 85 | \@printcopyrightfalse 86 | \fi} 87 | \def\setcopyright#1{\setkeys{ACM@}{acmcopyrightmode=#1}} 88 | \setcopyright{acmcopyright} 89 | \def\@copyrightowner{% 90 | \ifcase\acm@copyrightmode\relax % none 91 | \or % acmcopyright 92 | ACM. 93 | \or % acmlicensed 94 | Copyright held by the owner/author(s). Publication rights licensed to 95 | ACM. 96 | \or % rightsretained 97 | Copyright held by the owner/author(s). 98 | \or % usgov 99 | \or % usgovmixed 100 | ACM. 101 | \or % cagov 102 | Crown in Right of Canada. 103 | \or %cagovmixed 104 | ACM. 105 | \or %licensedusgovmixed 106 | Copyright held by the owner/author(s). Publication rights licensed to 107 | ACM. 108 | \or %licensedcagovmixed 109 | Copyright held by the owner/author(s). Publication rights licensed to 110 | ACM. 111 | \or % othergov 112 | ACM. 113 | \or % licensedothergov 114 | \fi} 115 | \def\@copyrightpermission{% 116 | \ifcase\acm@copyrightmode\relax % none 117 | \or % acmcopyright 118 | Permission to make digital or hard copies of all or part of this 119 | work for personal or classroom use is granted without fee provided 120 | that copies are not made or distributed for profit or commercial 121 | advantage and that copies bear this notice and the full citation on 122 | the first page. Copyrights for components of this work owned by 123 | others than ACM must be honored. Abstracting with credit is 124 | permitted. To copy otherwise, or republish, to post on servers or to 125 | redistribute to lists, requires prior specific permission 126 | and\hspace*{.5pt}/or a fee. Request permissions from 127 | permissions@acm.org. 
128 | \or % acmlicensed 129 | Permission to make digital or hard copies of all or part of this 130 | work for personal or classroom use is granted without fee provided 131 | that copies are not made or distributed for profit or commercial 132 | advantage and that copies bear this notice and the full citation on 133 | the first page. Copyrights for components of this work owned by 134 | others than the author(s) must be honored. Abstracting with credit 135 | is permitted. To copy otherwise, or republish, to post on servers 136 | or to redistribute to lists, requires prior specific permission 137 | and\hspace*{.5pt}/or a fee. Request permissions from 138 | permissions@acm.org. 139 | \or % rightsretained 140 | Permission to make digital or hard copies of part or all of this work 141 | for personal or classroom use is granted without fee provided that 142 | copies are not made or distributed for profit or commercial advantage 143 | and that copies bear this notice and the full citation on the first 144 | page. Copyrights for third-party components of this work must be 145 | honored. For all other uses, contact the 146 | owner\hspace*{.5pt}/author(s). 147 | \or % usgov 148 | This paper is authored by an employee(s) of the United States 149 | Government and is in the public domain. Non-exclusive copying or 150 | redistribution is allowed, provided that the article citation is 151 | given and the authors and agency are clearly identified as its 152 | source. 153 | \or % usgovmixed 154 | ACM acknowledges that this contribution was authored or co-authored 155 | by an employee, or contractor of the national government. As such, 156 | the Government retains a nonexclusive, royalty-free right to 157 | publish or reproduce this article, or to allow others to do so, for 158 | Government purposes only. Permission to make digital or hard copies 159 | for personal or classroom use is granted. Copies must bear this 160 | notice and the full citation on the first page. 
Copyrights for 161 | components of this work owned by others than ACM must be 162 | honored. To copy otherwise, distribute, republish, or post, 163 | requires prior specific permission and\hspace*{.5pt}/or a 164 | fee. Request permissions from permissions@acm.org. 165 | \or % cagov 166 | This article was authored by employees of the Government of Canada. 167 | As such, the Canadian government retains all interest in the 168 | copyright to this work and grants to ACM a nonexclusive, 169 | royalty-free right to publish or reproduce this article, or to allow 170 | others to do so, provided that clear attribution is given both to 171 | the authors and the Canadian government agency employing them. 172 | Permission to make digital or hard copies for personal or classroom 173 | use is granted. Copies must bear this notice and the full citation 174 | on the first page. Copyrights for components of this work owned by 175 | others than the Canadain Government must be honored. To copy 176 | otherwise, distribute, republish, or post, requires prior specific 177 | permission and\hspace*{.5pt}/or a fee. Request permissions from 178 | permissions@acm.org. 179 | \or % cagovmixed 180 | ACM acknowledges that this contribution was co-authored by an 181 | affiliate of the national government of Canada. As such, the Crown 182 | in Right of Canada retains an equal interest in the copyright. 183 | Reprints must include clear attribution to ACM and the author's 184 | government agency affiliation. Permission to make digital or hard 185 | copies for personal or classroom use is granted. Copies must bear 186 | this notice and the full citation on the first page. Copyrights for 187 | components of this work owned by others than ACM must be honored. 188 | To copy otherwise, distribute, republish, or post, requires prior 189 | specific permission and\hspace*{.5pt}/or a fee. Request permissions 190 | from permissions@acm.org. 
191 | \or % licensedusgovmixed 192 | Publication rights licensed to ACM. ACM acknowledges that this 193 | contribution was authored or co-authored by an employee, contractor 194 | or affiliate of the United States government. As such, the 195 | Government retains a nonexclusive, royalty-free right to publish or 196 | reproduce this article, or to allow others to do so, for Government 197 | purposes only. 198 | \or % licensedcagovmixed 199 | Publication rights licensed to ACM. ACM acknowledges that this 200 | contribution was authored or co-authored by an employee, contractor 201 | or affiliate of the national government of Canada. As such, the 202 | Government retains a nonexclusive, royalty-free right to publish or 203 | reproduce this article, or to allow others to do so, for Government 204 | purposes only. 205 | \or % othergov 206 | ACM acknowledges that this contribution was authored or co-authored 207 | by an employee, contractor or affiliate of a national government. As 208 | such, the Government retains a nonexclusive, royalty-free right to 209 | publish or reproduce this article, or to allow others to do so, for 210 | Government purposes only. 211 | \or % licensedothergov 212 | Publication rights licensed to ACM. ACM acknowledges that this 213 | contribution was authored or co-authored by an employee, contractor 214 | or affiliate of a national government. As such, the Government 215 | retains a nonexclusive, royalty-free right to publish or reproduce 216 | this article, or to allow others to do so, for Government purposes 217 | only. 218 | \fi} 219 | \endinput 220 | %% 221 | %% End of file `acmcopyright.sty'. 
222 | -------------------------------------------------------------------------------- /docs/frequency-scaling.md: -------------------------------------------------------------------------------- 1 | When you run the google-benchmark it might print a message like: 2 | 3 | ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. 4 | 5 | This means the operating system (OS) can dynamically change the frequency while the program is running. 6 | Modern processors are designed to run at multiple frequencies. The frequency is scaled up/down by the 7 | operating system in order to save power or to improve performance depending on the situation. For example, 8 | when the load factor is low, the OS can scale down the CPU because there is not much work to be done. On the 9 | other hand, while running expensive compute operations, the frequency is scaled up to execute the task as quickly 10 | as possible. For more details please see: (https://wiki.archlinux.org/index.php/CPU_frequency_scaling). 11 | 12 | Now when running a performance benchmark you want to fix the frequency; otherwise the performance numbers 13 | will be noisy. So you must fix the frequency before running the benchmark. Without fixing the frequency, 14 | wide variation in results may be observed (especially for small data sizes).
15 | 16 | On Ubuntu machines an easy way to fix the frequency is to install cpufrequtils: 17 | 18 | ```sh 19 | sudo apt-get install cpufrequtils 20 | ``` 21 | 22 | Then edit the following file (if it doesn't exist, create it): 23 | 24 | ``` 25 | sudo vi /etc/default/cpufrequtils 26 | ``` 27 | 28 | And add the following line to it: 29 | 30 | ```sh 31 | GOVERNOR="performance" 32 | ``` 33 | For more details see this post: 34 | http://askubuntu.com/questions/523640/how-i-can-disable-cpu-frequency-scaling-and-set-the-system-to-performance 35 | 36 | -------------------------------------------------------------------------------- /docs/graphviz.sty: -------------------------------------------------------------------------------- 1 | %% 2 | %% This is file `graphviz.sty', 3 | %% generated with the docstrip utility. 4 | %% 5 | %% The original source files were: 6 | %% 7 | %% graphviz.dtx (with options: `package') 8 | %% 9 | %% This is a generated file. 10 | %% 11 | %% Copyright (C) 2003-15 by Derek Rayside 12 | %% 13 | %% This file may be distributed and/or modified under the conditions of 14 | %% the LaTeX Project Public License, either version 1.3c of this license 15 | %% or (at your option) any later version. 
The latest version of this 16 | %% license is in: 17 | %% 18 | %% http://www.latex-project.org/lppl.txt 19 | %% 20 | \NeedsTeXFormat{LaTeX2e}[1999/12/01] 21 | \ProvidesPackage{graphviz} 22 | [2015/09/02 v0.95 .dtx graphviz file] 23 | \newif\ifsinglefile 24 | \DeclareOption{singlefile}{ 25 | \singlefiletrue 26 | \AtBeginDocument{% open a new file handle 27 | \newwrite\masterdotfile% 28 | \immediate\openout\masterdotfile=\@tmpdir tmpmaster.graphviz% 29 | \newwrite\mastergvprfile% 30 | \immediate\openout\mastergvprfile=\@tmpdir tmpmaster.gvpr} 31 | \AtEndDocument{% close the file 32 | % close the dot file and the gvpr file 33 | \immediate\closeout\masterdotfile% 34 | \immediate\closeout\mastergvprfile% 35 | % execute the gvpr file 36 | \immediate\write18{gvpr -f \@tmpdir tmpmaster.gvpr \@tmpdir tmpmaster.graphviz}% 37 | }} 38 | \newif\ifpsfrag 39 | \DeclareOption{psfrag}{ \psfragtrue } 40 | \newcommand{\@outext}{ps} 41 | \newcommand{\@outextspace}{ps } 42 | \DeclareOption{ps}{ 43 | \renewcommand{\@outext}{ps} 44 | \renewcommand{\@outextspace}{ps }} 45 | \DeclareOption{pdf}{% 46 | \renewcommand{\@outext}{pdf}% 47 | \renewcommand{\@outextspace}{pdf }} 48 | \newcommand{\@tmpdir}{} 49 | \DeclareOption{tmpdir}{% 50 | \immediate\write18{mkdir ./tmp/}% 51 | \renewcommand{\@tmpdir}{./tmp/}} 52 | \ExecuteOptions{ps} 53 | \ProcessOptions\relax % LaTeX class guide says it is wise to relax 54 | \RequirePackage{graphicx} 55 | \ifpsfrag \RequirePackage{psfrag} \fi 56 | \newcommand{\digraph}[2][scale=1]{ 57 | \inputdigraph[#1]{#2}{dot}% % Include the generated ps/pdf. 58 | \@digraph{digraph}{#2}% % Generate the .dot file. 59 | } 60 | \newcommand{\neatograph}[2][scale=1]{ 61 | \inputdigraph[#1]{#2}{neato}% % Include the generated ps/pdf. 62 | \@digraph{graph}{#2}% % Generate the .dot file. 
63 | } 64 | \begingroup 65 | \catcode`\^^M=\active% 66 | \gdef\@digraph{\begingroup\catcode`\^^M=\active\def^^M{^^J}\@@digraph}% 67 | \endgroup 68 | \def\@@digraph#1#2#3{% 69 | \ifsinglefile% write the graph to the master file 70 | \expandafter\def\csname -\endcsname{\string\n}% 71 | \immediate\write\masterdotfile{#1 #2 {#3}}% 72 | \immediate\write\mastergvprfile{BEG_G { if ($.name == "#2") {writeG($G,"\@tmpdir#2.dot");} }}% 73 | \else% open a new file handle 74 | \newwrite\dotfile% 75 | \immediate\openout\dotfile=\@tmpdir#2.dot% 76 | \expandafter\def\csname -\endcsname{\string\n}% 77 | \immediate\write\dotfile{#1 #2 {#3}}% 78 | \immediate\closeout\dotfile% 79 | \fi% 80 | \endgroup}% 81 | \newcommand{\inputdigraph}[3][scale=1]{ 82 | % execute dot or neato (nb: requires latex -shell-escape) 83 | \immediate\write18{#3 -T\@outextspace -o \@tmpdir#2.\@outextspace \@tmpdir#2.dot} 84 | \IfFileExists{\@tmpdir#2.\@outext}{ % the postscript/pdf exists: include it 85 | \ifpsfrag 86 | % per the ladot 2.2 source code, psfrag has a problem with 87 | % graphviz 2.2, and some sed hackery is necessary to work around 88 | \immediate\write18{sed -ibackup -e "s/xshow/pop show/g" \@tmpdir#2.ps} 89 | \fi 90 | \includegraphics[#1]{\@tmpdir#2.\@outext} 91 | } 92 | % else: the postscript/pdf doesn't exist: tell the user how to create it 93 | { 94 | \fbox{ \begin{tabular}{l} 95 | The file \texttt{#2.\@outext} hasn't been created from 96 | \texttt{\@tmpdir#2.dot} yet. \\ 97 | Run `\texttt{dot -T\@outextspace -o \@tmpdir#2.\@outextspace \@tmpdir#2.dot}' 98 | to create it. \\ 99 | Or invoke \LaTeX\ with the \texttt{-shell-escape} option 100 | to have this done automatically. \\ 101 | \end{tabular}} 102 | } 103 | } 104 | \endinput 105 | %% 106 | %% End of file `graphviz.sty'. 
107 | -------------------------------------------------------------------------------- /docs/slides/AppStartupCompilerOptimizationsAndTechniquesForEmbeddedSystems.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hiraditya/std-benchmark/47b20eeb4a2c13e8b877c0ca006346530dfc8622/docs/slides/AppStartupCompilerOptimizationsAndTechniquesForEmbeddedSystems.pdf -------------------------------------------------------------------------------- /docs/slides/Compiler-optimization-and-swe-technique-for-improving-app-launch-time-sonoma-sept-5-2023.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hiraditya/std-benchmark/47b20eeb4a2c13e8b877c0ca006346530dfc8622/docs/slides/Compiler-optimization-and-swe-technique-for-improving-app-launch-time-sonoma-sept-5-2023.pdf -------------------------------------------------------------------------------- /docs/slides/CppConCodesizeCompilerOptimizationAndTechniques.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hiraditya/std-benchmark/47b20eeb4a2c13e8b877c0ca006346530dfc8622/docs/slides/CppConCodesizeCompilerOptimizationAndTechniques.pdf -------------------------------------------------------------------------------- /docs/slides/Makefile: -------------------------------------------------------------------------------- 1 | paper = slide 2 | $(paper).pdf: $(paper).tex 3 | pdflatex -shell-escape $(paper) 4 | 5 | clean: 6 | rm -rf *.aux *.bbl *.blg *.log *.out *.pdf *.dot 7 | 8 | all: $(paper).pdf 9 | 10 | push: 11 | git pull --rebase origin master 12 | git push origin master 13 | 14 | dependences: 15 | apt-get install texlive-latex-extra texlive-fonts-recommended texlive-latex-base 16 | -------------------------------------------------------------------------------- 
/docs/slides/compiler-flags-for-perf-and-codesize-llvm-bangalore-meetup-sept-22.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hiraditya/std-benchmark/47b20eeb4a2c13e8b877c0ca006346530dfc8622/docs/slides/compiler-flags-for-perf-and-codesize-llvm-bangalore-meetup-sept-22.pdf -------------------------------------------------------------------------------- /docs/slides/dac-todo: -------------------------------------------------------------------------------- 1 | - Going into a description of how you discovered the issues and tools that you used to profile the problem would be really helpful. 2 | 3 | - What led you to look at this issue? 4 | Performance analysis of benchmarks 5 | 6 | - How did you know that there was a potential speed gain to be had? 7 | Optimizing for locality 8 | 9 | - How to effectively use tools to determine where to optimize or how to optimize? 10 | Valgrind, ltrace, linux perf (Basic tutorial of each of these) 11 | 12 | - Don't be afraid to go into detail. 13 | 14 | - Remember that, as a tutorial, people will be using your slides as an outline for how they can use your methodology in their own code. 15 | - If there's not enough content for them to do it on their own, a tutorial that just says "there was a problem and I fixed it" isn't going to be very helpful. 16 | - I'd hazard a guess and say that 99% of the people you'll be in front of won't have any experience with compiler internals or GLIBC internals. 17 | - That's why I'd move the focus away from the specific GLIBC internals and towards optimization in general. 18 | Carve out general code from the examples 19 | 20 | - I think that there's some extremely valuable information here that you can share with the audience, but you don't want them to get lost because they don't know GLIBC. 
21 | -------------------------------------------------------------------------------- /docs/slides/slide-DAC-2017.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hiraditya/std-benchmark/47b20eeb4a2c13e8b877c0ca006346530dfc8622/docs/slides/slide-DAC-2017.pdf -------------------------------------------------------------------------------- /docs/slides/slide-DAC-2017.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hiraditya/std-benchmark/47b20eeb4a2c13e8b877c0ca006346530dfc8622/docs/slides/slide-DAC-2017.pptx -------------------------------------------------------------------------------- /docs/slides/slide-acm-2021-DAC-2017.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hiraditya/std-benchmark/47b20eeb4a2c13e8b877c0ca006346530dfc8622/docs/slides/slide-acm-2021-DAC-2017.pptx -------------------------------------------------------------------------------- /docs/slides/slide-acm-2021.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hiraditya/std-benchmark/47b20eeb4a2c13e8b877c0ca006346530dfc8622/docs/slides/slide-acm-2021.pdf -------------------------------------------------------------------------------- /docs/slides/slide-cppnow.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hiraditya/std-benchmark/47b20eeb4a2c13e8b877c0ca006346530dfc8622/docs/slides/slide-cppnow.pdf -------------------------------------------------------------------------------- /docs/slides/slide-cppnow.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hiraditya/std-benchmark/47b20eeb4a2c13e8b877c0ca006346530dfc8622/docs/slides/slide-cppnow.pptx 
-------------------------------------------------------------------------------- /docs/slides/slide-meetup.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hiraditya/std-benchmark/47b20eeb4a2c13e8b877c0ca006346530dfc8622/docs/slides/slide-meetup.pdf -------------------------------------------------------------------------------- /docs/std-benchmark.tex: -------------------------------------------------------------------------------- 1 | \documentclass{sig-alternate} 2 | \usepackage{comment} 3 | \usepackage{amsmath} 4 | \usepackage{hyperref} 5 | \usepackage{graphicx} 6 | \usepackage{amssymb} 7 | \usepackage{graphviz} 8 | \usepackage{auto-pst-pdf} 9 | \usepackage{etoolbox} 10 | \usepackage{flushend} 11 | \usepackage{needspace} 12 | 13 | \makeatletter 14 | \preto{\@verbatim}{\topsep=1pt \partopsep=0pt} 15 | \makeatother 16 | 17 | \pagenumbering{arabic} 18 | 19 | \begin{document} 20 | \def \GCC {GCC} 21 | \def \LLVM {LLVM} 22 | 23 | \special{papersize=8.5in,11in} 24 | \setlength{\pdfpageheight}{\paperheight} 25 | \setlength{\pdfpagewidth}{\paperwidth} 26 | 27 | \title{Benchmarking C/C++ standard libraries} 28 | 29 | \toappear{ 30 | \hrule \vspace{5pt} 31 | Some conf 32 | } 33 | \numberofauthors{3} 34 | 35 | \author{ 36 | \alignauthor 37 | Aditya Kumar\\ 38 | \affaddr{Samsung Austin R\&D Center}\\ 39 | \email{aditya.k7@samsung.com} 40 | \and 41 | \alignauthor 42 | Sebastian\\ 43 | \affaddr{Samsung Austin R\&D Center}\\ 44 | \email{ancd} 45 | } 46 | 47 | \maketitle 48 | \begin{abstract} 49 | We present a systematic analysis of C and C++ standard libraries. The goal here 50 | is to enable each programmer to make informed decisions about the functionality one 51 | is using and not just rely on common wisdom. We make it very easy to know the 52 | internals of the standard libraries. There are benchmark analyses available 53 | online but those are in bits and pieces.
We present a comprehensive 54 | infrastructure where a large subset of the standard library is covered. This also 55 | enables someone to run their own configuration of tests by adding just a few lines 56 | of code. This also allows the programmer to compare which compiler toolchain 57 | generates better code in terms of performance, code-size etc. such that they can 58 | choose the right toolchain for their application. The comparative analysis of 59 | multiple toolchains also enabled us to improve the underperforming library 60 | functions. 61 | \end{abstract} 62 | 63 | \section{Introduction} 64 | Although programmers use C++ mostly in performance critical applications, 65 | there is little literature to validate claims on how C++ programs perform 66 | better than programs written in other languages. Early work by M{\"u}ller \cite{muller} 67 | and the technical report on C++ performance \cite{c++perftr} give some 68 | perspective on this. Since that time, compilers and hardware have come a long way 69 | and there is a need to revisit this. Moreover, a systematic analysis of C++ standard libraries 70 | has not been done with great rigor. Some references on the gcc mailing list show 71 | analysis of basic\_string etc., but the conclusion ended in a suboptimal 72 | implementation. 73 | 74 | Why a systematic analysis is important. 75 | 76 | The main contributions of this paper are: 77 | \begin{itemize} 78 | \item a benchmark suite for the C/C++ standard library 79 | \item ability to compare compiler performance for standard libraries 80 | \item identifying slower implementations in the standard library 81 | \item investigating whether C++11/14 really makes your code faster (for 82 | standard libraries) at -O0, -O3 83 | \item investigating which nuances of the C++ standard cause suboptimal 84 | implementation of programs 85 | \end{itemize} 86 | 87 | \section{Related Work} 88 | Several bits and pieces of benchmarking are available online. Bjarne's channel9 89 | talk \cite{stroustrup2012}.
He talks about std::list vs. std::vector; in fact 90 | there are several analyses online, all of which establish std::vector as a better 91 | choice over std::list. But is std::vector the best sequential data structure? 92 | Our experiments indicate that std::deque may be better in many cases 93 | \ref{sec:experiments}. 94 | 95 | clrs \cite{clrs} 96 | C++ standard \cite{c++fcd} 97 | 98 | \newpage 99 | 100 | \subsection{Layout of the project} 101 | Structure 102 | 103 | \subsection{Illustrative Example} \label{subsec:example} 104 | How to add a single benchmark \cite{googlebench} 105 | \newpage 106 | 107 | 108 | \section{Performance problems due to C++ standardese} 109 | char\_traits::find has to check whether both the pointer to the string and the 110 | number of characters to analyze are zero, because if both are zero then the result is 111 | valid (zero). 112 | 113 | 114 | 115 | \begin{verbatim} 116 | static const char_type* 117 | find(const char_type* __s, size_t __n, const char_type& __a) 118 | { 119 | if (__n == 0) 120 | return 0; 121 | return memchr(__s, __a, __n); 122 | } 123 | \end{verbatim} 124 | 125 | A pointer to the first character in the range specified by [p, p + count) that 126 | compares equal to ch, or NULL if not found. 127 | 128 | 129 | size being an unsigned int. Many loops written in C++ have unsigned integers 130 | as induction variables; the problem is that unsigned int overflow is well 131 | defined so the compiler cannot assume that the loop is monotonic. This disables 132 | many useful compiler optimizations like vectorization etc. 133 | TODO: Example?? 134 | 135 | List of Undefined behavior in C++: 136 | http://stackoverflow.com/a/367662/811335 137 | 138 | \section{Experimental Results and discussion} 139 | \label{sec:experiments} 140 | 141 | \subsection{Benchmark results comparison across toolchains} 142 | 143 | \subsection{Time complexity results} 144 | 145 | \subsection{C vs C++ algorithms} 146 | string::find vs. strstr.
147 | 148 | We present the results we got on x86-64 as well as aarch64 machines. 149 | 150 | \section{compiler vs programmer} 151 | We created tests to figure out simple patterns which could be converted to 152 | standard library functions by the compiler but were not in some cases. This is 153 | mostly because of aliasing ambiguities and how the programmer can avoid them. In 154 | the benchmark compiler.vs.programmer/memory.bench.cpp, we have a program like: 155 | 156 | \begin{verbatim} 157 | const char* __attribute__ ((noinline)) 158 | assign(const char *beg, const char *end, char *dest) { 159 | while (beg != end) 160 | *dest++ = *beg++; 161 | return beg; 162 | } 163 | \end{verbatim} 164 | 165 | This is a very common pattern found in many codebases including C++ standard 166 | libraries e.g., libcxx:locale.cpp:const char*ctype::do\_widen, 167 | 168 | \begin{verbatim} 169 | const char* 170 | ctype::do_widen(const char* low, const char* high, 171 | char_type* dest) const 172 | { 173 | for (; low != high; ++low, ++dest) 174 | *dest = *low; 175 | return low; 176 | } 177 | \end{verbatim} 178 | 179 | The dest never aliases with the low or the high pointer but the compiler fails 180 | to convert this to memcpy, because it cannot figure out that low, high, and dest are 181 | not aliases of each other. It might be able to figure this out if they were inlined 182 | in the caller but this function is in a .cpp file and hence the caller will not 183 | see it. This function gets called over and over again each time you invoke 184 | std::stringstream to parse tokens of integers from a character stream. 185 | 186 | Just adding \_\_restrict\_\_ would solve this problem; for example, the 187 | function assign\_res 188 | from\\ std-benchmark/compiler.vs.programmer/memory.bench.cpp 189 | shows the usage in this case.
190 | 191 | \begin{verbatim} 192 | const char* __attribute__ ((noinline)) 193 | assign_res(const char * __restrict__ beg, 194 | const char * __restrict__ end, 195 | char *__restrict__ dest) { 196 | while (beg != end) 197 | *dest++ = *beg++; 198 | return beg; 199 | } 200 | \end{verbatim} 201 | 202 | This function runs twice as fast as the one without restrict. 203 | 204 | \begin{verbatim} 205 | Benchmark Time Iterations 206 | -------------------------------------------------- 207 | BM_prog_memcpy/32 5 ns 143049157 208 | BM_prog_memcpy/64 6 ns 117543415 209 | BM_prog_memcpy/128 8 ns 87350103 210 | BM_prog_memcpy/256 12 ns 57677864 211 | BM_prog_memcpy/512 20 ns 34332565 212 | BM_prog_memcpy/1024 36 ns 19396612 213 | BM_compiler_memcpy/32 4 ns 181115627 214 | BM_compiler_memcpy/64 4 ns 169701384 215 | BM_compiler_memcpy/128 6 ns 111126103 216 | BM_compiler_memcpy/256 6 ns 122750774 217 | BM_compiler_memcpy/512 8 ns 91138876 218 | BM_compiler_memcpy/1024 11 ns 62055274 219 | BM_memcpy/32 3 ns 246831272 220 | BM_memcpy/64 3 ns 226251314 221 | BM_memcpy/128 6 ns 124278117 222 | BM_memcpy/256 5 ns 150856758 223 | BM_memcpy/512 6 ns 114059692 224 | BM_memcpy/1024 10 ns 69498277 225 | \end{verbatim} 226 | 227 | 228 | ; MSVC compiler identification is: MSVC 19.10.25019.0 229 | ; Same machine was used for gcc, clang, and MSVC (using dual boot) 230 | MSVC Compiler size of containers (64 bit): 231 | Sizeof, std::deque(), 40 232 | _Mapptr _Map; // pointer to array of pointers to blocks 233 | size_type _Mapsize; // size of map array, zero or 2^N 234 | size_type _Myoff; // offset of initial element 235 | size_type _Mysize; // current length of sequence 236 | 237 | 238 | Sizeof, std::list(), 16 239 | (node ptr and size only) 240 | 241 | Sizeof, std::vector(), 24 242 | pointer _Myfirst; // pointer to beginning of array 243 | pointer _Mylast; // pointer to current end of sequence 244 | pointer _Myend; // pointer to end of array 245 | 246 | Sizeof, std::set(), 16 247 | Sizeof, 
(std::map()), 16 248 | _Nodeptr _Myhead; // pointer to head node 249 | size_type _Mysize; // number of elements 250 | 251 | 252 | Sizeof, (std::unordered_map()), 64 253 | Sizeof, std::unordered_set(), 64 254 | _Traits _Traitsobj; // traits to customize behavior 255 | _Mylist _List; // list of elements, must initialize before _Vec 256 | _Myvec _Vec; // vector of list iterators, begin() then end()-1 257 | size_type _Mask; // the key mask 258 | size_type _Maxidx; // current maximum key value 259 | 260 | 261 | ; libc++ 262 | unordered_set/map 263 | __bucket_list (unique_ptr) __bucket_list_; 264 | __compressed_pair<__first_node, __node_allocator> __p1_; 265 | __compressed_pair __p2_; 266 | __compressed_pair __p3_; 267 | 268 | std::list 269 | __node_base __end_; (has prev and next ptr) 270 | __compressed_pair __size_alloc_; 271 | 272 | std::deque 273 | __map __map_ (split buffer 4 pointers => 32 size); 274 | size_type __start_; 275 | __compressed_pair __size_; 276 | 277 | std::set 278 | __iter_pointer __begin_node_; 279 | __compressed_pair<__end_node_t, __node_allocator> __pair1_; 280 | __compressed_pair __pair3_; 281 | 282 | std::unordered_set 283 | __bucket_list(unique_ptr) __bucket_list_; 284 | __compressed_pair<__first_node, __node_allocator> __p1_; 285 | __compressed_pair __p2_; 286 | __compressed_pair __p3_; 287 | 288 | 289 | gcc 290 | std::set/map 291 | _Base_ptr _M_root; 292 | _Base_ptr _M_nodes; 293 | _Rb_tree& _M_t; 294 | 295 | \section{Timing and Limitations} 296 | Timing and Limitations 297 | \subsection{Limitations of the time-complexity measurement} 298 | 299 | Add missing noexcept on std::_Sp_locker constructors 300 | git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@243291 138bc75d-0d04-0410-961f-82ee72b054a4 301 | 302 | \section{Conclusion and Future Work} 303 | 304 | \bibliographystyle{abbrv} 305 | {\small 306 | \bibliography{Bibliography} 307 | } 308 | \end{document} 309 | -------------------------------------------------------------------------------- 
/docs/string-find-vs-cstring-strstr.ods: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hiraditya/std-benchmark/47b20eeb4a2c13e8b877c0ca006346530dfc8622/docs/string-find-vs-cstring-strstr.ods -------------------------------------------------------------------------------- /include/rng_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef RNG_UTILS_H 2 | #define RNG_UTILS_H 3 | #include 4 | // Type trait: uniform_distribution::type names the distribution class used to draw values of a given type. 5 | template struct uniform_distribution { 6 | typedef void type; // error: fallback for types that have no specialization below 7 | }; 8 | // Specializations selecting std::uniform_int_distribution. NOTE(review): the template arguments are not visible in this listing; confirm which types are covered. 9 | template<> struct uniform_distribution { 10 | typedef std::uniform_int_distribution type; 11 | }; 12 | 13 | template<> struct uniform_distribution { 14 | typedef std::uniform_int_distribution type; 15 | }; 16 | 17 | template<> struct uniform_distribution { 18 | typedef std::uniform_int_distribution type; 19 | }; 20 | // Specializations selecting std::uniform_real_distribution. 21 | template<> struct uniform_distribution { 22 | typedef std::uniform_real_distribution type; 23 | }; 24 | 25 | template<> struct uniform_distribution { 26 | typedef std::uniform_real_distribution type; 27 | }; 28 | // Shorthand for the distribution type chosen by uniform_distribution. 29 | template using gen = typename uniform_distribution::type; 30 | // Wrapper around std::random_device that returns one random value per get_rand(min, max) call. 31 | class random_device { 32 | public: 33 | template 34 | T get_rand(T min, T max) { 35 | std::mt19937 e(rd()); // a new engine is built and seeded from rd on every call 36 | gen d(min, max); // distribution constructed from (min, max) 37 | return d(e); 38 | } 39 | 40 | private: 41 | // TODO: Fix the seed to a constant.
42 | std::random_device rd; // obtain a random number from hardware 43 | }; 44 | 45 | 46 | #endif // RNG_UTILS_H 47 | 48 | -------------------------------------------------------------------------------- /include/test_configs.h: -------------------------------------------------------------------------------- 1 | #ifndef TEST_CONFIGS_H 2 | #define TEST_CONFIGS_H 3 | 4 | #define KB << 10 5 | #define MB << 20 6 | #define GB << 30 7 | 8 | #define i7_4770 9 | 10 | 11 | // Configurations for i7_4770 12 | #ifdef i7_4770 13 | // To benchmark data residing completely in L1 cache. 14 | #ifndef ENABLE_TRAVIS_BUILD 15 | #define L1 (32 KB) 16 | // To benchmark data residing in L2 cache. 17 | #define L2 (256 KB) 18 | #else 19 | // For the Travis CI to run the entire test. 20 | #define L1 (16 KB) 21 | #define L2 (32 KB) 22 | #endif 23 | 24 | // To benchmark data residing in L3 cache. 25 | #define L3 (8192 KB) 26 | // To benchmark data residing in main memory. 27 | #define MEMORY (12 GB) 28 | #endif 29 | 30 | #define SINGLE_ARG(...) 
__VA_ARGS__ 31 | 32 | #define BASIC_BENCHMARK_TEST(x) BENCHMARK(x)->RangeMultiplier(2)\ 33 | ->Range(L1, L2) 34 | 35 | #define COMPLEXITY_BENCHMARK(x, CACHE_TYPE) BENCHMARK(x)->RangeMultiplier(2)\ 36 | ->Range(L1, CACHE_TYPE)->Complexity() 37 | 38 | #define COMPLEXITY_BENCHMARK_GEN(x, y, CACHE_TYPE) BENCHMARK_TEMPLATE(x, y)\ 39 | ->RangeMultiplier(2)->Range(L1, CACHE_TYPE)\ 40 | ->Complexity() 41 | #endif // TEST_CONFIGS_H 42 | 43 | constexpr int MSize = L2; 44 | 45 | #if defined(__clang__) 46 | #define COMPILER_CLANG 47 | #elif defined(__GNUC__) 48 | #define COMPILER_GCC 49 | #elif defined(_MSC_VER) 50 | #define COMPILER_MSVC 51 | #endif 52 | 53 | #if defined(COMPILER_GCC) || defined(COMPILER_CLANG) 54 | #define ATTR_NOINLINE __attribute__((noinline)) 55 | #elif defined(COMPILER_MSVC) 56 | #define ATTR_NOINLINE __declspec(noinline) 57 | #else 58 | #define ATTR_NOINLINE 59 | #endif 60 | -------------------------------------------------------------------------------- /include/test_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef TEST_UTILS_H 2 | #define TEST_UTILS_H 3 | 4 | #include "rng_utils.h" 5 | #include 6 | #include 7 | 8 | // TODO: Add more aggregates. 9 | struct aggregate { 10 | int first; 11 | int second; 12 | int third; 13 | int fourth; 14 | aggregate() : first(0), second(0), third(0), fourth(0) 15 | {} 16 | // This is a hacky constructor for ::find on associative containers to work. 
17 | aggregate(int i) 18 | : first(i), second(i), third(i), fourth(i) 19 | {} 20 | aggregate(int i, int j, int k, int l) 21 | : first(i), second(j), third(k), fourth(l) 22 | {} 23 | // Pre- and post-increment advance all four fields together. 24 | aggregate& operator++() { 25 | ++first; 26 | ++second; 27 | ++third; 28 | ++fourth; 29 | return *this; 30 | } 31 | aggregate operator++(int) { 32 | aggregate N(*this); 33 | ++(*this); 34 | return N; 35 | } 36 | // Ordering and equality compare the `first` field only. 37 | bool operator<(const aggregate& i) const { 38 | return first < i.first; 39 | } 40 | 41 | bool operator>(const aggregate& i) const { 42 | return i < *this; 43 | } 44 | 45 | bool operator==(const aggregate& i) const { 46 | return first == i.first; 47 | } 48 | 49 | bool operator!=(const aggregate& i) const { 50 | return !(*this == i); 51 | } 52 | }; 53 | 54 | // Hasher for aggregate data type. NOTE(review): it mixes all four fields while operator== compares only `first`, so two objects that compare equal can hash differently. 55 | namespace std { 56 | template <> 57 | struct hash 58 | { 59 | std::size_t operator()(const aggregate& k) const 60 | { 61 | using std::hash; 62 | // Hash and combine using bit-shift. 63 | return ((hash()(k.first) 64 | ^ (hash()(k.second) << 1)) >> 1) 65 | ^ (hash()(k.third) << 1) 66 | ^ (hash()(k.fourth) << 1); 67 | } 68 | }; 69 | } 70 | // Primary template: maps a type to itself. 71 | template 72 | struct remove_const { typedef T type; }; 73 | 74 | // value_type of a std::map is std::pair 75 | template 76 | struct remove_const> { typedef std::pair type; }; 77 | // Draws a random T through r.get_rand(T(0), T(max)). 78 | template 79 | T get_rand(random_device &r, int max) { 80 | return r.get_rand(T(0), T(max)); 81 | } 82 | // Explicit specializations are marked inline because this header may be included from several translation units. 83 | template<> 84 | inline std::pair get_rand>(random_device &r, int max) { 85 | return std::make_pair(r.get_rand(0, max), r.get_rand(0, max)); 86 | } 87 | 88 | template<> 89 | inline aggregate get_rand(random_device &r, int max) { 90 | return aggregate(r.get_rand(0, max)); 91 | } 92 | 93 | template<> 94 | inline std::pair 95 | get_rand>(random_device &r, int max) { 96 | return std::make_pair(r.get_rand(0, max), r.get_rand(0, max)); 97 | } 98 | // Pre-increments i and returns the new value. 99 | template 100 | T increment(T &i) { 101 | return ++i; 102 | } 103 | 104 | // value_type of a std::map is std::pair 105 | template<>
106 | inline std::pair increment>(std::pair &i) { 107 | return std::make_pair(++i.first, i.second); 108 | } 109 | 110 | template<> 111 | inline std::pair 112 | increment>(std::pair &i) { 113 | return std::make_pair(++i.first, i.second); 114 | } 115 | // Value of T constructed from 0. 116 | template 117 | T init() { 118 | return T(0); 119 | } 120 | 121 | template<> 122 | inline std::pair init>() { 123 | return std::make_pair(0, 0); 124 | } 125 | 126 | template<> 127 | inline std::pair init>() { 128 | return std::make_pair(0, 0); 129 | } 130 | 131 | template