├── .gitignore ├── tests ├── CMakeLists.txt ├── timed_run.cpp └── basic.cpp ├── geiger ├── CMakeLists.txt ├── geiger.h ├── printer.h ├── printer_csv.h ├── papi.h ├── printer_console.h ├── chrono.h ├── benchmark.h └── benchmark.tcc ├── TODO ├── examples ├── CMakeLists.txt ├── simple_csv.cpp ├── simple.cpp ├── hardware_counters.cpp ├── walk.cpp ├── full.cpp └── time.cpp ├── CMakeLists.txt ├── LICENSE └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | *.kdev4 3 | ~* 4 | -------------------------------------------------------------------------------- /tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | find_package(GTest REQUIRED) 2 | include_directories(${GTEST_INCLUDE_DIRS}) 3 | 4 | add_executable(unit_tests basic.cpp timed_run.cpp) 5 | target_link_libraries(unit_tests ${GTEST_BOTH_LIBRARIES} papi pthread) 6 | add_test(unit_tests unit_tests) 7 | -------------------------------------------------------------------------------- /geiger/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(HEADERS geiger.h chrono.h printer.h benchmark.h benchmark.tcc printer_console.h printer_csv.h) 2 | 3 | install(DIRECTORY DESTINATION include/geiger) 4 | install(FILES ${HEADERS} DESTINATION include/geiger) 5 | 6 | if (USE_PAPI) 7 | install(FILES papi.h DESTINATION include/geiger) 8 | endif() 9 | -------------------------------------------------------------------------------- /geiger/geiger.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifdef USE_PAPI 4 | #include "papi.h" 5 | #endif 6 | 7 | #include "chrono.h" 8 | #include "printer.h" 9 | #include "benchmark.h" 10 | #include "printer_console.h" 11 | #include "printer_csv.h" 12 | 13 | namespace geiger 14 | { 15 | 16 | inline void init() 17 | { 18 | chrono::init(); 19 | } 20 | 21 | } 22 | 23 | 
-------------------------------------------------------------------------------- /TODO: -------------------------------------------------------------------------------- 1 | - console_printer: more info when measuring related hwd counters, e.g. instructions per cycle 2 | - multiplexing on hwd counters 3 | - more examples / doc 4 | - passing function, with fixed arguments, e.g. s.add("f(5,6)", &f, 5, 6) 5 | - passing function with generated arguments 6 | - (multiple) printers via ctor 7 | - console_printer: better units for hwd counter 8 | -------------------------------------------------------------------------------- /examples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | function(add_example name) 2 | add_executable(${name} ${name}.cpp) 3 | 4 | if (USE_PAPI) 5 | target_link_libraries(${name} papi) 6 | endif() 7 | endfunction(add_example) 8 | 9 | add_example(simple) 10 | add_example(simple_csv) 11 | 12 | if (USE_PAPI) 13 | add_example(hardware_counters) 14 | add_example(walk) 15 | add_example(full) 16 | endif() 17 | -------------------------------------------------------------------------------- /geiger/printer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace geiger 6 | { 7 | 8 | struct suite_base; 9 | struct suite_report; 10 | struct test_report; 11 | 12 | struct printer_base 13 | { 14 | virtual ~printer_base() {} 15 | 16 | virtual void on_start(const suite_base&) {} 17 | virtual void on_test_complete(const std::string&, const test_report&) {} 18 | virtual void on_complete(const suite_report&) {} 19 | }; 20 | 21 | } 22 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8) 2 | 3 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -Wall -Wextra") 4 | 5 | 
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g") 6 | set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3") 7 | 8 | option(USE_PAPI "USE_PAPI" ON) 9 | 10 | if (USE_PAPI) 11 | add_definitions("-DUSE_PAPI") 12 | endif() 13 | 14 | include_directories(.) 15 | 16 | add_subdirectory(geiger) 17 | add_subdirectory(examples) 18 | add_subdirectory(tests) 19 | 20 | -------------------------------------------------------------------------------- /examples/simple_csv.cpp: -------------------------------------------------------------------------------- 1 | #include "geiger/geiger.h" 2 | 3 | #include 4 | #include 5 | 6 | int main() 7 | { 8 | geiger::init(); 9 | geiger::suite<> s; 10 | 11 | s.add("rand", []() 12 | { 13 | std::rand(); 14 | }); 15 | 16 | s.add("vector push_back", []() 17 | { 18 | std::vector v; 19 | v.push_back(1000); 20 | }); 21 | 22 | // Redirection of each test result to out.csv 23 | s.set_printer(geiger::printer::csv("out.csv")); 24 | s.run(); 25 | s.run(); 26 | 27 | return 0; 28 | } 29 | -------------------------------------------------------------------------------- /examples/simple.cpp: -------------------------------------------------------------------------------- 1 | #include "geiger/geiger.h" 2 | 3 | #include 4 | #include 5 | 6 | int main() 7 | { 8 | geiger::init(); 9 | 10 | // A benchmark suite that does only time measurement 11 | geiger::suite<> s; 12 | 13 | s.add("rand", []() 14 | { 15 | std::rand(); 16 | }); 17 | 18 | s.add("vector push_back", []() 19 | { 20 | std::vector v; 21 | v.push_back(1000); 22 | }); 23 | 24 | // Redirection of each test result to the "console" printer 25 | s.set_printer>(); 26 | 27 | // Run all benchmarks 28 | s.run(); 29 | 30 | return 0; 31 | } 32 | -------------------------------------------------------------------------------- /examples/hardware_counters.cpp: -------------------------------------------------------------------------------- 1 | #include "geiger/geiger.h" 2 | 3 | #include 4 | #include 5 | 6 | 
int main() 7 | { 8 | using namespace geiger; 9 | init(); 10 | 11 | // instr_profiler reports the number of instructions, cycles and mispredicted branches 12 | suite s; 13 | 14 | // You can cover the events you want by defining your own PAPI wrapper: 15 | // using branch_profiler = papi_wrapper 16 | // (here, branches taken, not taken, mispredicted, correctly predicted) 17 | // 18 | // and then use it: 19 | // suite s; 20 | 21 | s.add("rand", 22 | []() 23 | { 24 | std::rand(); 25 | }) 26 | .add("vector push_back", 27 | []() 28 | { 29 | std::vector v; 30 | v.push_back(1000); 31 | }) 32 | .set_printer>() 33 | .run(); 34 | 35 | return 0; 36 | } 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 david-grs 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 23 | -------------------------------------------------------------------------------- /examples/walk.cpp: -------------------------------------------------------------------------------- 1 | #include "geiger/geiger.h" 2 | 3 | extern "C" { 4 | #include 5 | #include 6 | } 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | static const int size = 1024 * 1024 * 16; 13 | static const int batch = 64; 14 | static const int mask = size - 1; 15 | static const int prime = 7919; 16 | static int sum = 0; 17 | 18 | auto linear_walk() 19 | { 20 | std::vector v(size, 'a'); 21 | 22 | return [v = std::move(v)]() 23 | { 24 | static std::size_t pos = std::rand() & mask; 25 | 26 | for (int i = 0; i < batch; ++i) 27 | { 28 | sum += v[pos]; 29 | pos = (pos + 1) & mask; 30 | } 31 | }; 32 | } 33 | 34 | auto random_walk() 35 | { 36 | std::vector v(size, 'a'); 37 | 38 | return [v = std::move(v)]() 39 | { 40 | static std::size_t pos = std::rand() & mask; 41 | 42 | for (int i = 0; i < batch; ++i) 43 | { 44 | sum += v[pos]; 45 | pos = (pos * prime) & mask; 46 | } 47 | }; 48 | } 49 | 50 | void walk() 51 | { 52 | using namespace geiger; 53 | suite s; 54 | 55 | s.add("linear walk", linear_walk()) 56 | .add("random walk", random_walk()) 57 | .set_printer>() 58 | .run(size / batch); 59 | } 60 | 61 | int main() 62 | { 63 | geiger::init(); 64 | walk(); 65 | return 0; 66 | } 67 | -------------------------------------------------------------------------------- /geiger/printer_csv.h: -------------------------------------------------------------------------------- 1 | #include "printer.h" 2 | #include "benchmark.h" 3 | 4 | #include 5 | #include 6 | 7 | namespace geiger 8 | { 9 | 10 | namespace printer 11 | { 12 | 13 | struct csv : public printer_base 14 | { 15 | csv(const std::string& filename, char delimiter = ';') 16 | : m_filename(filename), 17 | m_delimiter(delimiter) 18 | { 19 | } 20 | 21 | csv(const csv& c) =delete; 22 | 23 | csv(csv&& c) 24 | : m_filename(std::move(c.m_filename)), 25 | 
m_delimiter(c.m_delimiter) 26 | { 27 | } 28 | 29 | void on_start(const suite_base& s) override 30 | { 31 | // Not the first run(): header has already been written, return 32 | if (m_ofile.is_open()) 33 | return; 34 | 35 | m_ofile.open(m_filename.c_str()); 36 | 37 | if (!m_ofile.is_open()) 38 | throw std::runtime_error("geiger::printer::csv: unable to open file " + m_filename); 39 | 40 | m_ofile << "#Test" << m_delimiter << "Time" << m_delimiter << "Iterations"; 41 | (void)s; 42 | 43 | #ifdef USE_PAPI 44 | 45 | std::vector papi_events = s.papi_events(); 46 | for (auto it = papi_events.begin(); it != papi_events.end(); ++it) 47 | { 48 | m_ofile << m_delimiter << get_papi_event_name(*it); 49 | } 50 | 51 | #endif 52 | } 53 | 54 | void on_test_complete(const std::string& name, const test_report& r) override 55 | { 56 | m_ofile << "\n" << name << m_delimiter << r.time_per_task().count() << m_delimiter << r.iteration_count(); 57 | 58 | for (long long counter : r.papi_counters()) 59 | { 60 | m_ofile << m_delimiter << counter; 61 | } 62 | } 63 | 64 | private: 65 | std::ofstream m_ofile; 66 | 67 | std::string m_filename; 68 | char m_delimiter; 69 | }; 70 | 71 | } 72 | 73 | } 74 | -------------------------------------------------------------------------------- /tests/timed_run.cpp: -------------------------------------------------------------------------------- 1 | #include "geiger/geiger.h" 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | struct TimedRun : public ::testing::TestWithParam 9 | { 10 | using clock = std::chrono::high_resolution_clock; 11 | 12 | static void SetUpTestCase() 13 | { 14 | geiger::init(); 15 | } 16 | 17 | static void TearDownTestCase() {} 18 | 19 | void SetUp() override 20 | { 21 | m_start = clock::now(); 22 | } 23 | 24 | void TearDown() override {} 25 | 26 | auto ms_elapsed() const 27 | { 28 | return std::chrono::duration_cast(clock::now() - m_start); 29 | } 30 | 31 | protected: 32 | clock::time_point m_start; 33 | geiger::suite<> m_suite; 34 | 
}; 35 | 36 | TEST_P(TimedRun, OneIteration__SameDuration) 37 | { 38 | m_suite.add("sleep", [this]() { std::this_thread::sleep_for(GetParam()); }); 39 | m_suite.run(1); 40 | 41 | ASSERT_EQ(GetParam().count(), ms_elapsed().count()); 42 | } 43 | 44 | TEST_P(TimedRun, TwoIterations__TwiceLonger) 45 | { 46 | m_suite.add("sleep", [this]() { std::this_thread::sleep_for(GetParam()); }); 47 | m_suite.run(2); 48 | 49 | ASSERT_EQ(GetParam().count() * 2, ms_elapsed().count()); 50 | } 51 | 52 | TEST_P(TimedRun, OneSecond__OneSecondIfShorter) 53 | { 54 | m_suite.add("sleep", [this]() { std::this_thread::sleep_for(GetParam()); }); 55 | m_suite.run(std::chrono::seconds(1)); 56 | 57 | auto test_duration = GetParam(); 58 | 59 | if (test_duration < std::chrono::seconds(1)) 60 | ASSERT_NEAR(1000, ms_elapsed().count(), 100); 61 | else 62 | ASSERT_NEAR(test_duration.count(), ms_elapsed().count(), 100); 63 | } 64 | 65 | INSTANTIATE_TEST_CASE_P(FewTestDurations, 66 | TimedRun, 67 | ::testing::Values(std::chrono::milliseconds(1), 68 | std::chrono::milliseconds(9), 69 | std::chrono::milliseconds(85), 70 | std::chrono::milliseconds(357), 71 | std::chrono::milliseconds(901), 72 | std::chrono::milliseconds(2155))); 73 | 74 | -------------------------------------------------------------------------------- /examples/full.cpp: -------------------------------------------------------------------------------- 1 | #include "geiger/geiger.h" 2 | 3 | extern "C" 4 | { 5 | #include 6 | #include 7 | } 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | /* 15 | static void escape(void* p) 16 | { 17 | asm volatile("" : : "g"(p) : "memory"); 18 | } 19 | 20 | static void clobber() 21 | { 22 | asm volatile("" : : : "memory"); 23 | } 24 | */ 25 | 26 | void foo() 27 | { 28 | using namespace geiger; 29 | suite s; 30 | 31 | s.add("rand", 32 | []() 33 | { 34 | std::rand(); 35 | }) 36 | .add("random", 37 | []() 38 | { 39 | random(); 40 | }) 41 | .add("rdtsc", 42 | []() 43 | { 44 | 
geiger::detail::rdtsc(); 45 | }) 46 | .add("gettimeofday()", 47 | []() 48 | { 49 | struct timeval tv; 50 | gettimeofday(&tv, NULL); 51 | }) 52 | .add("time", 53 | []() 54 | { 55 | std::time(NULL); 56 | }) 57 | .add("srand", 58 | []() 59 | { 60 | std::srand(0); 61 | }) 62 | .add("srand(time)", 63 | []() 64 | { 65 | std::srand(std::time(NULL)); 66 | }) 67 | .add("vector reserve()", 68 | []() 69 | { 70 | std::vector v; 71 | v.reserve(1e3); 72 | }) 73 | .add("vector push_back", 74 | []() 75 | { 76 | std::vector v; 77 | v.push_back(1000); 78 | }) 79 | .set_printer>() 80 | .on_test_complete([](const std::string&, const test_report&) 81 | { 82 | 83 | }) 84 | .on_complete([](const suite_report&) 85 | { 86 | // for (const auto& p : r.tests) 87 | // std::cout << p.first << ":" << p.second.time_per_task().count() << "ns" << std::endl; 88 | }) 89 | .run(); 90 | } 91 | 92 | int main() 93 | { 94 | geiger::init(); 95 | foo(); 96 | return 0; 97 | } 98 | -------------------------------------------------------------------------------- /tests/basic.cpp: -------------------------------------------------------------------------------- 1 | #include "geiger/geiger.h" 2 | 3 | #include 4 | 5 | struct Basic : public ::testing::Test 6 | { 7 | static void SetUpTestCase() 8 | { 9 | geiger::init(); 10 | } 11 | 12 | Basic() 13 | { 14 | m_suite.on_test_complete([this](const std::string&, const geiger::test_report&) { ++m_on_test_complete_calls; }); 15 | m_suite.on_complete([this](const geiger::suite_report&) { ++m_on_complete_calls; }); 16 | } 17 | 18 | protected: 19 | geiger::suite<> m_suite; 20 | int m_on_test_complete_calls = 0; 21 | int m_on_complete_calls = 0; 22 | }; 23 | 24 | TEST_F(Basic, NoTest) 25 | { 26 | m_suite.run(); 27 | 28 | ASSERT_EQ(0, m_on_test_complete_calls); 29 | ASSERT_EQ(1, m_on_complete_calls); 30 | } 31 | 32 | TEST_F(Basic, OneTest_Lambda) 33 | { 34 | m_suite.add("foo", []() { }); 35 | m_suite.run(); 36 | 37 | ASSERT_EQ(1, m_on_test_complete_calls); 38 | ASSERT_EQ(1, 
m_on_complete_calls); 39 | } 40 | 41 | TEST_F(Basic, OneTest_Function) 42 | { 43 | struct Foo 44 | { 45 | static void Bar() {} 46 | }; 47 | 48 | m_suite.add("foo::bar", &Foo::Bar); 49 | m_suite.run(); 50 | 51 | ASSERT_EQ(1, m_on_test_complete_calls); 52 | ASSERT_EQ(1, m_on_complete_calls); 53 | } 54 | 55 | TEST_F(Basic, TwoTests_Lambda) 56 | { 57 | m_suite.add("foo", []() { }); 58 | m_suite.add("bar", []() { }); 59 | m_suite.run(); 60 | 61 | ASSERT_EQ(2, m_on_test_complete_calls); 62 | ASSERT_EQ(1, m_on_complete_calls); 63 | } 64 | 65 | TEST_F(Basic, TwoTests_RunTwice) 66 | { 67 | m_suite.add("foo", []() { }); 68 | m_suite.add("bar", []() { }); 69 | m_suite.run(); 70 | m_suite.run(); 71 | 72 | ASSERT_EQ(4, m_on_test_complete_calls); 73 | ASSERT_EQ(2, m_on_complete_calls); 74 | } 75 | 76 | TEST_F(Basic, OneTest_CorrectName) 77 | { 78 | m_suite.add("foo", []() { }); 79 | m_suite.on_test_complete([this](const std::string& name, const geiger::test_report&) 80 | { 81 | ASSERT_EQ("foo", name); 82 | 83 | }); 84 | m_suite.run(); 85 | } 86 | 87 | TEST_F(Basic, OneTest_NoPAPICounter) 88 | { 89 | m_suite.add("foo", []() { }); 90 | m_suite.on_test_complete([this](const std::string&, const geiger::test_report& report) 91 | { 92 | ASSERT_EQ(0, report.papi_counters().size()); 93 | 94 | }); 95 | m_suite.run(); 96 | } 97 | 98 | TEST_F(Basic, TwoTests_CorrectOrderReports) 99 | { 100 | m_suite.add("foo", []() { }); 101 | m_suite.add("bar", []() { }); 102 | m_suite.on_test_complete([this](const std::string& name, const geiger::test_report&) 103 | { 104 | static int i = 0; 105 | 106 | if (i++ == 0) 107 | ASSERT_EQ("foo", name); 108 | else 109 | ASSERT_EQ("bar", name); 110 | 111 | }); 112 | m_suite.run(); 113 | } 114 | -------------------------------------------------------------------------------- /geiger/papi.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | namespace 
geiger 10 | { 11 | 12 | inline std::string get_papi_event_name(int event_code) 13 | { 14 | char event_name[PAPI_MAX_STR_LEN]; 15 | PAPI_event_code_to_name(event_code, event_name); 16 | 17 | return event_name; 18 | } 19 | 20 | struct papi_wrapper_base 21 | { 22 | virtual void start() = 0; 23 | virtual void stop() = 0; 24 | }; 25 | 26 | template 27 | struct papi_wrapper : public papi_wrapper_base 28 | { 29 | static constexpr int events_count = sizeof...(_EventsT); 30 | typedef std::array counters_type; 31 | 32 | static const std::array& get_event_types() 33 | { 34 | return s_events; 35 | } 36 | 37 | void start() override 38 | { 39 | int ret; 40 | if ((ret = ::PAPI_start_counters(const_cast(s_events.data()), events_count)) != PAPI_OK) 41 | { 42 | std::string error("PAPI_start_counters failed with events: "); 43 | 44 | for (const std::string& name : s_event_names) 45 | { 46 | error += name; 47 | error += " "; 48 | } 49 | 50 | error += ": "; 51 | error += PAPI_strerror(ret); 52 | throw std::runtime_error(error); 53 | } 54 | } 55 | 56 | void stop() override 57 | { 58 | int ret; 59 | 60 | if ((ret = PAPI_stop_counters(&m_counters[0], events_count)) != PAPI_OK) 61 | throw std::runtime_error(PAPI_strerror(ret)); 62 | } 63 | 64 | const counters_type& get_counters() const 65 | { 66 | return m_counters; 67 | } 68 | 69 | template 70 | long long get_counter() const 71 | { 72 | return m_counters[_EventIndexT]; 73 | } 74 | 75 | template 76 | static constexpr int get_event_type() 77 | { 78 | return s_events[_EventIndexT]; 79 | } 80 | 81 | template 82 | static const std::string& get_event_name() 83 | { 84 | return s_event_names[_EventIndexT]; 85 | } 86 | 87 | private: 88 | static constexpr std::array s_events = {{_EventsT...}}; 89 | static const std::array s_event_names; 90 | 91 | counters_type m_counters; 92 | }; 93 | 94 | template 95 | constexpr std::array::events_count> papi_wrapper<_EventsT...>::s_events; 96 | 97 | template 98 | static auto get_papi_event_names(const std::array& 
events) 99 | { 100 | std::array ret; 101 | for (std::size_t i = 0; i < ret.size(); ++i) 102 | ret[i] = get_papi_event_name(events[i]); 103 | 104 | return ret; 105 | } 106 | 107 | template 108 | const std::array::events_count> papi_wrapper<_EventsT...>::s_event_names = 109 | get_papi_event_names(papi_wrapper<_EventsT...>::s_events); 110 | 111 | typedef papi_wrapper cache_profiler; 112 | typedef papi_wrapper instr_profiler; 113 | } 114 | -------------------------------------------------------------------------------- /geiger/printer_console.h: -------------------------------------------------------------------------------- 1 | #include "printer.h" 2 | #include "benchmark.h" 3 | 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | namespace geiger 14 | { 15 | 16 | namespace printer 17 | { 18 | 19 | namespace detail 20 | { 21 | template 22 | inline std::string to_string_with_commas(_IntT n) 23 | { 24 | std::string time_per_task = std::to_string(n); 25 | 26 | for (int pos = time_per_task.length() - 3; pos > 0; pos -= 3) 27 | time_per_task.insert(pos, ","); 28 | 29 | return time_per_task; 30 | } 31 | 32 | inline void papi_better_event_name(std::string& name) 33 | { 34 | using namespace boost; 35 | 36 | erase_first(name, "PAPI_"); 37 | replace_all(name, "_", " "); 38 | erase_first(name, "TOT"); 39 | trim(name); 40 | } 41 | 42 | template struct to_str {}; 43 | template <> struct to_str { static constexpr const char* value = "ns"; }; 44 | template <> struct to_str { static constexpr const char* value = "us"; }; 45 | template <> struct to_str { static constexpr const char* value = "ns"; }; 46 | template <> struct to_str { static constexpr const char* value = " s"; }; 47 | template <> struct to_str { static constexpr const char* value = "mn"; }; 48 | template <> struct to_str { static constexpr const char* value = " h"; }; 49 | } 50 | 51 | template 52 | struct console : public printer_base 53 | { 54 | void on_start(const 
suite_base& s) override 55 | { 56 | std::vector> names = s.test_names(); 57 | 58 | auto it = std::max_element(names.begin(), names.end(), [](const std::string& s1, const std::string& s2) 59 | { 60 | return s1.size() < s2.size(); 61 | }); 62 | 63 | m_first_col_width = it->get().size(); 64 | const std::string time_header = detail::to_str<_DurationT>::value + std::string(" / iteration"); 65 | 66 | int width = std::fprintf(stdout, "%-*s %17s %17s", m_first_col_width, "Test", "Iterations", time_header.c_str()); 67 | 68 | #ifdef USE_PAPI 69 | std::vector papi_events = s.papi_events(); 70 | for (int event : papi_events) 71 | { 72 | std::string event_name = get_papi_event_name(event); 73 | detail::papi_better_event_name(event_name); 74 | 75 | width += std::fprintf(stdout, " %12s", event_name.c_str()); 76 | } 77 | #endif 78 | 79 | std::cout << "\n" << std::string(width, '-') << std::endl; 80 | } 81 | 82 | void on_test_complete(const std::string& name, const test_report& r) override 83 | { 84 | std::string time_per_task = detail::to_string_with_commas(std::chrono::duration_cast<_DurationT>(r.time_per_task()).count()); 85 | std::string iteration_count = detail::to_string_with_commas(r.iteration_count()); 86 | 87 | std::fprintf(stdout, "%-*s %17s %17s", m_first_col_width, name.c_str(), iteration_count.c_str(), time_per_task.c_str()); 88 | 89 | for (long long counter : r.papi_counters()) 90 | { 91 | std::fprintf(stdout, " %12.2f", counter / double(r.iteration_count())); 92 | } 93 | 94 | std::cout << std::endl; 95 | } 96 | 97 | private: 98 | int m_first_col_width; 99 | }; 100 | 101 | } 102 | 103 | } 104 | -------------------------------------------------------------------------------- /geiger/chrono.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | namespace geiger 9 | { 10 | 11 | namespace detail 12 | { 13 | 14 | static inline uint64_t rdtsc() 15 | { 16 | uint64_t 
rax, rdx; 17 | __asm__ __volatile__("rdtsc" : "=a"(rax), "=d"(rdx)); 18 | return (rdx << 32) + rax; 19 | } 20 | 21 | static inline uint64_t rdtscp() 22 | { 23 | uint64_t rax, rcx, rdx; 24 | __asm__ __volatile__("rdtscp" : "=a"(rax), "=d"(rdx), "=c"(rcx)); 25 | return (rdx << 32) + rax; 26 | } 27 | 28 | static inline void cpuid() 29 | { 30 | uint64_t rax, rbx, rcx, rdx; 31 | __asm__ __volatile__("cpuid" : "=a"(rax), "=b"(rbx), "=d"(rdx), "=c"(rcx)); 32 | } 33 | 34 | static inline uint64_t rdtscp(int& chip, int& core) 35 | { 36 | uint64_t rax, rcx, rdx; 37 | __asm__ __volatile__("rdtscp" : "=a"(rax), "=d"(rdx), "=c"(rcx)); 38 | chip = (rcx & 0xFFF000) >> 12; 39 | core = rcx & 0xFFF; 40 | return (rdx << 32) + rax; 41 | } 42 | 43 | struct tsc 44 | { 45 | static double& get_freq_ghz() 46 | { 47 | static double tsc_freq_ghz = .0; 48 | return tsc_freq_ghz; 49 | } 50 | }; 51 | 52 | inline void init() 53 | { 54 | double& tsc_freq_ghz = detail::tsc::get_freq_ghz(); 55 | if (tsc_freq_ghz) 56 | return; 57 | 58 | using Clock = std::conditional_t; 61 | 62 | int chip, core, chip2, core2; 63 | 64 | auto start = Clock::now(); 65 | 66 | detail::cpuid(); 67 | uint64_t rdtsc_start = detail::rdtscp(chip, core); 68 | 69 | std::this_thread::sleep_for(std::chrono::milliseconds(500)); 70 | 71 | uint64_t rdtsc_end = detail::rdtscp(chip2, core2); 72 | detail::cpuid(); 73 | 74 | auto end = Clock::now(); 75 | 76 | if (core != core2 || chip != chip2) 77 | throw std::runtime_error("please set this executable to a specific CPU core (e.g. 
taskset -c X ./exec)"); 78 | 79 | auto duration_ns = std::chrono::duration_cast(end - start); 80 | uint64_t cycles = rdtsc_end - rdtsc_start; 81 | 82 | tsc_freq_ghz = (double)cycles / duration_ns.count(); 83 | } 84 | 85 | } 86 | 87 | struct tsc_chrono 88 | { 89 | tsc_chrono() =default; 90 | 91 | static void init() 92 | { 93 | detail::init(); 94 | } 95 | 96 | void start() 97 | { 98 | detail::cpuid(); 99 | m_start = detail::rdtsc(); 100 | } 101 | 102 | int64_t elapsed() const 103 | { 104 | uint64_t now = detail::rdtscp(); 105 | detail::cpuid(); 106 | return now - m_start; 107 | } 108 | 109 | int64_t elapsed_and_restart() 110 | { 111 | detail::cpuid(); 112 | uint64_t now = detail::rdtscp(); 113 | detail::cpuid(); 114 | int64_t ts = now - m_start; 115 | m_start = now; 116 | return ts; 117 | } 118 | 119 | std::chrono::nanoseconds elapsed_time() const 120 | { 121 | return from_cycles(elapsed()); 122 | } 123 | 124 | static std::chrono::nanoseconds from_cycles(int64_t cycles) 125 | { 126 | return std::chrono::nanoseconds(std::llround(cycles / detail::tsc::get_freq_ghz())); 127 | } 128 | 129 | static double get_freq_ghz() 130 | { 131 | return detail::tsc::get_freq_ghz(); 132 | } 133 | 134 | template 135 | static int64_t to_cycles(_DurationT duration) 136 | { 137 | return std::chrono::duration_cast(duration).count() * detail::tsc::get_freq_ghz(); 138 | } 139 | 140 | private: 141 | uint64_t m_start; 142 | }; 143 | 144 | using chrono = tsc_chrono; 145 | 146 | } 147 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | geiger 2 | ===== 3 | A micro benchmark library in C++ that supports hardware performance counters. 4 | 5 | Why and what ? 
6 | - Because you cannot do a micro-benchmark by running _perf_ 7 | - Simple API, header-only library 8 | - Each test is run either for a given number of iterations or for a specified duration 9 | 10 | 11 | Build & install 12 | --------------- 13 | ```bash 14 | $ mkdir build && cd build 15 | $ cmake -DCMAKE_INSTALL_PREFIX=/usr .. 16 | $ make 17 | # make install 18 | ``` 19 | 20 | 21 | Examples 22 | -------- 23 | 24 | ### Time measurement 25 | The simplest usage of geiger is to measure the time required for a task: 26 | 27 | ```c++ 28 | #include <geiger/geiger.h> 29 | 30 | #include <cstdlib> 31 | #include <vector> 32 | 33 | int main() 34 | { 35 | // This is mandatory before running any benchmarks 36 | geiger::init(); 37 | 38 | // A benchmark suite that does only time measurement 39 | geiger::suite<> s; 40 | 41 | s.add("rand", []() 42 | { 43 | std::rand(); 44 | }); 45 | 46 | s.add("vector push_back", []() 47 | { 48 | std::vector<int> v; 49 | v.push_back(1000); 50 | }); 51 | 52 | // Redirection of each test result to the "console" printer 53 | s.set_printer<geiger::printer::console<>>(); 54 | 55 | // Run each test for one second 56 | s.run(); 57 | 58 | return 0; 59 | } 60 | ``` 61 | 62 | This code will output: 63 | 64 | ``` 65 | Test Time (ns) 66 | --------------------------- 67 | rand 14 68 | vector push_back 47 69 | ``` 70 | 71 | By default - as in the example above - each test runs for one second. Here, the "rand" test has therefore been executed 72 | tens of thousands of times, and the average execution time was 14ns. 73 | 74 | You can specify the duration you want as an argument to *geiger::suite::run()*... 75 | 76 | ```c++ 77 | // Run each test for one millisecond 78 | s.run(std::chrono::milliseconds(1)); 79 | ``` 80 | 81 | ... or a number of iterations: 82 | 83 | ```c++ 84 | // Run each test exactly 100 times 85 | s.run(100); 86 | ``` 87 | 88 | Be aware that too short a duration - or too few iterations - can result in less accurate measurements. 
89 | 90 | When a duration is specified, geiger first performs a calibration stage, before running the benchmark, in which it approximates the number of iterations required to run the task for the specified time. 91 | 92 | 93 | --- 94 | 95 | ### Hardware counters 96 | A more advanced usage of geiger is to include hardware counters. This is done by specifying a list of *papi_wrapper<_EventT...>* in the 97 | template parameter list of *geiger::suite<>*: 98 | 99 | ```c++ 100 | // cache_profiler reports the number of L1, L2 and L3 cache misses. 101 | suite<cache_profiler> s; 102 | 103 | s.add("linear walk", linear_walk()) 104 | .add("random walk", random_walk()); 105 | 106 | s.run(); 107 | ``` 108 | 109 | The output now also displays the number of hardware events per test run. 110 | 111 | ``` 112 | Test Time (ns) PAPI_L1_DCM PAPI_L2_DCM PAPI_L3_TCM 113 | -------------------------------------------------------------------------------- 114 | linear walk 88 1 0 0 115 | random walk 613 64 64 55 116 | ``` 117 | 118 | You can cover the events you want by defining your own PAPI wrapper. 
For example, if you are interested by events 119 | around branch predictions: 120 | 121 | ```c++ 122 | // Measuring branches taken, not taken, mispredicted and correctly predicted 123 | using branch_profiler = papi_wrapper 124 | 125 | suite s; 126 | ``` 127 | 128 | 129 | --- 130 | 131 | ### Events 132 | ### Implementation details 133 | -------------------------------------------------------------------------------- /examples/time.cpp: -------------------------------------------------------------------------------- 1 | #include "geiger/geiger.h" 2 | 3 | #include 4 | #include 5 | 6 | extern "C" 7 | { 8 | #include 9 | #include 10 | } 11 | 12 | __thread std::array arr_tls; 13 | std::array arr, arr2; 14 | 15 | void* fuck_my_cache(int count) 16 | { 17 | std::vector v; 18 | v.reserve(count); 19 | 20 | for (int i = 0; i < count; ++i) 21 | { 22 | v.emplace_back(count * 2); 23 | count++; 24 | } 25 | 26 | return nullptr; 27 | // return &v; 28 | } 29 | 30 | int main(int argc, char **argv) 31 | { 32 | geiger::init(); 33 | 34 | // A benchmark suite that does only time measurement 35 | geiger::suite s; 36 | uint64_t res = 0; 37 | int base = atoi(argv[1]); 38 | float offset = atof(argv[2]); 39 | 40 | std::vector v; 41 | v.reserve(1e6); 42 | 43 | //struct timespec ts; 44 | //clock_gettime(CLOCK_MONOTONIC_RAW, &ts); 45 | //v.emplace_back(ts.tv_sec * 1e9 + ts.tv_nsec); 46 | 47 | 48 | 49 | for (int i = 0; i < arr.size(); ++i) 50 | { 51 | arr[i] = base * i + std::round(offset); 52 | arr2[i] = base * 0.2 * i + std::round(offset); 53 | arr_tls[i] = base * i; 54 | } 55 | 56 | s.add("tls only ", [&]() 57 | { 58 | uint64_t sum = 0; 59 | for (int i = 0; i < arr.size(); ++i) 60 | sum += arr_tls[i]; 61 | v.emplace_back(sum); 62 | }); 63 | 64 | s.add("standard ", [&]() 65 | { 66 | uint64_t sum = 0; 67 | for (int i = 0; i < arr.size(); ++i) 68 | sum += arr[i]; 69 | v.emplace_back(sum); 70 | }); 71 | 72 | 73 | s.add("mix tls & no tls ", [&]() 74 | { 75 | uint64_t sum = 0; 76 | for (int i = 0; i < 
arr.size(); ++i) 77 | { 78 | sum += arr[i]; 79 | sum += arr_tls[i]; 80 | } 81 | v.emplace_back(sum); 82 | }); 83 | 84 | s.add("mix no tls ", [&]() 85 | { 86 | uint64_t sum = 0; 87 | for (int i = 0; i < arr.size(); ++i) 88 | { 89 | sum += arr[i]; 90 | sum += arr2[i]; 91 | } 92 | v.emplace_back(sum); 93 | }); 94 | 95 | s.add("rdtsc", [&]() 96 | { 97 | res += geiger::detail::rdtsc(); 98 | }); 99 | 100 | s.add("rdtsc + mul + add", [&]() 101 | { 102 | res += geiger::detail::rdtsc() * offset + base; 103 | }); 104 | /* 105 | s.add("gettimeofday", [&]() 106 | { 107 | struct timeval tv; 108 | gettimeofday(&tv, nullptr); 109 | res += tv.tv_sec * 1e9 + tv.tv_usec * 1e3; 110 | }); 111 | */ 112 | s.add("clock_gettime CLOCK_REALTIME", [&]() 113 | { 114 | struct timespec ts; 115 | clock_gettime(CLOCK_REALTIME, &ts); 116 | v.emplace_back(ts.tv_sec * 1e9 + ts.tv_nsec); 117 | }); 118 | 119 | s.add("clock_gettime CLOCK_MONOTONIC", [&]() 120 | { 121 | struct timespec ts; 122 | clock_gettime(CLOCK_MONOTONIC, &ts); 123 | v.emplace_back(ts.tv_sec * 1e9 + ts.tv_nsec); 124 | }); 125 | 126 | 127 | s.add("clock_gettime CLOCK_MONOTONIC_RAW", [&]() 128 | { 129 | struct timespec ts; 130 | clock_gettime(CLOCK_MONOTONIC_RAW, &ts); 131 | v.emplace_back(ts.tv_sec * 1e9 + ts.tv_nsec); 132 | }); 133 | 134 | s.add("clock_gettime CLOCK_REALTIME_COARSE", [&]() 135 | { 136 | struct timespec ts; 137 | clock_gettime(CLOCK_REALTIME_COARSE, &ts); 138 | v.emplace_back(ts.tv_sec * 1e9 + ts.tv_nsec); 139 | }); 140 | 141 | 142 | // Redirection of each test result to the "console" printer 143 | s.set_printer(); 144 | 145 | // Run all benchmarks 146 | s.run(); 147 | 148 | (void)fuck_my_cache(4 << 20); 149 | sleep(3); 150 | 151 | s.run(1); 152 | 153 | return 0; 154 | } 155 | -------------------------------------------------------------------------------- /geiger/benchmark.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifdef USE_PAPI 4 | #include 
"papi.h" 5 | #endif 6 | 7 | #include "chrono.h" 8 | #include "printer.h" 9 | 10 | #include 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | namespace geiger 21 | { 22 | 23 | struct test_report 24 | { 25 | test_report(long iterations, int64_t cycles, std::vector&& papi_counters = {}) 26 | : m_iterations(iterations), m_cycles(cycles), m_papi_counters(papi_counters) 27 | { 28 | } 29 | 30 | long iteration_count() const 31 | { 32 | return m_iterations; 33 | } 34 | 35 | int64_t total_cycles() const 36 | { 37 | return m_cycles; 38 | } 39 | 40 | double cycles_per_task() const 41 | { 42 | return m_cycles / (double)m_iterations; 43 | } 44 | 45 | std::chrono::nanoseconds time_per_task() const 46 | { 47 | return geiger::chrono::from_cycles(cycles_per_task()); 48 | } 49 | 50 | const std::vector& papi_counters() const 51 | { 52 | return m_papi_counters; 53 | } 54 | 55 | private: 56 | long m_iterations; 57 | int64_t m_cycles; 58 | std::vector m_papi_counters; 59 | }; 60 | 61 | struct test_base 62 | { 63 | test_base(const std::string& name) : m_name(name) 64 | { 65 | } 66 | virtual ~test_base() 67 | { 68 | } 69 | 70 | virtual test_report run(std::chrono::milliseconds duration) const = 0; 71 | virtual test_report run(long iterations, boost::optional duration = boost::none) const = 0; 72 | 73 | const std::string& name() const 74 | { 75 | return m_name; 76 | } 77 | 78 | private: 79 | std::string m_name; 80 | }; 81 | 82 | template 83 | struct test : public test_base 84 | { 85 | test(const std::string& name, _CallableT&& callable = _CallableT()) : test_base(name), m_callable(callable) 86 | { 87 | } 88 | 89 | test_report run(std::chrono::milliseconds duration) const override; 90 | test_report run(long iterations, boost::optional duration = boost::none) const override; 91 | 92 | private: 93 | _CallableT m_callable; 94 | }; 95 | 96 | struct suite_report 97 | { 98 | std::vector> tests; 99 | }; 100 | 101 | struct suite_base 102 
| { 103 | virtual ~suite_base() 104 | { 105 | } 106 | 107 | virtual suite_base& run(std::chrono::milliseconds duration) = 0; 108 | virtual suite_base& run(long iterations) =0; 109 | 110 | virtual std::vector> test_names() const = 0; 111 | 112 | #ifdef USE_PAPI 113 | virtual std::vector papi_events() const = 0; 114 | #endif 115 | }; 116 | 117 | template 118 | struct suite : public suite_base 119 | { 120 | suite& run(std::chrono::milliseconds duration = std::chrono::seconds(1)) override; 121 | suite& run(long iterations) override; 122 | 123 | std::vector> test_names() const override; 124 | 125 | #ifdef USE_PAPI 126 | std::vector papi_events() const override; 127 | #endif 128 | 129 | typedef std::function test_complete_t; 130 | typedef std::function suite_complete_t; 131 | 132 | template 133 | suite& add(const std::string& name, _CallableT&& callable) 134 | { 135 | m_tests.emplace_back(new test<_CallableT, _PAPIWrappersT...>(name, std::move(callable))); 136 | return *this; 137 | } 138 | 139 | template 140 | suite& set_printer(_PrinterT&& printer = _PrinterT()) 141 | { 142 | m_printer.reset(new _PrinterT(std::move(printer))); 143 | return *this; 144 | } 145 | 146 | suite& on_test_complete(test_complete_t f) 147 | { 148 | m_on_test_complete = f; 149 | return *this; 150 | } 151 | 152 | suite& on_complete(suite_complete_t f) 153 | { 154 | m_on_complete = f; 155 | return *this; 156 | } 157 | 158 | private: 159 | template 160 | suite& run_impl(_DurationT duration); 161 | 162 | std::vector> m_tests; 163 | std::unique_ptr m_printer; 164 | 165 | test_complete_t m_on_test_complete; 166 | suite_complete_t m_on_complete; 167 | }; 168 | } 169 | 170 | #include "benchmark.tcc" 171 | -------------------------------------------------------------------------------- /geiger/benchmark.tcc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | namespace geiger 9 | { 10 | 11 | template 
12 | std::vector> suite<_PAPIWrappersT...>::test_names() const 13 | { 14 | std::vector> v; 15 | 16 | for (const auto& test : m_tests) 17 | v.push_back(test->name()); 18 | 19 | return v; 20 | } 21 | 22 | #ifdef USE_PAPI 23 | 24 | template 25 | std::vector suite<_PAPIWrappersT...>::papi_events() const 26 | { 27 | std::vector v; 28 | std::tuple<_PAPIWrappersT...> papi_wrappers; 29 | 30 | boost::fusion::for_each(papi_wrappers, [&v](auto& papi) 31 | { 32 | for (int event : papi.get_event_types()) 33 | v.push_back(event); 34 | }); 35 | 36 | return v; 37 | } 38 | 39 | #endif 40 | 41 | template 42 | suite<_PAPIWrappersT...>& suite<_PAPIWrappersT...>::run(std::chrono::milliseconds duration) 43 | { 44 | return run_impl(duration); 45 | } 46 | 47 | template 48 | suite<_PAPIWrappersT...>& suite<_PAPIWrappersT...>::run(long iterations) 49 | { 50 | return run_impl(iterations); 51 | } 52 | 53 | template 54 | template 55 | suite<_PAPIWrappersT...>& suite<_PAPIWrappersT...>::run_impl(_DurationT duration) 56 | { 57 | suite_report r; 58 | 59 | if (m_printer) 60 | m_printer->on_start(*this); 61 | 62 | for (const auto& p : m_tests) 63 | { 64 | test_report test_report = p->run(duration); 65 | r.tests.emplace_back(p->name(), test_report); 66 | 67 | if (m_on_test_complete) 68 | m_on_test_complete(p->name(), test_report); 69 | 70 | if (m_printer) 71 | m_printer->on_test_complete(p->name(), test_report); 72 | } 73 | 74 | if (m_on_complete) 75 | m_on_complete(r); 76 | 77 | return *this; 78 | } 79 | 80 | template 81 | test_report test<_CallableT, _PAPIWrappersT...>::run(std::chrono::milliseconds duration) const 82 | { 83 | test_report r = run(1); 84 | 85 | assert((r.iteration_count() == 1 && sizeof...(_PAPIWrappersT) == 0) 86 | || r.iteration_count() == sizeof...(_PAPIWrappersT)); 87 | 88 | auto time_elapsed = r.time_per_task(); 89 | if (time_elapsed > duration) 90 | return r; 91 | 92 | long iterations = duration / time_elapsed; 93 | return run(iterations, std::chrono::nanoseconds(duration)); 
94 | } 95 | 96 | template 97 | test_report test<_CallableT, _PAPIWrappersT...>::run(long iterations, 98 | boost::optional duration) const 99 | { 100 | long batches; 101 | 102 | if (!duration) 103 | { 104 | batches = 1; 105 | } 106 | else 107 | { 108 | // TODO 109 | if (iterations > 1e6) 110 | batches = 1e3; 111 | else if (iterations > 1e4) 112 | batches = 100; 113 | else if (iterations > 1e3) 114 | batches = 10; 115 | else 116 | batches = 1; 117 | } 118 | 119 | iterations /= batches; 120 | chrono c; 121 | 122 | auto run_benchmark = [&]() -> auto 123 | { 124 | c.start(); 125 | 126 | for (long i = 0; i < iterations; ++i) 127 | m_callable(); 128 | 129 | return c.elapsed(); 130 | }; 131 | 132 | auto next_iterations_count = [&](int64_t cycles_last_batch) 133 | { 134 | if (duration) 135 | { 136 | int64_t expected_cycles = chrono::to_cycles(duration.get() / batches); 137 | double calibration = cycles_last_batch / (double)expected_cycles; 138 | 139 | iterations = std::lround(iterations / calibration); 140 | } 141 | }; 142 | 143 | constexpr long papi_wrapppers_count = sizeof...(_PAPIWrappersT); 144 | 145 | int64_t total_cycles = 0; 146 | long total_iterations = 0; 147 | 148 | #ifndef USE_PAPI 149 | static_assert(papi_wrapppers_count == 0, "PAPI not supported"); 150 | #endif 151 | 152 | #ifdef USE_PAPI 153 | if (papi_wrapppers_count == 0) 154 | { 155 | #endif 156 | for (int i = 0; i < batches; ++i) 157 | { 158 | int64_t cycles = run_benchmark(); 159 | 160 | total_iterations += iterations; 161 | total_cycles += cycles; 162 | next_iterations_count(cycles); 163 | } 164 | 165 | return {total_iterations, total_cycles}; 166 | #ifdef USE_PAPI 167 | } 168 | 169 | std::tuple<_PAPIWrappersT...> papi_wrappers; 170 | int hwd_counters = boost::fusion::accumulate(papi_wrappers, 0, [&](int count, auto& papi) { return count + papi.events_count; }); 171 | 172 | std::vector counters; 173 | counters.reserve(hwd_counters); 174 | 175 | boost::fusion::for_each(papi_wrappers, [&](auto& papi) 176 | 
{ 177 | std::vector curr_counters(papi.events_count); 178 | 179 | for (int i = 0; i < batches; ++i) 180 | { 181 | papi.start(); 182 | int64_t cycles = run_benchmark(); 183 | papi.stop(); 184 | 185 | total_iterations += iterations; 186 | total_cycles += cycles; 187 | next_iterations_count(cycles); 188 | 189 | const auto& batch_counters = papi.get_counters(); 190 | 191 | for (int j = 0; j < (int)curr_counters.size(); ++j) 192 | curr_counters[j] += batch_counters[j]; 193 | } 194 | 195 | std::copy(curr_counters.begin(), 196 | curr_counters.end(), 197 | std::back_inserter(counters)); 198 | }); 199 | 200 | return {total_iterations, total_cycles, std::move(counters)}; 201 | 202 | #endif 203 | } 204 | 205 | } 206 | --------------------------------------------------------------------------------