├── .gitignore ├── CMakeLists.txt ├── LICENSE_1_0_0.txt ├── README.md ├── docs └── ORIGINAL_README.txt ├── include ├── benchmark_algorithms.h ├── benchmark_results.h ├── benchmark_shared_tests.h ├── benchmark_stdint.hpp └── benchmark_timer.h └── src ├── functionobjects.cpp ├── loop_unroll.cpp ├── machine.cpp ├── simple_types_constant_folding.cpp ├── simple_types_loop_invariant.cpp ├── stepanov_abstraction.cpp └── stepanov_vector.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | /build/* -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | project(cpp_benchmark) 3 | 4 | include_directories(include) 5 | 6 | add_executable(machine src/machine.cpp) 7 | add_executable(stepanov_abstraction src/stepanov_abstraction.cpp) 8 | add_executable(stepanov_vector src/stepanov_vector.cpp) 9 | add_executable(loop_unroll src/loop_unroll.cpp) 10 | add_executable(simple_types_loop_invariant src/simple_types_loop_invariant.cpp) 11 | add_executable(functionobjects src/functionobjects.cpp) 12 | add_executable(simple_types_constant_folding src/simple_types_constant_folding.cpp) 13 | 14 | enable_testing() 15 | add_test(NAME report_machine COMMAND machine) 16 | add_test(NAME report_stepanov_abstraction COMMAND stepanov_abstraction) 17 | add_test(NAME report_stepanov_vector COMMAND stepanov_vector) 18 | add_test(NAME report_loop_unroll COMMAND loop_unroll) 19 | add_test(NAME report_simple_types_loop_invariant COMMAND simple_types_loop_invariant) 20 | add_test(NAME report_functionobjects COMMAND functionobjects) 21 | add_test(NAME report_simple_types_constant_folding COMMAND simple_types_constant_folding) -------------------------------------------------------------------------------- /LICENSE_1_0_0.txt: 
-------------------------------------------------------------------------------- 1 | Copyright (c) 2007-2008 Adobe Systems Incorporated 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so, 8 | subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 15 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 16 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Adobe's C++ Performance Benchmarks for modern compilers (and build systems) 2 | 3 | Imported and cleaned up from http://stlab.adobe.com/performance/. 4 | 5 | ### Overview 6 | 7 | This is an updated version of Adobe STL Labs' C++ Performance Benchmark. 8 | The code has mostly been left untouched; the main goal of this version is to provide a simple CMake 9 | build so that it can be run on any modern toolchain. 10 | 11 | The original readme that includes the creator's intent can still be found under `docs/`. 
12 | 13 | ### Build & run 14 | 15 | Simply build it like any CMake project and run all tests through `ctest -V`. 16 | 17 | ### Credits 18 | 19 | Original work by Chris Cox and Adobe STL Labs: http://stlab.adobe.com/performance/credits.html. 20 | 21 | ### License 22 | 23 | This project is licensed under the MIT License. -------------------------------------------------------------------------------- /docs/ORIGINAL_README.txt: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2007-2008 Adobe Systems Incorporated 3 | Distributed under the MIT License (see accompanying file LICENSE_1_0_0.txt 4 | or a copy at http://stlab.adobe.com/licenses.html ) 5 | */ 6 | 7 | /******************************************************************************/ 8 | 9 | Goals: 10 | 11 | To help compiler vendors identify places where they may be able to improve 12 | the performance of the code they generate. 13 | 14 | To help developers understand the performance impact of using different 15 | data types, operations, and C++ language features with their 16 | target compilers and OSes. 17 | 18 | /******************************************************************************/ 19 | 20 | Secondary goals: 21 | 22 | To take performance problems found in real world code and turn them 23 | into benchmarks for compiler vendors and other developers to learn from. 24 | 25 | Keep the benchmark portable to as many compilers and OSes as possible 26 | This means keeping things simple and external dependencies minimal 27 | 28 | Not to use specialized optimization flags per test 29 | No pragmas or other compiler directives are allowed in the source. 30 | All source files should use the same compilation flags. 31 | Use the common optimization flags (-O, -O1, -O2, -O3, or -Os). 32 | If another option improves optimization, then why isn't it on for -O3? 
33 | If an optimization flag doesn't always improve performance, that is 34 | most likely a bug in the optimization code that needs to be fixed. 35 | In the real world, developers can't test all permutations of all 36 | optimization flags. They expect the standard flags to work. 37 | 38 | /******************************************************************************/ 39 | 40 | **** A note to compiler vendors: 41 | Please match the idioms, not the instances. 42 | The benchmark code will be changing over time. 43 | And we do read your assembly output. 44 | 45 | /******************************************************************************/ 46 | 47 | Building: 48 | 49 | Unix users should be able to use "make all" to build and "make report" 50 | to generate the report. If you wish to use a different compiler, you can 51 | set that from the make command line, or edit the makefile. 52 | 53 | Windows users will need to make sure that the VC environment variables 54 | are set for their shell (command prompt), then use "nmake -f makefile.nt all" 55 | and "nmake -f makefile.nt report" from within that shell. 
/* ---- /include/benchmark_algorithms.h ---- */
/*
    Copyright 2007-2008 Adobe Systems Incorporated
    Distributed under the MIT License (see accompanying file LICENSE_1_0_0.txt
    or a copy at http://stlab.adobe.com/licenses.html)

    Shared source file for algorithms used in multiple benchmark files

    NOTE(review): the template parameter lists and the static_cast target
    types were missing from the copy this was restored from. They have been
    reconstructed from how each body uses its types (and from the surviving
    call `sift_in< Iterator, T>(...)`). Confirm the parameter order against
    the callers in src/ before relying on explicit template arguments.
*/

#ifndef BENCHMARK_ALGORITHMS_H
#define BENCHMARK_ALGORITHMS_H

#include <cstddef>   // std::ptrdiff_t
#include <cstdlib>   // std::rand
#include <iterator>  // std::iterator_traits

namespace benchmark {

/******************************************************************************/

// Returns true when [first, last) is in non-descending order according to
// operator<. An empty or single-element range counts as sorted.
template <typename Iterator>
bool is_sorted(Iterator first, Iterator last) {
    // Guard: stepping past `last` on an empty range would be undefined.
    if (first == last)
        return true;

    Iterator prev = first;
    ++first;
    while (first != last) {
        if (*first < *prev)
            return false;
        ++first;
        ++prev;
    }
    return true;
}

/******************************************************************************/

// Assigns `value` to every element of [first, last).
template <typename Iterator, typename T>
void fill(Iterator first, Iterator last, T value) {
    while (first != last) *first++ = value;
}

/******************************************************************************/

// Assigns successive rand() results, converted to T, to [first, last).
// T defaults to the iterator's value type; it may also be given explicitly.
template <typename Iterator,
          typename T = typename std::iterator_traits<Iterator>::value_type>
void fill_random(Iterator first, Iterator last) {
    while (first != last) {
        *first++ = static_cast<T>( std::rand() );
    }
}

/******************************************************************************/

// Writes count-1, count-2, ... into [first, last), converted to T.
// `count` is unsigned, so it wraps around if the range holds more than
// `count` elements.
template <typename Iterator,
          typename T = typename std::iterator_traits<Iterator>::value_type>
void fill_descending(Iterator first, Iterator last, unsigned count) {
    while (first != last) {
        *first++ = static_cast<T>( --count );
    }
}

/******************************************************************************/

// Copies [firstSource, lastSource) to the range starting at firstDest.
template <typename Iterator1, typename Iterator2>
void copy(Iterator1 firstSource, Iterator1 lastSource, Iterator2 firstDest) {
    while (firstSource != lastSource) *(firstDest++) = *(firstSource++);
}

/******************************************************************************/

// Reverses [begin, end) in place. `doswap(a, b)` is called with two
// iterators and is expected to exchange the elements they refer to.
template <typename Iterator, typename Swapper>
void reverse(Iterator begin, Iterator end, Swapper doswap)
{
    while (begin != end)
    {
        --end;
        if (begin == end)
            break;
        doswap(begin, end);
        ++begin;
    }
}

/******************************************************************************/

// our accumulator function template, using iterators or pointers
// Returns `result` plus every element of [first, last), added left to right.
template <typename Iterator, typename Number>
Number accumulate(Iterator first, Iterator last, Number result) {
    while (first != last) result = result + *first++;
    return result;
}

/******************************************************************************/

// In-place insertion sort of [begin, end) using operator<.
// T is the element type used for the temporary; it defaults to the
// iterator's value type.
template <typename Iterator,
          typename T = typename std::iterator_traits<Iterator>::value_type>
void insertionSort( Iterator begin, Iterator end )
{
    // Guard: the first increment below would step past `end` on an empty range.
    if ( begin == end )
        return;

    Iterator p = begin;
    ++p;

    while ( p != end ) {
        T tmp = *p;
        Iterator j = p;
        Iterator prev = j;

        // Shift larger elements one slot to the right until tmp's slot is found.
        for ( ; j != begin && tmp < *--prev; --j ) {
            *j = *prev;
        }

        *j = tmp;
        ++p;
    }
}

/******************************************************************************/

// In-place quicksort of the random-access range [begin, end) using
// operator<, with the first element as the pivot value.
template <typename Iterator,
          typename T = typename std::iterator_traits<Iterator>::value_type>
void quicksort(Iterator begin, Iterator end)
{
    if ( (end - begin) > 1 ) {

        T middleValue = *begin;
        Iterator left = begin;
        Iterator right = end;

        for(;;) {

            // walk `right` down to an element not greater than the pivot
            while ( middleValue < *(--right) );
            if ( !(left < right ) ) break;

            // walk `left` up to an element not less than the pivot
            while ( *(left) < middleValue )
                ++left;
            if ( !(left < right ) ) break;

            // swap
            T temp = *right;
            *right = *left;
            *left = temp;
        }

        quicksort<Iterator, T>( begin, right + 1 );
        quicksort<Iterator, T>( right + 1, end );
    }
}

/******************************************************************************/

// Same partitioning scheme as quicksort above, but elements are exchanged
// through `doswap(a, b)`, which receives two iterators.
template <typename Iterator,
          typename T = typename std::iterator_traits<Iterator>::value_type,
          typename Swapper>
void quicksort(Iterator begin, Iterator end, Swapper doswap)
{
    if ( (end - begin) > 1 ) {

        T middleValue = *begin;
        Iterator left = begin;
        Iterator right = end;

        for(;;) {

            while ( middleValue < *(--right) );
            if ( !(left < right ) ) break;

            while ( *(left) < middleValue )
                ++left;
            if ( !(left < right ) ) break;

            // swap
            doswap( right, left );
        }

        quicksort<Iterator, T, Swapper>( begin, right + 1, doswap );
        quicksort<Iterator, T, Swapper>( right + 1, end, doswap );
    }
}

/******************************************************************************/

// Heap helper: the slot `free_in` of the heap stored in
// [begin, begin+count) is treated as empty, and `next` is placed so that
// the heap ordering (by operator<) holds again.
template <typename Iterator, typename T>
void sift_in(std::ptrdiff_t count, Iterator begin, std::ptrdiff_t free_in, T next)
{
    std::ptrdiff_t i;
    std::ptrdiff_t free = free_in;

    // sift up the free node
    // (i is the 1-based index of the right child; `i += i` doubles it, which
    //  equals 2*(free+1) after `free = i-1`)
    for ( i = 2*(free+1); i < count; i += i) {
        if ( *(begin+(i-1)) < *(begin+i))
            i++;
        *(begin + free) = *(begin+(i-1));
        free = i-1;
    }

    // special case in sift up if the last inner node has only 1 child
    if (i == count) {
        *(begin + free) = *(begin+(i-1));
        free = i-1;
    }

    // sift down the new item next
    i = (free-1)/2;
    while( (free > free_in) && *(begin+i) < next) {
        *(begin + free) = *(begin+i);
        free = i;
        i = (free-1)/2;
    }

    *(begin + free) = next;
}

// In-place heapsort of the random-access range [begin, end) using operator<.
template <typename Iterator,
          typename T = typename std::iterator_traits<Iterator>::value_type>
void heapsort(Iterator begin, Iterator end)
{
    std::ptrdiff_t j;
    std::ptrdiff_t count = end - begin;

    // build the heap structure
    for( j = (count / 2) - 1; j >= 0; --j) {
        T next = *(begin+j);
        sift_in< Iterator, T>(count, begin, j, next);
    }

    // search next by next remaining extremal element
    for( j = count - 1; j >= 1; --j) {
        T next = *(begin+j);
        *(begin+j) = *(begin);
        sift_in< Iterator, T>(j, begin, 0, next);
    }
}

} // end namespace benchmark

// Kept for existing callers that use these names unqualified.
using namespace benchmark;

#endif // BENCHMARK_ALGORITHMS_H

/******************************************************************************/
/******************************************************************************/
/* ---- /include/benchmark_results.h ---- */
/*
    Copyright 2007-2008 Adobe Systems Incorporated
    Distributed under the MIT License (see accompanying file LICENSE_1_0_0.txt
    or a copy at http://stlab.adobe.com/licenses.html)

    Source file for shared result reporting used by most of the benchmarks
*/

#ifndef BENCHMARK_RESULTS_H
#define BENCHMARK_RESULTS_H

/******************************************************************************/

/*
    Yes, this would be easier with a class or std::vector
    but it needs to work for both C and C++ code

    Because the implementation lives in this header, include it from exactly
    one translation unit per executable.
*/

/* declarations */

/* One timing sample: elapsed seconds plus the caller-owned label string.
   The label pointer is stored as-is (not copied), so it must outlive the
   call to summarize()/summarize_simplef(). */
typedef struct one_result {
    double time;
    const char *label;
} one_result;

extern one_result *results;

void record_result( double time, const char *label );


/******************************************************************************/

/* implementation */

/* NOTE(review): the five header names were missing from the copy this file
   was restored from; these are the C headers the code below requires
   (printf/FILE, realloc/exit, strlen, log/exp, size_t). Confirm against the
   upstream file. */
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>

one_result *results = NULL;     /* growable array of recorded samples */
int current_test = 0;           /* number of samples recorded since the last summary */
int allocated_results = 0;      /* capacity of `results`, in elements */

/* Appends one (time, label) sample, growing the array 10 entries at a time.
   Prints a message and exits the process with status -1 if memory cannot
   be obtained. */
void record_result( double time, const char *label ) {

    if ( results == NULL || current_test >= allocated_results) {
        one_result *grown;

        allocated_results += 10;
        /* keep the old pointer until realloc is known to have succeeded */
        grown = (one_result *) realloc( results, (size_t)allocated_results * sizeof(one_result) );
        if (grown == NULL) {
            printf("Could not allocate %d results\n", allocated_results);
            exit(-1);
        }
        results = grown;
    }

    results[current_test].time = time;
    results[current_test].label = label;
    current_test++;
}

/******************************************************************************/

/* values for the show_gmeans / show_penalty arguments of summarize() */
const int kShowGMeans = 1;
const int kDontShowGMeans = 0;

const int kShowPenalty = 1;
const int kDontShowPenalty = 0;

/******************************************************************************/

/*
    I need to be able to parse the label and absolute time from each entry, correctly
    BUT this also needs to be human readable for people testing/debugging the code
    (otherwise I'd use XML and make it really easy (if somewhat slow) to parse)
    (No, XML does not qualify as human readable)

    parse as:
        %i ([ ]*)\"%s\" %f sec %f M %f\r

    Prints one row per recorded sample (time, millions of operations per
    second, ratio to sample 0), then the total time, then - when more than one
    sample exists and show_penalty is non-zero - the geometric mean of the
    ratios to sample 0. Resets current_test to 0 afterwards.

    name         - label used in the "Total" and "Penalty" lines
    size         - multiplied by `iterations` to get the operation count
    iterations   - see `size`
    show_gmeans  - accepted for interface compatibility; not used here
    show_penalty - non-zero to print the penalty line

    A sample time of 0 yields inf/nan in the ratio columns; that is left as-is.

    NOTE(review): runs of spaces inside the format strings may have been
    collapsed in the copy this was restored from - check column alignment
    against the upstream file.
*/
void summarize(const char *name, int size, int iterations, int show_gmeans, int show_penalty ) {
    int i;
    double millions = ((double)(size) * iterations)/1000000.0;
    double total_absolute_times = 0.0;
    double gmean_ratio = 0.0;

    /* find longest label so we can adjust formatting
        12 = strlen("description")+1 */
    int longest_label_len = 12;

    (void)show_gmeans;  /* unused: silences the compiler warning */

    for (i = 0; i < current_test; ++i) {
        int len = (int)strlen(results[i].label);
        if (len > longest_label_len)
            longest_label_len = len;
    }

    printf("\ntest %*s description absolute operations ratio with\n", longest_label_len-12, " ");
    printf("number %*s time per second test0\n\n", longest_label_len, " ");

    for (i = 0; i < current_test; ++i)
        printf("%2i %*s\"%s\" %5.2f sec %5.2f M %.2f\n",
                i,
                (int)(longest_label_len - strlen(results[i].label)),
                "",
                results[i].label,
                results[i].time,
                millions/results[i].time,
                results[i].time/results[0].time);

    /* calculate total time */
    for (i = 0; i < current_test; ++i) {
        total_absolute_times += results[i].time;
    }

    /* report total time */
    printf("\nTotal absolute time for %s: %.2f sec\n", name, total_absolute_times);

    if ( current_test > 1 && show_penalty ) {

        /* calculate gmean of tests compared to baseline */
        for (i = 1; i < current_test; ++i) {
            gmean_ratio += log(results[i].time/results[0].time);
        }

        /* report gmean of tests as the penalty */
        printf("\n%s Penalty: %.2f\n\n", name, exp(gmean_ratio/(current_test-1)));
    }

    /* reset the test counter so we can run more tests */
    current_test = 0;
}

/******************************************************************************/

/* Reduced report written to `output`: one row per sample with its absolute
   time, followed by the total. Resets current_test to 0 afterwards. */
void summarize_simplef( FILE *output, const char *name ) {
    int i;
    double total_absolute_times = 0.0;

    /* find longest label so we can adjust formatting
        12 = strlen("description")+1 */
    int longest_label_len = 12;
    for (i = 0; i < current_test; ++i) {
        int len = (int)strlen(results[i].label);
        if (len > longest_label_len)
            longest_label_len = len;
    }

    fprintf(output,"\ntest %*s description absolute\n", longest_label_len-12, " ");
    fprintf(output,"number %*s time\n\n", longest_label_len, " ");

    for (i = 0; i < current_test; ++i)
        fprintf(output,"%2i %*s\"%s\" %5.2f sec\n",
                i,
                (int)(longest_label_len - strlen(results[i].label)),
                "",
                results[i].label,
                results[i].time);

    /* calculate total time */
    for (i = 0; i < current_test; ++i) {
        total_absolute_times += results[i].time;
    }

    /* report total time */
    fprintf(output,"\nTotal absolute time for %s: %.2f sec\n", name, total_absolute_times);

    /* reset the test counter so we can run more tests */
    current_test = 0;
}

/******************************************************************************/

#endif /* BENCHMARK_RESULTS_H */

/* ---- /include/benchmark_shared_tests.h ----
    Copyright 2007-2008 Adobe Systems Incorporated
    Distributed under the MIT License (see accompanying file LICENSE_1_0_0.txt
    or a copy at http://stlab.adobe.com/licenses.html )
*/
http://stlab.adobe.com/licenses.html ) 5 | 6 | 7 | Source file for tests shared among several benchmarks 8 | */ 9 | 10 | /******************************************************************************/ 11 | 12 | template 13 | inline bool tolerance_equal(T &a, T &b) { 14 | T diff = a - b; 15 | return (abs(diff) < 1.0e-6); 16 | } 17 | 18 | 19 | template<> 20 | inline bool tolerance_equal(int32_t &a, int32_t &b) { 21 | return (a == b); 22 | } 23 | template<> 24 | inline bool tolerance_equal(uint32_t &a, uint32_t &b) { 25 | return (a == b); 26 | } 27 | template<> 28 | inline bool tolerance_equal(uint64_t &a, uint64_t &b) { 29 | return (a == b); 30 | } 31 | template<> 32 | inline bool tolerance_equal(int64_t &a, int64_t &b) { 33 | return (a == b); 34 | } 35 | 36 | template<> 37 | inline bool tolerance_equal(double &a, double &b) { 38 | double diff = a - b; 39 | double reldiff = diff; 40 | if (fabs(a) > 1.0e-8) 41 | reldiff = diff / a; 42 | return (fabs(reldiff) < 1.0e-6); 43 | } 44 | 45 | template<> 46 | inline bool tolerance_equal(float &a, float &b) { 47 | float diff = a - b; 48 | double reldiff = diff; 49 | if (fabs(a) > 1.0e-4) 50 | reldiff = diff / a; 51 | return (fabs(reldiff) < 1.0e-3); // single precision divide test is really imprecise 52 | } 53 | 54 | /******************************************************************************/ 55 | 56 | template 57 | inline void check_shifted_sum(T result) { 58 | T temp = (T)SIZE * Shifter::do_shift((T)init_value); 59 | if (!tolerance_equal(result,temp)) 60 | printf("test %i failed\n", current_test); 61 | } 62 | 63 | template 64 | inline void check_shifted_sum_CSE(T result) { 65 | T temp = (T)0.0; 66 | if (!tolerance_equal(result,temp)) 67 | printf("test %i failed\n", current_test); 68 | } 69 | 70 | template 71 | inline void check_shifted_variable_sum(T result, T var) { 72 | T temp = (T)SIZE * Shifter::do_shift((T)init_value, var); 73 | if (!tolerance_equal(result,temp)) 74 | printf("test %i failed\n", current_test); 75 | 
} 76 | 77 | template 78 | inline void check_shifted_variable_sum(T result, T var1, T var2, T var3, T var4) { 79 | T temp = (T)SIZE * Shifter::do_shift((T)init_value, var1, var2, var3, var4); 80 | if (!tolerance_equal(result,temp)) 81 | printf("test %i failed\n", current_test); 82 | } 83 | 84 | template 85 | inline void check_shifted_variable_sum_CSE(T result, T var) { 86 | T temp = (T)0.0; 87 | if (!tolerance_equal(result,temp)) 88 | printf("test %i failed\n", current_test); 89 | } 90 | 91 | template 92 | inline void check_shifted_variable_sum_CSE(T result, T var1, T var2, T var3, T var4) { 93 | T temp = (T)0.0; 94 | if (!tolerance_equal(result,temp)) 95 | printf("test %i failed\n", current_test); 96 | } 97 | 98 | 99 | /******************************************************************************/ 100 | 101 | template 102 | void fill(Iterator first, Iterator last, T value) { 103 | while (first != last) *first++ = value; 104 | } 105 | 106 | /******************************************************************************/ 107 | 108 | template 109 | struct custom_constant_add { 110 | static T do_shift(T input) { return (input + T(10)); } 111 | }; 112 | 113 | /******************************************************************************/ 114 | 115 | template 116 | struct custom_multiple_constant_add { 117 | static T do_shift(T input) { return (input + T(1) + T(2) + T(3) + T(4)); } 118 | }; 119 | 120 | /******************************************************************************/ 121 | 122 | template 123 | struct custom_constant_sub { 124 | static T do_shift(T input) { return (input - T(10)); } 125 | }; 126 | 127 | /******************************************************************************/ 128 | 129 | template 130 | struct custom_multiple_constant_sub { 131 | static T do_shift(T input) { return (input - T(1) - T(2) - T(3) - T(4)); } 132 | }; 133 | 134 | /******************************************************************************/ 135 | 136 | template 137 | 
struct custom_constant_multiply { 138 | static T do_shift(T input) { return (input * T(120)); } 139 | }; 140 | 141 | /******************************************************************************/ 142 | 143 | // this should result in a single multiply 144 | template 145 | struct custom_multiple_constant_multiply { 146 | static T do_shift(T input) { return (input * T(2) * T(3) * T(4) * T(5)); } 147 | }; 148 | 149 | /******************************************************************************/ 150 | 151 | // this should result in a single add 152 | template 153 | struct custom_multiple_constant_multiply2 { 154 | static T do_shift(T input) { return (input + T(2) * T(3) * T(4) * T(5)); } 155 | }; 156 | 157 | /******************************************************************************/ 158 | 159 | template 160 | struct custom_constant_divide { 161 | static T do_shift(T input) { return (input / T(5)); } 162 | }; 163 | 164 | /******************************************************************************/ 165 | 166 | template 167 | struct custom_multiple_constant_divide { 168 | static T do_shift(T input) { return ((((input / T(2) ) / T(3) ) / T(4)) / T(5)); } 169 | }; 170 | 171 | /******************************************************************************/ 172 | 173 | // this more likely to have constants fused than the version above 174 | template 175 | struct custom_multiple_constant_divide2 { 176 | static T do_shift(T input) { return (input + (((T(120) / T(3) ) / T(4)) / T(5))); } 177 | }; 178 | 179 | /******************************************************************************/ 180 | 181 | template 182 | struct custom_multiple_constant_mixed { 183 | static T do_shift(T input) { return (input + T(2) - T(3) * T(4) / T(5)); } 184 | }; 185 | 186 | /******************************************************************************/ 187 | 188 | template 189 | struct custom_constant_and { 190 | static T do_shift(T input) { return (input & T(10)); } 191 | }; 192 | 193 | 
/******************************************************************************/ 194 | 195 | template 196 | struct custom_multiple_constant_and { 197 | static T do_shift(T input) { return (input & T(15) & T(30) & T(31) & T(63)); } 198 | }; 199 | 200 | /******************************************************************************/ 201 | 202 | template 203 | struct custom_constant_or { 204 | static T do_shift(T input) { return (input | T(10)); } 205 | }; 206 | 207 | /******************************************************************************/ 208 | 209 | template 210 | struct custom_multiple_constant_or { 211 | static T do_shift(T input) { return (input | T(15) | T(30) | T(31) | T(63)); } 212 | }; 213 | 214 | /******************************************************************************/ 215 | 216 | template 217 | struct custom_constant_xor { 218 | static T do_shift(T input) { return (input ^ T(10)); } 219 | }; 220 | 221 | /******************************************************************************/ 222 | 223 | template 224 | struct custom_multiple_constant_xor { 225 | static T do_shift(T input) { return (input ^ T(15) ^ T(30) ^ T(31) ^ T(63)); } 226 | }; 227 | 228 | /******************************************************************************/ 229 | 230 | template 231 | struct custom_two { 232 | static T do_shift(T input) { return (T(2)); } 233 | }; 234 | 235 | /******************************************************************************/ 236 | 237 | template 238 | struct custom_add_constants { 239 | static T do_shift(T input) { return (T(1) + T(2)); } 240 | }; 241 | 242 | /******************************************************************************/ 243 | 244 | template 245 | struct custom_sub_constants { 246 | static T do_shift(T input) { return (T(2) - T(1)); } 247 | }; 248 | 249 | /******************************************************************************/ 250 | 251 | template 252 | struct custom_multiply_constants { 253 | static T do_shift(T 
input) { return (T(2) * T(3)); } 254 | }; 255 | 256 | /******************************************************************************/ 257 | 258 | template 259 | struct custom_divide_constants { 260 | static T do_shift(T input) { return (T(20) / T(10)); } 261 | }; 262 | 263 | /******************************************************************************/ 264 | 265 | template 266 | struct custom_mod_constants { 267 | static T do_shift(T input) { return (T(23) % T(10)); } 268 | }; 269 | 270 | /******************************************************************************/ 271 | 272 | template 273 | struct custom_and_constants { 274 | static T do_shift(T input) { return (T(23) & T(10)); } 275 | }; 276 | 277 | /******************************************************************************/ 278 | 279 | template 280 | struct custom_or_constants { 281 | static T do_shift(T input) { return (T(23) | T(10)); } 282 | }; 283 | 284 | /******************************************************************************/ 285 | 286 | template 287 | struct custom_xor_constants { 288 | static T do_shift(T input) { return (T(23) ^ T(10)); } 289 | }; 290 | 291 | /******************************************************************************/ 292 | 293 | template 294 | struct custom_equal_constants { 295 | static T do_shift(T input) { return (T(23) == T(10)); } 296 | }; 297 | 298 | /******************************************************************************/ 299 | 300 | template 301 | struct custom_notequal_constants { 302 | static T do_shift(T input) { return (T(23) != T(10)); } 303 | }; 304 | 305 | /******************************************************************************/ 306 | 307 | template 308 | struct custom_greaterthan_constants { 309 | static T do_shift(T input) { return (T(23) > T(10)); } 310 | }; 311 | 312 | /******************************************************************************/ 313 | 314 | template 315 | struct custom_lessthan_constants { 316 | static T 
do_shift(T input) { return (T(23) < T(10)); } 317 | }; 318 | 319 | /******************************************************************************/ 320 | 321 | template 322 | struct custom_greaterthanequal_constants { 323 | static T do_shift(T input) { return (T(23) >= T(10)); } 324 | }; 325 | 326 | /******************************************************************************/ 327 | 328 | template 329 | struct custom_lessthanequal_constants { 330 | static T do_shift(T input) { return (T(23) <= T(10)); } 331 | }; 332 | 333 | /******************************************************************************/ 334 | 335 | template 336 | struct custom_add_variable { 337 | static T do_shift(T input, T v1) { return (input + v1); } 338 | }; 339 | 340 | /******************************************************************************/ 341 | 342 | template 343 | struct custom_sub_variable { 344 | static T do_shift(T input, T v1) { return (input - v1); } 345 | }; 346 | 347 | /******************************************************************************/ 348 | 349 | template 350 | struct custom_multiply_variable { 351 | static T do_shift(T input, T v1) { return (input * v1); } 352 | }; 353 | 354 | /******************************************************************************/ 355 | 356 | template 357 | struct custom_divide_variable { 358 | static T do_shift(T input, T v1) { return (input / v1); } 359 | }; 360 | 361 | /******************************************************************************/ 362 | 363 | template 364 | struct custom_add_multiple_variable { 365 | static T do_shift(T input, T v1, T v2, T v3, T v4) { return (input + v1 + v2 + v3 + v4); } 366 | }; 367 | 368 | /******************************************************************************/ 369 | 370 | template 371 | struct custom_sub_multiple_variable { 372 | static T do_shift(T input, T v1, T v2, T v3, T v4) { return (input - v1 - v2 - v3 - v4); } 373 | }; 374 | 375 | 
/******************************************************************************/ 376 | 377 | template 378 | struct custom_multiply_multiple_variable { 379 | static T do_shift(T input, T v1, T v2, T v3, T v4) { return (input * v1 * v2 * v3 * v4); } 380 | }; 381 | 382 | /******************************************************************************/ 383 | 384 | // something more likely to be moved out of loops, and a sanity check 385 | template 386 | struct custom_multiply_multiple_variable2 { 387 | static T do_shift(T input, T v1, T v2, T v3, T v4) { return (input + v1 * v2 * v3 * v4); } 388 | }; 389 | 390 | /******************************************************************************/ 391 | 392 | // this can NOT have CSE and loop invariant motion applied in integer math 393 | // and can only be optimized in float if inexact math is allowed 394 | template 395 | struct custom_divide_multiple_variable { 396 | static T do_shift(T input, T v1, T v2, T v3, T v4) { return ((((input / v1 ) / v2 ) / v3) / v4); } 397 | }; 398 | 399 | /******************************************************************************/ 400 | 401 | // this can have CSE and loop invariant motion applied in integer math 402 | // this should be optimizeable without inexact math 403 | template 404 | struct custom_divide_multiple_variable2 { 405 | static T do_shift(T input, T v1, T v2, T v3, T v4) { return (input + (((v1 / v2 ) / v3) / v4)); } 406 | }; 407 | 408 | /******************************************************************************/ 409 | 410 | template 411 | struct custom_mixed_multiple_variable { 412 | static T do_shift(T input, T v1, T v2, T v3, T v4) { return (input + v1 - v2 * v3 / v4); } 413 | }; 414 | 415 | /******************************************************************************/ 416 | 417 | template 418 | struct custom_variable_and { 419 | static T do_shift(T input, T v1) { return (input & v1); } 420 | }; 421 | 422 | 
/******************************************************************************/ 423 | 424 | template 425 | struct custom_multiple_variable_and { 426 | static T do_shift(T input, T v1, T v2, T v3, T v4) { return (input & v1 & v2 & v3 & v4); } 427 | }; 428 | 429 | /******************************************************************************/ 430 | 431 | template 432 | struct custom_variable_or { 433 | static T do_shift(T input, T v1) { return (input | v1); } 434 | }; 435 | 436 | /******************************************************************************/ 437 | 438 | template 439 | struct custom_multiple_variable_or { 440 | static T do_shift(T input, T v1, T v2, T v3, T v4) { return (input | v1 | v2 | v3 | v4); } 441 | }; 442 | 443 | /******************************************************************************/ 444 | 445 | template 446 | struct custom_variable_xor { 447 | static T do_shift(T input, T v1) { return (input ^ v1); } 448 | }; 449 | 450 | /******************************************************************************/ 451 | 452 | template 453 | struct custom_multiple_variable_xor { 454 | static T do_shift(T input, T v1, T v2, T v3, T v4) { return (input ^ v1 ^ v2 ^ v3 ^ v4); } 455 | }; 456 | 457 | 458 | /******************************************************************************/ 459 | 460 | template 461 | struct custom_identity { 462 | static T do_shift(T input) { return (input); } 463 | }; 464 | 465 | /******************************************************************************/ 466 | 467 | template 468 | struct custom_add_zero { 469 | static T do_shift(T input) { return (input + T(0)); } 470 | }; 471 | 472 | /******************************************************************************/ 473 | 474 | template 475 | struct custom_sub_zero { 476 | static T do_shift(T input) { return (input - T(0)); } 477 | }; 478 | 479 | /******************************************************************************/ 480 | 481 | template 482 | struct 
custom_negate { 483 | static T do_shift(T input) { return (-input); } 484 | }; 485 | 486 | /******************************************************************************/ 487 | 488 | template 489 | struct custom_negate_twice { 490 | static T do_shift(T input) { return (-(-input)); } 491 | }; 492 | 493 | /******************************************************************************/ 494 | 495 | template 496 | struct custom_zero_minus { 497 | static T do_shift(T input) { return (T(0) - input); } 498 | }; 499 | 500 | /******************************************************************************/ 501 | 502 | template 503 | struct custom_times_one { 504 | static T do_shift(T input) { return (input * T(1)); } 505 | }; 506 | 507 | /******************************************************************************/ 508 | 509 | template 510 | struct custom_divideby_one { 511 | static T do_shift(T input) { return (input / T(1)); } 512 | }; 513 | 514 | /******************************************************************************/ 515 | 516 | template 517 | struct custom_algebra_mixed { 518 | static T do_shift(T input) { return (-(T(0) - (((input + T(0)) - T(0)) / T(1)))) * T(1); } 519 | }; 520 | 521 | /******************************************************************************/ 522 | 523 | template 524 | struct custom_zero { 525 | static T do_shift(T input) { return T(0); } 526 | }; 527 | 528 | /******************************************************************************/ 529 | 530 | template 531 | struct custom_times_zero { 532 | static T do_shift(T input) { return (input * T(0)); } 533 | }; 534 | 535 | /******************************************************************************/ 536 | 537 | template 538 | struct custom_subtract_self { 539 | static T do_shift(T input) { return (input - input); } 540 | }; 541 | /******************************************************************************/ 542 | 543 | template 544 | struct custom_algebra_mixed_constant { 545 | 
static T do_shift(T input) { return (input - (-(T(0) - (((input + T(0)) / T(1)) - T(0)))) * T(1)); } 546 | }; 547 | 548 | /******************************************************************************/ 549 | 550 | template 551 | struct custom_cse1 { 552 | static T do_shift(T v1, T v2, T v3) { return (v1 * (v2 - v3) ); } 553 | }; 554 | 555 | /******************************************************************************/ 556 | 557 | template 558 | struct custom_and_self { 559 | static T do_shift(T input) { return (input & input); } 560 | }; 561 | 562 | /******************************************************************************/ 563 | 564 | template 565 | struct custom_or_self { 566 | static T do_shift(T input) { return (input | input); } 567 | }; 568 | 569 | /******************************************************************************/ 570 | 571 | template 572 | struct custom_xor_self { 573 | static T do_shift(T input) { return (input ^ input); } 574 | }; 575 | 576 | /******************************************************************************/ 577 | 578 | template 579 | struct custom_or_zero { 580 | static T do_shift(T input) { return (input | T(0)); } 581 | }; 582 | 583 | /******************************************************************************/ 584 | 585 | template 586 | struct custom_xor_zero { 587 | static T do_shift(T input) { return (input ^ T(0)); } 588 | }; 589 | 590 | /******************************************************************************/ 591 | 592 | template 593 | struct custom_andnot_zero { 594 | static T do_shift(T input) { return (input & ~ T(0)); } 595 | }; 596 | 597 | /******************************************************************************/ 598 | 599 | template 600 | struct custom_and_zero { 601 | static T do_shift(T input) { return (input & T(0)); } 602 | }; 603 | 604 | /******************************************************************************/ 605 | 606 | template 607 | struct custom_mod_one { 608 | static T 
do_shift(T input) { return (input % T(1)); } 609 | }; 610 | 611 | /******************************************************************************/ 612 | 613 | template 614 | struct custom_equal_self { 615 | static T do_shift(T input) { return (input == input); } 616 | }; 617 | 618 | /******************************************************************************/ 619 | 620 | template 621 | struct custom_notequal_self { 622 | static T do_shift(T input) { return (input != input); } 623 | }; 624 | 625 | /******************************************************************************/ 626 | 627 | template 628 | struct custom_greaterthan_self { 629 | static T do_shift(T input) { return (input > input); } 630 | }; 631 | 632 | /******************************************************************************/ 633 | 634 | template 635 | struct custom_lessthan_self { 636 | static T do_shift(T input) { return (input < input); } 637 | }; 638 | 639 | /******************************************************************************/ 640 | 641 | template 642 | struct custom_greaterthanequal_self { 643 | static T do_shift(T input) { return (input >= input); } 644 | }; 645 | 646 | /******************************************************************************/ 647 | 648 | template 649 | struct custom_lessthanequal_self { 650 | static T do_shift(T input) { return (input <= input); } 651 | }; 652 | 653 | /******************************************************************************/ 654 | 655 | template 656 | void test_constant(T* first, int count, const char *label) { 657 | int i; 658 | 659 | start_timer(); 660 | 661 | for(i = 0; i < iterations; ++i) { 662 | T result = 0; 663 | for (int n = 0; n < count; ++n) { 664 | result += Shifter::do_shift( first[n] ); 665 | } 666 | check_shifted_sum(result); 667 | } 668 | 669 | record_result( timer(), label ); 670 | } 671 | 672 | /******************************************************************************/ 673 | 674 | template 675 | void 
test_variable1(T* first, int count, T v1, const char *label) { 676 | int i; 677 | 678 | start_timer(); 679 | 680 | for(i = 0; i < iterations; ++i) { 681 | T result = 0; 682 | for (int n = 0; n < count; ++n) { 683 | result += Shifter::do_shift( first[n], v1 ); 684 | } 685 | check_shifted_variable_sum(result, v1); 686 | } 687 | 688 | record_result( timer(), label ); 689 | } 690 | 691 | /******************************************************************************/ 692 | 693 | template 694 | void test_variable4(T* first, int count, T v1, T v2, T v3, T v4, const char *label) { 695 | int i; 696 | 697 | start_timer(); 698 | 699 | for(i = 0; i < iterations; ++i) { 700 | T result = 0; 701 | for (int n = 0; n < count; ++n) { 702 | result += Shifter::do_shift( first[n], v1, v2, v3, v4 ); 703 | } 704 | check_shifted_variable_sum(result, v1, v2, v3, v4); 705 | } 706 | 707 | record_result( timer(), label ); 708 | } 709 | 710 | /******************************************************************************/ 711 | 712 | template 713 | void test_CSE_opt(T* first, int count, T v1, const char *label) { 714 | int i; 715 | 716 | start_timer(); 717 | 718 | for(i = 0; i < iterations; ++i) { 719 | T result = 0; 720 | T temp = Shifter::do_shift( v1, first[0], first[1] ); 721 | temp += temp; 722 | result += first[0] + temp; 723 | result -= first[1] + temp; 724 | for (int n = 1; n < count; ++n) { 725 | temp = Shifter::do_shift( v1, first[n-1], first[n] ); 726 | temp += temp; 727 | result += first[n-1] + temp; 728 | result -= first[n] + temp; 729 | } 730 | check_shifted_variable_sum_CSE(result, v1); 731 | } 732 | 733 | record_result( timer(), label ); 734 | } 735 | 736 | /******************************************************************************/ 737 | 738 | template 739 | void test_CSE(T* first, int count, T v1, const char *label) { 740 | int i; 741 | 742 | start_timer(); 743 | 744 | for(i = 0; i < iterations; ++i) { 745 | T result = 0; 746 | result += first[0] + Shifter::do_shift( v1, 
first[0], first[1] ) + Shifter::do_shift( v1, first[0], first[1] ); 747 | result -= first[1] + Shifter::do_shift( v1, first[0], first[1] ) + Shifter::do_shift( v1, first[0], first[1] ); 748 | for (int n = 1; n < count; ++n) { 749 | result += first[n-1] + Shifter::do_shift( v1, first[n-1], first[n] ) + Shifter::do_shift( v1, first[n-1], first[n] ); 750 | result -= first[n] + Shifter::do_shift( v1, first[n-1], first[n] ) + Shifter::do_shift( v1, first[n-1], first[n] ); 751 | } 752 | check_shifted_variable_sum_CSE(result, v1); 753 | } 754 | 755 | record_result( timer(), label ); 756 | } 757 | 758 | /******************************************************************************/ 759 | -------------------------------------------------------------------------------- /include/benchmark_stdint.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2007-2008 Adobe Systems Incorporated 3 | Distributed under the MIT License (see accompanying file LICENSE_1_0_0.txt 4 | or a copy at http://stlab.adobe.com/licenses.html ) 5 | 6 | 7 | Not all compilers support yet, and some don't even have 8 | So, we have to do our own type definitions. 
9 | */ 10 | 11 | 12 | /******************************************************************************/ 13 | 14 | #if _MSC_VER 15 | 16 | // for platforms that don't include the C99 header stdint.h 17 | 18 | // if your platform does not include stdint.h, please edit the #if above 19 | // to include your platform (and remember that it has to work elsewhere as well) 20 | 21 | template 22 | struct if_; 23 | 24 | template 25 | struct if_ { typedef T type; }; 26 | 27 | template 28 | struct if_{ typedef E type; }; 29 | 30 | typedef signed char int8_t; 31 | typedef unsigned char uint8_t; 32 | 33 | typedef signed short int16_t; 34 | typedef unsigned short uint16_t; 35 | 36 | typedef if_::type int32_t; 37 | typedef if_::type uint32_t; 38 | 39 | #if _MSC_VER 40 | typedef __int64 int64_t; 41 | typedef unsigned __int64 uint64_t; 42 | #else 43 | typedef long long int64_t; 44 | typedef unsigned long long uint64_t; 45 | #endif 46 | 47 | #else 48 | 49 | // for platforms that do have stdint.h 50 | 51 | // if your platform fails to find this header, please edit the #if above 52 | // to include your platform (and remember that it has to work elsewhere as well) 53 | 54 | #include 55 | 56 | #endif 57 | 58 | /******************************************************************************/ 59 | -------------------------------------------------------------------------------- /include/benchmark_timer.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2007-2008 Adobe Systems Incorporated 3 | Distributed under the MIT License (see accompanying file LICENSE_1_0_0.txt 4 | or a copy at http://stlab.adobe.com/licenses.html) 5 | 6 | Shared source file for timing, used by all the benchmarks 7 | */ 8 | 9 | /******************************************************************************/ 10 | 11 | #include 12 | 13 | /******************************************************************************/ 14 | 15 | /* Yes, this would be easier with a class or 
vector 16 | but it needs to work for both C and C++ code 17 | */ 18 | 19 | /* simple timer functions */ 20 | clock_t start_time, end_time; 21 | 22 | void start_timer() { start_time = clock(); } 23 | 24 | double timer() { 25 | end_time = clock(); 26 | return (end_time - start_time)/ (double)(CLOCKS_PER_SEC); 27 | } 28 | 29 | /******************************************************************************/ 30 | -------------------------------------------------------------------------------- /src/functionobjects.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2007-2008 Adobe Systems Incorporated 3 | Distributed under the MIT License (see accompanying file LICENSE_1_0_0.txt 4 | or a copy at http://stlab.adobe.com/licenses.html ) 5 | 6 | 7 | This test file started life as 8 | ISO/IEC TR 18015:2006(E) Appendix D.4 9 | 10 | 11 | Goals: 12 | 13 | Compare the performance of function pointers, functors, inline functors, 14 | standard functors, and native comparison operators 15 | 16 | Also compare the performance of qsort(), quicksort template, and std::sort 17 | 18 | 19 | Assumptions: 20 | 21 | 1) inline functors, standard functors and inlined native 22 | comparisons will perform similarly 23 | 24 | 2) using functors is faster than using function pointers 25 | 26 | 3) inline functors are as fast or faster than out of line functors 27 | 28 | 4) a template is at least as fast as a hard coded function of 29 | the same algorithm, sometimes faster 30 | 31 | 5) std::sort is faster than qsort() 32 | 33 | 6) std::sort is faster than a naive quicksort template using the same functor 34 | 35 | 36 | Since qsort's comparison function must return int (less than 0, 0, greater than 0) 37 | and std::sort's must return a bool, it is not possible to test them with each 38 | other's comparator. 
39 | 40 | */ 41 | 42 | 43 | /******************************************************************************/ 44 | 45 | #include 46 | #include 47 | #include 48 | #include "benchmark_results.h" 49 | #include "benchmark_timer.h" 50 | 51 | using namespace std; 52 | 53 | /******************************************************************************/ 54 | 55 | template 56 | void verify_sorted(Iterator first, Iterator last) { 57 | Iterator prev = first; 58 | first++; 59 | while (first != last) { 60 | if (*first++ < *prev++) { 61 | printf("test %i failed\n", current_test); 62 | break; 63 | } 64 | } 65 | } 66 | 67 | /******************************************************************************/ 68 | // --------- helper functions -------------------------------------------- 69 | 70 | // qsort passes void * arguments to its comparison function, 71 | // which must return negative, 0, or positive value 72 | int 73 | less_than_function1( const void * lhs, const void * rhs ) 74 | { 75 | if( *(const double *) lhs < *(const double *) rhs ) return -1; 76 | if( *(const double *) lhs > *(const double *) rhs ) return 1; 77 | return 0; 78 | } 79 | 80 | // std::sort, on the other hand, needs a comparator that returns true or false 81 | bool 82 | less_than_function2( const double lhs, const double rhs ) 83 | { 84 | return( lhs < rhs? true : false ); 85 | } 86 | 87 | // the comparison operator in the following functor is defined out of line 88 | struct less_than_functor 89 | { 90 | bool operator()( const double& lhs, const double& rhs ) const; 91 | }; 92 | 93 | bool 94 | less_than_functor::operator()( const double& lhs, const double& rhs ) const 95 | { 96 | return( lhs < rhs? true : false ); 97 | } 98 | 99 | // the comparison operator in the following functor is defined inline 100 | struct inline_less_than_functor 101 | { 102 | inline bool operator()( const double& lhs, const double& rhs ) const 103 | { 104 | return( lhs < rhs? 
true : false ); 105 | } 106 | }; 107 | 108 | /******************************************************************************/ 109 | 110 | // hard coded comparison function 111 | template 112 | void quicksort(Iterator begin, Iterator end) 113 | { 114 | // this only works for pointers and STL iterators 115 | typedef typename iterator_traits::value_type T; 116 | 117 | if ( (end - begin) > 1 ) { 118 | 119 | T middleValue = *begin; 120 | Iterator left = begin; 121 | Iterator right = end; 122 | 123 | for(;;) { 124 | 125 | while ( middleValue < *(--right) ); 126 | if ( !(left < right ) ) break; 127 | 128 | while ( *(left) < middleValue ) 129 | ++left; 130 | if ( !(left < right ) ) break; 131 | 132 | // swap 133 | T temp = *right; 134 | *right = *left; 135 | *left = temp; 136 | } 137 | 138 | quicksort( begin, right + 1 ); 139 | quicksort( right + 1, end ); 140 | } 141 | } 142 | 143 | /******************************************************************************/ 144 | 145 | // comparison function passed in as a functor 146 | template 147 | void quicksort(Iterator begin, Iterator end, Comparator compare) 148 | { 149 | // this only works for pointers and STL iterators 150 | typedef typename iterator_traits::value_type T; 151 | 152 | if ( (end - begin) > 1 ) { 153 | 154 | T middleValue = *begin; 155 | Iterator left = begin; 156 | Iterator right = end; 157 | 158 | for(;;) { 159 | 160 | while ( compare( middleValue, *(--right) ) ); 161 | if ( !(left < right ) ) break; 162 | while ( compare( *(left), middleValue ) ) 163 | ++left; 164 | if ( !(left < right ) ) break; 165 | 166 | // swap 167 | T temp = *right; 168 | *right = *left; 169 | *left = temp; 170 | } 171 | 172 | quicksort( begin, right + 1, compare ); 173 | quicksort( right + 1, end, compare ); 174 | } 175 | } 176 | 177 | /******************************************************************************/ 178 | 179 | typedef bool comparator_function( const double x, const double y ); 180 | 181 | // use a pointer to function 
as a template parameter 182 | // exact function is known at compile time, and can be inlined 183 | template 184 | void quicksort(Iterator begin, Iterator end) 185 | { 186 | // this only works for pointers and STL iterators 187 | typedef typename iterator_traits::value_type T; 188 | 189 | if ( (end - begin) > 1 ) { 190 | 191 | T middleValue = *begin; 192 | Iterator left = begin; 193 | Iterator right = end; 194 | 195 | for(;;) { 196 | 197 | while ( compare( middleValue, *(--right) ) ); 198 | if ( !(left < right ) ) break; 199 | while ( compare( *(left), middleValue ) ) 200 | ++left; 201 | if ( !(left < right ) ) break; 202 | 203 | // swap 204 | T temp = *right; 205 | *right = *left; 206 | *left = temp; 207 | } 208 | 209 | quicksort( begin, right + 1, compare ); 210 | quicksort( right + 1, end, compare ); 211 | } 212 | } 213 | 214 | /******************************************************************************/ 215 | 216 | // use a function pointer 217 | // most compilers will not inline the function argument 218 | void quicksort_function(double* begin, double* end, comparator_function compare) 219 | { 220 | if ( (end - begin) > 1 ) { 221 | 222 | double middleValue = *begin; 223 | double* left = begin; 224 | double* right = end; 225 | 226 | for(;;) { 227 | 228 | while ( compare( middleValue, *(--right) ) ); 229 | if ( !(left < right ) ) break; 230 | while ( compare( *(left), middleValue ) ) 231 | ++left; 232 | if ( !(left < right ) ) break; 233 | 234 | // swap 235 | double temp = *right; 236 | *right = *left; 237 | *left = temp; 238 | } 239 | 240 | quicksort( begin, right + 1, compare ); 241 | quicksort( right + 1, end, compare ); 242 | } 243 | } 244 | 245 | /******************************************************************************/ 246 | 247 | int main(int argc, char* argv[]) 248 | { 249 | int i; 250 | int iterations = (1 < argc) ? atoi(argv[1]) : 2000; // number of iterations 251 | int tablesize = (2 < argc) ? 
atoi(argv[2]) : 10000; // size of array 252 | 253 | 254 | // output command for documentation 255 | for (i = 0; i < argc; ++i) 256 | printf("%s ", argv[i] ); 257 | printf("\n"); 258 | 259 | 260 | // seed the random number generator, so we get repeatable results 261 | srand( tablesize + 123 ); 262 | 263 | 264 | // initialize the table to sort 265 | double * master_table = new double[tablesize]; 266 | for( int n = 0; n < tablesize; ++n ) 267 | { 268 | master_table[n] = static_cast( rand() ); 269 | } 270 | 271 | double * table = new double[tablesize]; // working copy 272 | 273 | 274 | 275 | // here is where the timing starts 276 | 277 | start_timer(); 278 | for (i = 0; i < iterations; ++i) 279 | { 280 | copy(master_table, master_table+tablesize, table); 281 | qsort( table, tablesize, sizeof(double), less_than_function1 ); 282 | verify_sorted( table, table + tablesize ); 283 | } 284 | record_result( timer(), "qsort array with function pointer" ); 285 | 286 | start_timer(); 287 | for (i = 0; i < iterations; ++i) 288 | { 289 | copy(master_table, master_table+tablesize, table); 290 | quicksort_function( table, table + tablesize, less_than_function2 ); 291 | verify_sorted( table, table + tablesize ); 292 | } 293 | record_result( timer(), "quicksort function array with function pointer" ); 294 | 295 | start_timer(); 296 | for (i = 0; i < iterations; ++i) 297 | { 298 | copy(master_table, master_table+tablesize, table); 299 | quicksort( table, table + tablesize, less_than_function2 ); 300 | verify_sorted( table, table + tablesize ); 301 | } 302 | record_result( timer(), "quicksort template array with function pointer" ); 303 | 304 | start_timer(); 305 | for (i = 0; i < iterations; ++i) 306 | { 307 | copy(master_table, master_table+tablesize, table); 308 | quicksort( table, table + tablesize ); 309 | verify_sorted( table, table + tablesize ); 310 | } 311 | record_result( timer(), "quicksort template array with template function pointer" ); 312 | 313 | start_timer(); 314 | for 
(i = 0; i < iterations; ++i) 315 | { 316 | copy(master_table, master_table+tablesize, table); 317 | sort( table, table + tablesize, less_than_function2 ); 318 | verify_sorted( table, table + tablesize ); 319 | } 320 | record_result( timer(), "sort array with function pointer" ); 321 | 322 | start_timer(); 323 | for (i = 0; i < iterations; ++i) 324 | { 325 | copy(master_table, master_table+tablesize, table); 326 | quicksort( table, table + tablesize, less_than_functor() ); 327 | verify_sorted( table, table + tablesize ); 328 | } 329 | record_result( timer(), "quicksort template array with user-supplied functor" ); 330 | 331 | start_timer(); 332 | for (i = 0; i < iterations; ++i) 333 | { 334 | copy(master_table, master_table+tablesize, table); 335 | sort( table, table + tablesize, less_than_functor() ); 336 | verify_sorted( table, table + tablesize ); 337 | } 338 | record_result( timer(), "sort array with user-supplied functor"); 339 | 340 | start_timer(); 341 | for (i = 0; i < iterations; ++i) 342 | { 343 | copy(master_table, master_table+tablesize, table); 344 | quicksort( table, table + tablesize, inline_less_than_functor() ); 345 | verify_sorted( table, table + tablesize ); 346 | } 347 | record_result( timer(), "quicksort template array with user-supplied inline functor" ); 348 | 349 | start_timer(); 350 | for (i = 0; i < iterations; ++i) 351 | { 352 | copy(master_table, master_table+tablesize, table); 353 | sort( table, table + tablesize, inline_less_than_functor() ); 354 | verify_sorted( table, table + tablesize ); 355 | } 356 | record_result( timer(), "sort array with user-supplied inline functor"); 357 | 358 | start_timer(); 359 | for (i = 0; i < iterations; ++i) 360 | { 361 | copy(master_table, master_table+tablesize, table); 362 | quicksort( table, table + tablesize, less() ); 363 | verify_sorted( table, table + tablesize ); 364 | } 365 | record_result( timer(), "quicksort template array with standard functor" ); 366 | 367 | start_timer(); 368 | for (i = 0; 
i < iterations; ++i) 369 | { 370 | copy(master_table, master_table+tablesize, table); 371 | sort( table, table + tablesize, less() ); 372 | verify_sorted( table, table + tablesize ); 373 | } 374 | record_result( timer(), "sort array with standard functor"); 375 | 376 | start_timer(); 377 | for (i = 0; i < iterations; ++i) 378 | { 379 | copy(master_table, master_table+tablesize, table); 380 | quicksort( table, table + tablesize ); 381 | verify_sorted( table, table + tablesize ); 382 | } 383 | record_result( timer(), "quicksort template array with native < operator" ); 384 | 385 | start_timer(); 386 | for (i = 0; i < iterations; ++i) 387 | { 388 | copy( master_table, master_table+tablesize, table ); 389 | sort( table, table + tablesize ); 390 | verify_sorted( table, table + tablesize ); 391 | } 392 | record_result( timer(), "sort array with native < operator"); 393 | 394 | 395 | summarize("Function Objects", tablesize, iterations, kDontShowGMeans, kDontShowPenalty ); 396 | 397 | delete[] table; 398 | delete[] master_table; 399 | 400 | return 0; 401 | } 402 | -------------------------------------------------------------------------------- /src/loop_unroll.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2007-2008 Adobe Systems Incorporated 3 | Distributed under the MIT License (see accompanying file LICENSE_1_0_0.txt 4 | or a copy at http://stlab.adobe.com/licenses.html) 5 | 6 | 7 | Goal: Test compiler optimizations related to loop unrolling 8 | 9 | Assumptions: 10 | 11 | 1) the compiler will unroll loops to hide instruction latency 12 | for() {} 13 | while() {} 14 | do {} while() 15 | goto 16 | 17 | 2) if the compiler unrolls the loop, it should not be slower than the original loop without unrolling 18 | 19 | 3) the compiler should unroll a multi-calculation loop as well as a single calculation loop 20 | up to the limit of performance gain for unrolling that loop 21 | in other words: no penalty for manually 
unrolling, 22 | as long as the manual unroll is less than or equal to the optimum unroll factor 23 | 24 | 4) The compiler should recognize and unroll all loop styles with the same efficiency 25 | in other words: do, while, for, and goto should have identical performance 26 | 27 | */ 28 | 29 | #include "benchmark_stdint.hpp" 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include "benchmark_results.h" 36 | #include "benchmark_timer.h" 37 | 38 | /******************************************************************************/ 39 | 40 | // this constant may need to be adjusted to give reasonable minimum times 41 | // For best results, times should be about 1.0 seconds for the minimum test run 42 | int iterations = 300000; 43 | 44 | // 8000 items, or between 8k and 64k of data 45 | // this is intended to remain within the L2 cache of most common CPUs 46 | #define SIZE 8000 47 | 48 | // initial value for filling our arrays, may be changed from the command line 49 | double init_value = 1.0; 50 | 51 | /******************************************************************************/ 52 | 53 | #include "benchmark_shared_tests.h" 54 | 55 | /******************************************************************************/ 56 | /******************************************************************************/ 57 | 58 | template 59 | T hash_func2(T seed) { 60 | return (914237 * (seed + 12345)) - 13; 61 | } 62 | 63 | template 64 | T complete_hash_func(T seed) { 65 | return hash_func2( hash_func2( hash_func2( seed ) ) ); 66 | } 67 | 68 | /******************************************************************************/ 69 | 70 | template 71 | inline void check_sum(T result) { 72 | T temp = (T)SIZE * complete_hash_func( (T)init_value ); 73 | if (!tolerance_equal(result,temp)) printf("test %i failed\n", current_test); 74 | } 75 | 76 | /******************************************************************************/ 77 | 78 | // this is the heart of our loop 
unrolling - a class that unrolls itself to generate the inner loop code 79 | // at least as long as we keep F < 50 (or some compilers won't compile it) 80 | template< int F, typename T > 81 | struct loop_inner_body { 82 | inline static void do_work(T &result, const T *first, int n) { 83 | loop_inner_body::do_work(result, first, n); 84 | T temp = first[ n + (F-1) ]; 85 | temp = complete_hash_func( temp ); 86 | result += temp; 87 | } 88 | }; 89 | 90 | template< typename T > 91 | struct loop_inner_body<0,T> { 92 | inline static void do_work(T &, const T *, int) { 93 | } 94 | }; 95 | 96 | /******************************************************************************/ 97 | /******************************************************************************/ 98 | 99 | // F is the unrolling factor 100 | template 101 | void test_for_loop_unroll_factor(const T* first, int count, const char *label) { 102 | int i; 103 | 104 | start_timer(); 105 | 106 | for(i = 0; i < iterations; ++i) { 107 | T result = 0; 108 | int n = 0; 109 | 110 | for (; n < (count - (F-1)); n += F) { 111 | loop_inner_body::do_work(result,first, n); 112 | } 113 | 114 | for (; n < count; ++n) { 115 | result += complete_hash_func( first[n] ); 116 | } 117 | 118 | check_sum(result); 119 | } 120 | 121 | record_result( timer(), label ); 122 | } 123 | 124 | /******************************************************************************/ 125 | 126 | // F is the unrolling factor 127 | template 128 | void test_while_loop_unroll_factor(const T* first, int count, const char *label) { 129 | int i; 130 | 131 | start_timer(); 132 | 133 | for(i = 0; i < iterations; ++i) { 134 | T result = 0; 135 | int n = 0; 136 | 137 | while ( n < (count - (F-1)) ) { 138 | loop_inner_body::do_work(result,first, n); 139 | n += F; 140 | } 141 | 142 | while ( n < count ) { 143 | result += complete_hash_func( first[n] ); 144 | ++n; 145 | } 146 | 147 | check_sum(result); 148 | } 149 | 150 | record_result( timer(), label ); 151 | } 152 | 153 | 
/******************************************************************************/ 154 | 155 | // F is the unrolling factor 156 | template 157 | void test_do_loop_unroll_factor(const T* first, int count, const char *label) { 158 | int i; 159 | 160 | start_timer(); 161 | 162 | for(i = 0; i < iterations; ++i) { 163 | T result = 0; 164 | int n = 0; 165 | 166 | if ((count - n) >= F) 167 | do { 168 | loop_inner_body::do_work(result,first, n); 169 | n += F; 170 | } while (n < (count - (F-1))); 171 | 172 | if (n < count) 173 | do { 174 | result += complete_hash_func( first[n] ); 175 | ++n; 176 | } while (n != count); 177 | 178 | check_sum(result); 179 | } 180 | 181 | record_result( timer(), label ); 182 | } 183 | 184 | /******************************************************************************/ 185 | 186 | // F is the unrolling factor 187 | template 188 | void test_goto_loop_unroll_factor(const T* first, int count, const char *label) { 189 | int i; 190 | 191 | start_timer(); 192 | 193 | for(i = 0; i < iterations; ++i) { 194 | T result = 0; 195 | int n = 0; 196 | 197 | if ((count - n) >= F) { 198 | loop2_start: 199 | loop_inner_body::do_work(result,first, n); 200 | n += F; 201 | 202 | if (n < (count - (F-1))) 203 | goto loop2_start; 204 | } 205 | 206 | if (n < count) { 207 | loop_start: 208 | result += complete_hash_func( first[n] ); 209 | ++n; 210 | 211 | if (n != count) 212 | goto loop_start; 213 | } 214 | 215 | check_sum(result); 216 | } 217 | 218 | record_result( timer(), label ); 219 | } 220 | 221 | /******************************************************************************/ 222 | /******************************************************************************/ 223 | 224 | // our global arrays of numbers to be operated upon 225 | 226 | double dataDouble[SIZE]; 227 | 228 | int32_t data32[SIZE]; 229 | 230 | // not elegant, but I need strings to hang around until we print the results 231 | // and I don't want to pull in STL 232 | const int UnrollLimit = 32; 233 | 
char temp_string[UnrollLimit][100]; 234 | 235 | /******************************************************************************/ 236 | /******************************************************************************/ 237 | 238 | // another unrolled loop to create all of our tests 239 | template< int N, typename T > 240 | struct for_loop_tests { 241 | static void do_test( const T *data, const char *label_base ) { 242 | for_loop_tests::do_test(data, label_base); 243 | sprintf( temp_string[N-1], "%s %d", label_base, N ); 244 | test_for_loop_unroll_factor( data, SIZE, temp_string[N-1] ); 245 | } 246 | }; 247 | 248 | template 249 | struct for_loop_tests<0,T> { 250 | static void do_test( const T *, const char * ) { 251 | } 252 | }; 253 | 254 | /******************************************************************************/ 255 | 256 | template< int N, typename T > 257 | struct while_loop_tests { 258 | static void do_test( const T *data, const char *label_base ) { 259 | while_loop_tests::do_test(data, label_base); 260 | sprintf( temp_string[N-1], "%s %d", label_base, N ); 261 | test_while_loop_unroll_factor( data, SIZE, temp_string[N-1] ); 262 | } 263 | }; 264 | 265 | template 266 | struct while_loop_tests<0,T> { 267 | static void do_test( const T *, const char * ) { 268 | } 269 | }; 270 | 271 | /******************************************************************************/ 272 | 273 | template< int N, typename T > 274 | struct do_loop_tests { 275 | static void do_test( const T *data, const char *label_base ) { 276 | do_loop_tests::do_test(data, label_base); 277 | sprintf( temp_string[N-1], "%s %d", label_base, N ); 278 | test_do_loop_unroll_factor( data, SIZE, temp_string[N-1] ); 279 | } 280 | }; 281 | 282 | template 283 | struct do_loop_tests<0,T> { 284 | static void do_test( const T *, const char * ) { 285 | } 286 | }; 287 | 288 | /******************************************************************************/ 289 | 290 | template< int N, typename T > 291 | struct 
goto_loop_tests { 292 | static void do_test( const T *data, const char *label_base ) { 293 | goto_loop_tests::do_test(data, label_base); 294 | sprintf( temp_string[N-1], "%s %d", label_base, N ); 295 | test_goto_loop_unroll_factor( data, SIZE, temp_string[N-1] ); 296 | } 297 | }; 298 | 299 | template 300 | struct goto_loop_tests<0,T> { 301 | static void do_test( const T *, const char * ) { 302 | } 303 | }; 304 | 305 | /******************************************************************************/ 306 | /******************************************************************************/ 307 | 308 | int main(int argc, char** argv) { 309 | 310 | // output command for documentation: 311 | int i; 312 | for (i = 0; i < argc; ++i) 313 | printf("%s ", argv[i] ); 314 | printf("\n"); 315 | 316 | if (argc > 1) iterations = atoi(argv[1]); 317 | if (argc > 2) init_value = (double) atof(argv[2]); 318 | 319 | 320 | // int32_t 321 | ::fill(data32, data32+SIZE, int32_t(init_value)); 322 | 323 | for_loop_tests::do_test( data32, "int32_t for loop unroll" ); 324 | summarize("int32_t for loop unrolling", SIZE, iterations, kDontShowGMeans, kDontShowPenalty ); 325 | 326 | while_loop_tests::do_test( data32, "int32_t while loop unroll" ); 327 | summarize("int32_t while loop unrolling", SIZE, iterations, kDontShowGMeans, kDontShowPenalty ); 328 | 329 | do_loop_tests::do_test( data32, "int32_t do loop unroll" ); 330 | summarize("int32_t do loop unrolling", SIZE, iterations, kDontShowGMeans, kDontShowPenalty ); 331 | 332 | goto_loop_tests::do_test( data32, "int32_t goto loop unroll" ); 333 | summarize("int32_t goto loop unrolling", SIZE, iterations, kDontShowGMeans, kDontShowPenalty ); 334 | 335 | 336 | // double 337 | iterations /= 4; 338 | 339 | ::fill(dataDouble, dataDouble+SIZE, double(init_value)); 340 | 341 | for_loop_tests::do_test( dataDouble, "double for loop unroll" ); 342 | summarize("double for loop unrolling", SIZE, iterations, kDontShowGMeans, kDontShowPenalty ); 343 | 344 | 
while_loop_tests::do_test( dataDouble, "double while loop unroll" ); 345 | summarize("double while loop unrolling", SIZE, iterations, kDontShowGMeans, kDontShowPenalty ); 346 | 347 | do_loop_tests::do_test( dataDouble, "double do loop unroll" ); 348 | summarize("double do loop unrolling", SIZE, iterations, kDontShowGMeans, kDontShowPenalty ); 349 | 350 | goto_loop_tests::do_test( dataDouble, "double goto loop unroll" ); 351 | summarize("double goto loop unrolling", SIZE, iterations, kDontShowGMeans, kDontShowPenalty ); 352 | 353 | 354 | return 0; 355 | } 356 | 357 | // the end 358 | /******************************************************************************/ 359 | /******************************************************************************/ 360 | -------------------------------------------------------------------------------- /src/machine.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2007-2008 Adobe Systems Incorporated 3 | Distributed under the MIT License (see accompanying file LICENSE_1_0_0.txt 4 | or a copy at http://stlab.adobe.com/licenses.html) 5 | 6 | 7 | The purpose of this source file is to report information about the compiler, 8 | OS and machine running the benchmark 9 | 10 | When adding reporting for your compiler, OS and CPU: 11 | Please remember that this source file has to compile everywhere else as well. 12 | 13 | See http://predef.sourceforge.net/precomp.html for some older compilers 14 | and architectures. 15 | See source for Unix hostinfo. 16 | 17 | All trademarks used herein are the property of their owner, and are only used 18 | for correct identification of their products 19 | 20 | */ 21 | 22 | /******************************************************************************/ 23 | 24 | #include 25 | #include 26 | #include "benchmark_stdint.hpp" 27 | 28 | // this should be defined on Mach derived OSes (MacOS, FreeBSD, etc.) 
#if defined(_MACHTYPES_H_)
// sysctl()/sysctlbyname() and the CTL_*/KERN_* constants used further down
#include <sys/sysctl.h>
#endif


/******************************************************************************/

// Prints a complaint when a fixed-width type does not have the expected size.
static void CheckTypeSize(const char *typeName, size_t actual, size_t expected)
{
    if (actual != expected)
        printf("Found size of %s was %d instead of %d\n", typeName, (int)actual, (int)expected );
}

// Verifies that the fixed-width integer types really have the sizes their
// names promise; prints one line per mismatch and nothing when all is well.
void VerifyTypeSizes()
{
    CheckTypeSize("int8_t",   sizeof(int8_t),   1);
    CheckTypeSize("uint8_t",  sizeof(uint8_t),  1);
    CheckTypeSize("int16_t",  sizeof(int16_t),  2);
    CheckTypeSize("uint16_t", sizeof(uint16_t), 2);
    CheckTypeSize("int32_t",  sizeof(int32_t),  4);
    CheckTypeSize("uint32_t", sizeof(uint32_t), 4);
    CheckTypeSize("int64_t",  sizeof(int64_t),  8);
    CheckTypeSize("uint64_t", sizeof(uint64_t), 8);
}

/******************************************************************************/

// compiler version and any major targeting options (such as 32 vs 64 bit)
//
// Exactly one compiler branch is selected by the preprocessor; the order
// matters because more than one of these macros can be defined at once.
void ReportCompiler()
{

    printf("##Compiler\n");

#if __INTEL_COMPILER

    printf("Intel Compiler version %d\n", __INTEL_COMPILER );
    printf("Build %d\n", __INTEL_COMPILER_BUILD_DATE );

    // _WIN32 is also defined for 64 bit Windows targets, so test 64 bit first
    // to avoid reporting both
#if defined(_WIN64) || defined(__WIN64__)
    printf("Compiling for Windows 64 bit\n" );
#elif defined(__WIN32__) || defined(_WIN32)
    printf("Compiling for Windows 32 bit\n" );
#endif

#elif _MSC_VER

    printf("Microsoft VisualC++ version %d\n", _MSC_VER );

#if defined(_WIN64)
    printf("Compiling for Windows 64 bit\n" );
#elif defined(_WIN32)
    printf("Compiling for Windows 32 bit\n");
#endif

#if defined(__CLR_VER)
    // __CLR_VER is an integer literal, not a string
    printf("CLR version %d\n", __CLR_VER );
#endif

/*
    See http://msdn2.microsoft.com/en-us/library/b0084kay(VS.80).aspx#_predir_table_1..3
*/

#elif __GNUC__

    printf("GCC version %s\n", __VERSION__ );

/*
    printf("%d.%d", __GNUC__, __GNUC_MINOR__ );
#if defined(__GNUC_PATCHLEVEL__)
    printf(" . %d", __GNUC_PATCHLEVEL__ );
#endif
    printf("\n");
*/

#if __LP64__
    printf("Compiled for LP64\n");
#endif


/*
    Other useful macros:
        __OPTIMIZE__
        __OPTIMIZE_SIZE__
        __NO_INLINE__

    See http://developer.apple.com/documentation/DeveloperTools/gcc-4.0.1/cpp/Common-Predefined-Macros.html
*/

    // the version macros below are cast so the argument type matches %lX
#elif __MWERKS__
    printf("Metrowerks CodeWarrior version 0x%8.8lX\n", (unsigned long)__MWERKS__ );
#elif __MRC__
    printf("Apple MrC[pp] version 0x%8.8lX\n", (unsigned long)__MRC__ );
#elif __MOTO__
    printf("Motorola MCC version 0x%8.8lX\n", (unsigned long)__MOTO__ );
#else
    printf("********\n" );
    printf("Unknown compiler, please update %s for your compiler\n", __FILE__ );
    printf("********\n" );
#endif

}

/******************************************************************************/

// what kind of CPU is the compiler targeting?
// Reports the CPU architecture this binary was compiled for, chosen purely
// from predefined preprocessor macros (nothing is queried at run time).
void ReportCPUTarget()
{

    printf("##Target CPU\n");

#if _MANAGED

    printf("Compiled for Microsoft managed code (CLR)\n" );

#elif defined(__ppc64__)

    printf("Compiled for PowerPC 64bit\n");

#elif defined(__powerc) || defined(__ppc__) || defined(powerpc) || defined(ppc)

    printf("Compiled for PowerPC 32bit\n");

#elif defined(_M_IA64)

    printf("Compiled for Intel Architecture 64\n" );

#elif defined(_M_X64) || defined(__x86_64__)

    printf("Compiled for x86 64bit\n" );

#elif defined(__i386__) || defined(i386) || defined(_X86_) || defined(_M_IX86)

    printf("Compiled for x86 32bit\n" );

#if _M_IX86
    // _M_IX86 carries the minimum x86 generation being targeted
    switch( _M_IX86) {
        case 300:
            printf("Compiled for 80386\n" );
            break;
        case 400:
            printf("Compiled for 80486\n" );
            break;
        case 500:
            printf("Compiled for Pentium\n" );
            break;
        case 600:
            printf("Compiled for PentiumII\n" );
            break;
        default:
            printf("********\n" );
            printf("Unknown x86 target, please update %s for your cpu\n", __FILE__ );
            printf("********\n" );
            break;
    }
#endif

#elif defined(_ALPHA_)

    printf("Compiled for Alpha\n" );

#else
    printf("********\n" );
    printf("Unknown target CPU, please update %s for your cpu\n", __FILE__ );
    printf("********\n" );
#endif

}

/******************************************************************************/

// byte order of the CPU we're running on
//
// Stores a known 32 bit pattern and inspects the byte at the lowest address.
void ReportEndian()
{
    const uint32_t cookie = 0x01020304;
    const unsigned char firstByte = *reinterpret_cast<const unsigned char *>(&cookie);

    if (firstByte == 0x01) {
        printf( "Big Endian\n" );
    } else if (firstByte == 0x04) {
        printf( "Little Endian\n" );
    } else {
        printf("********\n" );
        printf("Unknown byteorder, please update %s for your cpu\n", __FILE__ );
        printf("********\n" );
    }
}

/******************************************************************************/

#if defined(_MACHTYPES_H_)

// Reads a 4 byte integer sysctl value by name into `value`.
// Returns true when sysctlbyname() succeeds; `value` is untouched otherwise.
// The buffer is exactly as large as the length we pass, so the result does
// not depend on sizeof(long) or on byte order.
static bool ReadSysctlInt32(const char *name, int32_t &value)
{
    int32_t buffer = 0;
    size_t len = sizeof(buffer);

    if (sysctlbyname(name, &buffer, &len, NULL, 0) != 0)
        return false;

    value = buffer;
    return true;
}

// Maps a Mach CPU type number (from sys/machine.h) to the name printed after
// "CPU_TYPE "; returns NULL for a number this file does not know about.
static const char *MachCPUTypeName(int32_t cpuType)
{
    switch (cpuType) {
        case 1:     return "VAX";
        case 6:     return "MC680x0";
        case 7:     return "x86";
        case 8:     return "MIPS";
        case 10:    return "MC98000";
        case 11:    return "HPPA";
        case 12:    return "ARM";
        case 13:    return "MC8880x0";
        case 14:    return "SPARC";
        case 15:    return "i860";
        case 16:    return "Alpha";
        case 18:    return "PowerPC";
        default:    return NULL;
    }
}

#endif  // _MACHTYPES_H_


// what CPU are we actually running on
// architecture, revision, speed

// methods for obtaining this information are probably OS specific

void ReportCPUPhysical()
{
    printf("##Physical CPU\n");



// this should work for any Mach based OS (MacOS, FreeBSD, etc.)
#if defined(_MACHTYPES_H_)

    // see sysctl.h for the definitions
    {
        int32_t value = 0;

        // this gets us the CPU family, but not the exact CPU model and rev!
        if (ReadSysctlInt32("hw.cputype", value)) {
            printf("Mach CPU type %ld\n", (long)value );

            const char *typeName = MachCPUTypeName(value);
            if (typeName != NULL) {
                printf("CPU_TYPE %s\n", typeName );
            } else {
                printf("********\n" );
                printf("Unknown Mach CPU Type, please update %s for your cpu\n", __FILE__ );
                printf("********\n" );
            }
        }

        // corresponds to CPU types, but the list is kinda big and dependent on CPU major type
        if (ReadSysctlInt32("hw.cpusubtype", value))
            printf("Mach CPU subtype %ld\n", (long)value );

        // the frequency is requested as an 8 byte value, in Hz
        long long frequency = 0;
        size_t len = sizeof(frequency);
        if (sysctlbyname("hw.cpufrequency_max", &frequency, &len, NULL, 0) == 0)
            printf("CPU frequency: %.2f Mhz\n", (double)frequency / 1000000.0 );

        // optional instruction set extensions; a line is printed only when
        // the query succeeds and reports a non-zero value
        static const struct {
            const char *key;
            const char *message;
        } features[] = {
            // PowerPC CPU extensions
            { "hw.optional.floatingpoint",  "CPU has optional floating point instructions\n" },
            { "hw.optional.altivec",        "CPU has AltiVec instructions\n" },
            { "hw.optional.64bitops",       "CPU has 64 bit instructions\n" },
            { "hw.optional.fsqrt",          "CPU has fsqrt instruction\n" },
            // x86 CPU extension
            { "hw.optional.mmx",            "CPU has MMX instructions\n" },
            { "hw.optional.sse",            "CPU has SSE instructions\n" },
            { "hw.optional.sse2",           "CPU has SSE2 instructions\n" },
            { "hw.optional.sse3",           "CPU has SSE3 instructions\n" },
            { "hw.optional.sse4",           "CPU has SSE4 instructions\n" },
            { "hw.optional.sse5",           "CPU has SSE5 instructions\n" },
            { "hw.optional.x86_64",         "CPU has x86_64 instructions\n" },
        };

        for (size_t i = 0; i < sizeof(features) / sizeof(features[0]); ++i) {
            if (ReadSysctlInt32(features[i].key, value) && value != 0)
                printf("%s", features[i].message );
        }

    }

#endif  // _MACHTYPES_H_

    // useful information, and not so dependent
    ReportEndian();
}

/******************************************************************************/

// format a number of bytes and print (without return)
//
// Picks the largest binary unit (bytes, KB, MB, GB, TB, PB, EB) that keeps the
// printed number below 1024; everything above bytes is shown with two decimals.
void printMemSize( long long input )
{
    const double kilo = 1024.0;
    const double meg = (double)input / (kilo*kilo);
    const double tera = (double)input / (kilo*kilo*kilo*kilo);

    if (input < 1024) {                     // format as bytes
        printf("%lld bytes", input );
    } else if (input < (1024*1024)) {       // format as KB
        printf("%.2f KBytes", (double)input/kilo );
    } else if (meg < kilo) {                // format as MB
        printf("%.2f MBytes", meg );
    } else if (meg < (kilo*kilo)) {         // format as GB
        printf("%.2f GBytes", meg/kilo );
    } else if (tera < kilo) {               // format as TB
        printf("%.2f TeraBytes", tera );
    } else if (tera < (kilo*kilo)) {        // format as PB
        printf("%.2f PetaBytes", tera/kilo );
    } else {                                // format as EB
        printf("%.2f ExaBytes", tera/(kilo*kilo) );
    }
}

/******************************************************************************/

// information about the machine, outside of the CPU
| void ReportMachinePhysical() 405 | { 406 | printf("##Machine\n"); 407 | 408 | 409 | // this should work for any Mach based OS (MacOS, FreeBSD, etc.) 410 | #if defined(_MACHTYPES_H_) 411 | 412 | // see sysctl.h for the definitions 413 | { 414 | long returnBuffer=0, retval=0; 415 | long long bigBuffer = 0; 416 | size_t len; 417 | 418 | len = 4; 419 | returnBuffer = 0; 420 | retval = sysctlbyname("hw.ncpu", &returnBuffer, &len, NULL, 0); 421 | if (retval == 0) { 422 | printf("Machine has %ld CPUs\n", returnBuffer ); 423 | } 424 | 425 | 426 | len = 4; 427 | retval = sysctlbyname("hw.physicalcpu_max", &returnBuffer, &len, NULL, 0); 428 | if (retval == 0) 429 | printf("Machine has %ld physical CPUs\n", returnBuffer ); 430 | 431 | len = 4; 432 | retval = sysctlbyname("hw.logicalcpu_max", &returnBuffer, &len, NULL, 0); 433 | if (retval == 0) 434 | printf("Machine has %ld logical CPUs\n", returnBuffer ); 435 | 436 | 437 | len = 8; 438 | retval = sysctlbyname("hw.memsize", &bigBuffer, &len, NULL, 0); 439 | if (retval == 0) { 440 | printf("Machine has "); 441 | printMemSize( bigBuffer ); 442 | printf(" of RAM\n"); 443 | } 444 | 445 | len = 8; 446 | retval = sysctlbyname("hw.pagesize", &bigBuffer, &len, NULL, 0); 447 | if (retval == 0) { 448 | printf("Machine using "); 449 | printMemSize( bigBuffer ); 450 | printf(" pagesize\n"); 451 | } 452 | 453 | } 454 | 455 | #endif // _MACHTYPES_H_ 456 | 457 | 458 | } 459 | 460 | /******************************************************************************/ 461 | 462 | void ReportOS() 463 | { 464 | printf("##Operating System\n"); 465 | 466 | 467 | // this should work for any Mach based OS (MacOS, FreeBSD, etc.) 
468 | #if defined(_MACHTYPES_H_) 469 | 470 | // see sysctl.h for the definitions 471 | { 472 | //long returnBuffer; 473 | //long long bigBuffer; 474 | char string_buffer[1024]; 475 | long retval=0; 476 | int mib[4]; 477 | size_t len; 478 | 479 | mib[0] = CTL_KERN; 480 | mib[1] = KERN_VERSION; 481 | len = sizeof(string_buffer); 482 | retval = sysctl(mib, 2, string_buffer, &len, NULL, 0); 483 | if (retval == 0) 484 | printf("Kernel OS Version: %s\n", string_buffer ); 485 | 486 | } 487 | 488 | #endif // _MACHTYPES_H_ 489 | } 490 | 491 | /******************************************************************************/ 492 | 493 | int main (int argc, char *argv[]) 494 | { 495 | // this should only be changed when the reporting tags have changed in an incompatible way 496 | const char version[] = "version 1.0"; 497 | 498 | printf("##Start machine report %s\n", version ); 499 | VerifyTypeSizes(); 500 | ReportCompiler(); 501 | ReportCPUTarget(); 502 | ReportCPUPhysical(); 503 | ReportMachinePhysical(); 504 | ReportOS(); 505 | printf("##End machine report\n"); 506 | 507 | return 0; 508 | } 509 | 510 | -------------------------------------------------------------------------------- /src/simple_types_constant_folding.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2007-2008 Adobe Systems Incorporated 3 | Distributed under the MIT License (see accompanying file LICENSE_1_0_0.txt 4 | or a copy at http://stlab.adobe.com/licenses.html ) 5 | 6 | 7 | Goal: Test compiler optimizations related to constant folding of simple language defined types 8 | 9 | Assumptions: 10 | 11 | 1) the compiler will combine constant calculations into a single constant for simple types 12 | aka constant folding 13 | result = A + B ==> result = constant 14 | result = A - B ==> result = constant 15 | result = A * B ==> result = constant 16 | result = A / B ==> result = constant 17 | result = A % B ==> result = constant for integer types 18 | result 
= (A == B) ==> result = constant for integer types 19 | result = (A != B) ==> result = constant for integer types 20 | result = (A > B) ==> result = constant for integer types 21 | result = (A < B) ==> result = constant for integer types 22 | result = (A >= B) ==> result = constant for integer types 23 | result = (A <= B) ==> result = constant for integer types 24 | result = (A & B) ==> result = constant for integer types 25 | result = (A | B) ==> result = constant for integer types 26 | result = (A ^ B) ==> result = constant for integer types 27 | 28 | result = input + A + B + C + D ==> result = input + (A+B+C+D) 29 | result = input - A - B - C - D ==> result = input - (A+B+C+D) 30 | result = input * A * B * C * D ==> result = input * (A*B*C*D) 31 | result = input + A * B * C * D ==> result = input + (A*B*C*D) 32 | result = ((((input/A) /B) /C) /D) ==> result = input / (A*B*C*D) 33 | result = input + (((A /B) /C) /D) ==> result = input + (A/B/C/D) 34 | result = input & A & B & C & D ==> result = input & (A&B&C&D) for integer types 35 | result = input | A | B | C | D ==> result = input | (A|B|C|D) for integer types 36 | result = input ^ A ^ B ^ C ^ D ==> result = input ^ (A^B^C^D) for integer types 37 | 38 | 39 | NOTE - in some cases, loop invariant code motion might move the constant calculation out of the inner loop 40 | making it appear that the constants were folded 41 | But in the constant result cases, we want the compiler to recognize the constant and move it out of the loop 42 | 43 | */ 44 | 45 | /******************************************************************************/ 46 | 47 | #include "benchmark_stdint.hpp" 48 | #include 49 | #include 50 | #include 51 | #include 52 | #include 53 | #include "benchmark_results.h" 54 | #include "benchmark_timer.h" 55 | 56 | /******************************************************************************/ 57 | 58 | // this constant may need to be adjusted to give reasonable minimum times 59 | // For best results, times 
should be about 1.0 seconds for the minimum test run 60 | int base_iterations = 2000000; 61 | int iterations = base_iterations; 62 | 63 | 64 | // 8000 items, or between 8k and 64k of data 65 | // this is intended to remain within the L2 cache of most common CPUs 66 | const int SIZE = 8000; 67 | 68 | 69 | // initial value for filling our arrays, may be changed from the command line 70 | double init_value = 1.0; 71 | 72 | /******************************************************************************/ 73 | 74 | // our global arrays of numbers to be operated upon 75 | 76 | double dataDouble[SIZE]; 77 | float dataFloat[SIZE]; 78 | 79 | uint64_t data64unsigned[SIZE]; 80 | int64_t data64[SIZE]; 81 | 82 | uint32_t data32unsigned[SIZE]; 83 | int32_t data32[SIZE]; 84 | 85 | uint16_t data16unsigned[SIZE]; 86 | int16_t data16[SIZE]; 87 | 88 | uint8_t data8unsigned[SIZE]; 89 | int8_t data8[SIZE]; 90 | 91 | /******************************************************************************/ 92 | 93 | #include "benchmark_shared_tests.h" 94 | 95 | /******************************************************************************/ 96 | 97 | 98 | int main(int argc, char** argv) { 99 | 100 | // output command for documentation: 101 | int i; 102 | for (i = 0; i < argc; ++i) 103 | printf("%s ", argv[i] ); 104 | printf("\n"); 105 | 106 | if (argc > 1) base_iterations = atoi(argv[1]); 107 | if (argc > 2) init_value = (double) atof(argv[2]); 108 | 109 | 110 | 111 | // int8_t 112 | ::fill(data8, data8+SIZE, int8_t(init_value)); 113 | 114 | iterations = base_iterations; 115 | test_constant >(data8,SIZE,"int8_t constant"); 116 | test_constant >(data8,SIZE,"int8_t add constants"); 117 | test_constant >(data8,SIZE,"int8_t subtract constants"); 118 | test_constant >(data8,SIZE,"int8_t multiply constants"); 119 | test_constant >(data8,SIZE,"int8_t divide constants"); 120 | test_constant >(data8,SIZE,"int8_t mod constants"); 121 | test_constant >(data8,SIZE,"int8_t equal constants"); 122 | 
test_constant >(data8,SIZE,"int8_t notequal constants"); 123 | test_constant >(data8,SIZE,"int8_t greater than constants"); 124 | test_constant >(data8,SIZE,"int8_t less than constants"); 125 | test_constant >(data8,SIZE,"int8_t greater than equal constants"); 126 | test_constant >(data8,SIZE,"int8_t less than equal constants"); 127 | test_constant >(data8,SIZE,"int8_t and constants"); 128 | test_constant >(data8,SIZE,"int8_t or constants"); 129 | test_constant >(data8,SIZE,"int8_t xor constants"); 130 | 131 | summarize("int8_t simple constant folding", SIZE, iterations, kDontShowGMeans, kDontShowPenalty ); 132 | 133 | 134 | iterations = base_iterations / 10; 135 | test_constant >(data8,SIZE,"int8_t constant add"); 136 | test_constant >(data8,SIZE,"int8_t multiple constant adds"); 137 | 138 | test_constant >(data8,SIZE,"int8_t constant subtract"); 139 | test_constant >(data8,SIZE,"int8_t multiple constant subtracts"); 140 | 141 | test_constant >(data8,SIZE,"int8_t constant multiply"); 142 | test_constant >(data8,SIZE,"int8_t multiple constant multiplies"); 143 | test_constant >(data8,SIZE,"int8_t multiple constant multiply2"); 144 | 145 | test_constant >(data8,SIZE,"int8_t constant divide"); 146 | test_constant >(data8,SIZE,"int8_t multiple constant divides"); 147 | test_constant >(data8,SIZE,"int8_t multiple constant divide2"); 148 | 149 | test_constant >(data8,SIZE,"int8_t multiple constant mixed"); 150 | 151 | test_constant >(data8,SIZE,"int8_t constant and"); 152 | test_constant >(data8,SIZE,"int8_t multiple constant and"); 153 | 154 | test_constant >(data8,SIZE,"int8_t constant or"); 155 | test_constant >(data8,SIZE,"int8_t multiple constant or"); 156 | 157 | test_constant >(data8,SIZE,"int8_t constant xor"); 158 | test_constant >(data8,SIZE,"int8_t multiple constant xor"); 159 | 160 | summarize("int8_t constant folding", SIZE, iterations, kDontShowGMeans, kDontShowPenalty ); 161 | 162 | 163 | // unsigned8 164 | ::fill(data8unsigned, data8unsigned+SIZE, 
uint8_t(init_value)); 165 | 166 | iterations = base_iterations; 167 | test_constant >(data8unsigned,SIZE,"uint8_t constant"); 168 | test_constant >(data8unsigned,SIZE,"uint8_t add constants"); 169 | test_constant >(data8unsigned,SIZE,"uint8_t subtract constants"); 170 | test_constant >(data8unsigned,SIZE,"uint8_t multiply constants"); 171 | test_constant >(data8unsigned,SIZE,"uint8_t divide constants"); 172 | test_constant >(data8unsigned,SIZE,"uint8_t mod constants"); 173 | test_constant >(data8unsigned,SIZE,"uint8_t equal constants"); 174 | test_constant >(data8unsigned,SIZE,"uint8_t notequal constants"); 175 | test_constant >(data8unsigned,SIZE,"uint8_t greater than constants"); 176 | test_constant >(data8unsigned,SIZE,"uint8_t less than constants"); 177 | test_constant >(data8unsigned,SIZE,"uint8_t greater than equal constants"); 178 | test_constant >(data8unsigned,SIZE,"uint8_t less than equal constants"); 179 | test_constant >(data8unsigned,SIZE,"uint8_t and constants"); 180 | test_constant >(data8unsigned,SIZE,"uint8_t or constants"); 181 | test_constant >(data8unsigned,SIZE,"uint8_t xor constants"); 182 | 183 | summarize("uint8_t simple constant folding", SIZE, iterations, kDontShowGMeans, kDontShowPenalty ); 184 | 185 | 186 | iterations = base_iterations / 10; 187 | test_constant >(data8unsigned,SIZE,"uint8_t constant add"); 188 | test_constant >(data8unsigned,SIZE,"uint8_t multiple constant adds"); 189 | 190 | test_constant >(data8unsigned,SIZE,"uint8_t constant subtract"); 191 | test_constant >(data8unsigned,SIZE,"uint8_t multiple constant subtracts"); 192 | 193 | test_constant >(data8unsigned,SIZE,"uint8_t constant multiply"); 194 | test_constant >(data8unsigned,SIZE,"uint8_t multiple constant multiplies"); 195 | test_constant >(data8unsigned,SIZE,"uint8_t multiple constant multiply2"); 196 | 197 | test_constant >(data8unsigned,SIZE,"uint8_t constant divide"); 198 | test_constant >(data8unsigned,SIZE,"uint8_t multiple constant divides"); 199 | 
test_constant >(data8unsigned,SIZE,"uint8_t multiple constant divide2"); 200 | 201 | test_constant >(data8unsigned,SIZE,"uint8_t multiple constant mixed"); 202 | 203 | test_constant >(data8unsigned,SIZE,"uint8_t constant and"); 204 | test_constant >(data8unsigned,SIZE,"uint8_t multiple constant and"); 205 | 206 | test_constant >(data8unsigned,SIZE,"uint8_t constant or"); 207 | test_constant >(data8unsigned,SIZE,"uint8_t multiple constant or"); 208 | 209 | test_constant >(data8unsigned,SIZE,"uint8_t constant xor"); 210 | test_constant >(data8unsigned,SIZE,"uint8_t multiple constant xor"); 211 | 212 | summarize("uint8_t constant folding", SIZE, iterations, kDontShowGMeans, kDontShowPenalty ); 213 | 214 | 215 | // int16_t 216 | ::fill(data16, data16+SIZE, int16_t(init_value)); 217 | 218 | iterations = base_iterations; 219 | test_constant >(data16,SIZE,"int16_t constant"); 220 | test_constant >(data16,SIZE,"int16_t add constants"); 221 | test_constant >(data16,SIZE,"int16_t subtract constants"); 222 | test_constant >(data16,SIZE,"int16_t multiply constants"); 223 | test_constant >(data16,SIZE,"int16_t divide constants"); 224 | test_constant >(data16,SIZE,"int16_t mod constants"); 225 | test_constant >(data16,SIZE,"int16_t equal constants"); 226 | test_constant >(data16,SIZE,"int16_t notequal constants"); 227 | test_constant >(data16,SIZE,"int16_t greater than constants"); 228 | test_constant >(data16,SIZE,"int16_t less than constants"); 229 | test_constant >(data16,SIZE,"int16_t greater than equal constants"); 230 | test_constant >(data16,SIZE,"int16_t less than equal constants"); 231 | test_constant >(data16,SIZE,"int16_t and constants"); 232 | test_constant >(data16,SIZE,"int16_t or constants"); 233 | test_constant >(data16,SIZE,"int16_t xor constants"); 234 | 235 | summarize("int16_t simple constant folding", SIZE, iterations, kDontShowGMeans, kDontShowPenalty ); 236 | 237 | 238 | iterations = base_iterations / 10; 239 | test_constant >(data16,SIZE,"int16_t constant 
add"); 240 | test_constant >(data16,SIZE,"int16_t multiple constant adds"); 241 | 242 | test_constant >(data16,SIZE,"int16_t constant subtract"); 243 | test_constant >(data16,SIZE,"int16_t multiple constant subtracts"); 244 | 245 | test_constant >(data16,SIZE,"int16_t constant multiply"); 246 | test_constant >(data16,SIZE,"int16_t multiple constant multiplies"); 247 | test_constant >(data16,SIZE,"int16_t multiple constant multiply2"); 248 | 249 | test_constant >(data16,SIZE,"int16_t constant divide"); 250 | test_constant >(data16,SIZE,"int16_t multiple constant divides"); 251 | test_constant >(data16,SIZE,"int16_t multiple constant divide2"); 252 | 253 | test_constant >(data16,SIZE,"int16_t multiple constant mixed"); 254 | 255 | test_constant >(data16,SIZE,"int16_t constant and"); 256 | test_constant >(data16,SIZE,"int16_t multiple constant and"); 257 | 258 | test_constant >(data16,SIZE,"int16_t constant or"); 259 | test_constant >(data16,SIZE,"int16_t multiple constant or"); 260 | 261 | test_constant >(data16,SIZE,"int16_t constant xor"); 262 | test_constant >(data16,SIZE,"int16_t multiple constant xor"); 263 | 264 | summarize("int16_t constant folding", SIZE, iterations, kDontShowGMeans, kDontShowPenalty ); 265 | 266 | 267 | // unsigned16 268 | ::fill(data16unsigned, data16unsigned+SIZE, uint16_t(init_value)); 269 | 270 | iterations = base_iterations; 271 | test_constant >(data16unsigned,SIZE,"uint16_t constant"); 272 | test_constant >(data16unsigned,SIZE,"uint16_t add constants"); 273 | test_constant >(data16unsigned,SIZE,"uint16_t subtract constants"); 274 | test_constant >(data16unsigned,SIZE,"uint16_t multiply constants"); 275 | test_constant >(data16unsigned,SIZE,"uint16_t divide constants"); 276 | test_constant >(data16unsigned,SIZE,"uint16_t mod constants"); 277 | test_constant >(data16unsigned,SIZE,"uint16_t equal constants"); 278 | test_constant >(data16unsigned,SIZE,"uint16_t notequal constants"); 279 | test_constant >(data16unsigned,SIZE,"uint16_t 
greater than constants"); 280 | test_constant >(data16unsigned,SIZE,"uint16_t less than constants"); 281 | test_constant >(data16unsigned,SIZE,"uint16_t greater than equal constants"); 282 | test_constant >(data16unsigned,SIZE,"uint16_t less than equal constants"); 283 | test_constant >(data16unsigned,SIZE,"uint16_t and constants"); 284 | test_constant >(data16unsigned,SIZE,"uint16_t or constants"); 285 | test_constant >(data16unsigned,SIZE,"uint16_t xor constants"); 286 | 287 | summarize("uint16_t simple constant folding", SIZE, iterations, kDontShowGMeans, kDontShowPenalty ); 288 | 289 | 290 | iterations = base_iterations / 10; 291 | test_constant >(data16unsigned,SIZE,"uint16_t constant add"); 292 | test_constant >(data16unsigned,SIZE,"uint16_t multiple constant adds"); 293 | 294 | test_constant >(data16unsigned,SIZE,"uint16_t constant subtract"); 295 | test_constant >(data16unsigned,SIZE,"uint16_t multiple constant subtracts"); 296 | 297 | test_constant >(data16unsigned,SIZE,"uint16_t constant multiply"); 298 | test_constant >(data16unsigned,SIZE,"uint16_t multiple constant multiplies"); 299 | test_constant >(data16unsigned,SIZE,"uint16_t multiple constant multiply2"); 300 | 301 | test_constant >(data16unsigned,SIZE,"uint16_t constant divide"); 302 | test_constant >(data16unsigned,SIZE,"uint16_t multiple constant divides"); 303 | test_constant >(data16unsigned,SIZE,"uint16_t multiple constant divide2"); 304 | 305 | test_constant >(data16unsigned,SIZE,"uint16_t multiple constant mixed"); 306 | 307 | test_constant >(data16unsigned,SIZE,"uint16_t constant and"); 308 | test_constant >(data16unsigned,SIZE,"uint16_t multiple constant and"); 309 | 310 | test_constant >(data16unsigned,SIZE,"uint16_t constant or"); 311 | test_constant >(data16unsigned,SIZE,"uint16_t multiple constant or"); 312 | 313 | test_constant >(data16unsigned,SIZE,"uint16_t constant xor"); 314 | test_constant >(data16unsigned,SIZE,"uint16_t multiple constant xor"); 315 | 316 | summarize("uint16_t 
constant folding", SIZE, iterations, kDontShowGMeans, kDontShowPenalty ); 317 | 318 | 319 | 320 | // int32_t 321 | ::fill(data32, data32+SIZE, int32_t(init_value)); 322 | 323 | iterations = base_iterations; 324 | test_constant >(data32,SIZE,"int32_t constant"); 325 | test_constant >(data32,SIZE,"int32_t add constants"); 326 | test_constant >(data32,SIZE,"int32_t subtract constants"); 327 | test_constant >(data32,SIZE,"int32_t multiply constants"); 328 | test_constant >(data32,SIZE,"int32_t divide constants"); 329 | test_constant >(data32,SIZE,"int32_t mod constants"); 330 | test_constant >(data32,SIZE,"int32_t equal constants"); 331 | test_constant >(data32,SIZE,"int32_t notequal constants"); 332 | test_constant >(data32,SIZE,"int32_t greater than constants"); 333 | test_constant >(data32,SIZE,"int32_t less than constants"); 334 | test_constant >(data32,SIZE,"int32_t greater than equal constants"); 335 | test_constant >(data32,SIZE,"int32_t less than equal constants"); 336 | test_constant >(data32,SIZE,"int32_t and constants"); 337 | test_constant >(data32,SIZE,"int32_t or constants"); 338 | test_constant >(data32,SIZE,"int32_t xor constants"); 339 | 340 | summarize("int32_t simple constant folding", SIZE, iterations, kDontShowGMeans, kDontShowPenalty ); 341 | 342 | 343 | iterations = base_iterations / 10; 344 | test_constant >(data32,SIZE,"int32_t constant add"); 345 | test_constant >(data32,SIZE,"int32_t multiple constant adds"); 346 | 347 | test_constant >(data32,SIZE,"int32_t constant subtract"); 348 | test_constant >(data32,SIZE,"int32_t multiple constant subtracts"); 349 | 350 | test_constant >(data32,SIZE,"int32_t constant multiply"); 351 | test_constant >(data32,SIZE,"int32_t multiple constant multiplies"); 352 | test_constant >(data32,SIZE,"int32_t multiple constant multiply2"); 353 | 354 | test_constant >(data32,SIZE,"int32_t constant divide"); 355 | test_constant >(data32,SIZE,"int32_t multiple constant divides"); 356 | test_constant 
>(data32,SIZE,"int32_t multiple constant divide2"); 357 | 358 | test_constant >(data32,SIZE,"int32_t multiple constant mixed"); 359 | 360 | test_constant >(data32,SIZE,"int32_t constant and"); 361 | test_constant >(data32,SIZE,"int32_t multiple constant and"); 362 | 363 | test_constant >(data32,SIZE,"int32_t constant or"); 364 | test_constant >(data32,SIZE,"int32_t multiple constant or"); 365 | 366 | test_constant >(data32,SIZE,"int32_t constant xor"); 367 | test_constant >(data32,SIZE,"int32_t multiple constant xor"); 368 | 369 | summarize("int32_t constant folding", SIZE, iterations, kDontShowGMeans, kDontShowPenalty ); 370 | 371 | 372 | 373 | // unsigned32 374 | ::fill(data32unsigned, data32unsigned+SIZE, uint32_t(init_value)); 375 | 376 | iterations = base_iterations; 377 | test_constant >(data32unsigned,SIZE,"uint32_t constant"); 378 | test_constant >(data32unsigned,SIZE,"uint32_t add constants"); 379 | test_constant >(data32unsigned,SIZE,"uint32_t subtract constants"); 380 | test_constant >(data32unsigned,SIZE,"uint32_t multiply constants"); 381 | test_constant >(data32unsigned,SIZE,"uint32_t divide constants"); 382 | test_constant >(data32unsigned,SIZE,"uint32_t mod constants"); 383 | test_constant >(data32unsigned,SIZE,"uint32_t equal constants"); 384 | test_constant >(data32unsigned,SIZE,"uint32_t notequal constants"); 385 | test_constant >(data32unsigned,SIZE,"uint32_t greater than constants"); 386 | test_constant >(data32unsigned,SIZE,"uint32_t less than constants"); 387 | test_constant >(data32unsigned,SIZE,"uint32_t greater than equal constants"); 388 | test_constant >(data32unsigned,SIZE,"uint32_t less than equal constants"); 389 | test_constant >(data32unsigned,SIZE,"uint32_t and constants"); 390 | test_constant >(data32unsigned,SIZE,"uint32_t or constants"); 391 | test_constant >(data32unsigned,SIZE,"uint32_t xor constants"); 392 | 393 | summarize("uint32_t simple constant folding", SIZE, iterations, kDontShowGMeans, kDontShowPenalty ); 394 | 395 | 
396 | iterations = base_iterations / 10; 397 | test_constant >(data32unsigned,SIZE,"uint32_t constant add"); 398 | test_constant >(data32unsigned,SIZE,"uint32_t multiple constant adds"); 399 | 400 | test_constant >(data32unsigned,SIZE,"uint32_t constant subtract"); 401 | test_constant >(data32unsigned,SIZE,"uint32_t multiple constant subtracts"); 402 | 403 | test_constant >(data32unsigned,SIZE,"uint32_t constant multiply"); 404 | test_constant >(data32unsigned,SIZE,"uint32_t multiple constant multiplies"); 405 | test_constant >(data32unsigned,SIZE,"uint32_t multiple constant multiply2"); 406 | 407 | test_constant >(data32unsigned,SIZE,"uint32_t constant divide"); 408 | test_constant >(data32unsigned,SIZE,"uint32_t multiple constant divides"); 409 | test_constant >(data32unsigned,SIZE,"uint32_t multiple constant divide2"); 410 | 411 | test_constant >(data32unsigned,SIZE,"uint32_t multiple constant mixed"); 412 | 413 | test_constant >(data32unsigned,SIZE,"uint32_t constant and"); 414 | test_constant >(data32unsigned,SIZE,"uint32_t multiple constant and"); 415 | 416 | test_constant >(data32unsigned,SIZE,"uint32_t constant or"); 417 | test_constant >(data32unsigned,SIZE,"uint32_t multiple constant or"); 418 | 419 | test_constant >(data32unsigned,SIZE,"uint32_t constant xor"); 420 | test_constant >(data32unsigned,SIZE,"uint32_t multiple constant xor"); 421 | 422 | summarize("uint32_t constant folding", SIZE, iterations, kDontShowGMeans, kDontShowPenalty ); 423 | 424 | 425 | 426 | // int64_t 427 | ::fill(data64, data64+SIZE, int64_t(init_value)); 428 | 429 | iterations = base_iterations; 430 | test_constant >(data64,SIZE,"int64_t constant"); 431 | test_constant >(data64,SIZE,"int64_t add constants"); 432 | test_constant >(data64,SIZE,"int64_t subtract constants"); 433 | test_constant >(data64,SIZE,"int64_t multiply constants"); 434 | test_constant >(data64,SIZE,"int64_t divide constants"); 435 | test_constant >(data64,SIZE,"int64_t mod constants"); 436 | test_constant 
>(data64,SIZE,"int64_t equal constants"); 437 | test_constant >(data64,SIZE,"int64_t notequal constants"); 438 | test_constant >(data64,SIZE,"int64_t greater than constants"); 439 | test_constant >(data64,SIZE,"int64_t less than constants"); 440 | test_constant >(data64,SIZE,"int64_t greater than equal constants"); 441 | test_constant >(data64,SIZE,"int64_t less than equal constants"); 442 | test_constant >(data64,SIZE,"int64_t and constants"); 443 | test_constant >(data64,SIZE,"int64_t or constants"); 444 | test_constant >(data64,SIZE,"int64_t xor constants"); 445 | 446 | summarize("int64_t simple constant folding", SIZE, iterations, kDontShowGMeans, kDontShowPenalty ); 447 | 448 | 449 | iterations = base_iterations / 10; 450 | test_constant >(data64,SIZE,"int64_t constant add"); 451 | test_constant >(data64,SIZE,"int64_t multiple constant adds"); 452 | 453 | test_constant >(data64,SIZE,"int64_t constant subtract"); 454 | test_constant >(data64,SIZE,"int64_t multiple constant subtracts"); 455 | 456 | test_constant >(data64,SIZE,"int64_t constant multiply"); 457 | test_constant >(data64,SIZE,"int64_t multiple constant multiplies"); 458 | test_constant >(data64,SIZE,"int64_t multiple constant multiply2"); 459 | 460 | test_constant >(data64,SIZE,"int64_t constant divide"); 461 | test_constant >(data64,SIZE,"int64_t multiple constant divides"); 462 | test_constant >(data64,SIZE,"int64_t multiple constant divide2"); 463 | 464 | test_constant >(data64,SIZE,"int64_t multiple constant mixed"); 465 | 466 | test_constant >(data64,SIZE,"int64_t constant and"); 467 | test_constant >(data64,SIZE,"int64_t multiple constant and"); 468 | 469 | test_constant >(data64,SIZE,"int64_t constant or"); 470 | test_constant >(data64,SIZE,"int64_t multiple constant or"); 471 | 472 | test_constant >(data64,SIZE,"int64_t constant xor"); 473 | test_constant >(data64,SIZE,"int64_t multiple constant xor"); 474 | 475 | summarize("int64_t constant folding", SIZE, iterations, kDontShowGMeans, 
kDontShowPenalty ); 476 | 477 | 478 | // unsigned64 479 | ::fill(data64unsigned, data64unsigned+SIZE, uint64_t(init_value)); 480 | 481 | iterations = base_iterations; 482 | test_constant >(data64unsigned,SIZE,"uint64_t constant"); 483 | test_constant >(data64unsigned,SIZE,"uint64_t add constants"); 484 | test_constant >(data64unsigned,SIZE,"uint64_t subtract constants"); 485 | test_constant >(data64unsigned,SIZE,"uint64_t multiply constants"); 486 | test_constant >(data64unsigned,SIZE,"uint64_t divide constants"); 487 | test_constant >(data64unsigned,SIZE,"uint64_t mod constants"); 488 | test_constant >(data64unsigned,SIZE,"uint64_t equal constants"); 489 | test_constant >(data64unsigned,SIZE,"uint64_t notequal constants"); 490 | test_constant >(data64unsigned,SIZE,"uint64_t greater than constants"); 491 | test_constant >(data64unsigned,SIZE,"uint64_t less than constants"); 492 | test_constant >(data64unsigned,SIZE,"uint64_t greater than equal constants"); 493 | test_constant >(data64unsigned,SIZE,"uint64_t less than equal constants"); 494 | test_constant >(data64unsigned,SIZE,"uint64_t and constants"); 495 | test_constant >(data64unsigned,SIZE,"uint64_t or constants"); 496 | test_constant >(data64unsigned,SIZE,"uint64_t xor constants"); 497 | 498 | summarize("uint64_t simple constant folding", SIZE, iterations, kDontShowGMeans, kDontShowPenalty ); 499 | 500 | 501 | iterations = base_iterations / 10; 502 | test_constant >(data64unsigned,SIZE,"uint64_t constant add"); 503 | test_constant >(data64unsigned,SIZE,"uint64_t multiple constant adds"); 504 | 505 | test_constant >(data64unsigned,SIZE,"uint64_t constant subtract"); 506 | test_constant >(data64unsigned,SIZE,"uint64_t multiple constant subtracts"); 507 | 508 | test_constant >(data64unsigned,SIZE,"uint64_t constant multiply"); 509 | test_constant >(data64unsigned,SIZE,"uint64_t multiple constant multiplies"); 510 | test_constant >(data64unsigned,SIZE,"uint64_t multiple constant multiply2"); 511 | 512 | 
test_constant >(data64unsigned,SIZE,"uint64_t constant divide"); 513 | test_constant >(data64unsigned,SIZE,"uint64_t multiple constant divides"); 514 | test_constant >(data64unsigned,SIZE,"uint64_t multiple constant divide2"); 515 | 516 | test_constant >(data64unsigned,SIZE,"uint64_t multiple constant mixed"); 517 | 518 | test_constant >(data64unsigned,SIZE,"uint64_t constant and"); 519 | test_constant >(data64unsigned,SIZE,"uint64_t multiple constant and"); 520 | 521 | test_constant >(data64unsigned,SIZE,"uint64_t constant or"); 522 | test_constant >(data64unsigned,SIZE,"uint64_t multiple constant or"); 523 | 524 | test_constant >(data64unsigned,SIZE,"uint64_t constant xor"); 525 | test_constant >(data64unsigned,SIZE,"uint64_t multiple constant xor"); 526 | 527 | summarize("uint64_t constant folding", SIZE, iterations, kDontShowGMeans, kDontShowPenalty ); 528 | 529 | 530 | 531 | // float 532 | ::fill(dataFloat, dataFloat+SIZE, float(init_value)); 533 | 534 | iterations = base_iterations; 535 | test_constant >(dataFloat,SIZE,"float constant"); 536 | test_constant >(dataFloat,SIZE,"float add constants"); 537 | test_constant >(dataFloat,SIZE,"float subtract constants"); 538 | test_constant >(dataFloat,SIZE,"float multiply constants"); 539 | test_constant >(dataFloat,SIZE,"float divide constants"); 540 | 541 | summarize("float simple constant folding", SIZE, iterations, kDontShowGMeans, kDontShowPenalty ); 542 | 543 | 544 | iterations = base_iterations / 10; 545 | test_constant >(dataFloat,SIZE,"float constant add"); 546 | test_constant >(dataFloat,SIZE,"float multiple constant adds"); 547 | 548 | test_constant >(dataFloat,SIZE,"float constant subtract"); 549 | test_constant >(dataFloat,SIZE,"float multiple constant subtracts"); 550 | 551 | test_constant >(dataFloat,SIZE,"float constant multiply"); 552 | test_constant >(dataFloat,SIZE,"float multiple constant multiplies"); 553 | test_constant >(dataFloat,SIZE,"float multiple constant multiply2"); 554 | 555 | 
test_constant >(dataFloat,SIZE,"float constant divide"); 556 | test_constant >(dataFloat,SIZE,"float multiple constant divides"); 557 | test_constant >(dataFloat,SIZE,"float multiple constant divide2"); 558 | 559 | test_constant >(dataFloat,SIZE,"float multiple constant mixed"); 560 | 561 | summarize("float constant folding", SIZE, iterations, kDontShowGMeans, kDontShowPenalty ); 562 | 563 | 564 | // double 565 | ::fill(dataDouble, dataDouble+SIZE, double(init_value)); 566 | 567 | iterations = base_iterations; 568 | test_constant >(dataDouble,SIZE,"double constant"); 569 | test_constant >(dataDouble,SIZE,"double add constants"); 570 | test_constant >(dataDouble,SIZE,"double subtract constants"); 571 | test_constant >(dataDouble,SIZE,"double multiply constants"); 572 | test_constant >(dataDouble,SIZE,"double divide constants"); 573 | 574 | summarize("double simple constant folding", SIZE, iterations, kDontShowGMeans, kDontShowPenalty ); 575 | 576 | 577 | iterations = base_iterations / 10; 578 | test_constant >(dataDouble,SIZE,"double constant add"); 579 | test_constant >(dataDouble,SIZE,"double multiple constant adds"); 580 | 581 | test_constant >(dataDouble,SIZE,"double constant subtract"); 582 | test_constant >(dataDouble,SIZE,"double multiple constant subtracts"); 583 | 584 | test_constant >(dataDouble,SIZE,"double constant multiply"); 585 | test_constant >(dataDouble,SIZE,"double multiple constant multiplies"); 586 | test_constant >(dataDouble,SIZE,"double multiple constant multiply2"); 587 | 588 | test_constant >(dataDouble,SIZE,"double constant divide"); 589 | test_constant >(dataDouble,SIZE,"double multiple constant divides"); 590 | test_constant >(dataDouble,SIZE,"double multiple constant divide2"); 591 | 592 | test_constant >(dataDouble,SIZE,"double multiple constant mixed"); 593 | 594 | summarize("double constant folding", SIZE, iterations, kDontShowGMeans, kDontShowPenalty ); 595 | 596 | 597 | return 0; 598 | } 599 | 600 | // the end 601 | 
/******************************************************************************/ 602 | /******************************************************************************/ 603 | -------------------------------------------------------------------------------- /src/simple_types_loop_invariant.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2007-2008 Adobe Systems Incorporated 3 | Distributed under the MIT License (see accompanying file LICENSE_1_0_0.txt 4 | or a copy at http://stlab.adobe.com/licenses.html ) 5 | 6 | 7 | Goal: Test compiler optimizations related to simple language defined types 8 | 9 | Assumptions: 10 | 11 | 1) the compiler will move loop invariant calculations on simple types out of a loop 12 | aka: loop invariant code motion 13 | 14 | for (i = 0; i < N; ++i) temp = A + B + C + D; 15 | result = input[i] + A+B+C+D; ==> for (i = 0; i < N; ++i) 16 | result = input[i] + temp; 17 | 18 | */ 19 | 20 | /******************************************************************************/ 21 | 22 | #include "benchmark_stdint.hpp" 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include "benchmark_results.h" 29 | #include "benchmark_timer.h" 30 | 31 | /******************************************************************************/ 32 | 33 | // this constant may need to be adjusted to give reasonable minimum times 34 | // For best results, times should be about 1.0 seconds for the minimum test run 35 | int iterations = 200000; 36 | 37 | 38 | // 8000 items, or between 8k and 64k of data 39 | // this is intended to remain within the L2 cache of most common CPUs 40 | const int SIZE = 8000; 41 | 42 | 43 | // initial value for filling our arrays, may be changed from the command line 44 | double init_value = 1.0; 45 | 46 | /******************************************************************************/ 47 | 48 | // our global arrays of numbers to be operated upon 49 | 50 | double 
dataDouble[SIZE]; 51 | float dataFloat[SIZE]; 52 | 53 | uint64_t data64unsigned[SIZE]; 54 | int64_t data64[SIZE]; 55 | 56 | uint32_t data32unsigned[SIZE]; 57 | int32_t data32[SIZE]; 58 | 59 | uint16_t data16unsigned[SIZE]; 60 | int16_t data16[SIZE]; 61 | 62 | uint8_t data8unsigned[SIZE]; 63 | int8_t data8[SIZE]; 64 | 65 | /******************************************************************************/ 66 | 67 | #include "benchmark_shared_tests.h" 68 | 69 | /******************************************************************************/ 70 | 71 | // v1 is constant in the function, so we can move the addition or subtraction of it outside the loop entirely 72 | // converting it to a multiply and a summation of the input array 73 | // Note that this is always legal for integers 74 | // it can only be applied to floating point if using inexact math (relaxed IEEE rules) 75 | /* test_hoisted_variable1: hand-hoisted timing run, called from main() with the "... variable add hoisted" labels. Parameters: first = array that is summed; count = number of elements read from first; v1 = the loop-invariant value; label = name handed to record_result. Between start_timer() and record_result( timer(), label ) it repeats the global `iterations` times: sum first[0] .. first[count-1] into a T accumulator that starts at 0, add count * v1 once after the inner loop (instead of adding v1 per element), then pass the sum and v1 to check_shifted_variable_sum. Returns nothing. NOTE(review): start_timer, timer, record_result and check_shifted_variable_sum are not defined in the visible part of this file; presumably they come from the benchmark_*.h headers included above - confirm. NOTE(review): count * v1 is computed in the promoted arithmetic type and converted back to T by +=; assumed to agree with what the checker expects for 8/16-bit T - confirm. */ template 76 | void test_hoisted_variable1(T* first, int count, T v1, const char *label) { 77 | int i; 78 | 79 | start_timer(); 80 | 81 | for(i = 0; i < iterations; ++i) { 82 | T result = 0; 83 | for (int n = 0; n < count; ++n) { 84 | result += first[n]; 85 | } 86 | result += count * v1; 87 | check_shifted_variable_sum(result, v1); 88 | } 89 | 90 | record_result( timer(), label ); 91 | } 92 | 93 | /******************************************************************************/ 94 | 95 | 96 | int main(int argc, char** argv) { 97 | double temp = 1.0; 98 | 99 | // output command for documentation: 100 | int i; 101 | for (i = 0; i < argc; ++i) 102 | printf("%s ", argv[i] ); 103 | printf("\n"); 104 | 105 | if (argc > 1) iterations = atoi(argv[1]); 106 | if (argc > 2) init_value = (double) atof(argv[2]); 107 | if (argc > 3) temp = (double)atof(argv[3]); 108 | 109 | 110 | // int8_t 111 | ::fill(data8, data8+SIZE, int8_t(init_value)); 112 | int8_t var1int8_1, var1int8_2, var1int8_3, var1int8_4; 113 | var1int8_1 = int8_t(temp); 114 | var1int8_2 = var1int8_1 * int8_t(2); 115 | var1int8_3 = var1int8_1 +
int8_t(2); 116 | var1int8_4 = var1int8_1 + var1int8_2 / var1int8_3; 117 | 118 | // test moving redundant calcs out of loop 119 | test_variable1< int8_t, custom_add_variable > (data8, SIZE, var1int8_1, "int8_t variable add"); 120 | test_hoisted_variable1< int8_t, custom_add_variable > (data8, SIZE, var1int8_1, "int8_t variable add hoisted"); 121 | test_variable4< int8_t, custom_add_multiple_variable > (data8, SIZE, var1int8_1, var1int8_2, var1int8_3, var1int8_4, "int8_t multiple variable adds"); 122 | 123 | test_variable1< int8_t, custom_sub_variable > (data8, SIZE, var1int8_1, "int8_t variable subtract"); 124 | test_variable4< int8_t, custom_sub_multiple_variable > (data8, SIZE, var1int8_1, var1int8_2, var1int8_3, var1int8_4, "int8_t multiple variable subtracts"); 125 | 126 | test_variable1< int8_t, custom_multiply_variable > (data8, SIZE, var1int8_1, "int8_t variable multiply"); 127 | test_variable4< int8_t, custom_multiply_multiple_variable > (data8, SIZE, var1int8_1, var1int8_2, var1int8_3, var1int8_4, "int8_t multiple variable multiplies"); 128 | test_variable4< int8_t, custom_multiply_multiple_variable2 > (data8, SIZE, var1int8_1, var1int8_2, var1int8_3, var1int8_4, "int8_t multiple variable multiplies2"); 129 | 130 | test_variable1< int8_t, custom_divide_variable > (data8, SIZE, var1int8_1, "int8_t variable divide"); 131 | test_variable4< int8_t, custom_divide_multiple_variable > (data8, SIZE, var1int8_1, var1int8_2, var1int8_3, var1int8_4, "int8_t multiple variable divides"); 132 | test_variable4< int8_t, custom_divide_multiple_variable2 > (data8, SIZE, var1int8_1, var1int8_2, var1int8_3, var1int8_4, "int8_t multiple variable divides2"); 133 | 134 | test_variable4< int8_t, custom_mixed_multiple_variable > (data8, SIZE, var1int8_1, var1int8_2, var1int8_3, var1int8_4, "int8_t multiple variable mixed"); 135 | 136 | test_variable1< int8_t, custom_variable_and > (data8, SIZE, var1int8_1, "int8_t variable and"); 137 | test_variable4< int8_t, 
custom_multiple_variable_and > (data8, SIZE, var1int8_1, var1int8_2, var1int8_3, var1int8_4, "int8_t multiple variable and"); 138 | 139 | test_variable1< int8_t, custom_variable_or > (data8, SIZE, var1int8_1, "int8_t variable or"); 140 | test_variable4< int8_t, custom_multiple_variable_or > (data8, SIZE, var1int8_1, var1int8_2, var1int8_3, var1int8_4, "int8_t multiple variable or"); 141 | 142 | test_variable1< int8_t, custom_variable_xor > (data8, SIZE, var1int8_1, "int8_t variable xor"); 143 | test_variable4< int8_t, custom_multiple_variable_xor > (data8, SIZE, var1int8_1, var1int8_2, var1int8_3, var1int8_4, "int8_t multiple variable xor"); 144 | 145 | summarize("int8_t loop invariant", SIZE, iterations, kDontShowGMeans, kDontShowPenalty ); 146 | 147 | 148 | // unsigned8 149 | ::fill(data8unsigned, data8unsigned+SIZE, uint8_t(init_value)); 150 | uint8_t var1uint8_1, var1uint8_2, var1uint8_3, var1uint8_4; 151 | var1uint8_1 = uint8_t(temp); 152 | var1uint8_2 = var1uint8_1 * uint8_t(2); 153 | var1uint8_3 = var1uint8_1 + uint8_t(2); 154 | var1uint8_4 = var1uint8_1 + var1uint8_2 / var1uint8_3; 155 | 156 | // test moving redundant calcs out of loop 157 | test_variable1< uint8_t, custom_add_variable > (data8unsigned, SIZE, var1uint8_1, "uint8_t variable add"); 158 | test_hoisted_variable1< uint8_t, custom_add_variable > (data8unsigned, SIZE, var1uint8_1, "uint8_t variable add hoisted"); 159 | test_variable4< uint8_t, custom_add_multiple_variable > (data8unsigned, SIZE, var1uint8_1, var1uint8_2, var1uint8_3, var1uint8_4, "uint8_t multiple variable adds"); 160 | 161 | test_variable1< uint8_t, custom_sub_variable > (data8unsigned, SIZE, var1uint8_1, "uint8_t variable subtract"); 162 | test_variable4< uint8_t, custom_sub_multiple_variable > (data8unsigned, SIZE, var1uint8_1, var1uint8_2, var1uint8_3, var1uint8_4, "uint8_t multiple variable subtracts"); 163 | 164 | test_variable1< uint8_t, custom_multiply_variable > (data8unsigned, SIZE, var1uint8_1, "uint8_t variable 
multiply"); 165 | test_variable4< uint8_t, custom_multiply_multiple_variable > (data8unsigned, SIZE, var1uint8_1, var1uint8_2, var1uint8_3, var1uint8_4, "uint8_t multiple variable multiplies"); 166 | test_variable4< uint8_t, custom_multiply_multiple_variable2 > (data8unsigned, SIZE, var1uint8_1, var1uint8_2, var1uint8_3, var1uint8_4, "uint8_t multiple variable multiplies2"); 167 | 168 | test_variable1< uint8_t, custom_divide_variable > (data8unsigned, SIZE, var1uint8_1, "uint8_t variable divide"); 169 | test_variable4< uint8_t, custom_divide_multiple_variable > (data8unsigned, SIZE, var1uint8_1, var1uint8_2, var1uint8_3, var1uint8_4, "uint8_t multiple variable divides"); 170 | test_variable4< uint8_t, custom_divide_multiple_variable2 > (data8unsigned, SIZE, var1uint8_1, var1uint8_2, var1uint8_3, var1uint8_4, "uint8_t multiple variable divides2"); 171 | 172 | test_variable4< uint8_t, custom_mixed_multiple_variable > (data8unsigned, SIZE, var1uint8_1, var1uint8_2, var1uint8_3, var1uint8_4, "uint8_t multiple variable mixed"); 173 | 174 | test_variable1< uint8_t, custom_variable_and > (data8unsigned, SIZE, var1uint8_1, "uint8_t variable and"); 175 | test_variable4< uint8_t, custom_multiple_variable_and > (data8unsigned, SIZE, var1uint8_1, var1uint8_2, var1uint8_3, var1uint8_4, "uint8_t multiple variable and"); 176 | 177 | test_variable1< uint8_t, custom_variable_or > (data8unsigned, SIZE, var1uint8_1, "uint8_t variable or"); 178 | test_variable4< uint8_t, custom_multiple_variable_or > (data8unsigned, SIZE, var1uint8_1, var1uint8_2, var1uint8_3, var1uint8_4, "uint8_t multiple variable or"); 179 | 180 | test_variable1< uint8_t, custom_variable_xor > (data8unsigned, SIZE, var1uint8_1, "uint8_t variable xor"); 181 | test_variable4< uint8_t, custom_multiple_variable_xor > (data8unsigned, SIZE, var1uint8_1, var1uint8_2, var1uint8_3, var1uint8_4, "uint8_t multiple variable xor"); 182 | 183 | summarize("uint8_t loop invariant", SIZE, iterations, kDontShowGMeans, 
kDontShowPenalty ); 184 | 185 | 186 | // int16_t 187 | ::fill(data16, data16+SIZE, int16_t(init_value)); 188 | int16_t var1int16_1, var1int16_2, var1int16_3, var1int16_4; 189 | var1int16_1 = int16_t(temp); 190 | var1int16_2 = var1int16_1 * int16_t(2); 191 | var1int16_3 = var1int16_1 + int16_t(2); 192 | var1int16_4 = var1int16_1 + var1int16_2 / var1int16_3; 193 | 194 | // test moving redundant calcs out of loop 195 | test_variable1< int16_t, custom_add_variable > (data16, SIZE, var1int16_1, "int16_t variable add"); 196 | test_hoisted_variable1< int16_t, custom_add_variable > (data16, SIZE, var1int16_1, "int16_t variable add hoisted"); 197 | test_variable4< int16_t, custom_add_multiple_variable > (data16, SIZE, var1int16_1, var1int16_2, var1int16_3, var1int16_4, "int16_t multiple variable adds"); 198 | 199 | test_variable1< int16_t, custom_sub_variable > (data16, SIZE, var1int16_1, "int16_t variable subtract"); 200 | test_variable4< int16_t, custom_sub_multiple_variable > (data16, SIZE, var1int16_1, var1int16_2, var1int16_3, var1int16_4, "int16_t multiple variable subtracts"); 201 | 202 | test_variable1< int16_t, custom_multiply_variable > (data16, SIZE, var1int16_1, "int16_t variable multiply"); 203 | test_variable4< int16_t, custom_multiply_multiple_variable > (data16, SIZE, var1int16_1, var1int16_2, var1int16_3, var1int16_4, "int16_t multiple variable multiplies"); 204 | test_variable4< int16_t, custom_multiply_multiple_variable2 > (data16, SIZE, var1int16_1, var1int16_2, var1int16_3, var1int16_4, "int16_t multiple variable multiplies2"); 205 | 206 | test_variable1< int16_t, custom_divide_variable > (data16, SIZE, var1int16_1, "int16_t variable divide"); 207 | test_variable4< int16_t, custom_divide_multiple_variable > (data16, SIZE, var1int16_1, var1int16_2, var1int16_3, var1int16_4, "int16_t multiple variable divides"); 208 | test_variable4< int16_t, custom_divide_multiple_variable2 > (data16, SIZE, var1int16_1, var1int16_2, var1int16_3, var1int16_4, "int16_t 
multiple variable divides2"); 209 | 210 | test_variable4< int16_t, custom_mixed_multiple_variable > (data16, SIZE, var1int16_1, var1int16_2, var1int16_3, var1int16_4, "int16_t multiple variable mixed"); 211 | 212 | test_variable1< int16_t, custom_variable_and > (data16, SIZE, var1int16_1, "int16_t variable and"); 213 | test_variable4< int16_t, custom_multiple_variable_and > (data16, SIZE, var1int16_1, var1int16_2, var1int16_3, var1int16_4, "int16_t multiple variable and"); 214 | 215 | test_variable1< int16_t, custom_variable_or > (data16, SIZE, var1int16_1, "int16_t variable or"); 216 | test_variable4< int16_t, custom_multiple_variable_or > (data16, SIZE, var1int16_1, var1int16_2, var1int16_3, var1int16_4, "int16_t multiple variable or"); 217 | 218 | test_variable1< int16_t, custom_variable_xor > (data16, SIZE, var1int16_1, "int16_t variable xor"); 219 | test_variable4< int16_t, custom_multiple_variable_xor > (data16, SIZE, var1int16_1, var1int16_2, var1int16_3, var1int16_4, "int16_t multiple variable xor"); 220 | 221 | summarize("int16_t loop invariant", SIZE, iterations, kDontShowGMeans, kDontShowPenalty ); 222 | 223 | 224 | // unsigned16 225 | ::fill(data16unsigned, data16unsigned+SIZE, uint16_t(init_value)); 226 | uint16_t var1uint16_1, var1uint16_2, var1uint16_3, var1uint16_4; 227 | var1uint16_1 = uint16_t(temp); 228 | var1uint16_2 = var1uint16_1 * uint16_t(2); 229 | var1uint16_3 = var1uint16_1 + uint16_t(2); 230 | var1uint16_4 = var1uint16_1 + var1uint16_2 / var1uint16_3; 231 | 232 | // test moving redundant calcs out of loop 233 | test_variable1< uint16_t, custom_add_variable > (data16unsigned, SIZE, var1uint16_1, "uint16_t variable add"); 234 | test_hoisted_variable1< uint16_t, custom_add_variable > (data16unsigned, SIZE, var1uint16_1, "uint16_t variable add hoisted"); 235 | test_variable4< uint16_t, custom_add_multiple_variable > (data16unsigned, SIZE, var1uint16_1, var1uint16_2, var1uint16_3, var1uint16_4, "uint16_t multiple variable adds"); 236 | 237 | 
test_variable1< uint16_t, custom_sub_variable > (data16unsigned, SIZE, var1uint16_1, "uint16_t variable subtract"); 238 | test_variable4< uint16_t, custom_sub_multiple_variable > (data16unsigned, SIZE, var1uint16_1, var1uint16_2, var1uint16_3, var1uint16_4, "uint16_t multiple variable subtracts"); 239 | 240 | test_variable1< uint16_t, custom_multiply_variable > (data16unsigned, SIZE, var1uint16_1, "uint16_t variable multiply"); 241 | test_variable4< uint16_t, custom_multiply_multiple_variable > (data16unsigned, SIZE, var1uint16_1, var1uint16_2, var1uint16_3, var1uint16_4, "uint16_t multiple variable multiplies"); 242 | test_variable4< uint16_t, custom_multiply_multiple_variable2 > (data16unsigned, SIZE, var1uint16_1, var1uint16_2, var1uint16_3, var1uint16_4, "uint16_t multiple variable multiplies2"); 243 | 244 | test_variable1< uint16_t, custom_divide_variable > (data16unsigned, SIZE, var1uint16_1, "uint16_t variable divide"); 245 | test_variable4< uint16_t, custom_divide_multiple_variable > (data16unsigned, SIZE, var1uint16_1, var1uint16_2, var1uint16_3, var1uint16_4, "uint16_t multiple variable divides"); 246 | test_variable4< uint16_t, custom_divide_multiple_variable2 > (data16unsigned, SIZE, var1uint16_1, var1uint16_2, var1uint16_3, var1uint16_4, "uint16_t multiple variable divides2"); 247 | 248 | test_variable4< uint16_t, custom_mixed_multiple_variable > (data16unsigned, SIZE, var1uint16_1, var1uint16_2, var1uint16_3, var1uint16_4, "uint16_t multiple variable mixed"); 249 | 250 | test_variable1< uint16_t, custom_variable_and > (data16unsigned, SIZE, var1uint16_1, "uint16_t variable and"); 251 | test_variable4< uint16_t, custom_multiple_variable_and > (data16unsigned, SIZE, var1uint16_1, var1uint16_2, var1uint16_3, var1uint16_4, "uint16_t multiple variable and"); 252 | 253 | test_variable1< uint16_t, custom_variable_or > (data16unsigned, SIZE, var1uint16_1, "uint16_t variable or"); 254 | test_variable4< uint16_t, custom_multiple_variable_or > (data16unsigned, 
SIZE, var1uint16_1, var1uint16_2, var1uint16_3, var1uint16_4, "uint16_t multiple variable or"); 255 | 256 | test_variable1< uint16_t, custom_variable_xor > (data16unsigned, SIZE, var1uint16_1, "uint16_t variable xor"); 257 | test_variable4< uint16_t, custom_multiple_variable_xor > (data16unsigned, SIZE, var1uint16_1, var1uint16_2, var1uint16_3, var1uint16_4, "uint16_t multiple variable xor"); 258 | 259 | summarize("uint16_t loop invariant", SIZE, iterations, kDontShowGMeans, kDontShowPenalty ); 260 | 261 | 262 | // int32_t 263 | ::fill(data32, data32+SIZE, int32_t(init_value)); 264 | int32_t var1int32_1, var1int32_2, var1int32_3, var1int32_4; 265 | var1int32_1 = int32_t(temp); 266 | var1int32_2 = var1int32_1 * int32_t(2); 267 | var1int32_3 = var1int32_1 + int32_t(2); 268 | var1int32_4 = var1int32_1 + var1int32_2 / var1int32_3; 269 | 270 | // test moving redundant calcs out of loop 271 | test_variable1< int32_t, custom_add_variable > (data32, SIZE, var1int32_1, "int32_t variable add"); 272 | test_hoisted_variable1< int32_t, custom_add_variable > (data32, SIZE, var1int32_1, "int32_t variable add hoisted"); 273 | test_variable4< int32_t, custom_add_multiple_variable > (data32, SIZE, var1int32_1, var1int32_2, var1int32_3, var1int32_4, "int32_t multiple variable adds"); 274 | 275 | test_variable1< int32_t, custom_sub_variable > (data32, SIZE, var1int32_1, "int32_t variable subtract"); 276 | test_variable4< int32_t, custom_sub_multiple_variable > (data32, SIZE, var1int32_1, var1int32_2, var1int32_3, var1int32_4, "int32_t multiple variable subtracts"); 277 | 278 | test_variable1< int32_t, custom_multiply_variable > (data32, SIZE, var1int32_1, "int32_t variable multiply"); 279 | test_variable4< int32_t, custom_multiply_multiple_variable > (data32, SIZE, var1int32_1, var1int32_2, var1int32_3, var1int32_4, "int32_t multiple variable multiplies"); 280 | test_variable4< int32_t, custom_multiply_multiple_variable2 > (data32, SIZE, var1int32_1, var1int32_2, var1int32_3, 
var1int32_4, "int32_t multiple variable multiplies2"); 281 | 282 | test_variable1< int32_t, custom_divide_variable > (data32, SIZE, var1int32_1, "int32_t variable divide"); 283 | test_variable4< int32_t, custom_divide_multiple_variable > (data32, SIZE, var1int32_1, var1int32_2, var1int32_3, var1int32_4, "int32_t multiple variable divides"); 284 | test_variable4< int32_t, custom_divide_multiple_variable2 > (data32, SIZE, var1int32_1, var1int32_2, var1int32_3, var1int32_4, "int32_t multiple variable divides2"); 285 | 286 | test_variable4< int32_t, custom_mixed_multiple_variable > (data32, SIZE, var1int32_1, var1int32_2, var1int32_3, var1int32_4, "int32_t multiple variable mixed"); 287 | 288 | test_variable1< int32_t, custom_variable_and > (data32, SIZE, var1int32_1, "int32_t variable and"); 289 | test_variable4< int32_t, custom_multiple_variable_and > (data32, SIZE, var1int32_1, var1int32_2, var1int32_3, var1int32_4, "int32_t multiple variable and"); 290 | 291 | test_variable1< int32_t, custom_variable_or > (data32, SIZE, var1int32_1, "int32_t variable or"); 292 | test_variable4< int32_t, custom_multiple_variable_or > (data32, SIZE, var1int32_1, var1int32_2, var1int32_3, var1int32_4, "int32_t multiple variable or"); 293 | 294 | test_variable1< int32_t, custom_variable_xor > (data32, SIZE, var1int32_1, "int32_t variable xor"); 295 | test_variable4< int32_t, custom_multiple_variable_xor > (data32, SIZE, var1int32_1, var1int32_2, var1int32_3, var1int32_4, "int32_t multiple variable xor"); 296 | 297 | summarize("int32_t loop invariant", SIZE, iterations, kDontShowGMeans, kDontShowPenalty ); 298 | 299 | 300 | // unsigned32 301 | ::fill(data32unsigned, data32unsigned+SIZE, uint32_t(init_value)); 302 | uint32_t var1uint32_1, var1uint32_2, var1uint32_3, var1uint32_4; 303 | var1uint32_1 = uint32_t(temp); 304 | var1uint32_2 = var1uint32_1 * uint32_t(2); 305 | var1uint32_3 = var1uint32_1 + uint32_t(2); 306 | var1uint32_4 = var1uint32_1 + var1uint32_2 / var1uint32_3; 307 | 308 | 
// test moving redundant calcs out of loop 309 | test_variable1< uint32_t, custom_add_variable > (data32unsigned, SIZE, var1uint32_1, "uint32_t variable add"); 310 | test_hoisted_variable1< uint32_t, custom_add_variable > (data32unsigned, SIZE, var1uint32_1, "uint32_t variable add hoisted"); 311 | test_variable4< uint32_t, custom_add_multiple_variable > (data32unsigned, SIZE, var1uint32_1, var1uint32_2, var1uint32_3, var1uint32_4, "uint32_t multiple variable adds"); 312 | 313 | test_variable1< uint32_t, custom_sub_variable > (data32unsigned, SIZE, var1uint32_1, "uint32_t variable subtract"); 314 | test_variable4< uint32_t, custom_sub_multiple_variable > (data32unsigned, SIZE, var1uint32_1, var1uint32_2, var1uint32_3, var1uint32_4, "uint32_t multiple variable subtracts"); 315 | 316 | test_variable1< uint32_t, custom_multiply_variable > (data32unsigned, SIZE, var1uint32_1, "uint32_t variable multiply"); 317 | test_variable4< uint32_t, custom_multiply_multiple_variable > (data32unsigned, SIZE, var1uint32_1, var1uint32_2, var1uint32_3, var1uint32_4, "uint32_t multiple variable multiplies"); 318 | test_variable4< uint32_t, custom_multiply_multiple_variable2 > (data32unsigned, SIZE, var1uint32_1, var1uint32_2, var1uint32_3, var1uint32_4, "uint32_t multiple variable multiplies2"); 319 | 320 | test_variable1< uint32_t, custom_divide_variable > (data32unsigned, SIZE, var1uint32_1, "uint32_t variable divide"); 321 | test_variable4< uint32_t, custom_divide_multiple_variable > (data32unsigned, SIZE, var1uint32_1, var1uint32_2, var1uint32_3, var1uint32_4, "uint32_t multiple variable divides"); 322 | test_variable4< uint32_t, custom_divide_multiple_variable2 > (data32unsigned, SIZE, var1uint32_1, var1uint32_2, var1uint32_3, var1uint32_4, "uint32_t multiple variable divides2"); 323 | 324 | test_variable4< uint32_t, custom_mixed_multiple_variable > (data32unsigned, SIZE, var1uint32_1, var1uint32_2, var1uint32_3, var1uint32_4, "uint32_t multiple variable mixed"); 325 | 326 | 
test_variable1< uint32_t, custom_variable_and > (data32unsigned, SIZE, var1uint32_1, "uint32_t variable and"); 327 | test_variable4< uint32_t, custom_multiple_variable_and > (data32unsigned, SIZE, var1uint32_1, var1uint32_2, var1uint32_3, var1uint32_4, "uint32_t multiple variable and"); 328 | 329 | test_variable1< uint32_t, custom_variable_or > (data32unsigned, SIZE, var1uint32_1, "uint32_t variable or"); 330 | test_variable4< uint32_t, custom_multiple_variable_or > (data32unsigned, SIZE, var1uint32_1, var1uint32_2, var1uint32_3, var1uint32_4, "uint32_t multiple variable or"); 331 | 332 | test_variable1< uint32_t, custom_variable_xor > (data32unsigned, SIZE, var1uint32_1, "uint32_t variable xor"); 333 | test_variable4< uint32_t, custom_multiple_variable_xor > (data32unsigned, SIZE, var1uint32_1, var1uint32_2, var1uint32_3, var1uint32_4, "uint32_t multiple variable xor"); 334 | 335 | summarize("uint32_t loop invariant", SIZE, iterations, kDontShowGMeans, kDontShowPenalty ); 336 | 337 | 338 | // int64_t 339 | ::fill(data64, data64+SIZE, int64_t(init_value)); 340 | int64_t var1int64_1, var1int64_2, var1int64_3, var1int64_4; 341 | var1int64_1 = int64_t(temp); 342 | var1int64_2 = var1int64_1 * int64_t(2); 343 | var1int64_3 = var1int64_1 + int64_t(2); 344 | var1int64_4 = var1int64_1 + var1int64_2 / var1int64_3; 345 | 346 | // test moving redundant calcs out of loop 347 | test_variable1< int64_t, custom_add_variable > (data64, SIZE, var1int64_1, "int64_t variable add"); 348 | test_hoisted_variable1< int64_t, custom_add_variable > (data64, SIZE, var1int64_1, "int64_t variable add hoisted"); 349 | test_variable4< int64_t, custom_add_multiple_variable > (data64, SIZE, var1int64_1, var1int64_2, var1int64_3, var1int64_4, "int64_t multiple variable adds"); 350 | 351 | test_variable1< int64_t, custom_sub_variable > (data64, SIZE, var1int64_1, "int64_t variable subtract"); 352 | test_variable4< int64_t, custom_sub_multiple_variable > (data64, SIZE, var1int64_1, var1int64_2, 
var1int64_3, var1int64_4, "int64_t multiple variable subtracts"); 353 | 354 | test_variable1< int64_t, custom_multiply_variable > (data64, SIZE, var1int64_1, "int64_t variable multiply"); 355 | test_variable4< int64_t, custom_multiply_multiple_variable > (data64, SIZE, var1int64_1, var1int64_2, var1int64_3, var1int64_4, "int64_t multiple variable multiplies"); 356 | test_variable4< int64_t, custom_multiply_multiple_variable2 > (data64, SIZE, var1int64_1, var1int64_2, var1int64_3, var1int64_4, "int64_t multiple variable multiplies2"); 357 | 358 | test_variable1< int64_t, custom_divide_variable > (data64, SIZE, var1int64_1, "int64_t variable divide"); 359 | test_variable4< int64_t, custom_divide_multiple_variable > (data64, SIZE, var1int64_1, var1int64_2, var1int64_3, var1int64_4, "int64_t multiple variable divides"); 360 | test_variable4< int64_t, custom_divide_multiple_variable2 > (data64, SIZE, var1int64_1, var1int64_2, var1int64_3, var1int64_4, "int64_t multiple variable divides2"); 361 | 362 | test_variable4< int64_t, custom_mixed_multiple_variable > (data64, SIZE, var1int64_1, var1int64_2, var1int64_3, var1int64_4, "int64_t multiple variable mixed"); 363 | 364 | test_variable1< int64_t, custom_variable_and > (data64, SIZE, var1int64_1, "int64_t variable and"); 365 | test_variable4< int64_t, custom_multiple_variable_and > (data64, SIZE, var1int64_1, var1int64_2, var1int64_3, var1int64_4, "int64_t multiple variable and"); 366 | 367 | test_variable1< int64_t, custom_variable_or > (data64, SIZE, var1int64_1, "int64_t variable or"); 368 | test_variable4< int64_t, custom_multiple_variable_or > (data64, SIZE, var1int64_1, var1int64_2, var1int64_3, var1int64_4, "int64_t multiple variable or"); 369 | 370 | test_variable1< int64_t, custom_variable_xor > (data64, SIZE, var1int64_1, "int64_t variable xor"); 371 | test_variable4< int64_t, custom_multiple_variable_xor > (data64, SIZE, var1int64_1, var1int64_2, var1int64_3, var1int64_4, "int64_t multiple variable xor"); 372 | 
373 | summarize("int64_t loop invariant", SIZE, iterations, kDontShowGMeans, kDontShowPenalty ); 374 | 375 | 376 | // unsigned64 377 | ::fill(data64unsigned, data64unsigned+SIZE, uint64_t(init_value)); 378 | uint64_t var1uint64_1, var1uint64_2, var1uint64_3, var1uint64_4; 379 | var1uint64_1 = uint64_t(temp); 380 | var1uint64_2 = var1uint64_1 * uint64_t(2); 381 | var1uint64_3 = var1uint64_1 + uint64_t(2); 382 | var1uint64_4 = var1uint64_1 + var1uint64_2 / var1uint64_3; 383 | 384 | // test moving redundant calcs out of loop 385 | test_variable1< uint64_t, custom_add_variable > (data64unsigned, SIZE, var1uint64_1, "uint64_t variable add"); 386 | test_hoisted_variable1< uint64_t, custom_add_variable > (data64unsigned, SIZE, var1uint64_1, "uint64_t variable add hoisted"); 387 | test_variable4< uint64_t, custom_add_multiple_variable > (data64unsigned, SIZE, var1uint64_1, var1uint64_2, var1uint64_3, var1uint64_4, "uint64_t multiple variable adds"); 388 | 389 | test_variable1< uint64_t, custom_sub_variable > (data64unsigned, SIZE, var1uint64_1, "uint64_t variable subtract"); 390 | test_variable4< uint64_t, custom_sub_multiple_variable > (data64unsigned, SIZE, var1uint64_1, var1uint64_2, var1uint64_3, var1uint64_4, "uint64_t multiple variable subtracts"); 391 | 392 | test_variable1< uint64_t, custom_multiply_variable > (data64unsigned, SIZE, var1uint64_1, "uint64_t variable multiply"); 393 | test_variable4< uint64_t, custom_multiply_multiple_variable > (data64unsigned, SIZE, var1uint64_1, var1uint64_2, var1uint64_3, var1uint64_4, "uint64_t multiple variable multiplies"); 394 | test_variable4< uint64_t, custom_multiply_multiple_variable2 > (data64unsigned, SIZE, var1uint64_1, var1uint64_2, var1uint64_3, var1uint64_4, "uint64_t multiple variable multiplies2"); 395 | 396 | test_variable1< uint64_t, custom_divide_variable > (data64unsigned, SIZE, var1uint64_1, "uint64_t variable divide"); 397 | test_variable4< uint64_t, custom_divide_multiple_variable > (data64unsigned, SIZE, 
var1uint64_1, var1uint64_2, var1uint64_3, var1uint64_4, "uint64_t multiple variable divides"); 398 | test_variable4< uint64_t, custom_divide_multiple_variable2 > (data64unsigned, SIZE, var1uint64_1, var1uint64_2, var1uint64_3, var1uint64_4, "uint64_t multiple variable divides2"); 399 | 400 | test_variable4< uint64_t, custom_mixed_multiple_variable > (data64unsigned, SIZE, var1uint64_1, var1uint64_2, var1uint64_3, var1uint64_4, "uint64_t multiple variable mixed"); 401 | 402 | test_variable1< uint64_t, custom_variable_and > (data64unsigned, SIZE, var1uint64_1, "uint64_t variable and"); 403 | test_variable4< uint64_t, custom_multiple_variable_and > (data64unsigned, SIZE, var1uint64_1, var1uint64_2, var1uint64_3, var1uint64_4, "uint64_t multiple variable and"); 404 | 405 | test_variable1< uint64_t, custom_variable_or > (data64unsigned, SIZE, var1uint64_1, "uint64_t variable or"); 406 | test_variable4< uint64_t, custom_multiple_variable_or > (data64unsigned, SIZE, var1uint64_1, var1uint64_2, var1uint64_3, var1uint64_4, "uint64_t multiple variable or"); 407 | 408 | test_variable1< uint64_t, custom_variable_xor > (data64unsigned, SIZE, var1uint64_1, "uint64_t variable xor"); 409 | test_variable4< uint64_t, custom_multiple_variable_xor > (data64unsigned, SIZE, var1uint64_1, var1uint64_2, var1uint64_3, var1uint64_4, "uint64_t multiple variable xor"); 410 | 411 | summarize("uint64_t loop invariant", SIZE, iterations, kDontShowGMeans, kDontShowPenalty ); 412 | 413 | 414 | 415 | // float 416 | ::fill(dataFloat, dataFloat+SIZE, float(init_value)); 417 | float var1Float_1, var1Float_2, var1Float_3, var1Float_4; 418 | var1Float_1 = float(temp); 419 | var1Float_2 = var1Float_1 * float(2.0); 420 | var1Float_3 = var1Float_1 + float(2.0); 421 | var1Float_4 = var1Float_1 + var1Float_2 / var1Float_3; 422 | 423 | // test moving redundant calcs out of loop 424 | test_variable1< float, custom_add_variable > (dataFloat, SIZE, var1Float_1, "float variable add"); 425 | 
test_hoisted_variable1< float, custom_add_variable > (dataFloat, SIZE, var1Float_1, "float variable add hoisted"); 426 | test_variable4< float, custom_add_multiple_variable > (dataFloat, SIZE, var1Float_1, var1Float_2, var1Float_3, var1Float_4, "float multiple variable adds"); 427 | 428 | test_variable1< float, custom_sub_variable > (dataFloat, SIZE, var1Float_1, "float variable subtract"); 429 | test_variable4< float, custom_sub_multiple_variable > (dataFloat, SIZE, var1Float_1, var1Float_2, var1Float_3, var1Float_4, "float multiple variable subtracts"); 430 | 431 | test_variable1< float, custom_multiply_variable > (dataFloat, SIZE, var1Float_1, "float variable multiply"); 432 | test_variable4< float, custom_multiply_multiple_variable > (dataFloat, SIZE, var1Float_1, var1Float_2, var1Float_3, var1Float_4, "float multiple variable multiplies"); 433 | test_variable4< float, custom_multiply_multiple_variable2 > (dataFloat, SIZE, var1Float_1, var1Float_2, var1Float_3, var1Float_4, "float multiple variable multiplies2"); 434 | 435 | test_variable1< float, custom_divide_variable > (dataFloat, SIZE, var1Float_1, "float variable divide"); 436 | test_variable4< float, custom_divide_multiple_variable > (dataFloat, SIZE, var1Float_1, var1Float_2, var1Float_3, var1Float_4, "float multiple variable divides"); 437 | test_variable4< float, custom_divide_multiple_variable2 > (dataFloat, SIZE, var1Float_1, var1Float_2, var1Float_3, var1Float_4, "float multiple variable divides2"); 438 | 439 | test_variable4< float, custom_mixed_multiple_variable > (dataFloat, SIZE, var1Float_1, var1Float_2, var1Float_3, var1Float_4, "float multiple variable mixed"); 440 | 441 | summarize("float loop invariant", SIZE, iterations, kDontShowGMeans, kDontShowPenalty ); 442 | 443 | 444 | // double 445 | ::fill(dataDouble, dataDouble+SIZE, double(init_value)); 446 | double var1Double_1, var1Double_2, var1Double_3, var1Double_4; 447 | var1Double_1 = double(temp); 448 | var1Double_2 = var1Double_1 * 
double(2.0); 449 | var1Double_3 = var1Double_1 + double(2.0); 450 | var1Double_4 = var1Double_1 + var1Double_2 / var1Double_3; 451 | 452 | // test moving redundant calcs out of loop 453 | test_variable1< double, custom_add_variable > (dataDouble, SIZE, var1Double_1, "double variable add"); 454 | test_hoisted_variable1< double, custom_add_variable > (dataDouble, SIZE, var1Double_1, "double variable add hoisted"); 455 | test_variable4< double, custom_add_multiple_variable > (dataDouble, SIZE, var1Double_1, var1Double_2, var1Double_3, var1Double_4, "double multiple variable adds"); 456 | 457 | test_variable1< double, custom_sub_variable > (dataDouble, SIZE, var1Double_1, "double variable subtract"); 458 | test_variable4< double, custom_sub_multiple_variable > (dataDouble, SIZE, var1Double_1, var1Double_2, var1Double_3, var1Double_4, "double multiple variable subtracts"); 459 | 460 | test_variable1< double, custom_multiply_variable > (dataDouble, SIZE, var1Double_1, "double variable multiply"); 461 | test_variable4< double, custom_multiply_multiple_variable > (dataDouble, SIZE, var1Double_1, var1Double_2, var1Double_3, var1Double_4, "double multiple variable multiplies"); 462 | test_variable4< double, custom_multiply_multiple_variable2 > (dataDouble, SIZE, var1Double_1, var1Double_2, var1Double_3, var1Double_4, "double multiple variable multiplies2"); 463 | 464 | test_variable1< double, custom_divide_variable > (dataDouble, SIZE, var1Double_1, "double variable divide"); 465 | test_variable4< double, custom_divide_multiple_variable > (dataDouble, SIZE, var1Double_1, var1Double_2, var1Double_3, var1Double_4, "double multiple variable divides"); 466 | test_variable4< double, custom_divide_multiple_variable2 > (dataDouble, SIZE, var1Double_1, var1Double_2, var1Double_3, var1Double_4, "double multiple variable divides2"); 467 | 468 | test_variable4< double, custom_mixed_multiple_variable > (dataDouble, SIZE, var1Double_1, var1Double_2, var1Double_3, var1Double_4, "double 
multiple variable mixed"); 469 | 470 | summarize("double loop invariant", SIZE, iterations, kDontShowGMeans, kDontShowPenalty ); 471 | 472 | 473 | return 0; 474 | } 475 | 476 | // the end 477 | /******************************************************************************/ 478 | /******************************************************************************/ 479 | -------------------------------------------------------------------------------- /src/stepanov_abstraction.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2007-2008 Adobe Systems Incorporated 3 | Distributed under the MIT License (see accompanying file LICENSE_1_0_0.txt 4 | or a copy at http://stlab.adobe.com/licenses.html ) 5 | 6 | 7 | Goal: examine any change in performance when adding abstraction to simple data types 8 | in other words: what happens when adding {} around a type. 9 | 10 | 11 | Assumptions: 12 | 13 | 1) A value wrapped in a struct or class should not perform worse than a raw value 14 | 15 | 2) A value recursively wrapped in a struct or class should not perform worse than the raw value 16 | 17 | 18 | History: 19 | Alex Stepanov created the abstraction penalty benchmark. 20 | Recently, Alex suggested that I take ownership of his benchmark and extend it. 21 | 22 | The original accumulation tests used to show large penalties for using abstraction, 23 | but compilers have improved. I have added three sorting tests with non-trivial 24 | value and pointer usage that show some compilers still have more 25 | opportunities for optimization. 
26 | 27 | Chris Cox 28 | February 2008 29 | 30 | */ 31 | 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include "benchmark_results.h" 38 | #include "benchmark_timer.h" 39 | #include "benchmark_algorithms.h" 40 | 41 | /******************************************************************************/ 42 | 43 | // a value wrapped in a struct, recursively 44 | 45 | template 46 | struct ValueWrapper { 47 | T value; 48 | ValueWrapper() {} 49 | template 50 | inline operator TT () const { return (TT)value; } 51 | template 52 | ValueWrapper(const TT& x) : value(x) {} 53 | T& operator*() const { return *value; } 54 | }; 55 | 56 | template 57 | inline ValueWrapper operator+(const ValueWrapper& x, const ValueWrapper& y) { 58 | return ValueWrapper(x.value + y.value); 59 | } 60 | 61 | template 62 | inline bool operator<(const ValueWrapper& x, const ValueWrapper& y) { 63 | return (x.value < y.value); 64 | } 65 | 66 | /******************************************************************************/ 67 | 68 | typedef ValueWrapper DoubleValueWrapper; 69 | typedef ValueWrapper< ValueWrapper< ValueWrapper< ValueWrapper< ValueWrapper< ValueWrapper< ValueWrapper< ValueWrapper< ValueWrapper< ValueWrapper > > > > > > > > > DoubleValueWrapper10; 70 | 71 | /******************************************************************************/ 72 | 73 | // a pointer wrapped in a struct, aka an iterator 74 | 75 | template 76 | struct PointerWrapper { 77 | T* current; 78 | PointerWrapper() {} 79 | PointerWrapper(T* x) : current(x) {} 80 | T& operator*() const { return *current; } 81 | }; 82 | 83 | // really a distance between pointers, which must return ptrdiff_t 84 | // because (ptr - ptr) --> ptrdiff_t 85 | template 86 | inline ptrdiff_t operator-(PointerWrapper& xx, PointerWrapper& yy) { 87 | return (ptrdiff_t)( xx.current - yy.current ); 88 | } 89 | 90 | template 91 | inline PointerWrapper& operator++(PointerWrapper &xx) { 92 | ++xx.current; 93 | return xx; 94 | } 95 | 
96 | template 97 | inline PointerWrapper& operator--(PointerWrapper &xx) { 98 | --xx.current; 99 | return xx; 100 | } 101 | 102 | template 103 | inline PointerWrapper operator++(PointerWrapper &xx, int) { 104 | PointerWrapper tmp = xx; 105 | ++xx; 106 | return tmp; 107 | } 108 | 109 | template 110 | inline PointerWrapper operator--(PointerWrapper &xx, int) { 111 | PointerWrapper tmp = xx; 112 | --xx; 113 | return tmp; 114 | } 115 | 116 | template 117 | inline PointerWrapper operator-(PointerWrapper &xx, ptrdiff_t inc) { 118 | PointerWrapper tmp = xx; 119 | tmp.current -= inc; 120 | return tmp; 121 | } 122 | 123 | template 124 | inline PointerWrapper operator+(PointerWrapper &xx, ptrdiff_t inc) { 125 | PointerWrapper tmp = xx; 126 | tmp.current += inc; 127 | return tmp; 128 | } 129 | 130 | template 131 | inline PointerWrapper& operator+=(PointerWrapper &xx, ptrdiff_t inc) { 132 | xx.current += inc; 133 | return xx; 134 | } 135 | 136 | template 137 | inline PointerWrapper& operator-=(PointerWrapper &xx, ptrdiff_t inc) { 138 | xx.current -= inc; 139 | return xx; 140 | } 141 | 142 | template 143 | inline bool operator<(const PointerWrapper& x, const PointerWrapper& y) { 144 | return (x.current < y.current); 145 | } 146 | 147 | template 148 | inline bool operator==(const PointerWrapper& x, const PointerWrapper& y) { 149 | return (x.current == y.current); 150 | } 151 | 152 | template 153 | inline bool operator!=(const PointerWrapper& x, const PointerWrapper& y) { 154 | return (x.current != y.current); 155 | } 156 | 157 | /******************************************************************************/ 158 | 159 | typedef PointerWrapper double_pointer; 160 | typedef PointerWrapper doubleValueWrapper_pointer; 161 | typedef PointerWrapper doubleValueWrapper10_pointer; 162 | 163 | /******************************************************************************/ 164 | /******************************************************************************/ 165 | 166 | // this constant 
may need to be adjusted to give reasonable minimum times 167 | // For best results, times should be about 1.0 seconds for the minimum test run 168 | int iterations = 2000000; 169 | 170 | // 2000 items, or about 16k of data 171 | // this is intended to remain within the L2 cache of most common CPUs 172 | const int SIZE = 2000; 173 | 174 | // initial value for filling our arrays, may be changed from the command line 175 | double init_value = 3.0; 176 | 177 | /******************************************************************************/ 178 | /******************************************************************************/ 179 | 180 | inline void check_sum(double result) { 181 | if (result != SIZE * init_value) printf("test %i failed\n", current_test); 182 | } 183 | 184 | /******************************************************************************/ 185 | 186 | template 187 | void verify_sorted(Iterator first, Iterator last) { 188 | if (!is_sorted(first,last)) 189 | printf("sort test %i failed\n", current_test); 190 | } 191 | 192 | /******************************************************************************/ 193 | /******************************************************************************/ 194 | 195 | template 196 | void test_accumulate(Iterator first, Iterator last, T zero, const char *label) { 197 | int i; 198 | 199 | start_timer(); 200 | 201 | for(i = 0; i < iterations; ++i) 202 | check_sum( double( accumulate(first, last, zero) ) ); 203 | 204 | record_result( timer(), label ); 205 | } 206 | 207 | /******************************************************************************/ 208 | 209 | template 210 | void test_insertion_sort(Iterator firstSource, Iterator lastSource, Iterator firstDest, 211 | Iterator lastDest, T zero, const char *label) { 212 | int i; 213 | 214 | start_timer(); 215 | 216 | for(i = 0; i < iterations; ++i) { 217 | ::copy(firstSource, lastSource, firstDest); 218 | insertionSort< Iterator, T>( firstDest, lastDest ); 219 | verify_sorted( 
firstDest, lastDest ); 220 | } 221 | 222 | record_result( timer(), label ); 223 | } 224 | 225 | /******************************************************************************/ 226 | 227 | template 228 | void test_quicksort(Iterator firstSource, Iterator lastSource, Iterator firstDest, 229 | Iterator lastDest, T zero, const char *label) { 230 | int i; 231 | 232 | start_timer(); 233 | 234 | for(i = 0; i < iterations; ++i) { 235 | ::copy(firstSource, lastSource, firstDest); 236 | quicksort< Iterator, T>( firstDest, lastDest ); 237 | verify_sorted( firstDest, lastDest ); 238 | } 239 | 240 | record_result( timer(), label ); 241 | } 242 | 243 | /******************************************************************************/ 244 | 245 | template 246 | void test_heap_sort(Iterator firstSource, Iterator lastSource, Iterator firstDest, 247 | Iterator lastDest, T zero, const char *label) { 248 | int i; 249 | 250 | start_timer(); 251 | 252 | for(i = 0; i < iterations; ++i) { 253 | ::copy(firstSource, lastSource, firstDest); 254 | heapsort< Iterator, T>( firstDest, lastDest ); 255 | verify_sorted( firstDest, lastDest ); 256 | } 257 | 258 | record_result( timer(), label ); 259 | } 260 | 261 | /******************************************************************************/ 262 | /******************************************************************************/ 263 | 264 | // our global arrays of numbers to be summed 265 | 266 | double data[SIZE]; 267 | DoubleValueWrapper VData[SIZE]; 268 | DoubleValueWrapper10 V10Data[SIZE]; 269 | 270 | double dataMaster[SIZE]; 271 | DoubleValueWrapper VDataMaster[SIZE]; 272 | DoubleValueWrapper10 V10DataMaster[SIZE]; 273 | 274 | /******************************************************************************/ 275 | 276 | // declaration of our iterator types and begin/end pairs 277 | typedef double* dp; 278 | dp dpb = data; 279 | dp dpe = data + SIZE; 280 | dp dMpb = dataMaster; 281 | dp dMpe = dataMaster + SIZE; 282 | 283 | typedef 
DoubleValueWrapper* DVp; 284 | DVp DVpb = VData; 285 | DVp DVpe = VData + SIZE; 286 | DVp DVMpb = VDataMaster; 287 | DVp DVMpe = VDataMaster + SIZE; 288 | 289 | typedef DoubleValueWrapper10* DV10p; 290 | DV10p DV10pb = V10Data; 291 | DV10p DV10pe = V10Data + SIZE; 292 | DV10p DV10Mpb = V10DataMaster; 293 | DV10p DV10Mpe = V10DataMaster + SIZE; 294 | 295 | typedef double_pointer dP; 296 | dP dPb(dpb); 297 | dP dPe(dpe); 298 | dP dMPb(dMpb); 299 | dP dMPe(dMpe); 300 | 301 | typedef doubleValueWrapper_pointer DVP; 302 | DVP DVPb(DVpb); 303 | DVP DVPe(DVpe); 304 | DVP DVMPb(DVMpb); 305 | DVP DVMPe(DVMpe); 306 | 307 | typedef doubleValueWrapper10_pointer DV10P; 308 | DV10P DV10Pb(DV10pb); 309 | DV10P DV10Pe(DV10pe); 310 | DV10P DV10MPb(DV10Mpb); 311 | DV10P DV10MPe(DV10Mpe); 312 | 313 | /******************************************************************************/ 314 | /******************************************************************************/ 315 | 316 | int main(int argc, char** argv) { 317 | 318 | double dZero = 0.0; 319 | DoubleValueWrapper DVZero = 0.0; 320 | DoubleValueWrapper10 DV10Zero = DoubleValueWrapper10(0.0); 321 | 322 | // output command for documentation: 323 | int i; 324 | for (i = 0; i < argc; ++i) 325 | printf("%s ", argv[i] ); 326 | printf("\n"); 327 | 328 | if (argc > 1) iterations = atoi(argv[1]); 329 | if (argc > 2) init_value = (double) atof(argv[2]); 330 | 331 | // seed the random number generator so we get repeatable results 332 | srand( (int)init_value + 123 ); 333 | 334 | 335 | fill(dpb, dpe, double(init_value)); 336 | fill(DVpb, DVpe, DoubleValueWrapper(init_value)); 337 | fill(DV10pb, DV10pe, DoubleValueWrapper10(init_value)); 338 | 339 | test_accumulate(dpb, dpe, dZero, "double pointer"); 340 | test_accumulate(dPb, dPe, dZero, "double pointer_class"); 341 | test_accumulate(DVpb, DVpe, DVZero, "DoubleValueWrapper pointer"); 342 | test_accumulate(DVPb, DVPe, DVZero, "DoubleValueWrapper pointer_class"); 343 | test_accumulate(DV10pb, 
DV10pe, DV10Zero, "DoubleValueWrapper10 pointer"); 344 | test_accumulate(DV10Pb, DV10Pe, DV10Zero, "DoubleValueWrapper10 pointer_class"); 345 | 346 | summarize("Abstraction Accumulate", SIZE, iterations, kShowGMeans, kShowPenalty ); 347 | 348 | 349 | // the sorting tests are much slower than the accumulation tests - O(N^2) 350 | iterations = iterations / 2000; 351 | 352 | // fill one set of random numbers 353 | fill_random( dMpb, dMpe ); 354 | // copy to the other sets, so we have the same numbers 355 | ::copy( dMpb, dMpe, DVMpb ); 356 | ::copy( dMpb, dMpe, DV10Mpb ); 357 | 358 | test_insertion_sort(dMpb, dMpe, dpb, dpe, dZero, "insertion_sort double pointer"); 359 | test_insertion_sort(dMPb, dMPe, dPb, dPe, dZero, "insertion_sort double pointer_class"); 360 | test_insertion_sort(DVMpb, DVMpe, DVpb, DVpe, DVZero, "insertion_sort DoubleValueWrapper pointer"); 361 | test_insertion_sort(DVMPb, DVMPe, DVPb, DVPe, DVZero, "insertion_sort DoubleValueWrapper pointer_class"); 362 | test_insertion_sort(DV10Mpb, DV10Mpe, DV10pb, DV10pe, DV10Zero, "insertion_sort DoubleValueWrapper10 pointer"); 363 | test_insertion_sort(DV10MPb, DV10MPe, DV10Pb, DV10Pe, DV10Zero, "insertion_sort DoubleValueWrapper10 pointer_class"); 364 | 365 | summarize("Abstraction Insertion Sort", SIZE, iterations, kShowGMeans, kShowPenalty ); 366 | 367 | 368 | // these are slightly faster - O(NLog2(N)) 369 | iterations = iterations * 8; 370 | 371 | test_quicksort(dMpb, dMpe, dpb, dpe, dZero, "quicksort double pointer"); 372 | test_quicksort(dMPb, dMPe, dPb, dPe, dZero, "quicksort double pointer_class"); 373 | test_quicksort(DVMpb, DVMpe, DVpb, DVpe, DVZero, "quicksort DoubleValueWrapper pointer"); 374 | test_quicksort(DVMPb, DVMPe, DVPb, DVPe, DVZero, "quicksort DoubleValueWrapper pointer_class"); 375 | test_quicksort(DV10Mpb, DV10Mpe, DV10pb, DV10pe, DV10Zero, "quicksort DoubleValueWrapper10 pointer"); 376 | test_quicksort(DV10MPb, DV10MPe, DV10Pb, DV10Pe, DV10Zero, "quicksort DoubleValueWrapper10 
pointer_class"); 377 | 378 | summarize("Abstraction Quicksort", SIZE, iterations, kShowGMeans, kShowPenalty ); 379 | 380 | 381 | test_heap_sort(dMpb, dMpe, dpb, dpe, dZero, "heap_sort double pointer"); 382 | test_heap_sort(dMPb, dMPe, dPb, dPe, dZero, "heap_sort double pointer_class"); 383 | test_heap_sort(DVMpb, DVMpe, DVpb, DVpe, DVZero, "heap_sort DoubleValueWrapper pointer"); 384 | test_heap_sort(DVMPb, DVMPe, DVPb, DVPe, DVZero, "heap_sort DoubleValueWrapper pointer_class"); 385 | test_heap_sort(DV10Mpb, DV10Mpe, DV10pb, DV10pe, DV10Zero, "heap_sort DoubleValueWrapper10 pointer"); 386 | test_heap_sort(DV10MPb, DV10MPe, DV10Pb, DV10Pe, DV10Zero, "heap_sort DoubleValueWrapper10 pointer_class"); 387 | 388 | summarize("Abstraction Heap Sort", SIZE, iterations, kShowGMeans, kShowPenalty ); 389 | 390 | 391 | return 0; 392 | } 393 | 394 | // the end 395 | /******************************************************************************/ 396 | /******************************************************************************/ 397 | -------------------------------------------------------------------------------- /src/stepanov_vector.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2007-2008 Adobe Systems Incorporated 3 | Distributed under the MIT License (see accompanying file LICENSE_1_0_0.txt 4 | or a copy at http://stlab.adobe.com/licenses.html ) 5 | 6 | 7 | Goal: examine any change in performance when moving from pointers to vector iterators 8 | 9 | 10 | Assumptions: 11 | 1) Vector iterators should not perform worse than raw pointers. 12 | 13 | Programmers should never be tempted to write 14 | std::sort( &*vec.begin(), &*( vec.begin() + vec.size() ) ) 15 | instead of 16 | std::sort( vec.begin(), vec.end() ) 17 | 18 | HIstory: 19 | This is an extension to Alex Stepanov's original abstraction penalty benchmark 20 | to test the compiler vendor implementation of vector iterators. 
21 | 22 | */ 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include "benchmark_results.h" 31 | #include "benchmark_timer.h" 32 | #include "benchmark_algorithms.h" 33 | 34 | /******************************************************************************/ 35 | /******************************************************************************/ 36 | 37 | // this constant may need to be adjusted to give reasonable minimum times 38 | // For best results, times should be about 1.0 seconds for the minimum test run 39 | int iterations = 1500000; 40 | 41 | // 2000 items, or about 16k of data 42 | // this is intended to remain within the L2 cache of most common CPUs 43 | const int SIZE = 2000; 44 | 45 | // initial value for filling our arrays, may be changed from the command line 46 | double init_value = 3.0; 47 | 48 | /******************************************************************************/ 49 | /******************************************************************************/ 50 | 51 | inline void check_sum(double result) { 52 | if (result != SIZE * init_value) printf("test %i failed\n", current_test); 53 | } 54 | 55 | /******************************************************************************/ 56 | 57 | template 58 | void verify_sorted(Iterator first, Iterator last) { 59 | if (!is_sorted(first,last)) 60 | printf("sort test %i failed\n", current_test); 61 | } 62 | 63 | /******************************************************************************/ 64 | 65 | // a template using the accumulate template and iterators 66 | 67 | template 68 | void test_accumulate(Iterator first, Iterator last, T zero, const char *label) { 69 | int i; 70 | 71 | start_timer(); 72 | 73 | for(i = 0; i < iterations; ++i) 74 | check_sum( double( accumulate(first, last, zero) ) ); 75 | 76 | record_result( timer(), label ); 77 | } 78 | 79 | /******************************************************************************/ 80 | 81 | template 82 | void 
test_insertion_sort(Iterator firstSource, Iterator lastSource, Iterator firstDest, 83 | Iterator lastDest, T zero, const char *label) { 84 | int i; 85 | 86 | start_timer(); 87 | 88 | for(i = 0; i < iterations; ++i) { 89 | ::copy(firstSource, lastSource, firstDest); 90 | insertionSort< Iterator, T>( firstDest, lastDest ); 91 | verify_sorted( firstDest, lastDest ); 92 | } 93 | 94 | record_result( timer(), label ); 95 | } 96 | 97 | /******************************************************************************/ 98 | 99 | template 100 | void test_quicksort(Iterator firstSource, Iterator lastSource, Iterator firstDest, 101 | Iterator lastDest, T zero, const char *label) { 102 | int i; 103 | 104 | start_timer(); 105 | 106 | for(i = 0; i < iterations; ++i) { 107 | ::copy(firstSource, lastSource, firstDest); 108 | quicksort< Iterator, T>( firstDest, lastDest ); 109 | verify_sorted( firstDest, lastDest ); 110 | } 111 | 112 | record_result( timer(), label ); 113 | } 114 | 115 | /******************************************************************************/ 116 | 117 | template 118 | void test_heap_sort(Iterator firstSource, Iterator lastSource, Iterator firstDest, 119 | Iterator lastDest, T zero, const char *label) { 120 | int i; 121 | 122 | start_timer(); 123 | 124 | for(i = 0; i < iterations; ++i) { 125 | ::copy(firstSource, lastSource, firstDest); 126 | heapsort< Iterator, T>( firstDest, lastDest ); 127 | verify_sorted( firstDest, lastDest ); 128 | } 129 | 130 | record_result( timer(), label ); 131 | } 132 | 133 | /******************************************************************************/ 134 | /******************************************************************************/ 135 | 136 | // our global arrays of numbers to be summed 137 | 138 | double data[SIZE]; 139 | double dataMaster[SIZE]; 140 | 141 | /******************************************************************************/ 142 | 143 | // declaration of our iterator types and begin/end pairs 144 | typedef 
double* dp; 145 | dp dpb = data; 146 | dp dpe = data + SIZE; 147 | dp dMpb = dataMaster; 148 | dp dMpe = dataMaster + SIZE; 149 | 150 | typedef std::reverse_iterator rdp; 151 | rdp rdpb(dpe); 152 | rdp rdpe(dpb); 153 | rdp rdMpb(dMpe); 154 | rdp rdMpe(dMpb); 155 | 156 | typedef std::reverse_iterator rrdp; 157 | rrdp rrdpb(rdpe); 158 | rrdp rrdpe(rdpb); 159 | rrdp rrdMpb(rdMpe); 160 | rrdp rrdMpe(rdMpb); 161 | 162 | typedef std::vector::iterator vdp; 163 | 164 | typedef std::vector::reverse_iterator rvdp; 165 | typedef std::reverse_iterator< vdp > rtvdp; 166 | 167 | typedef std::reverse_iterator rtrvdp; 168 | typedef std::reverse_iterator< rtvdp > rtrtvdp; 169 | 170 | 171 | /******************************************************************************/ 172 | /******************************************************************************/ 173 | 174 | 175 | int main(int argc, char** argv) { 176 | 177 | double dZero = 0.0; 178 | 179 | // output command for documentation: 180 | int i; 181 | for (i = 0; i < argc; ++i) 182 | printf("%s ", argv[i] ); 183 | printf("\n"); 184 | 185 | if (argc > 1) iterations = atoi(argv[1]); 186 | if (argc > 2) init_value = (double) atof(argv[2]); 187 | 188 | // seed the random number generator so we get repeatable results 189 | srand( (int)init_value + 123 ); 190 | 191 | 192 | ::fill(dpb, dpe, double(init_value)); 193 | 194 | std::vector vec_data; 195 | vec_data.resize(SIZE); 196 | 197 | ::fill(vec_data.begin(), vec_data.end(), double(init_value)); 198 | 199 | rtvdp rtvdpb(vec_data.end()); 200 | rtvdp rtvdpe(vec_data.begin()); 201 | 202 | rtrvdp rtrvdpb(vec_data.rend()); 203 | rtrvdp rtrvdpe(vec_data.rbegin()); 204 | 205 | rtrtvdp rtrtvdpb(rtvdpe); 206 | rtrtvdp rtrtvdpe(rtvdpb); 207 | 208 | test_accumulate(dpb, dpe, dZero, "double pointer verify2"); 209 | test_accumulate(vec_data.begin(), vec_data.end(), dZero, "double vector iterator"); 210 | test_accumulate(rdpb, rdpe, dZero, "double pointer reverse"); 211 | 
test_accumulate(vec_data.rbegin(), vec_data.rend(), dZero, "double vector reverse_iterator"); 212 | test_accumulate(rtvdpb, rtvdpe, dZero, "double vector iterator reverse"); 213 | test_accumulate(rrdpb, rrdpe, dZero, "double pointer reverse reverse"); 214 | test_accumulate(rtrvdpb, rtrvdpe, dZero, "double vector reverse_iterator reverse"); 215 | test_accumulate(rtrtvdpb, rtrtvdpe, dZero, "double vector iterator reverse reverse"); 216 | 217 | summarize("Vector accumulate", SIZE, iterations, kShowGMeans, kShowPenalty ); 218 | 219 | 220 | 221 | // the sorting tests are much slower than the accumulation tests - O(N^2) 222 | iterations = iterations / 1000; 223 | 224 | std::vector vec_dataMaster; 225 | vec_dataMaster.resize(SIZE); 226 | 227 | // fill one set of random numbers 228 | fill_random( dMpb, dMpe ); 229 | 230 | // copy to the other sets, so we have the same numbers 231 | ::copy( dMpb, dMpe, vec_dataMaster.begin() ); 232 | 233 | rtvdp rtvdMpb(vec_dataMaster.end()); 234 | rtvdp rtvdMpe(vec_dataMaster.begin()); 235 | 236 | rtrvdp rtrvdMpb(vec_dataMaster.rend()); 237 | rtrvdp rtrvdMpe(vec_dataMaster.rbegin()); 238 | 239 | rtrtvdp rtrtvdMpb(rtvdMpe); 240 | rtrtvdp rtrtvdMpe(rtvdMpb); 241 | 242 | test_insertion_sort(dMpb, dMpe, dpb, dpe, dZero, "insertion_sort double pointer verify2"); 243 | test_insertion_sort(vec_dataMaster.begin(), vec_dataMaster.end(), vec_data.begin(), vec_data.end(), dZero, "insertion_sort double vector iterator"); 244 | test_insertion_sort(rdMpb, rdMpe, rdpb, rdpe, dZero, "insertion_sort double pointer reverse"); 245 | test_insertion_sort(vec_dataMaster.rbegin(), vec_dataMaster.rend(), vec_data.rbegin(), vec_data.rend(), dZero, "insertion_sort double vector reverse_iterator"); 246 | test_insertion_sort(rtvdMpb, rtvdMpe, rtvdpb, rtvdpe, dZero, "insertion_sort double vector iterator reverse"); 247 | test_insertion_sort(rrdMpb, rrdMpe, rrdpb, rrdpe, dZero, "insertion_sort double pointer reverse reverse"); 248 | test_insertion_sort(rtrvdMpb, 
rtrvdMpe, rtrvdpb, rtrvdpe, dZero, "insertion_sort double vector reverse_iterator reverse"); 249 | test_insertion_sort(rtrtvdMpb, rtrtvdMpe, rtrtvdpb, rtrtvdpe, dZero, "insertion_sort double vector iterator reverse reverse"); 250 | 251 | summarize("Vector Insertion Sort", SIZE, iterations, kShowGMeans, kShowPenalty ); 252 | 253 | 254 | // these are slightly faster - O(NLog2(N)) 255 | iterations = iterations * 8; 256 | 257 | test_quicksort(dMpb, dMpe, dpb, dpe, dZero, "quicksort double pointer verify2"); 258 | test_quicksort(vec_dataMaster.begin(), vec_dataMaster.end(), vec_data.begin(), vec_data.end(), dZero, "quicksort double vector iterator"); 259 | test_quicksort(rdMpb, rdMpe, rdpb, rdpe, dZero, "quicksort double pointer reverse"); 260 | test_quicksort(vec_dataMaster.rbegin(), vec_dataMaster.rend(), vec_data.rbegin(), vec_data.rend(), dZero, "quicksort double vector reverse_iterator"); 261 | test_quicksort(rtvdMpb, rtvdMpe, rtvdpb, rtvdpe, dZero, "quicksort double vector iterator reverse"); 262 | test_quicksort(rrdMpb, rrdMpe, rrdpb, rrdpe, dZero, "quicksort double pointer reverse reverse"); 263 | test_quicksort(rtrvdMpb, rtrvdMpe, rtrvdpb, rtrvdpe, dZero, "quicksort double vector reverse_iterator reverse"); 264 | test_quicksort(rtrtvdMpb, rtrtvdMpe, rtrtvdpb, rtrtvdpe, dZero, "quicksort double vector iterator reverse reverse"); 265 | 266 | summarize("Vector Quicksort", SIZE, iterations, kShowGMeans, kShowPenalty ); 267 | 268 | 269 | test_heap_sort(dMpb, dMpe, dpb, dpe, dZero, "heap_sort double pointer verify2"); 270 | test_heap_sort(vec_dataMaster.begin(), vec_dataMaster.end(), vec_data.begin(), vec_data.end(), dZero, "heap_sort double vector iterator"); 271 | test_heap_sort(rdMpb, rdMpe, rdpb, rdpe, dZero, "heap_sort double pointer reverse"); 272 | test_heap_sort(vec_dataMaster.rbegin(), vec_dataMaster.rend(), vec_data.rbegin(), vec_data.rend(), dZero, "heap_sort double vector reverse_iterator"); 273 | test_heap_sort(rtvdMpb, rtvdMpe, rtvdpb, rtvdpe, dZero, 
"heap_sort double vector iterator reverse"); 274 | test_heap_sort(rrdMpb, rrdMpe, rrdpb, rrdpe, dZero, "heap_sort double pointer reverse reverse"); 275 | test_heap_sort(rtrvdMpb, rtrvdMpe, rtrvdpb, rtrvdpe, dZero, "heap_sort double vector reverse_iterator reverse"); 276 | test_heap_sort(rtrtvdMpb, rtrtvdMpe, rtrtvdpb, rtrtvdpe, dZero, "heap_sort double vector iterator reverse reverse"); 277 | 278 | summarize("Vector Heap Sort", SIZE, iterations, kShowGMeans, kShowPenalty ); 279 | 280 | 281 | 282 | return 0; 283 | } 284 | 285 | // the end 286 | /******************************************************************************/ 287 | /******************************************************************************/ 288 | --------------------------------------------------------------------------------