├── .gitignore
├── CMakeLists.txt
├── LICENSE_1_0_0.txt
├── README.md
├── docs
    └── ORIGINAL_README.txt
├── include
    ├── benchmark_algorithms.h
    ├── benchmark_results.h
    ├── benchmark_shared_tests.h
    ├── benchmark_stdint.hpp
    └── benchmark_timer.h
└── src
    ├── functionobjects.cpp
    ├── loop_unroll.cpp
    ├── machine.cpp
    ├── simple_types_constant_folding.cpp
    ├── simple_types_loop_invariant.cpp
    ├── stepanov_abstraction.cpp
    └── stepanov_vector.cpp


/.gitignore:
--------------------------------------------------------------------------------
1 | /build/*


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.10)
 2 | project(cpp_benchmark LANGUAGES CXX)
 3 | 
 4 | # Benchmark numbers from an unoptimized build are meaningless, so default to
 5 | # Release when no build type was requested. Multi-config generators choose the
 6 | # configuration at build time and are left alone.
 7 | get_property(cpp_benchmark_is_multi_config GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
 8 | if(NOT cpp_benchmark_is_multi_config AND NOT CMAKE_BUILD_TYPE)
 9 |   set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
10 | endif()
11 | 
12 | # Each benchmark is a single translation unit, src/<name>.cpp. It is built into
13 | # an executable called <name> and registered with CTest as report_<name>.
14 | set(cpp_benchmark_names
15 |   machine
16 |   stepanov_abstraction
17 |   stepanov_vector
18 |   loop_unroll
19 |   simple_types_loop_invariant
20 |   functionobjects
21 |   simple_types_constant_folding
22 | )
23 | 
24 | enable_testing()
25 | foreach(benchmark_name IN LISTS cpp_benchmark_names)
26 |   add_executable(${benchmark_name} src/${benchmark_name}.cpp)
27 |   # Target-scoped include path instead of a directory-wide include_directories().
28 |   target_include_directories(${benchmark_name} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/include")
29 |   add_test(NAME report_${benchmark_name} COMMAND ${benchmark_name})
30 | endforeach()


--------------------------------------------------------------------------------
/LICENSE_1_0_0.txt:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2007-2008 Adobe Systems Incorporated
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of
 4 | this software and associated documentation files (the "Software"), to deal in
 5 | the Software without restriction, including without limitation the rights to
 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 7 | the Software, and to permit persons to whom the Software is furnished to do so,
 8 | subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in all
11 | copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
15 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
16 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
17 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
18 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ## Adobe's C++ Performance Benchmarks for modern compilers (and build systems)
 2 | 
 3 | Imported and cleaned up from http://stlab.adobe.com/performance/.
 4 | 
 5 | ### Overview
 6 | 
 7 | This is an updated version of Adobe STL Labs' C++ Performance Benchmark.
 8 | The code has mostly been left untouched; the main goal of this version is to provide a simple CMake
 9 | build so that it can be run on any modern toolchain.
10 | 
11 | The original readme that includes the creator's intent can still be found under `docs/`.
12 | 
13 | ### Build & run
14 | 
15 | Simply build it like any CMake project and run all tests through `ctest -V`.
16 | 
17 | ### Credits
18 | 
19 | Original work by Chris Cox and Adobe STL Labs: http://stlab.adobe.com/performance/credits.html.
20 | 
21 | ### License
22 | 
23 | This project is licensed under the MIT License.


--------------------------------------------------------------------------------
/docs/ORIGINAL_README.txt:
--------------------------------------------------------------------------------
 1 | /*
 2 |     Copyright 2007-2008 Adobe Systems Incorporated
 3 |     Distributed under the MIT License (see accompanying file LICENSE_1_0_0.txt
 4 |     or a copy at http://stlab.adobe.com/licenses.html )
 5 | */
 6 | 
 7 | /******************************************************************************/
 8 | 
 9 | Goals:
10 | 
11 | To help compiler vendors identify places where they may be able to improve
12 | the performance of the code they generate.
13 | 
14 | To help developers understand the performance impact of using different
15 | data types, operations, and C++ language features with their
16 | target compilers and OSes.
17 | 
18 | /******************************************************************************/
19 | 
20 | Secondary goals:
21 | 
22 | To take performance problems found in real world code and turn them
23 |     into benchmarks for compiler vendors and other developers to learn from.
24 | 
25 | Keep the benchmark portable to as many compilers and OSes as possible
26 |     This means keeping things simple and external dependencies minimal
27 | 
28 | Not to use specialized optimization flags per test
29 |     No pragmas or other compiler directives are allowed in the source.
30 |     All source files should use the same compilation flags.
31 |     Use the common optimization flags (-O, -O1, -O2, -O3, or -Os).
32 |     If another option improves optimization, then why isn't it on for -O3?
33 |     If an optimization flag doesn't always improve performance, that is
34 |         most likely a bug in the optimization code that needs to be fixed.
35 | 	In the real world, developers can't test all permutations of all
36 | 		optimization flags.  They expect the standard flags to work.
37 | 
38 | /******************************************************************************/
39 | 
40 | **** A note to compiler vendors:
41 |      Please match the idioms, not the instances.
42 |      The benchmark code will be changing over time.
43 |      And we do read your assembly output.
44 | 
45 | /******************************************************************************/
46 | 
47 | Building:
48 | 
49 | Unix users should be able to use "make all" to build and "make report"
50 | to generate the report. If you wish to use a different compiler, you can
51 | set that from the make command line, or edit the makefile.
52 | 
53 | Windows users will need to make sure that the VC environment variables
54 | are set for their shell (command prompt), then use "nmake -f makefile.nt all"
55 | and "nmake -f makefile.nt report" from within that shell.
56 | 
57 | 


--------------------------------------------------------------------------------
/include/benchmark_algorithms.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |     Copyright 2007-2008 Adobe Systems Incorporated
  3 |     Distributed under the MIT License (see accompanying file LICENSE_1_0_0.txt
  4 |     or a copy at http://stlab.adobe.com/licenses.html)
  5 |     
  6 |     Shared source file for algorithms used in multiple benchmark files
  7 | */
  8 | 
  9 | namespace benchmark {
 10 | 
 11 | /******************************************************************************/
 12 | // Returns true when [first, last) is in non-descending order under operator<; an empty range counts as sorted.
 13 | template <typename Iterator>
 14 | bool is_sorted(Iterator first, Iterator last) {
 15 | 	if (first == last) return true;	// empty range: stepping first past last below would be invalid
 16 | 	Iterator prev = first++;	// prev trails exactly one element behind first
 17 | 	while (first != last) {
 18 | 		if ( *first++ < *prev++)	// an element smaller than its predecessor breaks the order
 19 | 			return false;
 20 | 	}
 21 | 	return true;
 22 | }
 23 | 
 24 | /******************************************************************************/
 25 | 
 26 | template <typename Iterator, typename T>
 27 | void fill(Iterator first, Iterator last, T value) {	// assigns a copy of value to every element of [first, last)
 28 | 	while (first != last) *first++ = value;
 29 | }
 30 | 
 31 | /******************************************************************************/
 32 | 
 33 | template <typename Iterator, typename T>
 34 | void fill_random(Iterator first, Iterator last) {	// T appears in no parameter, so callers must name it: fill_random<Iterator, T>(...)
 35 | 	while (first != last) {
 36 | 		*first++ = static_cast<T>( rand() );	// one rand() call per element; this header includes nothing, so rand() must be declared by the including file
 37 | 	}
 38 | }
 39 | 
 40 | /******************************************************************************/
 41 | 
 42 | template <class Iterator, class T>
 43 | void fill_descending(Iterator first, Iterator last, unsigned count) {	// writes count-1, count-2, ... into [first, last); T must be named explicitly
 44 | 	while (first != last) {
 45 | 		*first++ = static_cast<T>( --count );	// unsigned pre-decrement: wraps around if the range holds more than count elements
 46 | 	}
 47 | }
 48 | 
 49 | /******************************************************************************/
 50 | 
 51 | template <typename Iterator1, typename Iterator2>
 52 | void copy(Iterator1 firstSource, Iterator1 lastSource, Iterator2 firstDest) {	// forward element-wise copy of [firstSource, lastSource) to firstDest; returns nothing
 53 | 	while (firstSource != lastSource) *(firstDest++) = *(firstSource++);
 54 | }
 55 | 
 56 | /******************************************************************************/
 57 | 
 58 | template <class Iterator, class Swapper>
 59 | void reverse(Iterator begin, Iterator end, Swapper doswap)	// reverses [begin, end) in place by calling doswap on pairs of iterators
 60 | {
 61 | 	while (begin != end)
 62 | 	{
 63 | 		--end;	// end now refers to the last element not yet swapped
 64 | 		if (begin == end)	// odd length: the middle element stays where it is
 65 | 			break;
 66 | 		doswap(begin, end);	// doswap receives the iterators themselves, not the values
 67 | 		++begin;
 68 | 	}
 69 | }
 70 | 
 71 | /******************************************************************************/
 72 | 
 73 | // our accumulator function template, using iterators or pointers
 74 | template <typename Iterator, typename Number>
 75 | Number accumulate(Iterator first, Iterator last, Number result) {	// returns result plus every element of [first, last), added front to back
 76 | 	while (first != last) result =  result + *first++;
 77 | 	return result;
 78 | }
 79 | 
 80 | /******************************************************************************/
 81 | // In-place ascending insertion sort of [begin, end) using operator<; T is the element type and must be named by the caller.
 82 | template <typename Iterator, typename T>
 83 | void insertionSort( Iterator begin, Iterator end )
 84 | {
 85 | 	if ( begin == end ) return;	// empty range: stepping p past end below would never terminate
 86 | 	Iterator p = begin;
 87 | 	p++;	// a one-element prefix is already sorted
 88 | 	while ( p != end ) {
 89 | 		T tmp = *p;	// element being inserted into the sorted prefix
 90 | 		Iterator j = p;
 91 | 		Iterator prev = j;
 92 | 
 93 | 		for (  ; j != begin && tmp < *--prev; --j ) {	// shift larger elements one slot to the right
 94 | 			*j = *prev;
 95 | 		}
 96 | 
 97 | 		*j = tmp;
 98 | 		p++;
 99 | 	}
100 | }
101 | 
102 | /******************************************************************************/
103 | 
104 | template<typename Iterator, typename T>
105 | void quicksort(Iterator begin, Iterator end)	// recursive in-place ascending sort of [begin, end); uses end - begin, right + 1 and left < right on the iterators
106 | {
107 | 	if ( (end - begin) > 1 ) {	// ranges of zero or one element need no work
108 | 
109 | 		T middleValue = *begin;	// the pivot is the first element
110 | 		Iterator left = begin;
111 | 		Iterator right = end;
112 | 
113 | 		for(;;) {
114 | 
115 | 			while ( middleValue < *(--right) );	// scan down to an element not greater than the pivot
116 | 			if ( !(left < right ) ) break;
117 | 			
118 | 			while ( *(left) < middleValue )	// scan up to an element not less than the pivot
119 | 				++left;
120 | 			if ( !(left < right ) ) break;
121 | 
122 | 			// swap
123 | 			T temp = *right;
124 | 			*right = *left;
125 | 			*left = temp;
126 | 		}
127 | 
128 | 		quicksort<Iterator,T>( begin, right + 1 );	// right marks the last element of the lower partition
129 | 		quicksort<Iterator,T>( right + 1, end );
130 | 	}
131 | }
132 | 
133 | /******************************************************************************/
134 | 
135 | template<typename Iterator, typename T, class Swapper>
136 | void quicksort(Iterator begin, Iterator end, Swapper doswap)	// same partitioning as the two-parameter overload, but elements are exchanged through doswap(iterator, iterator)
137 | {
138 | 	if ( (end - begin) > 1 ) {	// ranges of zero or one element need no work
139 | 
140 | 		T middleValue = *begin;	// the pivot is the first element
141 | 		Iterator left = begin;
142 | 		Iterator right = end;
143 | 
144 | 		for(;;) {
145 | 
146 | 			while ( middleValue < *(--right) );	// scan down to an element not greater than the pivot
147 | 			if ( !(left < right ) ) break;
148 | 			
149 | 			while ( *(left) < middleValue )	// scan up to an element not less than the pivot
150 | 				++left;
151 | 			if ( !(left < right ) ) break;
152 | 
153 | 			// swap
154 | 			doswap( right, left );
155 | 		}
156 | 
157 | 		quicksort<Iterator,T, Swapper>( begin, right + 1, doswap );	// right marks the last element of the lower partition
158 | 		quicksort<Iterator,T, Swapper>( right + 1, end, doswap );
159 | 	}
160 | }
161 | 
162 | /******************************************************************************/
163 | 
164 | template<typename Iterator, typename T>
165 | void sift_in(ptrdiff_t count, Iterator begin, ptrdiff_t free_in, T next)	// heapsort helper: slot free_in of a count-element heap is vacant and next is the value to place
166 | {
167 | 	ptrdiff_t i;
168 | 	ptrdiff_t free = free_in;
169 | 
170 | 	// sift up the free node 
171 | 	for ( i = 2*(free+1); i < count; i += i) {	// i is the index of free's right child, i-1 its left child
172 | 		if ( *(begin+(i-1)) < *(begin+i))
173 | 			i++;	// after this, i-1 names the larger of the two children
174 | 		*(begin + free) = *(begin+(i-1));
175 | 		free = i-1;
176 | 	}
177 | 
178 | 	// special case in sift up if the last inner node has only 1 child
179 | 	if (i == count) {
180 | 		*(begin + free) = *(begin+(i-1));
181 | 		free = i-1;
182 | 	}
183 | 
184 | 	// sift down the new item next
185 | 	i = (free-1)/2;	// parent of the vacant slot
186 | 	while( (free > free_in)  &&  *(begin+i) < next) {	// never moves above the slot the caller vacated
187 | 		*(begin + free) = *(begin+i);
188 | 		free = i;
189 | 		i = (free-1)/2;
190 | 	}
191 | 
192 | 	*(begin + free) = next;
193 | }
194 | 
195 | template<typename Iterator, typename T>
196 | void heapsort(Iterator begin, Iterator end)	// in-place heapsort of [begin, end) using operator<; indexes the range as begin + offset
197 | {
198 | 	ptrdiff_t  j;
199 | 	ptrdiff_t count = end - begin;
200 | 
201 | 	// build the heap structure 
202 | 	for( j = (count / 2) - 1; j >= 0; --j) {	// from the last node that has a child back to the root
203 | 		T  next = *(begin+j);
204 | 		sift_in< Iterator, T>(count, begin, j, next);
205 | 	}
206 | 
207 | 	// search next by next remaining extremal element
208 | 	for( j = count - 1; j >= 1; --j) {
209 | 		T next = *(begin+j);	// save the last heap element before the root overwrites it
210 | 		*(begin+j) = *(begin);	// the root moves to its final position j
211 | 		sift_in< Iterator, T>(j, begin, 0, next);	// the heap shrinks to j elements with the root slot vacant
212 | 	}
213 | }
214 | 
215 | }	// end namespace benchmark
216 | 
217 | using namespace benchmark;
218 | 
219 | /******************************************************************************/
220 | /******************************************************************************/
221 | /******************************************************************************/
222 | 


--------------------------------------------------------------------------------
/include/benchmark_results.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |     Copyright 2007-2008 Adobe Systems Incorporated
  3 |     Distributed under the MIT License (see accompanying file LICENSE_1_0_0.txt
  4 |     or a copy at http://stlab.adobe.com/licenses.html)
  5 |     
  6 |     Source file for shared result reporting used by most of the benchmarks
  7 | */
  8 | 
  9 | /******************************************************************************/
 10 | 
 11 | /* 
 12 |  Yes, this would be easier with a class or std::vector
 13 |   but it needs to work for both C and C++ code
 14 | */
 15 | 
 16 | /* declarations */
 17 | 
 18 | typedef struct one_result {	/* one recorded measurement */
 19 | 	double time;	/* value passed to record_result; the reports print it with a "sec" suffix */
 20 | 	const char *label;	/* the pointer is stored as given, the string is not copied */
 21 |  } one_result;
 22 | 
 23 | extern one_result *results;
 24 | 
 25 | void record_result( double time, const char *label );
 26 | 
 27 | 
 28 | /******************************************************************************/
 29 | 
 30 | /* implementation */
 31 | 
 32 | #include <stdlib.h>
 33 | #include <math.h>
 34 | #include <stddef.h>
 35 | #include <stdio.h>
 36 | #include <string.h>
 37 | 
 38 | one_result *results = NULL;	/* growable array of recorded results; (re)allocated by record_result */
 39 | int current_test = 0;	/* number of entries recorded so far; the summarize functions reset it to 0 */
 40 | int allocated_results = 0;	/* capacity of results, counted in entries */
 41 | 
 42 | /* Append one (time, label) entry to the global results table, growing the table on demand. */
 43 | void record_result( double time, const char *label ) {
 44 | 	one_result *slot;
 45 | 	if ( results == NULL || current_test >= allocated_results ) {
 46 | 		allocated_results += 10;	/* grow ten entries at a time */
 47 | 		results = (one_result *) realloc( results, allocated_results*sizeof(one_result) );
 48 | 		if ( !results ) {
 49 | 			printf("Could not allocate %d results\n", allocated_results);
 50 | 			exit(-1);
 51 | 		}
 52 | 	}
 53 | 	slot = &results[current_test++];	/* claim the next free entry */
 54 | 	slot->time = time;
 55 | 	slot->label = label;	/* the pointer is stored, the string is not copied */
 56 | }
 57 | 
 58 | /******************************************************************************/
 59 | 
 60 | const int kShowGMeans = 1;	/* presumably meant for summarize()'s show_gmeans argument - confirm against callers */
 61 | const int kDontShowGMeans = 0;
 62 | 
 63 | const int kShowPenalty = 1;	/* presumably meant for summarize()'s show_penalty argument - confirm against callers */
 64 | const int kDontShowPenalty = 0;
 65 | 
 66 | /******************************************************************************/
 67 | 
 68 | /*
 69 | I need to be able to parse the label and absolute time from each entry, correctly
 70 | 	BUT this also needs to be human readable for people testing/debugging the code
 71 | 	(otherwise I'd use XML and make it really easy (if somewhat slow) to parse)
 72 | 	(No, XML does not qualify as human readable)
 73 | 
 74 | parse as:
 75 | %i ([ ]*)\"%s\"  %f sec   %f M      %f\r
 76 | 
 77 | */
 78 | void summarize(const char *name, int size, int iterations, int show_gmeans, int show_penalty ) {	/* prints every recorded result to stdout, then resets the test counter; show_gmeans is never read in this body */
 79 | 	int i;
 80 | 	double millions = ((double)(size) * iterations)/1000000.0;	/* size * iterations in millions, numerator of the operations-per-second column */
 81 | 	double total_absolute_times = 0.0;
 82 | 	double gmean_ratio = 0.0;	/* running sum of log(time ratio), exponentiated when reported */
 83 | 	
 84 | 	
 85 | 	/* find longest label so we can adjust formatting
 86 | 		12 = strlen("description")+1 */
 87 | 	int longest_label_len = 12;
 88 | 	for (i = 0; i < current_test; ++i) {
 89 | 		int len = (int)strlen(results[i].label);
 90 | 		if (len > longest_label_len)
 91 | 			longest_label_len = len;
 92 | 	}
 93 | 
 94 | 	printf("\ntest %*s description   absolute   operations   ratio with\n", longest_label_len-12, " ");
 95 | 	printf("number %*s time       per second   test0\n\n", longest_label_len, " ");
 96 | 
 97 | 	for (i = 0; i < current_test; ++i)	/* one row per result; labels are right-aligned by left padding */
 98 | 		printf("%2i %*s\"%s\"  %5.2f sec   %5.2f M     %.2f\n",
 99 | 				i,
100 | 				(int)(longest_label_len - strlen(results[i].label)),
101 | 				"",
102 | 				results[i].label,
103 | 				results[i].time,
104 | 				millions/results[i].time,
105 | 				results[i].time/results[0].time);	/* results[0] is the baseline for the ratio column */
106 | 
107 | 	// calculate total time
108 | 	for (i = 0; i < current_test; ++i) {
109 | 		total_absolute_times += results[i].time;
110 | 	}
111 | 
112 | 	// report total time
113 | 	printf("\nTotal absolute time for %s: %.2f sec\n", name, total_absolute_times);
114 | 
115 | 	if ( current_test > 1 && show_penalty ) {	/* a penalty needs at least one result besides the baseline */
116 | 	
117 | 		// calculate gmean of tests compared to baseline
118 | 		for (i = 1; i < current_test; ++i) {
119 | 			gmean_ratio += log(results[i].time/results[0].time);
120 | 		}
121 | 		
122 | 		// report gmean of tests as the penalty
123 | 		printf("\n%s Penalty: %.2f\n\n", name, exp(gmean_ratio/(current_test-1)));	/* exp(mean of logs) is the geometric mean */
124 | 	}
125 | 
126 | 	// reset the test counter so we can run more tests
127 | 	current_test = 0;	/* the results array itself is kept and reused */
128 | }
129 | 
130 | /******************************************************************************/
131 | 
132 | void summarize_simplef( FILE *output, const char *name ) {	/* writes label and time for every recorded result to output, then resets the test counter */
133 | 	int i;
134 | 	double total_absolute_times = 0.0;
135 | 	
136 | 	/* find longest label so we can adjust formatting
137 | 		12 = strlen("description")+1 */
138 | 	int longest_label_len = 12;
139 | 	for (i = 0; i < current_test; ++i) {
140 | 		int len = (int)strlen(results[i].label);
141 | 		if (len > longest_label_len)
142 | 			longest_label_len = len;
143 | 	}
144 | 
145 | 	fprintf(output,"\ntest %*s description   absolute\n", longest_label_len-12, " ");
146 | 	fprintf(output,"number %*s time\n\n", longest_label_len, " ");
147 | 
148 | 	for (i = 0; i < current_test; ++i)	/* one row per result; labels are right-aligned by left padding */
149 | 		fprintf(output,"%2i %*s\"%s\"  %5.2f sec\n",
150 | 				i,
151 | 				(int)(longest_label_len - strlen(results[i].label)),
152 | 				"",
153 | 				results[i].label,
154 | 				results[i].time);
155 | 
156 | 	// calculate total time
157 | 	for (i = 0; i < current_test; ++i) {
158 | 		total_absolute_times += results[i].time;
159 | 	}
160 | 
161 | 	// report total time
162 | 	fprintf(output,"\nTotal absolute time for %s: %.2f sec\n", name, total_absolute_times);
163 | 
164 | 	// reset the test counter so we can run more tests
165 | 	current_test = 0;	/* the results array itself is kept and reused */
166 | }
167 | 
168 | /******************************************************************************/
169 | 


--------------------------------------------------------------------------------
/include/benchmark_shared_tests.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |     Copyright 2007-2008 Adobe Systems Incorporated
  3 |     Distributed under the MIT License (see accompanying file LICENSE_1_0_0.txt
  4 |     or a copy at http://stlab.adobe.com/licenses.html )
  5 |     
  6 |     
  7 |     Source file for tests shared among several benchmarks
  8 | */
  9 | 
 10 | /******************************************************************************/
 11 | 
 12 | template<typename T>
 13 | inline bool tolerance_equal(T &a, T &b) {	// generic fallback for types without a specialization below: absolute difference under 1e-6
 14 | 	T diff = a - b;
 15 | 	return (abs(diff) < 1.0e-6);	// NOTE(review): which abs overload is chosen depends on headers pulled in by the including file - confirm for each T used
 16 | }
 17 | 
 18 | 
 19 | template<>
 20 | inline bool tolerance_equal(int32_t &a, int32_t &b) {	// 32- and 64-bit integers are compared exactly, with no tolerance
 21 | 	return (a == b);
 22 | }
 23 | template<>
 24 | inline bool tolerance_equal(uint32_t &a, uint32_t &b) {
 25 | 	return (a == b);
 26 | }
 27 | template<>
 28 | inline bool tolerance_equal(uint64_t &a, uint64_t &b) {
 29 | 	return (a == b);
 30 | }
 31 | template<>
 32 | inline bool tolerance_equal(int64_t &a, int64_t &b) {
 33 | 	return (a == b);
 34 | }
 35 | 
 36 | template<>
 37 | inline bool tolerance_equal(double &a, double &b) {	// tolerance 1e-6, relative to a unless a is tiny
 38 | 	double diff = a - b;
 39 | 	double reldiff = diff;	// stays an absolute difference when |a| <= 1e-8, which avoids dividing by (nearly) zero
 40 | 	if (fabs(a) > 1.0e-8)
 41 | 		reldiff = diff / a;
 42 | 	return (fabs(reldiff) < 1.0e-6);
 43 | }
 44 | 
 45 | template<>
 46 | inline bool tolerance_equal(float &a, float &b) {	// tolerance 1e-3, relative to a unless a is tiny
 47 | 	float diff = a - b;
 48 | 	double reldiff = diff;	// stays an absolute difference when |a| <= 1e-4
 49 | 	if (fabs(a) > 1.0e-4)
 50 | 		reldiff = diff / a;
 51 | 	return (fabs(reldiff) < 1.0e-3);		// single precision divide test is really imprecise
 52 | }
 53 | 
 54 | /******************************************************************************/
 55 | 
 56 | template <typename T, typename Shifter>
 57 | inline void check_shifted_sum(T result) {	// expects result == SIZE * Shifter::do_shift(init_value); SIZE, init_value and current_test are not defined in this header
 58 | 	T temp = (T)SIZE * Shifter::do_shift((T)init_value);
 59 | 	if (!tolerance_equal<T>(result,temp))
 60 | 		printf("test %i failed\n", current_test);	// only reports the mismatch; execution continues
 61 | }
 62 | 
 63 | template <typename T, typename Shifter>
 64 | inline void check_shifted_sum_CSE(T result) {	// expects result == 0; Shifter is not used in the body
 65 | 	T temp = (T)0.0;
 66 | 	if (!tolerance_equal<T>(result,temp))
 67 | 		printf("test %i failed\n", current_test);
 68 | }
 69 | 
 70 | template <typename T, typename Shifter>
 71 | inline void check_shifted_variable_sum(T result, T var) {	// expects result == SIZE * Shifter::do_shift(init_value, var)
 72 | 	T temp = (T)SIZE * Shifter::do_shift((T)init_value, var);
 73 | 	if (!tolerance_equal<T>(result,temp))
 74 | 		printf("test %i failed\n", current_test);
 75 | }
 76 | 
 77 | template <typename T, typename Shifter>
 78 | inline void check_shifted_variable_sum(T result, T var1, T var2, T var3, T var4) {	// four-variable form of the check above
 79 | 	T temp = (T)SIZE * Shifter::do_shift((T)init_value, var1, var2, var3, var4);
 80 | 	if (!tolerance_equal<T>(result,temp))
 81 | 		printf("test %i failed\n", current_test);
 82 | }
 83 | 
 84 | template <typename T, typename Shifter>
 85 | inline void check_shifted_variable_sum_CSE(T result, T var) {	// expects result == 0; var and Shifter are not used in the body
 86 | 	T temp = (T)0.0;
 87 | 	if (!tolerance_equal<T>(result,temp))
 88 | 		printf("test %i failed\n", current_test);
 89 | }
 90 | 
 91 | template <typename T, typename Shifter>
 92 | inline void check_shifted_variable_sum_CSE(T result, T var1, T var2, T var3, T var4) {	// expects result == 0; var1..var4 and Shifter are not used in the body
 93 | 	T temp = (T)0.0;
 94 | 	if (!tolerance_equal<T>(result,temp))
 95 | 		printf("test %i failed\n", current_test);
 96 | }
 97 | 
 98 | 
 99 | /******************************************************************************/
100 | 
101 | template <typename Iterator, typename T>
102 | void fill(Iterator first, Iterator last, T value) {	// assigns a copy of value to every element of [first, last)
103 | 	while (first != last) *first++ = value;	// NOTE(review): benchmark_algorithms.h declares the same template in namespace benchmark and then does `using namespace benchmark;` - confirm no file includes both and calls fill unqualified
104 | }
105 | 
106 | /******************************************************************************/
107 | 
108 | template <typename T>
109 | 	struct custom_constant_add {	// input + 10
110 | 	  static T do_shift(T input) { return (input + T(10)); }
111 | 	};
112 | 
113 | /******************************************************************************/
114 | 
115 | template <typename T>
116 | 	struct custom_multiple_constant_add {	// input + 1 + 2 + 3 + 4, written as four separate additions
117 | 	  static T do_shift(T input) { return (input + T(1) + T(2) + T(3) + T(4)); }
118 | 	};
119 | 
120 | /******************************************************************************/
121 | 
122 | template <typename T>
123 | 	struct custom_constant_sub {	// input - 10
124 | 	  static T do_shift(T input) { return (input - T(10)); }
125 | 	};
126 | 
127 | /******************************************************************************/
128 | 
129 | template <typename T>
130 | 	struct custom_multiple_constant_sub {	// input - 1 - 2 - 3 - 4, written as four separate subtractions
131 | 	  static T do_shift(T input) { return (input - T(1) - T(2) - T(3) - T(4)); }
132 | 	};
133 | 
134 | /******************************************************************************/
135 | 
136 | template <typename T>
137 | 	struct custom_constant_multiply {	// input * 120
138 | 	  static T do_shift(T input) { return (input * T(120)); }
139 | 	};
140 | 
141 | /******************************************************************************/
142 | 
143 | // this should result in a single multiply
144 | template <typename T>
145 | 	struct custom_multiple_constant_multiply {	// input * 2 * 3 * 4 * 5, evaluated left to right starting from input
146 | 	  static T do_shift(T input) { return (input * T(2) * T(3) * T(4) * T(5)); }
147 | 	};
148 | 
149 | /******************************************************************************/
150 | 
151 | // this should result in a single add
152 | template <typename T>
153 | 	struct custom_multiple_constant_multiply2 {	// input + (2 * 3 * 4 * 5): the product involves constants only
154 | 	  static T do_shift(T input) { return (input + T(2) * T(3) * T(4) * T(5)); }
155 | 	};
156 | 
157 | /******************************************************************************/
158 | 
159 | template <typename T>
160 | 	struct custom_constant_divide {	// input / 5
161 | 	  static T do_shift(T input) { return (input / T(5)); }
162 | 	};
163 | 
164 | /******************************************************************************/
165 | 
166 | template <typename T>
167 | 	struct custom_multiple_constant_divide {	// input divided by 2, 3, 4 and 5 in turn
168 | 	  static T do_shift(T input) { return ((((input / T(2) ) / T(3) ) / T(4)) / T(5)); }
169 | 	};
170 | 
171 | /******************************************************************************/
172 | 
173 | // this more likely to have constants fused than the version above
174 | template <typename T>
175 | 	struct custom_multiple_constant_divide2 {	// input + ((120 / 3) / 4) / 5: the divisions involve constants only
176 | 	  static T do_shift(T input) { return (input + (((T(120) / T(3) ) / T(4)) / T(5))); }
177 | 	};
178 | 
179 | /******************************************************************************/
180 | 
181 | template <typename T>
182 | 	struct custom_multiple_constant_mixed {	// by precedence: (input + 2) - ((3 * 4) / 5)
183 | 	  static T do_shift(T input) { return (input + T(2) - T(3) * T(4) / T(5)); }
184 | 	};
185 | 
186 | /******************************************************************************/
187 | 
188 | template <typename T>
189 | 	struct custom_constant_and {	// input & 10; the bitwise operators restrict T to integer types
190 | 	  static T do_shift(T input) { return (input & T(10)); }
191 | 	};
192 | 
193 | /******************************************************************************/
194 | 
195 | template <typename T>
196 | 	struct custom_multiple_constant_and {	// input & 15 & 30 & 31 & 63, written as four separate ANDs
197 | 	  static T do_shift(T input) { return (input & T(15) & T(30) & T(31) & T(63)); }
198 | 	};
199 | 
200 | /******************************************************************************/
201 | 
202 | template <typename T>
203 | 	struct custom_constant_or {	// input | 10
204 | 	  static T do_shift(T input) { return (input | T(10)); }
205 | 	};
206 | 
207 | /******************************************************************************/
208 | 
209 | template <typename T>
210 | 	struct custom_multiple_constant_or {	// input | 15 | 30 | 31 | 63, written as four separate ORs
211 | 	  static T do_shift(T input) { return (input | T(15) | T(30) | T(31) | T(63)); }
212 | 	};
213 | 
214 | /******************************************************************************/
215 | 
216 | template <typename T>
217 | 	struct custom_constant_xor {	// input ^ 10
218 | 	  static T do_shift(T input) { return (input ^ T(10)); }
219 | 	};
220 | 
221 | /******************************************************************************/
222 | 
223 | template <typename T>
224 | 	struct custom_multiple_constant_xor {	// input ^ 15 ^ 30 ^ 31 ^ 63, written as four separate XORs
225 | 	  static T do_shift(T input) { return (input ^ T(15) ^ T(30) ^ T(31) ^ T(63)); }
226 | 	};
227 | 
228 | /******************************************************************************/
229 | 
230 | template <typename T>
231 | 	struct custom_two {	// always returns 2; input is not read
232 | 	  static T do_shift(T input) { return (T(2)); }
233 | 	};
234 | 
235 | /******************************************************************************/
236 | 	
237 | template <typename T>
238 | 	struct custom_add_constants {	// 1 + 2; input is not read
239 | 	  static T do_shift(T input) { return (T(1) + T(2)); }
240 | 	};
241 | 
242 | /******************************************************************************/
243 | 
244 | template <typename T>
245 | 	struct custom_sub_constants {	// 2 - 1; input is not read
246 | 	  static T do_shift(T input) { return (T(2) - T(1)); }
247 | 	};
248 | 
249 | /******************************************************************************/
250 | 
251 | template <typename T>
252 | 	struct custom_multiply_constants {	// 2 * 3; input is not read
253 | 	  static T do_shift(T input) { return (T(2) * T(3)); }
254 | 	};
255 | 
256 | /******************************************************************************/
257 | 
258 | template <typename T>
259 | 	struct custom_divide_constants {	// 20 / 10; input is not read
260 | 	  static T do_shift(T input) { return (T(20) / T(10)); }
261 | 	};
262 | 
263 | /******************************************************************************/
264 | 
265 | template <typename T>
266 | 	struct custom_mod_constants {	// 23 % 10; the % operator restricts T to integer types
267 | 	  static T do_shift(T input) { return (T(23) % T(10)); }
268 | 	};
269 | 
270 | /******************************************************************************/
271 | 
272 | template <typename T>
273 | 	struct custom_and_constants {	// 23 & 10; input is not read
274 | 	  static T do_shift(T input) { return (T(23) & T(10)); }
275 | 	};
276 | 
277 | /******************************************************************************/
278 | 
279 | template <typename T>
280 | 	struct custom_or_constants {	// 23 | 10; input is not read
281 | 	  static T do_shift(T input) { return (T(23) | T(10)); }
282 | 	};
283 | 
284 | /******************************************************************************/
285 | 
286 | template <typename T>
287 | 	struct custom_xor_constants {	// 23 ^ 10; input is not read
288 | 	  static T do_shift(T input) { return (T(23) ^ T(10)); }
289 | 	};
290 | 
291 | /******************************************************************************/
292 | 
293 | template <typename T>
294 | 	struct custom_equal_constants {	// the bool result of 23 == 10 is converted to T by the return
295 | 	  static T do_shift(T input) { return (T(23) == T(10)); }
296 | 	};
297 | 
298 | /******************************************************************************/
299 | 
300 | template <typename T>
301 | 	struct custom_notequal_constants {	// the bool result of 23 != 10 is converted to T by the return
302 | 	  static T do_shift(T input) { return (T(23) != T(10)); }
303 | 	};
304 | 
305 | /******************************************************************************/
306 | 
307 | template <typename T>
308 | 	struct custom_greaterthan_constants {	// the bool result of 23 > 10 is converted to T by the return
309 | 	  static T do_shift(T input) { return (T(23) > T(10)); }
310 | 	};
311 | 
312 | /******************************************************************************/
313 | 
314 | template <typename T>
315 | 	struct custom_lessthan_constants {	// the bool result of 23 < 10 is converted to T by the return
316 | 	  static T do_shift(T input) { return (T(23) < T(10)); }
317 | 	};
318 | 
319 | /******************************************************************************/
320 | 
321 | template <typename T>
322 | 	struct custom_greaterthanequal_constants {	// the bool result of 23 >= 10 is converted to T by the return
323 | 	  static T do_shift(T input) { return (T(23) >= T(10)); }
324 | 	};
325 | 
326 | /******************************************************************************/
327 | 
328 | template <typename T>
329 | 	struct custom_lessthanequal_constants {	// the bool result of 23 <= 10 is converted to T by the return
330 | 	  static T do_shift(T input) { return (T(23) <= T(10)); }
331 | 	};
332 | 
333 | /******************************************************************************/
334 | 
// Shifter returning input + v1.
// Two-argument form, matching the call made by test_variable1.
template <typename T>
	struct custom_add_variable {
	  static T do_shift(T input, T v1) { return (input + v1); }
	};
339 | 
340 | /******************************************************************************/
341 | 
// Shifter returning input - v1.
// Two-argument form, matching the call made by test_variable1.
template <typename T>
	struct custom_sub_variable {
	  static T do_shift(T input, T v1) { return (input - v1); }
	};
346 | 
347 | /******************************************************************************/
348 | 
// Shifter returning input * v1.
// Two-argument form, matching the call made by test_variable1.
template <typename T>
	struct custom_multiply_variable {
	  static T do_shift(T input, T v1) { return (input * v1); }
	};
353 | 
354 | /******************************************************************************/
355 | 
// Shifter returning input / v1. No check is made here that v1 is non-zero.
// Two-argument form, matching the call made by test_variable1.
template <typename T>
	struct custom_divide_variable {
	  static T do_shift(T input, T v1) { return (input / v1); }
	};
360 | 
361 | /******************************************************************************/
362 | 
// Shifter returning input + v1 + v2 + v3 + v4.
// Five-argument form, matching the call made by test_variable4.
template <typename T>
	struct custom_add_multiple_variable {
	  static T do_shift(T input, T v1, T v2, T v3, T v4) { return (input + v1 + v2 + v3 + v4); }
	};
367 | 
368 | /******************************************************************************/
369 | 
// Shifter returning input - v1 - v2 - v3 - v4.
// Five-argument form, matching the call made by test_variable4.
template <typename T>
	struct custom_sub_multiple_variable {
	  static T do_shift(T input, T v1, T v2, T v3, T v4) { return (input - v1 - v2 - v3 - v4); }
	};
374 | 
375 | /******************************************************************************/
376 | 
// Shifter returning input * v1 * v2 * v3 * v4 (evaluated left to right, so
// `input` takes part in every multiplication).
// Five-argument form, matching the call made by test_variable4.
template <typename T>
	struct custom_multiply_multiple_variable {
	  static T do_shift(T input, T v1, T v2, T v3, T v4) { return (input * v1 * v2 * v3 * v4); }
	};
381 | 
382 | /******************************************************************************/
383 | 
// something more likely to be moved out of loops, and a sanity check
// Returns input + v1 * v2 * v3 * v4: the product involves only v1..v4 and
// does not depend on `input`.
template <typename T>
	struct custom_multiply_multiple_variable2 {
	  static T do_shift(T input, T v1, T v2, T v3, T v4) { return (input + v1 * v2 * v3 * v4); }
	};
389 | 
390 | /******************************************************************************/
391 | 
// this can NOT have CSE and loop invariant motion applied in integer math
// and can only be optimized in float if inexact math is allowed
// Returns (((input / v1) / v2) / v3) / v4: every division depends on `input`.
template <typename T>
	struct custom_divide_multiple_variable {
	  static T do_shift(T input, T v1, T v2, T v3, T v4) { return ((((input / v1 ) / v2 ) / v3) / v4); }
	};
398 | 
399 | /******************************************************************************/
400 | 
// this can have CSE and loop invariant motion applied in integer math
// this should be optimizable without inexact math
// Returns input + (((v1 / v2) / v3) / v4): the quotient involves only v1..v4
// and does not depend on `input`.
template <typename T>
	struct custom_divide_multiple_variable2 {
	  static T do_shift(T input, T v1, T v2, T v3, T v4) { return (input + (((v1 / v2 ) / v3) / v4)); }
	};
407 | 
408 | /******************************************************************************/
409 | 
// Shifter returning input + v1 - v2 * v3 / v4; by operator precedence this is
// (input + v1) - ((v2 * v3) / v4).
// Five-argument form, matching the call made by test_variable4.
template <typename T>
	struct custom_mixed_multiple_variable {
	  static T do_shift(T input, T v1, T v2, T v3, T v4) { return (input + v1 - v2 * v3 / v4); }
	};
414 | 
415 | /******************************************************************************/
416 | 
// Shifter returning input & v1 (T must support bitwise AND).
// Two-argument form, matching the call made by test_variable1.
template <typename T>
	struct custom_variable_and {
	  static T do_shift(T input, T v1) { return (input & v1); }
	};
421 | 
422 | /******************************************************************************/
423 | 
// Shifter returning input & v1 & v2 & v3 & v4 (T must support bitwise AND).
// Five-argument form, matching the call made by test_variable4.
template <typename T>
	struct custom_multiple_variable_and {
	  static T do_shift(T input, T v1, T v2, T v3, T v4) { return (input & v1 & v2 & v3 & v4); }
	};
428 | 
429 | /******************************************************************************/
430 | 
// Shifter returning input | v1 (T must support bitwise OR).
// Two-argument form, matching the call made by test_variable1.
template <typename T>
	struct custom_variable_or {
	  static T do_shift(T input, T v1) { return (input | v1); }
	};
435 | 
436 | /******************************************************************************/
437 | 
// Shifter returning input | v1 | v2 | v3 | v4 (T must support bitwise OR).
// Five-argument form, matching the call made by test_variable4.
template <typename T>
	struct custom_multiple_variable_or {
	  static T do_shift(T input, T v1, T v2, T v3, T v4) { return (input | v1 | v2 | v3 | v4); }
	};
442 | 
443 | /******************************************************************************/
444 | 
// Shifter returning input ^ v1 (T must support bitwise XOR).
// Two-argument form, matching the call made by test_variable1.
template <typename T>
	struct custom_variable_xor {
	  static T do_shift(T input, T v1) { return (input ^ v1); }
	};
449 | 
450 | /******************************************************************************/
451 | 
// Shifter returning input ^ v1 ^ v2 ^ v3 ^ v4 (T must support bitwise XOR).
// Five-argument form, matching the call made by test_variable4.
template <typename T>
	struct custom_multiple_variable_xor {
	  static T do_shift(T input, T v1, T v2, T v3, T v4) { return (input ^ v1 ^ v2 ^ v3 ^ v4); }
	};
456 | 
457 | 
458 | /******************************************************************************/
459 | 
// Shifter returning `input` unchanged.
// One-argument form, matching the call made by test_constant.
template <typename T>
	struct custom_identity {
	  static T do_shift(T input) { return (input); }
	};
464 | 
465 | /******************************************************************************/
466 | 
// Shifter that adds a zero constant: returns input + T(0).
template <typename T>
	struct custom_add_zero {
	  static T do_shift(T input) { return (input + T(0)); }
	};
471 | 
472 | /******************************************************************************/
473 | 
// Shifter that subtracts a zero constant: returns input - T(0).
template <typename T>
	struct custom_sub_zero {
	  static T do_shift(T input) { return (input - T(0)); }
	};
478 | 
479 | /******************************************************************************/
480 | 
// Shifter returning the unary negation -input.
template <typename T>
	struct custom_negate {
	  static T do_shift(T input) { return (-input); }
	};
485 | 
486 | /******************************************************************************/
487 | 
// Shifter that applies unary negation twice: returns -(-input).
template <typename T>
	struct custom_negate_twice {
	  static T do_shift(T input) { return (-(-input)); }
	};
492 | 
493 | /******************************************************************************/
494 | 
// Shifter that negates by subtracting from zero: returns T(0) - input.
template <typename T>
	struct custom_zero_minus {
	  static T do_shift(T input) { return (T(0) - input); }
	};
499 | 
500 | /******************************************************************************/
501 | 
// Shifter that multiplies by a one constant: returns input * T(1).
template <typename T>
	struct custom_times_one {
	  static T do_shift(T input) { return (input * T(1)); }
	};
506 | 
507 | /******************************************************************************/
508 | 
// Shifter that divides by a one constant: returns input / T(1).
template <typename T>
	struct custom_divideby_one {
	  static T do_shift(T input) { return (input / T(1)); }
	};
513 | 
514 | /******************************************************************************/
515 | 
// Shifter that chains, in one expression around `input`: add zero, subtract
// zero, divide by one, subtract from zero, unary negate, multiply by one.
template <typename T>
	struct custom_algebra_mixed {
	  static T do_shift(T input) { return (-(T(0) - (((input + T(0)) - T(0)) / T(1)))) * T(1); }
	};
520 | 
521 | /******************************************************************************/
522 | 
// Shifter whose do_shift ignores `input` and returns T(0).
template <typename T>
	struct custom_zero {
	  static T do_shift(T input) { return T(0); }
	};
527 | 
528 | /******************************************************************************/
529 | 
// Shifter that multiplies by a zero constant: returns input * T(0).
template <typename T>
	struct custom_times_zero {
	  static T do_shift(T input) { return (input * T(0)); }
	};
534 | 
535 | /******************************************************************************/
536 | 
// Shifter that subtracts the argument from itself: returns input - input.
template <typename T>
	struct custom_subtract_self {
	  static T do_shift(T input) { return (input - input); }
	};
541 | /******************************************************************************/
542 | 
// Shifter that subtracts from `input` a chain of add-zero, divide-by-one,
// subtract-zero, zero-minus, negate and times-one applied to the same `input`.
// In exact arithmetic the two terms cancel.
template <typename T>
	struct custom_algebra_mixed_constant {
	  static T do_shift(T input) { return (input - (-(T(0) - (((input + T(0)) / T(1)) - T(0)))) * T(1)); }
	};
547 | 
548 | /******************************************************************************/
549 | 
// Shifter returning v1 * (v2 - v3).
// Three-argument form, matching the calls made by test_CSE and test_CSE_opt.
template <typename T>
	struct custom_cse1 {
	  static T do_shift(T v1, T v2, T v3) { return (v1 * (v2 - v3) ); }
	};
554 | 
555 | /******************************************************************************/
556 | 
// Shifter that ANDs the argument with itself: returns input & input.
template <typename T>
	struct custom_and_self {
	  static T do_shift(T input) { return (input & input); }
	};
561 | 
562 | /******************************************************************************/
563 | 
// Shifter that ORs the argument with itself: returns input | input.
template <typename T>
	struct custom_or_self {
	  static T do_shift(T input) { return (input | input); }
	};
568 | 
569 | /******************************************************************************/
570 | 
// Shifter that XORs the argument with itself: returns input ^ input.
template <typename T>
	struct custom_xor_self {
	  static T do_shift(T input) { return (input ^ input); }
	};
575 | 
576 | /******************************************************************************/
577 | 
// Shifter that ORs with a zero constant: returns input | T(0).
template <typename T>
	struct custom_or_zero {
	  static T do_shift(T input) { return (input | T(0)); }
	};
582 | 
583 | /******************************************************************************/
584 | 
// Shifter that XORs with a zero constant: returns input ^ T(0).
template <typename T>
	struct custom_xor_zero {
	  static T do_shift(T input) { return (input ^ T(0)); }
	};
589 | 
590 | /******************************************************************************/
591 | 
// Shifter that ANDs with the complement of a zero constant:
// returns input & ~T(0).
template <typename T>
	struct custom_andnot_zero {
	  static T do_shift(T input) { return (input & ~ T(0)); }
	};
596 | 
597 | /******************************************************************************/
598 | 
// Shifter that ANDs with a zero constant: returns input & T(0).
template <typename T>
	struct custom_and_zero {
	  static T do_shift(T input) { return (input & T(0)); }
	};
603 | 
604 | /******************************************************************************/
605 | 
// Shifter that takes the remainder by a one constant: returns input % T(1).
// Only instantiable for T that supports operator% (built-in floating point
// types do not).
template <typename T>
	struct custom_mod_one {
	  static T do_shift(T input) { return (input % T(1)); }
	};
610 | 
611 | /******************************************************************************/
612 | 
// Shifter that compares the argument with itself: returns (input == input),
// converted to T by the return type.
template <typename T>
	struct custom_equal_self {
	  static T do_shift(T input) { return (input == input); }
	};
617 | 
618 | /******************************************************************************/
619 | 
// Shifter that compares the argument with itself: returns (input != input),
// converted to T by the return type.
template <typename T>
	struct custom_notequal_self {
	  static T do_shift(T input) { return (input != input); }
	};
624 | 
625 | /******************************************************************************/
626 | 
// Shifter that compares the argument with itself: returns (input > input),
// converted to T by the return type.
template <typename T>
	struct custom_greaterthan_self {
	  static T do_shift(T input) { return (input > input); }
	};
631 | 
632 | /******************************************************************************/
633 | 
// Shifter that compares the argument with itself: returns (input < input),
// converted to T by the return type.
template <typename T>
	struct custom_lessthan_self {
	  static T do_shift(T input) { return (input < input); }
	};
638 | 
639 | /******************************************************************************/
640 | 
// Shifter that compares the argument with itself: returns (input >= input),
// converted to T by the return type.
template <typename T>
	struct custom_greaterthanequal_self {
	  static T do_shift(T input) { return (input >= input); }
	};
645 | 
646 | /******************************************************************************/
647 | 
// Shifter that compares the argument with itself: returns (input <= input),
// converted to T by the return type.
template <typename T>
	struct custom_lessthanequal_self {
	  static T do_shift(T input) { return (input <= input); }
	};
652 | 
653 | /******************************************************************************/
654 | 
// Timed harness for one-argument Shifters.
// Runs `iterations` passes; each pass sums Shifter::do_shift(first[n]) for
// n in [0, count) into a T accumulator and passes the sum to
// check_shifted_sum<T, Shifter>. The time for all passes is recorded under
// `label`.
// `iterations`, start_timer, timer, record_result and check_shifted_sum are
// not defined in this function and must be visible at the point of use.
template <typename T, typename Shifter>
void test_constant(T* first, int count, const char *label) {
  int i;
  
  start_timer();
  
  for(i = 0; i < iterations; ++i) {
    T result = 0;
    for (int n = 0; n < count; ++n) {
		result += Shifter::do_shift( first[n] );
	}
    check_shifted_sum<T, Shifter>(result);
  }
  
  record_result( timer(), label );
}
671 | 
672 | /******************************************************************************/
673 | 
// Timed harness for two-argument Shifters.
// Runs `iterations` passes; each pass sums Shifter::do_shift(first[n], v1)
// for n in [0, count) into a T accumulator and passes the sum, together with
// v1, to check_shifted_variable_sum<T, Shifter>. The time for all passes is
// recorded under `label`.
// v1 is the same value for every element and every pass.
template <typename T, typename Shifter>
void test_variable1(T* first, int count, T v1, const char *label) {
  int i;
  
  start_timer();
  
  for(i = 0; i < iterations; ++i) {
    T result = 0;
    for (int n = 0; n < count; ++n) {
		result += Shifter::do_shift( first[n], v1 );
	}
    check_shifted_variable_sum<T, Shifter>(result, v1);
  }
  
  record_result( timer(), label );
}
690 | 
691 | /******************************************************************************/
692 | 
// Timed harness for five-argument Shifters.
// Runs `iterations` passes; each pass sums
// Shifter::do_shift(first[n], v1, v2, v3, v4) for n in [0, count) into a T
// accumulator and passes the sum, together with v1..v4, to
// check_shifted_variable_sum<T, Shifter>. The time for all passes is recorded
// under `label`.
// v1..v4 are the same values for every element and every pass.
template <typename T, typename Shifter>
void test_variable4(T* first, int count, T v1, T v2, T v3, T v4, const char *label) {
  int i;
  
  start_timer();
  
  for(i = 0; i < iterations; ++i) {
    T result = 0;
    for (int n = 0; n < count; ++n) {
		result += Shifter::do_shift( first[n], v1, v2, v3, v4 );
	}
    check_shifted_variable_sum<T, Shifter>(result, v1, v2, v3, v4);
  }
  
  record_result( timer(), label );
}
709 | 
710 | /******************************************************************************/
711 | 
// Timed harness for three-argument Shifters in which the repeated
// Shifter::do_shift value has been hoisted by hand into `temp`.
// For each adjacent pair (first[n-1], first[n]) the shifted value is computed
// once, doubled, then added to `result` with first[n-1] and subtracted from
// it with first[n].
// The pair (first[0], first[1]) is handled once before the inner loop and
// again by its first iteration (n == 1), so it contributes twice.
// first[1] is read unconditionally, so `count` must be at least 2.
// The per-pass sum and v1 are passed to check_shifted_variable_sum_CSE; the
// time for all `iterations` passes is recorded under `label`.
template <typename T, typename Shifter>
void test_CSE_opt(T* first, int count, T v1, const char *label) {
  int i;
  
  start_timer();
  
  for(i = 0; i < iterations; ++i) {
    T result = 0;
	T temp = Shifter::do_shift( v1, first[0], first[1] );
	temp += temp;
	result += first[0] + temp;
	result -= first[1] + temp;
    for (int n = 1; n < count; ++n) {
		temp = Shifter::do_shift( v1, first[n-1], first[n] );
		temp += temp;
		result += first[n-1] + temp;
		result -= first[n] + temp;
	}
    check_shifted_variable_sum_CSE<T, Shifter>(result, v1);
  }
  
  record_result( timer(), label );
}
735 | 
736 | /******************************************************************************/
737 | 
// Timed harness for three-argument Shifters with the repeated subexpression
// left in place: Shifter::do_shift is written out four times with identical
// arguments for each adjacent pair (first[n-1], first[n]), twice in the
// addition and twice in the subtraction.
// The pair (first[0], first[1]) is handled once before the inner loop and
// again by its first iteration (n == 1), so it contributes twice.
// first[1] is read unconditionally, so `count` must be at least 2.
// The per-pass sum and v1 are passed to check_shifted_variable_sum_CSE; the
// time for all `iterations` passes is recorded under `label`.
template <typename T, typename Shifter>
void test_CSE(T* first, int count, T v1, const char *label) {
  int i;
  
  start_timer();
  
  for(i = 0; i < iterations; ++i) {
    T result = 0;
	result += first[0] + Shifter::do_shift( v1, first[0], first[1] ) + Shifter::do_shift( v1, first[0], first[1] );
	result -= first[1] + Shifter::do_shift( v1, first[0], first[1] ) + Shifter::do_shift( v1, first[0], first[1] );
    for (int n = 1; n < count; ++n) {
		result += first[n-1] + Shifter::do_shift( v1, first[n-1], first[n] ) + Shifter::do_shift( v1, first[n-1], first[n] );
		result -= first[n] + Shifter::do_shift( v1, first[n-1], first[n] ) + Shifter::do_shift( v1, first[n-1], first[n] );
	}
    check_shifted_variable_sum_CSE<T, Shifter>(result, v1);
  }
  
  record_result( timer(), label );
}
757 | 
758 | /******************************************************************************/
759 | 


--------------------------------------------------------------------------------
/include/benchmark_stdint.hpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |     Copyright 2007-2008 Adobe Systems Incorporated
 3 |     Distributed under the MIT License (see accompanying file LICENSE_1_0_0.txt
 4 |     or a copy at http://stlab.adobe.com/licenses.html )
 5 |     
 6 | 
 7 | 	Not all compilers support <cstdint> yet, and some don't even have <stdint.h>
 8 | 	So, we have to do our own type definitions.
 9 | */
10 | 
11 | 
12 | /******************************************************************************/
13 | 
// Include guard: the fallback branch below defines a class template and its
// specializations, which must not be seen twice in one translation unit.
#ifndef BENCHMARK_STDINT_HPP
#define BENCHMARK_STDINT_HPP

// Visual C++ only gained <stdint.h> with Visual Studio 2010 (_MSC_VER 1600),
// so the hand-written typedefs are needed only for older versions.
// defined() keeps the test well-formed on compilers that do not set _MSC_VER.
#if defined(_MSC_VER) && (_MSC_VER < 1600)

	// for platforms that don't include the C99 header stdint.h
	
	// if your platform does not include stdint.h, please edit the #if above
	// to include your platform (and remember that it has to work elsewhere as well)

	// compile-time selection: if_<P, T, E>::type is T when P is true, else E
	template<bool P, typename T, typename E>
	struct if_;

	template<typename T, typename E>
	struct if_<true, T, E> { typedef T type; };

	template<typename T, typename E>
	struct if_<false, T, E>{ typedef E type; };

	typedef signed char int8_t;
	typedef unsigned char uint8_t;

	typedef signed short int16_t;
	typedef unsigned short uint16_t;

	// use int when it is 4 bytes wide, otherwise fall back to long
	typedef if_<sizeof(int) == 4, int, long>::type int32_t;
	typedef if_<sizeof(unsigned int) == 4, unsigned int, unsigned long>::type uint32_t;

	// the #else branch serves any non-Microsoft platform added to the #if above
	#if defined(_MSC_VER)
		typedef __int64 int64_t;
		typedef unsigned __int64 uint64_t;
	#else
		typedef long long int64_t;
		typedef unsigned long long uint64_t;
	#endif

#else

	// for platforms that do have stdint.h
	
	// if your platform fails to find this header, please edit the #if above
	// to include your platform (and remember that it has to work elsewhere as well)
	
	#include <stdint.h>
	
#endif

#endif // BENCHMARK_STDINT_HPP
57 | 
58 | /******************************************************************************/
59 | 


--------------------------------------------------------------------------------
/include/benchmark_timer.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |     Copyright 2007-2008 Adobe Systems Incorporated
 3 |     Distributed under the MIT License (see accompanying file LICENSE_1_0_0.txt
 4 |     or a copy at http://stlab.adobe.com/licenses.html)
 5 |     
 6 |     Shared source file for timing, used by all the benchmarks
 7 | */
 8 | 
 9 | /******************************************************************************/
10 | 
/* Include guard: this header defines objects and non-inline functions, so a
   second inclusion in the same translation unit would be a redefinition. */
#ifndef BENCHMARK_TIMER_H
#define BENCHMARK_TIMER_H

#include <time.h>

/******************************************************************************/

/* Yes, this would be easier with a class or vector
 but it needs to work for both C and C++ code
*/

/*  simple timer functions */

/* clock() readings taken by start_timer() and timer() respectively */
clock_t start_time, end_time;

/* Records the current clock() value as the start of the timed interval.
   (void) makes this a real prototype in C; it means the same thing in C++. */
void start_timer(void) { start_time = clock(); }

/* Returns the seconds elapsed since the last start_timer() call, as measured
   by clock(). The start mark is left untouched, so repeated calls keep
   measuring from the same start. */
double timer(void) {
  end_time = clock();
  return (end_time - start_time)/ (double)(CLOCKS_PER_SEC);
}

#endif /* BENCHMARK_TIMER_H */
29 | /******************************************************************************/
30 | 


--------------------------------------------------------------------------------
/src/functionobjects.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |     Copyright 2007-2008 Adobe Systems Incorporated
  3 |     Distributed under the MIT License (see accompanying file LICENSE_1_0_0.txt
  4 |     or a copy at http://stlab.adobe.com/licenses.html )
  5 | 	
  6 | 
  7 | 	This test file started life as 
  8 | 		ISO/IEC TR 18015:2006(E) Appendix D.4
  9 | 
 10 | 
 11 | Goals:
 12 | 	
 13 | 	Compare the performance of function pointers, functors, inline functors,
 14 | 		standard functors, and native comparison operators
 15 | 
 16 | 	Also compare the performance of qsort(), quicksort template, and std::sort
 17 | 
 18 | 
 19 | Assumptions:
 20 | 	
 21 | 	1) inline functors, standard functors and inlined native
 22 | 		comparisons will perform similarly
 23 | 	
 24 | 	2) using functors is faster than using function pointers
 25 | 	
 26 | 	3) inline functors are as fast or faster than out of line functors
 27 | 	
 28 | 	4) a template is at least as fast as a hard coded function of
 29 | 		the same algorithm, sometimes faster
 30 | 	
 31 | 	5) std::sort is faster than qsort()
 32 | 	
 33 | 	6) std::sort is faster than a naive quicksort template using the same functor
 34 | 
 35 | 
 36 | Since qsort's comparison function must return int (less than 0, 0, greater than 0)
 37 | 	and std::sort's must return a bool, it is not possible to test them with each
 38 | 	other's comparator.
 39 | 
 40 | */
 41 | 
 42 | 
 43 | /******************************************************************************/
 44 | 
 45 | #include <functional>
 46 | #include <algorithm>
 47 | #include <cstdlib>
 48 | #include "benchmark_results.h"
 49 | #include "benchmark_timer.h"
 50 | 
 51 | using namespace std;
 52 | 
 53 | /******************************************************************************/
 54 | 
 55 | template <class Iterator>
 56 | void verify_sorted(Iterator first, Iterator last) {
 57 | 	Iterator prev = first;
 58 | 	first++;
 59 | 	while (first != last) {
 60 | 		if (*first++ < *prev++) {
 61 | 			printf("test %i failed\n", current_test);
 62 | 			break;
 63 | 		}
 64 | 	}
 65 | }
 66 | 
 67 | /******************************************************************************/
 68 | // --------- helper functions --------------------------------------------
 69 | 
 70 | // qsort passes void * arguments to its comparison function,
 71 | // which must return negative, 0, or positive value
 72 | int
 73 | less_than_function1( const void * lhs, const void * rhs )
 74 | 	{
 75 | 	if( *(const double *) lhs < *(const double *) rhs ) return -1;
 76 | 	if( *(const double *) lhs > *(const double *) rhs ) return 1;
 77 | 	return 0;
 78 | 	}
 79 | 
 80 | // std::sort, on the other hand, needs a comparator that returns true or false
 81 | bool
 82 | less_than_function2( const double lhs, const double rhs )
 83 | 	{
 84 | 	return( lhs < rhs? true : false );
 85 | 	}
 86 | 
 87 | // the comparison operator in the following functor is defined out of line
 88 | struct less_than_functor
 89 | {
 90 | 	bool operator()( const double& lhs, const double& rhs ) const;
 91 | };
 92 | 
 93 | bool
 94 | less_than_functor::operator()( const double& lhs, const double& rhs ) const
 95 | 	{
 96 | 	return( lhs < rhs? true : false );
 97 | 	}
 98 | 
 99 | // the comparison operator in the following functor is defined inline
100 | struct inline_less_than_functor
101 | {
102 | 	inline bool operator()( const double& lhs, const double& rhs ) const
103 | 		{
104 | 		return( lhs < rhs? true : false );
105 | 		}
106 | };
107 | 
108 | /******************************************************************************/
109 | 
// Quicksort of [begin, end) with the comparison hard coded as the element
// type's own operator<. The first element of the range is the pivot value.
template<class Iterator>
void quicksort(Iterator begin, Iterator end)
{
	// the value_type lookup only works for pointers and STL iterators
	typedef typename std::iterator_traits<Iterator>::value_type value_t;

	// ranges of zero or one element need no work
	if ( !( (end - begin) > 1 ) )
		return;

	value_t pivot = *begin;
	Iterator lo = begin;
	Iterator hi = end;

	while ( true ) {
		// move hi down to the nearest element that is not greater than the pivot
		do {
			--hi;
		} while ( pivot < *hi );
		if ( !(lo < hi) )
			break;

		// move lo up to the nearest element that is not less than the pivot
		while ( *lo < pivot )
			++lo;
		if ( !(lo < hi) )
			break;

		// exchange the out-of-place pair
		value_t held = *hi;
		*hi = *lo;
		*lo = held;
	}

	// hi is the last position of the lower partition
	quicksort( begin, hi + 1 );
	quicksort( hi + 1, end );
}
142 | 
143 | /******************************************************************************/
144 | 
// Quicksort of [begin, end) ordered by a caller-supplied comparator.
// `compare(a, b)` is any callable (functor or function pointer) returning
// true when a orders before b. The first element of the range is the pivot.
template<class Iterator, typename Comparator>
void quicksort(Iterator begin, Iterator end, Comparator compare)
{
	// the value_type lookup only works for pointers and STL iterators
	typedef typename std::iterator_traits<Iterator>::value_type value_t;

	// ranges of zero or one element need no work
	if ( !( (end - begin) > 1 ) )
		return;

	value_t pivot = *begin;
	Iterator lo = begin;
	Iterator hi = end;

	while ( true ) {
		// move hi down to the nearest element the pivot does not order before
		do {
			--hi;
		} while ( compare( pivot, *hi ) );
		if ( !(lo < hi) )
			break;

		// move lo up to the nearest element that does not order before the pivot
		while ( compare( *lo, pivot ) )
			++lo;
		if ( !(lo < hi) )
			break;

		// exchange the out-of-place pair
		value_t held = *hi;
		*hi = *lo;
		*lo = held;
	}

	// hi is the last position of the lower partition
	quicksort( begin, hi + 1, compare );
	quicksort( hi + 1, end, compare );
}
176 | 
177 | /******************************************************************************/
178 | 
179 | typedef bool comparator_function( const double x, const double y );
180 | 
181 | // use a pointer to function as a template parameter
182 | // exact function is known at compile time, and can be inlined
183 | template<class Iterator, comparator_function compare>
184 | void quicksort(Iterator begin, Iterator end)
185 | {
186 | 	// this only works for pointers and STL iterators
187 | 	typedef typename iterator_traits<Iterator>::value_type T;
188 | 	
189 | 	if ( (end - begin) > 1 ) {
190 | 
191 | 		T middleValue = *begin;
192 | 		Iterator left = begin;
193 | 		Iterator right = end;
194 | 
195 | 		for(;;) {
196 | 
197 | 			while ( compare( middleValue, *(--right) ) );
198 | 			if ( !(left < right ) ) break;
199 | 			while ( compare( *(left), middleValue ) )
200 | 				++left;
201 | 			if ( !(left < right ) ) break;
202 | 
203 | 			// swap
204 | 			T temp = *right;
205 | 			*right = *left;
206 | 			*left = temp;
207 | 		}
208 | 		
209 | 		quicksort( begin, right + 1, compare );
210 | 		quicksort( right + 1, end, compare );
211 | 	}
212 | }
213 | 
214 | /******************************************************************************/
215 | 
216 | // use a function pointer
217 | // most compilers will not inline the function argument
218 | void quicksort_function(double* begin, double* end, comparator_function compare)
219 | {
220 | 	if ( (end - begin) > 1 ) {
221 | 
222 | 		double middleValue = *begin;
223 | 		double* left = begin;
224 | 		double* right = end;
225 | 
226 | 		for(;;) {
227 | 
228 | 			while ( compare( middleValue, *(--right) ) );
229 | 			if ( !(left < right ) ) break;
230 | 			while ( compare( *(left), middleValue ) )
231 | 				++left;
232 | 			if ( !(left < right ) ) break;
233 | 
234 | 			// swap
235 | 			double temp = *right;
236 | 			*right = *left;
237 | 			*left = temp;
238 | 		}
239 | 		
240 | 		quicksort( begin, right + 1, compare );
241 | 		quicksort( right + 1, end, compare );
242 | 	}
243 | }
244 | 
245 | /******************************************************************************/
246 | 
247 | int main(int argc, char* argv[])
248 | {
249 | 	int i;
250 | 	int iterations = (1 < argc) ? atoi(argv[1]) : 2000; // number of iterations
251 | 	int tablesize = (2 < argc) ? atoi(argv[2]) : 10000; // size of array
252 | 	
253 | 	
254 | 	// output command for documentation
255 | 	for (i = 0; i < argc; ++i)
256 | 		printf("%s ", argv[i] );
257 | 	printf("\n");
258 | 	
259 | 	
260 | 	// seed the random number generator, so we get repeatable results
261 | 	srand( tablesize + 123 );
262 | 	
263 | 	
264 | 	// initialize the table to sort
265 | 	double * master_table = new double[tablesize];
266 | 	for( int n = 0; n < tablesize; ++n )
267 | 		{
268 | 		master_table[n] = static_cast<double>( rand() );
269 | 		}
270 | 	
271 | 	double * table = new double[tablesize]; // working copy
272 | 	
273 | 	
274 | 	
275 | 	// here is where the timing starts
276 | 	
277 | 	start_timer();
278 | 	for (i = 0; i < iterations; ++i)
279 | 		{
280 | 		copy(master_table, master_table+tablesize, table);
281 | 		qsort( table, tablesize, sizeof(double), less_than_function1 );
282 | 		verify_sorted( table, table + tablesize );
283 | 		}
284 | 	record_result( timer(), "qsort array with function pointer" );
285 | 	
286 | 	start_timer();
287 | 	for (i = 0; i < iterations; ++i)
288 | 		{
289 | 		copy(master_table, master_table+tablesize, table);
290 | 		quicksort_function( table, table + tablesize, less_than_function2 );
291 | 		verify_sorted( table, table + tablesize );
292 | 		}
293 | 	record_result( timer(), "quicksort function array with function pointer" );
294 | 	
295 | 	start_timer();
296 | 	for (i = 0; i < iterations; ++i)
297 | 		{
298 | 		copy(master_table, master_table+tablesize, table);
299 | 		quicksort( table, table + tablesize, less_than_function2 );
300 | 		verify_sorted( table, table + tablesize );
301 | 		}
302 | 	record_result( timer(), "quicksort template array with function pointer" );
303 | 	
304 | 	start_timer();
305 | 	for (i = 0; i < iterations; ++i)
306 | 		{
307 | 		copy(master_table, master_table+tablesize, table);
308 | 		quicksort<double *, less_than_function2 >( table, table + tablesize  );
309 | 		verify_sorted( table, table + tablesize );
310 | 		}
311 | 	record_result( timer(), "quicksort template array with template function pointer" );
312 | 
313 | 	start_timer();
314 | 	for (i = 0; i < iterations; ++i)
315 | 		{
316 | 		copy(master_table, master_table+tablesize, table);
317 | 		sort( table, table + tablesize, less_than_function2 );
318 | 		verify_sorted( table, table + tablesize );
319 | 		}
320 | 	record_result( timer(), "sort array with function pointer" );
321 | 
322 | 	start_timer();
323 | 	for (i = 0; i < iterations; ++i)
324 | 		{
325 | 		copy(master_table, master_table+tablesize, table);
326 | 		quicksort( table, table + tablesize, less_than_functor() );
327 | 		verify_sorted( table, table + tablesize );
328 | 		}
329 | 	record_result( timer(), "quicksort template array with user-supplied functor" );
330 | 	
331 | 	start_timer();
332 | 	for (i = 0; i < iterations; ++i)
333 | 		{
334 | 		copy(master_table, master_table+tablesize, table);
335 | 		sort( table, table + tablesize, less_than_functor() );
336 | 		verify_sorted( table, table + tablesize );
337 | 		}
338 | 	record_result( timer(), "sort array with user-supplied functor");
339 | 
340 | 	start_timer();
341 | 	for (i = 0; i < iterations; ++i)
342 | 		{
343 | 		copy(master_table, master_table+tablesize, table);
344 | 		quicksort( table, table + tablesize, inline_less_than_functor() );
345 | 		verify_sorted( table, table + tablesize );
346 | 		}
347 | 	record_result( timer(), "quicksort template array with user-supplied inline functor" );
348 | 	
349 | 	start_timer();
350 | 	for (i = 0; i < iterations; ++i)
351 | 		{
352 | 		copy(master_table, master_table+tablesize, table);
353 | 		sort( table, table + tablesize, inline_less_than_functor() );
354 | 		verify_sorted( table, table + tablesize );
355 | 		}
356 | 	record_result( timer(), "sort array with user-supplied inline functor");
357 | 	
358 | 	start_timer();
359 | 	for (i = 0; i < iterations; ++i)
360 | 		{
361 | 		copy(master_table, master_table+tablesize, table);
362 | 		quicksort( table, table + tablesize, less<double>() );
363 | 		verify_sorted( table, table + tablesize );
364 | 		}
365 | 	record_result( timer(), "quicksort template array with standard functor" );
366 | 	
367 | 	start_timer();
368 | 	for (i = 0; i < iterations; ++i)
369 | 		{
370 | 		copy(master_table, master_table+tablesize, table);
371 | 		sort( table, table + tablesize, less<double>() );
372 | 		verify_sorted( table, table + tablesize );
373 | 		}
374 | 	record_result( timer(), "sort array with standard functor");
375 | 	
376 | 	start_timer();
377 | 	for (i = 0; i < iterations; ++i)
378 | 		{
379 | 		copy(master_table, master_table+tablesize, table);
380 | 		quicksort( table, table + tablesize );
381 | 		verify_sorted( table, table + tablesize );
382 | 		}
383 | 	record_result( timer(), "quicksort template array with native < operator" );
384 | 
385 | 	start_timer();
386 | 	for (i = 0; i < iterations; ++i)
387 | 		{
388 | 		copy( master_table, master_table+tablesize, table );
389 | 		sort( table, table + tablesize );
390 | 		verify_sorted( table, table + tablesize );
391 | 		}
392 | 	record_result( timer(), "sort array with native < operator");
393 | 	
394 | 
395 | 	summarize("Function Objects", tablesize, iterations, kDontShowGMeans, kDontShowPenalty );
396 | 	
397 | 	delete[] table;
398 | 	delete[] master_table;
399 | 	
400 | 	return 0;
401 | }
402 | 


--------------------------------------------------------------------------------
/src/loop_unroll.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |     Copyright 2007-2008 Adobe Systems Incorporated
  3 |     Distributed under the MIT License (see accompanying file LICENSE_1_0_0.txt
  4 |     or a copy at http://stlab.adobe.com/licenses.html)
  5 | 
  6 | 
  7 | Goal:  Test compiler optimizations related to loop unrolling
  8 | 
  9 | Assumptions:
 10 | 
 11 | 	1) the compiler will unroll loops to hide instruction latency
 12 | 		for() {}
 13 | 		while() {}
 14 | 		do {} while()
 15 | 		goto
 16 | 
 17 | 	2) if the compiler unrolls the loop, it should not be slower than the original loop without unrolling
 18 | 
 19 | 	3) the compiler should unroll a multi-calculation loop as well as a single calculation loop 
 20 | 		up to the limit of performance gain for unrolling that loop
 21 | 		in other words: no penalty for manually unrolling,
 22 | 						as long as the manual unroll is less than or equal to the optimum unroll factor
 23 | 
 24 | 	4) The compiler should recognize and unroll all loop styles with the same efficiency
 25 | 		in other words: do, while, for, and goto should have identical performance
 26 | 
 27 | */
 28 | 
 29 | #include "benchmark_stdint.hpp"
 30 | #include <stddef.h>
 31 | #include <stdio.h>
 32 | #include <time.h>
 33 | #include <stdlib.h>
 34 | #include <math.h>
 35 | #include "benchmark_results.h"
 36 | #include "benchmark_timer.h"
 37 | 
 38 | /******************************************************************************/
 39 | 
 40 | // this constant may need to be adjusted to give reasonable minimum times
 41 | // For best results, times should be about 1.0 seconds for the minimum test run
 42 | int iterations = 300000;	// replaced by the first command line argument when one is given
 43 | 
 44 | // 8000 items, or between 8k and 64k of data
 45 | // this is intended to remain within the L2 cache of most common CPUs
 46 | #define SIZE 	8000
 47 | 
 48 | // initial value for filling our arrays, may be changed from the command line
 49 | double init_value = 1.0;	// replaced by the second command line argument when one is given
 50 | 
 51 | /******************************************************************************/
 52 | 
 53 | #include "benchmark_shared_tests.h"
 54 | 
 55 | /******************************************************************************/
 56 | /******************************************************************************/
 57 | // one mixing step: offsets the seed by 12345, scales it by 914237 and subtracts 13
 58 | template <typename T>
 59 | T hash_func2(T seed) {
 60 | 	return (914237 * (seed + 12345)) - 13;	// NOTE(review): for 32 bit integer T this product presumably exceeds the type's range -- confirm that wraparound is intended
 61 | }
 62 | // the per-element work of every test: hash_func2 applied three times in a row
 63 | template <typename T>
 64 | T complete_hash_func(T seed) {
 65 | 	return hash_func2( hash_func2( hash_func2( seed ) ) );
 66 | }
 67 | 
 68 | /******************************************************************************/
 69 | // compares result with SIZE times the hash of init_value and prints a message when they differ
 70 | template <typename T>
 71 | inline void check_sum(T result) {
 72 |   T temp = (T)SIZE * complete_hash_func( (T)init_value );	// the sum expected when all SIZE elements hold init_value
 73 |   if (!tolerance_equal<T>(result,temp)) printf("test %i failed\n", current_test);	// tolerance_equal and current_test are not defined in this file (presumably they come from the benchmark headers)
 74 | }
 75 | 
 76 | /******************************************************************************/
 77 | 
 78 | // the heart of our loop unrolling: a template that recurses on F to emit F copies of the inner loop statement
 79 | // keep F < 50 (or some compilers won't compile it)
 80 | template< int F, typename T >
 81 | struct loop_inner_body {
 82 | 	inline static void do_work(T &result, const T *first, int n) {
 83 | 		// the statements for offsets 0 .. F-2 are emitted first, then offset F-1 is handled here
 84 | 		loop_inner_body<F-1,T>::do_work(result, first, n);
 85 | 		const T hashed = complete_hash_func( first[ n + (F-1) ] );
 86 | 		result += hashed;
 87 | 	}
 88 | };
 89 | 
 90 | // F == 0 ends the recursion and contributes no work
 91 | template< typename T >
 92 | struct loop_inner_body<0,T> {
 93 | 	inline static void do_work(T &, const T *, int) {}
 94 | };
 95 | 
 96 | /******************************************************************************/
 97 | /******************************************************************************/
 98 | 
 99 | // times "for" loops that sum the hashes of count elements; F is the unrolling factor
100 | template <int F, typename T >
101 | void test_for_loop_unroll_factor(const T* first, int count, const char *label) {
102 |   int i;
103 |   
104 |   start_timer();
105 |   
106 |   for(i = 0; i < iterations; ++i) {
107 |     T result = 0;
108 | 	int n = 0;
109 | 	// main loop: each pass handles F elements through loop_inner_body
110 |     for (; n < (count - (F-1)); n += F) {
111 | 		loop_inner_body<F,T>::do_work(result,first, n);
112 | 	}
113 | 	// remainder loop: the elements left over when count is not a multiple of F
114 |     for (; n < count; ++n) {
115 | 		result += complete_hash_func( first[n] );
116 | 	}
117 | 	// compare the accumulated sum with the expected value
118 |     check_sum<T>(result);
119 |   }
120 |   // hand the timer reading to record_result together with the label
121 |   record_result( timer(), label );
122 | }
123 | 
124 | /******************************************************************************/
125 | 
126 | // times "while" loops that sum the hashes of count elements; F is the unrolling factor
127 | template <int F, typename T >
128 | void test_while_loop_unroll_factor(const T* first, int count, const char *label) {
129 |   int i;
130 |   
131 |   start_timer();
132 |   
133 |   for(i = 0; i < iterations; ++i) {
134 |     T result = 0;
135 | 	int n = 0;
136 | 	// main loop: each pass handles F elements through loop_inner_body
137 |     while ( n < (count - (F-1)) ) {
138 | 		loop_inner_body<F,T>::do_work(result,first, n);
139 | 		n += F;
140 | 	}
141 | 	// remainder loop: the elements left over when count is not a multiple of F
142 |     while ( n < count ) {
143 | 		result += complete_hash_func( first[n] );
144 | 		++n;
145 | 	}
146 | 	// compare the accumulated sum with the expected value
147 |     check_sum<T>(result);
148 |   }
149 |   // hand the timer reading to record_result together with the label
150 |   record_result( timer(), label );
151 | }
152 | 
153 | /******************************************************************************/
154 | 
155 | // times "do ... while" loops that sum the hashes of count elements; F is the unrolling factor
156 | template <int F, typename T >
157 | void test_do_loop_unroll_factor(const T* first, int count, const char *label) {
158 |   int i;
159 |   
160 |   start_timer();
161 |   
162 |   for(i = 0; i < iterations; ++i) {
163 |     T result = 0;
164 | 	int n = 0;
165 | 	// a do loop always runs its body once, so each loop is guarded by a test that there is work for it
166 | 	if ((count - n) >= F)
167 | 		do {
168 | 			loop_inner_body<F,T>::do_work(result,first, n);
169 | 			n += F;
170 | 		} while (n < (count - (F-1)));
171 | 	// remainder loop: the elements left over when count is not a multiple of F
172 | 	if (n < count)
173 | 		do {
174 | 			result += complete_hash_func( first[n] );
175 | 			++n;
176 | 		} while (n != count);
177 | 	// compare the accumulated sum with the expected value
178 |     check_sum<T>(result);
179 |   }
180 |   // hand the timer reading to record_result together with the label
181 |   record_result( timer(), label );
182 | }
183 | 
184 | /******************************************************************************/
185 | 
186 | // times loops built from labels and goto that sum the hashes of count elements; F is the unrolling factor
187 | template <int F, typename T >
188 | void test_goto_loop_unroll_factor(const T* first, int count, const char *label) {
189 |   int i;
190 |   
191 |   start_timer();
192 |   
193 |   for(i = 0; i < iterations; ++i) {
194 |     T result = 0;
195 | 	int n = 0;
196 | 	// main loop: entered only when at least F elements exist, repeated by jumping back to loop2_start
197 | 	if ((count - n) >= F) {
198 | loop2_start:
199 | 		loop_inner_body<F,T>::do_work(result,first, n);
200 | 		n += F;
201 | 		
202 | 		if (n < (count - (F-1)))
203 | 			goto loop2_start;
204 | 	}
205 | 	// remainder loop: the elements left over when count is not a multiple of F
206 | 	if (n < count) {
207 | loop_start:
208 | 		result += complete_hash_func( first[n] );
209 | 		++n;
210 | 		
211 | 		if (n != count)
212 | 			goto loop_start;
213 | 	}
214 | 	// compare the accumulated sum with the expected value
215 |     check_sum<T>(result);
216 |   }
217 |   // hand the timer reading to record_result together with the label
218 |   record_result( timer(), label );
219 | }
220 | 
221 | /******************************************************************************/
222 | /******************************************************************************/
223 | 
224 | // our global arrays of numbers to be operated upon
225 | 
226 | double dataDouble[SIZE];	// input of the double tests, filled in main
227 | 
228 | int32_t data32[SIZE];	// input of the int32_t tests, filled in main
229 | 
230 | // not elegant, but I need strings to hang around until we print the results
231 | // and I don't want to pull in STL
232 | const int UnrollLimit = 32;	// largest unrolling factor tested, and the number of label buffers below
233 | char temp_string[UnrollLimit][100];	// label for unrolling factor N is written to temp_string[N-1]
234 | 
235 | /******************************************************************************/
236 | /******************************************************************************/
237 | 
238 | // another unrolled loop to create all of our tests: runs the "for" loop test for every factor from 1 to N
239 | template< int N, typename T >
240 | struct for_loop_tests {
241 | 	static void do_test( const T *data, const char *label_base ) {
242 | 		for_loop_tests<N-1, T>::do_test(data, label_base);	// the smaller factors run first
243 | 		snprintf( temp_string[N-1], sizeof(temp_string[N-1]), "%s %d", label_base, N );	// bounded write: an overlong label_base is truncated instead of overrunning the buffer; N must not exceed UnrollLimit
244 | 		test_for_loop_unroll_factor<N>( data, SIZE, temp_string[N-1] );
245 | 	}
246 | };
247 | 
248 | // N == 0 ends the recursion without running a test
249 | template<typename T>
250 | struct for_loop_tests<0,T> {
251 | 	static void do_test( const T *, const char * ) {}
252 | };
253 | 
254 | /******************************************************************************/
255 | // runs the "while" loop test for every unrolling factor from 1 to N
256 | template< int N, typename T >
257 | struct while_loop_tests {
258 | 	static void do_test( const T *data, const char *label_base ) {
259 | 		while_loop_tests<N-1, T>::do_test(data, label_base);	// the smaller factors run first
260 | 		snprintf( temp_string[N-1], sizeof(temp_string[N-1]), "%s %d", label_base, N );	// bounded write: an overlong label_base is truncated instead of overrunning the buffer; N must not exceed UnrollLimit
261 | 		test_while_loop_unroll_factor<N>( data, SIZE, temp_string[N-1] );
262 | 	}
263 | };
264 | 
265 | // N == 0 ends the recursion without running a test
266 | template<typename T>
267 | struct while_loop_tests<0,T> {
268 | 	static void do_test( const T *, const char * ) {}
269 | };
270 | 
271 | /******************************************************************************/
272 | // runs the "do" loop test for every unrolling factor from 1 to N
273 | template< int N, typename T >
274 | struct do_loop_tests {
275 | 	static void do_test( const T *data, const char *label_base ) {
276 | 		do_loop_tests<N-1, T>::do_test(data, label_base);	// the smaller factors run first
277 | 		snprintf( temp_string[N-1], sizeof(temp_string[N-1]), "%s %d", label_base, N );	// bounded write: an overlong label_base is truncated instead of overrunning the buffer; N must not exceed UnrollLimit
278 | 		test_do_loop_unroll_factor<N>( data, SIZE, temp_string[N-1] );
279 | 	}
280 | };
281 | 
282 | // N == 0 ends the recursion without running a test
283 | template<typename T>
284 | struct do_loop_tests<0,T> {
285 | 	static void do_test( const T *, const char * ) {}
286 | };
287 | 
288 | /******************************************************************************/
289 | // runs the "goto" loop test for every unrolling factor from 1 to N
290 | template< int N, typename T >
291 | struct goto_loop_tests {
292 | 	static void do_test( const T *data, const char *label_base ) {
293 | 		goto_loop_tests<N-1, T>::do_test(data, label_base);	// the smaller factors run first
294 | 		snprintf( temp_string[N-1], sizeof(temp_string[N-1]), "%s %d", label_base, N );	// bounded write: an overlong label_base is truncated instead of overrunning the buffer; N must not exceed UnrollLimit
295 | 		test_goto_loop_unroll_factor<N>( data, SIZE, temp_string[N-1] );
296 | 	}
297 | };
298 | 
299 | // N == 0 ends the recursion without running a test
300 | template<typename T>
301 | struct goto_loop_tests<0,T> {
302 | 	static void do_test( const T *, const char * ) {}
303 | };
304 | 
305 | /******************************************************************************/
306 | /******************************************************************************/
307 | // runs every loop style at unrolling factors 1 through UnrollLimit, first on int32_t data, then on double data
308 | int main(int argc, char** argv) {
309 | 	
310 | 	// output command for documentation:
311 | 	int i;
312 | 	for (i = 0; i < argc; ++i)
313 | 		printf("%s ", argv[i] );
314 | 	printf("\n");
315 | 	// optional arguments: the iteration count, then the value the arrays are filled with
316 | 	if (argc > 1) iterations = atoi(argv[1]);
317 | 	if (argc > 2) init_value = (double) atof(argv[2]);
318 | 
319 | 
320 | // int32_t
321 | 	::fill(data32, data32+SIZE, int32_t(init_value));
322 | 	// each group of tests is followed by its own summarize call
323 | 	for_loop_tests<UnrollLimit, int32_t>::do_test( data32, "int32_t for loop unroll" );
324 | 	summarize("int32_t for loop unrolling", SIZE, iterations, kDontShowGMeans, kDontShowPenalty );
325 | 	
326 | 	while_loop_tests<UnrollLimit, int32_t>::do_test( data32, "int32_t while loop unroll" );
327 | 	summarize("int32_t while loop unrolling", SIZE, iterations, kDontShowGMeans, kDontShowPenalty );
328 | 
329 | 	do_loop_tests<UnrollLimit, int32_t>::do_test( data32, "int32_t do loop unroll" );
330 | 	summarize("int32_t do loop unrolling", SIZE, iterations, kDontShowGMeans, kDontShowPenalty );
331 | 
332 | 	goto_loop_tests<UnrollLimit, int32_t>::do_test( data32, "int32_t goto loop unroll" );	
333 | 	summarize("int32_t goto loop unrolling", SIZE, iterations, kDontShowGMeans, kDontShowPenalty );
334 | 
335 | 
336 | // double
337 | 	iterations /= 4;	// the double tests run a quarter of the iterations used for int32_t
338 | 
339 | 	::fill(dataDouble, dataDouble+SIZE, double(init_value));
340 | 	
341 | 	for_loop_tests<UnrollLimit, double>::do_test( dataDouble, "double for loop unroll" );
342 | 	summarize("double for loop unrolling", SIZE, iterations, kDontShowGMeans, kDontShowPenalty );
343 | 	
344 | 	while_loop_tests<UnrollLimit, double>::do_test( dataDouble, "double while loop unroll" );
345 | 	summarize("double while loop unrolling", SIZE, iterations, kDontShowGMeans, kDontShowPenalty );
346 | 
347 | 	do_loop_tests<UnrollLimit, double>::do_test( dataDouble, "double do loop unroll" );
348 | 	summarize("double do loop unrolling", SIZE, iterations, kDontShowGMeans, kDontShowPenalty );
349 | 
350 | 	goto_loop_tests<UnrollLimit, double>::do_test( dataDouble, "double goto loop unroll" );	
351 | 	summarize("double goto loop unrolling", SIZE, iterations, kDontShowGMeans, kDontShowPenalty );
352 | 
353 | 
354 | 	return 0;
355 | }
356 | 
357 | // the end
358 | /******************************************************************************/
359 | /******************************************************************************/
360 | 


--------------------------------------------------------------------------------
/src/machine.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |     Copyright 2007-2008 Adobe Systems Incorporated
  3 |     Distributed under the MIT License (see accompanying file LICENSE_1_0_0.txt
  4 |     or a copy at http://stlab.adobe.com/licenses.html)
  5 | 
  6 | 
  7 | The purpose of this source file is to report information about the compiler,
  8 | OS and machine running the benchmark
  9 | 
 10 | When adding reporting for your compiler, OS and CPU:
 11 | 	Please remember that this source file has to compile everywhere else as well.
 12 | 
 13 | See http://predef.sourceforge.net/precomp.html for some older compilers
 14 | and architectures.
 15 | See source for Unix hostinfo.
 16 | 
 17 | All trademarks used herein are the property of their owner, and are only used
 18 | for correct identification of their products
 19 | 
 20 | */
 21 | 
 22 | /******************************************************************************/
 23 | 
 24 | #include <stdio.h>
 25 | #include <sys/types.h>
 26 | #include "benchmark_stdint.hpp"
 27 | 
 28 | // this should be defined on Mach derived OSes (MacOS, FreeBSD, etc.)
 29 | #if defined(_MACHTYPES_H_)
 30 | #include <sys/sysctl.h>
 31 | #endif
 32 | 
 33 | 
 34 | /******************************************************************************/
 35 | // prints one line for every fixed-width integer type whose sizeof differs from its nominal byte count; prints nothing when all of them match
 36 | void VerifyTypeSizes()
 37 | {
 38 | 	if (sizeof(int8_t) != 1)
 39 | 		printf("Found size of int8_t was %d instead of 1\n", (int)sizeof(int8_t) );
 40 | 	if (sizeof(uint8_t) != 1)
 41 | 		printf("Found size of uint8_t was %d instead of 1\n", (int)sizeof(uint8_t) );
 42 | 	if (sizeof(int16_t) != 2)
 43 | 		printf("Found size of int16_t was %d instead of 2\n", (int)sizeof(int16_t) );
 44 | 	if (sizeof(uint16_t) != 2)
 45 | 		printf("Found size of uint16_t was %d instead of 2\n", (int)sizeof(uint16_t) );
 46 | 	if (sizeof(int32_t) != 4)
 47 | 		printf("Found size of int32_t was %d instead of 4\n", (int)sizeof(int32_t) );
 48 | 	if (sizeof(uint32_t) != 4)
 49 | 		printf("Found size of uint32_t was %d instead of 4\n", (int)sizeof(uint32_t) );
 50 | 	if (sizeof(int64_t) != 8)
 51 | 		printf("Found size of int64_t was %d instead of 8\n", (int)sizeof(int64_t) );
 52 | 	if (sizeof(uint64_t) != 8)
 53 | 		printf("Found size of uint64_t was %d instead of 8\n", (int)sizeof(uint64_t) );
 54 | }
 55 | 
 56 | /******************************************************************************/
 57 | 
 58 | // compiler version and any major targeting options (such as 32 vs 64 bit), printed under the ##Compiler tag
 59 | void ReportCompiler() 
 60 | {
 61 | 
 62 | 	printf("##Compiler\n");
 63 | 
 64 | #if __INTEL_COMPILER
 65 | 
 66 | 	printf("Intel Compiler version %d\n", __INTEL_COMPILER );
 67 | 	printf("Build %d\n", __INTEL_COMPILER_BUILD_DATE );
 68 | 
 69 | 	// _WIN32 is also defined for 64 bit Windows targets, so the 64 bit test has to come first
 70 | 	#if defined(_WIN64) || defined(__WIN64__)
 71 | 		printf("Compiling for Windows 64 bit\n" );
 72 | 	#elif defined(__WIN32__) || defined(_WIN32)
 73 | 		printf("Compiling for Windows 32 bit\n" );
 74 | 	#endif
 75 | 	
 76 | 
 77 | #elif _MSC_VER
 78 | 
 79 | 	printf("Microsoft VisualC++ version %d\n", _MSC_VER );
 80 | 	
 81 | 	#if defined(_WIN64)
 82 | 		printf("Compiling for Windows 64 bit\n" );
 83 | 	#elif defined(_WIN32)
 84 | 		printf("Compiling for Windows 32 bit\n");
 85 | 	#endif
 86 | 	
 87 | 	#if defined(__CLR_VER)
 88 | 		printf("CLR version %d\n", __CLR_VER );	// __CLR_VER is an integer literal, not a string
 89 | 	#endif
 90 | 
 91 | 	/*
 92 | 	See http://msdn2.microsoft.com/en-us/library/b0084kay(VS.80).aspx#_predir_table_1..3
 93 | 	*/
 94 | 
 95 | #elif __GNUC__
 96 | 
 97 | 	// clang also defines __GNUC__, so tell the two apart before naming the compiler
 98 | 	#if defined(__clang__)
 99 | 		printf("Clang version %s\n", __clang_version__ );
100 | 	#else
101 | 		printf("GCC version %s\n", __VERSION__ );
102 | 	#endif
103 | 	
104 | 	// the numeric GCC version is also available as __GNUC__ and __GNUC_MINOR__
105 | 	// (plus __GNUC_PATCHLEVEL__ where it is defined)
106 | 	
107 | 	#if __LP64__
108 | 		printf("Compiled for LP64\n");
109 | 	#endif
110 | 	
111 | 	
112 | 	/*
113 | 	Other useful macros:
114 | 	__OPTIMIZE__
115 | 	__OPTIMIZE_SIZE__
116 | 	__NO_INLINE__
117 | 
118 | 	See http://developer.apple.com/documentation/DeveloperTools/gcc-4.0.1/cpp/Common-Predefined-Macros.html
119 | 	*/
120 | 	
121 | #elif __MWERKS__
122 | 	printf("Metrowerks CodeWarrior version 0x%8.8lX\n", (unsigned long)__MWERKS__ );	// %lX expects an unsigned long argument
123 | #elif __MRC__
124 | 	printf("Apple MrC[pp] version 0x%8.8lX\n", (unsigned long)__MRC__ );
125 | #elif __MOTO__
126 | 	printf("Motorola MCC version 0x%8.8lX\n", (unsigned long)__MOTO__ );
127 | #else
128 | 	printf("********\n" );
129 | 	printf("Unknown compiler, please update %s for your compiler\n", __FILE__ );
130 | 	printf("********\n" );
131 | #endif
132 | 
133 | }
134 | 
135 | /******************************************************************************/
136 | 
137 | // what kind of CPU is the compiler targeting? decided entirely by predefined macros, printed under ##Target CPU
138 | void ReportCPUTarget()
139 | {
140 | 
141 | 	printf("##Target CPU\n");
142 | 
143 | #if _MANAGED
144 | 
145 | 	printf("Compiled for Microsoft managed code (CLR)\n" );
146 | 
147 | #elif defined(__ppc64__)
148 | 
149 | 	printf("Compiled for PowerPC 64bit\n");
150 | 
151 | #elif defined(__powerc) || defined(__ppc__) || defined(powerpc) || defined(ppc)
152 | 
153 | 	printf("Compiled for PowerPC 32bit\n");
154 | 
155 | #elif defined(_M_IA64)
156 | 
157 | 	printf("Compiled for Intel Architecture 64\n" );
158 | 
159 | #elif defined(_M_X64) || defined(__x86_64__)
160 | 
161 | 	printf("Compiled for x86 64bit\n" );
162 | 
163 | #elif defined(__i386__) || defined(i386) || defined(_X86_) || defined(_M_IX86)
164 | 
165 | 	printf("Compiled for x86 32bit\n" );
166 | 
167 | 	#if _M_IX86
168 | 	switch( _M_IX86) {
169 | 	case 300:	printf("Compiled for 80386\n" );		break;
170 | 	case 400:	printf("Compiled for 80486\n" );		break;
171 | 	case 500:	printf("Compiled for Pentium\n" );		break;
172 | 	case 600:	printf("Compiled for PentiumII\n" );	break;
173 | 	default:	
174 | 		printf("********\n" );
175 | 		printf("Unknown x86 target, please update %s for your cpu\n", __FILE__ );
176 | 		printf("********\n" );
177 | 		break;
178 | 	}
179 | 	#endif
180 | 
181 | #elif defined(__aarch64__) || defined(_M_ARM64)
182 | 
183 | 	printf("Compiled for ARM 64bit\n" );
184 | 
185 | #elif defined(__arm__) || defined(_M_ARM)
186 | 
187 | 	printf("Compiled for ARM 32bit\n" );
188 | 
189 | #elif defined(_ALPHA_)
190 | 
191 | 	printf("Compiled for Alpha\n" );
192 | 	
193 | #else
194 | 	printf("********\n" );
195 | 	printf("Unknown target CPU, please update %s for your cpu\n", __FILE__ );
196 | 	printf("********\n" );
197 | #endif
198 | 
199 | }
200 | 
201 | /******************************************************************************/
202 | 
203 | // byte order of the CPU we're running on, read from the first byte of a known 32 bit value
204 | void ReportEndian()
205 | {
206 | 	static uint32_t cookie = 0x01020304;
207 | 	
208 | 	// the byte stored at the lowest address tells which end of the value comes first
209 | 	switch ( *(unsigned char *) &cookie ) {
210 | 		case 0x01:	printf( "Big Endian\n" );		break;
211 | 		case 0x04:	printf( "Little Endian\n" );	break;
212 | 		default:
213 | 			printf("********\n" );
214 | 			printf("Unknown byteorder, please update %s for your cpu\n", __FILE__ );
215 | 			printf("********\n" );
216 | 			break;
217 | 	}
218 | }
219 | 
220 | /******************************************************************************/
221 | 
222 | 	
223 | // what CPU are we actually running on: architecture, revision, speed
224 | // printed under the ##Physical CPU tag and always followed by the byte order
225 | // 
226 | // methods for obtaining this information are probably OS specific;
227 | // only the sysctl interface is implemented, other systems report just the byte order
228 | void ReportCPUPhysical()
229 | {
230 | 	const int one_million = 1000000L;
231 | 	
232 | 	printf("##Physical CPU\n");
233 | 
234 | 
235 | 
236 | // this should work for any Mach based OS (MacOS, FreeBSD, etc.)
237 | #if defined(_MACHTYPES_H_)
238 | 
239 | // see sysctl.h for the definitions
240 | 	{
241 | 	int32_t returnBuffer=0;		// exactly 4 bytes, so a 32 bit result is read back correctly on any byte order
242 | 	long long bigBuffer = 0;
243 | 	size_t len;
244 | 	long retval=0;
245 | 	// this gets us the CPU family, but not the exact CPU model and rev!
246 | 	len = sizeof(returnBuffer);
247 | 	retval = sysctlbyname("hw.cputype", &returnBuffer, &len, NULL, 0);
248 | 	if (retval == 0) {
249 | 		printf("Mach CPU type %ld\n", (long)returnBuffer );
250 | 		// the top byte carries architecture flags (such as the 64 bit ABI bit); strip it to get the base type
251 | 		// from sys/machine.h
252 | 		switch(returnBuffer & 0x00ffffff) {
253 | 			case 1:
254 | 				printf("CPU_TYPE VAX\n");
255 | 				break;
256 | 			case 6:
257 | 				printf("CPU_TYPE MC680x0\n");
258 | 				break;
259 | 			case 7:
260 | 				printf("CPU_TYPE x86\n");
261 | 				break;
262 | 			case 8:
263 | 				printf("CPU_TYPE MIPS\n");
264 | 				break;
265 | 			case 10:
266 | 				printf("CPU_TYPE MC98000\n");
267 | 				break;
268 | 			case 11:
269 | 				printf("CPU_TYPE HPPA\n");
270 | 				break;
271 | 			case 12:
272 | 				printf("CPU_TYPE ARM\n");
273 | 				break;
274 | 			case 13:
275 | 				printf("CPU_TYPE MC88000\n");
276 | 				break;
277 | 			case 14:
278 | 				printf("CPU_TYPE SPARC\n");
279 | 				break;
280 | 			case 15:
281 | 				printf("CPU_TYPE i860\n");
282 | 				break;
283 | 			case 16:
284 | 				printf("CPU_TYPE Alpha\n");
285 | 				break;
286 | 			case 18:
287 | 				printf("CPU_TYPE PowerPC\n");
288 | 				break;
289 | 			default:
290 | 				printf("********\n" );
291 | 				printf("Unknown Mach CPU Type, please update %s for your cpu\n", __FILE__ );
292 | 				printf("********\n" );
293 | 				break;
294 | 		}
295 | 	
296 | 	}
297 | 	
298 | 	// corresponds to CPU types, but the list is kinda big and dependent on CPU major type
299 | 	len = sizeof(returnBuffer);
300 | 	retval = sysctlbyname("hw.cpusubtype", &returnBuffer, &len, NULL, 0);
301 | 	if (retval == 0)
302 | 		printf("Mach CPU subtype %ld\n", (long)returnBuffer );
303 | 	
304 | 	len = sizeof(bigBuffer);
305 | 	retval = sysctlbyname("hw.cpufrequency_max", &bigBuffer, &len, NULL, 0);
306 | 	if (retval == 0)
307 | 		printf("CPU frequency: %.2f Mhz\n", (double)bigBuffer/one_million );
308 | 	
309 | 	
310 | 	// PowerPC CPU extensions
311 | 	len = sizeof(returnBuffer);
312 | 	retval = sysctlbyname("hw.optional.floatingpoint", &returnBuffer, &len, NULL, 0);
313 | 	if (retval == 0 && returnBuffer != 0)
314 | 		printf("CPU has optional floating point instructions\n" );
315 | 	
316 | 	len = sizeof(returnBuffer);
317 | 	retval = sysctlbyname("hw.optional.altivec", &returnBuffer, &len, NULL, 0);
318 | 	if (retval == 0 && returnBuffer != 0)
319 | 		printf("CPU has AltiVec instructions\n" );
320 | 	
321 | 	len = sizeof(returnBuffer);
322 | 	retval = sysctlbyname("hw.optional.64bitops", &returnBuffer, &len, NULL, 0);
323 | 	if (retval == 0 && returnBuffer != 0)
324 | 		printf("CPU has 64 bit instructions\n" );
325 | 	
326 | 	len = sizeof(returnBuffer);
327 | 	retval = sysctlbyname("hw.optional.fsqrt", &returnBuffer, &len, NULL, 0);
328 | 	if (retval == 0 && returnBuffer != 0)
329 | 		printf("CPU has fsqrt instruction\n" );
330 | 	
331 | 	
332 | 	// x86 CPU extension
333 | 	len = sizeof(returnBuffer);
334 | 	retval = sysctlbyname("hw.optional.mmx", &returnBuffer, &len, NULL, 0);
335 | 	if (retval == 0 && returnBuffer != 0)
336 | 		printf("CPU has MMX instructions\n" );
337 | 	
338 | 	len = sizeof(returnBuffer);
339 | 	retval = sysctlbyname("hw.optional.sse", &returnBuffer, &len, NULL, 0);
340 | 	if (retval == 0 && returnBuffer != 0)
341 | 		printf("CPU has SSE instructions\n" );
342 | 	
343 | 	len = sizeof(returnBuffer);
344 | 	retval = sysctlbyname("hw.optional.sse2", &returnBuffer, &len, NULL, 0);
345 | 	if (retval == 0 && returnBuffer != 0)
346 | 		printf("CPU has SSE2 instructions\n" );
347 | 	
348 | 	len = sizeof(returnBuffer);
349 | 	retval = sysctlbyname("hw.optional.sse3", &returnBuffer, &len, NULL, 0);
350 | 	if (retval == 0 && returnBuffer != 0)
351 | 		printf("CPU has SSE3 instructions\n" );
352 | 	
353 | 	len = sizeof(returnBuffer);
354 | 	retval = sysctlbyname("hw.optional.sse4", &returnBuffer, &len, NULL, 0);
355 | 	if (retval == 0 && returnBuffer != 0)
356 | 		printf("CPU has SSE4 instructions\n" );
357 | 	
358 | 	len = sizeof(returnBuffer);
359 | 	retval = sysctlbyname("hw.optional.sse5", &returnBuffer, &len, NULL, 0);
360 | 	if (retval == 0 && returnBuffer != 0)
361 | 		printf("CPU has SSE5 instructions\n" );
362 | 	
363 | 	len = sizeof(returnBuffer);
364 | 	retval = sysctlbyname("hw.optional.x86_64", &returnBuffer, &len, NULL, 0);
365 | 	if (retval == 0 && returnBuffer != 0)
366 | 		printf("CPU has x86_64 instructions\n" );
367 | 	
368 | 	}
369 | 	
370 | #endif	// _MACHTYPES_H_
371 | 	
372 | 	// useful information, and not so dependent
373 | 	ReportEndian();
374 | }
375 | 
376 | /******************************************************************************/
377 | 
378 | // format a number of bytes using the largest binary unit that fits, and print it (without return)
379 | void printMemSize( long long input )
380 | {
381 | 	// printf formats for successive powers of 1024, starting at 1024 bytes
382 | 	static const char *const scaled_formats[] = {
383 | 		"%.2f KBytes", "%.2f MBytes", "%.2f GBytes",
384 | 		"%.2f TeraBytes", "%.2f PetaBytes", "%.2f ExaBytes"
385 | 	};
386 | 	const int last_unit = 5;	// index of ExaBytes, the unit without an upper limit
387 | 
388 | 	if (input < 1024) {	// too small to scale: format as bytes
389 | 		printf("%lld bytes", input );
390 | 		return;
391 | 	}
392 | 
393 | 	// dividing a double by 1024 is exact here, so stepwise scaling gives the same value as one division
394 | 	double scaled = (double)input / 1024.0;
395 | 	int unit = 0;
396 | 	for (; unit < last_unit && scaled >= 1024.0; ++unit)
397 | 		scaled /= 1024.0;
398 | 	printf( scaled_formats[unit], scaled );
399 | }
400 | 
401 | /******************************************************************************/
402 | 
403 | // information about the machine, outside of the CPU: CPU counts, RAM size and page size, under ##Machine
404 | void ReportMachinePhysical()
405 | {
406 | 	printf("##Machine\n");
407 | 	
408 | 
409 | // this should work for any Mach based OS (MacOS, FreeBSD, etc.)
410 | #if defined(_MACHTYPES_H_)
411 | 
412 | // see sysctl.h for the definitions
413 | 	{
414 | 	int32_t returnBuffer=0;		// exactly 4 bytes, so a 32 bit result is read back correctly on any byte order
415 | 	long long bigBuffer = 0;
416 | 	size_t len;
417 | 	long retval=0;
418 | 	
419 | 	len = sizeof(returnBuffer);
420 | 	retval = sysctlbyname("hw.ncpu", &returnBuffer, &len, NULL, 0);
421 | 	if (retval == 0) {
422 | 		printf("Machine has %ld CPUs\n", (long)returnBuffer );
423 | 		}
424 | 	
425 | 	len = sizeof(returnBuffer);
426 | 	retval = sysctlbyname("hw.physicalcpu_max", &returnBuffer, &len, NULL, 0);
427 | 	if (retval == 0)
428 | 		printf("Machine has %ld physical CPUs\n", (long)returnBuffer );
429 | 	
430 | 	len = sizeof(returnBuffer);
431 | 	retval = sysctlbyname("hw.logicalcpu_max", &returnBuffer, &len, NULL, 0);
432 | 	if (retval == 0)
433 | 		printf("Machine has %ld logical CPUs\n", (long)returnBuffer );
434 | 	
435 | 	len = sizeof(bigBuffer);
436 | 	retval = sysctlbyname("hw.memsize", &bigBuffer, &len, NULL, 0);
437 | 	if (retval == 0) {
438 | 		printf("Machine has ");
439 | 		printMemSize( bigBuffer );
440 | 		printf(" of RAM\n");
441 | 		}
442 | 	
443 | 	// the page size may come back as 4 or as 8 bytes; after the call len holds the number of bytes written
444 | 	len = sizeof(bigBuffer);
445 | 	retval = sysctlbyname("hw.pagesize", &bigBuffer, &len, NULL, 0);
446 | 	if (retval == 0 && len == sizeof(returnBuffer)) {
447 | 		// only 4 bytes were written, leaving stale RAM-size bytes in bigBuffer: read again through the 32 bit buffer
448 | 		retval = sysctlbyname("hw.pagesize", &returnBuffer, &len, NULL, 0);
449 | 		bigBuffer = returnBuffer;
450 | 		}
451 | 	if (retval == 0) {
452 | 		printf("Machine using ");
453 | 		printMemSize( bigBuffer );
454 | 		printf(" pagesize\n");
455 | 		}
456 | 	}
457 | #endif	// _MACHTYPES_H_
458 | }
459 | 
460 | /******************************************************************************/
461 | // prints the ##Operating System tag, followed by the kernel version string when the sysctl interface is compiled in
462 | void ReportOS()
463 | {
464 | 	printf("##Operating System\n");
465 | 	
466 | 
467 | // this should work for any Mach based OS (MacOS, FreeBSD, etc.)
468 | #if defined(_MACHTYPES_H_)
469 | 
470 | // see sysctl.h for the definitions
471 | 	{
472 | 	// the value is requested by numeric name (mib) instead of by string name;
473 | 	// only the first two mib entries are filled in and passed to sysctl
474 | 	char string_buffer[1024];
475 | 	long retval=0;
476 | 	int mib[4];
477 | 	size_t len;
478 | 	
479 | 	mib[0] = CTL_KERN;
480 | 	mib[1] = KERN_VERSION;
481 | 	len = sizeof(string_buffer);
482 | 	retval = sysctl(mib, 2, string_buffer, &len, NULL, 0);
483 | 	if (retval == 0)
484 | 		printf("Kernel OS Version: %s\n", string_buffer );
485 | 	
486 | 	}
487 | 	
488 | #endif	// _MACHTYPES_H_
489 | }
490 | 
491 | /******************************************************************************/
492 | // prints the whole machine report, one section after another; the command line arguments are not used
493 | int main (int argc, char *argv[])
494 | {
495 | 	// this should only be changed when the reporting tags have changed in an incompatible way
496 | 	const char version[] = "version 1.0";
497 | 
498 | 	printf("##Start machine report %s\n", version );
499 | 	VerifyTypeSizes();
500 | 	ReportCompiler();
501 | 	ReportCPUTarget();
502 | 	ReportCPUPhysical();
503 | 	ReportMachinePhysical();
504 | 	ReportOS();
505 | 	printf("##End machine report\n");
506 | 
507 | 	return 0;
508 | }
509 | 
510 | 


--------------------------------------------------------------------------------
/src/simple_types_constant_folding.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |     Copyright 2007-2008 Adobe Systems Incorporated
  3 |     Distributed under the MIT License (see accompanying file LICENSE_1_0_0.txt
  4 |     or a copy at http://stlab.adobe.com/licenses.html )
  5 | 
  6 | 
  7 | Goal:  Test compiler optimizations related to constant folding of simple language defined types
  8 | 
  9 | Assumptions:
 10 | 
 11 | 	1) the compiler will combine constant calculations into a single constant for simple types
 12 | 		aka constant folding
 13 | 		result = A + B			==>		result = constant
 14 | 		result = A - B			==>		result = constant
 15 | 		result = A * B			==>		result = constant
 16 | 		result = A / B			==>		result = constant
 17 | 		result = A % B			==>		result = constant	for integer types
 18 | 		result = (A == B)		==>		result = constant	for integer types
 19 | 		result = (A != B)		==>		result = constant	for integer types
 20 | 		result = (A > B)		==>		result = constant	for integer types
 21 | 		result = (A < B)		==>		result = constant	for integer types
 22 | 		result = (A >= B)		==>		result = constant	for integer types
 23 | 		result = (A <= B)		==>		result = constant	for integer types
 24 | 		result = (A & B)		==>		result = constant	for integer types
 25 | 		result = (A | B)		==>		result = constant	for integer types
 26 | 		result = (A ^ B)		==>		result = constant	for integer types
 27 | 		
 28 | 		result = input + A + B + C + D	==>		result = input + (A+B+C+D)
 29 | 		result = input - A - B - C - D	==>		result = input - (A+B+C+D)
 30 | 		result = input * A * B * C * D	==>		result = input * (A*B*C*D)
 31 | 		result = input + A * B * C * D	==>		result = input + (A*B*C*D)
 32 | 		result = ((((input/A) /B) /C) /D)	==>	result = input / (A*B*C*D)
 33 | 		result = input + (((A /B) /C) /D)	==>	result = input + (A/B/C/D)
 34 | 		result = input & A & B & C & D	==>		result = input & (A&B&C&D)			for integer types
 35 | 		result = input | A | B | C | D	==>		result = input | (A|B|C|D)			for integer types
 36 | 		result = input ^ A ^ B ^ C ^ D	==>		result = input ^ (A^B^C^D)			for integer types
 37 | 
 38 | 
 39 | NOTE - in some cases, loop invariant code motion might move the constant calculation out of the inner loop
 40 | 	making it appear that the constants were folded
 41 | 		But in the constant result cases, we want the compiler to recognize the constant and move it out of the loop
 42 | 
 43 | */
 44 | 
 45 | /******************************************************************************/
 46 | 
 47 | #include "benchmark_stdint.hpp"
 48 | #include <cstddef>
 49 | #include <cstdio>
 50 | #include <ctime>
 51 | #include <cstdlib>
 52 | #include <cmath>
 53 | #include "benchmark_results.h"
 54 | #include "benchmark_timer.h"
 55 | 
 56 | /******************************************************************************/
 57 | 
 58 | // this constant may need to be adjusted to give reasonable minimum times
 59 | // For best results, times should be about 1.0 seconds for the minimum test run
 60 | int base_iterations = 2000000;	// replaced by the first command line argument when one is given
 61 | int iterations = base_iterations;	// working count; main assigns base_iterations to it again before the int8_t tests
 62 | 
 63 | 
 64 | // 8000 items, or between 8k and 64k of data
 65 | // this is intended to remain within the L2 cache of most common CPUs
 66 | const int SIZE 	= 8000;
 67 | 
 68 | 
 69 | // initial value for filling our arrays, may be changed from the command line
 70 | double init_value = 1.0;
 71 | 
 72 | /******************************************************************************/
 73 | 
 74 | // our global arrays of numbers to be operated upon
 75 | 
 76 | double dataDouble[SIZE];
 77 | float dataFloat[SIZE];
 78 | 
 79 | uint64_t data64unsigned[SIZE];
 80 | int64_t data64[SIZE];
 81 | 
 82 | uint32_t data32unsigned[SIZE];
 83 | int32_t data32[SIZE];
 84 | 
 85 | uint16_t data16unsigned[SIZE];
 86 | int16_t data16[SIZE];
 87 | 
 88 | uint8_t data8unsigned[SIZE];
 89 | int8_t data8[SIZE];
 90 | 
 91 | /******************************************************************************/
 92 | 
 93 | #include "benchmark_shared_tests.h"
 94 | 
 95 | /******************************************************************************/
 96 | 
 97 | 
 98 | int main(int argc, char** argv) {
 99 | 	
100 | 	// output command for documentation:
101 | 	int i;
102 | 	for (i = 0; i < argc; ++i)
103 | 		printf("%s ", argv[i] );
104 | 	printf("\n");
105 | 
106 | 	if (argc > 1) base_iterations = atoi(argv[1]);
107 | 	if (argc > 2) init_value = (double) atof(argv[2]);
108 | 
109 | 
110 | 
111 | // int8_t
112 | 	::fill(data8, data8+SIZE, int8_t(init_value));
113 | 	
114 | 	iterations = base_iterations;
115 | 	test_constant<int8_t, custom_two<int8_t> >(data8,SIZE,"int8_t constant");
116 | 	test_constant<int8_t, custom_add_constants<int8_t> >(data8,SIZE,"int8_t add constants");
117 | 	test_constant<int8_t, custom_sub_constants<int8_t> >(data8,SIZE,"int8_t subtract constants");
118 | 	test_constant<int8_t, custom_multiply_constants<int8_t> >(data8,SIZE,"int8_t multiply constants");
119 | 	test_constant<int8_t, custom_divide_constants<int8_t> >(data8,SIZE,"int8_t divide constants");
120 | 	test_constant<int8_t, custom_mod_constants<int8_t> >(data8,SIZE,"int8_t mod constants");
121 | 	test_constant<int8_t, custom_equal_constants<int8_t> >(data8,SIZE,"int8_t equal constants");
122 | 	test_constant<int8_t, custom_notequal_constants<int8_t> >(data8,SIZE,"int8_t notequal constants");
123 | 	test_constant<int8_t, custom_greaterthan_constants<int8_t> >(data8,SIZE,"int8_t greater than constants");
124 | 	test_constant<int8_t, custom_lessthan_constants<int8_t> >(data8,SIZE,"int8_t less than constants");
125 | 	test_constant<int8_t, custom_greaterthanequal_constants<int8_t> >(data8,SIZE,"int8_t greater than equal constants");
126 | 	test_constant<int8_t, custom_lessthanequal_constants<int8_t> >(data8,SIZE,"int8_t less than equal constants");
127 | 	test_constant<int8_t, custom_and_constants<int8_t> >(data8,SIZE,"int8_t and constants");
128 | 	test_constant<int8_t, custom_or_constants<int8_t> >(data8,SIZE,"int8_t or constants");
129 | 	test_constant<int8_t, custom_xor_constants<int8_t> >(data8,SIZE,"int8_t xor constants");
130 | 	
131 | 	summarize("int8_t simple constant folding", SIZE, iterations, kDontShowGMeans, kDontShowPenalty );
132 | 	
133 | 	
134 | 	iterations = base_iterations / 10;
135 | 	test_constant<int8_t, custom_constant_add<int8_t> >(data8,SIZE,"int8_t constant add");
136 | 	test_constant<int8_t, custom_multiple_constant_add<int8_t> >(data8,SIZE,"int8_t multiple constant adds");
137 | 
138 | 	test_constant<int8_t, custom_constant_sub<int8_t> >(data8,SIZE,"int8_t constant subtract");
139 | 	test_constant<int8_t, custom_multiple_constant_sub<int8_t> >(data8,SIZE,"int8_t multiple constant subtracts");
140 | 
141 | 	test_constant<int8_t, custom_constant_multiply<int8_t> >(data8,SIZE,"int8_t constant multiply");
142 | 	test_constant<int8_t, custom_multiple_constant_multiply<int8_t> >(data8,SIZE,"int8_t multiple constant multiplies");
143 | 	test_constant<int8_t, custom_multiple_constant_multiply2<int8_t> >(data8,SIZE,"int8_t multiple constant multiply2");
144 | 
145 | 	test_constant<int8_t, custom_constant_divide<int8_t> >(data8,SIZE,"int8_t constant divide");
146 | 	test_constant<int8_t, custom_multiple_constant_divide<int8_t> >(data8,SIZE,"int8_t multiple constant divides");
147 | 	test_constant<int8_t, custom_multiple_constant_divide2<int8_t> >(data8,SIZE,"int8_t multiple constant divide2");
148 | 	
149 | 	test_constant<int8_t, custom_multiple_constant_mixed<int8_t> >(data8,SIZE,"int8_t multiple constant mixed");
150 | 
151 | 	test_constant<int8_t, custom_constant_and<int8_t> >(data8,SIZE,"int8_t constant and");
152 | 	test_constant<int8_t, custom_multiple_constant_and<int8_t> >(data8,SIZE,"int8_t multiple constant and");
153 | 
154 | 	test_constant<int8_t, custom_constant_or<int8_t> >(data8,SIZE,"int8_t constant or");
155 | 	test_constant<int8_t, custom_multiple_constant_or<int8_t> >(data8,SIZE,"int8_t multiple constant or");
156 | 
157 | 	test_constant<int8_t, custom_constant_xor<int8_t> >(data8,SIZE,"int8_t constant xor");
158 | 	test_constant<int8_t, custom_multiple_constant_xor<int8_t> >(data8,SIZE,"int8_t multiple constant xor");
159 | 
160 | 	summarize("int8_t constant folding", SIZE, iterations, kDontShowGMeans, kDontShowPenalty );
161 | 
162 | 
163 | // unsigned8
164 | 	::fill(data8unsigned, data8unsigned+SIZE, uint8_t(init_value));
165 | 	
166 | 	iterations = base_iterations;
167 | 	test_constant<uint8_t, custom_two<uint8_t> >(data8unsigned,SIZE,"uint8_t constant");
168 | 	test_constant<uint8_t, custom_add_constants<uint8_t> >(data8unsigned,SIZE,"uint8_t add constants");
169 | 	test_constant<uint8_t, custom_sub_constants<uint8_t> >(data8unsigned,SIZE,"uint8_t subtract constants");
170 | 	test_constant<uint8_t, custom_multiply_constants<uint8_t> >(data8unsigned,SIZE,"uint8_t multiply constants");
171 | 	test_constant<uint8_t, custom_divide_constants<uint8_t> >(data8unsigned,SIZE,"uint8_t divide constants");
172 | 	test_constant<uint8_t, custom_mod_constants<uint8_t> >(data8unsigned,SIZE,"uint8_t mod constants");
173 | 	test_constant<uint8_t, custom_equal_constants<uint8_t> >(data8unsigned,SIZE,"uint8_t equal constants");
174 | 	test_constant<uint8_t, custom_notequal_constants<uint8_t> >(data8unsigned,SIZE,"uint8_t notequal constants");
175 | 	test_constant<uint8_t, custom_greaterthan_constants<uint8_t> >(data8unsigned,SIZE,"uint8_t greater than constants");
176 | 	test_constant<uint8_t, custom_lessthan_constants<uint8_t> >(data8unsigned,SIZE,"uint8_t less than constants");
177 | 	test_constant<uint8_t, custom_greaterthanequal_constants<uint8_t> >(data8unsigned,SIZE,"uint8_t greater than equal constants");
178 | 	test_constant<uint8_t, custom_lessthanequal_constants<uint8_t> >(data8unsigned,SIZE,"uint8_t less than equal constants");
179 | 	test_constant<uint8_t, custom_and_constants<uint8_t> >(data8unsigned,SIZE,"uint8_t and constants");
180 | 	test_constant<uint8_t, custom_or_constants<uint8_t> >(data8unsigned,SIZE,"uint8_t or constants");
181 | 	test_constant<uint8_t, custom_xor_constants<uint8_t> >(data8unsigned,SIZE,"uint8_t xor constants");
182 | 	
183 | 	summarize("uint8_t simple constant folding", SIZE, iterations, kDontShowGMeans, kDontShowPenalty );
184 | 	
185 | 	
186 | 	iterations = base_iterations / 10;
187 | 	test_constant<uint8_t, custom_constant_add<uint8_t> >(data8unsigned,SIZE,"uint8_t constant add");
188 | 	test_constant<uint8_t, custom_multiple_constant_add<uint8_t> >(data8unsigned,SIZE,"uint8_t multiple constant adds");
189 | 
190 | 	test_constant<uint8_t, custom_constant_sub<uint8_t> >(data8unsigned,SIZE,"uint8_t constant subtract");
191 | 	test_constant<uint8_t, custom_multiple_constant_sub<uint8_t> >(data8unsigned,SIZE,"uint8_t multiple constant subtracts");
192 | 
193 | 	test_constant<uint8_t, custom_constant_multiply<uint8_t> >(data8unsigned,SIZE,"uint8_t constant multiply");
194 | 	test_constant<uint8_t, custom_multiple_constant_multiply<uint8_t> >(data8unsigned,SIZE,"uint8_t multiple constant multiplies");
195 | 	test_constant<uint8_t, custom_multiple_constant_multiply2<uint8_t> >(data8unsigned,SIZE,"uint8_t multiple constant multiply2");
196 | 
197 | 	test_constant<uint8_t, custom_constant_divide<uint8_t> >(data8unsigned,SIZE,"uint8_t constant divide");
198 | 	test_constant<uint8_t, custom_multiple_constant_divide<uint8_t> >(data8unsigned,SIZE,"uint8_t multiple constant divides");
199 | 	test_constant<uint8_t, custom_multiple_constant_divide2<uint8_t> >(data8unsigned,SIZE,"uint8_t multiple constant divide2");
200 | 	
201 | 	test_constant<uint8_t, custom_multiple_constant_mixed<uint8_t> >(data8unsigned,SIZE,"uint8_t multiple constant mixed");
202 | 
203 | 	test_constant<uint8_t, custom_constant_and<uint8_t> >(data8unsigned,SIZE,"uint8_t constant and");
204 | 	test_constant<uint8_t, custom_multiple_constant_and<uint8_t> >(data8unsigned,SIZE,"uint8_t multiple constant and");
205 | 
206 | 	test_constant<uint8_t, custom_constant_or<uint8_t> >(data8unsigned,SIZE,"uint8_t constant or");
207 | 	test_constant<uint8_t, custom_multiple_constant_or<uint8_t> >(data8unsigned,SIZE,"uint8_t multiple constant or");
208 | 
209 | 	test_constant<uint8_t, custom_constant_xor<uint8_t> >(data8unsigned,SIZE,"uint8_t constant xor");
210 | 	test_constant<uint8_t, custom_multiple_constant_xor<uint8_t> >(data8unsigned,SIZE,"uint8_t multiple constant xor");
211 | 
212 | 	summarize("uint8_t constant folding", SIZE, iterations, kDontShowGMeans, kDontShowPenalty );
213 | 
214 | 
215 | // int16_t
216 | 	::fill(data16, data16+SIZE, int16_t(init_value));
217 | 	
218 | 	iterations = base_iterations;
219 | 	test_constant<int16_t, custom_two<int16_t> >(data16,SIZE,"int16_t constant");
220 | 	test_constant<int16_t, custom_add_constants<int16_t> >(data16,SIZE,"int16_t add constants");
221 | 	test_constant<int16_t, custom_sub_constants<int16_t> >(data16,SIZE,"int16_t subtract constants");
222 | 	test_constant<int16_t, custom_multiply_constants<int16_t> >(data16,SIZE,"int16_t multiply constants");
223 | 	test_constant<int16_t, custom_divide_constants<int16_t> >(data16,SIZE,"int16_t divide constants");
224 | 	test_constant<int16_t, custom_mod_constants<int16_t> >(data16,SIZE,"int16_t mod constants");
225 | 	test_constant<int16_t, custom_equal_constants<int16_t> >(data16,SIZE,"int16_t equal constants");
226 | 	test_constant<int16_t, custom_notequal_constants<int16_t> >(data16,SIZE,"int16_t notequal constants");
227 | 	test_constant<int16_t, custom_greaterthan_constants<int16_t> >(data16,SIZE,"int16_t greater than constants");
228 | 	test_constant<int16_t, custom_lessthan_constants<int16_t> >(data16,SIZE,"int16_t less than constants");
229 | 	test_constant<int16_t, custom_greaterthanequal_constants<int16_t> >(data16,SIZE,"int16_t greater than equal constants");
230 | 	test_constant<int16_t, custom_lessthanequal_constants<int16_t> >(data16,SIZE,"int16_t less than equal constants");
231 | 	test_constant<int16_t, custom_and_constants<int16_t> >(data16,SIZE,"int16_t and constants");
232 | 	test_constant<int16_t, custom_or_constants<int16_t> >(data16,SIZE,"int16_t or constants");
233 | 	test_constant<int16_t, custom_xor_constants<int16_t> >(data16,SIZE,"int16_t xor constants");
234 | 	
235 | 	summarize("int16_t simple constant folding", SIZE, iterations, kDontShowGMeans, kDontShowPenalty );
236 | 	
237 | 	
238 | 	iterations = base_iterations / 10;
239 | 	test_constant<int16_t, custom_constant_add<int16_t> >(data16,SIZE,"int16_t constant add");
240 | 	test_constant<int16_t, custom_multiple_constant_add<int16_t> >(data16,SIZE,"int16_t multiple constant adds");
241 | 
242 | 	test_constant<int16_t, custom_constant_sub<int16_t> >(data16,SIZE,"int16_t constant subtract");
243 | 	test_constant<int16_t, custom_multiple_constant_sub<int16_t> >(data16,SIZE,"int16_t multiple constant subtracts");
244 | 
245 | 	test_constant<int16_t, custom_constant_multiply<int16_t> >(data16,SIZE,"int16_t constant multiply");
246 | 	test_constant<int16_t, custom_multiple_constant_multiply<int16_t> >(data16,SIZE,"int16_t multiple constant multiplies");
247 | 	test_constant<int16_t, custom_multiple_constant_multiply2<int16_t> >(data16,SIZE,"int16_t multiple constant multiply2");
248 | 
249 | 	test_constant<int16_t, custom_constant_divide<int16_t> >(data16,SIZE,"int16_t constant divide");
250 | 	test_constant<int16_t, custom_multiple_constant_divide<int16_t> >(data16,SIZE,"int16_t multiple constant divides");
251 | 	test_constant<int16_t, custom_multiple_constant_divide2<int16_t> >(data16,SIZE,"int16_t multiple constant divide2");
252 | 	
253 | 	test_constant<int16_t, custom_multiple_constant_mixed<int16_t> >(data16,SIZE,"int16_t multiple constant mixed");
254 | 
255 | 	test_constant<int16_t, custom_constant_and<int16_t> >(data16,SIZE,"int16_t constant and");
256 | 	test_constant<int16_t, custom_multiple_constant_and<int16_t> >(data16,SIZE,"int16_t multiple constant and");
257 | 
258 | 	test_constant<int16_t, custom_constant_or<int16_t> >(data16,SIZE,"int16_t constant or");
259 | 	test_constant<int16_t, custom_multiple_constant_or<int16_t> >(data16,SIZE,"int16_t multiple constant or");
260 | 
261 | 	test_constant<int16_t, custom_constant_xor<int16_t> >(data16,SIZE,"int16_t constant xor");
262 | 	test_constant<int16_t, custom_multiple_constant_xor<int16_t> >(data16,SIZE,"int16_t multiple constant xor");
263 | 
264 | 	summarize("int16_t constant folding", SIZE, iterations, kDontShowGMeans, kDontShowPenalty );
265 | 
266 | 
267 | // unsigned16
268 | 	::fill(data16unsigned, data16unsigned+SIZE, uint16_t(init_value));
269 | 	
270 | 	iterations = base_iterations;
271 | 	test_constant<uint16_t, custom_two<uint16_t> >(data16unsigned,SIZE,"uint16_t constant");
272 | 	test_constant<uint16_t, custom_add_constants<uint16_t> >(data16unsigned,SIZE,"uint16_t add constants");
273 | 	test_constant<uint16_t, custom_sub_constants<uint16_t> >(data16unsigned,SIZE,"uint16_t subtract constants");
274 | 	test_constant<uint16_t, custom_multiply_constants<uint16_t> >(data16unsigned,SIZE,"uint16_t multiply constants");
275 | 	test_constant<uint16_t, custom_divide_constants<uint16_t> >(data16unsigned,SIZE,"uint16_t divide constants");
276 | 	test_constant<uint16_t, custom_mod_constants<uint16_t> >(data16unsigned,SIZE,"uint16_t mod constants");
277 | 	test_constant<uint16_t, custom_equal_constants<uint16_t> >(data16unsigned,SIZE,"uint16_t equal constants");
278 | 	test_constant<uint16_t, custom_notequal_constants<uint16_t> >(data16unsigned,SIZE,"uint16_t notequal constants");
279 | 	test_constant<uint16_t, custom_greaterthan_constants<uint16_t> >(data16unsigned,SIZE,"uint16_t greater than constants");
280 | 	test_constant<uint16_t, custom_lessthan_constants<uint16_t> >(data16unsigned,SIZE,"uint16_t less than constants");
281 | 	test_constant<uint16_t, custom_greaterthanequal_constants<uint16_t> >(data16unsigned,SIZE,"uint16_t greater than equal constants");
282 | 	test_constant<uint16_t, custom_lessthanequal_constants<uint16_t> >(data16unsigned,SIZE,"uint16_t less than equal constants");
283 | 	test_constant<uint16_t, custom_and_constants<uint16_t> >(data16unsigned,SIZE,"uint16_t and constants");
284 | 	test_constant<uint16_t, custom_or_constants<uint16_t> >(data16unsigned,SIZE,"uint16_t or constants");
285 | 	test_constant<uint16_t, custom_xor_constants<uint16_t> >(data16unsigned,SIZE,"uint16_t xor constants");
286 | 	
287 | 	summarize("uint16_t simple constant folding", SIZE, iterations, kDontShowGMeans, kDontShowPenalty );
288 | 	
289 | 	
290 | 	iterations = base_iterations / 10;
291 | 	test_constant<uint16_t, custom_constant_add<uint16_t> >(data16unsigned,SIZE,"uint16_t constant add");
292 | 	test_constant<uint16_t, custom_multiple_constant_add<uint16_t> >(data16unsigned,SIZE,"uint16_t multiple constant adds");
293 | 
294 | 	test_constant<uint16_t, custom_constant_sub<uint16_t> >(data16unsigned,SIZE,"uint16_t constant subtract");
295 | 	test_constant<uint16_t, custom_multiple_constant_sub<uint16_t> >(data16unsigned,SIZE,"uint16_t multiple constant subtracts");
296 | 
297 | 	test_constant<uint16_t, custom_constant_multiply<uint16_t> >(data16unsigned,SIZE,"uint16_t constant multiply");
298 | 	test_constant<uint16_t, custom_multiple_constant_multiply<uint16_t> >(data16unsigned,SIZE,"uint16_t multiple constant multiplies");
299 | 	test_constant<uint16_t, custom_multiple_constant_multiply2<uint16_t> >(data16unsigned,SIZE,"uint16_t multiple constant multiply2");
300 | 
301 | 	test_constant<uint16_t, custom_constant_divide<uint16_t> >(data16unsigned,SIZE,"uint16_t constant divide");
302 | 	test_constant<uint16_t, custom_multiple_constant_divide<uint16_t> >(data16unsigned,SIZE,"uint16_t multiple constant divides");
303 | 	test_constant<uint16_t, custom_multiple_constant_divide2<uint16_t> >(data16unsigned,SIZE,"uint16_t multiple constant divide2");
304 | 	
305 | 	test_constant<uint16_t, custom_multiple_constant_mixed<uint16_t> >(data16unsigned,SIZE,"uint16_t multiple constant mixed");
306 | 
307 | 	test_constant<uint16_t, custom_constant_and<uint16_t> >(data16unsigned,SIZE,"uint16_t constant and");
308 | 	test_constant<uint16_t, custom_multiple_constant_and<uint16_t> >(data16unsigned,SIZE,"uint16_t multiple constant and");
309 | 
310 | 	test_constant<uint16_t, custom_constant_or<uint16_t> >(data16unsigned,SIZE,"uint16_t constant or");
311 | 	test_constant<uint16_t, custom_multiple_constant_or<uint16_t> >(data16unsigned,SIZE,"uint16_t multiple constant or");
312 | 
313 | 	test_constant<uint16_t, custom_constant_xor<uint16_t> >(data16unsigned,SIZE,"uint16_t constant xor");
314 | 	test_constant<uint16_t, custom_multiple_constant_xor<uint16_t> >(data16unsigned,SIZE,"uint16_t multiple constant xor");
315 | 
316 | 	summarize("uint16_t constant folding", SIZE, iterations, kDontShowGMeans, kDontShowPenalty );
317 | 
318 | 
319 | 
320 | // int32_t
321 | 	::fill(data32, data32+SIZE, int32_t(init_value));
322 | 
323 | 	iterations = base_iterations;
324 | 	test_constant<int32_t, custom_two<int32_t> >(data32,SIZE,"int32_t constant");
325 | 	test_constant<int32_t, custom_add_constants<int32_t> >(data32,SIZE,"int32_t add constants");
326 | 	test_constant<int32_t, custom_sub_constants<int32_t> >(data32,SIZE,"int32_t subtract constants");
327 | 	test_constant<int32_t, custom_multiply_constants<int32_t> >(data32,SIZE,"int32_t multiply constants");
328 | 	test_constant<int32_t, custom_divide_constants<int32_t> >(data32,SIZE,"int32_t divide constants");
329 | 	test_constant<int32_t, custom_mod_constants<int32_t> >(data32,SIZE,"int32_t mod constants");
330 | 	test_constant<int32_t, custom_equal_constants<int32_t> >(data32,SIZE,"int32_t equal constants");
331 | 	test_constant<int32_t, custom_notequal_constants<int32_t> >(data32,SIZE,"int32_t notequal constants");
332 | 	test_constant<int32_t, custom_greaterthan_constants<int32_t> >(data32,SIZE,"int32_t greater than constants");
333 | 	test_constant<int32_t, custom_lessthan_constants<int32_t> >(data32,SIZE,"int32_t less than constants");
334 | 	test_constant<int32_t, custom_greaterthanequal_constants<int32_t> >(data32,SIZE,"int32_t greater than equal constants");
335 | 	test_constant<int32_t, custom_lessthanequal_constants<int32_t> >(data32,SIZE,"int32_t less than equal constants");
336 | 	test_constant<int32_t, custom_and_constants<int32_t> >(data32,SIZE,"int32_t and constants");
337 | 	test_constant<int32_t, custom_or_constants<int32_t> >(data32,SIZE,"int32_t or constants");
338 | 	test_constant<int32_t, custom_xor_constants<int32_t> >(data32,SIZE,"int32_t xor constants");
339 | 	
340 | 	summarize("int32_t simple constant folding", SIZE, iterations, kDontShowGMeans, kDontShowPenalty );
341 | 	
342 | 	
343 | 	iterations = base_iterations / 10;
344 | 	test_constant<int32_t, custom_constant_add<int32_t> >(data32,SIZE,"int32_t constant add");
345 | 	test_constant<int32_t, custom_multiple_constant_add<int32_t> >(data32,SIZE,"int32_t multiple constant adds");
346 | 
347 | 	test_constant<int32_t, custom_constant_sub<int32_t> >(data32,SIZE,"int32_t constant subtract");
348 | 	test_constant<int32_t, custom_multiple_constant_sub<int32_t> >(data32,SIZE,"int32_t multiple constant subtracts");
349 | 
350 | 	test_constant<int32_t, custom_constant_multiply<int32_t> >(data32,SIZE,"int32_t constant multiply");
351 | 	test_constant<int32_t, custom_multiple_constant_multiply<int32_t> >(data32,SIZE,"int32_t multiple constant multiplies");
352 | 	test_constant<int32_t, custom_multiple_constant_multiply2<int32_t> >(data32,SIZE,"int32_t multiple constant multiply2");
353 | 
354 | 	test_constant<int32_t, custom_constant_divide<int32_t> >(data32,SIZE,"int32_t constant divide");
355 | 	test_constant<int32_t, custom_multiple_constant_divide<int32_t> >(data32,SIZE,"int32_t multiple constant divides");
356 | 	test_constant<int32_t, custom_multiple_constant_divide2<int32_t> >(data32,SIZE,"int32_t multiple constant divide2");
357 | 	
358 | 	test_constant<int32_t, custom_multiple_constant_mixed<int32_t> >(data32,SIZE,"int32_t multiple constant mixed");
359 | 
360 | 	test_constant<int32_t, custom_constant_and<int32_t> >(data32,SIZE,"int32_t constant and");
361 | 	test_constant<int32_t, custom_multiple_constant_and<int32_t> >(data32,SIZE,"int32_t multiple constant and");
362 | 
363 | 	test_constant<int32_t, custom_constant_or<int32_t> >(data32,SIZE,"int32_t constant or");
364 | 	test_constant<int32_t, custom_multiple_constant_or<int32_t> >(data32,SIZE,"int32_t multiple constant or");
365 | 
366 | 	test_constant<int32_t, custom_constant_xor<int32_t> >(data32,SIZE,"int32_t constant xor");
367 | 	test_constant<int32_t, custom_multiple_constant_xor<int32_t> >(data32,SIZE,"int32_t multiple constant xor");
368 | 
369 | 	summarize("int32_t constant folding", SIZE, iterations, kDontShowGMeans, kDontShowPenalty );
370 | 
371 | 
372 | 
373 | // unsigned32
374 | 	::fill(data32unsigned, data32unsigned+SIZE, uint32_t(init_value));
375 | 
376 | 	iterations = base_iterations;
377 | 	test_constant<uint32_t, custom_two<uint32_t> >(data32unsigned,SIZE,"uint32_t constant");
378 | 	test_constant<uint32_t, custom_add_constants<uint32_t> >(data32unsigned,SIZE,"uint32_t add constants");
379 | 	test_constant<uint32_t, custom_sub_constants<uint32_t> >(data32unsigned,SIZE,"uint32_t subtract constants");
380 | 	test_constant<uint32_t, custom_multiply_constants<uint32_t> >(data32unsigned,SIZE,"uint32_t multiply constants");
381 | 	test_constant<uint32_t, custom_divide_constants<uint32_t> >(data32unsigned,SIZE,"uint32_t divide constants");
382 | 	test_constant<uint32_t, custom_mod_constants<uint32_t> >(data32unsigned,SIZE,"uint32_t mod constants");
383 | 	test_constant<uint32_t, custom_equal_constants<uint32_t> >(data32unsigned,SIZE,"uint32_t equal constants");
384 | 	test_constant<uint32_t, custom_notequal_constants<uint32_t> >(data32unsigned,SIZE,"uint32_t notequal constants");
385 | 	test_constant<uint32_t, custom_greaterthan_constants<uint32_t> >(data32unsigned,SIZE,"uint32_t greater than constants");
386 | 	test_constant<uint32_t, custom_lessthan_constants<uint32_t> >(data32unsigned,SIZE,"uint32_t less than constants");
387 | 	test_constant<uint32_t, custom_greaterthanequal_constants<uint32_t> >(data32unsigned,SIZE,"uint32_t greater than equal constants");
388 | 	test_constant<uint32_t, custom_lessthanequal_constants<uint32_t> >(data32unsigned,SIZE,"uint32_t less than equal constants");
389 | 	test_constant<uint32_t, custom_and_constants<uint32_t> >(data32unsigned,SIZE,"uint32_t and constants");
390 | 	test_constant<uint32_t, custom_or_constants<uint32_t> >(data32unsigned,SIZE,"uint32_t or constants");
391 | 	test_constant<uint32_t, custom_xor_constants<uint32_t> >(data32unsigned,SIZE,"uint32_t xor constants");
392 | 	
393 | 	summarize("uint32_t simple constant folding", SIZE, iterations, kDontShowGMeans, kDontShowPenalty );
394 | 	
395 | 	
396 | 	iterations = base_iterations / 10;
397 | 	test_constant<uint32_t, custom_constant_add<uint32_t> >(data32unsigned,SIZE,"uint32_t constant add");
398 | 	test_constant<uint32_t, custom_multiple_constant_add<uint32_t> >(data32unsigned,SIZE,"uint32_t multiple constant adds");
399 | 
400 | 	test_constant<uint32_t, custom_constant_sub<uint32_t> >(data32unsigned,SIZE,"uint32_t constant subtract");
401 | 	test_constant<uint32_t, custom_multiple_constant_sub<uint32_t> >(data32unsigned,SIZE,"uint32_t multiple constant subtracts");
402 | 
403 | 	test_constant<uint32_t, custom_constant_multiply<uint32_t> >(data32unsigned,SIZE,"uint32_t constant multiply");
404 | 	test_constant<uint32_t, custom_multiple_constant_multiply<uint32_t> >(data32unsigned,SIZE,"uint32_t multiple constant multiplies");
405 | 	test_constant<uint32_t, custom_multiple_constant_multiply2<uint32_t> >(data32unsigned,SIZE,"uint32_t multiple constant multiply2");
406 | 
407 | 	test_constant<uint32_t, custom_constant_divide<uint32_t> >(data32unsigned,SIZE,"uint32_t constant divide");
408 | 	test_constant<uint32_t, custom_multiple_constant_divide<uint32_t> >(data32unsigned,SIZE,"uint32_t multiple constant divides");
409 | 	test_constant<uint32_t, custom_multiple_constant_divide2<uint32_t> >(data32unsigned,SIZE,"uint32_t multiple constant divide2");
410 | 	
411 | 	test_constant<uint32_t, custom_multiple_constant_mixed<uint32_t> >(data32unsigned,SIZE,"uint32_t multiple constant mixed");
412 | 
413 | 	test_constant<uint32_t, custom_constant_and<uint32_t> >(data32unsigned,SIZE,"uint32_t constant and");
414 | 	test_constant<uint32_t, custom_multiple_constant_and<uint32_t> >(data32unsigned,SIZE,"uint32_t multiple constant and");
415 | 
416 | 	test_constant<uint32_t, custom_constant_or<uint32_t> >(data32unsigned,SIZE,"uint32_t constant or");
417 | 	test_constant<uint32_t, custom_multiple_constant_or<uint32_t> >(data32unsigned,SIZE,"uint32_t multiple constant or");
418 | 
419 | 	test_constant<uint32_t, custom_constant_xor<uint32_t> >(data32unsigned,SIZE,"uint32_t constant xor");
420 | 	test_constant<uint32_t, custom_multiple_constant_xor<uint32_t> >(data32unsigned,SIZE,"uint32_t multiple constant xor");
421 | 
422 | 	summarize("uint32_t constant folding", SIZE, iterations, kDontShowGMeans, kDontShowPenalty );
423 | 
424 | 	
425 | 
426 | // int64_t
427 | 	::fill(data64, data64+SIZE, int64_t(init_value));
428 | 
429 | 	iterations = base_iterations;
430 | 	test_constant<int64_t, custom_two<int64_t> >(data64,SIZE,"int64_t constant");
431 | 	test_constant<int64_t, custom_add_constants<int64_t> >(data64,SIZE,"int64_t add constants");
432 | 	test_constant<int64_t, custom_sub_constants<int64_t> >(data64,SIZE,"int64_t subtract constants");
433 | 	test_constant<int64_t, custom_multiply_constants<int64_t> >(data64,SIZE,"int64_t multiply constants");
434 | 	test_constant<int64_t, custom_divide_constants<int64_t> >(data64,SIZE,"int64_t divide constants");
435 | 	test_constant<int64_t, custom_mod_constants<int64_t> >(data64,SIZE,"int64_t mod constants");
436 | 	test_constant<int64_t, custom_equal_constants<int64_t> >(data64,SIZE,"int64_t equal constants");
437 | 	test_constant<int64_t, custom_notequal_constants<int64_t> >(data64,SIZE,"int64_t notequal constants");
438 | 	test_constant<int64_t, custom_greaterthan_constants<int64_t> >(data64,SIZE,"int64_t greater than constants");
439 | 	test_constant<int64_t, custom_lessthan_constants<int64_t> >(data64,SIZE,"int64_t less than constants");
440 | 	test_constant<int64_t, custom_greaterthanequal_constants<int64_t> >(data64,SIZE,"int64_t greater than equal constants");
441 | 	test_constant<int64_t, custom_lessthanequal_constants<int64_t> >(data64,SIZE,"int64_t less than equal constants");
442 | 	test_constant<int64_t, custom_and_constants<int64_t> >(data64,SIZE,"int64_t and constants");
443 | 	test_constant<int64_t, custom_or_constants<int64_t> >(data64,SIZE,"int64_t or constants");
444 | 	test_constant<int64_t, custom_xor_constants<int64_t> >(data64,SIZE,"int64_t xor constants");
445 | 	
446 | 	summarize("int64_t simple constant folding", SIZE, iterations, kDontShowGMeans, kDontShowPenalty );
447 | 	
448 | 	
449 | 	iterations = base_iterations / 10;
450 | 	test_constant<int64_t, custom_constant_add<int64_t> >(data64,SIZE,"int64_t constant add");
451 | 	test_constant<int64_t, custom_multiple_constant_add<int64_t> >(data64,SIZE,"int64_t multiple constant adds");
452 | 
453 | 	test_constant<int64_t, custom_constant_sub<int64_t> >(data64,SIZE,"int64_t constant subtract");
454 | 	test_constant<int64_t, custom_multiple_constant_sub<int64_t> >(data64,SIZE,"int64_t multiple constant subtracts");
455 | 
456 | 	test_constant<int64_t, custom_constant_multiply<int64_t> >(data64,SIZE,"int64_t constant multiply");
457 | 	test_constant<int64_t, custom_multiple_constant_multiply<int64_t> >(data64,SIZE,"int64_t multiple constant multiplies");
458 | 	test_constant<int64_t, custom_multiple_constant_multiply2<int64_t> >(data64,SIZE,"int64_t multiple constant multiply2");
459 | 
460 | 	test_constant<int64_t, custom_constant_divide<int64_t> >(data64,SIZE,"int64_t constant divide");
461 | 	test_constant<int64_t, custom_multiple_constant_divide<int64_t> >(data64,SIZE,"int64_t multiple constant divides");
462 | 	test_constant<int64_t, custom_multiple_constant_divide2<int64_t> >(data64,SIZE,"int64_t multiple constant divide2");
463 | 	
464 | 	test_constant<int64_t, custom_multiple_constant_mixed<int64_t> >(data64,SIZE,"int64_t multiple constant mixed");
465 | 
466 | 	test_constant<int64_t, custom_constant_and<int64_t> >(data64,SIZE,"int64_t constant and");
467 | 	test_constant<int64_t, custom_multiple_constant_and<int64_t> >(data64,SIZE,"int64_t multiple constant and");
468 | 
469 | 	test_constant<int64_t, custom_constant_or<int64_t> >(data64,SIZE,"int64_t constant or");
470 | 	test_constant<int64_t, custom_multiple_constant_or<int64_t> >(data64,SIZE,"int64_t multiple constant or");
471 | 
472 | 	test_constant<int64_t, custom_constant_xor<int64_t> >(data64,SIZE,"int64_t constant xor");
473 | 	test_constant<int64_t, custom_multiple_constant_xor<int64_t> >(data64,SIZE,"int64_t multiple constant xor");
474 | 
475 | 	summarize("int64_t constant folding", SIZE, iterations, kDontShowGMeans, kDontShowPenalty );
476 | 
477 | 
478 | // unsigned64
479 | 	::fill(data64unsigned, data64unsigned+SIZE, uint64_t(init_value));
480 | 	
481 | 	iterations = base_iterations;
482 | 	test_constant<uint64_t, custom_two<uint64_t> >(data64unsigned,SIZE,"uint64_t constant");
483 | 	test_constant<uint64_t, custom_add_constants<uint64_t> >(data64unsigned,SIZE,"uint64_t add constants");
484 | 	test_constant<uint64_t, custom_sub_constants<uint64_t> >(data64unsigned,SIZE,"uint64_t subtract constants");
485 | 	test_constant<uint64_t, custom_multiply_constants<uint64_t> >(data64unsigned,SIZE,"uint64_t multiply constants");
486 | 	test_constant<uint64_t, custom_divide_constants<uint64_t> >(data64unsigned,SIZE,"uint64_t divide constants");
487 | 	test_constant<uint64_t, custom_mod_constants<uint64_t> >(data64unsigned,SIZE,"uint64_t mod constants");
488 | 	test_constant<uint64_t, custom_equal_constants<uint64_t> >(data64unsigned,SIZE,"uint64_t equal constants");
489 | 	test_constant<uint64_t, custom_notequal_constants<uint64_t> >(data64unsigned,SIZE,"uint64_t notequal constants");
490 | 	test_constant<uint64_t, custom_greaterthan_constants<uint64_t> >(data64unsigned,SIZE,"uint64_t greater than constants");
491 | 	test_constant<uint64_t, custom_lessthan_constants<uint64_t> >(data64unsigned,SIZE,"uint64_t less than constants");
492 | 	test_constant<uint64_t, custom_greaterthanequal_constants<uint64_t> >(data64unsigned,SIZE,"uint64_t greater than equal constants");
493 | 	test_constant<uint64_t, custom_lessthanequal_constants<uint64_t> >(data64unsigned,SIZE,"uint64_t less than equal constants");
494 | 	test_constant<uint64_t, custom_and_constants<uint64_t> >(data64unsigned,SIZE,"uint64_t and constants");
495 | 	test_constant<uint64_t, custom_or_constants<uint64_t> >(data64unsigned,SIZE,"uint64_t or constants");
496 | 	test_constant<uint64_t, custom_xor_constants<uint64_t> >(data64unsigned,SIZE,"uint64_t xor constants");
497 | 	
498 | 	summarize("uint64_t simple constant folding", SIZE, iterations, kDontShowGMeans, kDontShowPenalty );
499 | 	
500 | 	
501 | 	iterations = base_iterations / 10;
502 | 	test_constant<uint64_t, custom_constant_add<uint64_t> >(data64unsigned,SIZE,"uint64_t constant add");
503 | 	test_constant<uint64_t, custom_multiple_constant_add<uint64_t> >(data64unsigned,SIZE,"uint64_t multiple constant adds");
504 | 
505 | 	test_constant<uint64_t, custom_constant_sub<uint64_t> >(data64unsigned,SIZE,"uint64_t constant subtract");
506 | 	test_constant<uint64_t, custom_multiple_constant_sub<uint64_t> >(data64unsigned,SIZE,"uint64_t multiple constant subtracts");
507 | 
508 | 	test_constant<uint64_t, custom_constant_multiply<uint64_t> >(data64unsigned,SIZE,"uint64_t constant multiply");
509 | 	test_constant<uint64_t, custom_multiple_constant_multiply<uint64_t> >(data64unsigned,SIZE,"uint64_t multiple constant multiplies");
510 | 	test_constant<uint64_t, custom_multiple_constant_multiply2<uint64_t> >(data64unsigned,SIZE,"uint64_t multiple constant multiply2");
511 | 
512 | 	test_constant<uint64_t, custom_constant_divide<uint64_t> >(data64unsigned,SIZE,"uint64_t constant divide");
513 | 	test_constant<uint64_t, custom_multiple_constant_divide<uint64_t> >(data64unsigned,SIZE,"uint64_t multiple constant divides");
514 | 	test_constant<uint64_t, custom_multiple_constant_divide2<uint64_t> >(data64unsigned,SIZE,"uint64_t multiple constant divide2");
515 | 	
516 | 	test_constant<uint64_t, custom_multiple_constant_mixed<uint64_t> >(data64unsigned,SIZE,"uint64_t multiple constant mixed");
517 | 
518 | 	test_constant<uint64_t, custom_constant_and<uint64_t> >(data64unsigned,SIZE,"uint64_t constant and");
519 | 	test_constant<uint64_t, custom_multiple_constant_and<uint64_t> >(data64unsigned,SIZE,"uint64_t multiple constant and");
520 | 
521 | 	test_constant<uint64_t, custom_constant_or<uint64_t> >(data64unsigned,SIZE,"uint64_t constant or");
522 | 	test_constant<uint64_t, custom_multiple_constant_or<uint64_t> >(data64unsigned,SIZE,"uint64_t multiple constant or");
523 | 
524 | 	test_constant<uint64_t, custom_constant_xor<uint64_t> >(data64unsigned,SIZE,"uint64_t constant xor");
525 | 	test_constant<uint64_t, custom_multiple_constant_xor<uint64_t> >(data64unsigned,SIZE,"uint64_t multiple constant xor");
526 | 
527 | 	summarize("uint64_t constant folding", SIZE, iterations, kDontShowGMeans, kDontShowPenalty );
528 | 
529 | 
530 | 
531 | // float
532 | 	::fill(dataFloat, dataFloat+SIZE, float(init_value));
533 | 
534 | 	iterations = base_iterations;
535 | 	test_constant<float, custom_two<float> >(dataFloat,SIZE,"float constant");
536 | 	test_constant<float, custom_add_constants<float> >(dataFloat,SIZE,"float add constants");
537 | 	test_constant<float, custom_sub_constants<float> >(dataFloat,SIZE,"float subtract constants");
538 | 	test_constant<float, custom_multiply_constants<float> >(dataFloat,SIZE,"float multiply constants");
539 | 	test_constant<float, custom_divide_constants<float> >(dataFloat,SIZE,"float divide constants");
540 | 	
541 | 	summarize("float simple constant folding", SIZE, iterations, kDontShowGMeans, kDontShowPenalty );
542 | 	
543 | 	
544 | 	iterations = base_iterations / 10;
545 | 	test_constant<float, custom_constant_add<float> >(dataFloat,SIZE,"float constant add");
546 | 	test_constant<float, custom_multiple_constant_add<float> >(dataFloat,SIZE,"float multiple constant adds");
547 | 
548 | 	test_constant<float, custom_constant_sub<float> >(dataFloat,SIZE,"float constant subtract");
549 | 	test_constant<float, custom_multiple_constant_sub<float> >(dataFloat,SIZE,"float multiple constant subtracts");
550 | 
551 | 	test_constant<float, custom_constant_multiply<float> >(dataFloat,SIZE,"float constant multiply");
552 | 	test_constant<float, custom_multiple_constant_multiply<float> >(dataFloat,SIZE,"float multiple constant multiplies");
553 | 	test_constant<float, custom_multiple_constant_multiply2<float> >(dataFloat,SIZE,"float multiple constant multiply2");
554 | 
555 | 	test_constant<float, custom_constant_divide<float> >(dataFloat,SIZE,"float constant divide");
556 | 	test_constant<float, custom_multiple_constant_divide<float> >(dataFloat,SIZE,"float multiple constant divides");
557 | 	test_constant<float, custom_multiple_constant_divide2<float> >(dataFloat,SIZE,"float multiple constant divide2");
558 | 	
559 | 	test_constant<float, custom_multiple_constant_mixed<float> >(dataFloat,SIZE,"float multiple constant mixed");
560 | 
561 | 	summarize("float constant folding", SIZE, iterations, kDontShowGMeans, kDontShowPenalty );
562 | 
563 | 
564 | // double
565 | 	::fill(dataDouble, dataDouble+SIZE, double(init_value));
566 | 
567 | 	iterations = base_iterations;
568 | 	test_constant<double, custom_two<double> >(dataDouble,SIZE,"double constant");
569 | 	test_constant<double, custom_add_constants<double> >(dataDouble,SIZE,"double add constants");
570 | 	test_constant<double, custom_sub_constants<double> >(dataDouble,SIZE,"double subtract constants");
571 | 	test_constant<double, custom_multiply_constants<double> >(dataDouble,SIZE,"double multiply constants");
572 | 	test_constant<double, custom_divide_constants<double> >(dataDouble,SIZE,"double divide constants");
573 | 	
574 | 	summarize("double simple constant folding", SIZE, iterations, kDontShowGMeans, kDontShowPenalty );
575 | 
576 | 	
577 | 	iterations = base_iterations / 10;
578 | 	test_constant<double, custom_constant_add<double> >(dataDouble,SIZE,"double constant add");
579 | 	test_constant<double, custom_multiple_constant_add<double> >(dataDouble,SIZE,"double multiple constant adds");
580 | 
581 | 	test_constant<double, custom_constant_sub<double> >(dataDouble,SIZE,"double constant subtract");
582 | 	test_constant<double, custom_multiple_constant_sub<double> >(dataDouble,SIZE,"double multiple constant subtracts");
583 | 
584 | 	test_constant<double, custom_constant_multiply<double> >(dataDouble,SIZE,"double constant multiply");
585 | 	test_constant<double, custom_multiple_constant_multiply<double> >(dataDouble,SIZE,"double multiple constant multiplies");
586 | 	test_constant<double, custom_multiple_constant_multiply2<double> >(dataDouble,SIZE,"double multiple constant multiply2");
587 | 
588 | 	test_constant<double, custom_constant_divide<double> >(dataDouble,SIZE,"double constant divide");
589 | 	test_constant<double, custom_multiple_constant_divide<double> >(dataDouble,SIZE,"double multiple constant divides");
590 | 	test_constant<double, custom_multiple_constant_divide2<double> >(dataDouble,SIZE,"double multiple constant divide2");
591 | 	
592 | 	test_constant<double, custom_multiple_constant_mixed<double> >(dataDouble,SIZE,"double multiple constant mixed");
593 | 
594 | 	summarize("double constant folding", SIZE, iterations, kDontShowGMeans, kDontShowPenalty );
595 | 
596 | 	
597 | 	return 0;
598 | }
599 | 
600 | // the end
601 | /******************************************************************************/
602 | /******************************************************************************/
603 | 


--------------------------------------------------------------------------------
/src/simple_types_loop_invariant.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |     Copyright 2007-2008 Adobe Systems Incorporated
  3 |     Distributed under the MIT License (see accompanying file LICENSE_1_0_0.txt
  4 |     or a copy at http://stlab.adobe.com/licenses.html )
  5 | 
  6 | 
  7 | Goal:  Test compiler optimizations related to simple language defined types
  8 | 
  9 | Assumptions:
 10 | 
 11 | 	1) the compiler will move loop invariant calculations on simple types out of a loop
 12 | 		aka: loop invariant code motion
 13 | 		
 14 | 		for (i = 0; i < N; ++i)						temp = A + B + C + D;
 15 | 			result = input[i] + A+B+C+D;	==>		for (i = 0; i < N; ++i)
 16 | 														result = input[i] + temp;
 17 | 
 18 | */
 19 | 
 20 | /******************************************************************************/
 21 | 
 22 | #include "benchmark_stdint.hpp"
 23 | #include <cstddef>
 24 | #include <cstdio>
 25 | #include <ctime>
 26 | #include <cstdlib>
 27 | #include <cmath>
 28 | #include "benchmark_results.h"
 29 | #include "benchmark_timer.h"
 30 | 
 31 | /******************************************************************************/
 32 | 
 33 | // Outer-loop repeat count for the timed tests; may need adjusting so the fastest test
 34 | // runs for about 1.0 second.  main() overrides it with the first command-line argument.
 35 | int iterations = 200000;
 36 | 
 37 | 
 38 | // 8000 items per array, or between 8k (1-byte elements) and 64k (8-byte elements) of data
 39 | // this is intended to remain within the L2 cache of most common CPUs
 40 | const int SIZE = 8000;
 41 | 
 42 | 
 43 | // initial value for filling our arrays; main() overrides it with the second command-line argument
 44 | double init_value = 1.0;
 45 | 
 46 | /******************************************************************************/
 47 | 
 48 | // our global arrays of numbers to be operated upon: one SIZE-element array per tested type
 49 | // (main() fills an array with init_value, converted to its element type, before testing it)
 50 | double dataDouble[SIZE];
 51 | float dataFloat[SIZE];
 52 | 
 53 | uint64_t data64unsigned[SIZE];
 54 | int64_t data64[SIZE];
 55 | 
 56 | uint32_t data32unsigned[SIZE];
 57 | int32_t data32[SIZE];
 58 | 
 59 | uint16_t data16unsigned[SIZE];
 60 | int16_t data16[SIZE];
 61 | 
 62 | uint8_t data8unsigned[SIZE];
 63 | int8_t data8[SIZE];
 64 | 
 65 | /******************************************************************************/
 66 | 
 67 | #include "benchmark_shared_tests.h"
 68 | 
 69 | /******************************************************************************/
 70 | 
 71 | // Hand-hoisted reference for the "variable add" test: v1 never changes inside the function,
 72 | // so the per-element add is replaced by a plain summation of the array plus count * v1.
 73 | // This rewrite is always legal for integers; for floating point it is only legal under
 74 | // inexact math (relaxed IEEE rules).  The time from timer() is recorded under 'label'.
 75 | template <typename T, typename Shifter>
 76 | void test_hoisted_variable1(T* first, int count, T v1, const char *label) {
 77 |   start_timer();
 78 |   
 79 |   for (int pass = 0; pass < iterations; ++pass) {
 80 |     T total = 0;
 81 |     int idx = 0;
 82 |     while (idx < count) {
 83 |       total += first[idx];
 84 |       ++idx;
 85 |     }
 86 |     total += count * v1;    // the hoisted invariant: one multiply and one add per pass
 87 |     check_shifted_variable_sum<T, Shifter>(total, v1);
 88 |   }
 89 |   
 90 |   record_result( timer(), label );
 91 | }
 92 | 
 93 | /******************************************************************************/
 94 | 
 95 | 
 96 | int main(int argc, char** argv) {
 97 | 	double temp = 1.0;
 98 | 	
 99 | 	// output command for documentation:
100 | 	int i;
101 | 	for (i = 0; i < argc; ++i)
102 | 		printf("%s ", argv[i] );
103 | 	printf("\n");
104 | 
105 | 	if (argc > 1) iterations = atoi(argv[1]);
106 | 	if (argc > 2) init_value = (double) atof(argv[2]);
107 | 	if (argc > 3) temp = (double)atof(argv[3]);
108 | 
109 | 
110 | // int8_t
111 | 	::fill(data8, data8+SIZE, int8_t(init_value));
112 | 	int8_t var1int8_1, var1int8_2, var1int8_3, var1int8_4;
113 | 	var1int8_1 = int8_t(temp);
114 | 	var1int8_2 = var1int8_1 * int8_t(2);
115 | 	var1int8_3 = var1int8_1 + int8_t(2);
116 | 	var1int8_4 = var1int8_1 + var1int8_2 / var1int8_3;
117 | 	
118 | 	// test moving redundant calcs out of loop
119 | 	test_variable1< int8_t, custom_add_variable<int8_t> > (data8, SIZE, var1int8_1, "int8_t variable add");
120 | 	test_hoisted_variable1< int8_t, custom_add_variable<int8_t> > (data8, SIZE, var1int8_1, "int8_t variable add hoisted");
121 | 	test_variable4< int8_t, custom_add_multiple_variable<int8_t> > (data8, SIZE, var1int8_1, var1int8_2, var1int8_3, var1int8_4, "int8_t multiple variable adds");
122 | 
123 | 	test_variable1< int8_t, custom_sub_variable<int8_t> > (data8, SIZE, var1int8_1, "int8_t variable subtract");
124 | 	test_variable4< int8_t, custom_sub_multiple_variable<int8_t> > (data8, SIZE, var1int8_1, var1int8_2, var1int8_3, var1int8_4, "int8_t multiple variable subtracts");
125 | 	
126 | 	test_variable1< int8_t, custom_multiply_variable<int8_t> > (data8, SIZE, var1int8_1, "int8_t variable multiply");
127 | 	test_variable4< int8_t, custom_multiply_multiple_variable<int8_t> > (data8, SIZE, var1int8_1, var1int8_2, var1int8_3, var1int8_4, "int8_t multiple variable multiplies");
128 | 	test_variable4< int8_t, custom_multiply_multiple_variable2<int8_t> > (data8, SIZE, var1int8_1, var1int8_2, var1int8_3, var1int8_4, "int8_t multiple variable multiplies2");
129 | 
130 | 	test_variable1< int8_t, custom_divide_variable<int8_t> > (data8, SIZE, var1int8_1, "int8_t variable divide");
131 | 	test_variable4< int8_t, custom_divide_multiple_variable<int8_t> > (data8, SIZE, var1int8_1, var1int8_2, var1int8_3, var1int8_4, "int8_t multiple variable divides");
132 | 	test_variable4< int8_t, custom_divide_multiple_variable2<int8_t> > (data8, SIZE, var1int8_1, var1int8_2, var1int8_3, var1int8_4, "int8_t multiple variable divides2");
133 | 	
134 | 	test_variable4< int8_t, custom_mixed_multiple_variable<int8_t> > (data8, SIZE, var1int8_1, var1int8_2, var1int8_3, var1int8_4, "int8_t multiple variable mixed");
135 | 
136 | 	test_variable1< int8_t, custom_variable_and<int8_t> > (data8, SIZE, var1int8_1, "int8_t variable and");
137 | 	test_variable4< int8_t, custom_multiple_variable_and<int8_t> > (data8, SIZE, var1int8_1, var1int8_2, var1int8_3, var1int8_4, "int8_t multiple variable and");
138 | 
139 | 	test_variable1< int8_t, custom_variable_or<int8_t> > (data8, SIZE, var1int8_1, "int8_t variable or");
140 | 	test_variable4< int8_t, custom_multiple_variable_or<int8_t> > (data8, SIZE, var1int8_1, var1int8_2, var1int8_3, var1int8_4, "int8_t multiple variable or");
141 | 
142 | 	test_variable1< int8_t, custom_variable_xor<int8_t> > (data8, SIZE, var1int8_1, "int8_t variable xor");
143 | 	test_variable4< int8_t, custom_multiple_variable_xor<int8_t> > (data8, SIZE, var1int8_1, var1int8_2, var1int8_3, var1int8_4, "int8_t multiple variable xor");
144 | 	
145 | 	summarize("int8_t loop invariant", SIZE, iterations, kDontShowGMeans, kDontShowPenalty );
146 | 
147 | 
148 | // unsigned8
149 | 	::fill(data8unsigned, data8unsigned+SIZE, uint8_t(init_value));
150 | 	uint8_t var1uint8_1, var1uint8_2, var1uint8_3, var1uint8_4;
151 | 	var1uint8_1 = uint8_t(temp);
152 | 	var1uint8_2 = var1uint8_1 * uint8_t(2);
153 | 	var1uint8_3 = var1uint8_1 + uint8_t(2);
154 | 	var1uint8_4 = var1uint8_1 + var1uint8_2 / var1uint8_3;
155 | 	
156 | 	// test moving redundant calcs out of loop
157 | 	test_variable1< uint8_t, custom_add_variable<uint8_t> > (data8unsigned, SIZE, var1uint8_1, "uint8_t variable add");
158 | 	test_hoisted_variable1< uint8_t, custom_add_variable<uint8_t> > (data8unsigned, SIZE, var1uint8_1, "uint8_t variable add hoisted");
159 | 	test_variable4< uint8_t, custom_add_multiple_variable<uint8_t> > (data8unsigned, SIZE, var1uint8_1, var1uint8_2, var1uint8_3, var1uint8_4, "uint8_t multiple variable adds");
160 | 
161 | 	test_variable1< uint8_t, custom_sub_variable<uint8_t> > (data8unsigned, SIZE, var1uint8_1, "uint8_t variable subtract");
162 | 	test_variable4< uint8_t, custom_sub_multiple_variable<uint8_t> > (data8unsigned, SIZE, var1uint8_1, var1uint8_2, var1uint8_3, var1uint8_4, "uint8_t multiple variable subtracts");
163 | 	
164 | 	test_variable1< uint8_t, custom_multiply_variable<uint8_t> > (data8unsigned, SIZE, var1uint8_1, "uint8_t variable multiply");
165 | 	test_variable4< uint8_t, custom_multiply_multiple_variable<uint8_t> > (data8unsigned, SIZE, var1uint8_1, var1uint8_2, var1uint8_3, var1uint8_4, "uint8_t multiple variable multiplies");
166 | 	test_variable4< uint8_t, custom_multiply_multiple_variable2<uint8_t> > (data8unsigned, SIZE, var1uint8_1, var1uint8_2, var1uint8_3, var1uint8_4, "uint8_t multiple variable multiplies2");
167 | 
168 | 	test_variable1< uint8_t, custom_divide_variable<uint8_t> > (data8unsigned, SIZE, var1uint8_1, "uint8_t variable divide");
169 | 	test_variable4< uint8_t, custom_divide_multiple_variable<uint8_t> > (data8unsigned, SIZE, var1uint8_1, var1uint8_2, var1uint8_3, var1uint8_4, "uint8_t multiple variable divides");
170 | 	test_variable4< uint8_t, custom_divide_multiple_variable2<uint8_t> > (data8unsigned, SIZE, var1uint8_1, var1uint8_2, var1uint8_3, var1uint8_4, "uint8_t multiple variable divides2");
171 | 	
172 | 	test_variable4< uint8_t, custom_mixed_multiple_variable<uint8_t> > (data8unsigned, SIZE, var1uint8_1, var1uint8_2, var1uint8_3, var1uint8_4, "uint8_t multiple variable mixed");
173 | 
174 | 	test_variable1< uint8_t, custom_variable_and<uint8_t> > (data8unsigned, SIZE, var1uint8_1, "uint8_t variable and");
175 | 	test_variable4< uint8_t, custom_multiple_variable_and<uint8_t> > (data8unsigned, SIZE, var1uint8_1, var1uint8_2, var1uint8_3, var1uint8_4, "uint8_t multiple variable and");
176 | 
177 | 	test_variable1< uint8_t, custom_variable_or<uint8_t> > (data8unsigned, SIZE, var1uint8_1, "uint8_t variable or");
178 | 	test_variable4< uint8_t, custom_multiple_variable_or<uint8_t> > (data8unsigned, SIZE, var1uint8_1, var1uint8_2, var1uint8_3, var1uint8_4, "uint8_t multiple variable or");
179 | 
180 | 	test_variable1< uint8_t, custom_variable_xor<uint8_t> > (data8unsigned, SIZE, var1uint8_1, "uint8_t variable xor");
181 | 	test_variable4< uint8_t, custom_multiple_variable_xor<uint8_t> > (data8unsigned, SIZE, var1uint8_1, var1uint8_2, var1uint8_3, var1uint8_4, "uint8_t multiple variable xor");
182 | 	
183 | 	summarize("uint8_t loop invariant", SIZE, iterations, kDontShowGMeans, kDontShowPenalty );
184 | 
185 | 
186 | // int16_t
187 | 	::fill(data16, data16+SIZE, int16_t(init_value));
188 | 	int16_t var1int16_1, var1int16_2, var1int16_3, var1int16_4;
189 | 	var1int16_1 = int16_t(temp);
190 | 	var1int16_2 = var1int16_1 * int16_t(2);
191 | 	var1int16_3 = var1int16_1 + int16_t(2);
192 | 	var1int16_4 = var1int16_1 + var1int16_2 / var1int16_3;
193 | 
194 | 	// test moving redundant calcs out of loop
195 | 	test_variable1< int16_t, custom_add_variable<int16_t> > (data16, SIZE, var1int16_1, "int16_t variable add");
196 | 	test_hoisted_variable1< int16_t, custom_add_variable<int16_t> > (data16, SIZE, var1int16_1, "int16_t variable add hoisted");
197 | 	test_variable4< int16_t, custom_add_multiple_variable<int16_t> > (data16, SIZE, var1int16_1, var1int16_2, var1int16_3, var1int16_4, "int16_t multiple variable adds");
198 | 
199 | 	test_variable1< int16_t, custom_sub_variable<int16_t> > (data16, SIZE, var1int16_1, "int16_t variable subtract");
200 | 	test_variable4< int16_t, custom_sub_multiple_variable<int16_t> > (data16, SIZE, var1int16_1, var1int16_2, var1int16_3, var1int16_4, "int16_t multiple variable subtracts");
201 | 	
202 | 	test_variable1< int16_t, custom_multiply_variable<int16_t> > (data16, SIZE, var1int16_1, "int16_t variable multiply");
203 | 	test_variable4< int16_t, custom_multiply_multiple_variable<int16_t> > (data16, SIZE, var1int16_1, var1int16_2, var1int16_3, var1int16_4, "int16_t multiple variable multiplies");
204 | 	test_variable4< int16_t, custom_multiply_multiple_variable2<int16_t> > (data16, SIZE, var1int16_1, var1int16_2, var1int16_3, var1int16_4, "int16_t multiple variable multiplies2");
205 | 
206 | 	test_variable1< int16_t, custom_divide_variable<int16_t> > (data16, SIZE, var1int16_1, "int16_t variable divide");
207 | 	test_variable4< int16_t, custom_divide_multiple_variable<int16_t> > (data16, SIZE, var1int16_1, var1int16_2, var1int16_3, var1int16_4, "int16_t multiple variable divides");
208 | 	test_variable4< int16_t, custom_divide_multiple_variable2<int16_t> > (data16, SIZE, var1int16_1, var1int16_2, var1int16_3, var1int16_4, "int16_t multiple variable divides2");
209 | 	
210 | 	test_variable4< int16_t, custom_mixed_multiple_variable<int16_t> > (data16, SIZE, var1int16_1, var1int16_2, var1int16_3, var1int16_4, "int16_t multiple variable mixed");
211 | 
212 | 	test_variable1< int16_t, custom_variable_and<int16_t> > (data16, SIZE, var1int16_1, "int16_t variable and");
213 | 	test_variable4< int16_t, custom_multiple_variable_and<int16_t> > (data16, SIZE, var1int16_1, var1int16_2, var1int16_3, var1int16_4, "int16_t multiple variable and");
214 | 
215 | 	test_variable1< int16_t, custom_variable_or<int16_t> > (data16, SIZE, var1int16_1, "int16_t variable or");
216 | 	test_variable4< int16_t, custom_multiple_variable_or<int16_t> > (data16, SIZE, var1int16_1, var1int16_2, var1int16_3, var1int16_4, "int16_t multiple variable or");
217 | 
218 | 	test_variable1< int16_t, custom_variable_xor<int16_t> > (data16, SIZE, var1int16_1, "int16_t variable xor");
219 | 	test_variable4< int16_t, custom_multiple_variable_xor<int16_t> > (data16, SIZE, var1int16_1, var1int16_2, var1int16_3, var1int16_4, "int16_t multiple variable xor");
220 | 	
221 | 	summarize("int16_t loop invariant", SIZE, iterations, kDontShowGMeans, kDontShowPenalty );
222 | 
223 | 
224 | // unsigned16
225 | 	::fill(data16unsigned, data16unsigned+SIZE, uint16_t(init_value));
226 | 	uint16_t var1uint16_1, var1uint16_2, var1uint16_3, var1uint16_4;
227 | 	var1uint16_1 = uint16_t(temp);
228 | 	var1uint16_2 = var1uint16_1 * uint16_t(2);
229 | 	var1uint16_3 = var1uint16_1 + uint16_t(2);
230 | 	var1uint16_4 = var1uint16_1 + var1uint16_2 / var1uint16_3;
231 | 
232 | 	// test moving redundant calcs out of loop
233 | 	test_variable1< uint16_t, custom_add_variable<uint16_t> > (data16unsigned, SIZE, var1uint16_1, "uint16_t variable add");
234 | 	test_hoisted_variable1< uint16_t, custom_add_variable<uint16_t> > (data16unsigned, SIZE, var1uint16_1, "uint16_t variable add hoisted");
235 | 	test_variable4< uint16_t, custom_add_multiple_variable<uint16_t> > (data16unsigned, SIZE, var1uint16_1, var1uint16_2, var1uint16_3, var1uint16_4, "uint16_t multiple variable adds");
236 | 
237 | 	test_variable1< uint16_t, custom_sub_variable<uint16_t> > (data16unsigned, SIZE, var1uint16_1, "uint16_t variable subtract");
238 | 	test_variable4< uint16_t, custom_sub_multiple_variable<uint16_t> > (data16unsigned, SIZE, var1uint16_1, var1uint16_2, var1uint16_3, var1uint16_4, "uint16_t multiple variable subtracts");
239 | 	
240 | 	test_variable1< uint16_t, custom_multiply_variable<uint16_t> > (data16unsigned, SIZE, var1uint16_1, "uint16_t variable multiply");
241 | 	test_variable4< uint16_t, custom_multiply_multiple_variable<uint16_t> > (data16unsigned, SIZE, var1uint16_1, var1uint16_2, var1uint16_3, var1uint16_4, "uint16_t multiple variable multiplies");
242 | 	test_variable4< uint16_t, custom_multiply_multiple_variable2<uint16_t> > (data16unsigned, SIZE, var1uint16_1, var1uint16_2, var1uint16_3, var1uint16_4, "uint16_t multiple variable multiplies2");
243 | 
244 | 	test_variable1< uint16_t, custom_divide_variable<uint16_t> > (data16unsigned, SIZE, var1uint16_1, "uint16_t variable divide");
245 | 	test_variable4< uint16_t, custom_divide_multiple_variable<uint16_t> > (data16unsigned, SIZE, var1uint16_1, var1uint16_2, var1uint16_3, var1uint16_4, "uint16_t multiple variable divides");
246 | 	test_variable4< uint16_t, custom_divide_multiple_variable2<uint16_t> > (data16unsigned, SIZE, var1uint16_1, var1uint16_2, var1uint16_3, var1uint16_4, "uint16_t multiple variable divides2");
247 | 	
248 | 	test_variable4< uint16_t, custom_mixed_multiple_variable<uint16_t> > (data16unsigned, SIZE, var1uint16_1, var1uint16_2, var1uint16_3, var1uint16_4, "uint16_t multiple variable mixed");
249 | 
250 | 	test_variable1< uint16_t, custom_variable_and<uint16_t> > (data16unsigned, SIZE, var1uint16_1, "uint16_t variable and");
251 | 	test_variable4< uint16_t, custom_multiple_variable_and<uint16_t> > (data16unsigned, SIZE, var1uint16_1, var1uint16_2, var1uint16_3, var1uint16_4, "uint16_t multiple variable and");
252 | 
253 | 	test_variable1< uint16_t, custom_variable_or<uint16_t> > (data16unsigned, SIZE, var1uint16_1, "uint16_t variable or");
254 | 	test_variable4< uint16_t, custom_multiple_variable_or<uint16_t> > (data16unsigned, SIZE, var1uint16_1, var1uint16_2, var1uint16_3, var1uint16_4, "uint16_t multiple variable or");
255 | 
256 | 	test_variable1< uint16_t, custom_variable_xor<uint16_t> > (data16unsigned, SIZE, var1uint16_1, "uint16_t variable xor");
257 | 	test_variable4< uint16_t, custom_multiple_variable_xor<uint16_t> > (data16unsigned, SIZE, var1uint16_1, var1uint16_2, var1uint16_3, var1uint16_4, "uint16_t multiple variable xor");
258 | 	
259 | 	summarize("uint16_t loop invariant", SIZE, iterations, kDontShowGMeans, kDontShowPenalty );
260 | 
261 | 
262 | // int32_t
263 | 	::fill(data32, data32+SIZE, int32_t(init_value));
264 | 	int32_t var1int32_1, var1int32_2, var1int32_3, var1int32_4;
265 | 	var1int32_1 = int32_t(temp);
266 | 	var1int32_2 = var1int32_1 * int32_t(2);
267 | 	var1int32_3 = var1int32_1 + int32_t(2);
268 | 	var1int32_4 = var1int32_1 + var1int32_2 / var1int32_3;
269 | 
270 | 	// test moving redundant calcs out of loop
271 | 	test_variable1< int32_t, custom_add_variable<int32_t> > (data32, SIZE, var1int32_1, "int32_t variable add");
272 | 	test_hoisted_variable1< int32_t, custom_add_variable<int32_t> > (data32, SIZE, var1int32_1, "int32_t variable add hoisted");
273 | 	test_variable4< int32_t, custom_add_multiple_variable<int32_t> > (data32, SIZE, var1int32_1, var1int32_2, var1int32_3, var1int32_4, "int32_t multiple variable adds");
274 | 
275 | 	test_variable1< int32_t, custom_sub_variable<int32_t> > (data32, SIZE, var1int32_1, "int32_t variable subtract");
276 | 	test_variable4< int32_t, custom_sub_multiple_variable<int32_t> > (data32, SIZE, var1int32_1, var1int32_2, var1int32_3, var1int32_4, "int32_t multiple variable subtracts");
277 | 	
278 | 	test_variable1< int32_t, custom_multiply_variable<int32_t> > (data32, SIZE, var1int32_1, "int32_t variable multiply");
279 | 	test_variable4< int32_t, custom_multiply_multiple_variable<int32_t> > (data32, SIZE, var1int32_1, var1int32_2, var1int32_3, var1int32_4, "int32_t multiple variable multiplies");
280 | 	test_variable4< int32_t, custom_multiply_multiple_variable2<int32_t> > (data32, SIZE, var1int32_1, var1int32_2, var1int32_3, var1int32_4, "int32_t multiple variable multiplies2");
281 | 
282 | 	test_variable1< int32_t, custom_divide_variable<int32_t> > (data32, SIZE, var1int32_1, "int32_t variable divide");
283 | 	test_variable4< int32_t, custom_divide_multiple_variable<int32_t> > (data32, SIZE, var1int32_1, var1int32_2, var1int32_3, var1int32_4, "int32_t multiple variable divides");
284 | 	test_variable4< int32_t, custom_divide_multiple_variable2<int32_t> > (data32, SIZE, var1int32_1, var1int32_2, var1int32_3, var1int32_4, "int32_t multiple variable divides2");
285 | 	
286 | 	test_variable4< int32_t, custom_mixed_multiple_variable<int32_t> > (data32, SIZE, var1int32_1, var1int32_2, var1int32_3, var1int32_4, "int32_t multiple variable mixed");
287 | 
288 | 	test_variable1< int32_t, custom_variable_and<int32_t> > (data32, SIZE, var1int32_1, "int32_t variable and");
289 | 	test_variable4< int32_t, custom_multiple_variable_and<int32_t> > (data32, SIZE, var1int32_1, var1int32_2, var1int32_3, var1int32_4, "int32_t multiple variable and");
290 | 
291 | 	test_variable1< int32_t, custom_variable_or<int32_t> > (data32, SIZE, var1int32_1, "int32_t variable or");
292 | 	test_variable4< int32_t, custom_multiple_variable_or<int32_t> > (data32, SIZE, var1int32_1, var1int32_2, var1int32_3, var1int32_4, "int32_t multiple variable or");
293 | 
294 | 	test_variable1< int32_t, custom_variable_xor<int32_t> > (data32, SIZE, var1int32_1, "int32_t variable xor");
295 | 	test_variable4< int32_t, custom_multiple_variable_xor<int32_t> > (data32, SIZE, var1int32_1, var1int32_2, var1int32_3, var1int32_4, "int32_t multiple variable xor");
296 | 	
297 | 	summarize("int32_t loop invariant", SIZE, iterations, kDontShowGMeans, kDontShowPenalty );
298 | 
299 | 
300 | // unsigned32
301 | 	::fill(data32unsigned, data32unsigned+SIZE, uint32_t(init_value));
302 | 	uint32_t var1uint32_1, var1uint32_2, var1uint32_3, var1uint32_4;
303 | 	var1uint32_1 = uint32_t(temp);
304 | 	var1uint32_2 = var1uint32_1 * uint32_t(2);
305 | 	var1uint32_3 = var1uint32_1 + uint32_t(2);
306 | 	var1uint32_4 = var1uint32_1 + var1uint32_2 / var1uint32_3;
307 | 	
308 | 	// test moving redundant calcs out of loop
309 | 	test_variable1< uint32_t, custom_add_variable<uint32_t> > (data32unsigned, SIZE, var1uint32_1, "uint32_t variable add");
310 | 	test_hoisted_variable1< uint32_t, custom_add_variable<uint32_t> > (data32unsigned, SIZE, var1uint32_1, "uint32_t variable add hoisted");
311 | 	test_variable4< uint32_t, custom_add_multiple_variable<uint32_t> > (data32unsigned, SIZE, var1uint32_1, var1uint32_2, var1uint32_3, var1uint32_4, "uint32_t multiple variable adds");
312 | 
313 | 	test_variable1< uint32_t, custom_sub_variable<uint32_t> > (data32unsigned, SIZE, var1uint32_1, "uint32_t variable subtract");
314 | 	test_variable4< uint32_t, custom_sub_multiple_variable<uint32_t> > (data32unsigned, SIZE, var1uint32_1, var1uint32_2, var1uint32_3, var1uint32_4, "uint32_t multiple variable subtracts");
315 | 	
316 | 	test_variable1< uint32_t, custom_multiply_variable<uint32_t> > (data32unsigned, SIZE, var1uint32_1, "uint32_t variable multiply");
317 | 	test_variable4< uint32_t, custom_multiply_multiple_variable<uint32_t> > (data32unsigned, SIZE, var1uint32_1, var1uint32_2, var1uint32_3, var1uint32_4, "uint32_t multiple variable multiplies");
318 | 	test_variable4< uint32_t, custom_multiply_multiple_variable2<uint32_t> > (data32unsigned, SIZE, var1uint32_1, var1uint32_2, var1uint32_3, var1uint32_4, "uint32_t multiple variable multiplies2");
319 | 
320 | 	test_variable1< uint32_t, custom_divide_variable<uint32_t> > (data32unsigned, SIZE, var1uint32_1, "uint32_t variable divide");
321 | 	test_variable4< uint32_t, custom_divide_multiple_variable<uint32_t> > (data32unsigned, SIZE, var1uint32_1, var1uint32_2, var1uint32_3, var1uint32_4, "uint32_t multiple variable divides");
322 | 	test_variable4< uint32_t, custom_divide_multiple_variable2<uint32_t> > (data32unsigned, SIZE, var1uint32_1, var1uint32_2, var1uint32_3, var1uint32_4, "uint32_t multiple variable divides2");
323 | 	
324 | 	test_variable4< uint32_t, custom_mixed_multiple_variable<uint32_t> > (data32unsigned, SIZE, var1uint32_1, var1uint32_2, var1uint32_3, var1uint32_4, "uint32_t multiple variable mixed");
325 | 
326 | 	test_variable1< uint32_t, custom_variable_and<uint32_t> > (data32unsigned, SIZE, var1uint32_1, "uint32_t variable and");
327 | 	test_variable4< uint32_t, custom_multiple_variable_and<uint32_t> > (data32unsigned, SIZE, var1uint32_1, var1uint32_2, var1uint32_3, var1uint32_4, "uint32_t multiple variable and");
328 | 
329 | 	test_variable1< uint32_t, custom_variable_or<uint32_t> > (data32unsigned, SIZE, var1uint32_1, "uint32_t variable or");
330 | 	test_variable4< uint32_t, custom_multiple_variable_or<uint32_t> > (data32unsigned, SIZE, var1uint32_1, var1uint32_2, var1uint32_3, var1uint32_4, "uint32_t multiple variable or");
331 | 
332 | 	test_variable1< uint32_t, custom_variable_xor<uint32_t> > (data32unsigned, SIZE, var1uint32_1, "uint32_t variable xor");
333 | 	test_variable4< uint32_t, custom_multiple_variable_xor<uint32_t> > (data32unsigned, SIZE, var1uint32_1, var1uint32_2, var1uint32_3, var1uint32_4, "uint32_t multiple variable xor");
334 | 	
335 | 	summarize("uint32_t loop invariant", SIZE, iterations, kDontShowGMeans, kDontShowPenalty );
336 | 	
337 | 
338 | // int64_t
339 | 	::fill(data64, data64+SIZE, int64_t(init_value));
340 | 	int64_t var1int64_1, var1int64_2, var1int64_3, var1int64_4;
341 | 	var1int64_1 = int64_t(temp);
342 | 	var1int64_2 = var1int64_1 * int64_t(2);
343 | 	var1int64_3 = var1int64_1 + int64_t(2);
344 | 	var1int64_4 = var1int64_1 + var1int64_2 / var1int64_3;
345 | 
346 | 	// test moving redundant calcs out of loop
347 | 	test_variable1< int64_t, custom_add_variable<int64_t> > (data64, SIZE, var1int64_1, "int64_t variable add");
348 | 	test_hoisted_variable1< int64_t, custom_add_variable<int64_t> > (data64, SIZE, var1int64_1, "int64_t variable add hoisted");
349 | 	test_variable4< int64_t, custom_add_multiple_variable<int64_t> > (data64, SIZE, var1int64_1, var1int64_2, var1int64_3, var1int64_4, "int64_t multiple variable adds");
350 | 
351 | 	test_variable1< int64_t, custom_sub_variable<int64_t> > (data64, SIZE, var1int64_1, "int64_t variable subtract");
352 | 	test_variable4< int64_t, custom_sub_multiple_variable<int64_t> > (data64, SIZE, var1int64_1, var1int64_2, var1int64_3, var1int64_4, "int64_t multiple variable subtracts");
353 | 	
354 | 	test_variable1< int64_t, custom_multiply_variable<int64_t> > (data64, SIZE, var1int64_1, "int64_t variable multiply");
355 | 	test_variable4< int64_t, custom_multiply_multiple_variable<int64_t> > (data64, SIZE, var1int64_1, var1int64_2, var1int64_3, var1int64_4, "int64_t multiple variable multiplies");
356 | 	test_variable4< int64_t, custom_multiply_multiple_variable2<int64_t> > (data64, SIZE, var1int64_1, var1int64_2, var1int64_3, var1int64_4, "int64_t multiple variable multiplies2");
357 | 
358 | 	test_variable1< int64_t, custom_divide_variable<int64_t> > (data64, SIZE, var1int64_1, "int64_t variable divide");
359 | 	test_variable4< int64_t, custom_divide_multiple_variable<int64_t> > (data64, SIZE, var1int64_1, var1int64_2, var1int64_3, var1int64_4, "int64_t multiple variable divides");
360 | 	test_variable4< int64_t, custom_divide_multiple_variable2<int64_t> > (data64, SIZE, var1int64_1, var1int64_2, var1int64_3, var1int64_4, "int64_t multiple variable divides2");
361 | 	
362 | 	test_variable4< int64_t, custom_mixed_multiple_variable<int64_t> > (data64, SIZE, var1int64_1, var1int64_2, var1int64_3, var1int64_4, "int64_t multiple variable mixed");
363 | 
364 | 	test_variable1< int64_t, custom_variable_and<int64_t> > (data64, SIZE, var1int64_1, "int64_t variable and");
365 | 	test_variable4< int64_t, custom_multiple_variable_and<int64_t> > (data64, SIZE, var1int64_1, var1int64_2, var1int64_3, var1int64_4, "int64_t multiple variable and");
366 | 
367 | 	test_variable1< int64_t, custom_variable_or<int64_t> > (data64, SIZE, var1int64_1, "int64_t variable or");
368 | 	test_variable4< int64_t, custom_multiple_variable_or<int64_t> > (data64, SIZE, var1int64_1, var1int64_2, var1int64_3, var1int64_4, "int64_t multiple variable or");
369 | 
370 | 	test_variable1< int64_t, custom_variable_xor<int64_t> > (data64, SIZE, var1int64_1, "int64_t variable xor");
371 | 	test_variable4< int64_t, custom_multiple_variable_xor<int64_t> > (data64, SIZE, var1int64_1, var1int64_2, var1int64_3, var1int64_4, "int64_t multiple variable xor");
372 | 	
373 | 	summarize("int64_t loop invariant", SIZE, iterations, kDontShowGMeans, kDontShowPenalty );
374 | 
375 | 
376 | // unsigned64
377 | 	::fill(data64unsigned, data64unsigned+SIZE, uint64_t(init_value));
378 | 	uint64_t var1uint64_1, var1uint64_2, var1uint64_3, var1uint64_4;
379 | 	var1uint64_1 = uint64_t(temp);
380 | 	var1uint64_2 = var1uint64_1 * uint64_t(2);
381 | 	var1uint64_3 = var1uint64_1 + uint64_t(2);
382 | 	var1uint64_4 = var1uint64_1 + var1uint64_2 / var1uint64_3;
383 | 
384 | 	// test moving redundant calcs out of loop
385 | 	test_variable1< uint64_t, custom_add_variable<uint64_t> > (data64unsigned, SIZE, var1uint64_1, "uint64_t variable add");
386 | 	test_hoisted_variable1< uint64_t, custom_add_variable<uint64_t> > (data64unsigned, SIZE, var1uint64_1, "uint64_t variable add hoisted");
387 | 	test_variable4< uint64_t, custom_add_multiple_variable<uint64_t> > (data64unsigned, SIZE, var1uint64_1, var1uint64_2, var1uint64_3, var1uint64_4, "uint64_t multiple variable adds");
388 | 
389 | 	test_variable1< uint64_t, custom_sub_variable<uint64_t> > (data64unsigned, SIZE, var1uint64_1, "uint64_t variable subtract");
390 | 	test_variable4< uint64_t, custom_sub_multiple_variable<uint64_t> > (data64unsigned, SIZE, var1uint64_1, var1uint64_2, var1uint64_3, var1uint64_4, "uint64_t multiple variable subtracts");
391 | 	
392 | 	test_variable1< uint64_t, custom_multiply_variable<uint64_t> > (data64unsigned, SIZE, var1uint64_1, "uint64_t variable multiply");
393 | 	test_variable4< uint64_t, custom_multiply_multiple_variable<uint64_t> > (data64unsigned, SIZE, var1uint64_1, var1uint64_2, var1uint64_3, var1uint64_4, "uint64_t multiple variable multiplies");
394 | 	test_variable4< uint64_t, custom_multiply_multiple_variable2<uint64_t> > (data64unsigned, SIZE, var1uint64_1, var1uint64_2, var1uint64_3, var1uint64_4, "uint64_t multiple variable multiplies2");
395 | 
396 | 	test_variable1< uint64_t, custom_divide_variable<uint64_t> > (data64unsigned, SIZE, var1uint64_1, "uint64_t variable divide");
397 | 	test_variable4< uint64_t, custom_divide_multiple_variable<uint64_t> > (data64unsigned, SIZE, var1uint64_1, var1uint64_2, var1uint64_3, var1uint64_4, "uint64_t multiple variable divides");
398 | 	test_variable4< uint64_t, custom_divide_multiple_variable2<uint64_t> > (data64unsigned, SIZE, var1uint64_1, var1uint64_2, var1uint64_3, var1uint64_4, "uint64_t multiple variable divides2");
399 | 	
400 | 	test_variable4< uint64_t, custom_mixed_multiple_variable<uint64_t> > (data64unsigned, SIZE, var1uint64_1, var1uint64_2, var1uint64_3, var1uint64_4, "uint64_t multiple variable mixed");
401 | 
402 | 	test_variable1< uint64_t, custom_variable_and<uint64_t> > (data64unsigned, SIZE, var1uint64_1, "uint64_t variable and");
403 | 	test_variable4< uint64_t, custom_multiple_variable_and<uint64_t> > (data64unsigned, SIZE, var1uint64_1, var1uint64_2, var1uint64_3, var1uint64_4, "uint64_t multiple variable and");
404 | 
405 | 	test_variable1< uint64_t, custom_variable_or<uint64_t> > (data64unsigned, SIZE, var1uint64_1, "uint64_t variable or");
406 | 	test_variable4< uint64_t, custom_multiple_variable_or<uint64_t> > (data64unsigned, SIZE, var1uint64_1, var1uint64_2, var1uint64_3, var1uint64_4, "uint64_t multiple variable or");
407 | 
408 | 	test_variable1< uint64_t, custom_variable_xor<uint64_t> > (data64unsigned, SIZE, var1uint64_1, "uint64_t variable xor");
409 | 	test_variable4< uint64_t, custom_multiple_variable_xor<uint64_t> > (data64unsigned, SIZE, var1uint64_1, var1uint64_2, var1uint64_3, var1uint64_4, "uint64_t multiple variable xor");
410 | 	
411 | 	summarize("uint64_t loop invariant", SIZE, iterations, kDontShowGMeans, kDontShowPenalty );
412 | 
413 | 
414 | 
415 | // float
416 | 	::fill(dataFloat, dataFloat+SIZE, float(init_value));
417 | 	float var1Float_1, var1Float_2, var1Float_3, var1Float_4;
418 | 	var1Float_1 = float(temp);
419 | 	var1Float_2 = var1Float_1 * float(2.0);
420 | 	var1Float_3 = var1Float_1 + float(2.0);
421 | 	var1Float_4 = var1Float_1 + var1Float_2 / var1Float_3;
422 | 
423 | 	// test moving redundant calcs out of loop
424 | 	test_variable1< float, custom_add_variable<float> > (dataFloat, SIZE, var1Float_1, "float variable add");
425 | 	test_hoisted_variable1< float, custom_add_variable<float> > (dataFloat, SIZE, var1Float_1, "float variable add hoisted");
426 | 	test_variable4< float, custom_add_multiple_variable<float> > (dataFloat, SIZE, var1Float_1, var1Float_2, var1Float_3, var1Float_4, "float multiple variable adds");
427 | 
428 | 	test_variable1< float, custom_sub_variable<float> > (dataFloat, SIZE, var1Float_1, "float variable subtract");
429 | 	test_variable4< float, custom_sub_multiple_variable<float> > (dataFloat, SIZE, var1Float_1, var1Float_2, var1Float_3, var1Float_4, "float multiple variable subtracts");
430 | 	
431 | 	test_variable1< float, custom_multiply_variable<float> > (dataFloat, SIZE, var1Float_1, "float variable multiply");
432 | 	test_variable4< float, custom_multiply_multiple_variable<float> > (dataFloat, SIZE, var1Float_1, var1Float_2, var1Float_3, var1Float_4, "float multiple variable multiplies");
433 | 	test_variable4< float, custom_multiply_multiple_variable2<float> > (dataFloat, SIZE, var1Float_1, var1Float_2, var1Float_3, var1Float_4, "float multiple variable multiplies2");
434 | 
435 | 	test_variable1< float, custom_divide_variable<float> > (dataFloat, SIZE, var1Float_1, "float variable divide");
436 | 	test_variable4< float, custom_divide_multiple_variable<float> > (dataFloat, SIZE, var1Float_1, var1Float_2, var1Float_3, var1Float_4, "float multiple variable divides");
437 | 	test_variable4< float, custom_divide_multiple_variable2<float> > (dataFloat, SIZE, var1Float_1, var1Float_2, var1Float_3, var1Float_4, "float multiple variable divides2");
438 | 	
439 | 	test_variable4< float, custom_mixed_multiple_variable<float> > (dataFloat, SIZE, var1Float_1, var1Float_2, var1Float_3, var1Float_4, "float multiple variable mixed");
440 | 	
441 | 	summarize("float loop invariant", SIZE, iterations, kDontShowGMeans, kDontShowPenalty );
442 | 
443 | 
444 | // double
445 | 	::fill(dataDouble, dataDouble+SIZE, double(init_value));
446 | 	double var1Double_1, var1Double_2, var1Double_3, var1Double_4;
447 | 	var1Double_1 = double(temp);
448 | 	var1Double_2 = var1Double_1 * double(2.0);
449 | 	var1Double_3 = var1Double_1 + double(2.0);
450 | 	var1Double_4 = var1Double_1 + var1Double_2 / var1Double_3;
451 | 
452 | 	// test moving redundant calcs out of loop
453 | 	test_variable1< double, custom_add_variable<double> > (dataDouble, SIZE, var1Double_1, "double variable add");
454 | 	test_hoisted_variable1< double, custom_add_variable<double> > (dataDouble, SIZE, var1Double_1, "double variable add hoisted");
455 | 	test_variable4< double, custom_add_multiple_variable<double> > (dataDouble, SIZE, var1Double_1, var1Double_2, var1Double_3, var1Double_4, "double multiple variable adds");
456 | 	
457 | 	test_variable1< double, custom_sub_variable<double> > (dataDouble, SIZE, var1Double_1, "double variable subtract");
458 | 	test_variable4< double, custom_sub_multiple_variable<double> > (dataDouble, SIZE, var1Double_1, var1Double_2, var1Double_3, var1Double_4, "double multiple variable subtracts");
459 | 	
460 | 	test_variable1< double, custom_multiply_variable<double> > (dataDouble, SIZE, var1Double_1, "double variable multiply");
461 | 	test_variable4< double, custom_multiply_multiple_variable<double> > (dataDouble, SIZE, var1Double_1, var1Double_2, var1Double_3, var1Double_4, "double multiple variable multiplies");
462 | 	test_variable4< double, custom_multiply_multiple_variable2<double> > (dataDouble, SIZE, var1Double_1, var1Double_2, var1Double_3, var1Double_4, "double multiple variable multiplies2");
463 | 
464 | 	test_variable1< double, custom_divide_variable<double> > (dataDouble, SIZE, var1Double_1, "double variable divide");
465 | 	test_variable4< double, custom_divide_multiple_variable<double> > (dataDouble, SIZE, var1Double_1, var1Double_2, var1Double_3, var1Double_4, "double multiple variable divides");
466 | 	test_variable4< double, custom_divide_multiple_variable2<double> > (dataDouble, SIZE, var1Double_1, var1Double_2, var1Double_3, var1Double_4, "double multiple variable divides2");
467 | 	
468 | 	test_variable4< double, custom_mixed_multiple_variable<double> > (dataDouble, SIZE, var1Double_1, var1Double_2, var1Double_3, var1Double_4, "double multiple variable mixed");
469 | 	
470 | 	summarize("double loop invariant", SIZE, iterations, kDontShowGMeans, kDontShowPenalty );
471 | 
472 | 	
473 | 	return 0;
474 | }
475 | 
476 | // the end
477 | /******************************************************************************/
478 | /******************************************************************************/
479 | 


--------------------------------------------------------------------------------
/src/stepanov_abstraction.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |     Copyright 2007-2008 Adobe Systems Incorporated
  3 |     Distributed under the MIT License (see accompanying file LICENSE_1_0_0.txt
  4 |     or a copy at http://stlab.adobe.com/licenses.html )
  5 | 
  6 | 
  7 | Goal:  examine any change in performance when adding abstraction to simple data types
  8 | 	in other words:  what happens when adding {} around a type.
  9 | 
 10 | 
 11 | Assumptions:
 12 | 	
 13 | 	1) A value wrapped in a struct or class should not perform worse than a raw value
 14 | 	
 15 | 	2) A value recursively wrapped in a struct or class should not perform worse than the raw value
 16 | 
 17 | 
 18 | History:
 19 | 	Alex Stepanov created the abstraction penalty benchmark. 
 20 | 	Recently, Alex suggested that I take ownership of his benchmark and extend it.
 21 | 	
 22 | 	The original accumulation tests used to show large penalties for using abstraction,
 23 | 	but compilers have improved.  I have added three sorting tests with non-trivial
 24 | 	value and pointer usage that show some compilers still have more
 25 | 	opportunities for optimization.
 26 | 	
 27 | 	Chris Cox
 28 | 	February 2008
 29 | 
 30 | */
 31 | 
 32 | #include <cstddef>
 33 | #include <cstdio>
 34 | #include <ctime>
 35 | #include <cmath>
 36 | #include <cstdlib>
 37 | #include "benchmark_results.h"
 38 | #include "benchmark_timer.h"
 39 | #include "benchmark_algorithms.h"
 40 | 
 41 | /******************************************************************************/
 42 | 
 43 | // ValueWrapper<T>: a single value of type T wrapped in a struct; nesting ValueWrapper< ValueWrapper<...> > wraps it recursively.
 44 | // It converts to any type TT through a C-style cast and is constructible from any type that can initialize T.
 45 | template <typename T>
 46 | struct ValueWrapper {
 47 | 	T value;	// the wrapped value
 48 | 	ValueWrapper() {}	// 'value' is default-initialized here (no explicit initializer)
 49 | 	template<typename TT>
 50 | 		inline operator TT () const { return (TT)value; }	// conversion to any requested type TT
 51 | 	template<typename TT>
 52 | 		ValueWrapper(const TT& x) : value(x) {}	// implicit converting constructor: initializes 'value' from x
 53 | 	T& operator*() const { return *value; }	// NOTE(review): dereferences 'value' itself; would not compile if instantiated for T = double - looks unused, confirm
 54 | };
 55 | // sum of two wrappers: adds the wrapped values and wraps the result again
 56 | template <typename T>
 57 | inline ValueWrapper<T> operator+(const ValueWrapper<T>& x, const ValueWrapper<T>& y) {
 58 | 	return ValueWrapper<T>(x.value + y.value);
 59 | }
 60 | // orders two wrappers by comparing their wrapped values with <
 61 | template <typename T>
 62 | inline bool operator<(const ValueWrapper<T>& x, const ValueWrapper<T>& y) {
 63 | 	return (x.value < y.value);
 64 | }
 65 | 
 66 | /******************************************************************************/
 67 | // a double wrapped once, and a double wrapped ten levels deep
 68 | typedef ValueWrapper<double>	DoubleValueWrapper;
 69 | typedef ValueWrapper< ValueWrapper< ValueWrapper< ValueWrapper< ValueWrapper< ValueWrapper< ValueWrapper< ValueWrapper< ValueWrapper< ValueWrapper<double> > > > > > > > > >	DoubleValueWrapper10;
 70 | 
 71 | /******************************************************************************/
 72 | 
 73 | // PointerWrapper<T>: a raw T* wrapped in a struct, aka an iterator.
 74 | // Increment, decrement, offset, distance and comparison are supplied by the free operator templates that follow.
 75 | template<typename T>
 76 | struct PointerWrapper {
 77 | 	T* current;	// the wrapped pointer
 78 | 	PointerWrapper() {}	// 'current' is given no value here
 79 | 	PointerWrapper(T* x) : current(x) {}	// implicit conversion from a raw pointer
 80 | 	T& operator*() const { return *current; }	// dereferences the wrapped pointer
 81 | };
 82 | 
 83 | // distance between two wrapped pointers; returns ptrdiff_t because (ptr - ptr) --> ptrdiff_t.
 84 | // Neither operand is modified, so both are taken by const reference (const objects and temporaries are accepted too).
 85 | template <typename T>
 86 | inline ptrdiff_t operator-(const PointerWrapper<T>& xx, const PointerWrapper<T>& yy) {
 87 | 	return (ptrdiff_t)( xx.current - yy.current );
 88 | }
 89 | // pre-increment: advances the wrapped pointer by one element and returns the same wrapper by reference
 90 | template <typename T>
 91 | inline PointerWrapper<T>& operator++(PointerWrapper<T> &xx) {
 92 | 	++xx.current;
 93 | 	return xx;
 94 | }
 95 | // pre-decrement: moves the wrapped pointer back by one element and returns the same wrapper by reference
 96 | template <typename T>
 97 | inline PointerWrapper<T>& operator--(PointerWrapper<T> &xx) {
 98 | 	--xx.current;
 99 | 	return xx;
100 | }
101 | // post-increment: advances xx (via the pre-increment operator) and returns a copy holding the previous position
102 | template <typename T>
103 | inline PointerWrapper<T> operator++(PointerWrapper<T> &xx, int) {
104 | 	PointerWrapper<T> tmp = xx;
105 | 	++xx;
106 | 	return tmp;
107 | }
108 | // post-decrement: moves xx back (via the pre-decrement operator) and returns a copy holding the previous position
109 | template <typename T>
110 | inline PointerWrapper<T> operator--(PointerWrapper<T> &xx, int) {
111 | 	PointerWrapper<T> tmp = xx;
112 | 	--xx;
113 | 	return tmp;
114 | }
115 | // returns a wrapper 'inc' elements before xx; xx is only copied, so it is taken by const reference (accepts const objects and temporaries)
116 | template <typename T>
117 | inline PointerWrapper<T> operator-(const PointerWrapper<T> &xx, ptrdiff_t inc) {
118 | 	PointerWrapper<T> tmp = xx;
119 | 	tmp.current -= inc;
120 | 	return tmp;
121 | }
122 | // returns a wrapper 'inc' elements after xx; xx is only copied, so it is taken by const reference (accepts const objects and temporaries)
123 | template <typename T>
124 | inline PointerWrapper<T> operator+(const PointerWrapper<T> &xx, ptrdiff_t inc) {
125 | 	PointerWrapper<T> tmp = xx;
126 | 	tmp.current += inc;
127 | 	return tmp;
128 | }
129 | // in-place advance: moves the wrapped pointer forward by 'inc' elements and returns the same wrapper by reference
130 | template <typename T>
131 | inline PointerWrapper<T>& operator+=(PointerWrapper<T> &xx, ptrdiff_t inc) {
132 | 	xx.current += inc;
133 | 	return xx;
134 | }
135 | // in-place retreat: moves the wrapped pointer back by 'inc' elements and returns the same wrapper by reference
136 | template <typename T>
137 | inline PointerWrapper<T>& operator-=(PointerWrapper<T> &xx, ptrdiff_t inc) {
138 | 	xx.current -= inc;
139 | 	return xx;
140 | }
141 | // orders two wrappers by comparing the wrapped raw pointers with <
142 | template <typename T>
143 | inline bool operator<(const PointerWrapper<T>& x, const PointerWrapper<T>& y) {
144 | 	return (x.current < y.current);
145 | }
146 | // two wrappers are equal when they hold the same raw pointer
147 | template <typename T>
148 | inline bool operator==(const PointerWrapper<T>& x, const PointerWrapper<T>& y) {
149 | 	return (x.current == y.current);
150 | }
151 | // two wrappers differ when they hold different raw pointers
152 | template <typename T>
153 | inline bool operator!=(const PointerWrapper<T>& x, const PointerWrapper<T>& y) {
154 | 	return (x.current != y.current);
155 | }
156 | 
157 | /******************************************************************************/
158 | // wrapped-pointer (iterator) types over a raw double, the one-level wrapper and the ten-level wrapper
159 | typedef PointerWrapper<double> double_pointer;
160 | typedef PointerWrapper<DoubleValueWrapper> doubleValueWrapper_pointer;
161 | typedef PointerWrapper<DoubleValueWrapper10> doubleValueWrapper10_pointer;
162 | 
163 | /******************************************************************************/
164 | /******************************************************************************/
165 | 
166 | // this constant may need to be adjusted to give reasonable minimum times
167 | // For best results, times should be about 1.0 seconds for the minimum test run
168 | int iterations = 2000000;	// default repeat count; main() replaces it with argv[1] when given and rescales it for the sort tests
169 | 
170 | // 2000 items, or about 16k of data
171 | // this is intended to remain within the L2 cache of most common CPUs
172 | const int SIZE = 2000;	// element count of every global array in this file
173 | 
174 | // initial value for filling our arrays, may be changed from the command line
175 | double init_value = 3.0;	// main() replaces it with argv[2] when given; check_sum() expects sums equal to SIZE * init_value
176 | 
177 | /******************************************************************************/
178 | /******************************************************************************/
179 | // prints a failure message when 'result' is not exactly SIZE * init_value, i.e. the sum of SIZE elements that all equal init_value
180 | inline void check_sum(double result) {
181 |   if (result != SIZE * init_value) printf("test %i failed\n", current_test);	// current_test is not defined in this file; presumably it comes from an included benchmark header - confirm
182 | }
183 | 
184 | /******************************************************************************/
185 | // prints a failure message when is_sorted() reports that [first, last) is not sorted
186 | template <typename Iterator>
187 | void verify_sorted(Iterator first, Iterator last) {
188 | 	if (!is_sorted(first,last))	// unqualified call: which is_sorted is used depends on the included headers
189 | 		printf("sort test %i failed\n", current_test);
190 | }
191 | 
192 | /******************************************************************************/
193 | /******************************************************************************/
194 | // runs accumulate() over [first, last) 'iterations' times between start_timer() and timer(), checks every sum, and hands timer()'s result plus 'label' to record_result()
195 | template <typename Iterator, typename T>
196 | void test_accumulate(Iterator first, Iterator last, T zero, const char *label) {
197 |   int i;
198 |   
199 |   start_timer();
200 |   
201 |   for(i = 0; i < iterations; ++i)
202 | 	check_sum( double( accumulate(first, last, zero) ) );	// 'zero' is the initial value passed to accumulate(); the sum is converted to double before checking
203 | 	
204 |   record_result( timer(), label );
205 | }
206 | 
207 | /******************************************************************************/
208 | // repeats 'iterations' times: copy [firstSource, lastSource) to firstDest, insertionSort [firstDest, lastDest), verify; then hands timer()'s result plus 'label' to record_result()
209 | template <typename Iterator, typename T>
210 | void test_insertion_sort(Iterator firstSource, Iterator lastSource, Iterator firstDest,
211 | 						Iterator lastDest, T zero, const char *label) {	// 'zero' is never read; it only lets T be deduced at the call site
212 | 	int i;
213 | 
214 | 	start_timer();
215 | 
216 | 	for(i = 0; i < iterations; ++i) {
217 | 		::copy(firstSource, lastSource, firstDest);	// restore the unsorted input; runs between start_timer() and timer()
218 | 		insertionSort< Iterator, T>( firstDest, lastDest );
219 | 		verify_sorted( firstDest, lastDest );	// the check also runs between start_timer() and timer()
220 | 	}
221 | 	
222 | 	record_result( timer(), label );
223 | }
224 | 
225 | /******************************************************************************/
226 | // repeats 'iterations' times: copy [firstSource, lastSource) to firstDest, quicksort [firstDest, lastDest), verify; then hands timer()'s result plus 'label' to record_result()
227 | template <typename Iterator, typename T>
228 | void test_quicksort(Iterator firstSource, Iterator lastSource, Iterator firstDest,
229 | 					Iterator lastDest, T zero, const char *label) {	// 'zero' is never read; it only lets T be deduced at the call site
230 | 	int i;
231 | 
232 | 	start_timer();
233 | 
234 | 	for(i = 0; i < iterations; ++i) {
235 | 		::copy(firstSource, lastSource, firstDest);	// restore the unsorted input; runs between start_timer() and timer()
236 | 		quicksort< Iterator, T>( firstDest, lastDest );
237 | 		verify_sorted( firstDest, lastDest );	// the check also runs between start_timer() and timer()
238 | 	}
239 | 	
240 | 	record_result( timer(), label );
241 | }
242 | 
243 | /******************************************************************************/
244 | // repeats 'iterations' times: copy [firstSource, lastSource) to firstDest, heapsort [firstDest, lastDest), verify; then hands timer()'s result plus 'label' to record_result()
245 | template <typename Iterator, typename T>
246 | void test_heap_sort(Iterator firstSource, Iterator lastSource, Iterator firstDest,
247 | 					Iterator lastDest, T zero, const char *label) {	// 'zero' is never read; it only lets T be deduced at the call site
248 | 	int i;
249 | 
250 | 	start_timer();
251 | 
252 | 	for(i = 0; i < iterations; ++i) {
253 | 		::copy(firstSource, lastSource, firstDest);	// restore the unsorted input; runs between start_timer() and timer()
254 | 		heapsort< Iterator, T>( firstDest, lastDest );
255 | 		verify_sorted( firstDest, lastDest );	// the check also runs between start_timer() and timer()
256 | 	}
257 | 	
258 | 	record_result( timer(), label );
259 | }
260 | 
261 | /******************************************************************************/
262 | /******************************************************************************/
263 | 
264 | // global working arrays: main() fills them with init_value for the accumulate tests, then the sort tests use them as destinations
265 | 
266 | double data[SIZE];
267 | DoubleValueWrapper VData[SIZE];
268 | DoubleValueWrapper10 V10Data[SIZE];
269 | // "master" arrays: main() fills these once with random values; every sort pass copies them into the working arrays above
270 | double dataMaster[SIZE];
271 | DoubleValueWrapper VDataMaster[SIZE];
272 | DoubleValueWrapper10 V10DataMaster[SIZE];
273 | 
274 | /******************************************************************************/
275 | 
276 | // iterator types and begin/end pairs; in the names, b = begin, e = one past the end, and an 'M' marks the master (source) arrays
277 | typedef double* dp;	// raw pointer over double
278 | dp dpb = data;
279 | dp dpe = data + SIZE;
280 | dp dMpb = dataMaster;
281 | dp dMpe = dataMaster + SIZE;
282 | 
283 | typedef DoubleValueWrapper* DVp;	// raw pointer over the one-level wrapper
284 | DVp DVpb = VData;
285 | DVp DVpe = VData + SIZE;
286 | DVp DVMpb = VDataMaster;
287 | DVp DVMpe = VDataMaster + SIZE;
288 | 
289 | typedef DoubleValueWrapper10* DV10p;	// raw pointer over the ten-level wrapper
290 | DV10p DV10pb = V10Data;
291 | DV10p DV10pe = V10Data + SIZE;
292 | DV10p DV10Mpb = V10DataMaster;
293 | DV10p DV10Mpe = V10DataMaster + SIZE;
294 | 
295 | typedef double_pointer dP;	// PointerWrapper over double (capital P = wrapped pointer), built from the raw pointers above
296 | dP dPb(dpb);
297 | dP dPe(dpe);
298 | dP dMPb(dMpb);
299 | dP dMPe(dMpe);
300 | 
301 | typedef doubleValueWrapper_pointer DVP;	// PointerWrapper over the one-level wrapper
302 | DVP DVPb(DVpb);
303 | DVP DVPe(DVpe);
304 | DVP DVMPb(DVMpb);
305 | DVP DVMPe(DVMpe);
306 | 
307 | typedef doubleValueWrapper10_pointer DV10P;	// PointerWrapper over the ten-level wrapper
308 | DV10P DV10Pb(DV10pb);
309 | DV10P DV10Pe(DV10pe);
310 | DV10P DV10MPb(DV10Mpb);
311 | DV10P DV10MPe(DV10Mpe);
312 | 
313 | /******************************************************************************/
314 | /******************************************************************************/
315 | // usage: <program> [iterations] [init_value] - runs the accumulate, insertion sort, quicksort and heap sort suites, calling summarize() after each
316 | int main(int argc, char** argv) {
317 | 
318 | 	double dZero = 0.0;
319 | 	DoubleValueWrapper DVZero = 0.0;
320 | 	DoubleValueWrapper10 DV10Zero = DoubleValueWrapper10(0.0);
321 | 
322 | 	// output command for documentation:
323 | 	int i;
324 | 	for (i = 0; i < argc; ++i)
325 | 		printf("%s ", argv[i] );
326 | 	printf("\n");
327 | 
328 | 	if (argc > 1) iterations = atoi(argv[1]);
329 | 	if (argc > 2) init_value = (double) atof(argv[2]);
330 | 
331 | 	// seed the random number generator so we get repeatable results
332 | 	srand( (int)init_value + 123 );
333 | 
334 | 
335 | 	fill(dpb, dpe, double(init_value));
336 | 	fill(DVpb, DVpe, DoubleValueWrapper(init_value));
337 | 	fill(DV10pb, DV10pe, DoubleValueWrapper10(init_value));
338 | 
339 | 	test_accumulate(dpb, dpe, dZero, "double pointer");
340 | 	test_accumulate(dPb, dPe, dZero, "double pointer_class");
341 | 	test_accumulate(DVpb, DVpe, DVZero, "DoubleValueWrapper pointer");
342 | 	test_accumulate(DVPb, DVPe, DVZero, "DoubleValueWrapper pointer_class");
343 | 	test_accumulate(DV10pb, DV10pe, DV10Zero, "DoubleValueWrapper10 pointer");
344 | 	test_accumulate(DV10Pb, DV10Pe, DV10Zero, "DoubleValueWrapper10 pointer_class");
345 | 
346 | 	summarize("Abstraction Accumulate", SIZE, iterations, kShowGMeans, kShowPenalty );
347 | 
348 | 
349 | 	// the sorting tests are much slower than the accumulation tests - O(N^2)
350 | 	iterations = iterations / 2000;
351 | 	if (iterations < 1) iterations = 1;	// a command-line count below 2000 would otherwise truncate to zero sort iterations
352 | 	// fill one set of random numbers
353 | 	fill_random<double *, double>( dMpb, dMpe );
354 | 	// copy to the other sets, so we have the same numbers
355 | 	::copy( dMpb, dMpe, DVMpb );
356 | 	::copy( dMpb, dMpe, DV10Mpb );
357 | 
358 | 	test_insertion_sort(dMpb, dMpe, dpb, dpe, dZero, "insertion_sort double pointer");
359 | 	test_insertion_sort(dMPb, dMPe, dPb, dPe, dZero, "insertion_sort double pointer_class");
360 | 	test_insertion_sort(DVMpb, DVMpe, DVpb, DVpe, DVZero, "insertion_sort DoubleValueWrapper pointer");
361 | 	test_insertion_sort(DVMPb, DVMPe, DVPb, DVPe, DVZero, "insertion_sort DoubleValueWrapper pointer_class");
362 | 	test_insertion_sort(DV10Mpb, DV10Mpe, DV10pb, DV10pe, DV10Zero, "insertion_sort DoubleValueWrapper10 pointer");
363 | 	test_insertion_sort(DV10MPb, DV10MPe, DV10Pb, DV10Pe, DV10Zero, "insertion_sort DoubleValueWrapper10 pointer_class");
364 | 	
365 | 	summarize("Abstraction Insertion Sort", SIZE, iterations, kShowGMeans, kShowPenalty );
366 | 
367 | 
368 | 	// quicksort and heap sort are faster - O(N*log2(N)) - so they run 8 times as many iterations
369 | 	iterations = iterations * 8;
370 | 
371 | 	test_quicksort(dMpb, dMpe, dpb, dpe, dZero, "quicksort double pointer");
372 | 	test_quicksort(dMPb, dMPe, dPb, dPe, dZero, "quicksort double pointer_class");
373 | 	test_quicksort(DVMpb, DVMpe, DVpb, DVpe, DVZero, "quicksort DoubleValueWrapper pointer");
374 | 	test_quicksort(DVMPb, DVMPe, DVPb, DVPe, DVZero, "quicksort DoubleValueWrapper pointer_class");
375 | 	test_quicksort(DV10Mpb, DV10Mpe, DV10pb, DV10pe, DV10Zero, "quicksort DoubleValueWrapper10 pointer");
376 | 	test_quicksort(DV10MPb, DV10MPe, DV10Pb, DV10Pe, DV10Zero, "quicksort DoubleValueWrapper10 pointer_class");
377 | 
378 | 	summarize("Abstraction Quicksort", SIZE, iterations, kShowGMeans, kShowPenalty );
379 | 
380 | 
381 | 	test_heap_sort(dMpb, dMpe, dpb, dpe, dZero, "heap_sort double pointer");
382 | 	test_heap_sort(dMPb, dMPe, dPb, dPe, dZero, "heap_sort double pointer_class");
383 | 	test_heap_sort(DVMpb, DVMpe, DVpb, DVpe, DVZero, "heap_sort DoubleValueWrapper pointer");
384 | 	test_heap_sort(DVMPb, DVMPe, DVPb, DVPe, DVZero, "heap_sort DoubleValueWrapper pointer_class");
385 | 	test_heap_sort(DV10Mpb, DV10Mpe, DV10pb, DV10pe, DV10Zero, "heap_sort DoubleValueWrapper10 pointer");
386 | 	test_heap_sort(DV10MPb, DV10MPe, DV10Pb, DV10Pe, DV10Zero, "heap_sort DoubleValueWrapper10 pointer_class");
387 | 	
388 | 	summarize("Abstraction Heap Sort", SIZE, iterations, kShowGMeans, kShowPenalty );
389 | 
390 | 
391 | 	return 0;
392 | }
393 | 
394 | // the end
395 | /******************************************************************************/
396 | /******************************************************************************/
397 | 


--------------------------------------------------------------------------------
/src/stepanov_vector.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |     Copyright 2007-2008 Adobe Systems Incorporated
  3 |     Distributed under the MIT License (see accompanying file LICENSE_1_0_0.txt
  4 |     or a copy at http://stlab.adobe.com/licenses.html )
  5 | 
  6 | 
  7 | Goal:  examine any change in performance when moving from pointers to vector iterators
  8 | 
  9 | 
 10 | Assumptions:
 11 | 	1) Vector iterators should not perform worse than raw pointers.
 12 | 	
 13 | 		Programmers should never be tempted to write
 14 | 			std::sort( &*vec.begin(), &*( vec.begin() + vec.size() ) )
 15 | 		instead of
 16 | 			std::sort( vec.begin(), vec.end() )
 17 | 
 18 | History:
 19 | 	This is an extension to Alex Stepanov's original abstraction penalty benchmark
 20 | 	to test the compiler vendor implementation of vector iterators.
 21 | 
 22 | */
 23 | 
 24 | #include <cstddef>
 25 | #include <cstdio>
 26 | #include <ctime>
 27 | #include <cmath>
 28 | #include <cstdlib>
 29 | #include <vector>
 30 | #include "benchmark_results.h"
 31 | #include "benchmark_timer.h"
 32 | #include "benchmark_algorithms.h"
 33 | 
 34 | /******************************************************************************/
 35 | /******************************************************************************/
 36 | 
 37 | // this constant may need to be adjusted to give reasonable minimum times
 38 | // For best results, times should be about 1.0 seconds for the minimum test run
 39 | int iterations = 1500000;	// default repeat count; main() replaces it with argv[1] when given
 40 | 
 41 | // 2000 items, or about 16k of data
 42 | // this is intended to remain within the L2 cache of most common CPUs
 43 | const int SIZE = 2000;	// element count of the global arrays and of the vector that main() resizes
 44 | 
 45 | // initial value for filling our arrays, may be changed from the command line
 46 | double init_value = 3.0;	// main() replaces it with argv[2] when given; check_sum() expects sums equal to SIZE * init_value
 47 | 
 48 | /******************************************************************************/
 49 | /******************************************************************************/
 50 | // prints a failure message when 'result' is not exactly SIZE * init_value, i.e. the sum of SIZE elements that all equal init_value
 51 | inline void check_sum(double result) {
 52 |   if (result != SIZE * init_value) printf("test %i failed\n", current_test);	// current_test is not defined in this file; presumably it comes from an included benchmark header - confirm
 53 | }
 54 | 
 55 | /******************************************************************************/
 56 | // prints a failure message when is_sorted() reports that [first, last) is not sorted
 57 | template <typename Iterator>
 58 | void verify_sorted(Iterator first, Iterator last) {
 59 | 	if (!is_sorted(first,last))	// unqualified call: which is_sorted is used depends on the included headers
 60 | 		printf("sort test %i failed\n", current_test);
 61 | }
 62 | 
 63 | /******************************************************************************/
 64 | 
 65 | // a template using the accumulate template and iterators:
 66 | // runs accumulate() over [first, last) 'iterations' times between start_timer() and timer(), checks every sum, and hands timer()'s result plus 'label' to record_result()
 67 | template <typename Iterator, typename T>
 68 | void test_accumulate(Iterator first, Iterator last, T zero, const char *label) {
 69 | 	int i;
 70 | 
 71 | 	start_timer();
 72 | 
 73 | 	for(i = 0; i < iterations; ++i)
 74 | 		check_sum( double( accumulate(first, last, zero) ) );	// 'zero' is the initial value passed to accumulate(); the sum is converted to double before checking
 75 | 
 76 | 	record_result( timer(), label );
 77 | }
 78 | 
 79 | /******************************************************************************/
 80 | // repeats 'iterations' times: copy [firstSource, lastSource) to firstDest, insertionSort [firstDest, lastDest), verify; then hands timer()'s result plus 'label' to record_result()
 81 | template <typename Iterator, typename T>
 82 | void test_insertion_sort(Iterator firstSource, Iterator lastSource, Iterator firstDest,
 83 | 						Iterator lastDest, T zero, const char *label) {	// 'zero' is never read; it only lets T be deduced at the call site
 84 | 	int i;
 85 | 
 86 | 	start_timer();
 87 | 
 88 | 	for(i = 0; i < iterations; ++i) {
 89 | 		::copy(firstSource, lastSource, firstDest);	// restore the unsorted input; runs between start_timer() and timer()
 90 | 		insertionSort< Iterator, T>( firstDest, lastDest );
 91 | 		verify_sorted( firstDest, lastDest );	// the check also runs between start_timer() and timer()
 92 | 	}
 93 | 	
 94 | 	record_result( timer(), label );
 95 | }
 96 | 
 97 | /******************************************************************************/
 98 | // repeats 'iterations' times: copy [firstSource, lastSource) to firstDest, quicksort [firstDest, lastDest), verify; then hands timer()'s result plus 'label' to record_result()
 99 | template <typename Iterator, typename T>
100 | void test_quicksort(Iterator firstSource, Iterator lastSource, Iterator firstDest,
101 | 					Iterator lastDest, T zero, const char *label) {	// 'zero' is never read; it only lets T be deduced at the call site
102 | 	int i;
103 | 
104 | 	start_timer();
105 | 
106 | 	for(i = 0; i < iterations; ++i) {
107 | 		::copy(firstSource, lastSource, firstDest);	// restore the unsorted input; runs between start_timer() and timer()
108 | 		quicksort< Iterator, T>( firstDest, lastDest );
109 | 		verify_sorted( firstDest, lastDest );	// the check also runs between start_timer() and timer()
110 | 	}
111 | 	
112 | 	record_result( timer(), label );
113 | }
114 | 
115 | /******************************************************************************/
116 | // repeats 'iterations' times: copy [firstSource, lastSource) to firstDest, heapsort [firstDest, lastDest), verify; then hands timer()'s result plus 'label' to record_result()
117 | template <typename Iterator, typename T>
118 | void test_heap_sort(Iterator firstSource, Iterator lastSource, Iterator firstDest,
119 | 					Iterator lastDest, T zero, const char *label) {	// 'zero' is never read; it only lets T be deduced at the call site
120 | 	int i;
121 | 
122 | 	start_timer();
123 | 
124 | 	for(i = 0; i < iterations; ++i) {
125 | 		::copy(firstSource, lastSource, firstDest);	// restore the unsorted input; runs between start_timer() and timer()
126 | 		heapsort< Iterator, T>( firstDest, lastDest );
127 | 		verify_sorted( firstDest, lastDest );	// the check also runs between start_timer() and timer()
128 | 	}
129 | 	
130 | 	record_result( timer(), label );
131 | }
132 | 
133 | /******************************************************************************/
134 | /******************************************************************************/
135 | 
136 | // our global arrays of numbers: main() fills 'data' with init_value for the accumulate tests
137 | // NOTE(review): 'dataMaster' is presumably the unsorted source for the sort tests - confirm against the rest of main()
138 | double data[SIZE];
139 | double dataMaster[SIZE];
140 | 
141 | /******************************************************************************/
142 | 
143 | // iterator types and begin/end pairs; in the names, b = begin, e = one past the end, and an 'M' marks the master array
144 | typedef double* dp;	// raw pointer over double
145 | dp dpb = data;
146 | dp dpe = data + SIZE;
147 | dp dMpb = dataMaster;
148 | dp dMpe = dataMaster + SIZE;
149 | 
150 | typedef std::reverse_iterator<dp> rdp;	// reversed raw pointer; NOTE(review): <iterator> is not included directly in this file - presumably reached through <vector>, confirm
151 | rdp rdpb(dpe);	// the reversed range begins at the forward end
152 | rdp rdpe(dpb);
153 | rdp rdMpb(dMpe);
154 | rdp rdMpe(dMpb);
155 | 
156 | typedef std::reverse_iterator<rdp> rrdp;	// reversed twice: built from the reversed end/begin pair
157 | rrdp rrdpb(rdpe);
158 | rrdp rrdpe(rdpb);
159 | rrdp rrdMpb(rdMpe);
160 | rrdp rrdMpe(rdMpb);
161 | 
162 | typedef std::vector<double>::iterator vdp;	// the vector's own iterator
163 | 
164 | typedef std::vector<double>::reverse_iterator rvdp;	// the vector's own reverse iterator
165 | typedef std::reverse_iterator< vdp > rtvdp;	// std::reverse_iterator applied to the vector iterator
166 | 
167 | typedef std::reverse_iterator<rvdp> rtrvdp;	// reverse of the vector's reverse iterator
168 | typedef std::reverse_iterator< rtvdp > rtrtvdp;	// reverse of the reversed vector iterator
169 | 
170 | 
171 | /******************************************************************************/
172 | /******************************************************************************/
173 | 
174 | 
175 | int main(int argc, char** argv) {
176 | 	// zero value of type double, passed to every test_* call below
177 | 	double dZero = 0.0;
178 | 
179 | 	// output command for documentation:
180 | 	int i;
181 | 	for (i = 0; i < argc; ++i)
182 | 		printf("%s ", argv[i] );
183 | 	printf("\n");
184 | 	// optional overrides: argv[1] replaces iterations, argv[2] replaces init_value
185 | 	if (argc > 1) iterations = atoi(argv[1]);
186 | 	if (argc > 2) init_value = (double) atof(argv[2]);
187 | 	
188 | 	// seed the random number generator so we get repeatable results
189 | 	srand( (int)init_value + 123 );
190 | 	
191 | 	// accumulate tests: the global array and an equally sized vector are both filled with init_value
192 | 	::fill(dpb, dpe, double(init_value));
193 | 	
194 | 	std::vector<double>   vec_data;
195 | 	vec_data.resize(SIZE);
196 | 
197 | 	::fill(vec_data.begin(), vec_data.end(), double(init_value));
198 | 	// explicit std::reverse_iterator wrappers around the vector's forward iterators
199 | 	rtvdp rtvdpb(vec_data.end());
200 | 	rtvdp rtvdpe(vec_data.begin());
201 | 	// the vector's own reverse iterators, reversed again
202 | 	rtrvdp rtrvdpb(vec_data.rend());
203 | 	rtrvdp rtrvdpe(vec_data.rbegin());
204 | 	// the explicit wrappers above, reversed again
205 | 	rtrtvdp rtrtvdpb(rtvdpe);
206 | 	rtrtvdp rtrtvdpe(rtvdpb);
207 | 	// run the same test over each iterator type; the label names the type used
208 | 	test_accumulate(dpb, dpe, dZero, "double pointer verify2");
209 | 	test_accumulate(vec_data.begin(), vec_data.end(), dZero, "double vector iterator");
210 | 	test_accumulate(rdpb, rdpe, dZero, "double pointer reverse");
211 | 	test_accumulate(vec_data.rbegin(), vec_data.rend(), dZero, "double vector reverse_iterator");
212 | 	test_accumulate(rtvdpb, rtvdpe, dZero, "double vector iterator reverse");
213 | 	test_accumulate(rrdpb, rrdpe, dZero, "double pointer reverse reverse");
214 | 	test_accumulate(rtrvdpb, rtrvdpe, dZero, "double vector reverse_iterator reverse");
215 | 	test_accumulate(rtrtvdpb, rtrtvdpe, dZero, "double vector iterator reverse reverse");
216 | 
217 | 	summarize("Vector accumulate", SIZE, iterations, kShowGMeans, kShowPenalty );
218 | 
219 | 
220 | 
221 | 	// the sorting tests are much slower than the accumulation tests (insertion sort is O(N^2)), so run 1/1000 of the passes
222 | 	iterations = iterations / 1000;
223 | 	// NOTE(review): presumably integer division - fewer than 1000 iterations leaves 0 passes here; confirm that is acceptable
224 | 	std::vector<double>   vec_dataMaster;
225 | 	vec_dataMaster.resize(SIZE);
226 | 	
227 | 	// fill one set of random numbers
228 | 	fill_random<double *, double>( dMpb, dMpe );
229 | 	
230 | 	// copy to the other sets, so we have the same numbers
231 | 	::copy( dMpb, dMpe, vec_dataMaster.begin() );
232 | 	// iterator wrappers over vec_dataMaster, built the same way as those over vec_data
233 | 	rtvdp rtvdMpb(vec_dataMaster.end());
234 | 	rtvdp rtvdMpe(vec_dataMaster.begin());
235 | 	
236 | 	rtrvdp rtrvdMpb(vec_dataMaster.rend());
237 | 	rtrvdp rtrvdMpe(vec_dataMaster.rbegin());
238 | 	
239 | 	rtrtvdp rtrtvdMpb(rtvdMpe);
240 | 	rtrtvdp rtrtvdMpe(rtvdMpb);
241 | 	// each test receives a master range followed by a data/vec_data range of the same iterator type
242 | 	test_insertion_sort(dMpb, dMpe, dpb, dpe, dZero, "insertion_sort double pointer verify2");
243 | 	test_insertion_sort(vec_dataMaster.begin(), vec_dataMaster.end(), vec_data.begin(), vec_data.end(), dZero, "insertion_sort double vector iterator");
244 | 	test_insertion_sort(rdMpb, rdMpe, rdpb, rdpe, dZero, "insertion_sort double pointer reverse");
245 | 	test_insertion_sort(vec_dataMaster.rbegin(), vec_dataMaster.rend(), vec_data.rbegin(), vec_data.rend(), dZero, "insertion_sort double vector reverse_iterator");
246 | 	test_insertion_sort(rtvdMpb, rtvdMpe, rtvdpb, rtvdpe, dZero, "insertion_sort double vector iterator reverse");
247 | 	test_insertion_sort(rrdMpb, rrdMpe, rrdpb, rrdpe, dZero, "insertion_sort double pointer reverse reverse");
248 | 	test_insertion_sort(rtrvdMpb, rtrvdMpe, rtrvdpb, rtrvdpe, dZero, "insertion_sort double vector reverse_iterator reverse");
249 | 	test_insertion_sort(rtrtvdMpb, rtrtvdMpe, rtrtvdpb, rtrtvdpe, dZero, "insertion_sort double vector iterator reverse reverse");
250 | 
251 | 	summarize("Vector Insertion Sort", SIZE, iterations, kShowGMeans, kShowPenalty );
252 | 
253 | 	
254 | 	// quicksort and heap sort are faster - O(N*log2(N)) - so give them 8x the passes
255 | 	iterations = iterations * 8;
256 | 	
257 | 	test_quicksort(dMpb, dMpe, dpb, dpe, dZero, "quicksort double pointer verify2");
258 | 	test_quicksort(vec_dataMaster.begin(), vec_dataMaster.end(), vec_data.begin(), vec_data.end(), dZero, "quicksort double vector iterator");
259 | 	test_quicksort(rdMpb, rdMpe, rdpb, rdpe, dZero, "quicksort double pointer reverse");
260 | 	test_quicksort(vec_dataMaster.rbegin(), vec_dataMaster.rend(), vec_data.rbegin(), vec_data.rend(), dZero, "quicksort double vector reverse_iterator");
261 | 	test_quicksort(rtvdMpb, rtvdMpe, rtvdpb, rtvdpe, dZero, "quicksort double vector iterator reverse");
262 | 	test_quicksort(rrdMpb, rrdMpe, rrdpb, rrdpe, dZero, "quicksort double pointer reverse reverse");
263 | 	test_quicksort(rtrvdMpb, rtrvdMpe, rtrvdpb, rtrvdpe, dZero, "quicksort double vector reverse_iterator reverse");
264 | 	test_quicksort(rtrtvdMpb, rtrtvdMpe, rtrtvdpb, rtrtvdpe, dZero, "quicksort double vector iterator reverse reverse");
265 | 
266 | 	summarize("Vector Quicksort", SIZE, iterations, kShowGMeans, kShowPenalty );
267 | 
268 | 	// heap sort runs with the same iteration count as quicksort
269 | 	test_heap_sort(dMpb, dMpe, dpb, dpe, dZero, "heap_sort double pointer verify2");
270 | 	test_heap_sort(vec_dataMaster.begin(), vec_dataMaster.end(), vec_data.begin(), vec_data.end(), dZero, "heap_sort double vector iterator");
271 | 	test_heap_sort(rdMpb, rdMpe, rdpb, rdpe, dZero, "heap_sort double pointer reverse");
272 | 	test_heap_sort(vec_dataMaster.rbegin(), vec_dataMaster.rend(), vec_data.rbegin(), vec_data.rend(), dZero, "heap_sort double vector reverse_iterator");
273 | 	test_heap_sort(rtvdMpb, rtvdMpe, rtvdpb, rtvdpe, dZero, "heap_sort double vector iterator reverse");
274 | 	test_heap_sort(rrdMpb, rrdMpe, rrdpb, rrdpe, dZero, "heap_sort double pointer reverse reverse");
275 | 	test_heap_sort(rtrvdMpb, rtrvdMpe, rtrvdpb, rtrvdpe, dZero, "heap_sort double vector reverse_iterator reverse");
276 | 	test_heap_sort(rtrtvdMpb, rtrtvdMpe, rtrtvdpb, rtrtvdpe, dZero, "heap_sort double vector iterator reverse reverse");
277 | 
278 | 	summarize("Vector Heap Sort", SIZE, iterations, kShowGMeans, kShowPenalty );
279 | 
280 | 
281 | 
282 | 	return 0;
283 | }
284 | 
285 | // the end
286 | /******************************************************************************/
287 | /******************************************************************************/
288 | 


--------------------------------------------------------------------------------