├── .gitignore ├── LICENSE ├── README.md ├── bench.py ├── benchmark.bash ├── build.py ├── plot.bash ├── plot_cpu_architecture.sh ├── plot_reports.py ├── reports ├── Xeon-E3-1230-v5 │ ├── multi-thread │ │ ├── clang │ │ │ ├── v0_baseline.txt │ │ │ ├── v1_linear_reading.txt │ │ │ ├── v2_instr_level_parallelism.txt │ │ │ ├── v3_simd.txt │ │ │ ├── v4_register_reuse.txt │ │ │ ├── v5_more_register_reuse.txt │ │ │ ├── v6_prefetch.txt │ │ │ └── v7_cache_reuse.txt │ │ ├── gcc │ │ │ ├── v0_baseline.txt │ │ │ ├── v1_linear_reading.txt │ │ │ ├── v2_instr_level_parallelism.txt │ │ │ ├── v3_simd.txt │ │ │ ├── v4_register_reuse.txt │ │ │ ├── v5_more_register_reuse.txt │ │ │ ├── v6_prefetch.txt │ │ │ └── v7_cache_reuse.txt │ │ └── rustc │ │ │ ├── v0_baseline.txt │ │ │ ├── v1_linear_reading.txt │ │ │ ├── v2_instr_level_parallelism.txt │ │ │ ├── v3_simd.txt │ │ │ ├── v4_register_reuse.txt │ │ │ ├── v5_more_register_reuse.txt │ │ │ ├── v6_prefetch.txt │ │ │ └── v7_cache_reuse.txt │ └── single-thread │ │ ├── clang │ │ ├── v0_baseline.txt │ │ ├── v1_linear_reading.txt │ │ ├── v2_instr_level_parallelism.txt │ │ ├── v3_simd.txt │ │ ├── v4_register_reuse.txt │ │ ├── v5_more_register_reuse.txt │ │ ├── v6_prefetch.txt │ │ └── v7_cache_reuse.txt │ │ ├── gcc │ │ ├── v0_baseline.txt │ │ ├── v1_linear_reading.txt │ │ ├── v2_instr_level_parallelism.txt │ │ ├── v3_simd.txt │ │ ├── v4_register_reuse.txt │ │ ├── v5_more_register_reuse.txt │ │ ├── v6_prefetch.txt │ │ └── v7_cache_reuse.txt │ │ └── rustc │ │ ├── v0_baseline.txt │ │ ├── v1_linear_reading.txt │ │ ├── v2_instr_level_parallelism.txt │ │ ├── v3_simd.txt │ │ ├── v4_register_reuse.txt │ │ ├── v5_more_register_reuse.txt │ │ ├── v6_prefetch.txt │ │ └── v7_cache_reuse.txt ├── i5-4690k │ ├── multi-thread │ │ ├── clang │ │ │ ├── v0_baseline.txt │ │ │ ├── v1_linear_reading.txt │ │ │ ├── v2_instr_level_parallelism.txt │ │ │ ├── v3_simd.txt │ │ │ ├── v4_register_reuse.txt │ │ │ ├── v5_more_register_reuse.txt │ │ │ ├── v6_prefetch.txt │ │ │ └── 
v7_cache_reuse.txt │ │ ├── gcc │ │ │ ├── v0_baseline.txt │ │ │ ├── v1_linear_reading.txt │ │ │ ├── v2_instr_level_parallelism.txt │ │ │ ├── v3_simd.txt │ │ │ ├── v4_register_reuse.txt │ │ │ ├── v5_more_register_reuse.txt │ │ │ ├── v6_prefetch.txt │ │ │ └── v7_cache_reuse.txt │ │ └── rustc │ │ │ ├── v0_baseline.txt │ │ │ ├── v1_linear_reading.txt │ │ │ ├── v2_instr_level_parallelism.txt │ │ │ ├── v3_simd.txt │ │ │ ├── v4_register_reuse.txt │ │ │ ├── v5_more_register_reuse.txt │ │ │ ├── v6_prefetch.txt │ │ │ └── v7_cache_reuse.txt │ └── single-thread │ │ ├── clang │ │ ├── v0_baseline.txt │ │ ├── v1_linear_reading.txt │ │ ├── v2_instr_level_parallelism.txt │ │ ├── v3_simd.txt │ │ ├── v4_register_reuse.txt │ │ ├── v5_more_register_reuse.txt │ │ ├── v6_prefetch.txt │ │ └── v7_cache_reuse.txt │ │ ├── gcc │ │ ├── v0_baseline.txt │ │ ├── v1_linear_reading.txt │ │ ├── v2_instr_level_parallelism.txt │ │ ├── v3_simd.txt │ │ ├── v4_register_reuse.txt │ │ ├── v5_more_register_reuse.txt │ │ ├── v6_prefetch.txt │ │ └── v7_cache_reuse.txt │ │ └── rustc │ │ ├── v0_baseline.txt │ │ ├── v1_linear_reading.txt │ │ ├── v2_instr_level_parallelism.txt │ │ ├── v3_simd.txt │ │ ├── v4_register_reuse.txt │ │ ├── v5_more_register_reuse.txt │ │ ├── v6_prefetch.txt │ │ └── v7_cache_reuse.txt └── i5-8250U │ ├── multi-thread │ ├── clang │ │ ├── v0_baseline.txt │ │ ├── v1_linear_reading.txt │ │ ├── v2_instr_level_parallelism.txt │ │ ├── v3_simd.txt │ │ ├── v4_register_reuse.txt │ │ ├── v5_more_register_reuse.txt │ │ ├── v6_prefetch.txt │ │ └── v7_cache_reuse.txt │ ├── gcc │ │ ├── v0_baseline.txt │ │ ├── v1_linear_reading.txt │ │ ├── v2_instr_level_parallelism.txt │ │ ├── v3_simd.txt │ │ ├── v4_register_reuse.txt │ │ ├── v5_more_register_reuse.txt │ │ ├── v6_prefetch.txt │ │ └── v7_cache_reuse.txt │ └── rustc │ │ ├── v0_baseline.txt │ │ ├── v1_linear_reading.txt │ │ ├── v2_instr_level_parallelism.txt │ │ ├── v3_simd.txt │ │ ├── v4_register_reuse.txt │ │ ├── v5_more_register_reuse.txt │ │ ├── 
v6_prefetch.txt │ │ └── v7_cache_reuse.txt │ └── single-thread │ ├── clang │ ├── v0_baseline.txt │ ├── v1_linear_reading.txt │ ├── v2_instr_level_parallelism.txt │ ├── v3_simd.txt │ ├── v4_register_reuse.txt │ ├── v5_more_register_reuse.txt │ ├── v6_prefetch.txt │ └── v7_cache_reuse.txt │ ├── gcc │ ├── v0_baseline.txt │ ├── v1_linear_reading.txt │ ├── v2_instr_level_parallelism.txt │ ├── v3_simd.txt │ ├── v4_register_reuse.txt │ ├── v5_more_register_reuse.txt │ ├── v6_prefetch.txt │ └── v7_cache_reuse.txt │ └── rustc │ ├── v0_baseline.txt │ ├── v1_linear_reading.txt │ ├── v2_instr_level_parallelism.txt │ ├── v3_simd.txt │ ├── v4_register_reuse.txt │ ├── v5_more_register_reuse.txt │ ├── v6_prefetch.txt │ └── v7_cache_reuse.txt ├── src ├── CMakeLists.txt ├── cpp │ ├── tools │ │ └── simd.hpp │ ├── v0_baseline │ │ └── step.cpp │ ├── v1_linear_reading │ │ └── step.cpp │ ├── v2_instr_level_parallelism │ │ └── step.cpp │ ├── v3_simd │ │ └── step.cpp │ ├── v4_register_reuse │ │ └── step.cpp │ ├── v5_more_register_reuse │ │ └── step.cpp │ ├── v6_prefetch │ │ └── step.cpp │ └── v7_cache_reuse │ │ └── step.cpp ├── main │ ├── main.cpp │ ├── step.hpp │ ├── step_reference.cpp │ └── step_reference.hpp ├── rust │ ├── tools │ │ ├── Cargo.toml │ │ └── src │ │ │ ├── lib.rs │ │ │ ├── simd.rs │ │ │ └── timer.rs │ ├── v0_baseline │ │ ├── Cargo.toml │ │ └── src │ │ │ └── lib.rs │ ├── v1_linear_reading │ │ ├── Cargo.toml │ │ └── src │ │ │ ├── bad_loop.rs │ │ │ ├── lib.rs │ │ │ └── not_terrible_loop.rs │ ├── v2_instr_level_parallelism │ │ ├── Cargo.toml │ │ └── src │ │ │ ├── lib.rs │ │ │ └── no_autovec.rs │ ├── v3_simd │ │ ├── Cargo.toml │ │ └── src │ │ │ └── lib.rs │ ├── v4_register_reuse │ │ ├── Cargo.toml │ │ └── src │ │ │ └── lib.rs │ ├── v5_more_register_reuse │ │ ├── Cargo.toml │ │ └── src │ │ │ └── lib.rs │ ├── v6_prefetch │ │ ├── Cargo.toml │ │ └── src │ │ │ ├── lib.rs │ │ │ └── spilling.rs │ └── v7_cache_reuse │ │ ├── Cargo.toml │ │ └── src │ │ └── lib.rs └── 
step_implementations.txt └── test.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.sw[op] 2 | *.zip 3 | *.rar 4 | *.txt 5 | *.doc[x] 6 | *.pdf 7 | *.lock 8 | 9 | build 10 | target 11 | compile_commands.json 12 | CMake* 13 | !CMakeLists.txt 14 | cmake_install.cmake 15 | Makefile 16 | 17 | perf* 18 | 19 | *.py[co] 20 | __pycache__ 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Matias Lindgren 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Performance comparison of parallel Rust and C++ 2 | 3 | This project compares the behaviour and performance of two solutions to a simple graph problem, called the shortcut problem. 4 | The reference solution, written in C++, and a description of the shortcut problem can be found [here](http://ppc.cs.aalto.fi/ch2/). 5 | The reference solution will be compared to a [Rust](https://github.com/rust-lang/rust) implementation, which is provided by this project. 6 | 7 | This repository contains the benchmark program and source code of all `step`-function implementations. 8 | A human-readable explanation of the Rust implementations can be found on [this page](https://parallel-rust-cpp.github.io/). 9 | 10 | 11 | ## Running the benchmarks 12 | 13 | Run the whole pipeline with a smaller, debug benchmark size to check everything is working (should not take more than 15 minutes): 14 | ```bash 15 | bash benchmark.bash --debug 16 | ``` 17 | If you want to run the same benchmarks as described [here](https://parallel-rust-cpp.github.io/), run without `--debug` (might take a few hours): 18 | ```bash 19 | bash benchmark.bash 20 | ``` 21 | -------------------------------------------------------------------------------- /plot.bash: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | cpus=(i5-8250U i5-4690k Xeon-E3-1230-v5) 5 | output_dir=plots 6 | 7 | for cpu in ${cpus[*]}; do 8 | mkdir --parents --verbose ${output_dir}/${cpu} 9 | python3 plot_reports.py \ 10 | reports/${cpu}/single-thread \ 11 | --title Single-core \ 12 | --output-path ${output_dir}/${cpu}/single_core.png 13 | python3 plot_reports.py \ 14 | reports/${cpu}/multi-thread \ 15 | --title Multi-core \ 16 | --output-path ${output_dir}/${cpu}/multi_core.png 17 | done 18 | 
-------------------------------------------------------------------------------- /plot_cpu_architecture.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | set -e 3 | 4 | model_name=$(lscpu | grep 'Model name:' | sed 's/Model name:[[:space:]]*//g') 5 | cat << __END__ >> README.md 6 | 7 | ### CPU: $model_name 8 | #### Topology 9 | ![CPU architecture sketch](cpu.png) 10 | __END__ 11 | 12 | lstopo --fontsize 20 \ 13 | --gridsize 20 \ 14 | --no-icaches \ 15 | --no-io \ 16 | --no-legend \ 17 | --output-format png > cpu.png 18 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/multi-thread/clang/v0_baseline.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v0_baseline_cpp for 20 iterations with input containing 36000000 elements 2 | 298.8503 3 | 298.3828 4 | 298.7258 5 | 298.5392 6 | 298.2517 7 | 299.286 8 | 298.4738 9 | 299.274 10 | 298.8592 11 | 298.213 12 | 298.2287 13 | 299.2587 14 | 299.5129 15 | 298.7695 16 | 298.5376 17 | 299.2327 18 | 298.9675 19 | 298.999 20 | 299.2038 21 | 299.2158 22 | 23884688.527991,,task-clock,23884688527991,100.00,3.964,CPUs utilized 23 | 140999,,context-switches,23884688527991,100.00,0.006,K/sec 24 | 267,,cpu-migrations,23884688527991,100.00,0.000,K/sec 25 | 70577,,page-faults,23884688527991,100.00,0.003,K/sec 26 | 85801901855929,,cycles,23884889382239,100.00,3.592,GHz 27 | 21786232144912,,instructions,23884889382239,100.00,0.25,insn per cycle 28 | 1117904761365,,branches,23884889382239,100.00,46.804,M/sec 29 | 882904045,,branch-misses,23884889382239,100.00,0.08,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/multi-thread/clang/v1_linear_reading.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v1_linear_reading_cpp for 
20 iterations with input containing 36000000 elements 2 | 60.58123 3 | 60.58981 4 | 60.58868 5 | 60.62275 6 | 60.59073 7 | 60.58766 8 | 60.58707 9 | 60.58902 10 | 60.65767 11 | 60.58945 12 | 60.59081 13 | 60.58981 14 | 60.58951 15 | 60.61554 16 | 60.5738 17 | 60.57967 18 | 60.58863 19 | 60.5881 20 | 62.17468 21 | 60.58651 22 | 4902175.485797,,task-clock,4902175485797,100.00,3.886,CPUs utilized 23 | 30135,,context-switches,4902175485797,100.00,0.006,K/sec 24 | 168,,cpu-migrations,4902175485797,100.00,0.000,K/sec 25 | 773717,,page-faults,4902175485797,100.00,0.158,K/sec 26 | 17615734024537,,cycles,4902215107165,100.00,3.593,GHz 27 | 16353777146088,,instructions,4902215107165,100.00,0.93,insn per cycle 28 | 1112712242645,,branches,4902215107165,100.00,226.983,M/sec 29 | 757051429,,branch-misses,4902215107165,100.00,0.07,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/multi-thread/clang/v2_instr_level_parallelism.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v2_instr_level_parallelism_cpp for 20 iterations with input containing 36000000 elements 2 | 44.56617 3 | 44.50015 4 | 44.61848 5 | 44.69595 6 | 45.07917 7 | 44.59142 8 | 44.6136 9 | 44.6824 10 | 44.83667 11 | 44.51321 12 | 44.86982 13 | 44.63151 14 | 44.94756 15 | 44.59098 16 | 44.82292 17 | 44.76363 18 | 45.01354 19 | 45.10165 20 | 44.61921 21 | 44.71511 22 | 3630066.074140,,task-clock,3630066074140,100.00,3.850,CPUs utilized 23 | 22278,,context-switches,3630066074140,100.00,0.006,K/sec 24 | 4,,cpu-migrations,3630066074140,100.00,0.000,K/sec 25 | 1476854,,page-faults,3630066074140,100.00,0.407,K/sec 26 | 13045517099146,,cycles,3630094002413,100.00,3.594,GHz 27 | 42277306489470,,instructions,3630094002413,100.00,3.24,insn per cycle 28 | 1111129939310,,branches,3630094002413,100.00,306.091,M/sec 29 | 752302079,,branch-misses,3630094002413,100.00,0.07,of all branches 30 
| -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/multi-thread/clang/v3_simd.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v3_simd_cpp for 20 iterations with input containing 36000000 elements 2 | 11.84695 3 | 11.84424 4 | 11.84594 5 | 11.84239 6 | 11.84558 7 | 11.84704 8 | 11.8406 9 | 11.8476 10 | 11.84712 11 | 12.37245 12 | 11.8473 13 | 11.85102 14 | 11.84637 15 | 11.83621 16 | 11.83722 17 | 11.83681 18 | 11.8406 19 | 11.8423 20 | 11.84804 21 | 11.84057 22 | 1009630.063272,,task-clock,1009630063272,100.00,3.530,CPUs utilized 23 | 6626,,context-switches,1009630063272,100.00,0.007,K/sec 24 | 3,,cpu-migrations,1009630063272,100.00,0.000,K/sec 25 | 2883097,,page-faults,1009630063272,100.00,0.003,M/sec 26 | 3630995364167,,cycles,1009638998843,100.00,3.596,GHz 27 | 3980504897307,,instructions,1009638998843,100.00,1.10,insn per cycle 28 | 573840160668,,branches,1009638998843,100.00,568.367,M/sec 29 | 753606247,,branch-misses,1009638998843,100.00,0.13,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/multi-thread/clang/v4_register_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v4_register_reuse_cpp for 20 iterations with input containing 36000000 elements 2 | 3.755643 3 | 3.763403 4 | 3.768738 5 | 3.751888 6 | 3.76313 7 | 3.759418 8 | 3.750492 9 | 3.762716 10 | 3.74914 11 | 3.750919 12 | 3.756431 13 | 3.74986 14 | 3.751856 15 | 3.764617 16 | 3.755086 17 | 3.754675 18 | 3.766628 19 | 3.765755 20 | 3.76309 21 | 3.753499 22 | 360255.574571,,task-clock,360255574571,100.00,2.913,CPUs utilized 23 | 2999,,context-switches,360255574571,100.00,0.008,K/sec 24 | 3,,cpu-migrations,360255574571,100.00,0.000,K/sec 25 | 2883095,,page-faults,360255574571,100.00,0.008,M/sec 26 | 
1302727041934,,cycles,360259954885,100.00,3.616,GHz 27 | 1870934917475,,instructions,360259954885,100.00,1.44,insn per cycle 28 | 93318689383,,branches,360259954885,100.00,259.035,M/sec 29 | 109101044,,branch-misses,360259954885,100.00,0.12,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/multi-thread/clang/v5_more_register_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v5_more_register_reuse_cpp for 20 iterations with input containing 36000000 elements 2 | 2.563528 3 | 2.552443 4 | 2.548315 5 | 2.557518 6 | 2.826334 7 | 2.595185 8 | 2.52076 9 | 2.721587 10 | 2.682065 11 | 2.587124 12 | 2.657417 13 | 2.714462 14 | 2.767267 15 | 2.62742 16 | 2.751099 17 | 2.609528 18 | 2.633461 19 | 2.633605 20 | 2.761131 21 | 2.504815 22 | 267951.853885,,task-clock,267951853885,100.00,2.646,CPUs utilized 23 | 2590,,context-switches,267951853885,100.00,0.010,K/sec 24 | 4,,cpu-migrations,267951853885,100.00,0.000,K/sec 25 | 2883095,,page-faults,267951853885,100.00,0.011,M/sec 26 | 969881988576,,cycles,267955424866,100.00,3.620,GHz 27 | 1938593550174,,instructions,267955424866,100.00,2.00,insn per cycle 28 | 101220317557,,branches,267955424866,100.00,377.756,M/sec 29 | 47683028,,branch-misses,267955424866,100.00,0.05,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/multi-thread/clang/v6_prefetch.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v6_prefetch_cpp for 20 iterations with input containing 36000000 elements 2 | 2.311962 3 | 2.311616 4 | 2.311084 5 | 2.312692 6 | 2.362197 7 | 2.371553 8 | 2.371829 9 | 2.370107 10 | 2.312762 11 | 2.41862 12 | 2.30983 13 | 2.31238 14 | 2.31109 15 | 2.309192 16 | 2.311732 17 | 2.313347 18 | 2.31031 19 | 2.360627 20 | 2.3696 21 | 2.366253 22 | 
245816.525598,,task-clock,245816525598,100.00,2.582,CPUs utilized 23 | 2489,,context-switches,245816525598,100.00,0.010,K/sec 24 | 4,,cpu-migrations,245816525598,100.00,0.000,K/sec 25 | 2883094,,page-faults,245816525598,100.00,0.012,M/sec 26 | 889937273761,,cycles,245819768337,100.00,3.620,GHz 27 | 2004103128306,,instructions,245819768337,100.00,2.25,insn per cycle 28 | 100583341097,,branches,245819768337,100.00,409.181,M/sec 29 | 55146617,,branch-misses,245819768337,100.00,0.05,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/multi-thread/clang/v7_cache_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v7_cache_reuse_cpp for 20 iterations with input containing 36000000 elements 2 | 2.158426 3 | 2.157286 4 | 2.160818 5 | 2.16883 6 | 2.158083 7 | 2.171359 8 | 2.163911 9 | 2.170808 10 | 2.160268 11 | 2.158473 12 | 2.17072 13 | 2.161093 14 | 2.178597 15 | 2.185996 16 | 2.181514 17 | 2.164231 18 | 2.169622 19 | 2.164684 20 | 2.165019 21 | 2.166737 22 | 233447.026187,,task-clock,233447026187,100.00,2.546,CPUs utilized 23 | 2108,,context-switches,233447026187,100.00,0.009,K/sec 24 | 3,,cpu-migrations,233447026187,100.00,0.000,K/sec 25 | 1006676,,page-faults,233447026187,100.00,0.004,M/sec 26 | 847246361038,,cycles,233449977133,100.00,3.629,GHz 27 | 1873169128262,,instructions,233449977133,100.00,2.21,insn per cycle 28 | 100526878129,,branches,233449977133,100.00,430.620,M/sec 29 | 187600217,,branch-misses,233449977133,100.00,0.19,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/multi-thread/gcc/v0_baseline.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v0_baseline_cpp for 20 iterations with input containing 36000000 elements 2 | 290.5382 3 | 290.4824 4 | 290.3337 5 | 293.1196 6 | 
290.4956 7 | 290.5174 8 | 290.5142 9 | 290.5103 10 | 290.434 11 | 290.318 12 | 290.534 13 | 290.5064 14 | 290.5035 15 | 290.5275 16 | 290.4275 17 | 290.3257 18 | 292.378 19 | 290.5151 20 | 290.5442 21 | 290.5683 22 | 23192705.247283,,task-clock,23192705247283,100.00,3.986,CPUs utilized 23 | 138423,,context-switches,23192705247283,100.00,0.006,K/sec 24 | 450,,cpu-migrations,23192705247283,100.00,0.000,K/sec 25 | 70530,,page-faults,23192705247283,100.00,0.003,K/sec 26 | 83301915543965,,cycles,23192898887242,100.00,3.592,GHz 27 | 30322586485756,,instructions,23192898887242,100.00,0.36,insn per cycle 28 | 4333410682764,,branches,23192898887242,100.00,186.844,M/sec 29 | 874181648,,branch-misses,23192898887242,100.00,0.02,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/multi-thread/gcc/v1_linear_reading.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v1_linear_reading_cpp for 20 iterations with input containing 36000000 elements 2 | 60.58588 3 | 60.58961 4 | 60.58569 5 | 60.58704 6 | 60.65012 7 | 60.59177 8 | 60.59256 9 | 60.92627 10 | 60.59761 11 | 62.01024 12 | 60.59851 13 | 60.59621 14 | 60.5979 15 | 60.5976 16 | 60.60065 17 | 60.60054 18 | 60.59941 19 | 60.60094 20 | 60.6056 21 | 60.59513 22 | 4848057.952669,,task-clock,4848057952669,100.00,3.982,CPUs utilized 23 | 29478,,context-switches,4848057952669,100.00,0.006,K/sec 24 | 250,,cpu-migrations,4848057952669,100.00,0.000,K/sec 25 | 773665,,page-faults,4848057952669,100.00,0.160,K/sec 26 | 17413198452451,,cycles,4848096478779,100.00,3.592,GHz 27 | 25981917531817,,instructions,4848096478779,100.00,1.49,insn per cycle 28 | 4329903542993,,branches,4848096478779,100.00,893.121,M/sec 29 | 754572762,,branch-misses,4848096478779,100.00,0.02,of all branches 30 | -------------------------------------------------------------------------------- 
/reports/Xeon-E3-1230-v5/multi-thread/gcc/v2_instr_level_parallelism.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v2_instr_level_parallelism_cpp for 20 iterations with input containing 36000000 elements 2 | 20.91733 3 | 21.19346 4 | 21.37341 5 | 20.8652 6 | 21.17298 7 | 21.13977 8 | 21.06119 9 | 21.14474 10 | 20.89988 11 | 21.06981 12 | 21.26398 13 | 20.92303 14 | 21.11199 15 | 20.96229 16 | 20.93805 17 | 21.29647 18 | 21.09178 19 | 20.86378 20 | 21.34537 21 | 20.90986 22 | 1681700.782477,,task-clock,1681700782477,100.00,3.954,CPUs utilized 23 | 9976,,context-switches,1681700782477,100.00,0.006,K/sec 24 | 0,,cpu-migrations,1681700782477,100.00,0.000,K/sec 25 | 1476807,,page-faults,1681700782477,100.00,0.878,K/sec 26 | 6039521760871,,cycles,1681715196472,100.00,3.591,GHz 27 | 17353262267892,,instructions,1681715196472,100.00,2.87,insn per cycle 28 | 1091069705437,,branches,1681715196472,100.00,648.789,M/sec 29 | 916175295,,branch-misses,1681715196472,100.00,0.08,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/multi-thread/gcc/v3_simd.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v3_simd_cpp for 20 iterations with input containing 36000000 elements 2 | 11.57365 3 | 11.57626 4 | 11.57559 5 | 11.58033 6 | 11.59385 7 | 11.57694 8 | 11.5746 9 | 11.57791 10 | 11.57026 11 | 11.57403 12 | 11.58108 13 | 11.57784 14 | 11.57777 15 | 11.81345 16 | 11.57857 17 | 11.57795 18 | 11.57493 19 | 11.57891 20 | 11.57587 21 | 11.57518 22 | 930842.404031,,task-clock,930842404031,100.00,3.953,CPUs utilized 23 | 5702,,context-switches,930842404031,100.00,0.006,K/sec 24 | 0,,cpu-migrations,930842404031,100.00,0.000,K/sec 25 | 1476806,,page-faults,930842404031,100.00,0.002,M/sec 26 | 3341493623336,,cycles,930850630511,100.00,3.590,GHz 27 | 
3318818707790,,instructions,930850630511,100.00,0.99,insn per cycle 28 | 551478480715,,branches,930850630511,100.00,592.451,M/sec 29 | 820470925,,branch-misses,930850630511,100.00,0.15,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/multi-thread/gcc/v4_register_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v4_register_reuse_cpp for 20 iterations with input containing 36000000 elements 2 | 4.208216 3 | 4.18649 4 | 4.233499 5 | 4.177824 6 | 4.18131 7 | 4.184774 8 | 4.17836 9 | 4.17439 10 | 4.175255 11 | 4.183895 12 | 4.18468 13 | 4.185634 14 | 4.210761 15 | 4.232924 16 | 4.177158 17 | 4.209357 18 | 4.204488 19 | 4.177522 20 | 4.182062 21 | 4.177741 22 | 339128.604323,,task-clock,339128604323,100.00,3.874,CPUs utilized 23 | 2028,,context-switches,339128604323,100.00,0.006,K/sec 24 | 1,,cpu-migrations,339128604323,100.00,0.000,K/sec 25 | 1476808,,page-faults,339128604323,100.00,0.004,M/sec 26 | 1218681999255,,cycles,339131751837,100.00,3.594,GHz 27 | 2413114790438,,instructions,339131751837,100.00,1.98,insn per cycle 28 | 70058686119,,branches,339131751837,100.00,206.584,M/sec 29 | 85645837,,branch-misses,339131751837,100.00,0.12,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/multi-thread/gcc/v5_more_register_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v5_more_register_reuse_cpp for 20 iterations with input containing 36000000 elements 2 | 2.460467 3 | 2.361925 4 | 2.362945 5 | 2.36406 6 | 2.38044 7 | 2.361157 8 | 2.477746 9 | 2.497625 10 | 2.517644 11 | 2.456303 12 | 2.38731 13 | 2.500648 14 | 2.462069 15 | 2.361078 16 | 2.362335 17 | 2.362773 18 | 2.363046 19 | 2.363326 20 | 2.361338 21 | 2.467604 22 | 196525.754199,,task-clock,196525754199,100.00,3.783,CPUs 
utilized 23 | 1397,,context-switches,196525754199,100.00,0.007,K/sec 24 | 0,,cpu-migrations,196525754199,100.00,0.000,K/sec 25 | 1476807,,page-faults,196525754199,100.00,0.008,M/sec 26 | 701697031062,,cycles,196528081867,100.00,3.571,GHz 27 | 1754754462412,,instructions,196528081867,100.00,2.50,insn per cycle 28 | 76879798300,,branches,196528081867,100.00,391.195,M/sec 29 | 26644113,,branch-misses,196528081867,100.00,0.03,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/multi-thread/gcc/v6_prefetch.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v6_prefetch_cpp for 20 iterations with input containing 36000000 elements 2 | 2.091842 3 | 2.092037 4 | 2.090893 5 | 2.090851 6 | 2.090615 7 | 2.091353 8 | 2.135087 9 | 2.170089 10 | 2.091177 11 | 2.091598 12 | 2.089793 13 | 2.0911 14 | 2.14728 15 | 2.090825 16 | 2.093526 17 | 2.09069 18 | 2.144084 19 | 2.123419 20 | 2.091536 21 | 2.090315 22 | 172221.103503,,task-clock,172221103503,100.00,3.759,CPUs utilized 23 | 1266,,context-switches,172221103503,100.00,0.007,K/sec 24 | 0,,cpu-migrations,172221103503,100.00,0.000,K/sec 25 | 1476809,,page-faults,172221103503,100.00,0.009,M/sec 26 | 614507754030,,cycles,172223111060,100.00,3.568,GHz 27 | 1957200775551,,instructions,172223111060,100.00,3.18,insn per cycle 28 | 76868313102,,branches,172223111060,100.00,446.335,M/sec 29 | 30890568,,branch-misses,172223111060,100.00,0.04,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/multi-thread/gcc/v7_cache_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v7_cache_reuse_cpp for 20 iterations with input containing 36000000 elements 2 | 2.028977 3 | 2.034142 4 | 2.032948 5 | 2.035028 6 | 2.044538 7 | 2.041476 8 | 2.029742 9 | 2.04544 10 | 2.03436 11 | 
2.031181 12 | 2.032235 13 | 2.050894 14 | 2.0441 15 | 2.036702 16 | 2.030045 17 | 2.034637 18 | 2.033783 19 | 2.028535 20 | 2.037993 21 | 2.03633 22 | 166968.603829,,task-clock,166968603829,100.00,3.757,CPUs utilized 23 | 1107,,context-switches,166968603829,100.00,0.007,K/sec 24 | 0,,cpu-migrations,166968603829,100.00,0.000,K/sec 25 | 896888,,page-faults,166968603829,100.00,0.005,M/sec 26 | 600338732113,,cycles,166970526098,100.00,3.596,GHz 27 | 1760973216262,,instructions,166970526098,100.00,2.93,insn per cycle 28 | 77605542603,,branches,166970526098,100.00,464.791,M/sec 29 | 177140848,,branch-misses,166970526098,100.00,0.23,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/multi-thread/rustc/v0_baseline.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v0_baseline_rust for 20 iterations with input containing 36000000 elements 2 | 274.2671 3 | 274.2282 4 | 274.4714 5 | 274.269 6 | 274.4746 7 | 274.2847 8 | 274.2711 9 | 274.473 10 | 274.439 11 | 277.8735 12 | 274.2827 13 | 274.467 14 | 274.4469 15 | 274.4744 16 | 274.3282 17 | 274.2693 18 | 274.4699 19 | 276.2102 20 | 274.2499 21 | 274.3003 22 | 21948810.361030,,task-clock,21948810361030,100.00,3.993,CPUs utilized 23 | 130167,,context-switches,21948810361030,100.00,0.006,K/sec 24 | 267,,cpu-migrations,21948810361030,100.00,0.000,K/sec 25 | 70587,,page-faults,21948810361030,100.00,0.003,K/sec 26 | 78832199614963,,cycles,21948980643743,100.00,3.592,GHz 27 | 60567139397470,,instructions,21948980643743,100.00,0.77,insn per cycle 28 | 12974486330556,,branches,21948980643743,100.00,591.125,M/sec 29 | 867103451,,branch-misses,21948980643743,100.00,0.01,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/multi-thread/rustc/v1_linear_reading.txt: 
-------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v1_linear_reading_rust for 20 iterations with input containing 36000000 elements 2 | 60.52898 3 | 60.53096 4 | 60.52771 5 | 60.55428 6 | 60.53023 7 | 60.53953 8 | 60.57701 9 | 60.53904 10 | 60.58854 11 | 60.57936 12 | 60.54167 13 | 60.53088 14 | 60.56148 15 | 60.53724 16 | 60.53835 17 | 60.52724 18 | 60.52795 19 | 60.53637 20 | 60.53275 21 | 60.53759 22 | 4843055.524443,,task-clock,4843055524443,100.00,3.987,CPUs utilized 23 | 28987,,context-switches,4843055524443,100.00,0.006,K/sec 24 | 54,,cpu-migrations,4843055524443,100.00,0.000,K/sec 25 | 773725,,page-faults,4843055524443,100.00,0.160,K/sec 26 | 17395167220846,,cycles,4843093825054,100.00,3.592,GHz 27 | 14651073876236,,instructions,4843093825054,100.00,0.84,insn per cycle 28 | 551999868733,,branches,4843093825054,100.00,113.978,M/sec 29 | 752646151,,branch-misses,4843093825054,100.00,0.14,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/multi-thread/rustc/v2_instr_level_parallelism.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v2_instr_level_parallelism_rust for 20 iterations with input containing 36000000 elements 2 | 16.50363 3 | 17.00799 4 | 16.66866 5 | 16.49549 6 | 16.67087 7 | 17.21198 8 | 16.65219 9 | 16.50091 10 | 17.01415 11 | 16.64241 12 | 16.50858 13 | 16.66339 14 | 16.95101 15 | 16.49926 16 | 16.49544 17 | 16.97056 18 | 16.64811 19 | 16.51002 20 | 16.5732 21 | 16.94453 22 | 1333766.698809,,task-clock,1333766698809,100.00,3.948,CPUs utilized 23 | 8692,,context-switches,1333766698809,100.00,0.007,K/sec 24 | 51,,cpu-migrations,1333766698809,100.00,0.000,K/sec 25 | 1476867,,page-faults,1333766698809,100.00,0.001,M/sec 26 | 4789802466976,,cycles,1333778411889,100.00,3.591,GHz 27 | 11959069470670,,instructions,1333778411889,100.00,2.50,insn per cycle 
28 | 2172908217345,,branches,1333778411889,100.00,1629.152,M/sec 29 | 732003632,,branch-misses,1333778411889,100.00,0.03,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/multi-thread/rustc/v3_simd.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v3_simd_rust for 20 iterations with input containing 36000000 elements 2 | 11.44183 3 | 11.8702 4 | 11.44892 5 | 11.43702 6 | 11.4505 7 | 11.45552 8 | 11.45519 9 | 11.43972 10 | 11.43207 11 | 11.44071 12 | 11.60523 13 | 11.44905 14 | 11.43736 15 | 11.44811 16 | 11.43891 17 | 11.46026 18 | 11.45267 19 | 11.44406 20 | 11.44119 21 | 11.76609 22 | 914307.741508,,task-clock,914307741508,100.00,3.915,CPUs utilized 23 | 5769,,context-switches,914307741508,100.00,0.006,K/sec 24 | 44,,cpu-migrations,914307741508,100.00,0.000,K/sec 25 | 1476867,,page-faults,914307741508,100.00,0.002,M/sec 26 | 3280514735670,,cycles,914316192310,100.00,3.588,GHz 27 | 2247949509189,,instructions,914316192310,100.00,0.69,insn per cycle 28 | 149165958964,,branches,914316192310,100.00,163.146,M/sec 29 | 729317541,,branch-misses,914316192310,100.00,0.49,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/multi-thread/rustc/v4_register_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v4_register_reuse_rust for 20 iterations with input containing 36000000 elements 2 | 3.687449 3 | 3.695385 4 | 3.686751 5 | 3.692289 6 | 3.699295 7 | 3.688591 8 | 3.679851 9 | 3.689751 10 | 3.688343 11 | 3.680773 12 | 3.686363 13 | 3.681272 14 | 3.685914 15 | 3.685788 16 | 3.681935 17 | 3.67855 18 | 3.68252 19 | 3.690467 20 | 3.692739 21 | 3.680601 22 | 293282.372611,,task-clock,293282372611,100.00,3.787,CPUs utilized 23 | 1990,,context-switches,293282372611,100.00,0.007,K/sec 24 | 
49,,cpu-migrations,293282372611,100.00,0.000,K/sec 25 | 1476867,,page-faults,293282372611,100.00,0.005,M/sec 26 | 1052503541591,,cycles,293285533477,100.00,3.589,GHz 27 | 1706430374833,,instructions,293285533477,100.00,1.62,insn per cycle 28 | 73814737679,,branches,293285533477,100.00,251.685,M/sec 29 | 84909081,,branch-misses,293285533477,100.00,0.12,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/multi-thread/rustc/v5_more_register_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v5_more_register_reuse_rust for 20 iterations with input containing 36000000 elements 2 | 2.524124 3 | 2.541064 4 | 2.597247 5 | 2.727401 6 | 2.515567 7 | 2.569911 8 | 2.681785 9 | 2.598449 10 | 2.579232 11 | 2.560792 12 | 2.504129 13 | 2.593805 14 | 2.570111 15 | 2.605782 16 | 2.526531 17 | 2.669717 18 | 2.645837 19 | 2.479613 20 | 2.500578 21 | 2.617567 22 | 203331.105544,,task-clock,203331105544,100.00,3.676,CPUs utilized 23 | 1551,,context-switches,203331105544,100.00,0.008,K/sec 24 | 44,,cpu-migrations,203331105544,100.00,0.000,K/sec 25 | 1476868,,page-faults,203331105544,100.00,0.007,M/sec 26 | 724869129039,,cycles,203334070613,100.00,3.565,GHz 27 | 1829649157954,,instructions,203334070613,100.00,2.52,insn per cycle 28 | 80144087462,,branches,203334070613,100.00,394.156,M/sec 29 | 28580922,,branch-misses,203334070613,100.00,0.04,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/multi-thread/rustc/v6_prefetch.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v6_prefetch_rust for 20 iterations with input containing 36000000 elements 2 | 2.159045 3 | 2.157449 4 | 2.167414 5 | 2.158795 6 | 2.157431 7 | 2.158116 8 | 2.156733 9 | 2.157825 10 | 2.156889 11 | 2.15785 12 | 2.159279 13 | 2.157764 14 | 
2.202527 15 | 2.157635 16 | 2.191308 17 | 2.159403 18 | 2.196027 19 | 2.158042 20 | 2.217278 21 | 2.158589 22 | 171542.901442,,task-clock,171542901442,100.00,3.645,CPUs utilized 23 | 1380,,context-switches,171542901442,100.00,0.008,K/sec 24 | 38,,cpu-migrations,171542901442,100.00,0.000,K/sec 25 | 1476868,,page-faults,171542901442,100.00,0.009,M/sec 26 | 611115776648,,cycles,171545495061,100.00,3.562,GHz 27 | 1964030201867,,instructions,171545495061,100.00,3.21,insn per cycle 28 | 79942293813,,branches,171545495061,100.00,466.019,M/sec 29 | 36256389,,branch-misses,171545495061,100.00,0.05,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/multi-thread/rustc/v7_cache_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v7_cache_reuse_rust for 20 iterations with input containing 36000000 elements 2 | 2.23422 3 | 2.251217 4 | 2.247567 5 | 2.250916 6 | 2.262002 7 | 2.254699 8 | 2.264085 9 | 2.265373 10 | 2.263529 11 | 2.245219 12 | 2.252879 13 | 2.255095 14 | 2.240132 15 | 2.248327 16 | 2.243755 17 | 2.251702 18 | 2.24324 19 | 2.241856 20 | 2.244685 21 | 2.259172 22 | 178925.717904,,task-clock,178925717904,100.00,3.663,CPUs utilized 23 | 5342,,context-switches,178925717904,100.00,0.030,K/sec 24 | 568,,cpu-migrations,178925717904,100.00,0.003,K/sec 25 | 818406,,page-faults,178925717904,100.00,0.005,M/sec 26 | 641780856286,,cycles,178939435102,100.00,3.587,GHz 27 | 1784655594899,,instructions,178939435102,100.00,2.78,insn per cycle 28 | 82298511959,,branches,178939435102,100.00,459.959,M/sec 29 | 188695900,,branch-misses,178939435102,100.00,0.23,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/single-thread/clang/v0_baseline.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v0_baseline_cpp 
for 20 iterations with input containing 36000000 elements 2 | 1133.33 3 | 1133.301 4 | 1133.633 5 | 1133.152 6 | 1133.205 7 | 1133.802 8 | 1133.398 9 | 1133.381 10 | 1133.825 11 | 1133.464 12 | 1133.368 13 | 1133.226 14 | 1133.641 15 | 1133.306 16 | 1133.35 17 | 1133.772 18 | 1133.229 19 | 1133.362 20 | 1133.937 21 | 1133.387 22 | 22716972.799383,,task-clock,22716972799383,100.00,1.000,CPUs utilized 23 | 28636,,context-switches,22716972799383,100.00,0.001,K/sec 24 | 0,,cpu-migrations,22716972799383,100.00,0.000,K/sec 25 | 70504,,page-faults,22716972799383,100.00,0.003,K/sec 26 | 86054384601475,,cycles,22717005954613,100.00,3.788,GHz 27 | 21767496136050,,instructions,22717005954613,100.00,0.25,insn per cycle 28 | 1112425556000,,branches,22717005954613,100.00,48.969,M/sec 29 | 836708459,,branch-misses,22717005954613,100.00,0.08,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/single-thread/clang/v1_linear_reading.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v1_linear_reading_cpp for 20 iterations with input containing 36000000 elements 2 | 232.7582 3 | 232.7682 4 | 232.7288 5 | 232.7277 6 | 232.7265 7 | 232.715 8 | 232.708 9 | 233.0299 10 | 232.7191 11 | 232.7258 12 | 232.7123 13 | 232.6948 14 | 232.717 15 | 232.723 16 | 232.7168 17 | 232.7032 18 | 232.6292 19 | 232.59 20 | 232.6867 21 | 232.719 22 | 4702525.386515,,task-clock,4702525386515,100.00,1.000,CPUs utilized 23 | 5923,,context-switches,4702525386515,100.00,0.001,K/sec 24 | 0,,cpu-migrations,4702525386515,100.00,0.000,K/sec 25 | 773646,,page-faults,4702525386515,100.00,0.165,K/sec 26 | 17814496683311,,cycles,4702531553680,100.00,3.788,GHz 27 | 16340143492386,,instructions,4702531553680,100.00,0.92,insn per cycle 28 | 1108711262536,,branches,4702531553680,100.00,235.769,M/sec 29 | 744523488,,branch-misses,4702531553680,100.00,0.07,of all branches 30 | 
-------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/single-thread/clang/v2_instr_level_parallelism.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v2_instr_level_parallelism_cpp for 20 iterations with input containing 36000000 elements 2 | 238.1084 3 | 238.1419 4 | 238.7258 5 | 238.1426 6 | 238.1954 7 | 238.1181 8 | 238.1363 9 | 238.2108 10 | 238.1507 11 | 238.1385 12 | 238.1204 13 | 238.1384 14 | 238.0832 15 | 238.1076 16 | 238.1494 17 | 238.3204 18 | 238.1357 19 | 238.7255 20 | 238.1375 21 | 238.1449 22 | 4811996.330769,,task-clock,4811996330769,100.00,1.000,CPUs utilized 23 | 6060,,context-switches,4811996330769,100.00,0.001,K/sec 24 | 0,,cpu-migrations,4811996330769,100.00,0.000,K/sec 25 | 1476784,,page-faults,4811996330769,100.00,0.307,K/sec 26 | 18227372684933,,cycles,4812003061164,100.00,3.788,GHz 27 | 62789454751906,,instructions,4812003061164,100.00,3.44,insn per cycle 28 | 1107564081558,,branches,4812003061164,100.00,230.167,M/sec 29 | 747340628,,branch-misses,4812003061164,100.00,0.07,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/single-thread/clang/v3_simd.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v3_simd_cpp for 20 iterations with input containing 36000000 elements 2 | 40.95536 3 | 40.86259 4 | 40.7879 5 | 40.52425 6 | 40.54025 7 | 40.99585 8 | 40.54585 9 | 40.90763 10 | 40.51994 11 | 40.89165 12 | 40.90829 13 | 40.55072 14 | 40.95753 15 | 40.82156 16 | 40.52369 17 | 40.84399 18 | 40.50178 19 | 40.94994 20 | 40.92217 21 | 40.5999 22 | 862957.738527,,task-clock,862957738527,100.00,1.000,CPUs utilized 23 | 1093,,context-switches,862957738527,100.00,0.001,K/sec 24 | 1,,cpu-migrations,862957738527,100.00,0.000,K/sec 25 | 2883024,,page-faults,862957738527,100.00,0.003,M/sec 26 | 
2970446009915,,cycles,862958957951,100.00,3.442,GHz 27 | 3933963550937,,instructions,862958957951,100.00,1.32,insn per cycle 28 | 568495423162,,branches,862958957951,100.00,658.776,M/sec 29 | 729478911,,branch-misses,862958957951,100.00,0.13,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/single-thread/clang/v4_register_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v4_register_reuse_cpp for 20 iterations with input containing 36000000 elements 2 | 13.55643 3 | 13.9215 4 | 13.4982 5 | 13.50385 6 | 13.49305 7 | 13.50833 8 | 13.51705 9 | 13.48709 10 | 13.50584 11 | 13.48244 12 | 13.5049 13 | 13.51665 14 | 13.50189 15 | 13.4867 16 | 13.50295 17 | 13.52297 18 | 13.49454 19 | 13.51167 20 | 13.50345 21 | 13.52805 22 | 318776.065810,,task-clock,318776065810,100.00,1.000,CPUs utilized 23 | 410,,context-switches,318776065810,100.00,0.001,K/sec 24 | 1,,cpu-migrations,318776065810,100.00,0.000,K/sec 25 | 2883026,,page-faults,318776065810,100.00,0.009,M/sec 26 | 1207436743510,,cycles,318776530816,100.00,3.788,GHz 27 | 1831767643625,,instructions,318776530816,100.00,1.52,insn per cycle 28 | 88691002156,,branches,318776530816,100.00,278.224,M/sec 29 | 86340210,,branch-misses,318776530816,100.00,0.10,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/single-thread/clang/v5_more_register_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v5_more_register_reuse_cpp for 20 iterations with input containing 36000000 elements 2 | 9.072468 3 | 9.06865 4 | 9.070002 5 | 9.019465 6 | 9.017647 7 | 9.015681 8 | 9.025183 9 | 9.028546 10 | 9.070066 11 | 9.065443 12 | 9.024264 13 | 9.015307 14 | 9.029653 15 | 9.0188 16 | 9.067633 17 | 9.019157 18 | 9.020562 19 | 9.019055 20 | 9.020235 21 | 9.067186 22 
| 232908.591906,,task-clock,232908591906,100.00,1.000,CPUs utilized 23 | 300,,context-switches,232908591906,100.00,0.001,K/sec 24 | 0,,cpu-migrations,232908591906,100.00,0.000,K/sec 25 | 2883025,,page-faults,232908591906,100.00,0.012,M/sec 26 | 882364086020,,cycles,232908953724,100.00,3.788,GHz 27 | 1831472371797,,instructions,232908953724,100.00,2.08,insn per cycle 28 | 95864764613,,branches,232908953724,100.00,411.598,M/sec 29 | 32912510,,branch-misses,232908953724,100.00,0.03,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/single-thread/clang/v6_prefetch.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v6_prefetch_cpp for 20 iterations with input containing 36000000 elements 2 | 8.223697 3 | 8.21992 4 | 8.221355 5 | 8.223888 6 | 8.239441 7 | 8.262341 8 | 8.234802 9 | 8.236466 10 | 8.242466 11 | 8.241079 12 | 8.236423 13 | 8.234324 14 | 8.221849 15 | 8.219875 16 | 8.222937 17 | 8.239992 18 | 8.236205 19 | 8.242164 20 | 8.222181 21 | 8.218103 22 | 212472.105563,,task-clock,212472105563,100.00,1.000,CPUs utilized 23 | 275,,context-switches,212472105563,100.00,0.001,K/sec 24 | 1,,cpu-migrations,212472105563,100.00,0.000,K/sec 25 | 2883026,,page-faults,212472105563,100.00,0.014,M/sec 26 | 805290179031,,cycles,212472377100,100.00,3.790,GHz 27 | 1966465877157,,instructions,212472377100,100.00,2.44,insn per cycle 28 | 95865733173,,branches,212472377100,100.00,451.192,M/sec 29 | 44484338,,branch-misses,212472377100,100.00,0.05,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/single-thread/clang/v7_cache_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v7_cache_reuse_cpp for 20 iterations with input containing 36000000 elements 2 | 7.513166 3 | 7.50684 4 | 7.510826 5 | 7.495531 6 | 
7.494366 7 | 7.491263 8 | 7.510608 9 | 7.494476 10 | 7.49638 11 | 7.49405 12 | 7.493315 13 | 7.494064 14 | 7.497462 15 | 7.49875 16 | 7.517972 17 | 7.500975 18 | 7.501249 19 | 7.502306 20 | 7.500847 21 | 7.499052 22 | 197918.079772,,task-clock,197918079772,100.00,1.000,CPUs utilized 23 | 276,,context-switches,197918079772,100.00,0.001,K/sec 24 | 0,,cpu-migrations,197918079772,100.00,0.000,K/sec 25 | 969752,,page-faults,197918079772,100.00,0.005,M/sec 26 | 750007936912,,cycles,197918391293,100.00,3.789,GHz 27 | 1908788389760,,instructions,197918391293,100.00,2.55,insn per cycle 28 | 96114043763,,branches,197918391293,100.00,485.625,M/sec 29 | 179115706,,branch-misses,197918391293,100.00,0.19,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/single-thread/gcc/v0_baseline.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v0_baseline_cpp for 20 iterations with input containing 36000000 elements 2 | 1106.633 3 | 1106.011 4 | 1106.006 5 | 1106.639 6 | 1105.999 7 | 1106.033 8 | 1106.584 9 | 1105.892 10 | 1105.91 11 | 1105.988 12 | 1106.67 13 | 1106.034 14 | 1106.042 15 | 1106.464 16 | 1106.069 17 | 1105.991 18 | 1106.621 19 | 1106.06 20 | 1105.973 21 | 1106.559 22 | 22127572.797888,,task-clock,22127572797888,100.00,1.000,CPUs utilized 23 | 28014,,context-switches,22127572797888,100.00,0.001,K/sec 24 | 1,,cpu-migrations,22127572797888,100.00,0.000,K/sec 25 | 70504,,page-faults,22127572797888,100.00,0.003,K/sec 26 | 83815193933354,,cycles,22127606814713,100.00,3.788,GHz 27 | 30316292528192,,instructions,22127606814713,100.00,0.36,insn per cycle 28 | 4332201891784,,branches,22127606814713,100.00,195.783,M/sec 29 | 828144666,,branch-misses,22127606814713,100.00,0.02,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/single-thread/gcc/v1_linear_reading.txt: 
-------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v1_linear_reading_cpp for 20 iterations with input containing 36000000 elements 2 | 232.8825 3 | 233.23 4 | 232.8917 5 | 232.9184 6 | 232.88 7 | 232.8757 8 | 232.8592 9 | 232.9094 10 | 232.8648 11 | 232.8907 12 | 232.8772 13 | 232.8522 14 | 232.761 15 | 232.7845 16 | 232.7547 17 | 232.832 18 | 232.9316 19 | 233.1903 20 | 232.7832 21 | 232.8835 22 | 4661507.434824,,task-clock,4661507434824,100.00,1.000,CPUs utilized 23 | 5909,,context-switches,4661507434824,100.00,0.001,K/sec 24 | 1,,cpu-migrations,4661507434824,100.00,0.000,K/sec 25 | 773644,,page-faults,4661507434824,100.00,0.166,K/sec 26 | 17658670958263,,cycles,4661514188148,100.00,3.788,GHz 27 | 25980439261628,,instructions,4661514188148,100.00,1.47,insn per cycle 28 | 4329600107485,,branches,4661514188148,100.00,928.798,M/sec 29 | 745806159,,branch-misses,4661514188148,100.00,0.02,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/single-thread/gcc/v2_instr_level_parallelism.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v2_instr_level_parallelism_cpp for 20 iterations with input containing 36000000 elements 2 | 78.53203 3 | 78.50991 4 | 78.52498 5 | 78.49879 6 | 78.52975 7 | 78.51864 8 | 78.50186 9 | 78.49695 10 | 78.47276 11 | 78.47148 12 | 78.50005 13 | 78.52081 14 | 78.52853 15 | 78.51401 16 | 78.50753 17 | 78.53557 18 | 78.52079 19 | 78.50616 20 | 78.49767 21 | 78.53104 22 | 1573912.621924,,task-clock,1573912621924,100.00,1.000,CPUs utilized 23 | 1998,,context-switches,1573912621924,100.00,0.001,K/sec 24 | 0,,cpu-migrations,1573912621924,100.00,0.000,K/sec 25 | 1476784,,page-faults,1573912621924,100.00,0.938,K/sec 26 | 5963153131121,,cycles,1573914682014,100.00,3.789,GHz 27 | 17347472422960,,instructions,1573914682014,100.00,2.91,insn per cycle 28 | 
1090904699143,,branches,1573914682014,100.00,693.116,M/sec 29 | 914310913,,branch-misses,1573914682014,100.00,0.08,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/single-thread/gcc/v3_simd.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v3_simd_cpp for 20 iterations with input containing 36000000 elements 2 | 42.83123 3 | 43.33353 4 | 43.24306 5 | 42.42239 6 | 42.18383 7 | 43.22226 8 | 42.34877 9 | 43.3255 10 | 42.37985 11 | 43.29428 12 | 43.28549 13 | 43.25565 14 | 42.52327 15 | 42.31618 16 | 42.40033 17 | 43.30832 18 | 42.3674 19 | 43.35801 20 | 42.38923 21 | 42.35493 22 | 859852.336696,,task-clock,859852336696,100.00,1.000,CPUs utilized 23 | 1099,,context-switches,859852336696,100.00,0.001,K/sec 24 | 1,,cpu-migrations,859852336696,100.00,0.000,K/sec 25 | 1476786,,page-faults,859852336696,100.00,0.002,M/sec 26 | 2665464736900,,cycles,859853644105,100.00,3.100,GHz 27 | 3318589388572,,instructions,859853644105,100.00,1.25,insn per cycle 28 | 551389343510,,branches,859853644105,100.00,641.261,M/sec 29 | 820366018,,branch-misses,859853644105,100.00,0.15,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/single-thread/gcc/v4_register_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v4_register_reuse_cpp for 20 iterations with input containing 36000000 elements 2 | 14.87467 3 | 14.65292 4 | 14.85276 5 | 14.84601 6 | 14.88516 7 | 14.68551 8 | 14.62535 9 | 14.639 10 | 14.6566 11 | 14.63271 12 | 14.84981 13 | 14.84834 14 | 14.86356 15 | 14.67815 16 | 14.8609 17 | 14.84929 18 | 14.86339 19 | 14.88395 20 | 14.69758 21 | 14.65687 22 | 299111.438804,,task-clock,299111438804,100.00,1.000,CPUs utilized 23 | 387,,context-switches,299111438804,100.00,0.001,K/sec 24 | 
1,,cpu-migrations,299111438804,100.00,0.000,K/sec 25 | 1476786,,page-faults,299111438804,100.00,0.005,M/sec 26 | 1133118438359,,cycles,299111829155,100.00,3.788,GHz 27 | 2412668240485,,instructions,299111829155,100.00,2.13,insn per cycle 28 | 69987315964,,branches,299111829155,100.00,233.984,M/sec 29 | 84145785,,branch-misses,299111829155,100.00,0.12,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/single-thread/gcc/v5_more_register_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v5_more_register_reuse_cpp for 20 iterations with input containing 36000000 elements 2 | 8.486051 3 | 8.477171 4 | 8.4794 5 | 8.487592 6 | 8.491851 7 | 8.482849 8 | 8.487493 9 | 8.556219 10 | 8.554699 11 | 8.5647 12 | 8.489266 13 | 8.558198 14 | 8.490123 15 | 8.489978 16 | 8.484636 17 | 8.485872 18 | 8.4912 19 | 8.553019 20 | 8.542949 21 | 8.485306 22 | 173851.612703,,task-clock,173851612703,100.00,1.000,CPUs utilized 23 | 226,,context-switches,173851612703,100.00,0.001,K/sec 24 | 1,,cpu-migrations,173851612703,100.00,0.000,K/sec 25 | 1476783,,page-faults,173851612703,100.00,0.008,M/sec 26 | 658658193162,,cycles,173851824012,100.00,3.789,GHz 27 | 1753941830783,,instructions,173851824012,100.00,2.66,insn per cycle 28 | 76787133275,,branches,173851824012,100.00,441.682,M/sec 29 | 21704184,,branch-misses,173851824012,100.00,0.03,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/single-thread/gcc/v6_prefetch.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v6_prefetch_cpp for 20 iterations with input containing 36000000 elements 2 | 7.729567 3 | 7.741821 4 | 7.738839 5 | 7.738925 6 | 7.725572 7 | 7.7268 8 | 7.726988 9 | 7.724115 10 | 7.722554 11 | 7.721869 12 | 7.72273 13 | 7.721862 14 | 7.722678 15 | 
7.725265 16 | 7.743058 17 | 7.738955 18 | 7.74058 19 | 7.741349 20 | 7.737575 21 | 7.737062 22 | 158327.208310,,task-clock,158327208310,100.00,1.000,CPUs utilized 23 | 207,,context-switches,158327208310,100.00,0.001,K/sec 24 | 1,,cpu-migrations,158327208310,100.00,0.000,K/sec 25 | 1476782,,page-faults,158327208310,100.00,0.009,M/sec 26 | 599821282174,,cycles,158327476731,100.00,3.788,GHz 27 | 1956439597722,,instructions,158327476731,100.00,3.26,insn per cycle 28 | 76784995596,,branches,158327476731,100.00,484.977,M/sec 29 | 28490946,,branch-misses,158327476731,100.00,0.04,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/single-thread/gcc/v7_cache_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v7_cache_reuse_cpp for 20 iterations with input containing 36000000 elements 2 | 7.450346 3 | 7.502234 4 | 7.459814 5 | 7.455333 6 | 7.454891 7 | 7.450869 8 | 7.450973 9 | 7.442755 10 | 7.441383 11 | 7.440994 12 | 7.44181 13 | 7.443527 14 | 7.445188 15 | 7.444952 16 | 7.44328 17 | 7.44438 18 | 7.442347 19 | 7.442871 20 | 7.44344 21 | 7.455194 22 | 152711.811250,,task-clock,152711811250,100.00,1.000,CPUs utilized 23 | 201,,context-switches,152711811250,100.00,0.001,K/sec 24 | 1,,cpu-migrations,152711811250,100.00,0.000,K/sec 25 | 873359,,page-faults,152711811250,100.00,0.006,M/sec 26 | 578375979959,,cycles,152712058188,100.00,3.787,GHz 27 | 1895430431792,,instructions,152712058188,100.00,3.28,insn per cycle 28 | 77443045259,,branches,152712058188,100.00,507.119,M/sec 29 | 178011056,,branch-misses,152712058188,100.00,0.23,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/single-thread/rustc/v0_baseline.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v0_baseline_rust for 20 iterations with 
input containing 36000000 elements 2 | 1059.168 3 | 1059.128 4 | 1059.19 5 | 1059.598 6 | 1059.167 7 | 1059.116 8 | 1059.672 9 | 1058.99 10 | 1059.121 11 | 1059.159 12 | 1059.879 13 | 1059.041 14 | 1059.093 15 | 1059.772 16 | 1059.163 17 | 1059.163 18 | 1059.792 19 | 1059.118 20 | 1061.352 21 | 1059.158 22 | 21191276.971082,,task-clock,21191276971082,100.00,1.000,CPUs utilized 23 | 26815,,context-switches,21191276971082,100.00,0.001,K/sec 24 | 0,,cpu-migrations,21191276971082,100.00,0.000,K/sec 25 | 70523,,page-faults,21191276971082,100.00,0.003,K/sec 26 | 80265735926811,,cycles,21191305401972,100.00,3.788,GHz 27 | 60557542154236,,instructions,21191305401972,100.00,0.75,insn per cycle 28 | 12972047521374,,branches,21191305401972,100.00,612.141,M/sec 29 | 830924180,,branch-misses,21191305401972,100.00,0.01,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/single-thread/rustc/v1_linear_reading.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v1_linear_reading_rust for 20 iterations with input containing 36000000 elements 2 | 232.8533 3 | 232.8329 4 | 232.8468 5 | 232.842 6 | 232.847 7 | 232.8291 8 | 232.8176 9 | 232.842 10 | 232.8479 11 | 232.8385 12 | 232.8228 13 | 232.8514 14 | 233.1269 15 | 232.7684 16 | 232.8311 17 | 232.7881 18 | 232.8622 19 | 232.844 20 | 232.8429 21 | 232.8292 22 | 4660620.829422,,task-clock,4660620829422,100.00,1.000,CPUs utilized 23 | 5897,,context-switches,4660620829422,100.00,0.001,K/sec 24 | 1,,cpu-migrations,4660620829422,100.00,0.000,K/sec 25 | 773668,,page-faults,4660620829422,100.00,0.166,K/sec 26 | 17655325550058,,cycles,4660627413679,100.00,3.788,GHz 27 | 14646963170460,,instructions,4660627413679,100.00,0.83,insn per cycle 28 | 551767937282,,branches,4660627413679,100.00,118.389,M/sec 29 | 744660008,,branch-misses,4660627413679,100.00,0.13,of all branches 30 | 
-------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/single-thread/rustc/v2_instr_level_parallelism.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v2_instr_level_parallelism_rust for 20 iterations with input containing 36000000 elements 2 | 63.09269 3 | 63.08422 4 | 63.06413 5 | 63.0855 6 | 63.95391 7 | 63.04715 8 | 63.03739 9 | 63.03677 10 | 63.08673 11 | 63.10951 12 | 63.07407 13 | 63.08542 14 | 63.08214 15 | 63.10749 16 | 63.04433 17 | 63.07745 18 | 63.03719 19 | 63.07239 20 | 63.04723 21 | 63.04967 22 | 1265964.875621,,task-clock,1265964875621,100.00,1.000,CPUs utilized 23 | 1610,,context-switches,1265964875621,100.00,0.001,K/sec 24 | 1,,cpu-migrations,1265964875621,100.00,0.000,K/sec 25 | 1476804,,page-faults,1265964875621,100.00,0.001,M/sec 26 | 4795571299074,,cycles,1265966860908,100.00,3.788,GHz 27 | 9793608680903,,instructions,1265966860908,100.00,2.04,insn per cycle 28 | 282827269670,,branches,1265966860908,100.00,223.408,M/sec 29 | 729066536,,branch-misses,1265966860908,100.00,0.26,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/single-thread/rustc/v3_simd.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v3_simd_rust for 20 iterations with input containing 36000000 elements 2 | 42.13279 3 | 42.20621 4 | 42.21878 5 | 42.21152 6 | 42.21894 7 | 42.22558 8 | 42.22223 9 | 42.2386 10 | 42.22379 11 | 42.22439 12 | 42.23265 13 | 42.2367 14 | 42.24424 15 | 42.2371 16 | 42.22288 17 | 42.21999 18 | 42.16222 19 | 42.25023 20 | 42.23985 21 | 42.25864 22 | 848136.789886,,task-clock,848136789886,100.00,1.000,CPUs utilized 23 | 1086,,context-switches,848136789886,100.00,0.001,K/sec 24 | 1,,cpu-migrations,848136789886,100.00,0.000,K/sec 25 | 1476804,,page-faults,848136789886,100.00,0.002,M/sec 26 | 
2628870811122,,cycles,848138305161,100.00,3.100,GHz 27 | 2243805613077,,instructions,848138305161,100.00,0.85,insn per cycle 28 | 148935940790,,branches,848138305161,100.00,175.604,M/sec 29 | 727231642,,branch-misses,848138305161,100.00,0.49,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/single-thread/rustc/v4_register_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v4_register_reuse_rust for 20 iterations with input containing 36000000 elements 2 | 13.16338 3 | 13.14433 4 | 13.14858 5 | 13.13956 6 | 13.15039 7 | 13.17483 8 | 13.13937 9 | 13.15646 10 | 13.15586 11 | 13.14308 12 | 13.13984 13 | 13.1546 14 | 13.1585 15 | 13.14506 16 | 13.15853 17 | 13.14105 18 | 13.15764 19 | 13.13245 20 | 13.16927 21 | 13.19644 22 | 266776.413447,,task-clock,266776413447,100.00,1.000,CPUs utilized 23 | 344,,context-switches,266776413447,100.00,0.001,K/sec 24 | 1,,cpu-migrations,266776413447,100.00,0.000,K/sec 25 | 1476805,,page-faults,266776413447,100.00,0.006,M/sec 26 | 1010665182258,,cycles,266776812254,100.00,3.788,GHz 27 | 1758739003462,,instructions,266776812254,100.00,1.74,insn per cycle 28 | 72167964361,,branches,266776812254,100.00,270.519,M/sec 29 | 84020827,,branch-misses,266776812254,100.00,0.12,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/single-thread/rustc/v5_more_register_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v5_more_register_reuse_rust for 20 iterations with input containing 36000000 elements 2 | 8.809267 3 | 8.807184 4 | 8.807642 5 | 8.807523 6 | 8.810328 7 | 8.798672 8 | 8.79455 9 | 8.792348 10 | 8.791255 11 | 8.792114 12 | 8.791951 13 | 8.799048 14 | 8.802911 15 | 8.831481 16 | 8.805656 17 | 8.806079 18 | 8.940366 19 | 9.096379 20 | 9.343872 21 | 
8.811234 22 | 180744.047191,,task-clock,180744047191,100.00,1.000,CPUs utilized 23 | 237,,context-switches,180744047191,100.00,0.001,K/sec 24 | 1,,cpu-migrations,180744047191,100.00,0.000,K/sec 25 | 1476804,,page-faults,180744047191,100.00,0.008,M/sec 26 | 683339244371,,cycles,180744416864,100.00,3.781,GHz 27 | 1823195340945,,instructions,180744416864,100.00,2.67,insn per cycle 28 | 79294970074,,branches,180744416864,100.00,438.714,M/sec 29 | 40527119,,branch-misses,180744416864,100.00,0.05,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/single-thread/rustc/v6_prefetch.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v6_prefetch_rust for 20 iterations with input containing 36000000 elements 2 | 7.725586 3 | 7.724339 4 | 7.722237 5 | 7.717602 6 | 7.716591 7 | 7.718408 8 | 7.716093 9 | 7.71704 10 | 7.717309 11 | 7.734722 12 | 7.718193 13 | 7.741469 14 | 7.719352 15 | 7.716117 16 | 7.716169 17 | 7.720254 18 | 7.725892 19 | 7.717891 20 | 7.717659 21 | 7.717067 22 | 158121.391305,,task-clock,158121391305,100.00,1.000,CPUs utilized 23 | 209,,context-switches,158121391305,100.00,0.001,K/sec 24 | 1,,cpu-migrations,158121391305,100.00,0.000,K/sec 25 | 1476804,,page-faults,158121391305,100.00,0.009,M/sec 26 | 599004927360,,cycles,158121652150,100.00,3.788,GHz 27 | 1958146626356,,instructions,158121652150,100.00,3.27,insn per cycle 28 | 79102216367,,branches,158121652150,100.00,500.263,M/sec 29 | 29644373,,branch-misses,158121652150,100.00,0.04,of all branches 30 | -------------------------------------------------------------------------------- /reports/Xeon-E3-1230-v5/single-thread/rustc/v7_cache_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v7_cache_reuse_rust for 20 iterations with input containing 36000000 elements 2 | 9.7345 3 | 9.713578 4 | 9.712243 5 | 
9.707807 6 | 9.711102 7 | 9.72049 8 | 9.721735 9 | 9.745283 10 | 9.721373 11 | 9.719403 12 | 9.72069 13 | 9.720367 14 | 9.720451 15 | 9.716066 16 | 9.713538 17 | 9.713459 18 | 9.715779 19 | 9.715846 20 | 9.719216 21 | 9.716205 22 | 198144.599873,,task-clock,198144599873,100.00,1.000,CPUs utilized 23 | 258,,context-switches,198144599873,100.00,0.001,K/sec 24 | 1,,cpu-migrations,198144599873,100.00,0.000,K/sec 25 | 832993,,page-faults,198144599873,100.00,0.004,M/sec 26 | 749601475827,,cycles,198144901347,100.00,3.783,GHz 27 | 2316755181235,,instructions,198144901347,100.00,3.09,insn per cycle 28 | 80711033745,,branches,198144901347,100.00,407.334,M/sec 29 | 186630699,,branch-misses,198144901347,100.00,0.23,of all branches 30 | -------------------------------------------------------------------------------- /reports/i5-4690k/multi-thread/clang/v0_baseline.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v0_baseline_cpp with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 455.8575 3 | 454.5273 4 | 3627494.71,msec,task-clock:u,3627494705955,100.00,3.965,CPUs utilized 5 | 0,,context-switches:u,3627494705955,100.00,0.000,K/sec 6 | 0,,cpu-migrations:u,3627494705955,100.00,0.000,K/sec 7 | 70578,,page-faults:u,3627494705955,100.00,0.019,K/sec 8 | 15569737633909,,cycles:u,3627513910364,100.00,4.292,GHz 9 | 2178478044900,,instructions:u,3627513910364,100.00,0.14,insn per cycle 10 | 111792975800,,branches:u,3627513910364,100.00,30.818,M/sec 11 | 72375431,,branch-misses:u,3627513910364,100.00,0.06,of all branches 12 | -------------------------------------------------------------------------------- /reports/i5-4690k/multi-thread/clang/v1_linear_reading.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v1_linear_reading_cpp with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 
41.82728 3 | 41.8556 4 | 41.84472 5 | 41.84977 6 | 41.8406 7 | 41.81837 8 | 41.8377 9 | 41.83718 10 | 41.83194 11 | 41.83755 12 | 41.84825 13 | 41.84829 14 | 41.83397 15 | 41.8392 16 | 41.84638 17 | 2549900.75,msec,task-clock:u,2549900747045,100.00,3.857,CPUs utilized 18 | 0,,context-switches:u,2549900747045,100.00,0.000,K/sec 19 | 0,,cpu-migrations:u,2549900747045,100.00,0.000,K/sec 20 | 597936,,page-faults:u,2549900747045,100.00,0.234,K/sec 21 | 10928505205125,,cycles:u,2549915457450,100.00,4.286,GHz 22 | 12285167450692,,instructions:u,2549915457450,100.00,1.12,insn per cycle 23 | 839565080369,,branches:u,2549915457450,100.00,329.254,M/sec 24 | 542896590,,branch-misses:u,2549915457450,100.00,0.06,of all branches 25 | -------------------------------------------------------------------------------- /reports/i5-4690k/multi-thread/clang/v2_instr_level_parallelism.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v2_instr_level_parallelism_cpp with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 48.028 3 | 48.10112 4 | 48.10282 5 | 48.09298 6 | 48.1047 7 | 48.09552 8 | 48.1063 9 | 48.1012 10 | 48.10129 11 | 48.09431 12 | 48.11792 13 | 48.09961 14 | 48.10943 15 | 2533443.29,msec,task-clock:u,2533443292244,100.00,3.874,CPUs utilized 16 | 0,,context-switches:u,2533443292244,100.00,0.000,K/sec 17 | 0,,cpu-migrations:u,2533443292244,100.00,0.000,K/sec 18 | 984661,,page-faults:u,2533443292244,100.00,0.389,K/sec 19 | 10857241123321,,cycles:u,2533462865493,100.00,4.286,GHz 20 | 31728097157844,,instructions:u,2533462865493,100.00,2.92,insn per cycle 21 | 3534807692580,,branches:u,2533462865493,100.00,1395.258,M/sec 22 | 12980978510,,branch-misses:u,2533462865493,100.00,0.37,of all branches 23 | -------------------------------------------------------------------------------- /reports/i5-4690k/multi-thread/clang/v3_simd.txt: 
-------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v3_simd_cpp with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 13.74906 3 | 13.75354 4 | 13.83752 5 | 13.8462 6 | 13.85451 7 | 13.75563 8 | 13.83063 9 | 13.7541 10 | 13.83289 11 | 13.85162 12 | 13.75413 13 | 13.75208 14 | 13.83992 15 | 13.84892 16 | 13.85128 17 | 13.85198 18 | 13.75756 19 | 13.75582 20 | 13.75166 21 | 13.74948 22 | 13.75163 23 | 13.75254 24 | 13.75487 25 | 13.75245 26 | 13.7555 27 | 13.7538 28 | 13.7494 29 | 13.75422 30 | 13.76466 31 | 13.75296 32 | 13.75442 33 | 13.84714 34 | 13.85227 35 | 13.82368 36 | 13.84336 37 | 13.75325 38 | 13.84978 39 | 13.84417 40 | 13.75729 41 | 13.75257 42 | 13.85098 43 | 13.75718 44 | 13.75347 45 | 13.84632 46 | 2550883.33,msec,task-clock:u,2550883330204,100.00,3.617,CPUs utilized 47 | 0,,context-switches:u,2550883330204,100.00,0.000,K/sec 48 | 0,,cpu-migrations:u,2550883330204,100.00,0.000,K/sec 49 | 6258122,,page-faults:u,2550883330204,100.00,0.002,M/sec 50 | 10870897142292,,cycles:u,2550900792990,100.00,4.262,GHz 51 | 5274464330041,,instructions:u,2550900792990,100.00,0.49,insn per cycle 52 | 392274951402,,branches:u,2550900792990,100.00,153.780,M/sec 53 | 1593443275,,branch-misses:u,2550900792990,100.00,0.41,of all branches 54 | -------------------------------------------------------------------------------- /reports/i5-4690k/multi-thread/clang/v4_register_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v4_register_reuse_cpp with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 4.736702 3 | 4.739531 4 | 4.732621 5 | 4.741631 6 | 4.744546 7 | 4.759461 8 | 4.751265 9 | 4.740279 10 | 4.741278 11 | 4.731262 12 | 4.730335 13 | 4.743871 14 | 4.725515 15 | 4.748131 16 | 4.752199 17 | 4.746246 18 | 4.741248 19 | 4.748475 20 | 4.733142 21 | 4.744569 22 | 4.743423 23 | 
4.734087 24 | 4.741428 25 | 4.742873 26 | 4.732432 27 | 4.749702 28 | 4.758365 29 | 4.726148 30 | 4.738788 31 | 4.745378 32 | 4.731898 33 | 4.756222 34 | 4.732795 35 | 4.741696 36 | 4.732024 37 | 4.724975 38 | 4.750806 39 | 4.732962 40 | 4.740284 41 | 4.747657 42 | 4.74441 43 | 4.742144 44 | 4.729434 45 | 4.744285 46 | 4.732987 47 | 4.74747 48 | 4.747619 49 | 4.754475 50 | 4.729029 51 | 4.744436 52 | 4.725821 53 | 4.74861 54 | 4.745909 55 | 4.737093 56 | 4.724365 57 | 4.745234 58 | 4.727144 59 | 4.727782 60 | 4.742249 61 | 4.750735 62 | 4.720634 63 | 4.75493 64 | 4.733673 65 | 4.734325 66 | 4.746308 67 | 4.724899 68 | 4.718437 69 | 4.727801 70 | 4.734918 71 | 4.739247 72 | 4.745999 73 | 4.74558 74 | 4.729037 75 | 4.732798 76 | 4.735395 77 | 4.743992 78 | 4.7183 79 | 4.735 80 | 4.750313 81 | 4.729297 82 | 4.752147 83 | 4.725591 84 | 4.728643 85 | 4.733115 86 | 4.748603 87 | 4.743279 88 | 4.756246 89 | 4.737494 90 | 4.724812 91 | 4.731911 92 | 4.749075 93 | 4.744945 94 | 4.728241 95 | 4.73847 96 | 4.743755 97 | 4.743434 98 | 4.752152 99 | 4.723064 100 | 4.733862 101 | 4.718021 102 | 2178131.75,msec,task-clock:u,2178131746219,100.00,3.112,CPUs utilized 103 | 0,,context-switches:u,2178131746219,100.00,0.000,K/sec 104 | 0,,cpu-migrations:u,2178131746219,100.00,0.000,K/sec 105 | 14133173,,page-faults:u,2178131746219,100.00,0.006,M/sec 106 | 9183157351806,,cycles:u,2178148737956,100.00,4.216,GHz 107 | 9203012178960,,instructions:u,2178148737956,100.00,1.00,insn per cycle 108 | 506506783805,,branches:u,2178148737956,100.00,232.542,M/sec 109 | 419739538,,branch-misses:u,2178148737956,100.00,0.08,of all branches 110 | -------------------------------------------------------------------------------- /reports/i5-4690k/multi-thread/clang/v5_more_register_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v5_more_register_reuse_cpp with input containing 36000000 elements, stopping after 100 iterations or 600 
seconds 2 | 3.417081 3 | 3.382345 4 | 3.38256 5 | 3.38786 6 | 3.395097 7 | 3.391184 8 | 3.386091 9 | 3.396241 10 | 3.382703 11 | 3.38606 12 | 3.398079 13 | 3.386044 14 | 3.401644 15 | 3.390223 16 | 3.387328 17 | 3.397655 18 | 3.386969 19 | 3.385428 20 | 3.392571 21 | 3.383991 22 | 3.383521 23 | 3.389752 24 | 3.384967 25 | 3.391806 26 | 3.384476 27 | 3.385144 28 | 3.379022 29 | 3.385024 30 | 3.402552 31 | 3.391195 32 | 3.3858 33 | 3.394759 34 | 3.386321 35 | 3.381275 36 | 3.395906 37 | 3.384787 38 | 3.384588 39 | 3.431229 40 | 3.38289 41 | 3.396216 42 | 3.383764 43 | 3.381509 44 | 3.396498 45 | 3.386717 46 | 3.386403 47 | 3.389893 48 | 3.3859 49 | 3.402481 50 | 3.386023 51 | 3.384663 52 | 3.396452 53 | 3.38456 54 | 3.387826 55 | 3.389324 56 | 3.38681 57 | 3.396795 58 | 3.383075 59 | 3.381234 60 | 3.396257 61 | 3.386903 62 | 3.385459 63 | 3.388005 64 | 3.385091 65 | 3.399801 66 | 3.385355 67 | 3.381871 68 | 3.396736 69 | 3.382443 70 | 3.381312 71 | 3.39078 72 | 3.383664 73 | 3.399482 74 | 3.388685 75 | 3.382715 76 | 3.389843 77 | 3.386424 78 | 3.381001 79 | 3.384432 80 | 3.382889 81 | 3.400026 82 | 3.396359 83 | 3.381621 84 | 3.393677 85 | 3.385559 86 | 3.38484 87 | 3.384787 88 | 3.384934 89 | 3.400426 90 | 3.389452 91 | 3.386346 92 | 3.394929 93 | 3.386337 94 | 3.381449 95 | 3.388616 96 | 3.385654 97 | 3.392855 98 | 3.388075 99 | 3.381152 100 | 3.389324 101 | 3.385832 102 | 1636206.35,msec,task-clock:u,1636206349726,100.00,2.896,CPUs utilized 103 | 0,,context-switches:u,1636206349726,100.00,0.000,K/sec 104 | 0,,cpu-migrations:u,1636206349726,100.00,0.000,K/sec 105 | 14133174,,page-faults:u,1636206349726,100.00,0.009,M/sec 106 | 6852594423662,,cycles:u,1636218062198,100.00,4.188,GHz 107 | 9529965185542,,instructions:u,1636218062198,100.00,1.39,insn per cycle 108 | 553350667497,,branches:u,1636218062198,100.00,338.191,M/sec 109 | 77948266,,branch-misses:u,1636218062198,100.00,0.01,of all branches 110 | 
-------------------------------------------------------------------------------- /reports/i5-4690k/multi-thread/clang/v6_prefetch.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v6_prefetch_cpp with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 3.375239 3 | 3.360937 4 | 3.363144 5 | 3.363562 6 | 3.363717 7 | 3.360244 8 | 3.375735 9 | 3.36353 10 | 3.361133 11 | 3.365484 12 | 3.36265 13 | 3.407627 14 | 3.366359 15 | 3.382068 16 | 3.412544 17 | 3.362875 18 | 3.373773 19 | 3.362537 20 | 3.371631 21 | 3.477172 22 | 3.363393 23 | 3.36123 24 | 3.366125 25 | 3.362109 26 | 3.363494 27 | 3.362771 28 | 3.368486 29 | 3.416516 30 | 3.363815 31 | 3.363842 32 | 3.36619 33 | 3.36393 34 | 3.365854 35 | 3.372147 36 | 3.379642 37 | 3.364029 38 | 3.376179 39 | 3.365904 40 | 3.364443 41 | 3.373646 42 | 3.367152 43 | 3.362146 44 | 3.411579 45 | 3.362013 46 | 3.371118 47 | 3.413072 48 | 3.361391 49 | 3.374579 50 | 3.406513 51 | 3.370913 52 | 3.368006 53 | 3.3648 54 | 3.413595 55 | 3.364173 56 | 3.362194 57 | 3.361932 58 | 3.362737 59 | 3.361724 60 | 3.364608 61 | 3.376831 62 | 3.361144 63 | 3.361934 64 | 3.373991 65 | 3.365597 66 | 3.376002 67 | 3.362619 68 | 3.409376 69 | 3.361828 70 | 3.36699 71 | 3.368015 72 | 3.370386 73 | 3.416638 74 | 3.360185 75 | 3.364132 76 | 3.360982 77 | 3.376611 78 | 3.415015 79 | 3.361636 80 | 3.363395 81 | 3.365719 82 | 3.363982 83 | 3.363974 84 | 3.36189 85 | 3.365384 86 | 3.365037 87 | 3.379841 88 | 3.363702 89 | 3.369961 90 | 3.424117 91 | 3.364259 92 | 3.364115 93 | 3.367182 94 | 3.406657 95 | 3.3615 96 | 3.363018 97 | 3.361501 98 | 3.365314 99 | 3.397674 100 | 3.364849 101 | 3.411587 102 | 1628314.95,msec,task-clock:u,1628314952749,100.00,2.890,CPUs utilized 103 | 0,,context-switches:u,1628314952749,100.00,0.000,K/sec 104 | 0,,cpu-migrations:u,1628314952749,100.00,0.000,K/sec 105 | 14133180,,page-faults:u,1628314952749,100.00,0.009,M/sec 106 | 
6820612586087,,cycles:u,1628326950976,100.00,4.189,GHz 107 | 10195658578056,,instructions:u,1628326950976,100.00,1.49,insn per cycle 108 | 550512319353,,branches:u,1628326950976,100.00,338.087,M/sec 109 | 77420694,,branch-misses:u,1628326950976,100.00,0.01,of all branches 110 | -------------------------------------------------------------------------------- /reports/i5-4690k/multi-thread/clang/v7_cache_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v7_cache_reuse_cpp with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 3.316847 3 | 3.315059 4 | 3.314832 5 | 3.35387 6 | 3.316347 7 | 3.342104 8 | 3.308614 9 | 3.358031 10 | 3.355875 11 | 3.308029 12 | 3.307697 13 | 3.33308 14 | 3.34889 15 | 3.346484 16 | 3.310941 17 | 3.306212 18 | 3.3067 19 | 3.306754 20 | 3.309206 21 | 3.325367 22 | 3.360164 23 | 3.309238 24 | 3.313534 25 | 3.307344 26 | 3.305932 27 | 3.306534 28 | 3.306366 29 | 3.306926 30 | 3.307646 31 | 3.306169 32 | 3.308011 33 | 3.307227 34 | 3.30809 35 | 3.306802 36 | 3.315819 37 | 3.307474 38 | 3.307387 39 | 3.306055 40 | 3.306856 41 | 3.311565 42 | 3.306939 43 | 3.308759 44 | 3.331098 45 | 3.308662 46 | 3.307148 47 | 3.305949 48 | 3.306341 49 | 3.308079 50 | 3.316218 51 | 3.347311 52 | 3.309284 53 | 3.307122 54 | 3.306562 55 | 3.306165 56 | 3.306548 57 | 3.307331 58 | 3.306981 59 | 3.311725 60 | 3.30632 61 | 3.306806 62 | 3.307918 63 | 3.317883 64 | 3.319029 65 | 3.334685 66 | 3.311206 67 | 3.315027 68 | 3.306392 69 | 3.344684 70 | 3.307511 71 | 3.307659 72 | 3.312138 73 | 3.3083 74 | 3.311882 75 | 3.307193 76 | 3.309482 77 | 3.333195 78 | 3.341594 79 | 3.341804 80 | 3.307398 81 | 3.307816 82 | 3.324432 83 | 3.352443 84 | 3.35567 85 | 3.334576 86 | 3.341749 87 | 3.31791 88 | 3.308031 89 | 3.335785 90 | 3.344419 91 | 3.328579 92 | 3.326579 93 | 3.314281 94 | 3.333903 95 | 3.315096 96 | 3.332976 97 | 3.309312 98 | 3.322965 99 | 3.31559 100 | 3.314717 
101 | 3.316378 102 | 1613273.73,msec,task-clock:u,1613273726040,100.00,2.891,CPUs utilized 103 | 0,,context-switches:u,1613273726040,100.00,0.000,K/sec 104 | 0,,cpu-migrations:u,1613273726040,100.00,0.000,K/sec 105 | 4758267,,page-faults:u,1613273726040,100.00,0.003,M/sec 106 | 6861678736564,,cycles:u,1613285340118,100.00,4.253,GHz 107 | 9555074738072,,instructions:u,1613285340118,100.00,1.39,insn per cycle 108 | 553354250940,,branches:u,1613285340118,100.00,343.001,M/sec 109 | 868339429,,branch-misses:u,1613285340118,100.00,0.16,of all branches 110 | -------------------------------------------------------------------------------- /reports/i5-4690k/multi-thread/gcc/v0_baseline.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v0_baseline_cpp with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 458.3613 3 | 457.1151 4 | 3641202.93,msec,task-clock:u,3641202929148,100.00,3.975,CPUs utilized 5 | 0,,context-switches:u,3641202929148,100.00,0.000,K/sec 6 | 0,,cpu-migrations:u,3641202929148,100.00,0.000,K/sec 7 | 70522,,page-faults:u,3641202929148,100.00,0.019,K/sec 8 | 15629256484747,,cycles:u,3641222736367,100.00,4.292,GHz 9 | 3028941461468,,instructions:u,3641222736367,100.00,0.19,insn per cycle 10 | 432731837265,,branches:u,3641222736367,100.00,118.843,M/sec 11 | 72052922,,branch-misses:u,3641222736367,100.00,0.02,of all branches 12 | -------------------------------------------------------------------------------- /reports/i5-4690k/multi-thread/gcc/v1_linear_reading.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v1_linear_reading_cpp with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 38.04425 3 | 38.04169 4 | 38.04512 5 | 38.04502 6 | 38.04227 7 | 38.78494 8 | 38.04592 9 | 38.0451 10 | 38.67476 11 | 38.04724 12 | 38.04534 13 | 38.04369 14 | 38.54027 15 | 38.04009 
16 | 38.04333 17 | 38.04586 18 | 2442598.13,msec,task-clock:u,2442598129397,100.00,3.982,CPUs utilized 19 | 0,,context-switches:u,2442598129397,100.00,0.000,K/sec 20 | 0,,cpu-migrations:u,2442598129397,100.00,0.000,K/sec 21 | 633035,,page-faults:u,2442598129397,100.00,0.259,K/sec 22 | 10472808323566,,cycles:u,2442612280608,100.00,4.288,GHz 23 | 24232505958957,,instructions:u,2442612280608,100.00,2.31,insn per cycle 24 | 3462387917559,,branches:u,2442612280608,100.00,1417.502,M/sec 25 | 576263293,,branch-misses:u,2442612280608,100.00,0.02,of all branches 26 | -------------------------------------------------------------------------------- /reports/i5-4690k/multi-thread/gcc/v2_instr_level_parallelism.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v2_instr_level_parallelism_cpp with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 25.63218 3 | 25.56598 4 | 25.62753 5 | 25.57349 6 | 25.56425 7 | 25.63099 8 | 25.56898 9 | 25.63885 10 | 25.56545 11 | 25.62779 12 | 25.69563 13 | 25.56448 14 | 25.57088 15 | 25.63968 16 | 25.56475 17 | 25.568 18 | 25.56431 19 | 25.56374 20 | 25.56367 21 | 25.5655 22 | 25.56346 23 | 25.56509 24 | 25.63277 25 | 25.56688 26 | 2454622.96,msec,task-clock:u,2454622962102,100.00,3.969,CPUs utilized 27 | 0,,context-switches:u,2454622962102,100.00,0.000,K/sec 28 | 0,,cpu-migrations:u,2454622962102,100.00,0.000,K/sec 29 | 1758058,,page-faults:u,2454622962102,100.00,0.716,K/sec 30 | 10520266643687,,cycles:u,2454638270644,100.00,4.286,GHz 31 | 20811469527891,,instructions:u,2454638270644,100.00,1.98,insn per cycle 32 | 1306019013993,,branches:u,2454638270644,100.00,532.065,M/sec 33 | 1083654240,,branch-misses:u,2454638270644,100.00,0.08,of all branches 34 | -------------------------------------------------------------------------------- /reports/i5-4690k/multi-thread/gcc/v3_simd.txt: 
-------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v3_simd_cpp with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 13.45977 3 | 13.46439 4 | 13.40841 5 | 13.4921 6 | 13.41007 7 | 13.49707 8 | 13.48556 9 | 13.41175 10 | 13.42819 11 | 13.41098 12 | 13.41067 13 | 13.41174 14 | 13.50366 15 | 13.41036 16 | 13.42674 17 | 13.41643 18 | 13.41167 19 | 13.48979 20 | 13.48548 21 | 13.41139 22 | 13.41176 23 | 13.40886 24 | 13.41248 25 | 13.49952 26 | 13.49842 27 | 13.48696 28 | 13.49721 29 | 13.41131 30 | 13.49471 31 | 13.47508 32 | 13.41928 33 | 13.50539 34 | 13.49941 35 | 13.49934 36 | 13.50442 37 | 13.41349 38 | 13.44271 39 | 13.4981 40 | 13.41143 41 | 13.4128 42 | 13.41022 43 | 13.41274 44 | 13.41252 45 | 13.41052 46 | 13.41106 47 | 2427321.48,msec,task-clock:u,2427321475975,100.00,3.959,CPUs utilized 48 | 0,,context-switches:u,2427321475975,100.00,0.000,K/sec 49 | 0,,cpu-migrations:u,2427321475975,100.00,0.000,K/sec 50 | 3234654,,page-faults:u,2427321475975,100.00,0.001,M/sec 51 | 10394332111958,,cycles:u,2427339012467,100.00,4.282,GHz 52 | 7453677315205,,instructions:u,2427339012467,100.00,0.72,insn per cycle 53 | 1234976266783,,branches:u,2427339012467,100.00,508.782,M/sec 54 | 1824928804,,branch-misses:u,2427339012467,100.00,0.15,of all branches 55 | -------------------------------------------------------------------------------- /reports/i5-4690k/multi-thread/gcc/v4_register_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v4_register_reuse_cpp with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 4.45185 3 | 4.465175 4 | 4.458802 5 | 4.472656 6 | 4.441669 7 | 4.445611 8 | 4.467134 9 | 4.458311 10 | 4.477398 11 | 4.444721 12 | 4.469279 13 | 4.444841 14 | 4.445482 15 | 4.441816 16 | 4.471443 17 | 4.465532 18 | 4.456101 19 | 4.447551 20 | 4.437399 21 | 4.471858 22 | 
4.442847 23 | 4.441508 24 | 4.441415 25 | 4.432446 26 | 4.457218 27 | 4.448291 28 | 4.454109 29 | 4.442139 30 | 4.457481 31 | 4.442297 32 | 4.46139 33 | 4.438792 34 | 4.450353 35 | 4.467483 36 | 4.456778 37 | 4.444857 38 | 4.440243 39 | 4.442783 40 | 4.455652 41 | 4.455616 42 | 4.459555 43 | 4.461814 44 | 4.454052 45 | 4.451465 46 | 4.450234 47 | 4.468017 48 | 4.441156 49 | 4.434469 50 | 4.435907 51 | 4.437805 52 | 4.458196 53 | 4.463985 54 | 4.450304 55 | 4.460829 56 | 4.440306 57 | 4.464091 58 | 4.440071 59 | 4.442176 60 | 4.440071 61 | 4.436153 62 | 4.448683 63 | 4.462391 64 | 4.464931 65 | 4.467814 66 | 4.455834 67 | 4.446612 68 | 4.457439 69 | 4.44817 70 | 4.458866 71 | 4.459374 72 | 4.469536 73 | 4.437278 74 | 4.457261 75 | 4.430844 76 | 4.444159 77 | 4.438894 78 | 4.438249 79 | 4.451599 80 | 4.452388 81 | 4.432299 82 | 4.444221 83 | 4.461082 84 | 4.440157 85 | 4.463282 86 | 4.456651 87 | 4.441677 88 | 4.426311 89 | 4.444513 90 | 4.439933 91 | 4.460963 92 | 4.461564 93 | 4.440478 94 | 4.441005 95 | 4.430808 96 | 4.431732 97 | 4.453079 98 | 4.438925 99 | 4.44237 100 | 4.463201 101 | 4.454485 102 | 1795901.20,msec,task-clock:u,1795901196878,100.00,3.879,CPUs utilized 103 | 0,,context-switches:u,1795901196878,100.00,0.000,K/sec 104 | 0,,cpu-migrations:u,1795901196878,100.00,0.000,K/sec 105 | 7101926,,page-faults:u,1795901196878,100.00,0.004,M/sec 106 | 7666127118650,,cycles:u,1795916708758,100.00,4.269,GHz 107 | 9349776607672,,instructions:u,1795916708758,100.00,1.22,insn per cycle 108 | 341881221444,,branches:u,1795916708758,100.00,190.368,M/sec 109 | 400989564,,branch-misses:u,1795916708758,100.00,0.12,of all branches 110 | -------------------------------------------------------------------------------- /reports/i5-4690k/multi-thread/gcc/v5_more_register_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v5_more_register_reuse_cpp with input containing 36000000 elements, stopping after 100 
iterations or 600 seconds 2 | 3.277192 3 | 3.271383 4 | 3.271533 5 | 3.302106 6 | 3.276914 7 | 3.272136 8 | 3.271716 9 | 3.271179 10 | 3.272446 11 | 3.271513 12 | 3.271524 13 | 3.276648 14 | 3.276942 15 | 3.272558 16 | 3.27218 17 | 3.273311 18 | 3.274866 19 | 3.274333 20 | 3.27489 21 | 3.331064 22 | 3.272554 23 | 3.277097 24 | 3.272214 25 | 3.286632 26 | 3.271731 27 | 3.271865 28 | 3.270503 29 | 3.276862 30 | 3.272528 31 | 3.271266 32 | 3.271191 33 | 3.271874 34 | 3.276968 35 | 3.272495 36 | 3.273977 37 | 3.271168 38 | 3.276324 39 | 3.273825 40 | 3.270911 41 | 3.2763 42 | 3.27183 43 | 3.273477 44 | 3.276605 45 | 3.274503 46 | 3.27531 47 | 3.272794 48 | 3.27136 49 | 3.276557 50 | 3.272556 51 | 3.271897 52 | 3.27358 53 | 3.276402 54 | 3.274678 55 | 3.328477 56 | 3.274067 57 | 3.287317 58 | 3.271674 59 | 3.271636 60 | 3.276543 61 | 3.272669 62 | 3.276597 63 | 3.277632 64 | 3.277523 65 | 3.272071 66 | 3.271413 67 | 3.276851 68 | 3.276372 69 | 3.275776 70 | 3.271934 71 | 3.266558 72 | 3.276459 73 | 3.274323 74 | 3.274417 75 | 3.273294 76 | 3.275539 77 | 3.274479 78 | 3.273075 79 | 3.294802 80 | 3.274479 81 | 3.274396 82 | 3.267627 83 | 3.271903 84 | 3.271629 85 | 3.277009 86 | 3.277683 87 | 3.275012 88 | 3.275234 89 | 3.272529 90 | 3.271687 91 | 3.272634 92 | 3.274139 93 | 3.273806 94 | 3.273355 95 | 3.269275 96 | 3.276991 97 | 3.331339 98 | 3.272491 99 | 3.330467 100 | 3.271725 101 | 3.272284 102 | 1320590.86,msec,task-clock:u,1320590864878,100.00,3.821,CPUs utilized 103 | 0,,context-switches:u,1320590864878,100.00,0.000,K/sec 104 | 0,,cpu-migrations:u,1320590864878,100.00,0.000,K/sec 105 | 7101927,,page-faults:u,1320590864878,100.00,0.005,M/sec 106 | 5630593401556,,cycles:u,1320602170568,100.00,4.264,GHz 107 | 8748065653322,,instructions:u,1320602170568,100.00,1.55,insn per cycle 108 | 375745569259,,branches:u,1320602170568,100.00,284.528,M/sec 109 | 57327217,,branch-misses:u,1320602170568,100.00,0.02,of all branches 110 | 
-------------------------------------------------------------------------------- /reports/i5-4690k/multi-thread/gcc/v6_prefetch.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v6_prefetch_cpp with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 3.254328 3 | 3.306905 4 | 3.254651 5 | 3.302915 6 | 3.255 7 | 3.260643 8 | 3.262076 9 | 3.300713 10 | 3.254866 11 | 3.301617 12 | 3.254443 13 | 3.317192 14 | 3.264714 15 | 3.300468 16 | 3.267554 17 | 3.263163 18 | 3.268995 19 | 3.303566 20 | 3.273925 21 | 3.297574 22 | 3.269409 23 | 3.300688 24 | 3.253635 25 | 3.298646 26 | 3.26108 27 | 3.301956 28 | 3.254834 29 | 3.300042 30 | 3.254188 31 | 3.254653 32 | 3.263923 33 | 3.258106 34 | 3.254595 35 | 3.269071 36 | 3.259668 37 | 3.299773 38 | 3.263015 39 | 3.353242 40 | 3.25557 41 | 3.262549 42 | 3.255303 43 | 3.259355 44 | 3.254121 45 | 3.299903 46 | 3.265922 47 | 3.305598 48 | 3.268734 49 | 3.361148 50 | 3.271925 51 | 3.305787 52 | 3.270912 53 | 3.310994 54 | 3.255177 55 | 3.256361 56 | 3.323333 57 | 3.274537 58 | 3.255923 59 | 3.344493 60 | 3.26475 61 | 3.268428 62 | 3.263198 63 | 3.270596 64 | 3.303919 65 | 3.259463 66 | 3.271186 67 | 3.25739 68 | 3.312598 69 | 3.262501 70 | 3.254928 71 | 3.303175 72 | 3.261037 73 | 3.317342 74 | 3.301115 75 | 3.298987 76 | 3.269581 77 | 3.301498 78 | 3.255953 79 | 3.257739 80 | 3.25442 81 | 3.298101 82 | 3.259744 83 | 3.256852 84 | 3.254505 85 | 3.258378 86 | 3.253977 87 | 3.298273 88 | 3.269146 89 | 3.262762 90 | 3.267537 91 | 3.30428 92 | 3.254392 93 | 3.306289 94 | 3.255731 95 | 3.30545 96 | 3.266055 97 | 3.303141 98 | 3.269309 99 | 3.312688 100 | 3.269429 101 | 3.306346 102 | 1322019.79,msec,task-clock:u,1322019788222,100.00,3.823,CPUs utilized 103 | 0,,context-switches:u,1322019788222,100.00,0.000,K/sec 104 | 0,,cpu-migrations:u,1322019788222,100.00,0.000,K/sec 105 | 7101927,,page-faults:u,1322019788222,100.00,0.005,M/sec 106 | 
5636928320745,,cycles:u,1322030715243,100.00,4.264,GHz 107 | 9760277998297,,instructions:u,1322030715243,100.00,1.73,insn per cycle 108 | 375695418902,,branches:u,1322030715243,100.00,284.183,M/sec 109 | 56564131,,branch-misses:u,1322030715243,100.00,0.02,of all branches 110 | -------------------------------------------------------------------------------- /reports/i5-4690k/multi-thread/gcc/v7_cache_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v7_cache_reuse_cpp with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 3.268904 3 | 3.26761 4 | 3.264447 5 | 3.264842 6 | 3.264808 7 | 3.265005 8 | 3.264908 9 | 3.26467 10 | 3.264741 11 | 3.264682 12 | 3.264799 13 | 3.264718 14 | 3.264899 15 | 3.265059 16 | 3.265024 17 | 3.264781 18 | 3.264742 19 | 3.264989 20 | 3.264841 21 | 3.26499 22 | 3.264834 23 | 3.26501 24 | 3.264656 25 | 3.264806 26 | 3.264782 27 | 3.264899 28 | 3.265045 29 | 3.26481 30 | 3.264687 31 | 3.264863 32 | 3.264803 33 | 3.264625 34 | 3.264891 35 | 3.264819 36 | 3.264782 37 | 3.265041 38 | 3.264797 39 | 3.264722 40 | 3.264755 41 | 3.265027 42 | 3.264901 43 | 3.264741 44 | 3.264628 45 | 3.265084 46 | 3.264832 47 | 3.264815 48 | 3.264777 49 | 3.264709 50 | 3.265098 51 | 3.264858 52 | 3.264911 53 | 3.264826 54 | 3.264738 55 | 3.264881 56 | 3.264631 57 | 3.264838 58 | 3.264766 59 | 3.264849 60 | 3.2645 61 | 3.264903 62 | 3.264687 63 | 3.265208 64 | 3.264756 65 | 3.264782 66 | 3.264715 67 | 3.264811 68 | 3.264911 69 | 3.265003 70 | 3.265011 71 | 3.264846 72 | 3.264823 73 | 3.264859 74 | 3.264954 75 | 3.265043 76 | 3.264777 77 | 3.265081 78 | 3.264689 79 | 3.264847 80 | 3.265024 81 | 3.265211 82 | 3.264782 83 | 3.264822 84 | 3.264923 85 | 3.264658 86 | 3.264769 87 | 3.264946 88 | 3.264706 89 | 3.264822 90 | 3.264721 91 | 3.264576 92 | 3.26479 93 | 3.264749 94 | 3.264961 95 | 3.26445 96 | 3.264889 97 | 3.265152 98 | 3.264769 99 | 3.264821 100 | 3.264541 
101 | 3.265172 102 | 1322777.69,msec,task-clock:u,1322777685028,100.00,3.840,CPUs utilized 103 | 0,,context-switches:u,1322777685028,100.00,0.000,K/sec 104 | 0,,cpu-migrations:u,1322777685028,100.00,0.000,K/sec 105 | 3607467,,page-faults:u,1322777685028,100.00,0.003,M/sec 106 | 5658087153098,,cycles:u,1322786134566,100.00,4.277,GHz 107 | 8787498131297,,instructions:u,1322786134566,100.00,1.55,insn per cycle 108 | 381005991328,,branches:u,1322786134566,100.00,288.035,M/sec 109 | 846969914,,branch-misses:u,1322786134566,100.00,0.22,of all branches 110 | -------------------------------------------------------------------------------- /reports/i5-4690k/multi-thread/rustc/v0_baseline.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v0_baseline_rust with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 439.3356 3 | 439.3686 4 | 3515031.82,msec,task-clock:u,3515031816002,100.00,3.998,CPUs utilized 5 | 0,,context-switches:u,3515031816002,100.00,0.000,K/sec 6 | 0,,cpu-migrations:u,3515031816002,100.00,0.000,K/sec 7 | 70570,,page-faults:u,3515031816002,100.00,0.020,K/sec 8 | 15098008180255,,cycles:u,3515053029724,100.00,4.295,GHz 9 | 6053423198129,,instructions:u,3515053029724,100.00,0.40,insn per cycle 10 | 1296797370085,,branches:u,3515053029724,100.00,368.929,M/sec 11 | 72062798,,branch-misses:u,3515053029724,100.00,0.01,of all branches 12 | -------------------------------------------------------------------------------- /reports/i5-4690k/multi-thread/rustc/v1_linear_reading.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v1_linear_reading_rust with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 41.91768 3 | 43.18643 4 | 41.88899 5 | 41.94347 6 | 41.92847 7 | 41.91513 8 | 41.94373 9 | 41.94864 10 | 41.92039 11 | 41.9518 12 | 41.91089 13 | 41.92521 14 | 41.91063 15 | 
41.91564 16 | 41.95192 17 | 2517308.36,msec,task-clock:u,2517308359642,100.00,3.977,CPUs utilized 18 | 0,,context-switches:u,2517308359642,100.00,0.000,K/sec 19 | 0,,cpu-migrations:u,2517308359642,100.00,0.000,K/sec 20 | 597939,,page-faults:u,2517308359642,100.00,0.238,K/sec 21 | 10793088039126,,cycles:u,2517323346542,100.00,4.288,GHz 22 | 10980400944403,,instructions:u,2517323346542,100.00,1.02,insn per cycle 23 | 412028457291,,branches:u,2517323346542,100.00,163.678,M/sec 24 | 540521567,,branch-misses:u,2517323346542,100.00,0.13,of all branches 25 | -------------------------------------------------------------------------------- /reports/i5-4690k/multi-thread/rustc/v2_instr_level_parallelism.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v2_instr_level_parallelism_rust with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 15.9291 3 | 15.8903 4 | 15.91213 5 | 15.91541 6 | 15.86994 7 | 15.84319 8 | 15.92065 9 | 15.90996 10 | 15.87426 11 | 15.87505 12 | 15.91497 13 | 15.90945 14 | 15.87053 15 | 15.8818 16 | 15.91162 17 | 15.90818 18 | 15.91685 19 | 15.85568 20 | 15.88653 21 | 15.87557 22 | 15.87355 23 | 15.90859 24 | 15.90207 25 | 15.92797 26 | 15.90222 27 | 15.91629 28 | 15.89163 29 | 15.83992 30 | 15.8721 31 | 15.87648 32 | 15.89646 33 | 15.90775 34 | 15.91427 35 | 15.90305 36 | 15.89137 37 | 15.89263 38 | 15.91248 39 | 15.90542 40 | 2412669.31,msec,task-clock:u,2412669309794,100.00,3.950,CPUs utilized 41 | 0,,context-switches:u,2412669309794,100.00,0.000,K/sec 42 | 0,,cpu-migrations:u,2412669309794,100.00,0.000,K/sec 43 | 2742531,,page-faults:u,2412669309794,100.00,0.001,M/sec 44 | 10336044526349,,cycles:u,2412687560054,100.00,4.284,GHz 45 | 22707879933766,,instructions:u,2412687560054,100.00,2.20,insn per cycle 46 | 4124545476507,,branches:u,2412687560054,100.00,1709.536,M/sec 47 | 1368908176,,branch-misses:u,2412687560054,100.00,0.03,of all branches 48 | 
-------------------------------------------------------------------------------- /reports/i5-4690k/multi-thread/rustc/v3_simd.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v3_simd_rust with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 13.62626 3 | 13.65181 4 | 13.64919 5 | 13.646 6 | 13.69454 7 | 13.66079 8 | 13.66105 9 | 13.65479 10 | 13.62886 11 | 13.62883 12 | 13.65072 13 | 13.63061 14 | 13.62889 15 | 13.62832 16 | 13.63798 17 | 13.63208 18 | 13.62762 19 | 13.6293 20 | 13.63224 21 | 13.637 22 | 13.64934 23 | 13.63154 24 | 13.6928 25 | 13.69534 26 | 13.63876 27 | 13.62957 28 | 13.63386 29 | 13.67665 30 | 13.70554 31 | 13.72253 32 | 13.6926 33 | 13.63453 34 | 13.64058 35 | 13.64098 36 | 13.68599 37 | 13.70488 38 | 13.70526 39 | 13.71916 40 | 13.69446 41 | 13.63608 42 | 13.6297 43 | 13.65701 44 | 13.6834 45 | 13.71819 46 | 2399951.95,msec,task-clock:u,2399951949262,100.00,3.942,CPUs utilized 47 | 0,,context-switches:u,2399951949262,100.00,0.000,K/sec 48 | 0,,cpu-migrations:u,2399951949262,100.00,0.000,K/sec 49 | 3164407,,page-faults:u,2399951949262,100.00,0.001,M/sec 50 | 10279657544161,,cycles:u,2399967904861,100.00,4.283,GHz 51 | 4930238684491,,instructions:u,2399967904861,100.00,0.48,insn per cycle 52 | 323796312624,,branches:u,2399967904861,100.00,134.918,M/sec 53 | 1585625604,,branch-misses:u,2399967904861,100.00,0.49,of all branches 54 | -------------------------------------------------------------------------------- /reports/i5-4690k/multi-thread/rustc/v4_register_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v4_register_reuse_rust with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 4.161156 3 | 4.167697 4 | 4.184439 5 | 4.165604 6 | 4.166163 7 | 4.161656 8 | 4.16244 9 | 4.16364 10 | 4.17492 11 | 4.187015 12 | 4.16548 13 | 4.168132 14 | 
4.19629 15 | 4.290576 16 | 4.16948 17 | 4.167232 18 | 4.166696 19 | 4.167409 20 | 4.184584 21 | 4.165854 22 | 4.173351 23 | 4.172181 24 | 4.17944 25 | 4.176035 26 | 4.166896 27 | 4.171459 28 | 4.172755 29 | 4.174858 30 | 4.168533 31 | 4.1685 32 | 4.182471 33 | 4.172812 34 | 4.17345 35 | 4.178399 36 | 4.16882 37 | 4.168261 38 | 4.163656 39 | 4.174432 40 | 4.167066 41 | 4.166494 42 | 4.161231 43 | 4.167318 44 | 4.164999 45 | 4.164355 46 | 4.167665 47 | 4.277095 48 | 4.168307 49 | 4.169075 50 | 4.175732 51 | 4.187386 52 | 4.178648 53 | 4.179818 54 | 4.181967 55 | 4.179706 56 | 4.170366 57 | 4.184524 58 | 4.167853 59 | 4.165143 60 | 4.179815 61 | 4.167871 62 | 4.17348 63 | 4.165117 64 | 4.170191 65 | 4.181526 66 | 4.168341 67 | 4.16732 68 | 4.168087 69 | 4.171824 70 | 4.179679 71 | 4.181104 72 | 4.178723 73 | 4.176076 74 | 4.166728 75 | 4.177029 76 | 4.171767 77 | 4.178119 78 | 4.167845 79 | 4.167336 80 | 4.170563 81 | 4.169374 82 | 4.179418 83 | 4.207005 84 | 4.190192 85 | 4.185328 86 | 4.179792 87 | 4.171015 88 | 4.169595 89 | 4.16989 90 | 4.167598 91 | 4.166168 92 | 4.16849 93 | 4.170752 94 | 4.176952 95 | 4.182527 96 | 4.169832 97 | 4.180781 98 | 4.16607 99 | 4.170743 100 | 4.163949 101 | 4.169158 102 | 1661429.17,msec,task-clock:u,1661429167070,100.00,3.815,CPUs utilized 103 | 0,,context-switches:u,1661429167070,100.00,0.000,K/sec 104 | 0,,cpu-migrations:u,1661429167070,100.00,0.000,K/sec 105 | 7101991,,page-faults:u,1661429167070,100.00,0.004,M/sec 106 | 7096209337977,,cycles:u,1661443322201,100.00,4.271,GHz 107 | 8503872136227,,instructions:u,1661443322201,100.00,1.20,insn per cycle 108 | 360177698096,,branches:u,1661443322201,100.00,216.788,M/sec 109 | 401451613,,branch-misses:u,1661443322201,100.00,0.11,of all branches 110 | -------------------------------------------------------------------------------- /reports/i5-4690k/multi-thread/rustc/v5_more_register_reuse.txt: -------------------------------------------------------------------------------- 1 | 
benchmarking ./build/bin/v5_more_register_reuse_rust with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 3.344396 3 | 3.376633 4 | 3.346057 5 | 3.376571 6 | 3.34848 7 | 3.378017 8 | 3.347805 9 | 3.364503 10 | 3.363545 11 | 3.346612 12 | 3.347054 13 | 3.349246 14 | 3.347443 15 | 3.364915 16 | 3.347648 17 | 3.347058 18 | 3.347804 19 | 3.346712 20 | 3.348257 21 | 3.347535 22 | 3.347864 23 | 3.361048 24 | 3.348361 25 | 3.376975 26 | 3.346472 27 | 3.361864 28 | 3.347014 29 | 3.38077 30 | 3.347634 31 | 3.363353 32 | 3.348377 33 | 3.349274 34 | 3.346645 35 | 3.347713 36 | 3.346396 37 | 3.346717 38 | 3.345994 39 | 3.347342 40 | 3.347545 41 | 3.350997 42 | 3.348953 43 | 3.348114 44 | 3.348001 45 | 3.347458 46 | 3.347237 47 | 3.362523 48 | 3.349508 49 | 3.347467 50 | 3.347644 51 | 3.349025 52 | 3.349177 53 | 3.379478 54 | 3.349392 55 | 3.34842 56 | 3.347713 57 | 3.348136 58 | 3.345434 59 | 3.361333 60 | 3.345119 61 | 3.346595 62 | 3.3469 63 | 3.34795 64 | 3.348199 65 | 3.348201 66 | 3.3484 67 | 3.348709 68 | 3.34977 69 | 3.36289 70 | 3.3491 71 | 3.349028 72 | 3.347876 73 | 3.348165 74 | 3.346959 75 | 3.346976 76 | 3.34674 77 | 3.347478 78 | 3.344944 79 | 3.37868 80 | 3.363158 81 | 3.34676 82 | 3.34859 83 | 3.348807 84 | 3.347309 85 | 3.346511 86 | 3.346359 87 | 3.360227 88 | 3.347091 89 | 3.344836 90 | 3.34551 91 | 3.344904 92 | 3.345481 93 | 3.347875 94 | 3.348406 95 | 3.348751 96 | 3.347939 97 | 3.350175 98 | 3.347084 99 | 3.348042 100 | 3.34514 101 | 3.389307 102 | 1330107.16,msec,task-clock:u,1330107162230,100.00,3.767,CPUs utilized 103 | 0,,context-switches:u,1330107162230,100.00,0.000,K/sec 104 | 0,,cpu-migrations:u,1330107162230,100.00,0.000,K/sec 105 | 7101993,,page-faults:u,1330107162230,100.00,0.005,M/sec 106 | 5675797647182,,cycles:u,1330119400859,100.00,4.267,GHz 107 | 9120565723007,,instructions:u,1330119400859,100.00,1.61,insn per cycle 108 | 391926324630,,branches:u,1330119400859,100.00,294.658,M/sec 109 | 
57425060,,branch-misses:u,1330119400859,100.00,0.01,of all branches 110 | -------------------------------------------------------------------------------- /reports/i5-4690k/multi-thread/rustc/v6_prefetch.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v6_prefetch_rust with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 3.391815 3 | 3.354579 4 | 3.324928 5 | 3.394934 6 | 3.389482 7 | 3.384811 8 | 3.340719 9 | 3.33606 10 | 3.338368 11 | 3.396572 12 | 3.339392 13 | 3.401743 14 | 3.369336 15 | 3.324527 16 | 3.380695 17 | 3.373656 18 | 3.337957 19 | 3.329562 20 | 3.324264 21 | 3.382621 22 | 3.338447 23 | 3.325531 24 | 3.325768 25 | 3.376741 26 | 3.347132 27 | 3.390403 28 | 3.324698 29 | 3.325433 30 | 3.342487 31 | 3.325196 32 | 3.325959 33 | 3.326767 34 | 3.326329 35 | 3.349211 36 | 3.32643 37 | 3.37446 38 | 3.325605 39 | 3.371849 40 | 3.324941 41 | 3.397925 42 | 3.355564 43 | 3.374328 44 | 3.344623 45 | 3.387638 46 | 3.341394 47 | 3.373867 48 | 3.343307 49 | 3.380097 50 | 3.341398 51 | 3.379394 52 | 3.324476 53 | 3.390287 54 | 3.32428 55 | 3.326074 56 | 3.327468 57 | 3.36336 58 | 3.326882 59 | 3.331866 60 | 3.326193 61 | 3.334535 62 | 3.324336 63 | 3.343995 64 | 3.331657 65 | 3.390438 66 | 3.343392 67 | 3.380959 68 | 3.345109 69 | 3.379557 70 | 3.327814 71 | 3.328595 72 | 3.326273 73 | 3.327644 74 | 3.412817 75 | 3.328285 76 | 3.375609 77 | 3.327539 78 | 3.326967 79 | 3.345531 80 | 3.326864 81 | 3.392958 82 | 3.327237 83 | 3.332293 84 | 3.347554 85 | 3.390179 86 | 3.326378 87 | 3.32736 88 | 3.331361 89 | 3.377019 90 | 3.328368 91 | 3.330439 92 | 3.394953 93 | 3.370664 94 | 3.326419 95 | 3.335664 96 | 3.333444 97 | 3.376667 98 | 3.326226 99 | 3.345482 100 | 3.33938 101 | 3.369779 102 | 1329325.54,msec,task-clock:u,1329325542223,100.00,3.766,CPUs utilized 103 | 0,,context-switches:u,1329325542223,100.00,0.000,K/sec 104 | 
0,,cpu-migrations:u,1329325542223,100.00,0.000,K/sec 105 | 7101990,,page-faults:u,1329325542223,100.00,0.005,M/sec 106 | 5672520070759,,cycles:u,1329337518617,100.00,4.267,GHz 107 | 9792756424078,,instructions:u,1329337518617,100.00,1.73,insn per cycle 108 | 390970747610,,branches:u,1329337518617,100.00,294.112,M/sec 109 | 61396618,,branch-misses:u,1329337518617,100.00,0.02,of all branches 110 | -------------------------------------------------------------------------------- /reports/i5-4690k/multi-thread/rustc/v7_cache_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v7_cache_reuse_rust with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 3.338083 3 | 3.347926 4 | 3.344989 5 | 3.332429 6 | 3.366736 7 | 3.344052 8 | 3.35619 9 | 3.355221 10 | 3.333394 11 | 3.337888 12 | 3.33252 13 | 3.339912 14 | 3.340505 15 | 3.339651 16 | 3.331347 17 | 3.329199 18 | 3.373041 19 | 3.330855 20 | 3.329248 21 | 3.328716 22 | 3.330236 23 | 3.329796 24 | 3.331752 25 | 3.327561 26 | 3.332211 27 | 3.327594 28 | 3.329632 29 | 3.349328 30 | 3.332388 31 | 3.335554 32 | 3.332186 33 | 3.362874 34 | 3.326446 35 | 3.332952 36 | 3.340925 37 | 3.337696 38 | 3.327687 39 | 3.334718 40 | 3.329456 41 | 3.338923 42 | 3.339415 43 | 3.335424 44 | 3.343693 45 | 3.333736 46 | 3.343379 47 | 3.337064 48 | 3.328935 49 | 3.328438 50 | 3.340129 51 | 3.351933 52 | 3.327548 53 | 3.329186 54 | 3.329966 55 | 3.329927 56 | 3.350561 57 | 3.329939 58 | 3.337636 59 | 3.328465 60 | 3.329681 61 | 3.342959 62 | 3.329623 63 | 3.328697 64 | 3.338245 65 | 3.347372 66 | 3.330781 67 | 3.329772 68 | 3.333117 69 | 3.334936 70 | 3.33062 71 | 3.335221 72 | 3.334507 73 | 3.327705 74 | 3.33278 75 | 3.331709 76 | 3.334524 77 | 3.331483 78 | 3.339427 79 | 3.3317 80 | 3.330848 81 | 3.331241 82 | 3.329944 83 | 3.329582 84 | 3.328928 85 | 3.341773 86 | 3.352921 87 | 3.351797 88 | 3.330534 89 | 3.331795 90 | 3.340063 91 | 3.347231 
92 | 3.332645 93 | 3.329288 94 | 3.331248 95 | 3.330929 96 | 3.334972 97 | 3.328717 98 | 3.328729 99 | 3.334378 100 | 3.330372 101 | 3.328484 102 | 1334857.12,msec,task-clock:u,1334857118004,100.00,3.797,CPUs utilized 103 | 0,,context-switches:u,1334857118004,100.00,0.000,K/sec 104 | 0,,cpu-migrations:u,1334857118004,100.00,0.000,K/sec 105 | 3630970,,page-faults:u,1334857118004,100.00,0.003,M/sec 106 | 5708384389036,,cycles:u,1334883443122,100.00,4.276,GHz 107 | 8904095849350,,instructions:u,1334883443122,100.00,1.56,insn per cycle 108 | 404093146645,,branches:u,1334883443122,100.00,302.724,M/sec 109 | 916706656,,branch-misses:u,1334883443122,100.00,0.23,of all branches 110 | -------------------------------------------------------------------------------- /reports/i5-4690k/single-thread/clang/v0_baseline.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v0_baseline_cpp with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 1808.228 3 | 1810561.77,msec,task-clock:u,1810561771497,100.00,1.000,CPUs utilized 4 | 0,,context-switches:u,1810561771497,100.00,0.000,K/sec 5 | 0,,cpu-migrations:u,1810561771497,100.00,0.000,K/sec 6 | 70500,,page-faults:u,1810561771497,100.00,0.039,K/sec 7 | 7776165823446,,cycles:u,1810565083017,100.00,4.295,GHz 8 | 1086996040255,,instructions:u,1810565083017,100.00,0.14,insn per cycle 9 | 55273529987,,branches:u,1810565083017,100.00,30.528,M/sec 10 | 36044200,,branch-misses:u,1810565083017,100.00,0.07,of all branches 11 | -------------------------------------------------------------------------------- /reports/i5-4690k/single-thread/clang/v1_linear_reading.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v1_linear_reading_cpp with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 166.7658 3 | 166.7461 4 | 166.7529 5 | 166.7511 6 | 
676066.16,msec,task-clock:u,676066162913,100.00,1.000,CPUs utilized 7 | 0,,context-switches:u,676066162913,100.00,0.000,K/sec 8 | 0,,cpu-migrations:u,676066162913,100.00,0.000,K/sec 9 | 211127,,page-faults:u,676066162913,100.00,0.312,K/sec 10 | 2902927838764,,cycles:u,676067567371,100.00,4.294,GHz 11 | 3266868451270,,instructions:u,676067567371,100.00,1.13,insn per cycle 12 | 221071428090,,branches:u,676067567371,100.00,326.997,M/sec 13 | 144103508,,branch-misses:u,676067567371,100.00,0.07,of all branches 14 | -------------------------------------------------------------------------------- /reports/i5-4690k/single-thread/clang/v2_instr_level_parallelism.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v2_instr_level_parallelism_cpp with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 258.7073 3 | 258.6537 4 | 258.6703 5 | 782761.51,msec,task-clock:u,782761511453,100.00,1.000,CPUs utilized 6 | 0,,context-switches:u,782761511453,100.00,0.000,K/sec 7 | 0,,cpu-migrations:u,782761511453,100.00,0.000,K/sec 8 | 281443,,page-faults:u,782761511453,100.00,0.360,K/sec 9 | 3356442264106,,cycles:u,782762709728,100.00,4.288,GHz 10 | 9903246106106,,instructions:u,782762709728,100.00,2.95,insn per cycle 11 | 165605355124,,branches:u,782762709728,100.00,211.566,M/sec 12 | 108089169,,branch-misses:u,782762709728,100.00,0.07,of all branches 13 | -------------------------------------------------------------------------------- /reports/i5-4690k/single-thread/clang/v3_simd.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v3_simd_cpp with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 48.87506 3 | 48.97108 4 | 48.99161 5 | 48.9776 6 | 48.99501 7 | 49.00052 8 | 48.96392 9 | 48.9827 10 | 48.98326 11 | 48.98814 12 | 48.98198 13 | 49.00695 14 | 48.99822 15 | 
665748.85,msec,task-clock:u,665748849871,100.00,1.000,CPUs utilized 16 | 0,,context-switches:u,665748849871,100.00,0.000,K/sec 17 | 0,,cpu-migrations:u,665748849871,100.00,0.000,K/sec 18 | 1898642,,page-faults:u,665748849871,100.00,0.003,M/sec 19 | 2850586300108,,cycles:u,665749939022,100.00,4.282,GHz 20 | 2553634931042,,instructions:u,665749939022,100.00,0.90,insn per cycle 21 | 367453681385,,branches:u,665749939022,100.00,551.940,M/sec 22 | 468198076,,branch-misses:u,665749939022,100.00,0.13,of all branches 23 | -------------------------------------------------------------------------------- /reports/i5-4690k/single-thread/clang/v4_register_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v4_register_reuse_cpp with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 17.69937 3 | 17.80449 4 | 17.80391 5 | 17.80555 6 | 17.80608 7 | 17.7983 8 | 17.80572 9 | 17.805 10 | 17.80571 11 | 17.80425 12 | 17.80526 13 | 17.80215 14 | 17.80544 15 | 17.72948 16 | 17.65965 17 | 17.65622 18 | 17.65352 19 | 17.65534 20 | 17.65149 21 | 17.64765 22 | 17.66381 23 | 17.64108 24 | 17.66463 25 | 17.65964 26 | 17.66981 27 | 17.65305 28 | 17.67474 29 | 17.65064 30 | 17.67681 31 | 17.64443 32 | 17.64472 33 | 17.66626 34 | 17.66587 35 | 17.65463 36 | 677975.47,msec,task-clock:u,677975468783,100.00,1.000,CPUs utilized 37 | 0,,context-switches:u,677975468783,100.00,0.000,K/sec 38 | 0,,cpu-migrations:u,677975468783,100.00,0.000,K/sec 39 | 4851785,,page-faults:u,677975468783,100.00,0.007,M/sec 40 | 2890320037797,,cycles:u,677976684330,100.00,4.263,GHz 41 | 3106060352793,,instructions:u,677976684330,100.00,1.07,insn per cycle 42 | 145555341665,,branches:u,677976684330,100.00,214.691,M/sec 43 | 136356810,,branch-misses:u,677976684330,100.00,0.09,of all branches 44 | -------------------------------------------------------------------------------- 
/reports/i5-4690k/single-thread/clang/v5_more_register_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v5_more_register_reuse_cpp with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 13.14012 3 | 13.13718 4 | 13.13781 5 | 13.13704 6 | 13.13712 7 | 13.13696 8 | 13.13698 9 | 13.13658 10 | 13.13654 11 | 13.13687 12 | 13.13699 13 | 13.13664 14 | 13.13734 15 | 13.13663 16 | 13.13566 17 | 13.1328 18 | 13.13275 19 | 13.1327 20 | 13.1327 21 | 13.13274 22 | 13.13267 23 | 13.13273 24 | 13.13269 25 | 13.13565 26 | 13.13714 27 | 13.13739 28 | 13.13418 29 | 13.1375 30 | 13.13443 31 | 13.13704 32 | 13.13783 33 | 13.13716 34 | 13.13812 35 | 13.135 36 | 13.13585 37 | 13.13274 38 | 13.1326 39 | 13.13279 40 | 13.13268 41 | 13.13273 42 | 13.13268 43 | 13.13271 44 | 13.13272 45 | 13.13274 46 | 13.13268 47 | 13.1328 48 | 706559.54,msec,task-clock:u,706559538705,100.00,1.000,CPUs utilized 49 | 0,,context-switches:u,706559538705,100.00,0.000,K/sec 50 | 0,,cpu-migrations:u,706559538705,100.00,0.000,K/sec 51 | 6539297,,page-faults:u,706559538705,100.00,0.009,M/sec 52 | 3009811133704,,cycles:u,706561103696,100.00,4.260,GHz 53 | 4202002205467,,instructions:u,706561103696,100.00,1.40,insn per cycle 54 | 213488532857,,branches:u,706561103696,100.00,302.152,M/sec 55 | 25998981,,branch-misses:u,706561103696,100.00,0.01,of all branches 56 | -------------------------------------------------------------------------------- /reports/i5-4690k/single-thread/clang/v6_prefetch.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v6_prefetch_cpp with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 13.10448 3 | 13.10257 4 | 13.10202 5 | 13.10019 6 | 13.09989 7 | 13.10098 8 | 13.101 9 | 13.10103 10 | 13.10114 11 | 13.10124 12 | 13.1017 13 | 13.10208 14 | 13.10221 15 | 13.10252 16 | 13.10266 17 | 
13.10177 18 | 13.10368 19 | 13.10082 20 | 13.10176 21 | 13.10018 22 | 13.1003 23 | 13.10014 24 | 13.10019 25 | 13.09994 26 | 13.09991 27 | 13.1001 28 | 13.09989 29 | 13.10125 30 | 13.10188 31 | 13.10014 32 | 13.09979 33 | 13.09987 34 | 13.09986 35 | 13.10001 36 | 13.10068 37 | 13.10049 38 | 13.10048 39 | 13.1004 40 | 13.10191 41 | 13.09998 42 | 13.10045 43 | 13.10013 44 | 13.10153 45 | 13.10187 46 | 13.10058 47 | 13.10181 48 | 705100.21,msec,task-clock:u,705100211562,100.00,1.000,CPUs utilized 49 | 0,,context-switches:u,705100211562,100.00,0.000,K/sec 50 | 0,,cpu-migrations:u,705100211562,100.00,0.000,K/sec 51 | 6539296,,page-faults:u,705100211562,100.00,0.009,M/sec 52 | 3003491900657,,cycles:u,705101765609,100.00,4.260,GHz 53 | 4512502202002,,instructions:u,705101765609,100.00,1.50,insn per cycle 54 | 213488532134,,branches:u,705101765609,100.00,302.778,M/sec 55 | 25992112,,branch-misses:u,705101765609,100.00,0.01,of all branches 56 | -------------------------------------------------------------------------------- /reports/i5-4690k/single-thread/clang/v7_cache_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v7_cache_reuse_cpp with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 12.94461 3 | 12.94722 4 | 12.94506 5 | 12.94129 6 | 12.94149 7 | 12.9364 8 | 12.95996 9 | 12.94134 10 | 12.94143 11 | 12.94111 12 | 12.94149 13 | 12.94331 14 | 12.96024 15 | 12.95946 16 | 12.96077 17 | 12.95597 18 | 12.95528 19 | 12.95809 20 | 12.95945 21 | 12.95422 22 | 12.95371 23 | 12.95361 24 | 12.95402 25 | 12.95757 26 | 12.95744 27 | 12.95728 28 | 12.95403 29 | 12.94958 30 | 12.94024 31 | 12.95342 32 | 12.95314 33 | 12.95154 34 | 12.95317 35 | 12.95795 36 | 12.95415 37 | 12.95612 38 | 12.95515 39 | 12.95351 40 | 12.95728 41 | 12.94897 42 | 12.96223 43 | 12.95766 44 | 12.95456 45 | 12.95724 46 | 12.95514 47 | 12.95706 48 | 12.95434 49 | 
713480.17,msec,task-clock:u,713480169455,100.00,1.000,CPUs utilized 50 | 0,,context-switches:u,713480169455,100.00,0.000,K/sec 51 | 0,,cpu-migrations:u,713480169455,100.00,0.000,K/sec 52 | 2233588,,page-faults:u,713480169455,100.00,0.003,M/sec 53 | 3055679693980,,cycles:u,713481722544,100.00,4.283,GHz 54 | 4485671522625,,instructions:u,713481722544,100.00,1.47,insn per cycle 55 | 220224832792,,branches:u,713481722544,100.00,308.663,M/sec 56 | 400783292,,branch-misses:u,713481722544,100.00,0.18,of all branches 57 | -------------------------------------------------------------------------------- /reports/i5-4690k/single-thread/gcc/v0_baseline.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v0_baseline_cpp with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 1821.949 3 | 1822214.91,msec,task-clock:u,1822214912319,100.00,1.000,CPUs utilized 4 | 0,,context-switches:u,1822214912319,100.00,0.000,K/sec 5 | 0,,cpu-migrations:u,1822214912319,100.00,0.000,K/sec 6 | 70498,,page-faults:u,1822214912319,100.00,0.039,K/sec 7 | 7826161892186,,cycles:u,1822217857758,100.00,4.295,GHz 8 | 1514469085728,,instructions:u,1822217857758,100.00,0.19,insn per cycle 9 | 216364538760,,branches:u,1822217857758,100.00,118.737,M/sec 10 | 36053556,,branch-misses:u,1822217857758,100.00,0.02,of all branches 11 | -------------------------------------------------------------------------------- /reports/i5-4690k/single-thread/gcc/v1_linear_reading.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v1_linear_reading_cpp with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 152.0287 3 | 151.9906 4 | 151.9901 5 | 151.9944 6 | 608799.91,msec,task-clock:u,608799914663,100.00,1.000,CPUs utilized 7 | 0,,context-switches:u,608799914663,100.00,0.000,K/sec 8 | 0,,cpu-migrations:u,608799914663,100.00,0.000,K/sec 
9 | 211126,,page-faults:u,608799914663,100.00,0.347,K/sec 10 | 2611647821007,,cycles:u,608801184800,100.00,4.290,GHz 11 | 6058101175801,,instructions:u,608801184800,100.00,2.32,insn per cycle 12 | 865588355729,,branches:u,608801184800,100.00,1421.794,M/sec 13 | 144122505,,branch-misses:u,608801184800,100.00,0.02,of all branches 14 | -------------------------------------------------------------------------------- /reports/i5-4690k/single-thread/gcc/v2_instr_level_parallelism.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v2_instr_level_parallelism_cpp with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 101.7884 3 | 101.7871 4 | 101.7949 5 | 101.7957 6 | 101.8007 7 | 101.7901 8 | 611905.75,msec,task-clock:u,611905747730,100.00,1.000,CPUs utilized 9 | 0,,context-switches:u,611905747730,100.00,0.000,K/sec 10 | 0,,cpu-migrations:u,611905747730,100.00,0.000,K/sec 11 | 492385,,page-faults:u,611905747730,100.00,0.805,K/sec 12 | 2625054691209,,cycles:u,611907019493,100.00,4.290,GHz 13 | 5525734897225,,instructions:u,611907019493,100.00,2.10,insn per cycle 14 | 326434998885,,branches:u,611907019493,100.00,533.473,M/sec 15 | 272295289,,branch-misses:u,611907019493,100.00,0.08,of all branches 16 | -------------------------------------------------------------------------------- /reports/i5-4690k/single-thread/gcc/v3_simd.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v3_simd_cpp with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 47.03723 3 | 46.78181 4 | 46.78338 5 | 46.85339 6 | 46.87645 7 | 46.88366 8 | 46.80642 9 | 46.84826 10 | 46.80637 11 | 46.80599 12 | 46.80521 13 | 46.89913 14 | 46.77704 15 | 611327.84,msec,task-clock:u,611327844652,100.00,1.000,CPUs utilized 16 | 0,,context-switches:u,611327844652,100.00,0.000,K/sec 17 | 
0,,cpu-migrations:u,611327844652,100.00,0.000,K/sec 18 | 984577,,page-faults:u,611327844652,100.00,0.002,M/sec 19 | 2619411389068,,cycles:u,611328878831,100.00,4.285,GHz 20 | 2502420081473,,instructions:u,611328878831,100.00,0.96,insn per cycle 21 | 356270257789,,branches:u,611328878831,100.00,582.781,M/sec 22 | 527001316,,branch-misses:u,611328878831,100.00,0.15,of all branches 23 | -------------------------------------------------------------------------------- /reports/i5-4690k/single-thread/gcc/v4_register_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v4_register_reuse_cpp with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 16.58754 3 | 16.47927 4 | 16.46577 5 | 16.4674 6 | 16.46523 7 | 16.47501 8 | 16.46663 9 | 16.46653 10 | 16.46587 11 | 16.46902 12 | 16.49185 13 | 16.48967 14 | 16.49983 15 | 16.50215 16 | 16.60708 17 | 16.59934 18 | 16.59492 19 | 16.59791 20 | 16.6007 21 | 16.59012 22 | 16.58214 23 | 16.64779 24 | 16.58546 25 | 16.5756 26 | 16.58813 27 | 16.58862 28 | 16.58372 29 | 16.57995 30 | 16.58627 31 | 16.5828 32 | 16.5809 33 | 16.58633 34 | 16.5826 35 | 16.59035 36 | 16.58572 37 | 16.64115 38 | 16.5856 39 | 618996.34,msec,task-clock:u,618996335464,100.00,1.000,CPUs utilized 40 | 0,,context-switches:u,618996335464,100.00,0.000,K/sec 41 | 0,,cpu-migrations:u,618996335464,100.00,0.000,K/sec 42 | 2672115,,page-faults:u,618996335464,100.00,0.004,M/sec 43 | 2642563818852,,cycles:u,618997451389,100.00,4.269,GHz 44 | 3566682231099,,instructions:u,618997451389,100.00,1.35,insn per cycle 45 | 126140321715,,branches:u,618997451389,100.00,203.782,M/sec 46 | 148357004,,branch-misses:u,618997451389,100.00,0.12,of all branches 47 | -------------------------------------------------------------------------------- /reports/i5-4690k/single-thread/gcc/v5_more_register_reuse.txt: -------------------------------------------------------------------------------- 
1 | benchmarking ./build/bin/v5_more_register_reuse_cpp with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 13.03715 3 | 13.03407 4 | 13.03413 5 | 13.03653 6 | 13.03648 7 | 13.03651 8 | 13.03626 9 | 13.03566 10 | 13.03638 11 | 13.03613 12 | 13.03612 13 | 13.03568 14 | 13.0363 15 | 13.0359 16 | 13.03639 17 | 13.036 18 | 13.03594 19 | 13.03587 20 | 13.03628 21 | 13.03637 22 | 13.0355 23 | 13.03581 24 | 13.03426 25 | 13.03355 26 | 13.03349 27 | 13.03386 28 | 13.03368 29 | 13.03451 30 | 13.0339 31 | 13.03395 32 | 13.0339 33 | 13.03402 34 | 13.03384 35 | 13.03358 36 | 13.03372 37 | 13.03356 38 | 13.03375 39 | 13.03384 40 | 13.03372 41 | 13.0339 42 | 13.03459 43 | 13.03222 44 | 13.03652 45 | 13.03616 46 | 13.03647 47 | 13.03645 48 | 13.03647 49 | 620955.32,msec,task-clock:u,620955321105,100.00,1.000,CPUs utilized 50 | 0,,context-switches:u,620955321105,100.00,0.000,K/sec 51 | 0,,cpu-migrations:u,620955321105,100.00,0.000,K/sec 52 | 3375256,,page-faults:u,620955321105,100.00,0.005,M/sec 53 | 2650740365034,,cycles:u,620956476060,100.00,4.269,GHz 54 | 4111508049567,,instructions:u,620956476060,100.00,1.55,insn per cycle 55 | 177694055603,,branches:u,620956476060,100.00,286.162,M/sec 56 | 26556324,,branch-misses:u,620956476060,100.00,0.01,of all branches 57 | -------------------------------------------------------------------------------- /reports/i5-4690k/single-thread/gcc/v6_prefetch.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v6_prefetch_cpp with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 13.00952 3 | 13.00577 4 | 13.00301 5 | 13.0045 6 | 13.00446 7 | 13.00426 8 | 13.00432 9 | 13.00377 10 | 13.00531 11 | 13.00423 12 | 13.00419 13 | 13.00394 14 | 13.00421 15 | 13.00406 16 | 13.0046 17 | 13.00404 18 | 13.00484 19 | 13.00414 20 | 13.00406 21 | 13.00554 22 | 13.00363 23 | 13.00603 24 | 13.00359 25 | 13.00395 26 | 13.00341 27 | 
13.00617 28 | 13.00482 29 | 13.00432 30 | 13.00394 31 | 13.00404 32 | 13.0053 33 | 13.00444 34 | 13.00359 35 | 13.00325 36 | 13.00358 37 | 13.00343 38 | 13.00334 39 | 13.00322 40 | 13.00324 41 | 13.00342 42 | 13.00376 43 | 13.00333 44 | 13.00321 45 | 13.00337 46 | 13.00331 47 | 13.00318 48 | 13.00308 49 | 619501.71,msec,task-clock:u,619501706339,100.00,1.000,CPUs utilized 50 | 0,,context-switches:u,619501706339,100.00,0.000,K/sec 51 | 0,,cpu-migrations:u,619501706339,100.00,0.000,K/sec 52 | 3375258,,page-faults:u,619501706339,100.00,0.005,M/sec 53 | 2644649690989,,cycles:u,619502699326,100.00,4.269,GHz 54 | 4587277488708,,instructions:u,619502699326,100.00,1.73,insn per cycle 55 | 177694073532,,branches:u,619502699326,100.00,286.834,M/sec 56 | 26824716,,branch-misses:u,619502699326,100.00,0.02,of all branches 57 | -------------------------------------------------------------------------------- /reports/i5-4690k/single-thread/gcc/v7_cache_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v7_cache_reuse_cpp with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 12.87883 3 | 12.86195 4 | 12.88268 5 | 12.87734 6 | 12.87841 7 | 12.87485 8 | 12.87701 9 | 12.8723 10 | 12.87196 11 | 12.87309 12 | 12.87358 13 | 12.87429 14 | 12.88075 15 | 12.88094 16 | 12.88322 17 | 12.8827 18 | 12.882 19 | 12.88342 20 | 12.88208 21 | 12.8833 22 | 12.88306 23 | 12.88392 24 | 12.884 25 | 12.88317 26 | 12.883 27 | 12.88255 28 | 12.88284 29 | 12.88284 30 | 12.88228 31 | 12.88125 32 | 12.88225 33 | 12.88227 34 | 12.88249 35 | 12.88212 36 | 12.88078 37 | 12.88157 38 | 12.88064 39 | 12.88175 40 | 12.88202 41 | 12.88089 42 | 12.88062 43 | 12.87994 44 | 12.88306 45 | 12.88432 46 | 12.8806 47 | 12.88283 48 | 12.88246 49 | 613683.95,msec,task-clock:u,613683945495,100.00,1.000,CPUs utilized 50 | 0,,context-switches:u,613683945495,100.00,0.000,K/sec 51 | 
0,,cpu-migrations:u,613683945495,100.00,0.000,K/sec 52 | 1979923,,page-faults:u,613683945495,100.00,0.003,M/sec 53 | 2627653804055,,cycles:u,613685047825,100.00,4.282,GHz 54 | 4130428540654,,instructions:u,613685047825,100.00,1.57,insn per cycle 55 | 179362686889,,branches:u,613685047825,100.00,292.272,M/sec 56 | 402459681,,branch-misses:u,613685047825,100.00,0.22,of all branches 57 | -------------------------------------------------------------------------------- /reports/i5-4690k/single-thread/rustc/v0_baseline.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v0_baseline_rust with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 1800.989 3 | 1801246.27,msec,task-clock:u,1801246268846,100.00,1.000,CPUs utilized 4 | 0,,context-switches:u,1801246268846,100.00,0.000,K/sec 5 | 0,,cpu-migrations:u,1801246268846,100.00,0.000,K/sec 6 | 70520,,page-faults:u,1801246268846,100.00,0.039,K/sec 7 | 7735993888822,,cycles:u,1801249715550,100.00,4.295,GHz 8 | 3026505927136,,instructions:u,1801249715550,100.00,0.39,insn per cycle 9 | 648328708715,,branches:u,1801249715550,100.00,359.933,M/sec 10 | 36043065,,branch-misses:u,1801249715550,100.00,0.01,of all branches 11 | -------------------------------------------------------------------------------- /reports/i5-4690k/single-thread/rustc/v1_linear_reading.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v1_linear_reading_rust with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 168.0096 3 | 167.836 4 | 167.9187 5 | 167.8634 6 | 672416.75,msec,task-clock:u,672416751346,100.00,1.000,CPUs utilized 7 | 0,,context-switches:u,672416751346,100.00,0.000,K/sec 8 | 0,,cpu-migrations:u,672416751346,100.00,0.000,K/sec 9 | 211147,,page-faults:u,672416751346,100.00,0.314,K/sec 10 | 2884730467250,,cycles:u,672418190978,100.00,4.290,GHz 11 | 
2927543104163,,instructions:u,672418190978,100.00,1.01,insn per cycle 12 | 109876637291,,branches:u,672418190978,100.00,163.406,M/sec 13 | 144082626,,branch-misses:u,672418190978,100.00,0.13,of all branches 14 | -------------------------------------------------------------------------------- /reports/i5-4690k/single-thread/rustc/v2_instr_level_parallelism.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v2_instr_level_parallelism_rust with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 64.08903 3 | 64.61665 4 | 64.52528 5 | 64.50558 6 | 64.28774 7 | 64.28466 8 | 64.25513 9 | 64.28964 10 | 64.29266 11 | 64.25107 12 | 645234.15,msec,task-clock:u,645234153529,100.00,1.000,CPUs utilized 13 | 0,,context-switches:u,645234153529,100.00,0.000,K/sec 14 | 0,,cpu-migrations:u,645234153529,100.00,0.000,K/sec 15 | 773663,,page-faults:u,645234153529,100.00,0.001,M/sec 16 | 2764260293564,,cycles:u,645235448522,100.00,4.284,GHz 17 | 4893246990109,,instructions:u,645235448522,100.00,1.77,insn per cycle 18 | 140408349261,,branches:u,645235448522,100.00,217.608,M/sec 19 | 360218315,,branch-misses:u,645235448522,100.00,0.26,of all branches 20 | -------------------------------------------------------------------------------- /reports/i5-4690k/single-thread/rustc/v3_simd.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v3_simd_rust with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 48.37498 3 | 48.15842 4 | 48.17199 5 | 48.14788 6 | 48.42369 7 | 48.4404 8 | 48.45085 9 | 48.44223 10 | 48.43613 11 | 48.45036 12 | 48.43691 13 | 48.11543 14 | 48.11019 15 | 630521.09,msec,task-clock:u,630521092558,100.00,1.000,CPUs utilized 16 | 0,,context-switches:u,630521092558,100.00,0.000,K/sec 17 | 0,,cpu-migrations:u,630521092558,100.00,0.000,K/sec 18 | 
984601,,page-faults:u,630521092558,100.00,0.002,M/sec 19 | 2700146168775,,cycles:u,630522503566,100.00,4.282,GHz 20 | 1454150863988,,instructions:u,630522503566,100.00,0.54,insn per cycle 21 | 95550922975,,branches:u,630522503566,100.00,151.543,M/sec 22 | 468333881,,branch-misses:u,630522503566,100.00,0.49,of all branches 23 | -------------------------------------------------------------------------------- /reports/i5-4690k/single-thread/rustc/v4_register_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v4_register_reuse_rust with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 15.74123 3 | 15.74148 4 | 15.74167 5 | 15.74066 6 | 15.86015 7 | 15.74383 8 | 15.74398 9 | 15.74422 10 | 15.74348 11 | 15.74379 12 | 15.74221 13 | 15.74234 14 | 15.7416 15 | 15.74319 16 | 15.74175 17 | 15.7771 18 | 15.74298 19 | 15.74333 20 | 15.7417 21 | 15.74159 22 | 15.73879 23 | 15.73781 24 | 15.74061 25 | 15.73893 26 | 15.73896 27 | 15.73918 28 | 15.7397 29 | 15.74056 30 | 15.74026 31 | 15.74182 32 | 15.73822 33 | 15.73867 34 | 15.74278 35 | 15.7425 36 | 15.74383 37 | 15.74202 38 | 15.74121 39 | 15.74217 40 | 15.74397 41 | 620981.07,msec,task-clock:u,620981068111,100.00,1.000,CPUs utilized 42 | 0,,context-switches:u,620981068111,100.00,0.000,K/sec 43 | 0,,cpu-migrations:u,620981068111,100.00,0.000,K/sec 44 | 2812768,,page-faults:u,620981068111,100.00,0.005,M/sec 45 | 2651314901771,,cycles:u,620982215211,100.00,4.270,GHz 46 | 3417975788718,,instructions:u,620982215211,100.00,1.29,insn per cycle 47 | 137195656024,,branches:u,620982215211,100.00,220.934,M/sec 48 | 156570356,,branch-misses:u,620982215211,100.00,0.11,of all branches 49 | -------------------------------------------------------------------------------- /reports/i5-4690k/single-thread/rustc/v5_more_register_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking 
./build/bin/v5_more_register_reuse_rust with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 13.07106 3 | 13.07097 4 | 13.07079 5 | 13.07226 6 | 13.07036 7 | 13.07014 8 | 13.07135 9 | 13.07113 10 | 13.07093 11 | 13.06972 12 | 13.07081 13 | 13.07149 14 | 13.06961 15 | 13.07168 16 | 13.07347 17 | 13.07206 18 | 13.07279 19 | 13.07246 20 | 13.07223 21 | 13.06996 22 | 13.06977 23 | 13.07004 24 | 13.07042 25 | 13.07046 26 | 13.07066 27 | 13.07079 28 | 13.07099 29 | 13.07135 30 | 13.09468 31 | 13.07104 32 | 13.07155 33 | 13.06981 34 | 13.07042 35 | 13.07017 36 | 13.06977 37 | 13.07276 38 | 13.07076 39 | 13.07028 40 | 13.07112 41 | 13.07066 42 | 13.0716 43 | 13.07165 44 | 13.07095 45 | 13.07131 46 | 13.0713 47 | 13.07145 48 | 609429.55,msec,task-clock:u,609429552427,100.00,1.000,CPUs utilized 49 | 0,,context-switches:u,609429552427,100.00,0.000,K/sec 50 | 0,,cpu-migrations:u,609429552427,100.00,0.000,K/sec 51 | 3304965,,page-faults:u,609429552427,100.00,0.005,M/sec 52 | 2602393638273,,cycles:u,609430734804,100.00,4.270,GHz 53 | 4179980612716,,instructions:u,609430734804,100.00,1.61,insn per cycle 54 | 178260789939,,branches:u,609430734804,100.00,292.504,M/sec 55 | 26343419,,branch-misses:u,609430734804,100.00,0.01,of all branches 56 | -------------------------------------------------------------------------------- /reports/i5-4690k/single-thread/rustc/v6_prefetch.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v6_prefetch_rust with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 12.85541 3 | 12.87658 4 | 12.87718 5 | 12.87685 6 | 12.85056 7 | 12.87627 8 | 12.87587 9 | 12.87756 10 | 12.877 11 | 12.87707 12 | 12.87729 13 | 12.87772 14 | 12.85502 15 | 12.87657 16 | 12.87664 17 | 12.85119 18 | 12.87732 19 | 12.87649 20 | 12.87532 21 | 12.87535 22 | 12.87495 23 | 12.87503 24 | 12.87536 25 | 12.87518 26 | 12.87488 27 | 12.87485 28 | 
12.87499 29 | 12.87521 30 | 12.87512 31 | 12.87497 32 | 12.87541 33 | 12.87471 34 | 12.87454 35 | 12.87537 36 | 12.8749 37 | 12.87531 38 | 12.875 39 | 12.84486 40 | 12.84672 41 | 12.84554 42 | 12.84656 43 | 12.84628 44 | 12.84624 45 | 12.85118 46 | 12.87171 47 | 12.85673 48 | 12.87121 49 | 613150.22,msec,task-clock:u,613150218466,100.00,1.000,CPUs utilized 50 | 0,,context-switches:u,613150218466,100.00,0.000,K/sec 51 | 0,,cpu-migrations:u,613150218466,100.00,0.000,K/sec 52 | 3375278,,page-faults:u,613150218466,100.00,0.006,M/sec 53 | 2616208629387,,cycles:u,613151232814,100.00,4.267,GHz 54 | 4588020073393,,instructions:u,613151232814,100.00,1.75,insn per cycle 55 | 181686483484,,branches:u,613151232814,100.00,296.316,M/sec 56 | 26931388,,branch-misses:u,613151232814,100.00,0.01,of all branches 57 | -------------------------------------------------------------------------------- /reports/i5-4690k/single-thread/rustc/v7_cache_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v7_cache_reuse_rust with input containing 36000000 elements, stopping after 100 iterations or 600 seconds 2 | 12.97663 3 | 12.97541 4 | 12.97139 5 | 12.97646 6 | 12.97554 7 | 12.97168 8 | 12.97192 9 | 12.97363 10 | 12.9756 11 | 12.9738 12 | 12.97167 13 | 12.97101 14 | 12.97091 15 | 12.96876 16 | 12.96862 17 | 12.9686 18 | 12.96853 19 | 12.96901 20 | 12.96833 21 | 12.9688 22 | 12.96917 23 | 12.96842 24 | 12.96944 25 | 12.96893 26 | 12.96919 27 | 12.96949 28 | 12.96921 29 | 12.96897 30 | 12.96888 31 | 12.96972 32 | 12.96927 33 | 12.96953 34 | 12.96861 35 | 12.96854 36 | 12.96807 37 | 12.96831 38 | 12.96871 39 | 12.96867 40 | 12.9667 41 | 12.96878 42 | 12.96862 43 | 12.96799 44 | 12.96859 45 | 12.9675 46 | 12.96874 47 | 12.96855 48 | 12.96864 49 | 617903.61,msec,task-clock:u,617903606313,100.00,1.000,CPUs utilized 50 | 0,,context-switches:u,617903606313,100.00,0.000,K/sec 51 | 0,,cpu-migrations:u,617903606313,100.00,0.000,K/sec 
52 | 1782228,,page-faults:u,617903606313,100.00,0.003,M/sec 53 | 2647020517112,,cycles:u,617904594486,100.00,4.284,GHz 54 | 5434919619963,,instructions:u,617904594486,100.00,2.05,insn per cycle 55 | 186171124815,,branches:u,617904594486,100.00,301.295,M/sec 56 | 427055028,,branch-misses:u,617904594486,100.00,0.23,of all branches 57 | -------------------------------------------------------------------------------- /reports/i5-8250U/multi-thread/clang/v0_baseline.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v0_baseline_cpp with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 379.035 3 | 377.734 4 | 378.1944 5 | 4549134.44,msec,task-clock:u,4549134435761,100.00,3.980,CPUs utilized 6 | 0,,context-switches:u,4549134435761,100.00,0.000,K/sec 7 | 0,,cpu-migrations:u,4549134435761,100.00,0.000,K/sec 8 | 70579,,page-faults:u,4549134435761,100.00,0.016,K/sec 9 | 13107718560514,,cycles:u,4549139367813,100.00,2.881,GHz 10 | 3262484116085,,instructions:u,4549139367813,100.00,0.25,insn per cycle 11 | 166111337005,,branches:u,4549139367813,100.00,36.515,M/sec 12 | 108251210,,branch-misses:u,4549139367813,100.00,0.07,of all branches 13 | -------------------------------------------------------------------------------- /reports/i5-8250U/multi-thread/clang/v1_linear_reading.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v1_linear_reading_cpp with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 74.91881 3 | 74.92405 4 | 74.96001 5 | 74.8686 6 | 74.93469 7 | 74.89296 8 | 74.91566 9 | 74.86263 10 | 74.86372 11 | 74.88955 12 | 74.96108 13 | 74.96316 14 | 74.93665 15 | 3934822.27,msec,task-clock:u,3934822269776,100.00,3.902,CPUs utilized 16 | 0,,context-switches:u,3934822269776,100.00,0.000,K/sec 17 | 0,,cpu-migrations:u,3934822269776,100.00,0.000,K/sec 18 | 
527626,,page-faults:u,3934822269776,100.00,0.134,K/sec 19 | 11885263269894,,cycles:u,3934826672598,100.00,3.021,GHz 20 | 10623140780120,,instructions:u,3934826672598,100.00,0.89,insn per cycle 21 | 720381644310,,branches:u,3934826672598,100.00,183.079,M/sec 22 | 468978622,,branch-misses:u,3934826672598,100.00,0.07,of all branches 23 | -------------------------------------------------------------------------------- /reports/i5-8250U/multi-thread/clang/v2_instr_level_parallelism.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v2_instr_level_parallelism_cpp with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 79.16061 3 | 79.42045 4 | 78.94648 5 | 78.84136 6 | 78.85192 7 | 78.65863 8 | 79.06378 9 | 79.199 10 | 78.73746 11 | 79.47263 12 | 79.43966 13 | 79.37678 14 | 3830347.19,msec,task-clock:u,3830347191216,100.00,3.903,CPUs utilized 15 | 0,,context-switches:u,3830347191216,100.00,0.000,K/sec 16 | 0,,cpu-migrations:u,3830347191216,100.00,0.000,K/sec 17 | 914352,,page-faults:u,3830347191216,100.00,0.239,K/sec 18 | 10028283817728,,cycles:u,3830352269884,100.00,2.618,GHz 19 | 29896770197044,,instructions:u,3830352269884,100.00,2.98,insn per cycle 20 | 664028106520,,branches:u,3830352269884,100.00,173.360,M/sec 21 | 432980019,,branch-misses:u,3830352269884,100.00,0.07,of all branches 22 | -------------------------------------------------------------------------------- /reports/i5-8250U/multi-thread/clang/v3_simd.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v3_simd_cpp with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 16.69595 3 | 16.70155 4 | 16.70075 5 | 16.7095 6 | 16.70836 7 | 16.69374 8 | 16.69858 9 | 16.70453 10 | 16.70228 11 | 16.70841 12 | 16.71019 13 | 16.71125 14 | 16.71408 15 | 16.71226 16 | 16.71418 17 | 16.70172 18 | 16.70711 19 | 16.71321 20 | 
16.71184 21 | 16.70283 22 | 16.7185 23 | 16.70817 24 | 16.72968 25 | 16.70734 26 | 16.69727 27 | 16.70002 28 | 16.70826 29 | 16.69902 30 | 16.70889 31 | 16.7048 32 | 16.69571 33 | 16.7027 34 | 16.69621 35 | 16.69366 36 | 16.70352 37 | 16.69641 38 | 16.69852 39 | 16.69936 40 | 16.70225 41 | 16.6925 42 | 16.69705 43 | 16.68291 44 | 16.69296 45 | 16.69102 46 | 16.68909 47 | 16.69645 48 | 16.69378 49 | 16.70185 50 | 16.69474 51 | 16.69102 52 | 16.69686 53 | 16.69436 54 | 16.68065 55 | 16.69467 56 | 3782435.17,msec,task-clock:u,3782435165072,100.00,3.614,CPUs utilized 57 | 0,,context-switches:u,3782435165072,100.00,0.000,K/sec 58 | 0,,cpu-migrations:u,3782435165072,100.00,0.000,K/sec 59 | 7664384,,page-faults:u,3782435165072,100.00,0.002,M/sec 60 | 9367639402447,,cycles:u,3782440614985,100.00,2.477,GHz 61 | 6359151544060,,instructions:u,3782440614985,100.00,0.68,insn per cycle 62 | 447021152370,,branches:u,3782440614985,100.00,118.183,M/sec 63 | 1947941103,,branch-misses:u,3782440614985,100.00,0.44,of all branches 64 | -------------------------------------------------------------------------------- /reports/i5-8250U/multi-thread/clang/v4_register_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v4_register_reuse_cpp with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 5.31708 3 | 5.335222 4 | 5.33904 5 | 5.344798 6 | 5.32946 7 | 5.344743 8 | 5.342911 9 | 5.339829 10 | 5.335473 11 | 5.342469 12 | 5.343207 13 | 5.344834 14 | 5.338264 15 | 5.344721 16 | 5.324705 17 | 5.330923 18 | 5.341955 19 | 5.338248 20 | 5.337754 21 | 5.345894 22 | 5.344339 23 | 5.348205 24 | 5.350484 25 | 5.339491 26 | 5.349526 27 | 5.347939 28 | 5.345092 29 | 5.333261 30 | 5.341883 31 | 5.335455 32 | 5.350124 33 | 5.36194 34 | 5.347855 35 | 5.330959 36 | 5.351506 37 | 5.334949 38 | 5.337813 39 | 5.343166 40 | 5.338472 41 | 5.34612 42 | 5.347406 43 | 5.345253 44 | 5.337646 45 | 5.332772 46 | 
5.340289 47 | 5.335093 48 | 5.34416 49 | 5.341485 50 | 5.342071 51 | 5.346503 52 | 5.351175 53 | 5.343102 54 | 5.347006 55 | 5.339437 56 | 5.351105 57 | 5.348811 58 | 5.347595 59 | 5.356118 60 | 5.347017 61 | 5.34057 62 | 5.346855 63 | 5.34484 64 | 5.343484 65 | 5.339954 66 | 5.340497 67 | 5.344531 68 | 5.337011 69 | 5.347193 70 | 5.337771 71 | 5.330696 72 | 5.335668 73 | 5.326725 74 | 5.351254 75 | 5.343632 76 | 5.340861 77 | 5.346552 78 | 5.335972 79 | 5.338757 80 | 5.337769 81 | 5.333985 82 | 5.336775 83 | 5.349617 84 | 5.339619 85 | 5.345387 86 | 5.343915 87 | 5.350558 88 | 5.348432 89 | 5.336337 90 | 5.336369 91 | 5.338194 92 | 5.338973 93 | 5.340476 94 | 5.339427 95 | 5.352057 96 | 5.340894 97 | 5.340097 98 | 5.34912 99 | 5.350208 100 | 5.339617 101 | 5.342588 102 | 2463288.82,msec,task-clock:u,2463288817181,100.00,3.060,CPUs utilized 103 | 0,,context-switches:u,2463288817181,100.00,0.000,K/sec 104 | 0,,cpu-migrations:u,2463288817181,100.00,0.000,K/sec 105 | 14133179,,page-faults:u,2463288817181,100.00,0.006,M/sec 106 | 6438067209495,,cycles:u,2463293567293,100.00,2.614,GHz 107 | 8994373826130,,instructions:u,2463293567293,100.00,1.40,insn per cycle 108 | 443565874740,,branches:u,2463293567293,100.00,180.071,M/sec 109 | 406262789,,branch-misses:u,2463293567293,100.00,0.09,of all branches 110 | -------------------------------------------------------------------------------- /reports/i5-8250U/multi-thread/clang/v5_more_register_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v5_more_register_reuse_cpp with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 3.264444 3 | 3.538751 4 | 3.726321 5 | 3.708987 6 | 3.781725 7 | 3.430194 8 | 3.673665 9 | 3.535752 10 | 3.639679 11 | 3.428107 12 | 3.425294 13 | 3.697353 14 | 3.500541 15 | 3.900328 16 | 3.5979 17 | 3.758962 18 | 3.418146 19 | 3.764496 20 | 3.546249 21 | 3.427274 22 | 3.425969 23 | 3.42526 24 | 
3.557233 25 | 3.428037 26 | 3.737932 27 | 3.425458 28 | 3.598869 29 | 3.425705 30 | 3.654968 31 | 3.422669 32 | 3.597085 33 | 3.427907 34 | 3.421944 35 | 3.782473 36 | 3.559773 37 | 3.658962 38 | 3.93435 39 | 3.418461 40 | 3.428724 41 | 3.42474 42 | 3.660843 43 | 3.417931 44 | 3.648182 45 | 3.422969 46 | 3.76713 47 | 3.790047 48 | 3.420767 49 | 3.427064 50 | 3.780665 51 | 3.965775 52 | 3.416298 53 | 3.574124 54 | 3.780693 55 | 3.847636 56 | 3.930609 57 | 3.865974 58 | 3.418919 59 | 3.970152 60 | 3.521471 61 | 3.592745 62 | 3.422355 63 | 3.589774 64 | 3.85198 65 | 3.542189 66 | 3.428307 67 | 3.617774 68 | 3.424819 69 | 3.428292 70 | 3.698499 71 | 3.418602 72 | 3.784366 73 | 3.432713 74 | 3.426365 75 | 3.8727 76 | 3.419259 77 | 3.432065 78 | 3.428676 79 | 3.736286 80 | 3.612896 81 | 3.684851 82 | 3.712983 83 | 3.418638 84 | 3.781713 85 | 3.507659 86 | 3.728583 87 | 3.915489 88 | 3.623347 89 | 3.789287 90 | 3.42389 91 | 3.629311 92 | 3.424353 93 | 3.779706 94 | 3.730464 95 | 3.579519 96 | 3.718616 97 | 3.723365 98 | 3.66963 99 | 3.90573 100 | 3.582959 101 | 3.672207 102 | 1759700.24,msec,task-clock:u,1759700237656,100.00,2.790,CPUs utilized 103 | 0,,context-switches:u,1759700237656,100.00,0.000,K/sec 104 | 0,,cpu-migrations:u,1759700237656,100.00,0.000,K/sec 105 | 14133176,,page-faults:u,1759700237656,100.00,0.008,M/sec 106 | 4716278695817,,cycles:u,1759704428543,100.00,2.680,GHz 107 | 9287621684371,,instructions:u,1759704428543,100.00,1.97,insn per cycle 108 | 480258817312,,branches:u,1759704428543,100.00,272.921,M/sec 109 | 136170968,,branch-misses:u,1759704428543,100.00,0.03,of all branches 110 | -------------------------------------------------------------------------------- /reports/i5-8250U/multi-thread/clang/v6_prefetch.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v6_prefetch_cpp with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 3.106304 3 | 3.120431 4 
| 3.241954 5 | 3.240818 6 | 3.237655 7 | 3.238459 8 | 3.23646 9 | 3.240312 10 | 3.227814 11 | 3.230497 12 | 3.234448 13 | 3.241875 14 | 3.237797 15 | 3.238117 16 | 3.235301 17 | 3.241729 18 | 3.236711 19 | 3.237987 20 | 3.240241 21 | 3.243455 22 | 3.242594 23 | 3.240368 24 | 3.233771 25 | 3.241459 26 | 3.235869 27 | 3.241591 28 | 3.230361 29 | 3.230167 30 | 3.242583 31 | 3.244658 32 | 3.245352 33 | 3.228145 34 | 3.239812 35 | 3.235119 36 | 3.240733 37 | 3.236918 38 | 3.241706 39 | 3.236149 40 | 3.232722 41 | 3.226559 42 | 3.238317 43 | 3.23117 44 | 3.237434 45 | 3.226198 46 | 3.232488 47 | 3.235286 48 | 3.246349 49 | 3.237504 50 | 3.242636 51 | 3.236108 52 | 3.232964 53 | 3.242654 54 | 3.232989 55 | 3.235747 56 | 3.239161 57 | 3.239845 58 | 3.237635 59 | 3.2404 60 | 3.245967 61 | 3.244215 62 | 3.247647 63 | 3.243052 64 | 3.234994 65 | 3.238909 66 | 3.240083 67 | 3.239208 68 | 3.234255 69 | 3.242473 70 | 3.242697 71 | 3.24044 72 | 3.229542 73 | 3.235801 74 | 3.241386 75 | 3.250671 76 | 3.249698 77 | 3.242494 78 | 3.244586 79 | 3.243173 80 | 3.248078 81 | 3.249057 82 | 3.246013 83 | 3.238126 84 | 3.239846 85 | 3.241202 86 | 3.242492 87 | 3.240475 88 | 3.240751 89 | 3.241349 90 | 3.250588 91 | 3.240541 92 | 3.247368 93 | 3.236484 94 | 3.23506 95 | 3.236182 96 | 3.242815 97 | 3.232518 98 | 3.235655 99 | 3.227275 100 | 3.229544 101 | 3.225978 102 | 1618811.59,msec,task-clock:u,1618811586704,100.00,2.721,CPUs utilized 103 | 0,,context-switches:u,1618811586704,100.00,0.000,K/sec 104 | 0,,cpu-migrations:u,1618811586704,100.00,0.000,K/sec 105 | 14133177,,page-faults:u,1618811586704,100.00,0.009,M/sec 106 | 4284545674170,,cycles:u,1618815472718,100.00,2.647,GHz 107 | 9958553081046,,instructions:u,1618815472718,100.00,2.32,insn per cycle 108 | 479039126440,,branches:u,1618815472718,100.00,295.920,M/sec 109 | 146293958,,branch-misses:u,1618815472718,100.00,0.03,of all branches 110 | -------------------------------------------------------------------------------- 
/reports/i5-8250U/multi-thread/clang/v7_cache_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v7_cache_reuse_cpp with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 2.788661 3 | 2.796417 4 | 2.977765 5 | 2.9034 6 | 2.91422 7 | 2.940208 8 | 2.919092 9 | 2.961225 10 | 2.929595 11 | 2.904385 12 | 2.911592 13 | 2.920417 14 | 2.916194 15 | 2.905437 16 | 2.905459 17 | 2.966837 18 | 2.94831 19 | 2.967498 20 | 2.931506 21 | 2.909277 22 | 2.953128 23 | 2.942951 24 | 2.984508 25 | 2.917522 26 | 2.985195 27 | 2.951568 28 | 2.906841 29 | 2.975806 30 | 2.915064 31 | 2.928592 32 | 2.911317 33 | 2.901511 34 | 2.910075 35 | 2.91598 36 | 2.960025 37 | 2.937574 38 | 2.916547 39 | 2.90773 40 | 2.914907 41 | 2.928727 42 | 2.915262 43 | 2.904626 44 | 2.964247 45 | 2.89765 46 | 2.920927 47 | 2.918334 48 | 2.912239 49 | 2.970494 50 | 2.909086 51 | 2.921833 52 | 2.93394 53 | 2.89952 54 | 2.960226 55 | 2.90618 56 | 2.922661 57 | 2.919674 58 | 2.929096 59 | 2.943231 60 | 2.905865 61 | 2.914187 62 | 2.908396 63 | 2.983113 64 | 2.926365 65 | 2.924325 66 | 2.945751 67 | 2.928382 68 | 2.925472 69 | 2.913101 70 | 2.897292 71 | 2.904126 72 | 2.900461 73 | 2.939186 74 | 2.931338 75 | 2.922467 76 | 2.91158 77 | 2.923298 78 | 2.971395 79 | 2.923326 80 | 2.910463 81 | 2.93325 82 | 2.929292 83 | 2.966166 84 | 2.917861 85 | 2.898728 86 | 2.902589 87 | 2.909781 88 | 2.941369 89 | 2.911217 90 | 2.913089 91 | 2.901754 92 | 2.907408 93 | 2.900518 94 | 2.89815 95 | 2.90311 96 | 2.931825 97 | 2.936959 98 | 2.930021 99 | 2.911868 100 | 2.8965 101 | 2.913291 102 | 1498545.22,msec,task-clock:u,1498545222895,100.00,2.655,CPUs utilized 103 | 0,,context-switches:u,1498545222895,100.00,0.000,K/sec 104 | 0,,cpu-migrations:u,1498545222895,100.00,0.000,K/sec 105 | 4720549,,page-faults:u,1498545222895,100.00,0.003,M/sec 106 | 4342565627008,,cycles:u,1498549223025,100.00,2.898,GHz 107 | 
9333328164637,,instructions:u,1498549223025,100.00,2.15,insn per cycle 108 | 486469748855,,branches:u,1498549223025,100.00,324.628,M/sec 109 | 864928827,,branch-misses:u,1498549223025,100.00,0.18,of all branches 110 | -------------------------------------------------------------------------------- /reports/i5-8250U/multi-thread/gcc/v0_baseline.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v0_baseline_cpp with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 367.0069 3 | 365.6897 4 | 373.9812 5 | 4426266.96,msec,task-clock:u,4426266959858,100.00,3.997,CPUs utilized 6 | 0,,context-switches:u,4426266959858,100.00,0.000,K/sec 7 | 0,,cpu-migrations:u,4426266959858,100.00,0.000,K/sec 8 | 70522,,page-faults:u,4426266959858,100.00,0.016,K/sec 9 | 12832262870226,,cycles:u,4426271944174,100.00,2.899,GHz 10 | 4543391490834,,instructions:u,4426271944174,100.00,0.35,insn per cycle 11 | 649092742289,,branches:u,4426271944174,100.00,146.646,M/sec 12 | 108444445,,branch-misses:u,4426271944174,100.00,0.02,of all branches 13 | -------------------------------------------------------------------------------- /reports/i5-8250U/multi-thread/gcc/v1_linear_reading.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v1_linear_reading_cpp with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 77.72367 3 | 77.75206 4 | 77.75278 5 | 77.74951 6 | 77.76365 7 | 77.78526 8 | 77.74386 9 | 77.7568 10 | 77.75327 11 | 77.8309 12 | 77.77934 13 | 77.78292 14 | 3733684.05,msec,task-clock:u,3733684050148,100.00,3.989,CPUs utilized 15 | 0,,context-switches:u,3733684050148,100.00,0.000,K/sec 16 | 0,,cpu-migrations:u,3733684050148,100.00,0.000,K/sec 17 | 492411,,page-faults:u,3733684050148,100.00,0.132,K/sec 18 | 10843977009021,,cycles:u,3733688171919,100.00,2.904,GHz 19 | 
18174343681632,,instructions:u,3733688171919,100.00,1.68,insn per cycle 20 | 2596780644518,,branches:u,3733688171919,100.00,695.501,M/sec 21 | 432230847,,branch-misses:u,3733688171919,100.00,0.02,of all branches 22 | -------------------------------------------------------------------------------- /reports/i5-8250U/multi-thread/gcc/v2_instr_level_parallelism.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v2_instr_level_parallelism_cpp with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 28.20614 3 | 28.10834 4 | 28.32423 5 | 28.03013 6 | 28.35282 7 | 28.14389 8 | 28.12722 9 | 28.033 10 | 28.14095 11 | 28.12197 12 | 27.99882 13 | 28.27249 14 | 27.94969 15 | 28.01373 16 | 28.34403 17 | 28.11862 18 | 28.61654 19 | 28.31642 20 | 28.11933 21 | 28.12078 22 | 28.11485 23 | 28.19072 24 | 28.50303 25 | 28.24369 26 | 28.21271 27 | 28.48766 28 | 28.17232 29 | 28.27142 30 | 28.15186 31 | 28.15959 32 | 28.31021 33 | 28.22437 34 | 3600036.19,msec,task-clock:u,3600036194222,100.00,3.957,CPUs utilized 35 | 0,,context-switches:u,3600036194222,100.00,0.000,K/sec 36 | 0,,cpu-migrations:u,3600036194222,100.00,0.000,K/sec 37 | 2320573,,page-faults:u,3600036194222,100.00,0.645,K/sec 38 | 9096029145405,,cycles:u,3600041977289,100.00,2.527,GHz 39 | 27748645815680,,instructions:u,3600041977289,100.00,3.05,insn per cycle 40 | 1741365038286,,branches:u,3600041977289,100.00,483.708,M/sec 41 | 1445557124,,branch-misses:u,3600041977289,100.00,0.08,of all branches 42 | -------------------------------------------------------------------------------- /reports/i5-8250U/multi-thread/gcc/v3_simd.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v3_simd_cpp with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 16.73119 3 | 16.74141 4 | 16.7339 5 | 16.73183 6 | 16.74033 7 | 16.73586 8 | 16.74965 9 | 
16.74954 10 | 16.74066 11 | 16.73388 12 | 16.73641 13 | 16.73897 14 | 16.73706 15 | 16.73742 16 | 16.73544 17 | 16.73299 18 | 16.73204 19 | 16.73002 20 | 16.73249 21 | 16.73716 22 | 16.73174 23 | 16.74106 24 | 16.74312 25 | 16.7368 26 | 16.74162 27 | 16.74406 28 | 16.73843 29 | 16.73783 30 | 16.74617 31 | 16.74656 32 | 16.74193 33 | 16.73855 34 | 16.7286 35 | 16.73644 36 | 16.73701 37 | 16.7409 38 | 16.73762 39 | 16.74077 40 | 16.72882 41 | 16.73755 42 | 16.72773 43 | 16.74282 44 | 16.73933 45 | 16.74019 46 | 16.73693 47 | 16.73851 48 | 16.74298 49 | 16.74854 50 | 16.74469 51 | 16.73617 52 | 16.73639 53 | 16.73536 54 | 16.73872 55 | 16.73654 56 | 3629657.17,msec,task-clock:u,3629657173074,100.00,3.958,CPUs utilized 57 | 0,,context-switches:u,3629657173074,100.00,0.000,K/sec 58 | 0,,cpu-migrations:u,3629657173074,100.00,0.000,K/sec 59 | 3867484,,page-faults:u,3629657173074,100.00,0.001,M/sec 60 | 8742831741984,,cycles:u,3629662208416,100.00,2.409,GHz 61 | 8944262266686,,instructions:u,3629662208416,100.00,1.02,insn per cycle 62 | 1481929142602,,branches:u,3629662208416,100.00,408.284,M/sec 63 | 2194765367,,branch-misses:u,3629662208416,100.00,0.15,of all branches 64 | -------------------------------------------------------------------------------- /reports/i5-8250U/multi-thread/gcc/v4_register_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v4_register_reuse_cpp with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 5.535699 3 | 5.553719 4 | 5.55784 5 | 5.544241 6 | 5.553136 7 | 5.554242 8 | 5.551 9 | 5.548448 10 | 5.546638 11 | 5.54323 12 | 5.540191 13 | 5.562043 14 | 5.541402 15 | 5.545697 16 | 5.554177 17 | 5.544137 18 | 5.558865 19 | 5.560138 20 | 5.559286 21 | 5.559404 22 | 5.544063 23 | 5.553195 24 | 5.551488 25 | 5.559127 26 | 5.562873 27 | 5.549771 28 | 5.549161 29 | 5.554641 30 | 5.555928 31 | 5.550806 32 | 5.544977 33 | 5.545465 34 | 5.558383 35 | 
5.551523 36 | 5.549317 37 | 5.549198 38 | 5.550553 39 | 5.551178 40 | 5.550416 41 | 5.542071 42 | 5.528931 43 | 5.547295 44 | 5.550389 45 | 5.556127 46 | 5.548682 47 | 5.546023 48 | 5.562443 49 | 5.558207 50 | 5.547091 51 | 5.554918 52 | 5.560417 53 | 5.55732 54 | 5.563655 55 | 5.55506 56 | 5.556717 57 | 5.553091 58 | 5.548542 59 | 5.552509 60 | 5.557944 61 | 5.558647 62 | 5.553868 63 | 5.559494 64 | 5.552696 65 | 5.560529 66 | 5.545541 67 | 5.547791 68 | 5.55166 69 | 5.553187 70 | 5.545936 71 | 5.550918 72 | 5.543829 73 | 5.552616 74 | 5.551389 75 | 5.557983 76 | 5.553615 77 | 5.546628 78 | 5.552794 79 | 5.556737 80 | 5.557505 81 | 5.554921 82 | 5.560264 83 | 5.566617 84 | 5.555824 85 | 5.553243 86 | 5.548405 87 | 5.558766 88 | 5.547028 89 | 5.556164 90 | 5.554622 91 | 5.56556 92 | 5.546882 93 | 5.547257 94 | 5.55825 95 | 5.556118 96 | 5.546843 97 | 5.55178 98 | 5.555501 99 | 5.559586 100 | 5.56498 101 | 5.557992 102 | 2248284.25,msec,task-clock:u,2248284247545,100.00,3.873,CPUs utilized 103 | 0,,context-switches:u,2248284247545,100.00,0.000,K/sec 104 | 0,,cpu-migrations:u,2248284247545,100.00,0.000,K/sec 105 | 7101924,,page-faults:u,2248284247545,100.00,0.003,M/sec 106 | 5251406134337,,cycles:u,2248288844229,100.00,2.336,GHz 107 | 8149885419216,,instructions:u,2248288844229,100.00,1.55,insn per cycle 108 | 341798400185,,branches:u,2248288844229,100.00,152.026,M/sec 109 | 400992772,,branch-misses:u,2248288844229,100.00,0.12,of all branches 110 | -------------------------------------------------------------------------------- /reports/i5-8250U/multi-thread/gcc/v5_more_register_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v5_more_register_reuse_cpp with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 3.631653 3 | 3.667758 4 | 3.549881 5 | 3.711758 6 | 3.798917 7 | 3.584452 8 | 3.665464 9 | 3.820714 10 | 3.602289 11 | 3.560833 12 | 3.602845 13 | 3.702406 
14 | 3.910238 15 | 3.706757 16 | 3.875885 17 | 3.86126 18 | 3.610605 19 | 3.713338 20 | 3.796368 21 | 3.805935 22 | 3.635906 23 | 3.892802 24 | 3.779756 25 | 3.795703 26 | 3.823432 27 | 3.702534 28 | 3.778708 29 | 3.659339 30 | 3.750425 31 | 3.873172 32 | 3.857835 33 | 3.846482 34 | 3.751651 35 | 3.618715 36 | 3.816809 37 | 3.836535 38 | 3.80433 39 | 3.79917 40 | 3.770593 41 | 3.675073 42 | 3.689844 43 | 3.76182 44 | 3.63844 45 | 3.732911 46 | 3.819343 47 | 3.849734 48 | 3.621412 49 | 3.696513 50 | 3.649235 51 | 3.847573 52 | 3.708099 53 | 3.541449 54 | 3.712669 55 | 3.689722 56 | 3.883379 57 | 3.870335 58 | 3.700022 59 | 3.789308 60 | 3.684004 61 | 3.811558 62 | 3.847963 63 | 3.725742 64 | 3.661668 65 | 3.545805 66 | 3.778838 67 | 3.65769 68 | 3.728402 69 | 3.905509 70 | 3.780837 71 | 3.635055 72 | 3.856933 73 | 3.750813 74 | 3.812883 75 | 3.866369 76 | 3.699192 77 | 3.913837 78 | 3.539552 79 | 3.938622 80 | 3.855381 81 | 3.715241 82 | 3.906159 83 | 3.889676 84 | 3.664505 85 | 3.693039 86 | 3.795144 87 | 3.80061 88 | 3.840877 89 | 3.66256 90 | 3.736932 91 | 3.81265 92 | 3.776718 93 | 3.741396 94 | 3.854059 95 | 3.626356 96 | 3.684011 97 | 3.658297 98 | 3.881852 99 | 4.151247 100 | 3.817303 101 | 3.941132 102 | 1520319.86,msec,task-clock:u,1520319861831,100.00,3.796,CPUs utilized 103 | 0,,context-switches:u,1520319861831,100.00,0.000,K/sec 104 | 0,,cpu-migrations:u,1520319861831,100.00,0.000,K/sec 105 | 7101921,,page-faults:u,1520319861831,100.00,0.005,M/sec 106 | 3530558804165,,cycles:u,1520325110785,100.00,2.322,GHz 107 | 8747639167345,,instructions:u,1520325110785,100.00,2.48,insn per cycle 108 | 375623767077,,branches:u,1520325110785,100.00,247.069,M/sec 109 | 84203846,,branch-misses:u,1520325110785,100.00,0.02,of all branches 110 | -------------------------------------------------------------------------------- /reports/i5-8250U/multi-thread/gcc/v6_prefetch.txt: -------------------------------------------------------------------------------- 1 | benchmarking 
./build/bin/v6_prefetch_cpp with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 3.323428 3 | 3.39156 4 | 3.395837 5 | 3.389508 6 | 3.386932 7 | 3.381283 8 | 3.387021 9 | 3.387196 10 | 3.388816 11 | 3.387256 12 | 3.382762 13 | 3.38313 14 | 3.38519 15 | 3.380386 16 | 3.387611 17 | 3.382959 18 | 3.380856 19 | 3.373634 20 | 3.388438 21 | 3.389462 22 | 3.386205 23 | 3.375215 24 | 3.376279 25 | 3.387908 26 | 3.380478 27 | 3.388265 28 | 3.38124 29 | 3.380279 30 | 3.382044 31 | 3.389715 32 | 3.387104 33 | 3.387714 34 | 3.386049 35 | 3.386194 36 | 3.383563 37 | 3.38377 38 | 3.385477 39 | 3.381306 40 | 3.383088 41 | 3.380664 42 | 3.385748 43 | 3.374771 44 | 3.389355 45 | 3.388191 46 | 3.390963 47 | 3.386624 48 | 3.390319 49 | 3.391759 50 | 3.383841 51 | 3.389135 52 | 3.388992 53 | 3.381738 54 | 3.380604 55 | 3.385194 56 | 3.38436 57 | 3.379755 58 | 3.382765 59 | 3.389681 60 | 3.389335 61 | 3.384781 62 | 3.387348 63 | 3.383999 64 | 3.380005 65 | 3.379954 66 | 3.379955 67 | 3.380644 68 | 3.383459 69 | 3.381205 70 | 3.384407 71 | 3.390393 72 | 3.387328 73 | 3.383511 74 | 3.386031 75 | 3.385693 76 | 3.390424 77 | 3.384283 78 | 3.382718 79 | 3.381717 80 | 3.383931 81 | 3.381603 82 | 3.380405 83 | 3.383468 84 | 3.389966 85 | 3.390022 86 | 3.394676 87 | 3.38722 88 | 3.401509 89 | 3.390715 90 | 3.390022 91 | 3.391025 92 | 3.388465 93 | 3.38332 94 | 3.383859 95 | 3.385212 96 | 3.397207 97 | 3.38168 98 | 3.388405 99 | 3.380793 100 | 3.381369 101 | 3.37944 102 | 1382429.09,msec,task-clock:u,1382429087347,100.00,3.790,CPUs utilized 103 | 0,,context-switches:u,1382429087347,100.00,0.000,K/sec 104 | 0,,cpu-migrations:u,1382429087347,100.00,0.000,K/sec 105 | 7101924,,page-faults:u,1382429087347,100.00,0.005,M/sec 106 | 3072889872109,,cycles:u,1382433161249,100.00,2.223,GHz 107 | 9759861417754,,instructions:u,1382433161249,100.00,3.18,insn per cycle 108 | 375576406533,,branches:u,1382433161249,100.00,271.679,M/sec 109 | 
118766558,,branch-misses:u,1382433161249,100.00,0.03,of all branches 110 | -------------------------------------------------------------------------------- /reports/i5-8250U/multi-thread/gcc/v7_cache_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v7_cache_reuse_cpp with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 3.078284 3 | 3.117652 4 | 3.125627 5 | 3.114809 6 | 3.131621 7 | 3.133192 8 | 3.124027 9 | 3.144378 10 | 3.115887 11 | 3.128965 12 | 3.119764 13 | 3.122761 14 | 3.149038 15 | 3.128715 16 | 3.11266 17 | 3.137619 18 | 3.12845 19 | 3.111754 20 | 3.117016 21 | 3.127567 22 | 3.11207 23 | 3.128159 24 | 3.127914 25 | 3.125685 26 | 3.137756 27 | 3.146362 28 | 3.143817 29 | 3.11373 30 | 3.127873 31 | 3.120617 32 | 3.135558 33 | 3.115136 34 | 3.129517 35 | 3.122765 36 | 3.139928 37 | 3.129869 38 | 3.126944 39 | 3.147294 40 | 3.119752 41 | 3.12073 42 | 3.121579 43 | 3.118529 44 | 3.116759 45 | 3.135472 46 | 3.120545 47 | 3.137958 48 | 3.120394 49 | 3.130548 50 | 3.119388 51 | 3.12306 52 | 3.151047 53 | 3.131945 54 | 3.123702 55 | 3.138468 56 | 3.134306 57 | 3.120715 58 | 3.144909 59 | 3.127139 60 | 3.161025 61 | 3.127499 62 | 3.125899 63 | 3.121638 64 | 3.134799 65 | 3.123947 66 | 3.142292 67 | 3.136786 68 | 3.116916 69 | 3.134872 70 | 3.135749 71 | 3.160067 72 | 3.134365 73 | 3.128418 74 | 3.131549 75 | 3.130516 76 | 3.123591 77 | 3.139105 78 | 3.121237 79 | 3.140691 80 | 3.14198 81 | 3.120537 82 | 3.13458 83 | 3.132599 84 | 3.150111 85 | 3.156507 86 | 3.172827 87 | 3.12686 88 | 3.138568 89 | 3.126846 90 | 3.129101 91 | 3.134565 92 | 3.136124 93 | 3.132734 94 | 3.129526 95 | 3.128536 96 | 3.132892 97 | 3.120859 98 | 3.142072 99 | 3.126377 100 | 3.116737 101 | 3.136464 102 | 1280230.66,msec,task-clock:u,1280230656744,100.00,3.789,CPUs utilized 103 | 0,,context-switches:u,1280230656744,100.00,0.000,K/sec 104 | 
0,,cpu-migrations:u,1280230656744,100.00,0.000,K/sec 105 | 4175679,,page-faults:u,1280230656744,100.00,0.003,M/sec 106 | 3103244507171,,cycles:u,1280234285308,100.00,2.424,GHz 107 | 8785076559000,,instructions:u,1280234285308,100.00,2.83,insn per cycle 108 | 380700222889,,branches:u,1280234285308,100.00,297.368,M/sec 109 | 855087780,,branch-misses:u,1280234285308,100.00,0.22,of all branches 110 | -------------------------------------------------------------------------------- /reports/i5-8250U/multi-thread/rustc/v0_baseline.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v0_baseline_rust with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 401.2816 3 | 355.5921 4 | 355.7548 5 | 4443170.20,msec,task-clock:u,4443170201819,100.00,3.991,CPUs utilized 6 | 0,,context-switches:u,4443170201819,100.00,0.000,K/sec 7 | 0,,cpu-migrations:u,4443170201819,100.00,0.000,K/sec 8 | 70569,,page-faults:u,4443170201819,100.00,0.016,K/sec 9 | 12401997900298,,cycles:u,4443177413130,100.00,2.791,GHz 10 | 9080125164454,,instructions:u,4443177413130,100.00,0.73,insn per cycle 11 | 1945194678903,,branches:u,4443177413130,100.00,437.794,M/sec 12 | 109287422,,branch-misses:u,4443177413130,100.00,0.01,of all branches 13 | -------------------------------------------------------------------------------- /reports/i5-8250U/multi-thread/rustc/v1_linear_reading.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v1_linear_reading_rust with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 78.16853 3 | 78.19552 4 | 78.22723 5 | 78.20248 6 | 78.33114 7 | 78.27235 8 | 78.35899 9 | 78.3275 10 | 78.30158 11 | 78.31883 12 | 78.27418 13 | 78.2224 14 | 3758237.64,msec,task-clock:u,3758237642639,100.00,3.990,CPUs utilized 15 | 0,,context-switches:u,3758237642639,100.00,0.000,K/sec 16 | 
0,,cpu-migrations:u,3758237642639,100.00,0.000,K/sec 17 | 492462,,page-faults:u,3758237642639,100.00,0.131,K/sec 18 | 10830657818317,,cycles:u,3758242416166,100.00,2.882,GHz 19 | 8784325701508,,instructions:u,3758242416166,100.00,0.81,insn per cycle 20 | 329624124840,,branches:u,3758242416166,100.00,87.707,M/sec 21 | 432392011,,branch-misses:u,3758242416166,100.00,0.13,of all branches 22 | -------------------------------------------------------------------------------- /reports/i5-8250U/multi-thread/rustc/v2_instr_level_parallelism.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v2_instr_level_parallelism_rust with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 23.59614 3 | 23.60381 4 | 23.60999 5 | 23.5871 6 | 23.59959 7 | 23.64451 8 | 23.60686 9 | 23.62471 10 | 23.61868 11 | 23.6154 12 | 23.76109 13 | 23.60281 14 | 23.60613 15 | 23.80029 16 | 23.84466 17 | 23.63669 18 | 23.62212 19 | 23.60593 20 | 23.62556 21 | 23.59211 22 | 23.66517 23 | 23.59767 24 | 23.61345 25 | 23.61556 26 | 23.64282 27 | 23.59065 28 | 23.6083 29 | 23.64078 30 | 23.58606 31 | 23.64487 32 | 23.62458 33 | 23.62029 34 | 23.71958 35 | 23.66956 36 | 23.62757 37 | 23.62653 38 | 23.62875 39 | 23.58735 40 | 23.60216 41 | 3681703.20,msec,task-clock:u,3681703197197,100.00,3.954,CPUs utilized 42 | 0,,context-switches:u,3681703197197,100.00,0.000,K/sec 43 | 0,,cpu-migrations:u,3681703197197,100.00,0.000,K/sec 44 | 2812838,,page-faults:u,3681703197197,100.00,0.764,K/sec 45 | 9158548004466,,cycles:u,3681709959699,100.00,2.488,GHz 46 | 23305456690812,,instructions:u,3681709959699,100.00,2.54,insn per cycle 47 | 4233086565412,,branches:u,3681709959699,100.00,1149.763,M/sec 48 | 1404938281,,branch-misses:u,3681709959699,100.00,0.03,of all branches 49 | -------------------------------------------------------------------------------- /reports/i5-8250U/multi-thread/rustc/v3_simd.txt: 
-------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v3_simd_rust with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 16.66083 3 | 16.65561 4 | 16.65871 5 | 16.67824 6 | 16.66653 7 | 16.66649 8 | 17.10807 9 | 16.64227 10 | 16.65435 11 | 16.65804 12 | 16.6693 13 | 16.67673 14 | 16.67173 15 | 16.65921 16 | 16.66211 17 | 16.66441 18 | 16.67723 19 | 16.67549 20 | 16.6639 21 | 16.65759 22 | 16.67002 23 | 16.67531 24 | 17.63399 25 | 16.67019 26 | 16.67361 27 | 16.66647 28 | 16.67419 29 | 16.65665 30 | 17.32111 31 | 16.65623 32 | 16.66764 33 | 16.65745 34 | 16.6696 35 | 16.66569 36 | 16.66086 37 | 17.10282 38 | 16.63543 39 | 16.67646 40 | 17.12961 41 | 16.66567 42 | 16.66158 43 | 16.65818 44 | 17.20138 45 | 16.62721 46 | 16.64966 47 | 16.66091 48 | 16.65405 49 | 16.66012 50 | 16.66771 51 | 16.65996 52 | 16.66254 53 | 16.67458 54 | 16.65905 55 | 16.67632 56 | 3600534.64,msec,task-clock:u,3600534642483,100.00,3.929,CPUs utilized 57 | 0,,context-switches:u,3600534642483,100.00,0.000,K/sec 58 | 0,,cpu-migrations:u,3600534642483,100.00,0.000,K/sec 59 | 3867553,,page-faults:u,3600534642483,100.00,0.001,M/sec 60 | 8640828608472,,cycles:u,3600542000803,100.00,2.400,GHz 61 | 6050742941197,,instructions:u,3600542000803,100.00,0.70,insn per cycle 62 | 397385605513,,branches:u,3600542000803,100.00,110.368,M/sec 63 | 1945554238,,branch-misses:u,3600542000803,100.00,0.49,of all branches 64 | -------------------------------------------------------------------------------- /reports/i5-8250U/multi-thread/rustc/v4_register_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v4_register_reuse_rust with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 5.399178 3 | 5.426381 4 | 5.425876 5 | 5.430157 6 | 5.431025 7 | 5.416199 8 | 5.416247 9 | 5.428231 10 | 5.430609 11 | 5.421122 12 | 5.424732 13 | 
5.422343 14 | 5.444095 15 | 5.430251 16 | 5.42515 17 | 5.432224 18 | 5.416706 19 | 5.427034 20 | 5.416989 21 | 5.427121 22 | 5.418625 23 | 5.429446 24 | 5.435752 25 | 5.425242 26 | 5.449229 27 | 5.425335 28 | 5.436482 29 | 5.430106 30 | 5.427629 31 | 5.439159 32 | 5.440762 33 | 5.429582 34 | 5.426525 35 | 5.426466 36 | 5.426532 37 | 5.431894 38 | 5.4398 39 | 5.42553 40 | 5.426752 41 | 5.42452 42 | 5.427121 43 | 5.430495 44 | 5.423321 45 | 5.425563 46 | 5.430197 47 | 5.423512 48 | 5.440902 49 | 5.428041 50 | 5.426326 51 | 5.427358 52 | 5.425679 53 | 5.428494 54 | 5.432748 55 | 5.427439 56 | 5.425182 57 | 5.4276 58 | 5.426728 59 | 5.426872 60 | 5.428556 61 | 5.429123 62 | 5.424603 63 | 5.450033 64 | 5.425606 65 | 5.427161 66 | 5.429102 67 | 5.432639 68 | 5.432914 69 | 5.425272 70 | 5.43173 71 | 5.45354 72 | 5.45061 73 | 5.434788 74 | 5.431167 75 | 5.43288 76 | 5.431196 77 | 5.42238 78 | 5.429692 79 | 5.43084 80 | 5.426635 81 | 5.425178 82 | 5.425113 83 | 5.433111 84 | 5.424843 85 | 5.430964 86 | 5.436264 87 | 5.424374 88 | 5.440241 89 | 5.420583 90 | 5.428564 91 | 5.426904 92 | 5.425439 93 | 5.426914 94 | 5.429997 95 | 5.438099 96 | 5.430884 97 | 5.432976 98 | 5.428815 99 | 5.4269 100 | 5.438837 101 | 5.428345 102 | 2160708.19,msec,task-clock:u,2160708194316,100.00,3.804,CPUs utilized 103 | 0,,context-switches:u,2160708194316,100.00,0.000,K/sec 104 | 0,,cpu-migrations:u,2160708194316,100.00,0.000,K/sec 105 | 7101998,,page-faults:u,2160708194316,100.00,0.003,M/sec 106 | 5205069594116,,cycles:u,2160715495096,100.00,2.409,GHz 107 | 8503869547294,,instructions:u,2160715495096,100.00,1.63,insn per cycle 108 | 360176956003,,branches:u,2160715495096,100.00,166.694,M/sec 109 | 401899917,,branch-misses:u,2160715495096,100.00,0.11,of all branches 110 | -------------------------------------------------------------------------------- /reports/i5-8250U/multi-thread/rustc/v5_more_register_reuse.txt: -------------------------------------------------------------------------------- 1 
| benchmarking ./build/bin/v5_more_register_reuse_rust with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 3.816737 3 | 3.718824 4 | 3.78161 5 | 3.812567 6 | 3.68187 7 | 3.809396 8 | 3.710722 9 | 3.77181 10 | 3.751647 11 | 3.673215 12 | 3.817866 13 | 3.764454 14 | 3.748741 15 | 3.790122 16 | 3.722781 17 | 3.737368 18 | 3.951584 19 | 3.723147 20 | 3.73567 21 | 3.755595 22 | 3.744701 23 | 3.676236 24 | 3.779681 25 | 3.73296 26 | 3.743059 27 | 3.804739 28 | 3.733954 29 | 3.729822 30 | 3.772933 31 | 3.795165 32 | 3.803853 33 | 3.73943 34 | 3.783473 35 | 3.781118 36 | 3.745593 37 | 3.935322 38 | 3.762105 39 | 3.746948 40 | 4.06761 41 | 3.94002 42 | 3.860522 43 | 3.917813 44 | 3.844722 45 | 3.777807 46 | 3.926887 47 | 3.746956 48 | 3.769902 49 | 3.780334 50 | 3.856579 51 | 3.873204 52 | 3.919349 53 | 3.83614 54 | 3.736657 55 | 3.813358 56 | 3.856388 57 | 3.905765 58 | 3.882486 59 | 3.79811 60 | 3.769933 61 | 3.776159 62 | 3.819941 63 | 3.752918 64 | 3.718887 65 | 3.898859 66 | 3.763633 67 | 3.804799 68 | 3.745095 69 | 3.765343 70 | 3.78189 71 | 3.842931 72 | 3.738287 73 | 3.745433 74 | 3.748717 75 | 3.794731 76 | 3.82205 77 | 3.69094 78 | 3.783226 79 | 3.795935 80 | 3.725252 81 | 3.742451 82 | 3.622581 83 | 3.907691 84 | 3.805382 85 | 3.771713 86 | 3.780737 87 | 3.751476 88 | 3.728399 89 | 3.732212 90 | 3.764637 91 | 3.783278 92 | 3.777251 93 | 3.756197 94 | 3.70926 95 | 3.762687 96 | 3.931043 97 | 3.72982 98 | 3.772659 99 | 3.707114 100 | 3.916638 101 | 3.772104 102 | 1497022.06,msec,task-clock:u,1497022061565,100.00,3.711,CPUs utilized 103 | 0,,context-switches:u,1497022061565,100.00,0.000,K/sec 104 | 0,,cpu-migrations:u,1497022061565,100.00,0.000,K/sec 105 | 7101996,,page-faults:u,1497022061565,100.00,0.005,M/sec 106 | 3536293889438,,cycles:u,1497030204435,100.00,2.362,GHz 107 | 9120569908300,,instructions:u,1497030204435,100.00,2.58,insn per cycle 108 | 391927240398,,branches:u,1497030204435,100.00,261.805,M/sec 109 | 
162299952,,branch-misses:u,1497030204435,100.00,0.04,of all branches 110 | -------------------------------------------------------------------------------- /reports/i5-8250U/multi-thread/rustc/v6_prefetch.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v6_prefetch_rust with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 3.370019 3 | 3.428489 4 | 3.433821 5 | 3.428073 6 | 3.434349 7 | 3.431515 8 | 3.43748 9 | 3.448488 10 | 3.429347 11 | 3.435373 12 | 3.432059 13 | 3.427557 14 | 3.437181 15 | 3.436821 16 | 3.439126 17 | 3.437324 18 | 3.4389 19 | 3.433045 20 | 3.435359 21 | 3.433734 22 | 3.43636 23 | 3.437126 24 | 3.433908 25 | 3.455251 26 | 3.429824 27 | 3.437598 28 | 3.434393 29 | 3.429001 30 | 3.431051 31 | 3.429527 32 | 3.433801 33 | 3.429734 34 | 3.42675 35 | 3.429969 36 | 3.43882 37 | 3.425659 38 | 3.435577 39 | 3.432842 40 | 3.426804 41 | 3.430784 42 | 3.427251 43 | 3.42659 44 | 3.433471 45 | 3.427374 46 | 3.433379 47 | 3.433874 48 | 3.438099 49 | 3.43209 50 | 3.436064 51 | 3.435429 52 | 3.437479 53 | 3.434393 54 | 3.434633 55 | 3.43452 56 | 3.441087 57 | 3.435779 58 | 3.434397 59 | 3.431523 60 | 3.432661 61 | 3.427164 62 | 3.434582 63 | 3.576687 64 | 3.433702 65 | 3.436305 66 | 3.440411 67 | 3.427339 68 | 3.430593 69 | 3.443562 70 | 3.430298 71 | 3.444552 72 | 3.428275 73 | 3.428042 74 | 3.433076 75 | 3.428598 76 | 3.431718 77 | 3.43405 78 | 3.433975 79 | 3.450093 80 | 3.433103 81 | 3.448735 82 | 3.435748 83 | 3.433902 84 | 3.437425 85 | 3.435409 86 | 3.430579 87 | 3.432064 88 | 3.439029 89 | 3.440087 90 | 3.436765 91 | 3.440817 92 | 3.433008 93 | 3.432319 94 | 3.433388 95 | 3.426758 96 | 3.428304 97 | 3.428732 98 | 3.429485 99 | 3.426996 100 | 3.430475 101 | 3.425993 102 | 1360329.30,msec,task-clock:u,1360329299085,100.00,3.687,CPUs utilized 103 | 0,,context-switches:u,1360329299085,100.00,0.000,K/sec 104 | 
0,,cpu-migrations:u,1360329299085,100.00,0.000,K/sec 105 | 7101999,,page-faults:u,1360329299085,100.00,0.005,M/sec 106 | 3072678100754,,cycles:u,1360337376611,100.00,2.259,GHz 107 | 9792761101855,,instructions:u,1360337376611,100.00,3.19,insn per cycle 108 | 390971808887,,branches:u,1360337376611,100.00,287.410,M/sec 109 | 159601350,,branch-misses:u,1360337376611,100.00,0.04,of all branches 110 | -------------------------------------------------------------------------------- /reports/i5-8250U/multi-thread/rustc/v7_cache_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v7_cache_reuse_rust with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 3.293674 3 | 3.350653 4 | 3.328546 5 | 3.341669 6 | 3.324885 7 | 3.317233 8 | 3.343759 9 | 3.3206 10 | 3.321883 11 | 3.328816 12 | 3.348019 13 | 3.339249 14 | 3.344189 15 | 3.331354 16 | 3.316135 17 | 3.33268 18 | 3.347268 19 | 3.305328 20 | 3.323567 21 | 3.335784 22 | 3.348796 23 | 3.319069 24 | 3.305951 25 | 3.336851 26 | 3.329738 27 | 3.34936 28 | 3.327496 29 | 3.356866 30 | 3.323814 31 | 3.33361 32 | 3.305487 33 | 3.343168 34 | 3.317747 35 | 3.332236 36 | 3.332291 37 | 3.360801 38 | 3.329529 39 | 3.336458 40 | 3.345718 41 | 3.355305 42 | 3.355881 43 | 3.347582 44 | 3.329143 45 | 3.35638 46 | 3.328746 47 | 3.350077 48 | 3.328263 49 | 3.351643 50 | 3.358186 51 | 3.352144 52 | 3.350137 53 | 3.332962 54 | 3.318752 55 | 3.33425 56 | 3.313517 57 | 3.333137 58 | 3.3509 59 | 3.320528 60 | 3.330637 61 | 3.35178 62 | 3.345968 63 | 3.371292 64 | 3.31852 65 | 3.323081 66 | 3.327166 67 | 3.348325 68 | 3.336891 69 | 3.33899 70 | 3.342942 71 | 3.344654 72 | 3.358366 73 | 3.336301 74 | 3.331634 75 | 3.329823 76 | 3.331883 77 | 3.336066 78 | 3.340623 79 | 3.335329 80 | 3.359384 81 | 3.334522 82 | 3.332415 83 | 3.335799 84 | 3.322657 85 | 3.33364 86 | 3.327407 87 | 3.332373 88 | 3.318444 89 | 3.321457 90 | 3.329163 91 | 3.34833 92 | 
3.321558 93 | 3.328828 94 | 3.349174 95 | 3.335675 96 | 3.341253 97 | 3.336395 98 | 3.316179 99 | 3.320281 100 | 3.330939 101 | 3.333168 102 | 1324312.47,msec,task-clock:u,1324312471894,100.00,3.705,CPUs utilized 103 | 0,,context-switches:u,1324312471894,100.00,0.000,K/sec 104 | 0,,cpu-migrations:u,1324312471894,100.00,0.000,K/sec 105 | 3630977,,page-faults:u,1324312471894,100.00,0.003,M/sec 106 | 3300151697839,,cycles:u,1324349131032,100.00,2.492,GHz 107 | 8904136475569,,instructions:u,1324349131032,100.00,2.70,insn per cycle 108 | 404097675076,,branches:u,1324349131032,100.00,305.138,M/sec 109 | 929142034,,branch-misses:u,1324349131032,100.00,0.23,of all branches 110 | -------------------------------------------------------------------------------- /reports/i5-8250U/single-thread/clang/v0_baseline.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v0_baseline_cpp with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 1302.1 3 | 1304837.09,msec,task-clock:u,1304837090331,100.00,1.000,CPUs utilized 4 | 0,,context-switches:u,1304837090331,100.00,0.000,K/sec 5 | 0,,cpu-migrations:u,1304837090331,100.00,0.000,K/sec 6 | 70501,,page-faults:u,1304837090331,100.00,0.054,K/sec 7 | 4416296539338,,cycles:u,1304839342253,100.00,3.385,GHz 8 | 1086995967818,,instructions:u,1304839342253,100.00,0.25,insn per cycle 9 | 55273389811,,branches:u,1304839342253,100.00,42.360,M/sec 10 | 36038165,,branch-misses:u,1304839342253,100.00,0.07,of all branches 11 | -------------------------------------------------------------------------------- /reports/i5-8250U/single-thread/clang/v1_linear_reading.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v1_linear_reading_cpp with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 265.1723 3 | 265.1777 4 | 265.1497 5 | 265.1677 6 | 
1071299.62,msec,task-clock:u,1071299617835,100.00,1.000,CPUs utilized 7 | 0,,context-switches:u,1071299617835,100.00,0.000,K/sec 8 | 0,,cpu-migrations:u,1071299617835,100.00,0.000,K/sec 9 | 211128,,page-faults:u,1071299617835,100.00,0.197,K/sec 10 | 3629230190329,,cycles:u,1071301895754,100.00,3.388,GHz 11 | 3266868641710,,instructions:u,1071301895754,100.00,0.90,insn per cycle 12 | 221071563093,,branches:u,1071301895754,100.00,206.358,M/sec 13 | 144087375,,branch-misses:u,1071301895754,100.00,0.07,of all branches 14 | -------------------------------------------------------------------------------- /reports/i5-8250U/single-thread/clang/v2_instr_level_parallelism.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v2_instr_level_parallelism_cpp with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 282.5595 3 | 282.4662 4 | 282.5261 5 | 282.5954 6 | 1140791.65,msec,task-clock:u,1140791652204,100.00,1.000,CPUs utilized 7 | 0,,context-switches:u,1140791652204,100.00,0.000,K/sec 8 | 0,,cpu-migrations:u,1140791652204,100.00,0.000,K/sec 9 | 351756,,page-faults:u,1140791652204,100.00,0.308,K/sec 10 | 3864227310443,,cycles:u,1140794182592,100.00,3.387,GHz 11 | 12556304443768,,instructions:u,1140794182592,100.00,3.25,insn per cycle 12 | 220771450658,,branches:u,1140794182592,100.00,193.525,M/sec 13 | 144129675,,branch-misses:u,1140794182592,100.00,0.07,of all branches 14 | -------------------------------------------------------------------------------- /reports/i5-8250U/single-thread/clang/v3_simd.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v3_simd_cpp with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 58.17582 3 | 56.66148 4 | 58.07611 5 | 58.05804 6 | 56.78421 7 | 56.51473 8 | 58.13382 9 | 56.72097 10 | 58.05426 11 | 58.03685 12 | 56.75717 13 | 56.36092 14 | 58.13965 
15 | 58.12599 16 | 56.73394 17 | 56.73319 18 | 960320.28,msec,task-clock:u,960320280352,100.00,1.000,CPUs utilized 19 | 0,,context-switches:u,960320280352,100.00,0.000,K/sec 20 | 0,,cpu-migrations:u,960320280352,100.00,0.000,K/sec 21 | 2320515,,page-faults:u,960320280352,100.00,0.002,M/sec 22 | 2782669653717,,cycles:u,960323522721,100.00,2.898,GHz 23 | 3142924392075,,instructions:u,960323522721,100.00,1.13,insn per cycle 24 | 452247731805,,branches:u,960323522721,100.00,470.934,M/sec 25 | 576229198,,branch-misses:u,960323522721,100.00,0.13,of all branches 26 | -------------------------------------------------------------------------------- /reports/i5-8250U/single-thread/clang/v4_register_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v4_register_reuse_cpp with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 19.13035 3 | 18.8558 4 | 18.84252 5 | 18.83761 6 | 18.84677 7 | 18.8428 8 | 18.85089 9 | 19.07567 10 | 18.84141 11 | 18.8252 12 | 18.8263 13 | 18.84459 14 | 18.83212 15 | 18.83928 16 | 18.83242 17 | 18.83478 18 | 19.08376 19 | 19.10859 20 | 19.10994 21 | 19.12057 22 | 19.12128 23 | 19.1155 24 | 19.12469 25 | 18.84719 26 | 18.85095 27 | 18.85022 28 | 18.8425 29 | 18.83308 30 | 18.82956 31 | 19.06984 32 | 19.11155 33 | 19.10241 34 | 19.11889 35 | 19.09939 36 | 18.83853 37 | 18.84086 38 | 18.83305 39 | 18.8354 40 | 18.83733 41 | 18.83267 42 | 19.07557 43 | 19.10756 44 | 19.08822 45 | 19.11175 46 | 19.10625 47 | 19.11344 48 | 18.84608 49 | 18.83197 50 | 1036226.82,msec,task-clock:u,1036226822905,100.00,1.000,CPUs utilized 51 | 0,,context-switches:u,1036226822905,100.00,0.000,K/sec 52 | 0,,cpu-migrations:u,1036226822905,100.00,0.000,K/sec 53 | 6820548,,page-faults:u,1036226822905,100.00,0.007,M/sec 54 | 3483583193506,,cycles:u,1036229690739,100.00,3.362,GHz 55 | 4385006943133,,instructions:u,1036229690739,100.00,1.26,insn per cycle 56 | 
205484596200,,branches:u,1036229690739,100.00,198.301,M/sec 57 | 192472425,,branch-misses:u,1036229690739,100.00,0.09,of all branches 58 | -------------------------------------------------------------------------------- /reports/i5-8250U/single-thread/clang/v5_more_register_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v5_more_register_reuse_cpp with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 10.42565 3 | 10.43079 4 | 10.42289 5 | 10.43233 6 | 10.42802 7 | 10.42621 8 | 10.42857 9 | 10.41885 10 | 10.42471 11 | 10.41799 12 | 10.41909 13 | 10.42097 14 | 10.42763 15 | 10.43178 16 | 10.43192 17 | 10.42476 18 | 10.41378 19 | 10.43446 20 | 10.4285 21 | 10.4244 22 | 10.40923 23 | 10.43976 24 | 10.4247 25 | 10.4251 26 | 10.43048 27 | 10.43098 28 | 10.43266 29 | 10.44022 30 | 10.42014 31 | 10.42443 32 | 10.43013 33 | 10.42614 34 | 10.41993 35 | 10.42189 36 | 10.41743 37 | 10.41604 38 | 10.41977 39 | 10.41758 40 | 10.41563 41 | 10.42142 42 | 10.41722 43 | 10.42254 44 | 10.41154 45 | 10.42114 46 | 10.43047 47 | 10.42872 48 | 10.42346 49 | 10.41998 50 | 10.43028 51 | 10.4383 52 | 10.42824 53 | 10.42914 54 | 10.43235 55 | 10.42072 56 | 10.43371 57 | 10.429 58 | 10.42556 59 | 10.43514 60 | 10.42033 61 | 10.41527 62 | 10.41027 63 | 10.41821 64 | 10.42108 65 | 10.4189 66 | 10.41688 67 | 10.43042 68 | 10.43161 69 | 10.41981 70 | 10.43379 71 | 10.42549 72 | 10.42529 73 | 10.42313 74 | 10.42998 75 | 10.40555 76 | 10.41809 77 | 10.41409 78 | 10.41121 79 | 10.41764 80 | 10.41636 81 | 10.42182 82 | 10.41499 83 | 10.41841 84 | 10.41812 85 | 10.41884 86 | 10.4177 87 | 10.43313 88 | 10.41989 89 | 1136191.64,msec,task-clock:u,1136191641805,100.00,1.000,CPUs utilized 90 | 0,,context-switches:u,1136191641805,100.00,0.000,K/sec 91 | 0,,cpu-migrations:u,1136191641805,100.00,0.000,K/sec 92 | 12304960,,page-faults:u,1136191641805,100.00,0.011,M/sec 93 | 
3802159126504,,cycles:u,1136194575018,100.00,3.346,GHz 94 | 7947222553712,,instructions:u,1136194575018,100.00,2.09,insn per cycle 95 | 403760139574,,branches:u,1136194575018,100.00,355.363,M/sec 96 | 119112897,,branch-misses:u,1136194575018,100.00,0.03,of all branches 97 | -------------------------------------------------------------------------------- /reports/i5-8250U/single-thread/clang/v6_prefetch.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v6_prefetch_cpp with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 9.525298 3 | 9.500107 4 | 9.493165 5 | 9.525084 6 | 9.516562 7 | 9.516355 8 | 9.516675 9 | 9.515702 10 | 9.497742 11 | 9.485681 12 | 9.494135 13 | 9.492985 14 | 9.493177 15 | 9.493001 16 | 9.492756 17 | 9.491195 18 | 9.495405 19 | 9.492917 20 | 9.492784 21 | 9.492875 22 | 9.520669 23 | 9.513652 24 | 9.515849 25 | 9.514002 26 | 9.513371 27 | 9.513201 28 | 9.513982 29 | 9.515961 30 | 9.51378 31 | 9.511144 32 | 9.517434 33 | 9.516855 34 | 9.514893 35 | 9.498484 36 | 9.493709 37 | 9.492361 38 | 9.491965 39 | 9.49259 40 | 9.490296 41 | 9.491923 42 | 9.520501 43 | 9.515985 44 | 9.515569 45 | 9.515689 46 | 9.517416 47 | 9.500072 48 | 9.49445 49 | 9.491791 50 | 9.491611 51 | 9.493113 52 | 9.491521 53 | 9.492748 54 | 9.493503 55 | 9.491843 56 | 9.493231 57 | 9.491722 58 | 9.492989 59 | 9.523006 60 | 9.517618 61 | 9.517036 62 | 9.513704 63 | 9.519549 64 | 9.514566 65 | 9.516944 66 | 9.515245 67 | 9.514241 68 | 9.514961 69 | 9.498842 70 | 9.493694 71 | 9.492723 72 | 9.492723 73 | 9.492614 74 | 9.491759 75 | 9.495221 76 | 9.492373 77 | 9.521488 78 | 9.516069 79 | 9.498116 80 | 9.4857 81 | 9.492598 82 | 9.492219 83 | 9.492451 84 | 9.489337 85 | 9.491683 86 | 9.491749 87 | 9.493464 88 | 9.492333 89 | 9.521878 90 | 9.515138 91 | 9.515265 92 | 9.516319 93 | 9.51652 94 | 9.496285 95 | 9.492623 96 | 9.493216 97 | 
1153335.85,msec,task-clock:u,1153335850221,100.00,1.000,CPUs utilized 98 | 0,,context-switches:u,1153335850221,100.00,0.000,K/sec 99 | 0,,cpu-migrations:u,1153335850221,100.00,0.000,K/sec 100 | 13429972,,page-faults:u,1153335850221,100.00,0.012,M/sec 101 | 3856350191534,,cycles:u,1153339163081,100.00,3.344,GHz 102 | 9319247219179,,instructions:u,1153339163081,100.00,2.42,insn per cycle 103 | 440886279326,,branches:u,1153339163081,100.00,382.271,M/sec 104 | 129076447,,branch-misses:u,1153339163081,100.00,0.03,of all branches 105 | -------------------------------------------------------------------------------- /reports/i5-8250U/single-thread/clang/v7_cache_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v7_cache_reuse_cpp with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 8.579722 3 | 8.565363 4 | 8.602211 5 | 8.558297 6 | 8.558992 7 | 8.563809 8 | 8.6066 9 | 8.562318 10 | 8.564075 11 | 8.561586 12 | 8.560841 13 | 8.562404 14 | 8.566869 15 | 8.567926 16 | 8.567017 17 | 8.566502 18 | 8.567903 19 | 8.567179 20 | 8.56561 21 | 8.565549 22 | 8.566845 23 | 8.57118 24 | 8.567049 25 | 8.566574 26 | 8.560174 27 | 8.566968 28 | 8.562805 29 | 8.569084 30 | 8.566846 31 | 8.566378 32 | 8.567039 33 | 8.569012 34 | 8.567259 35 | 8.569197 36 | 8.566397 37 | 8.571501 38 | 8.565743 39 | 8.569412 40 | 8.57021 41 | 8.569471 42 | 8.570008 43 | 8.564157 44 | 8.570724 45 | 8.569926 46 | 8.567472 47 | 8.566791 48 | 8.567122 49 | 8.568217 50 | 8.561919 51 | 8.568887 52 | 8.570243 53 | 8.568161 54 | 8.565828 55 | 8.573846 56 | 8.569296 57 | 8.5667 58 | 8.570856 59 | 8.568094 60 | 8.56853 61 | 8.570832 62 | 8.568101 63 | 8.561397 64 | 8.567569 65 | 8.566422 66 | 8.56809 67 | 8.563861 68 | 8.564583 69 | 8.566974 70 | 8.567802 71 | 8.559019 72 | 8.568474 73 | 8.567235 74 | 8.567898 75 | 8.567259 76 | 8.568729 77 | 8.565257 78 | 8.569543 79 | 8.566181 80 | 8.567841 81 | 8.569665 82 | 
8.56356 83 | 8.558732 84 | 8.569117 85 | 8.577206 86 | 8.566652 87 | 8.577291 88 | 8.572914 89 | 8.563182 90 | 8.567739 91 | 8.573534 92 | 8.572495 93 | 8.576669 94 | 8.569399 95 | 8.571202 96 | 8.564163 97 | 8.57102 98 | 8.56787 99 | 8.573375 100 | 8.570033 101 | 8.561018 102 | 1123009.93,msec,task-clock:u,1123009932255,100.00,1.000,CPUs utilized 103 | 0,,context-switches:u,1123009932255,100.00,0.000,K/sec 104 | 0,,cpu-migrations:u,1123009932255,100.00,0.000,K/sec 105 | 4714465,,page-faults:u,1123009932255,100.00,0.004,M/sec 106 | 3783561032752,,cycles:u,1123012664008,100.00,3.369,GHz 107 | 9549972880387,,instructions:u,1123012664008,100.00,2.52,insn per cycle 108 | 469699987056,,branches:u,1123012664008,100.00,418.251,M/sec 109 | 870159823,,branch-misses:u,1123012664008,100.00,0.19,of all branches 110 | -------------------------------------------------------------------------------- /reports/i5-8250U/single-thread/gcc/v0_baseline.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v0_baseline_cpp with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 1272.607 3 | 1272905.10,msec,task-clock:u,1272905099044,100.00,1.000,CPUs utilized 4 | 0,,context-switches:u,1272905099044,100.00,0.000,K/sec 5 | 0,,cpu-migrations:u,1272905099044,100.00,0.000,K/sec 6 | 70498,,page-faults:u,1272905099044,100.00,0.055,K/sec 7 | 4307896614800,,cycles:u,1272907607080,100.00,3.384,GHz 8 | 1514468891653,,instructions:u,1272907607080,100.00,0.35,insn per cycle 9 | 216364372241,,branches:u,1272907607080,100.00,169.977,M/sec 10 | 36039739,,branch-misses:u,1272907607080,100.00,0.02,of all branches 11 | -------------------------------------------------------------------------------- /reports/i5-8250U/single-thread/gcc/v1_linear_reading.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v1_linear_reading_cpp with input containing 
36000000 elements, stopping after 100 iterations or 900 seconds 2 | 265.5179 3 | 265.5305 4 | 265.5485 5 | 265.5587 6 | 1063074.21,msec,task-clock:u,1063074212831,100.00,1.000,CPUs utilized 7 | 0,,context-switches:u,1063074212831,100.00,0.000,K/sec 8 | 0,,cpu-migrations:u,1063074212831,100.00,0.000,K/sec 9 | 211124,,page-faults:u,1063074212831,100.00,0.199,K/sec 10 | 3601370133876,,cycles:u,1063076355258,100.00,3.388,GHz 11 | 6058101389908,,instructions:u,1063076355258,100.00,1.68,insn per cycle 12 | 865588509500,,branches:u,1063076355258,100.00,814.231,M/sec 13 | 144083957,,branch-misses:u,1063076355258,100.00,0.02,of all branches 14 | -------------------------------------------------------------------------------- /reports/i5-8250U/single-thread/gcc/v2_instr_level_parallelism.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v2_instr_level_parallelism_cpp with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 84.71143 3 | 84.71501 4 | 84.70699 5 | 84.71631 6 | 84.71066 7 | 84.7072 8 | 84.71087 9 | 84.71316 10 | 84.70263 11 | 84.7029 12 | 84.71085 13 | 934175.38,msec,task-clock:u,934175378733,100.00,1.000,CPUs utilized 14 | 0,,context-switches:u,934175378733,100.00,0.000,K/sec 15 | 0,,cpu-migrations:u,934175378733,100.00,0.000,K/sec 16 | 843953,,page-faults:u,934175378733,100.00,0.903,K/sec 17 | 3161380686551,,cycles:u,934177952241,100.00,3.384,GHz 18 | 10130892926081,,instructions:u,934177952241,100.00,3.20,insn per cycle 19 | 598460820475,,branches:u,934177952241,100.00,640.630,M/sec 20 | 496498580,,branch-misses:u,934177952241,100.00,0.08,of all branches 21 | -------------------------------------------------------------------------------- /reports/i5-8250U/single-thread/gcc/v3_simd.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v3_simd_cpp with input containing 36000000 elements, stopping after 
100 iterations or 900 seconds 2 | 58.67742 3 | 57.07801 4 | 58.62082 5 | 58.63282 6 | 58.68675 7 | 57.0047 8 | 57.0909 9 | 58.61305 10 | 57.13783 11 | 56.77741 12 | 58.66628 13 | 58.67012 14 | 56.94586 15 | 56.79184 16 | 56.85284 17 | 57.70159 18 | 927350.90,msec,task-clock:u,927350896444,100.00,1.000,CPUs utilized 19 | 0,,context-switches:u,927350896444,100.00,0.000,K/sec 20 | 0,,cpu-migrations:u,927350896444,100.00,0.000,K/sec 21 | 1195523,,page-faults:u,927350896444,100.00,0.001,M/sec 22 | 2560247732837,,cycles:u,927353851757,100.00,2.761,GHz 23 | 3079249016875,,instructions:u,927353851757,100.00,1.20,insn per cycle 24 | 438485593932,,branches:u,927353851757,100.00,472.837,M/sec 25 | 650817600,,branch-misses:u,927353851757,100.00,0.15,of all branches 26 | -------------------------------------------------------------------------------- /reports/i5-8250U/single-thread/gcc/v4_register_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v4_register_reuse_cpp with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 19.16647 3 | 18.96339 4 | 19.14535 5 | 18.95474 6 | 18.94749 7 | 18.95194 8 | 18.95322 9 | 18.94167 10 | 19.14456 11 | 19.15888 12 | 19.17267 13 | 19.16106 14 | 19.1622 15 | 19.16949 16 | 18.95214 17 | 18.94834 18 | 18.95053 19 | 18.94755 20 | 18.93841 21 | 18.95844 22 | 18.95486 23 | 18.95242 24 | 19.15794 25 | 19.17136 26 | 19.18797 27 | 19.16199 28 | 19.16779 29 | 19.16372 30 | 19.16184 31 | 19.18463 32 | 18.95653 33 | 18.95058 34 | 18.94582 35 | 18.95523 36 | 18.93572 37 | 18.9455 38 | 19.16294 39 | 19.16406 40 | 19.16494 41 | 19.1586 42 | 19.1705 43 | 19.16955 44 | 18.95473 45 | 18.94697 46 | 18.95344 47 | 18.94983 48 | 18.9613 49 | 18.94827 50 | 924332.35,msec,task-clock:u,924332345660,100.00,1.000,CPUs utilized 51 | 0,,context-switches:u,924332345660,100.00,0.000,K/sec 52 | 0,,cpu-migrations:u,924332345660,100.00,0.000,K/sec 53 | 
3445572,,page-faults:u,924332345660,100.00,0.004,M/sec 54 | 3114639359782,,cycles:u,924335252412,100.00,3.370,GHz 55 | 4195209021484,,instructions:u,924335252412,100.00,1.35,insn per cycle 56 | 163640354424,,branches:u,924335252412,100.00,177.036,M/sec 57 | 192477384,,branch-misses:u,924335252412,100.00,0.12,of all branches 58 | -------------------------------------------------------------------------------- /reports/i5-8250U/single-thread/gcc/v5_more_register_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v5_more_register_reuse_cpp with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 9.830049 3 | 9.848555 4 | 9.836472 5 | 9.83154 6 | 9.826758 7 | 9.840521 8 | 9.838269 9 | 9.841139 10 | 9.84103 11 | 9.825592 12 | 9.84774 13 | 9.836381 14 | 9.842304 15 | 9.840376 16 | 9.834928 17 | 9.831574 18 | 9.826328 19 | 9.847239 20 | 9.828582 21 | 9.825925 22 | 9.829056 23 | 9.833508 24 | 9.849304 25 | 9.835158 26 | 9.84081 27 | 9.846087 28 | 9.838755 29 | 9.841213 30 | 9.83536 31 | 9.83677 32 | 9.845072 33 | 9.827435 34 | 9.835354 35 | 9.833189 36 | 9.827566 37 | 9.828053 38 | 9.835764 39 | 9.842192 40 | 9.834476 41 | 9.822838 42 | 9.831638 43 | 9.828705 44 | 9.838714 45 | 9.823581 46 | 9.830381 47 | 9.829281 48 | 9.832667 49 | 9.829472 50 | 9.827655 51 | 9.835258 52 | 9.827677 53 | 9.838804 54 | 9.831262 55 | 9.8495 56 | 9.831923 57 | 9.827056 58 | 9.831557 59 | 9.838889 60 | 9.828402 61 | 9.827596 62 | 9.829524 63 | 9.829328 64 | 9.842507 65 | 9.843069 66 | 9.835759 67 | 9.846424 68 | 9.835559 69 | 9.840693 70 | 9.839477 71 | 9.840306 72 | 9.836918 73 | 9.842196 74 | 9.84039 75 | 9.840108 76 | 9.839373 77 | 9.844494 78 | 9.841996 79 | 9.841019 80 | 9.844271 81 | 9.837017 82 | 9.840361 83 | 9.845167 84 | 9.83907 85 | 9.842799 86 | 9.844974 87 | 9.845559 88 | 9.83316 89 | 9.843441 90 | 9.844824 91 | 9.84615 92 | 9.837638 93 | 9.841208 94 | 
924022.05,msec,task-clock:u,924022048736,100.00,1.000,CPUs utilized 95 | 0,,context-switches:u,924022048736,100.00,0.000,K/sec 96 | 0,,cpu-migrations:u,924022048736,100.00,0.000,K/sec 97 | 6539387,,page-faults:u,924022048736,100.00,0.007,M/sec 98 | 3100713066768,,cycles:u,924025051751,100.00,3.356,GHz 99 | 8047624739683,,instructions:u,924025051751,100.00,2.60,insn per cycle 100 | 347822771109,,branches:u,924025051751,100.00,376.423,M/sec 101 | 127588069,,branch-misses:u,924025051751,100.00,0.04,of all branches 102 | -------------------------------------------------------------------------------- /reports/i5-8250U/single-thread/gcc/v6_prefetch.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v6_prefetch_cpp with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 8.87634 3 | 8.825243 4 | 8.820091 5 | 8.820797 6 | 8.818348 7 | 8.819685 8 | 8.822502 9 | 8.818169 10 | 8.821562 11 | 8.869499 12 | 8.859686 13 | 8.826593 14 | 8.817453 15 | 8.820142 16 | 8.819066 17 | 8.819757 18 | 8.81957 19 | 8.867908 20 | 8.860771 21 | 8.865326 22 | 8.853316 23 | 8.859663 24 | 8.866425 25 | 8.858953 26 | 8.863858 27 | 8.855362 28 | 8.861783 29 | 8.860944 30 | 8.858145 31 | 8.861124 32 | 8.857188 33 | 8.859835 34 | 8.857654 35 | 8.852873 36 | 8.855604 37 | 8.858935 38 | 8.85085 39 | 8.858212 40 | 8.859283 41 | 8.854999 42 | 8.85603 43 | 8.858663 44 | 8.859589 45 | 8.856561 46 | 8.854408 47 | 8.858752 48 | 8.856864 49 | 8.854383 50 | 8.824775 51 | 8.818632 52 | 8.86904 53 | 8.863519 54 | 8.86345 55 | 8.858855 56 | 8.862251 57 | 8.862798 58 | 8.858987 59 | 8.856919 60 | 8.863246 61 | 8.858451 62 | 8.868857 63 | 8.861074 64 | 8.864338 65 | 8.822474 66 | 8.819791 67 | 8.817778 68 | 8.81848 69 | 8.819511 70 | 8.863249 71 | 8.855144 72 | 8.86155 73 | 8.860216 74 | 8.85834 75 | 8.856448 76 | 8.856605 77 | 8.857738 78 | 8.855968 79 | 8.864662 80 | 8.858612 81 | 8.861712 82 | 8.82323 83 | 8.818369 
84 | 8.819683 85 | 8.817841 86 | 8.817264 87 | 8.818352 88 | 8.869676 89 | 8.824137 90 | 8.819147 91 | 8.818176 92 | 8.818792 93 | 8.818495 94 | 8.819265 95 | 8.819036 96 | 8.820872 97 | 8.820903 98 | 8.864542 99 | 8.855793 100 | 8.854489 101 | 8.853251 102 | 905463.97,msec,task-clock:u,905463968360,100.00,1.000,CPUs utilized 103 | 0,,context-switches:u,905463968360,100.00,0.000,K/sec 104 | 0,,cpu-migrations:u,905463968360,100.00,0.000,K/sec 105 | 7101896,,page-faults:u,905463968360,100.00,0.008,M/sec 106 | 3035016674202,,cycles:u,905466924449,100.00,3.352,GHz 107 | 9761740084078,,instructions:u,905466924449,100.00,3.22,insn per cycle 108 | 378355068056,,branches:u,905466924449,100.00,417.858,M/sec 109 | 71332532,,branch-misses:u,905466924449,100.00,0.02,of all branches 110 | -------------------------------------------------------------------------------- /reports/i5-8250U/single-thread/gcc/v7_cache_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v7_cache_reuse_cpp with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 8.50467 3 | 8.49896 4 | 8.522979 5 | 8.504661 6 | 8.502994 7 | 8.494215 8 | 8.517154 9 | 8.497517 10 | 8.495463 11 | 8.496443 12 | 8.495622 13 | 8.496366 14 | 8.504113 15 | 8.500787 16 | 8.503928 17 | 8.501952 18 | 8.49573 19 | 8.502422 20 | 8.502661 21 | 8.500075 22 | 8.504632 23 | 8.501146 24 | 8.502609 25 | 8.501628 26 | 8.501137 27 | 8.50806 28 | 8.50448 29 | 8.503298 30 | 8.503124 31 | 8.504316 32 | 8.502549 33 | 8.501806 34 | 8.506144 35 | 8.49986 36 | 8.499672 37 | 8.500157 38 | 8.506036 39 | 8.501783 40 | 8.504604 41 | 8.501756 42 | 8.502575 43 | 8.50004 44 | 8.504465 45 | 8.502839 46 | 8.502596 47 | 8.505416 48 | 8.50456 49 | 8.505135 50 | 8.497748 51 | 8.503444 52 | 8.510558 53 | 8.503252 54 | 8.509506 55 | 8.503563 56 | 8.502383 57 | 8.501566 58 | 8.504187 59 | 8.503251 60 | 8.504928 61 | 8.50032 62 | 8.510975 63 | 8.501672 64 | 
8.506721 65 | 8.501033 66 | 8.504787 67 | 8.502017 68 | 8.505784 69 | 8.499812 70 | 8.50316 71 | 8.505531 72 | 8.496813 73 | 8.494918 74 | 8.499563 75 | 8.498891 76 | 8.50112 77 | 8.503426 78 | 8.50176 79 | 8.502104 80 | 8.511884 81 | 8.50321 82 | 8.502398 83 | 8.507035 84 | 8.501061 85 | 8.50661 86 | 8.503716 87 | 8.500608 88 | 8.501599 89 | 8.504733 90 | 8.501516 91 | 8.499701 92 | 8.501968 93 | 8.496744 94 | 8.496017 95 | 8.502524 96 | 8.500463 97 | 8.501539 98 | 8.501894 99 | 8.503311 100 | 8.506387 101 | 8.499228 102 | 871165.12,msec,task-clock:u,871165115743,100.00,1.000,CPUs utilized 103 | 0,,context-switches:u,871165115743,100.00,0.000,K/sec 104 | 0,,cpu-migrations:u,871165115743,100.00,0.000,K/sec 105 | 4152072,,page-faults:u,871165115743,100.00,0.005,M/sec 106 | 2931784981599,,cycles:u,871167503558,100.00,3.365,GHz 107 | 8790622030130,,instructions:u,871167503558,100.00,3.00,insn per cycle 108 | 382356253334,,branches:u,871167503558,100.00,438.902,M/sec 109 | 888590205,,branch-misses:u,871167503558,100.00,0.23,of all branches 110 | -------------------------------------------------------------------------------- /reports/i5-8250U/single-thread/rustc/v0_baseline.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v0_baseline_rust with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 1228.904 3 | 1229199.13,msec,task-clock:u,1229199127712,100.00,1.000,CPUs utilized 4 | 0,,context-switches:u,1229199127712,100.00,0.000,K/sec 5 | 0,,cpu-migrations:u,1229199127712,100.00,0.000,K/sec 6 | 70517,,page-faults:u,1229199127712,100.00,0.057,K/sec 7 | 4159997920738,,cycles:u,1229201418690,100.00,3.384,GHz 8 | 3026505686408,,instructions:u,1229201418690,100.00,0.73,insn per cycle 9 | 648328526288,,branches:u,1229201418690,100.00,527.440,M/sec 10 | 36042486,,branch-misses:u,1229201418690,100.00,0.01,of all branches 11 | 
-------------------------------------------------------------------------------- /reports/i5-8250U/single-thread/rustc/v1_linear_reading.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v1_linear_reading_rust with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 265.2273 3 | 265.2159 4 | 265.2044 5 | 265.2111 6 | 1061779.33,msec,task-clock:u,1061779330968,100.00,1.000,CPUs utilized 7 | 0,,context-switches:u,1061779330968,100.00,0.000,K/sec 8 | 0,,cpu-migrations:u,1061779330968,100.00,0.000,K/sec 9 | 211147,,page-faults:u,1061779330968,100.00,0.199,K/sec 10 | 3596865325450,,cycles:u,1061781549930,100.00,3.388,GHz 11 | 2927543292022,,instructions:u,1061781549930,100.00,0.81,insn per cycle 12 | 109876769944,,branches:u,1061781549930,100.00,103.484,M/sec 13 | 144085595,,branch-misses:u,1061781549930,100.00,0.13,of all branches 14 | -------------------------------------------------------------------------------- /reports/i5-8250U/single-thread/rustc/v2_instr_level_parallelism.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v2_instr_level_parallelism_rust with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 72.79124 3 | 72.80359 4 | 72.75555 5 | 72.76579 6 | 72.758 7 | 72.76572 8 | 72.76398 9 | 72.75889 10 | 72.77058 11 | 72.76354 12 | 72.76237 13 | 72.76588 14 | 72.76522 15 | 948768.67,msec,task-clock:u,948768665763,100.00,1.000,CPUs utilized 16 | 0,,context-switches:u,948768665763,100.00,0.000,K/sec 17 | 0,,cpu-migrations:u,948768665763,100.00,0.000,K/sec 18 | 984601,,page-faults:u,948768665763,100.00,0.001,M/sec 19 | 3210278024825,,cycles:u,948771246094,100.00,3.384,GHz 20 | 6361214685122,,instructions:u,948771246094,100.00,1.98,insn per cycle 21 | 182529632472,,branches:u,948771246094,100.00,192.386,M/sec 22 | 468272029,,branch-misses:u,948771246094,100.00,0.26,of 
all branches 23 | -------------------------------------------------------------------------------- /reports/i5-8250U/single-thread/rustc/v3_simd.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v3_simd_rust with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 60.03186 3 | 60.07764 4 | 60.08811 5 | 60.06666 6 | 60.05531 7 | 60.06515 8 | 59.95527 9 | 59.83472 10 | 59.84373 11 | 59.8253 12 | 59.79597 13 | 59.9532 14 | 59.76457 15 | 59.78703 16 | 59.8676 17 | 59.78983 18 | 962207.01,msec,task-clock:u,962207012736,100.00,1.000,CPUs utilized 19 | 0,,context-switches:u,962207012736,100.00,0.000,K/sec 20 | 0,,cpu-migrations:u,962207012736,100.00,0.000,K/sec 21 | 1195549,,page-faults:u,962207012736,100.00,0.001,M/sec 22 | 2399750671217,,cycles:u,962211210753,100.00,2.494,GHz 23 | 1789719257602,,instructions:u,962211210753,100.00,0.75,insn per cycle 24 | 117600228809,,branches:u,962211210753,100.00,122.219,M/sec 25 | 576330362,,branch-misses:u,962211210753,100.00,0.49,of all branches 26 | -------------------------------------------------------------------------------- /reports/i5-8250U/single-thread/rustc/v4_register_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v4_register_reuse_rust with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 18.52068 3 | 18.52099 4 | 18.51723 5 | 18.51883 6 | 18.51843 7 | 18.51234 8 | 18.50687 9 | 18.5216 10 | 18.50486 11 | 18.51246 12 | 18.50081 13 | 18.50628 14 | 18.49378 15 | 18.50483 16 | 18.49381 17 | 18.4992 18 | 18.49973 19 | 18.5015 20 | 18.50266 21 | 18.50828 22 | 18.50403 23 | 18.52204 24 | 18.50638 25 | 18.51238 26 | 18.51359 27 | 18.5121 28 | 18.51338 29 | 18.51284 30 | 18.50631 31 | 18.51153 32 | 18.50966 33 | 18.51744 34 | 18.51702 35 | 18.50999 36 | 18.51443 37 | 18.52109 38 | 18.51364 39 | 18.51731 40 | 18.5127 41 | 
18.52772 42 | 18.52277 43 | 18.52858 44 | 18.52778 45 | 18.51771 46 | 18.52174 47 | 18.5111 48 | 18.51689 49 | 18.51534 50 | 18.52935 51 | 917319.61,msec,task-clock:u,917319613936,100.00,1.000,CPUs utilized 52 | 0,,context-switches:u,917319613936,100.00,0.000,K/sec 53 | 0,,cpu-migrations:u,917319613936,100.00,0.000,K/sec 54 | 3515906,,page-faults:u,917319613936,100.00,0.004,M/sec 55 | 3092040295364,,cycles:u,917322890150,100.00,3.371,GHz 56 | 4294374436498,,instructions:u,917322890150,100.00,1.39,insn per cycle 57 | 172373005422,,branches:u,917322890150,100.00,187.909,M/sec 58 | 196598967,,branch-misses:u,917322890150,100.00,0.11,of all branches 59 | -------------------------------------------------------------------------------- /reports/i5-8250U/single-thread/rustc/v5_more_register_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v5_more_register_reuse_rust with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 10.15037 3 | 10.14882 4 | 10.1489 5 | 10.143 6 | 10.13882 7 | 10.14136 8 | 10.14703 9 | 10.13929 10 | 10.14306 11 | 10.14645 12 | 10.13922 13 | 10.15012 14 | 10.14753 15 | 10.14473 16 | 10.13923 17 | 10.14542 18 | 10.13604 19 | 10.13846 20 | 10.14159 21 | 10.14227 22 | 10.14127 23 | 10.12771 24 | 10.14765 25 | 10.14675 26 | 10.14838 27 | 10.13883 28 | 10.13609 29 | 10.14777 30 | 10.14342 31 | 10.13595 32 | 10.14643 33 | 10.14778 34 | 10.13878 35 | 10.14217 36 | 10.13908 37 | 10.15063 38 | 10.14636 39 | 10.14571 40 | 10.14503 41 | 10.14294 42 | 10.14657 43 | 10.14913 44 | 10.13369 45 | 10.15075 46 | 10.14617 47 | 10.13457 48 | 10.15071 49 | 10.14946 50 | 10.14877 51 | 10.14438 52 | 10.15026 53 | 10.13936 54 | 10.14403 55 | 10.14057 56 | 10.13647 57 | 10.12686 58 | 10.14943 59 | 10.14815 60 | 10.13854 61 | 10.14515 62 | 10.14622 63 | 10.14946 64 | 10.14567 65 | 10.14336 66 | 10.14399 67 | 10.14949 68 | 10.14413 69 | 10.14251 70 | 10.14044 71 | 10.14633 
72 | 10.13861 73 | 10.13994 74 | 10.13102 75 | 10.14061 76 | 10.14926 77 | 10.14554 78 | 10.14693 79 | 10.1486 80 | 10.13923 81 | 10.14929 82 | 10.13444 83 | 10.13722 84 | 10.14483 85 | 10.14906 86 | 10.15456 87 | 10.13903 88 | 10.13943 89 | 10.14368 90 | 10.15026 91 | 921202.95,msec,task-clock:u,921202947611,100.00,1.000,CPUs utilized 92 | 0,,context-switches:u,921202947611,100.00,0.000,K/sec 93 | 0,,cpu-migrations:u,921202947611,100.00,0.000,K/sec 94 | 6328465,,page-faults:u,921202947611,100.00,0.007,M/sec 95 | 3092265275246,,cycles:u,921205950003,100.00,3.357,GHz 96 | 8087333819937,,instructions:u,921205950003,100.00,2.62,insn per cycle 97 | 344891922543,,branches:u,921205950003,100.00,374.393,M/sec 98 | 135775051,,branch-misses:u,921205950003,100.00,0.04,of all branches 99 | -------------------------------------------------------------------------------- /reports/i5-8250U/single-thread/rustc/v6_prefetch.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v6_prefetch_rust with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 8.891988 3 | 8.892324 4 | 8.893475 5 | 8.892407 6 | 8.89162 7 | 8.892879 8 | 8.896861 9 | 8.891275 10 | 8.891552 11 | 8.889735 12 | 8.890484 13 | 8.894369 14 | 8.892517 15 | 8.89256 16 | 8.892714 17 | 8.896035 18 | 8.891739 19 | 8.892485 20 | 8.893988 21 | 8.896248 22 | 8.89296 23 | 8.893991 24 | 8.895443 25 | 8.897857 26 | 8.896304 27 | 8.900363 28 | 8.89317 29 | 8.896651 30 | 8.894177 31 | 8.892566 32 | 8.899452 33 | 8.893447 34 | 8.891879 35 | 8.893199 36 | 8.895652 37 | 8.893929 38 | 8.892171 39 | 8.894643 40 | 8.893822 41 | 8.898697 42 | 8.892653 43 | 8.892403 44 | 8.892626 45 | 8.892682 46 | 8.891974 47 | 8.894448 48 | 8.892262 49 | 8.89525 50 | 8.891835 51 | 8.900544 52 | 8.892509 53 | 8.894528 54 | 8.897165 55 | 8.890518 56 | 8.897224 57 | 8.895721 58 | 8.894279 59 | 8.89272 60 | 8.89394 61 | 8.89324 62 | 8.897823 63 | 8.891629 64 | 
8.896458 65 | 8.891854 66 | 8.893358 67 | 8.892969 68 | 8.893229 69 | 8.901045 70 | 8.896246 71 | 8.893929 72 | 8.902966 73 | 8.892042 74 | 8.89618 75 | 8.895383 76 | 8.891806 77 | 8.89529 78 | 8.891854 79 | 8.893357 80 | 8.891867 81 | 8.898466 82 | 8.890761 83 | 8.891772 84 | 8.892203 85 | 8.89309 86 | 8.892433 87 | 8.894885 88 | 8.893107 89 | 8.893502 90 | 8.896434 91 | 8.892705 92 | 8.901033 93 | 8.891302 94 | 8.892306 95 | 8.899378 96 | 8.897124 97 | 8.898105 98 | 8.892891 99 | 8.893995 100 | 8.894099 101 | 8.896009 102 | 910322.44,msec,task-clock:u,910322440704,100.00,1.000,CPUs utilized 103 | 0,,context-switches:u,910322440704,100.00,0.000,K/sec 104 | 0,,cpu-migrations:u,910322440704,100.00,0.000,K/sec 105 | 7101922,,page-faults:u,910322440704,100.00,0.008,M/sec 106 | 3052262742320,,cycles:u,910325491746,100.00,3.353,GHz 107 | 9763769256471,,instructions:u,910325491746,100.00,3.20,insn per cycle 108 | 386849419075,,branches:u,910325491746,100.00,424.959,M/sec 109 | 100783578,,branch-misses:u,910325491746,100.00,0.03,of all branches 110 | -------------------------------------------------------------------------------- /reports/i5-8250U/single-thread/rustc/v7_cache_reuse.txt: -------------------------------------------------------------------------------- 1 | benchmarking ./build/bin/v7_cache_reuse_rust with input containing 36000000 elements, stopping after 100 iterations or 900 seconds 2 | 10.96578 3 | 10.93709 4 | 10.92254 5 | 10.93238 6 | 10.9416 7 | 10.94432 8 | 10.94151 9 | 10.93564 10 | 10.94632 11 | 10.94911 12 | 10.94388 13 | 10.95709 14 | 10.95733 15 | 10.9455 16 | 10.95314 17 | 10.931 18 | 10.94347 19 | 10.94797 20 | 10.94135 21 | 10.95814 22 | 10.95101 23 | 10.94943 24 | 10.93761 25 | 10.94821 26 | 10.95455 27 | 10.93846 28 | 10.95776 29 | 10.94355 30 | 10.94644 31 | 10.9431 32 | 10.95401 33 | 10.94246 34 | 10.9532 35 | 10.95312 36 | 10.94724 37 | 10.94635 38 | 10.95246 39 | 10.94287 40 | 10.94768 41 | 10.94671 42 | 10.94041 43 | 10.94969 44 | 
10.93632 45 | 10.94507 46 | 10.94304 47 | 10.9455 48 | 10.94277 49 | 10.95643 50 | 10.93966 51 | 10.93443 52 | 10.94173 53 | 10.94841 54 | 10.95738 55 | 10.94697 56 | 10.94715 57 | 10.9311 58 | 10.94599 59 | 10.94148 60 | 10.95403 61 | 10.93452 62 | 10.94628 63 | 10.94325 64 | 10.95208 65 | 10.95012 66 | 10.94811 67 | 10.9529 68 | 10.95202 69 | 10.93258 70 | 10.94122 71 | 10.94815 72 | 10.94824 73 | 10.94335 74 | 10.93947 75 | 10.95663 76 | 10.94032 77 | 10.94736 78 | 10.93982 79 | 10.95874 80 | 10.9408 81 | 10.94686 82 | 10.93413 83 | 10.94737 84 | 10.93784 85 | 925901.19,msec,task-clock:u,925901185946,100.00,1.000,CPUs utilized 86 | 0,,context-switches:u,925901185946,100.00,0.000,K/sec 87 | 0,,cpu-migrations:u,925901185946,100.00,0.000,K/sec 88 | 3047876,,page-faults:u,925901185946,100.00,0.003,M/sec 89 | 3118642837849,,cycles:u,925903553663,100.00,3.368,GHz 90 | 9597997180867,,instructions:u,925903553663,100.00,3.08,insn per cycle 91 | 328765209163,,branches:u,925903553663,100.00,355.076,M/sec 92 | 765217697,,branch-misses:u,925903553663,100.00,0.23,of all branches 93 | -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | project(ShortcutBenchmark) 3 | 4 | set(CMAKE_CXX_STANDARD 17) 5 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native -g -O3 -Wall -Wno-unknown-pragmas -pedantic") 6 | # Check if multithreading should be disabled 7 | if(${SC_NO_MULTI_THREAD}) 8 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNO_MULTI_THREAD=1") 9 | else() 10 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp -DNO_MULTI_THREAD=0") 11 | endif() 12 | 13 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) 14 | set(BENCHMARK_SOURCES main/main.cpp main/step_reference.cpp) 15 | 16 | # The Unix module in Rust depends on pthreads 17 | set(THREADS_PREFER_PTHREAD_FLAG ON) 18 | find_package(Threads REQUIRED) 
19 | 20 | # Read step implementations from each line into list 21 | file(STRINGS step_implementations.txt STEP_IMPLEMENTATIONS) 22 | 23 | foreach(STEP_IMPL ${STEP_IMPLEMENTATIONS}) 24 | set(CPP_STEP_LIB "${STEP_IMPL}_cpp_lib") 25 | # Compile C++ step implementation into static library 26 | add_library(${CPP_STEP_LIB} STATIC "cpp/${STEP_IMPL}/step.cpp") 27 | # Path to Rust step implementation as a static library; TODO call cargo build from here 28 | set(RUST_STEP_LIB "${CMAKE_BINARY_DIR}/rust_cargo/release/lib${STEP_IMPL}.a") 29 | 30 | # Create C++ benchmark executable 31 | set(CPP_BENCHMARK_BIN "${STEP_IMPL}_cpp") 32 | add_executable(${CPP_BENCHMARK_BIN} ${BENCHMARK_SOURCES}) 33 | target_include_directories(${CPP_BENCHMARK_BIN} PRIVATE main) 34 | target_include_directories(${CPP_STEP_LIB} PRIVATE main) 35 | target_include_directories(${CPP_STEP_LIB} PRIVATE cpp/tools) 36 | target_link_libraries(${CPP_BENCHMARK_BIN} ${CPP_STEP_LIB}) 37 | 38 | # Create Rust benchmark executable 39 | set(RUST_BENCHMARK_BIN "${STEP_IMPL}_rust") 40 | add_executable(${RUST_BENCHMARK_BIN} ${BENCHMARK_SOURCES}) 41 | target_include_directories(${RUST_BENCHMARK_BIN} PRIVATE main) 42 | target_link_libraries(${RUST_BENCHMARK_BIN} 43 | Threads::Threads 44 | ${RUST_STEP_LIB} 45 | ${CMAKE_DL_LIBS}) 46 | endforeach() 47 | -------------------------------------------------------------------------------- /src/cpp/tools/simd.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #ifdef __GNUC__ 7 | typedef float float8_t __attribute__ ((vector_size (8 * sizeof(float)))); 8 | #elif __clang__ 9 | typedef float float8_t __attribute__ ((ext_vector_type(8))); 10 | #else 11 | #error "SIMD helpers currently typedef'd only for Clang and GNU GCC" 12 | #endif 13 | 14 | // Allocate memory for a 256-bit vector of floats and return the pointer 15 | static float8_t* float8_alloc(size_t n) { 16 | void *ptr = 0; 17 | if 
(posix_memalign(&ptr, sizeof(float8_t), n * sizeof(float8_t))) { 18 | throw std::bad_alloc(); 19 | } 20 | return (float8_t*)ptr; 21 | } 22 | 23 | constexpr float infty = std::numeric_limits::infinity(); 24 | constexpr float8_t f8infty { 25 | infty, infty, infty, infty, 26 | infty, infty, infty, infty 27 | }; 28 | 29 | // Return the value of the smallest element in a float8 vector 30 | inline float hmin8(const float8_t& v) { 31 | float res = infty; 32 | for (int i = 0; i < 8; ++i) { 33 | res = std::min(v[i], res); 34 | } 35 | return res; 36 | } 37 | 38 | // Return a vector of the minimum elements for each pair of elements of two float8 vectors 39 | inline float8_t min8(const float8_t& v, const float8_t& w) { 40 | #ifdef __clang__ 41 | return _mm256_min_ps(v, w); 42 | #else 43 | return v < w ? v : w; 44 | #endif 45 | } 46 | 47 | inline float8_t swap4(float8_t x) { 48 | return _mm256_permute2f128_ps(x, x, 0b00000001); 49 | } 50 | 51 | inline float8_t swap2(float8_t x) { 52 | return _mm256_permute_ps(x, 0b01001110); 53 | } 54 | 55 | inline float8_t swap1(float8_t x) { 56 | return _mm256_permute_ps(x, 0b10110001); 57 | } 58 | -------------------------------------------------------------------------------- /src/cpp/v0_baseline/step.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * From http://ppc.cs.aalto.fi/ch2/v0/ 3 | */ 4 | #include 5 | #include 6 | #include "step.hpp" 7 | 8 | void step(float* r, const float* d, int n) { 9 | #pragma omp parallel for 10 | for (int i = 0; i < n; ++i) { 11 | for (int j = 0; j < n; ++j) { 12 | float v = std::numeric_limits::infinity(); 13 | for (int k = 0; k < n; ++k) { 14 | float x = d[n*i + k]; 15 | float y = d[n*k + j]; 16 | float z = x + y; 17 | v = std::min(v, z); 18 | } 19 | r[n*i + j] = v; 20 | } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/cpp/v1_linear_reading/step.cpp: 
-------------------------------------------------------------------------------- 1 | /* 2 | * From http://ppc.cs.aalto.fi/ch2/v1/ 3 | */ 4 | #include 5 | #include 6 | #include 7 | #include "step.hpp" 8 | 9 | void step(float* r, const float* d, int n) { 10 | std::vector t(n*n); 11 | #pragma omp parallel for 12 | for (int i = 0; i < n; ++i) { 13 | for (int j = 0; j < n; ++j) { 14 | t[n*j + i] = d[n*i + j]; 15 | } 16 | } 17 | 18 | #pragma omp parallel for 19 | for (int i = 0; i < n; ++i) { 20 | for (int j = 0; j < n; ++j) { 21 | float v = std::numeric_limits::infinity(); 22 | for (int k = 0; k < n; ++k) { 23 | float x = d[n*i + k]; 24 | float y = t[n*j + k]; 25 | float z = x + y; 26 | v = std::min(v, z); 27 | } 28 | r[n*i + j] = v; 29 | } 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/cpp/v2_instr_level_parallelism/step.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * From http://ppc.cs.aalto.fi/ch2/v2/ 3 | */ 4 | #include 5 | #include 6 | #include 7 | #include "step.hpp" 8 | 9 | constexpr float infty = std::numeric_limits::infinity(); 10 | 11 | void step(float* r, const float* d_input, int n) { 12 | constexpr int nb = 4; 13 | int na = (n + nb - 1) / nb; 14 | int nab = na*nb; 15 | 16 | std::vector d(n*nab, infty); 17 | std::vector t(n*nab, infty); 18 | 19 | #pragma omp parallel for 20 | for (int i = 0; i < n; ++i) { 21 | for (int j = 0; j < n; ++j) { 22 | d[nab*i + j] = d_input[n*i + j]; 23 | t[nab*i + j] = d_input[n*j + i]; 24 | } 25 | } 26 | 27 | #pragma omp parallel for 28 | for (int i = 0; i < n; ++i) { 29 | for (int j = 0; j < n; ++j) { 30 | float vv[nb]; 31 | for (int kb = 0; kb < nb; ++kb) { 32 | vv[kb] = infty; 33 | } 34 | for (int ka = 0; ka < na; ++ka) { 35 | for (int kb = 0; kb < nb; ++kb) { 36 | float x = d[nab*i + ka * nb + kb]; 37 | float y = t[nab*j + ka * nb + kb]; 38 | float z = x + y; 39 | vv[kb] = std::min(vv[kb], z); 40 | } 41 | } 42 | float 
v = infty; 43 | for (int kb = 0; kb < nb; ++kb) { 44 | v = std::min(vv[kb], v); 45 | } 46 | r[n*i + j] = v; 47 | } 48 | } 49 | } 50 | 51 | -------------------------------------------------------------------------------- /src/cpp/v3_simd/step.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * From http://ppc.cs.aalto.fi/ch2/v3/ 3 | */ 4 | #include "step.hpp" 5 | #include "simd.hpp" 6 | 7 | void step(float* r, const float* d_, int n) { 8 | constexpr int nb = 8; 9 | int na = (n + nb - 1) / nb; 10 | 11 | float8_t* vd = float8_alloc(n*na); 12 | float8_t* vt = float8_alloc(n*na); 13 | 14 | #pragma omp parallel for 15 | for (int j = 0; j < n; ++j) { 16 | for (int ka = 0; ka < na; ++ka) { 17 | for (int kb = 0; kb < nb; ++kb) { 18 | int i = ka * nb + kb; 19 | vd[na*j + ka][kb] = i < n ? d_[n*j + i] : infty; 20 | vt[na*j + ka][kb] = i < n ? d_[n*i + j] : infty; 21 | } 22 | } 23 | } 24 | 25 | #pragma omp parallel for 26 | for (int i = 0; i < n; ++i) { 27 | for (int j = 0; j < n; ++j) { 28 | float8_t vv = f8infty; 29 | for (int ka = 0; ka < na; ++ka) { 30 | float8_t x = vd[na*i + ka]; 31 | float8_t y = vt[na*j + ka]; 32 | float8_t z = x + y; 33 | vv = min8(vv, z); 34 | } 35 | r[n*i + j] = hmin8(vv); 36 | } 37 | } 38 | 39 | std::free(vt); 40 | std::free(vd); 41 | } 42 | -------------------------------------------------------------------------------- /src/cpp/v5_more_register_reuse/step.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * From http://ppc.cs.aalto.fi/ch2/v5/ 3 | */ 4 | #include "step.hpp" 5 | #include "simd.hpp" 6 | 7 | void step(float* r, const float* d_, int n) { 8 | int na = (n + 8 - 1) / 8; 9 | 10 | float8_t* vd = float8_alloc(na*n); 11 | float8_t* vt = float8_alloc(na*n); 12 | 13 | #pragma omp parallel for 14 | for (int ja = 0; ja < na; ++ja) { 15 | for (int i = 0; i < n; ++i) { 16 | for (int jb = 0; jb < 8; ++jb) { 17 | int j = ja * 8 + jb; 18 | vd[n*ja + i][jb] = 
j < n ? d_[n*j + i] : infty; 19 | vt[n*ja + i][jb] = j < n ? d_[n*i + j] : infty; 20 | } 21 | } 22 | } 23 | 24 | #pragma omp parallel for 25 | for (int ia = 0; ia < na; ++ia) { 26 | for (int ja = 0; ja < na; ++ja) { 27 | float8_t vv000 = f8infty; 28 | float8_t vv001 = f8infty; 29 | float8_t vv010 = f8infty; 30 | float8_t vv011 = f8infty; 31 | float8_t vv100 = f8infty; 32 | float8_t vv101 = f8infty; 33 | float8_t vv110 = f8infty; 34 | float8_t vv111 = f8infty; 35 | for (int k = 0; k < n; ++k) { 36 | float8_t a000 = vd[n*ia + k]; 37 | float8_t b000 = vt[n*ja + k]; 38 | float8_t a100 = swap4(a000); 39 | float8_t a010 = swap2(a000); 40 | float8_t a110 = swap2(a100); 41 | float8_t b001 = swap1(b000); 42 | vv000 = min8(vv000, a000 + b000); 43 | vv001 = min8(vv001, a000 + b001); 44 | vv010 = min8(vv010, a010 + b000); 45 | vv011 = min8(vv011, a010 + b001); 46 | vv100 = min8(vv100, a100 + b000); 47 | vv101 = min8(vv101, a100 + b001); 48 | vv110 = min8(vv110, a110 + b000); 49 | vv111 = min8(vv111, a110 + b001); 50 | } 51 | float8_t vv[8] = { vv000, vv001, vv010, vv011, vv100, vv101, vv110, vv111 }; 52 | for (int kb = 1; kb < 8; kb += 2) { 53 | vv[kb] = swap1(vv[kb]); 54 | } 55 | for (int jb = 0; jb < 8; ++jb) { 56 | for (int ib = 0; ib < 8; ++ib) { 57 | int i = ib + ia*8; 58 | int j = jb + ja*8; 59 | if (j < n && i < n) { 60 | r[n*i + j] = vv[ib^jb][jb]; 61 | } 62 | } 63 | } 64 | } 65 | } 66 | 67 | std::free(vt); 68 | std::free(vd); 69 | } 70 | -------------------------------------------------------------------------------- /src/cpp/v6_prefetch/step.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * From http://ppc.cs.aalto.fi/ch2/v6/ 3 | */ 4 | #include "step.hpp" 5 | #include "simd.hpp" 6 | 7 | void step(float* r, const float* d_, int n) { 8 | int na = (n + 8 - 1) / 8; 9 | 10 | float8_t* vd = float8_alloc(na*n); 11 | float8_t* vt = float8_alloc(na*n); 12 | 13 | #pragma omp parallel for 14 | for (int ja = 0; ja < na; ++ja) { 
15 | for (int i = 0; i < n; ++i) { 16 | for (int jb = 0; jb < 8; ++jb) { 17 | int j = ja * 8 + jb; 18 | vd[n*ja + i][jb] = j < n ? d_[n*j + i] : infty; 19 | vt[n*ja + i][jb] = j < n ? d_[n*i + j] : infty; 20 | } 21 | } 22 | } 23 | 24 | #pragma omp parallel for 25 | for (int ia = 0; ia < na; ++ia) { 26 | for (int ja = 0; ja < na; ++ja) { 27 | float8_t vv000 = f8infty; 28 | float8_t vv001 = f8infty; 29 | float8_t vv010 = f8infty; 30 | float8_t vv011 = f8infty; 31 | float8_t vv100 = f8infty; 32 | float8_t vv101 = f8infty; 33 | float8_t vv110 = f8infty; 34 | float8_t vv111 = f8infty; 35 | for (int k = 0; k < n; ++k) { 36 | constexpr int PF = 20; 37 | __builtin_prefetch(&vd[n*ia + k + PF]); 38 | __builtin_prefetch(&vt[n*ja + k + PF]); 39 | float8_t a000 = vd[n*ia + k]; 40 | float8_t b000 = vt[n*ja + k]; 41 | float8_t a100 = swap4(a000); 42 | float8_t a010 = swap2(a000); 43 | float8_t a110 = swap2(a100); 44 | float8_t b001 = swap1(b000); 45 | vv000 = min8(vv000, a000 + b000); 46 | vv001 = min8(vv001, a000 + b001); 47 | vv010 = min8(vv010, a010 + b000); 48 | vv011 = min8(vv011, a010 + b001); 49 | vv100 = min8(vv100, a100 + b000); 50 | vv101 = min8(vv101, a100 + b001); 51 | vv110 = min8(vv110, a110 + b000); 52 | vv111 = min8(vv111, a110 + b001); 53 | } 54 | float8_t vv[8] = { vv000, vv001, vv010, vv011, vv100, vv101, vv110, vv111 }; 55 | for (int kb = 1; kb < 8; kb += 2) { 56 | vv[kb] = swap1(vv[kb]); 57 | } 58 | for (int jb = 0; jb < 8; ++jb) { 59 | for (int ib = 0; ib < 8; ++ib) { 60 | int i = ib + ia*8; 61 | int j = jb + ja*8; 62 | if (j < n && i < n) { 63 | r[n*i + j] = vv[ib^jb][jb]; 64 | } 65 | } 66 | } 67 | } 68 | } 69 | 70 | std::free(vt); 71 | std::free(vd); 72 | } 73 | -------------------------------------------------------------------------------- /src/main/step.hpp: -------------------------------------------------------------------------------- 1 | #ifndef STEP_HPP 2 | #define STEP_HPP 3 | 4 | // ANCHOR: step 5 | extern "C" { 6 | void step(float*, const 
float*, int); 7 | } 8 | // ANCHOR_END: step 9 | 10 | #endif // STEP_HPP 11 | -------------------------------------------------------------------------------- /src/main/step_reference.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "step_reference.hpp" 4 | 5 | // Copy paste from v0_baseline 6 | void step_reference(float* r, const float* d, int n) { 7 | #pragma omp parallel for 8 | for (int i = 0; i < n; ++i) { 9 | for (int j = 0; j < n; ++j) { 10 | float v = std::numeric_limits::infinity(); 11 | for (int k = 0; k < n; ++k) { 12 | float x = d[n*i + k]; 13 | float y = d[n*k + j]; 14 | float z = x + y; 15 | v = std::min(v, z); 16 | } 17 | r[n*i + j] = v; 18 | } 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/main/step_reference.hpp: -------------------------------------------------------------------------------- 1 | #ifndef STEP_REFERENCE_HPP 2 | #define STEP_REFERENCE_HPP 3 | 4 | // Provides a baseline when testing for correctness 5 | void step_reference(float*, const float*, int); 6 | 7 | #endif // STEP_REFERENCE_HPP 8 | -------------------------------------------------------------------------------- /src/rust/tools/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "tools" 3 | version = "0.1.0" 4 | 5 | [lib] 6 | name = "tools" 7 | path = "src/lib.rs" 8 | -------------------------------------------------------------------------------- /src/rust/tools/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![feature(core_intrinsics)] 2 | extern crate core; 3 | // For interleaving bits to construct Z-order curve 4 | use core::arch::x86_64::_pdep_u32; 5 | 6 | pub mod simd; 7 | pub mod timer; 8 | 9 | /// Extern C-ABI wrapper for moving data by raw pointers to a Rust 'step'-implementation 10 | #[macro_export] 11 | macro_rules! 
create_extern_c_wrapper { 12 | ($extern_func:ident, $wrapped_func:ident) => { 13 | #[no_mangle] 14 | pub extern "C" fn $extern_func(r_raw: *mut f32, d_raw: *const f32, n: i32) { 15 | // Catch any unwinding panics so that they won't propagate over the ABI to the calling program, which would be undefined behaviour 16 | let result = std::panic::catch_unwind(|| { 17 | // Wrap raw pointers into 'not unsafe' Rust slices with a well defined size 18 | let d = unsafe { std::slice::from_raw_parts(d_raw, (n * n) as usize) }; 19 | let mut r = unsafe { std::slice::from_raw_parts_mut(r_raw, (n * n) as usize) }; 20 | // Evaluate the wrapped function 21 | $wrapped_func(&mut r, d, n as usize); 22 | }); 23 | // Print an error to stderr if something went horribly wrong 24 | if result.is_err() { 25 | eprintln!("error: rust panicked"); 26 | } 27 | } 28 | }; 29 | } 30 | 31 | // ANCHOR: min 32 | #[inline(always)] 33 | pub fn min(x: f32, y: f32) -> f32 { 34 | if x < y { x } else { y } 35 | } 36 | // ANCHOR_END: min 37 | 38 | // ANCHOR: z_encode 39 | #[inline] 40 | pub fn z_encode(x: u32, y: u32) -> u32 { 41 | let odd_bits = 0x55555555; 42 | let even_bits = 0xAAAAAAAA; 43 | unsafe { _pdep_u32(x, odd_bits) | _pdep_u32(y, even_bits) } 44 | } 45 | // ANCHOR_END: z_encode 46 | -------------------------------------------------------------------------------- /src/rust/tools/src/timer.rs: -------------------------------------------------------------------------------- 1 | use std::time; 2 | use std::vec; 3 | extern crate core; 4 | 5 | pub struct Stopwatch { 6 | instants: vec::Vec, 7 | } 8 | 9 | impl Stopwatch { 10 | pub fn new() -> Stopwatch { 11 | Stopwatch { instants: vec::Vec::new() } 12 | } 13 | 14 | pub fn click(&mut self) { 15 | self.instants.push(time::Instant::now()); 16 | } 17 | 18 | pub fn report(&self) { 19 | self.instants.as_slice().windows(2).enumerate().for_each(|(i, w)| { 20 | let (earlier, later) = (w[0], w[1]); 21 | let elapsed = later.duration_since(earlier); 22 | let elapsed_ms 
= 1000 * elapsed.as_secs() + elapsed.subsec_millis() as u64; 23 | println!("{}-{} : {} ms", i, i+1, elapsed_ms); 24 | }); 25 | } 26 | } 27 | 28 | // TODO generics or associated types or something 29 | 30 | pub struct CycleCounter { 31 | instants: vec::Vec, 32 | } 33 | 34 | impl CycleCounter { 35 | pub fn new() -> CycleCounter { 36 | CycleCounter { instants: vec::Vec::new() } 37 | } 38 | 39 | pub fn click(&mut self) { 40 | let cpu_timestamp = unsafe { core::arch::x86_64::_rdtsc() }; 41 | self.instants.push(cpu_timestamp); 42 | } 43 | 44 | pub fn report(&self) { 45 | self.instants.as_slice().windows(2).enumerate().for_each(|(i, w)| { 46 | let (earlier, later) = (w[0], w[1]); 47 | let cycles = later - earlier; 48 | println!("{}-{} : {} cycles", i, i+1, cycles); 49 | }); 50 | } 51 | } 52 | 53 | /* 54 | * cargo: 55 | * [dependencies] 56 | * tools = { path = "../tools"} 57 | * 58 | * main.rs: 59 | * extern crate tools; 60 | * use tools::timer; 61 | * 62 | * in function: 63 | * let mut s = timer::CycleCounter::new(); 64 | * s.click(); 65 | * ... do many cycles ... 
66 | * s.click(); 67 | * s.report(); 68 | * 69 | */ 70 | -------------------------------------------------------------------------------- /src/rust/v0_baseline/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "v0_baseline" 3 | version = "0.1.0" 4 | edition = "2018" 5 | publish = false 6 | 7 | [dependencies] 8 | rayon = "1.*" 9 | tools = { path = "../tools" } 10 | 11 | [lib] 12 | name = "v0_baseline" 13 | crate-type = ["staticlib"] 14 | 15 | [profile.release] 16 | debug = false 17 | debug-assertions = false 18 | incremental = false 19 | lto = false 20 | opt-level = 3 21 | overflow-checks = false 22 | -------------------------------------------------------------------------------- /src/rust/v0_baseline/src/lib.rs: -------------------------------------------------------------------------------- 1 | use tools::create_extern_c_wrapper; 2 | 3 | #[cfg(not(feature = "no-multi-thread"))] 4 | extern crate rayon; // Data-parallelism library with a work-stealing approach 5 | #[cfg(not(feature = "no-multi-thread"))] 6 | use rayon::prelude::*; // Parallel chunks iterator 7 | 8 | 9 | #[inline] 10 | fn _step(r: &mut [f32], d: &[f32], n: usize) { 11 | // ANCHOR: step_row 12 | // Function: for some row i and every column j in d, 13 | // compute n results into r (r_row) 14 | let step_row = |(i, r_row): (usize, &mut [f32])| { 15 | for (j, res) in r_row.iter_mut().enumerate() { 16 | let mut v = std::f32::INFINITY; 17 | for k in 0..n { 18 | let x = d[n*i + k]; 19 | let y = d[n*k + j]; 20 | let z = x + y; 21 | v = v.min(z); 22 | } 23 | *res = v; 24 | } 25 | }; 26 | // ANCHOR_END: step_row 27 | // Partition r into slices, each containing a single row and apply the function on the rows 28 | // ANCHOR: chunks 29 | #[cfg(not(feature = "no-multi-thread"))] // Process each row as a separate task in parallel 30 | //// ANCHOR: par_chunks 31 | r.par_chunks_mut(n) 32 | .enumerate() 33 | .for_each(step_row); 34 | //// ANCHOR_END: 
par_chunks 35 | #[cfg(feature = "no-multi-thread")] // Process all rows in the main thread 36 | //// ANCHOR: seq_chunks 37 | //// ANCHOR: seq_chunks_mut 38 | r.chunks_mut(n) 39 | //// ANCHOR_END: seq_chunks_mut 40 | .enumerate() 41 | .for_each(step_row); 42 | //// ANCHOR_END: seq_chunks 43 | // ANCHOR_END: chunks 44 | } 45 | 46 | // ANCHOR: extern_macro_call 47 | create_extern_c_wrapper!(step, _step); 48 | // ANCHOR_END: extern_macro_call 49 | -------------------------------------------------------------------------------- /src/rust/v1_linear_reading/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "v1_linear_reading" 3 | version = "0.1.0" 4 | edition = "2018" 5 | publish = false 6 | 7 | [dependencies] 8 | rayon = "1.*" 9 | tools = { path = "../tools" } 10 | 11 | [lib] 12 | name = "v1_linear_reading" 13 | crate-type = ["staticlib"] 14 | 15 | [profile.release] 16 | debug = false 17 | debug-assertions = false 18 | incremental = false 19 | lto = false 20 | opt-level = 3 21 | overflow-checks = false 22 | -------------------------------------------------------------------------------- /src/rust/v1_linear_reading/src/bad_loop.rs: -------------------------------------------------------------------------------- 1 | #[inline] 2 | fn _step(r: &mut [f32], d: &[f32], n: usize) { 3 | let mut t = vec![0.0; n * n]; 4 | // ANCHOR: step_row 5 | // Function: for some row i in d and all rows t, 6 | // compute n results into row i in r (r_row) 7 | let step_row = |(i, r_row): (usize, &mut [f32])| { 8 | for (j, res) in r_row.iter_mut().enumerate() { 9 | let mut v = std::f32::INFINITY; 10 | // ANCHOR: step_row_inner 11 | for k in 0..n { 12 | let x = d[n*i + k]; 13 | let y = t[n*j + k]; 14 | let z = x + y; 15 | v = v.min(z); 16 | } 17 | // ANCHOR_END: step_row_inner 18 | *res = v; 19 | } 20 | }; 21 | // Partition r into rows containing n elements, 22 | // and apply step_row on all rows in parallel 23 | 
r.par_chunks_mut(n) 24 | .enumerate() 25 | .for_each(step_row); 26 | // ANCHOR_END: step_row 27 | } 28 | 29 | // ANCHOR: step_row_inner_no_nan 30 | for k in 0..n { 31 | let x = d[n*i + k]; 32 | let y = t[n*j + k]; 33 | let z = x + y; 34 | v = if v < z { v } else { z }; 35 | } 36 | // ANCHOR_END: step_row_inner_no_nan 37 | -------------------------------------------------------------------------------- /src/rust/v1_linear_reading/src/lib.rs: -------------------------------------------------------------------------------- 1 | use tools::{create_extern_c_wrapper, min}; 2 | 3 | #[cfg(not(feature = "no-multi-thread"))] 4 | extern crate rayon; 5 | #[cfg(not(feature = "no-multi-thread"))] 6 | use rayon::prelude::*; 7 | 8 | 9 | #[inline] 10 | fn _step(r: &mut [f32], d: &[f32], n: usize) { 11 | // ANCHOR: transpose 12 | // Transpose of d 13 | let mut t = std::vec![0.0; n * n]; 14 | // Function: for some column j in d, 15 | // copy all elements of that column into row i in t (t_row) 16 | let transpose_column = |(j, t_row): (usize, &mut [f32])| { 17 | for (i, x) in t_row.iter_mut().enumerate() { 18 | *x = d[n*i + j]; 19 | } 20 | }; 21 | // Copy all columns of d into rows of t in parallel 22 | // ANCHOR_END: transpose 23 | #[cfg(not(feature = "no-multi-thread"))] 24 | // ANCHOR: transpose_apply 25 | t.par_chunks_mut(n) 26 | .enumerate() 27 | .for_each(transpose_column); 28 | // ANCHOR_END: transpose_apply 29 | #[cfg(feature = "no-multi-thread")] 30 | t.chunks_mut(n) 31 | .enumerate() 32 | .for_each(transpose_column); 33 | // ANCHOR: step_row 34 | // Function: for some row i in d (d_row) and all rows t (t_rows), 35 | // compute n results into a row in r (r_row) 36 | let step_row = |(r_row, d_row): (&mut [f32], &[f32])| { 37 | let t_rows = t.chunks_exact(n); 38 | // ANCHOR: step_row_inner 39 | for (res, t_row) in r_row.iter_mut().zip(t_rows) { 40 | *res = d_row.iter() 41 | .zip(t_row) 42 | .fold(std::f32::INFINITY, |v, (&x, &y)| min(v, x + y)); 43 | } 44 | // ANCHOR_END: 
step_row_inner 45 | }; 46 | // Partition r and d into slices, each containing a single row of r and d, 47 | // and apply the function on the row pairs 48 | // ANCHOR_END: step_row 49 | #[cfg(not(feature = "no-multi-thread"))] 50 | // ANCHOR: step_row_apply 51 | r.par_chunks_mut(n) 52 | .zip(d.par_chunks(n)) 53 | .for_each(step_row); 54 | // ANCHOR_END: step_row_apply 55 | #[cfg(feature = "no-multi-thread")] 56 | r.chunks_mut(n) 57 | .zip(d.chunks(n)) 58 | .for_each(step_row); 59 | } 60 | 61 | 62 | create_extern_c_wrapper!(step, _step); 63 | -------------------------------------------------------------------------------- /src/rust/v1_linear_reading/src/not_terrible_loop.rs: -------------------------------------------------------------------------------- 1 | // ANCHOR: step_row 2 | let step_row = |(i, r_row): (usize, &mut [f32])| { 3 | // Get a view of row i of d as a subslice 4 | let d_row = &d[n*i..n*(i+1)]; 5 | for (j, res) in r_row.iter_mut().enumerate() { 6 | // Same for row j in t 7 | let t_row = &t[n*j..n*(j+1)]; 8 | let mut v = std::f32::INFINITY; 9 | for k in 0..n { 10 | let x = d_row[k]; 11 | let y = t_row[k]; 12 | let z = x + y; 13 | v = if v < z { v } else { z }; 14 | } 15 | *res = v; 16 | } 17 | }; 18 | // ANCHOR_END: step_row 19 | // ANCHOR: step_row_inner_iter 20 | for (&x, &y) in d_row.iter().zip(t_row.iter()) { 21 | let z = x + y; 22 | v = if v < z { v } else { z }; 23 | } 24 | // ANCHOR_END: step_row_inner_iter 25 | -------------------------------------------------------------------------------- /src/rust/v2_instr_level_parallelism/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "v2_instr_level_parallelism" 3 | version = "0.1.0" 4 | edition = "2018" 5 | publish = false 6 | 7 | [dependencies] 8 | rayon = "1.*" 9 | tools = { path = "../tools" } 10 | itertools = "0.*" 11 | 12 | [lib] 13 | name = "v2_instr_level_parallelism" 14 | crate-type = ["staticlib"] 15 | 16 | [profile.release] 
// Illustrative excerpt of the row kernel with four scalar accumulators.
//
// NOTE(review): `vt`, `n_padded`, `BLOCK_SIZE` and `min` are not defined in
// this file, and `step_row` is built but never applied to `r`, so this does
// not compile on its own. Presumably it mirrors the crate's real `_step` for
// documentation purposes; confirm against that implementation.
#[inline]
fn _step(r: &mut [f32], d: &[f32], n: usize) {
    // Computes one result row: each element of `r_row` is paired with one
    // `n_padded`-sized row of `vt` and reduced against `vd_row`.
    let step_row = |(r_row, vd_row): (&mut [f32], &[f32])| {
        let vt_rows = vt.chunks_exact(n_padded);
        for (res, vt_row) in r_row.iter_mut().zip(vt_rows) {
            // ANCHOR: inner_loop
            let vd_blocks = vd_row.chunks_exact(BLOCK_SIZE);
            let vt_blocks = vt_row.chunks_exact(BLOCK_SIZE);
            // Encourage the compiler to use different registers for each f32 value
            let mut tmp0 = std::f32::INFINITY;
            let mut tmp1 = std::f32::INFINITY;
            let mut tmp2 = std::f32::INFINITY;
            let mut tmp3 = std::f32::INFINITY;
            for (vd_block, vt_block) in vd_blocks.zip(vt_blocks) {
                // `next_tuple` takes the first four elements of each block;
                // `unwrap` panics if a block holds fewer than four
                // (assumes BLOCK_SIZE is 4 - TODO confirm).
                let (x0, x1, x2, x3) = vd_block.iter().next_tuple().unwrap();
                let (y0, y1, y2, y3) = vt_block.iter().next_tuple().unwrap();
                tmp0 = min(tmp0, x0 + y0);
                tmp1 = min(tmp1, x1 + y1);
                tmp2 = min(tmp2, x2 + y2);
                tmp3 = min(tmp3, x3 + y3);
            }
            // Combine the four independent partial minima into the final result.
            *res = min(tmp0, min(tmp1, min(tmp2, tmp3)));
            // ANCHOR_END: inner_loop
        }
    };
}
incremental = false 19 | lto = false 20 | opt-level = 3 21 | overflow-checks = false 22 | -------------------------------------------------------------------------------- /src/rust/v4_register_reuse/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "v4_register_reuse" 3 | version = "0.1.0" 4 | edition = "2018" 5 | publish = false 6 | 7 | [dependencies] 8 | rayon = "1.*" 9 | tools = { path = "../tools" } 10 | itertools = "0.*" 11 | 12 | [lib] 13 | name = "v4_register_reuse" 14 | crate-type = ["staticlib"] 15 | 16 | [profile.release] 17 | debug = false 18 | debug-assertions = false 19 | incremental = false 20 | lto = false 21 | opt-level = 3 22 | overflow-checks = false 23 | -------------------------------------------------------------------------------- /src/rust/v5_more_register_reuse/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "v5_more_register_reuse" 3 | version = "0.1.0" 4 | edition = "2018" 5 | publish = false 6 | 7 | [dependencies] 8 | rayon = "1.*" 9 | tools = { path = "../tools" } 10 | 11 | [lib] 12 | name = "v5_more_register_reuse" 13 | crate-type = ["staticlib"] 14 | 15 | [profile.release] 16 | debug = false 17 | debug-assertions = false 18 | incremental = false 19 | lto = false 20 | opt-level = 3 21 | overflow-checks = false 22 | -------------------------------------------------------------------------------- /src/rust/v6_prefetch/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "v6_prefetch" 3 | version = "0.1.0" 4 | edition = "2018" 5 | publish = false 6 | 7 | [dependencies] 8 | rayon = "1.*" 9 | tools = { path = "../tools" } 10 | 11 | [lib] 12 | name = "v6_prefetch" 13 | crate-type = ["staticlib"] 14 | 15 | [profile.release] 16 | debug = false 17 | debug-assertions = false 18 | incremental = false 19 | lto = false 20 | opt-level = 3 21 | 
overflow-checks = false 22 | -------------------------------------------------------------------------------- /src/rust/v7_cache_reuse/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "v7_cache_reuse" 3 | version = "0.1.0" 4 | edition = "2018" 5 | publish = false 6 | 7 | [dependencies] 8 | rayon = "1.*" 9 | tools = { path = "../tools" } 10 | 11 | [lib] 12 | name = "v7_cache_reuse" 13 | crate-type = ["staticlib"] 14 | 15 | [profile.release] 16 | debug = false 17 | debug-assertions = false 18 | incremental = false 19 | lto = false 20 | opt-level = 3 21 | overflow-checks = false 22 | -------------------------------------------------------------------------------- /src/step_implementations.txt: -------------------------------------------------------------------------------- 1 | v0_baseline 2 | v1_linear_reading 3 | v2_instr_level_parallelism 4 | v3_simd 5 | v4_register_reuse 6 | v5_more_register_reuse 7 | v6_prefetch 8 | v7_cache_reuse 9 | --------------------------------------------------------------------------------