├── build.gradle ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── gradlew ├── gradlew.bat ├── plots ├── MatrixMulComparison.csv ├── data │ ├── fmaBench.csv │ ├── fmaScalar.csv │ ├── fmaSpeedup.csv │ ├── fmaVector.csv │ ├── sumScalar.csv │ └── sumVector.csv └── data_post2 │ ├── human_withSuperWord.txt │ ├── human_withoutSuperWord.txt │ ├── results_withSuperWord.txt │ └── results_withoutSuperWord.txt ├── settings.gradle └── src ├── jmh └── java │ └── ch │ └── styp │ ├── BooleanPoolingJMH.java │ ├── FloatMatrixMatrixMultiplication.java │ ├── FmaArrayBenchmark.java │ ├── SumArrayBenchManual.java │ ├── SumArrayBenchNoSuperVectorOpt.java │ └── SumArrayBenchmark.java ├── main ├── java │ └── ch │ │ └── styp │ │ ├── FmaArray.java │ │ ├── GeneratorHelpers.java │ │ ├── MatrixMul.java │ │ ├── SumArray.java │ │ └── TestMain.java └── module-info.java └── test └── java └── ch └── styp ├── MatrixMulTest.java └── SumArrayTest.java /build.gradle: -------------------------------------------------------------------------------- 1 | plugins { 2 | id 'java' 3 | id "me.champeau.jmh" version "0.6.6" 4 | } 5 | 6 | group 'ch.styp' 7 | version '1.0-SNAPSHOT' 8 | 9 | repositories { 10 | mavenCentral() 11 | } 12 | 13 | test { 14 | useJUnitPlatform() 15 | } 16 | 17 | dependencies { 18 | testImplementation 'org.junit.jupiter:junit-jupiter-api:5.8.1' 19 | testRuntimeOnly 'org.junit.jupiter:junit-jupiter-engine:5.8.1' 20 | } 21 | 22 | var incubatorArguments = ["--add-modules", "jdk.incubator.vector"] 23 | tasks.withType(AbstractCompile) { options.compilerArgs += incubatorArguments } 24 | tasks.withType(Test) { jvmArgs += incubatorArguments } 25 | 26 | jmh { 27 | iterations = 5 // Number of measurement iterations to do. 28 | benchmarkMode = ['sample'] // Benchmark mode. Available modes are: [Throughput/thrpt, AverageTime/avgt, SampleTime/sample, SingleShotTime/ss, All/all] 29 | batchSize = 1 // Batch size: number of benchmark method calls per operation. 
(some benchmark modes can ignore this setting) 30 | fork = 1 // How many times to fork a single benchmark. Use 0 to disable forking altogether 31 | failOnError = true // Should JMH fail immediately if any benchmark had experienced the unrecoverable error? 32 | forceGC = false // Should JMH force GC between iterations? 33 | jvmArgsAppend = incubatorArguments // + " -XX:-UseSuperWord" 34 | humanOutputFile = project.file("${project.buildDir}/reports/jmh/human.txt") // human-readable output file 35 | resultsFile = project.file("${project.buildDir}/reports/jmh/results.txt") // results file 36 | operationsPerInvocation = 10 // Operations per invocation. 37 | benchmarkParameters = [:] // Benchmark parameters. 38 | profilers = [] // Use profilers to collect additional data. Supported profilers: [cl, comp, gc, stack, perf, perfnorm, perfasm, xperf, xperfasm, hs_cl, hs_comp, hs_gc, hs_rt, hs_thr, async] 39 | // profilers = ['perfnorm'] // Use profilers to collect additional data. Supported profilers: [cl, comp, gc, stack, perf, perfnorm, perfasm, xperf, xperfasm, hs_cl, hs_comp, hs_gc, hs_rt, hs_thr, async] 40 | timeOnIteration = '30s' // Time to spend at each measurement iteration. 41 | resultFormat = 'CSV' // Result format type (one of CSV, JSON, NONE, SCSV, TEXT) 42 | synchronizeIterations = false // Synchronize iterations? 43 | //threads = 4 // Number of worker threads to run with. 44 | //threadGroups = [2,3,4] //Override thread group distribution for asymmetric benchmarks. 45 | timeUnit = 'ns' // Output time unit. Available time units are: [m, s, ms, us, ns]. 46 | verbosity = 'NORMAL' // Verbosity mode. Available modes are: [SILENT, NORMAL, EXTRA] 47 | warmup = '1s' // Time to spend at each warmup iteration. 48 | warmupBatchSize = 10 // Warmup batch size: number of benchmark method calls per operation. 49 | warmupForks = 0 // How many warmup forks to make for a single benchmark. 0 to disable warmup forks. 50 | warmupIterations = 1 // Number of warmup iterations to do. 
51 | warmupMode = 'INDI' // Warmup mode for warming up selected benchmarks. Warmup modes are: [INDI, BULK, BULK_INDI]. 52 | warmupBenchmarks = ['.*Warmup'] // Warmup benchmarks to include in the run in addition to already selected. JMH will not measure these benchmarks, but only use them for the warmup. 53 | zip64 = true // Use ZIP64 format for bigger archives 54 | jmhVersion = '1.29' // Specifies JMH version 55 | includeTests = false // Allows to include test sources into the generated JMH jar, i.e. use it when benchmarks depend on the test classes. 56 | duplicateClassesStrategy = DuplicatesStrategy.EXCLUDE // Strategy to apply when encountering duplicate classes during creation of the fat jar (i.e. while executing jmhJar task) 57 | } -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Styp/java-vbench/5b4c34fbb754520ef020d4d7e7bb724032fd4182/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-7.4-bin.zip 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | -------------------------------------------------------------------------------- /gradlew: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # 4 | # Copyright © 2015-2021 the original authors. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 
8 | # You may obtain a copy of the License at 9 | # 10 | # https://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | ############################################################################## 20 | # 21 | # Gradle start up script for POSIX generated by Gradle. 22 | # 23 | # Important for running: 24 | # 25 | # (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is 26 | # noncompliant, but you have some other compliant shell such as ksh or 27 | # bash, then to run this script, type that shell name before the whole 28 | # command line, like: 29 | # 30 | # ksh Gradle 31 | # 32 | # Busybox and similar reduced shells will NOT work, because this script 33 | # requires all of these POSIX shell features: 34 | # * functions; 35 | # * expansions «$var», «${var}», «${var:-default}», «${var+SET}», 36 | # «${var#prefix}», «${var%suffix}», and «$( cmd )»; 37 | # * compound commands having a testable exit status, especially «case»; 38 | # * various built-in commands including «command», «set», and «ulimit». 39 | # 40 | # Important for patching: 41 | # 42 | # (2) This script targets any POSIX shell, so it avoids extensions provided 43 | # by Bash, Ksh, etc; in particular arrays are avoided. 44 | # 45 | # The "traditional" practice of packing multiple parameters into a 46 | # space-separated string is a well documented source of bugs and security 47 | # problems, so this is (mostly) avoided, by progressively accumulating 48 | # options in "$@", and eventually passing that to Java. 
49 | # 50 | # Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, 51 | # and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; 52 | # see the in-line comments for details. 53 | # 54 | # There are tweaks for specific operating systems such as AIX, CygWin, 55 | # Darwin, MinGW, and NonStop. 56 | # 57 | # (3) This script is generated from the Groovy template 58 | # https://github.com/gradle/gradle/blob/master/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt 59 | # within the Gradle project. 60 | # 61 | # You can find Gradle at https://github.com/gradle/gradle/. 62 | # 63 | ############################################################################## 64 | 65 | # Attempt to set APP_HOME 66 | 67 | # Resolve links: $0 may be a link 68 | app_path=$0 69 | 70 | # Need this for daisy-chained symlinks. 71 | while 72 | APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path 73 | [ -h "$app_path" ] 74 | do 75 | ls=$( ls -ld "$app_path" ) 76 | link=${ls#*' -> '} 77 | case $link in #( 78 | /*) app_path=$link ;; #( 79 | *) app_path=$APP_HOME$link ;; 80 | esac 81 | done 82 | 83 | APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit 84 | 85 | APP_NAME="Gradle" 86 | APP_BASE_NAME=${0##*/} 87 | 88 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 89 | DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' 90 | 91 | # Use the maximum available, or set MAX_FD != -1 to use that value. 92 | MAX_FD=maximum 93 | 94 | warn () { 95 | echo "$*" 96 | } >&2 97 | 98 | die () { 99 | echo 100 | echo "$*" 101 | echo 102 | exit 1 103 | } >&2 104 | 105 | # OS specific support (must be 'true' or 'false'). 
106 | cygwin=false 107 | msys=false 108 | darwin=false 109 | nonstop=false 110 | case "$( uname )" in #( 111 | CYGWIN* ) cygwin=true ;; #( 112 | Darwin* ) darwin=true ;; #( 113 | MSYS* | MINGW* ) msys=true ;; #( 114 | NONSTOP* ) nonstop=true ;; 115 | esac 116 | 117 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 118 | 119 | 120 | # Determine the Java command to use to start the JVM. 121 | if [ -n "$JAVA_HOME" ] ; then 122 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 123 | # IBM's JDK on AIX uses strange locations for the executables 124 | JAVACMD=$JAVA_HOME/jre/sh/java 125 | else 126 | JAVACMD=$JAVA_HOME/bin/java 127 | fi 128 | if [ ! -x "$JAVACMD" ] ; then 129 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 130 | 131 | Please set the JAVA_HOME variable in your environment to match the 132 | location of your Java installation." 133 | fi 134 | else 135 | JAVACMD=java 136 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 137 | 138 | Please set the JAVA_HOME variable in your environment to match the 139 | location of your Java installation." 140 | fi 141 | 142 | # Increase the maximum file descriptors if we can. 143 | if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then 144 | case $MAX_FD in #( 145 | max*) 146 | MAX_FD=$( ulimit -H -n ) || 147 | warn "Could not query maximum file descriptor limit" 148 | esac 149 | case $MAX_FD in #( 150 | '' | soft) :;; #( 151 | *) 152 | ulimit -n "$MAX_FD" || 153 | warn "Could not set maximum file descriptor limit to $MAX_FD" 154 | esac 155 | fi 156 | 157 | # Collect all arguments for the java command, stacking in reverse order: 158 | # * args from the command line 159 | # * the main class name 160 | # * -classpath 161 | # * -D...appname settings 162 | # * --module-path (only if needed) 163 | # * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. 
164 | 165 | # For Cygwin or MSYS, switch paths to Windows format before running java 166 | if "$cygwin" || "$msys" ; then 167 | APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) 168 | CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) 169 | 170 | JAVACMD=$( cygpath --unix "$JAVACMD" ) 171 | 172 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 173 | for arg do 174 | if 175 | case $arg in #( 176 | -*) false ;; # don't mess with options #( 177 | /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath 178 | [ -e "$t" ] ;; #( 179 | *) false ;; 180 | esac 181 | then 182 | arg=$( cygpath --path --ignore --mixed "$arg" ) 183 | fi 184 | # Roll the args list around exactly as many times as the number of 185 | # args, so each arg winds up back in the position where it started, but 186 | # possibly modified. 187 | # 188 | # NB: a `for` loop captures its iteration list before it begins, so 189 | # changing the positional parameters here affects neither the number of 190 | # iterations, nor the values presented in `arg`. 191 | shift # remove old arg 192 | set -- "$@" "$arg" # push replacement arg 193 | done 194 | fi 195 | 196 | # Collect all arguments for the java command; 197 | # * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of 198 | # shell script including quotes and variable substitutions, so put them in 199 | # double quotes to make sure that they get re-expanded; and 200 | # * put everything else in single quotes, so that it's not re-expanded. 201 | 202 | set -- \ 203 | "-Dorg.gradle.appname=$APP_BASE_NAME" \ 204 | -classpath "$CLASSPATH" \ 205 | org.gradle.wrapper.GradleWrapperMain \ 206 | "$@" 207 | 208 | # Use "xargs" to parse quoted args. 209 | # 210 | # With -n1 it outputs one arg per line, with the quotes and backslashes removed. 
211 | # 212 | # In Bash we could simply go: 213 | # 214 | # readarray ARGS < <( xargs -n1 <<<"$var" ) && 215 | # set -- "${ARGS[@]}" "$@" 216 | # 217 | # but POSIX shell has neither arrays nor command substitution, so instead we 218 | # post-process each arg (as a line of input to sed) to backslash-escape any 219 | # character that might be a shell metacharacter, then use eval to reverse 220 | # that process (while maintaining the separation between arguments), and wrap 221 | # the whole thing up as a single "set" statement. 222 | # 223 | # This will of course break if any of these variables contains a newline or 224 | # an unmatched quote. 225 | # 226 | 227 | eval "set -- $( 228 | printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | 229 | xargs -n1 | 230 | sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | 231 | tr '\n' ' ' 232 | )" '"$@"' 233 | 234 | exec "$JAVACMD" "$@" 235 | -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @rem 2 | @rem Copyright 2015 the original author or authors. 3 | @rem 4 | @rem Licensed under the Apache License, Version 2.0 (the "License"); 5 | @rem you may not use this file except in compliance with the License. 6 | @rem You may obtain a copy of the License at 7 | @rem 8 | @rem https://www.apache.org/licenses/LICENSE-2.0 9 | @rem 10 | @rem Unless required by applicable law or agreed to in writing, software 11 | @rem distributed under the License is distributed on an "AS IS" BASIS, 12 | @rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | @rem See the License for the specific language governing permissions and 14 | @rem limitations under the License. 
15 | @rem 16 | 17 | @if "%DEBUG%" == "" @echo off 18 | @rem ########################################################################## 19 | @rem 20 | @rem Gradle startup script for Windows 21 | @rem 22 | @rem ########################################################################## 23 | 24 | @rem Set local scope for the variables with windows NT shell 25 | if "%OS%"=="Windows_NT" setlocal 26 | 27 | set DIRNAME=%~dp0 28 | if "%DIRNAME%" == "" set DIRNAME=. 29 | set APP_BASE_NAME=%~n0 30 | set APP_HOME=%DIRNAME% 31 | 32 | @rem Resolve any "." and ".." in APP_HOME to make it shorter. 33 | for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi 34 | 35 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 36 | set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" 37 | 38 | @rem Find java.exe 39 | if defined JAVA_HOME goto findJavaFromJavaHome 40 | 41 | set JAVA_EXE=java.exe 42 | %JAVA_EXE% -version >NUL 2>&1 43 | if "%ERRORLEVEL%" == "0" goto execute 44 | 45 | echo. 46 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 47 | echo. 48 | echo Please set the JAVA_HOME variable in your environment to match the 49 | echo location of your Java installation. 50 | 51 | goto fail 52 | 53 | :findJavaFromJavaHome 54 | set JAVA_HOME=%JAVA_HOME:"=% 55 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 56 | 57 | if exist "%JAVA_EXE%" goto execute 58 | 59 | echo. 60 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 61 | echo. 62 | echo Please set the JAVA_HOME variable in your environment to match the 63 | echo location of your Java installation. 
64 | 65 | goto fail 66 | 67 | :execute 68 | @rem Setup the command line 69 | 70 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 71 | 72 | 73 | @rem Execute Gradle 74 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* 75 | 76 | :end 77 | @rem End local scope for the variables with windows NT shell 78 | if "%ERRORLEVEL%"=="0" goto mainEnd 79 | 80 | :fail 81 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 82 | rem the _cmd.exe /c_ return code! 83 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 84 | exit /b 1 85 | 86 | :mainEnd 87 | if "%OS%"=="Windows_NT" endlocal 88 | 89 | :omega 90 | -------------------------------------------------------------------------------- /plots/MatrixMulComparison.csv: -------------------------------------------------------------------------------- 1 | size,simpleMatrix,vectorMatrix,SpeedUp 2 | 64,11058,1733,6.38 3 | 128,81768,12929,6.32 4 | 256,1435921,102085,14.0 5 | 512,11449915,900619,12.71 6 | 1024,98568527,8745681,11.27 7 | -------------------------------------------------------------------------------- /plots/data/fmaBench.csv: -------------------------------------------------------------------------------- 1 | size,scalar,vector,speedup,scalar_per_element,vector_per_element 2 | 15,3.813,3.651,1.044371405,0.2542,0.2434 3 | 255,29.196,7.661,3.810990732,0.114494118,0.030043137 4 | 4095,430.907,34.814,12.37740564,0.105227595,0.008501587 5 | 65535,6842.321,654.758,10.45015258,0.104407126,0.009990967 6 | 1048575,109483.73,31302.406,3.497613889,0.104411921,0.029852329 7 | 16777215,1844256.801,799812.011,2.305862847,0.109926278,0.047672514 8 | 268435455,29633085.44,14556856.32,2.035678912,0.110391846,0.054228516 -------------------------------------------------------------------------------- /plots/data/fmaScalar.csv: 
-------------------------------------------------------------------------------- 1 | size, ops_sec 2 | 15,906293.240 3 | 255,39762.945 4 | 4095,2276.469 5 | 65535,141.664 6 | 1048575,8.843 7 | 16777215,0.531 8 | 268435455,0.033 9 | -------------------------------------------------------------------------------- /plots/data/fmaSpeedup.csv: -------------------------------------------------------------------------------- 1 | size, ops_sec 2 | 15, 1.143 3 | 255, 6.714 4 | 4095, 14.24 5 | 65535, 10.75 6 | 1048575, 3.53 7 | 16777215, 2.33 8 | 268435455, 2 9 | -------------------------------------------------------------------------------- /plots/data/fmaVector.csv: -------------------------------------------------------------------------------- 1 | size, ops_sec 2 | 15,792869.865 3 | 255,266949.446 4 | 4095,32428.567 5 | 65535,1523.130 6 | 1048575,31.248 7 | 16777215,1.241 8 | 268435455,0.066 9 | -------------------------------------------------------------------------------- /plots/data/sumScalar.csv: -------------------------------------------------------------------------------- 1 | size, ops_sec 2 | 15,660843.630 3 | 255,122504.912 4 | 4095,6076.249 5 | 65535,387.500 6 | 1048575,14.238 7 | 16777215,0.588 8 | 268435455,0.036 -------------------------------------------------------------------------------- /plots/data/sumVector.csv: -------------------------------------------------------------------------------- 1 | size, ops_sec 2 | 15,683927.065 3 | 255,125584.998 4 | 4095,6080.694 5 | 65535,380.024 6 | 1048575,14.503 7 | 16777215,0.612 8 | 268435455,0.035 -------------------------------------------------------------------------------- /plots/data_post2/human_withoutSuperWord.txt: -------------------------------------------------------------------------------- 1 | # JMH version: 1.29 2 | # VM version: JDK 17.0.2, OpenJDK 64-Bit Server VM, 17.0.2+8-86 3 | # VM invoker: /home/styp/.jdks/openjdk-17.0.2/bin/java 4 | # VM options: -Dfile.encoding=UTF-8 
-Djava.io.tmpdir=/home/styp/Desktop/java-vbench/build/tmp/jmh -Duser.country=US -Duser.language=en -Duser.variant --add-modules jdk.incubator.vector -XX:-UseSuperWord 5 | # Blackhole mode: full + dont-inline hint 6 | # Warmup: 1 iterations, 1 s each, 10 calls per op 7 | # Measurement: 25 iterations, 10 s each 8 | # Timeout: 10 min per iteration 9 | # Threads: 1 thread, ***WARNING: Synchronize iterations are disabled!*** 10 | # Benchmark mode: Sampling time 11 | # Benchmark: ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt 12 | # Parameters: (LENGTH = 15) 13 | 14 | # Run progress: 0.00% complete, ETA 00:29:17 15 | # Fork: 1 of 1 16 | WARNING: Using incubator modules: jdk.incubator.vector 17 | # Warmup Iteration 1: 68.293 ±(99.9%) 128.870 ns/op 18 | Iteration 1: 3.669 ±(99.9%) 0.150 ns/op 19 | arraySumScalarNoOpt·p0.00: 3.000 ns/op 20 | arraySumScalarNoOpt·p0.50: 3.000 ns/op 21 | arraySumScalarNoOpt·p0.90: 4.000 ns/op 22 | arraySumScalarNoOpt·p0.95: 4.000 ns/op 23 | arraySumScalarNoOpt·p0.99: 5.000 ns/op 24 | arraySumScalarNoOpt·p0.999: 18.000 ns/op 25 | arraySumScalarNoOpt·p0.9999: 1050.538 ns/op 26 | arraySumScalarNoOpt·p1.00: 8496.000 ns/op 27 | 28 | Iteration 2: 3.741 ±(99.9%) 0.106 ns/op 29 | arraySumScalarNoOpt·p0.00: 3.000 ns/op 30 | arraySumScalarNoOpt·p0.50: 3.000 ns/op 31 | arraySumScalarNoOpt·p0.90: 5.000 ns/op 32 | arraySumScalarNoOpt·p0.95: 5.000 ns/op 33 | arraySumScalarNoOpt·p0.99: 6.000 ns/op 34 | arraySumScalarNoOpt·p0.999: 17.000 ns/op 35 | arraySumScalarNoOpt·p0.9999: 1061.638 ns/op 36 | arraySumScalarNoOpt·p1.00: 1642.000 ns/op 37 | 38 | Iteration 3: 3.550 ±(99.9%) 0.110 ns/op 39 | arraySumScalarNoOpt·p0.00: 3.000 ns/op 40 | arraySumScalarNoOpt·p0.50: 3.000 ns/op 41 | arraySumScalarNoOpt·p0.90: 4.000 ns/op 42 | arraySumScalarNoOpt·p0.95: 4.000 ns/op 43 | arraySumScalarNoOpt·p0.99: 5.000 ns/op 44 | arraySumScalarNoOpt·p0.999: 17.000 ns/op 45 | arraySumScalarNoOpt·p0.9999: 1063.799 ns/op 46 | arraySumScalarNoOpt·p1.00: 1628.000 ns/op 47 | 
48 | Iteration 4: 3.677 ±(99.9%) 0.096 ns/op 49 | arraySumScalarNoOpt·p0.00: 3.000 ns/op 50 | arraySumScalarNoOpt·p0.50: 3.000 ns/op 51 | arraySumScalarNoOpt·p0.90: 4.000 ns/op 52 | arraySumScalarNoOpt·p0.95: 5.000 ns/op 53 | arraySumScalarNoOpt·p0.99: 5.000 ns/op 54 | arraySumScalarNoOpt·p0.999: 17.000 ns/op 55 | arraySumScalarNoOpt·p0.9999: 1035.327 ns/op 56 | arraySumScalarNoOpt·p1.00: 1632.000 ns/op 57 | 58 | Iteration 5: 3.514 ±(99.9%) 0.106 ns/op 59 | arraySumScalarNoOpt·p0.00: 3.000 ns/op 60 | arraySumScalarNoOpt·p0.50: 3.000 ns/op 61 | arraySumScalarNoOpt·p0.90: 4.000 ns/op 62 | arraySumScalarNoOpt·p0.95: 4.000 ns/op 63 | arraySumScalarNoOpt·p0.99: 5.000 ns/op 64 | arraySumScalarNoOpt·p0.999: 16.000 ns/op 65 | arraySumScalarNoOpt·p0.9999: 1061.707 ns/op 66 | arraySumScalarNoOpt·p1.00: 1646.000 ns/op 67 | 68 | Iteration 6: 3.552 ±(99.9%) 0.104 ns/op 69 | arraySumScalarNoOpt·p0.00: 3.000 ns/op 70 | arraySumScalarNoOpt·p0.50: 3.000 ns/op 71 | arraySumScalarNoOpt·p0.90: 4.000 ns/op 72 | arraySumScalarNoOpt·p0.95: 4.000 ns/op 73 | arraySumScalarNoOpt·p0.99: 5.000 ns/op 74 | arraySumScalarNoOpt·p0.999: 17.000 ns/op 75 | arraySumScalarNoOpt·p0.9999: 1045.711 ns/op 76 | arraySumScalarNoOpt·p1.00: 1632.000 ns/op 77 | 78 | Iteration 7: 3.686 ±(99.9%) 0.104 ns/op 79 | arraySumScalarNoOpt·p0.00: 3.000 ns/op 80 | arraySumScalarNoOpt·p0.50: 3.000 ns/op 81 | arraySumScalarNoOpt·p0.90: 4.000 ns/op 82 | arraySumScalarNoOpt·p0.95: 4.000 ns/op 83 | arraySumScalarNoOpt·p0.99: 5.000 ns/op 84 | arraySumScalarNoOpt·p0.999: 17.000 ns/op 85 | arraySumScalarNoOpt·p0.9999: 1039.837 ns/op 86 | arraySumScalarNoOpt·p1.00: 1610.000 ns/op 87 | 88 | Iteration 8: 4.260 ±(99.9%) 2.191 ns/op 89 | arraySumScalarNoOpt·p0.00: 3.000 ns/op 90 | arraySumScalarNoOpt·p0.50: 3.000 ns/op 91 | arraySumScalarNoOpt·p0.90: 4.000 ns/op 92 | arraySumScalarNoOpt·p0.95: 4.000 ns/op 93 | arraySumScalarNoOpt·p0.99: 5.000 ns/op 94 | arraySumScalarNoOpt·p0.999: 17.000 ns/op 95 | arraySumScalarNoOpt·p0.9999: 
1045.650 ns/op 96 | arraySumScalarNoOpt·p1.00: 174080.000 ns/op 97 | 98 | Iteration 9: 3.580 ±(99.9%) 0.095 ns/op 99 | arraySumScalarNoOpt·p0.00: 3.000 ns/op 100 | arraySumScalarNoOpt·p0.50: 3.000 ns/op 101 | arraySumScalarNoOpt·p0.90: 4.000 ns/op 102 | arraySumScalarNoOpt·p0.95: 4.000 ns/op 103 | arraySumScalarNoOpt·p0.99: 5.000 ns/op 104 | arraySumScalarNoOpt·p0.999: 16.000 ns/op 105 | arraySumScalarNoOpt·p0.9999: 1045.246 ns/op 106 | arraySumScalarNoOpt·p1.00: 1620.000 ns/op 107 | 108 | Iteration 10: 3.550 ±(99.9%) 0.101 ns/op 109 | arraySumScalarNoOpt·p0.00: 3.000 ns/op 110 | arraySumScalarNoOpt·p0.50: 3.000 ns/op 111 | arraySumScalarNoOpt·p0.90: 4.000 ns/op 112 | arraySumScalarNoOpt·p0.95: 4.000 ns/op 113 | arraySumScalarNoOpt·p0.99: 5.000 ns/op 114 | arraySumScalarNoOpt·p0.999: 16.395 ns/op 115 | arraySumScalarNoOpt·p0.9999: 1051.679 ns/op 116 | arraySumScalarNoOpt·p1.00: 1942.000 ns/op 117 | 118 | Iteration 11: 3.554 ±(99.9%) 0.113 ns/op 119 | arraySumScalarNoOpt·p0.00: 3.000 ns/op 120 | arraySumScalarNoOpt·p0.50: 3.000 ns/op 121 | arraySumScalarNoOpt·p0.90: 4.000 ns/op 122 | arraySumScalarNoOpt·p0.95: 4.000 ns/op 123 | arraySumScalarNoOpt·p0.99: 5.000 ns/op 124 | arraySumScalarNoOpt·p0.999: 17.000 ns/op 125 | arraySumScalarNoOpt·p0.9999: 1068.945 ns/op 126 | arraySumScalarNoOpt·p1.00: 1630.000 ns/op 127 | 128 | Iteration 12: 3.489 ±(99.9%) 0.107 ns/op 129 | arraySumScalarNoOpt·p0.00: 3.000 ns/op 130 | arraySumScalarNoOpt·p0.50: 3.000 ns/op 131 | arraySumScalarNoOpt·p0.90: 4.000 ns/op 132 | arraySumScalarNoOpt·p0.95: 4.000 ns/op 133 | arraySumScalarNoOpt·p0.99: 5.000 ns/op 134 | arraySumScalarNoOpt·p0.999: 17.000 ns/op 135 | arraySumScalarNoOpt·p0.9999: 1057.693 ns/op 136 | arraySumScalarNoOpt·p1.00: 1632.000 ns/op 137 | 138 | Iteration 13: 3.512 ±(99.9%) 0.109 ns/op 139 | arraySumScalarNoOpt·p0.00: 3.000 ns/op 140 | arraySumScalarNoOpt·p0.50: 3.000 ns/op 141 | arraySumScalarNoOpt·p0.90: 4.000 ns/op 142 | arraySumScalarNoOpt·p0.95: 4.000 ns/op 143 | 
arraySumScalarNoOpt·p0.99: 5.000 ns/op 144 | arraySumScalarNoOpt·p0.999: 17.000 ns/op 145 | arraySumScalarNoOpt·p0.9999: 1057.678 ns/op 146 | arraySumScalarNoOpt·p1.00: 1638.000 ns/op 147 | 148 | Iteration 14: 3.735 ±(99.9%) 0.100 ns/op 149 | arraySumScalarNoOpt·p0.00: 3.000 ns/op 150 | arraySumScalarNoOpt·p0.50: 3.000 ns/op 151 | arraySumScalarNoOpt·p0.90: 4.000 ns/op 152 | arraySumScalarNoOpt·p0.95: 5.000 ns/op 153 | arraySumScalarNoOpt·p0.99: 5.000 ns/op 154 | arraySumScalarNoOpt·p0.999: 16.000 ns/op 155 | arraySumScalarNoOpt·p0.9999: 1051.730 ns/op 156 | arraySumScalarNoOpt·p1.00: 1626.000 ns/op 157 | 158 | Iteration 15: 3.506 ±(99.9%) 0.091 ns/op 159 | arraySumScalarNoOpt·p0.00: 3.000 ns/op 160 | arraySumScalarNoOpt·p0.50: 3.000 ns/op 161 | arraySumScalarNoOpt·p0.90: 4.000 ns/op 162 | arraySumScalarNoOpt·p0.95: 4.000 ns/op 163 | arraySumScalarNoOpt·p0.99: 5.000 ns/op 164 | arraySumScalarNoOpt·p0.999: 17.000 ns/op 165 | arraySumScalarNoOpt·p0.9999: 964.003 ns/op 166 | arraySumScalarNoOpt·p1.00: 1628.000 ns/op 167 | 168 | Iteration 16: 3.633 ±(99.9%) 0.100 ns/op 169 | arraySumScalarNoOpt·p0.00: 3.000 ns/op 170 | arraySumScalarNoOpt·p0.50: 3.000 ns/op 171 | arraySumScalarNoOpt·p0.90: 4.000 ns/op 172 | arraySumScalarNoOpt·p0.95: 4.000 ns/op 173 | arraySumScalarNoOpt·p0.99: 5.000 ns/op 174 | arraySumScalarNoOpt·p0.999: 17.000 ns/op 175 | arraySumScalarNoOpt·p0.9999: 1047.811 ns/op 176 | arraySumScalarNoOpt·p1.00: 1622.000 ns/op 177 | 178 | Iteration 17: 3.852 ±(99.9%) 0.101 ns/op 179 | arraySumScalarNoOpt·p0.00: 3.000 ns/op 180 | arraySumScalarNoOpt·p0.50: 4.000 ns/op 181 | arraySumScalarNoOpt·p0.90: 4.000 ns/op 182 | arraySumScalarNoOpt·p0.95: 5.000 ns/op 183 | arraySumScalarNoOpt·p0.99: 5.000 ns/op 184 | arraySumScalarNoOpt·p0.999: 18.000 ns/op 185 | arraySumScalarNoOpt·p0.9999: 1055.596 ns/op 186 | arraySumScalarNoOpt·p1.00: 1634.000 ns/op 187 | 188 | Iteration 18: 3.619 ±(99.9%) 0.101 ns/op 189 | arraySumScalarNoOpt·p0.00: 3.000 ns/op 190 | 
arraySumScalarNoOpt·p0.50: 3.000 ns/op 191 | arraySumScalarNoOpt·p0.90: 4.000 ns/op 192 | arraySumScalarNoOpt·p0.95: 4.000 ns/op 193 | arraySumScalarNoOpt·p0.99: 5.000 ns/op 194 | arraySumScalarNoOpt·p0.999: 17.000 ns/op 195 | arraySumScalarNoOpt·p0.9999: 1055.790 ns/op 196 | arraySumScalarNoOpt·p1.00: 1642.000 ns/op 197 | 198 | Iteration 19: 3.693 ±(99.9%) 0.112 ns/op 199 | arraySumScalarNoOpt·p0.00: 3.000 ns/op 200 | arraySumScalarNoOpt·p0.50: 3.000 ns/op 201 | arraySumScalarNoOpt·p0.90: 4.000 ns/op 202 | arraySumScalarNoOpt·p0.95: 5.000 ns/op 203 | arraySumScalarNoOpt·p0.99: 5.000 ns/op 204 | arraySumScalarNoOpt·p0.999: 18.000 ns/op 205 | arraySumScalarNoOpt·p0.9999: 1061.821 ns/op 206 | arraySumScalarNoOpt·p1.00: 2006.000 ns/op 207 | 208 | Iteration 20: 3.720 ±(99.9%) 0.105 ns/op 209 | arraySumScalarNoOpt·p0.00: 3.000 ns/op 210 | arraySumScalarNoOpt·p0.50: 3.000 ns/op 211 | arraySumScalarNoOpt·p0.90: 4.000 ns/op 212 | arraySumScalarNoOpt·p0.95: 5.000 ns/op 213 | arraySumScalarNoOpt·p0.99: 5.000 ns/op 214 | arraySumScalarNoOpt·p0.999: 17.000 ns/op 215 | arraySumScalarNoOpt·p0.9999: 1059.819 ns/op 216 | arraySumScalarNoOpt·p1.00: 1642.000 ns/op 217 | 218 | Iteration 21: 3.730 ±(99.9%) 0.103 ns/op 219 | arraySumScalarNoOpt·p0.00: 3.000 ns/op 220 | arraySumScalarNoOpt·p0.50: 3.000 ns/op 221 | arraySumScalarNoOpt·p0.90: 4.000 ns/op 222 | arraySumScalarNoOpt·p0.95: 5.000 ns/op 223 | arraySumScalarNoOpt·p0.99: 5.000 ns/op 224 | arraySumScalarNoOpt·p0.999: 17.000 ns/op 225 | arraySumScalarNoOpt·p0.9999: 1057.749 ns/op 226 | arraySumScalarNoOpt·p1.00: 1632.000 ns/op 227 | 228 | Iteration 22: 3.735 ±(99.9%) 0.111 ns/op 229 | arraySumScalarNoOpt·p0.00: 3.000 ns/op 230 | arraySumScalarNoOpt·p0.50: 3.000 ns/op 231 | arraySumScalarNoOpt·p0.90: 4.000 ns/op 232 | arraySumScalarNoOpt·p0.95: 4.000 ns/op 233 | arraySumScalarNoOpt·p0.99: 5.000 ns/op 234 | arraySumScalarNoOpt·p0.999: 18.000 ns/op 235 | arraySumScalarNoOpt·p0.9999: 1063.740 ns/op 236 | arraySumScalarNoOpt·p1.00: 
1644.000 ns/op 237 | 238 | Iteration 23: 3.595 ±(99.9%) 0.103 ns/op 239 | arraySumScalarNoOpt·p0.00: 3.000 ns/op 240 | arraySumScalarNoOpt·p0.50: 3.000 ns/op 241 | arraySumScalarNoOpt·p0.90: 4.000 ns/op 242 | arraySumScalarNoOpt·p0.95: 4.000 ns/op 243 | arraySumScalarNoOpt·p0.99: 5.000 ns/op 244 | arraySumScalarNoOpt·p0.999: 17.000 ns/op 245 | arraySumScalarNoOpt·p0.9999: 1057.697 ns/op 246 | arraySumScalarNoOpt·p1.00: 1632.000 ns/op 247 | 248 | Iteration 24: 3.804 ±(99.9%) 0.100 ns/op 249 | arraySumScalarNoOpt·p0.00: 3.000 ns/op 250 | arraySumScalarNoOpt·p0.50: 3.000 ns/op 251 | arraySumScalarNoOpt·p0.90: 4.000 ns/op 252 | arraySumScalarNoOpt·p0.95: 5.000 ns/op 253 | arraySumScalarNoOpt·p0.99: 5.000 ns/op 254 | arraySumScalarNoOpt·p0.999: 17.000 ns/op 255 | arraySumScalarNoOpt·p0.9999: 1059.767 ns/op 256 | arraySumScalarNoOpt·p1.00: 1636.000 ns/op 257 | 258 | Iteration 25: 3.539 ±(99.9%) 0.108 ns/op 259 | arraySumScalarNoOpt·p0.00: 3.000 ns/op 260 | arraySumScalarNoOpt·p0.50: 3.000 ns/op 261 | arraySumScalarNoOpt·p0.90: 4.000 ns/op 262 | arraySumScalarNoOpt·p0.95: 4.000 ns/op 263 | arraySumScalarNoOpt·p0.99: 5.000 ns/op 264 | arraySumScalarNoOpt·p0.999: 18.000 ns/op 265 | arraySumScalarNoOpt·p0.9999: 1061.768 ns/op 266 | arraySumScalarNoOpt·p1.00: 1630.000 ns/op 267 | 268 | 269 | 270 | Result "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt": 271 | N = 6527740 272 | mean = 3.660 ±(99.9%) 0.090 ns/op 273 | 274 | Histogram, ns/op: 275 | [ 0.000, 12500.000) = 6527739 276 | [ 12500.000, 25000.000) = 0 277 | [ 25000.000, 37500.000) = 0 278 | [ 37500.000, 50000.000) = 0 279 | [ 50000.000, 62500.000) = 0 280 | [ 62500.000, 75000.000) = 0 281 | [ 75000.000, 87500.000) = 0 282 | [ 87500.000, 100000.000) = 0 283 | [100000.000, 112500.000) = 0 284 | [112500.000, 125000.000) = 0 285 | [125000.000, 137500.000) = 0 286 | [137500.000, 150000.000) = 0 287 | [150000.000, 162500.000) = 0 288 | [162500.000, 175000.000) = 1 289 | [175000.000, 187500.000) = 0 290 | 291 | 
Percentiles, ns/op: 292 | p(0.0000) = 3.000 ns/op 293 | p(50.0000) = 3.000 ns/op 294 | p(90.0000) = 4.000 ns/op 295 | p(95.0000) = 5.000 ns/op 296 | p(99.0000) = 5.000 ns/op 297 | p(99.9000) = 17.000 ns/op 298 | p(99.9900) = 1054.000 ns/op 299 | p(99.9990) = 1620.000 ns/op 300 | p(99.9999) = 1809.009 ns/op 301 | p(100.0000) = 174080.000 ns/op 302 | 303 | 304 | # JMH version: 1.29 305 | # VM version: JDK 17.0.2, OpenJDK 64-Bit Server VM, 17.0.2+8-86 306 | # VM invoker: /home/styp/.jdks/openjdk-17.0.2/bin/java 307 | # VM options: -Dfile.encoding=UTF-8 -Djava.io.tmpdir=/home/styp/Desktop/java-vbench/build/tmp/jmh -Duser.country=US -Duser.language=en -Duser.variant --add-modules jdk.incubator.vector -XX:-UseSuperWord 308 | # Blackhole mode: full + dont-inline hint 309 | # Warmup: 1 iterations, 1 s each, 10 calls per op 310 | # Measurement: 25 iterations, 10 s each 311 | # Timeout: 10 min per iteration 312 | # Threads: 1 thread, ***WARNING: Synchronize iterations are disabled!*** 313 | # Benchmark mode: Sampling time 314 | # Benchmark: ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt 315 | # Parameters: (LENGTH = 255) 316 | 317 | # Run progress: 14.29% complete, ETA 00:25:08 318 | # Fork: 1 of 1 319 | WARNING: Using incubator modules: jdk.incubator.vector 320 | # Warmup Iteration 1: 312.880 ±(99.9%) 163.057 ns/op 321 | Iteration 1: 15.852 ±(99.9%) 0.209 ns/op 322 | arraySumScalarNoOpt·p0.00: 12.000 ns/op 323 | arraySumScalarNoOpt·p0.50: 14.000 ns/op 324 | arraySumScalarNoOpt·p0.90: 18.000 ns/op 325 | arraySumScalarNoOpt·p0.95: 21.000 ns/op 326 | arraySumScalarNoOpt·p0.99: 24.000 ns/op 327 | arraySumScalarNoOpt·p0.999: 325.217 ns/op 328 | arraySumScalarNoOpt·p0.9999: 1636.487 ns/op 329 | arraySumScalarNoOpt·p1.00: 4792.000 ns/op 330 | 331 | Iteration 2: 15.657 ±(99.9%) 2.262 ns/op 332 | arraySumScalarNoOpt·p0.00: 12.000 ns/op 333 | arraySumScalarNoOpt·p0.50: 14.000 ns/op 334 | arraySumScalarNoOpt·p0.90: 17.000 ns/op 335 | arraySumScalarNoOpt·p0.95: 20.000 ns/op 
336 | arraySumScalarNoOpt·p0.99: 23.000 ns/op 337 | arraySumScalarNoOpt·p0.999: 130.000 ns/op 338 | arraySumScalarNoOpt·p0.9999: 1585.815 ns/op 339 | arraySumScalarNoOpt·p1.00: 192768.000 ns/op 340 | 341 | Iteration 3: 15.559 ±(99.9%) 2.077 ns/op 342 | arraySumScalarNoOpt·p0.00: 12.000 ns/op 343 | arraySumScalarNoOpt·p0.50: 14.000 ns/op 344 | arraySumScalarNoOpt·p0.90: 17.000 ns/op 345 | arraySumScalarNoOpt·p0.95: 20.000 ns/op 346 | arraySumScalarNoOpt·p0.99: 25.000 ns/op 347 | arraySumScalarNoOpt·p0.999: 113.792 ns/op 348 | arraySumScalarNoOpt·p0.9999: 1579.517 ns/op 349 | arraySumScalarNoOpt·p1.00: 177152.000 ns/op 350 | 351 | Iteration 4: 17.750 ±(99.9%) 4.570 ns/op 352 | arraySumScalarNoOpt·p0.00: 12.000 ns/op 353 | arraySumScalarNoOpt·p0.50: 14.000 ns/op 354 | arraySumScalarNoOpt·p0.90: 17.000 ns/op 355 | arraySumScalarNoOpt·p0.95: 20.000 ns/op 356 | arraySumScalarNoOpt·p0.99: 23.000 ns/op 357 | arraySumScalarNoOpt·p0.999: 121.049 ns/op 358 | arraySumScalarNoOpt·p0.9999: 1589.810 ns/op 359 | arraySumScalarNoOpt·p1.00: 205056.000 ns/op 360 | 361 | Iteration 5: 15.080 ±(99.9%) 0.159 ns/op 362 | arraySumScalarNoOpt·p0.00: 12.000 ns/op 363 | arraySumScalarNoOpt·p0.50: 14.000 ns/op 364 | arraySumScalarNoOpt·p0.90: 17.000 ns/op 365 | arraySumScalarNoOpt·p0.95: 20.000 ns/op 366 | arraySumScalarNoOpt·p0.99: 23.000 ns/op 367 | arraySumScalarNoOpt·p0.999: 158.514 ns/op 368 | arraySumScalarNoOpt·p0.9999: 1587.851 ns/op 369 | arraySumScalarNoOpt·p1.00: 3176.000 ns/op 370 | 371 | Iteration 6: 15.627 ±(99.9%) 1.966 ns/op 372 | arraySumScalarNoOpt·p0.00: 12.000 ns/op 373 | arraySumScalarNoOpt·p0.50: 14.000 ns/op 374 | arraySumScalarNoOpt·p0.90: 17.000 ns/op 375 | arraySumScalarNoOpt·p0.95: 20.000 ns/op 376 | arraySumScalarNoOpt·p0.99: 23.000 ns/op 377 | arraySumScalarNoOpt·p0.999: 125.004 ns/op 378 | arraySumScalarNoOpt·p0.9999: 1579.402 ns/op 379 | arraySumScalarNoOpt·p1.00: 167424.000 ns/op 380 | 381 | Iteration 7: 14.998 ±(99.9%) 0.147 ns/op 382 | 
arraySumScalarNoOpt·p0.00: 12.000 ns/op 383 | arraySumScalarNoOpt·p0.50: 14.000 ns/op 384 | arraySumScalarNoOpt·p0.90: 17.000 ns/op 385 | arraySumScalarNoOpt·p0.95: 20.000 ns/op 386 | arraySumScalarNoOpt·p0.99: 23.000 ns/op 387 | arraySumScalarNoOpt·p0.999: 124.041 ns/op 388 | arraySumScalarNoOpt·p0.9999: 1585.808 ns/op 389 | arraySumScalarNoOpt·p1.00: 1652.000 ns/op 390 | 391 | Iteration 8: 15.507 ±(99.9%) 1.890 ns/op 392 | arraySumScalarNoOpt·p0.00: 12.000 ns/op 393 | arraySumScalarNoOpt·p0.50: 14.000 ns/op 394 | arraySumScalarNoOpt·p0.90: 16.000 ns/op 395 | arraySumScalarNoOpt·p0.95: 20.000 ns/op 396 | arraySumScalarNoOpt·p0.99: 24.000 ns/op 397 | arraySumScalarNoOpt·p0.999: 112.902 ns/op 398 | arraySumScalarNoOpt·p0.9999: 1575.780 ns/op 399 | arraySumScalarNoOpt·p1.00: 161024.000 ns/op 400 | 401 | Iteration 9: 18.928 ±(99.9%) 5.347 ns/op 402 | arraySumScalarNoOpt·p0.00: 12.000 ns/op 403 | arraySumScalarNoOpt·p0.50: 14.000 ns/op 404 | arraySumScalarNoOpt·p0.90: 17.000 ns/op 405 | arraySumScalarNoOpt·p0.95: 20.000 ns/op 406 | arraySumScalarNoOpt·p0.99: 23.000 ns/op 407 | arraySumScalarNoOpt·p0.999: 123.445 ns/op 408 | arraySumScalarNoOpt·p0.9999: 1589.689 ns/op 409 | arraySumScalarNoOpt·p1.00: 196608.000 ns/op 410 | 411 | Iteration 10: 14.965 ±(99.9%) 0.142 ns/op 412 | arraySumScalarNoOpt·p0.00: 12.000 ns/op 413 | arraySumScalarNoOpt·p0.50: 14.000 ns/op 414 | arraySumScalarNoOpt·p0.90: 17.000 ns/op 415 | arraySumScalarNoOpt·p0.95: 20.000 ns/op 416 | arraySumScalarNoOpt·p0.99: 23.000 ns/op 417 | arraySumScalarNoOpt·p0.999: 113.334 ns/op 418 | arraySumScalarNoOpt·p0.9999: 1573.867 ns/op 419 | arraySumScalarNoOpt·p1.00: 1644.000 ns/op 420 | 421 | Iteration 11: 15.003 ±(99.9%) 0.149 ns/op 422 | arraySumScalarNoOpt·p0.00: 12.000 ns/op 423 | arraySumScalarNoOpt·p0.50: 14.000 ns/op 424 | arraySumScalarNoOpt·p0.90: 17.000 ns/op 425 | arraySumScalarNoOpt·p0.95: 20.000 ns/op 426 | arraySumScalarNoOpt·p0.99: 23.000 ns/op 427 | arraySumScalarNoOpt·p0.999: 132.022 ns/op 428 | 
arraySumScalarNoOpt·p0.9999: 1573.804 ns/op 429 | arraySumScalarNoOpt·p1.00: 1822.000 ns/op 430 | 431 | Iteration 12: 16.246 ±(99.9%) 2.932 ns/op 432 | arraySumScalarNoOpt·p0.00: 12.000 ns/op 433 | arraySumScalarNoOpt·p0.50: 14.000 ns/op 434 | arraySumScalarNoOpt·p0.90: 17.000 ns/op 435 | arraySumScalarNoOpt·p0.95: 20.000 ns/op 436 | arraySumScalarNoOpt·p0.99: 23.000 ns/op 437 | arraySumScalarNoOpt·p0.999: 121.080 ns/op 438 | arraySumScalarNoOpt·p0.9999: 1587.816 ns/op 439 | arraySumScalarNoOpt·p1.00: 178432.000 ns/op 440 | 441 | Iteration 13: 14.992 ±(99.9%) 0.146 ns/op 442 | arraySumScalarNoOpt·p0.00: 12.000 ns/op 443 | arraySumScalarNoOpt·p0.50: 14.000 ns/op 444 | arraySumScalarNoOpt·p0.90: 17.000 ns/op 445 | arraySumScalarNoOpt·p0.95: 20.000 ns/op 446 | arraySumScalarNoOpt·p0.99: 23.000 ns/op 447 | arraySumScalarNoOpt·p0.999: 121.167 ns/op 448 | arraySumScalarNoOpt·p0.9999: 1573.833 ns/op 449 | arraySumScalarNoOpt·p1.00: 1654.000 ns/op 450 | 451 | Iteration 14: 17.627 ±(99.9%) 4.344 ns/op 452 | arraySumScalarNoOpt·p0.00: 12.000 ns/op 453 | arraySumScalarNoOpt·p0.50: 14.000 ns/op 454 | arraySumScalarNoOpt·p0.90: 17.000 ns/op 455 | arraySumScalarNoOpt·p0.95: 20.000 ns/op 456 | arraySumScalarNoOpt·p0.99: 23.000 ns/op 457 | arraySumScalarNoOpt·p0.999: 135.923 ns/op 458 | arraySumScalarNoOpt·p0.9999: 1583.569 ns/op 459 | arraySumScalarNoOpt·p1.00: 199424.000 ns/op 460 | 461 | Iteration 15: 15.016 ±(99.9%) 0.147 ns/op 462 | arraySumScalarNoOpt·p0.00: 12.000 ns/op 463 | arraySumScalarNoOpt·p0.50: 14.000 ns/op 464 | arraySumScalarNoOpt·p0.90: 16.000 ns/op 465 | arraySumScalarNoOpt·p0.95: 20.000 ns/op 466 | arraySumScalarNoOpt·p0.99: 25.000 ns/op 467 | arraySumScalarNoOpt·p0.999: 138.163 ns/op 468 | arraySumScalarNoOpt·p0.9999: 1573.833 ns/op 469 | arraySumScalarNoOpt·p1.00: 1838.000 ns/op 470 | 471 | Iteration 16: 15.579 ±(99.9%) 2.034 ns/op 472 | arraySumScalarNoOpt·p0.00: 12.000 ns/op 473 | arraySumScalarNoOpt·p0.50: 14.000 ns/op 474 | arraySumScalarNoOpt·p0.90: 
17.000 ns/op 475 | arraySumScalarNoOpt·p0.95: 20.000 ns/op 476 | arraySumScalarNoOpt·p0.99: 23.000 ns/op 477 | arraySumScalarNoOpt·p0.999: 124.000 ns/op 478 | arraySumScalarNoOpt·p0.9999: 1573.782 ns/op 479 | arraySumScalarNoOpt·p1.00: 173312.000 ns/op 480 | 481 | Iteration 17: 15.610 ±(99.9%) 2.054 ns/op 482 | arraySumScalarNoOpt·p0.00: 12.000 ns/op 483 | arraySumScalarNoOpt·p0.50: 14.000 ns/op 484 | arraySumScalarNoOpt·p0.90: 17.000 ns/op 485 | arraySumScalarNoOpt·p0.95: 20.000 ns/op 486 | arraySumScalarNoOpt·p0.99: 23.000 ns/op 487 | arraySumScalarNoOpt·p0.999: 126.779 ns/op 488 | arraySumScalarNoOpt·p0.9999: 1579.756 ns/op 489 | arraySumScalarNoOpt·p1.00: 175104.000 ns/op 490 | 491 | Iteration 18: 14.923 ±(99.9%) 0.135 ns/op 492 | arraySumScalarNoOpt·p0.00: 12.000 ns/op 493 | arraySumScalarNoOpt·p0.50: 14.000 ns/op 494 | arraySumScalarNoOpt·p0.90: 17.000 ns/op 495 | arraySumScalarNoOpt·p0.95: 20.000 ns/op 496 | arraySumScalarNoOpt·p0.99: 23.000 ns/op 497 | arraySumScalarNoOpt·p0.999: 114.498 ns/op 498 | arraySumScalarNoOpt·p0.9999: 1565.099 ns/op 499 | arraySumScalarNoOpt·p1.00: 2528.000 ns/op 500 | 501 | Iteration 19: 16.336 ±(99.9%) 3.083 ns/op 502 | arraySumScalarNoOpt·p0.00: 12.000 ns/op 503 | arraySumScalarNoOpt·p0.50: 14.000 ns/op 504 | arraySumScalarNoOpt·p0.90: 17.000 ns/op 505 | arraySumScalarNoOpt·p0.95: 20.000 ns/op 506 | arraySumScalarNoOpt·p0.99: 24.000 ns/op 507 | arraySumScalarNoOpt·p0.999: 125.406 ns/op 508 | arraySumScalarNoOpt·p0.9999: 1579.881 ns/op 509 | arraySumScalarNoOpt·p1.00: 196352.000 ns/op 510 | 511 | Iteration 20: 15.569 ±(99.9%) 1.914 ns/op 512 | arraySumScalarNoOpt·p0.00: 12.000 ns/op 513 | arraySumScalarNoOpt·p0.50: 14.000 ns/op 514 | arraySumScalarNoOpt·p0.90: 17.000 ns/op 515 | arraySumScalarNoOpt·p0.95: 20.000 ns/op 516 | arraySumScalarNoOpt·p0.99: 23.000 ns/op 517 | arraySumScalarNoOpt·p0.999: 135.245 ns/op 518 | arraySumScalarNoOpt·p0.9999: 1581.649 ns/op 519 | arraySumScalarNoOpt·p1.00: 163328.000 ns/op 520 | 521 | 
Iteration 21: 15.587 ±(99.9%) 1.856 ns/op 522 | arraySumScalarNoOpt·p0.00: 12.000 ns/op 523 | arraySumScalarNoOpt·p0.50: 14.000 ns/op 524 | arraySumScalarNoOpt·p0.90: 17.000 ns/op 525 | arraySumScalarNoOpt·p0.95: 20.000 ns/op 526 | arraySumScalarNoOpt·p0.99: 23.000 ns/op 527 | arraySumScalarNoOpt·p0.999: 119.501 ns/op 528 | arraySumScalarNoOpt·p0.9999: 1575.900 ns/op 529 | arraySumScalarNoOpt·p1.00: 157696.000 ns/op 530 | 531 | Iteration 22: 14.957 ±(99.9%) 0.134 ns/op 532 | arraySumScalarNoOpt·p0.00: 12.000 ns/op 533 | arraySumScalarNoOpt·p0.50: 14.000 ns/op 534 | arraySumScalarNoOpt·p0.90: 17.000 ns/op 535 | arraySumScalarNoOpt·p0.95: 20.000 ns/op 536 | arraySumScalarNoOpt·p0.99: 24.000 ns/op 537 | arraySumScalarNoOpt·p0.999: 109.690 ns/op 538 | arraySumScalarNoOpt·p0.9999: 1569.876 ns/op 539 | arraySumScalarNoOpt·p1.00: 1770.000 ns/op 540 | 541 | Iteration 23: 14.984 ±(99.9%) 0.142 ns/op 542 | arraySumScalarNoOpt·p0.00: 12.000 ns/op 543 | arraySumScalarNoOpt·p0.50: 14.000 ns/op 544 | arraySumScalarNoOpt·p0.90: 17.000 ns/op 545 | arraySumScalarNoOpt·p0.95: 20.000 ns/op 546 | arraySumScalarNoOpt·p0.99: 24.000 ns/op 547 | arraySumScalarNoOpt·p0.999: 121.704 ns/op 548 | arraySumScalarNoOpt·p0.9999: 1575.941 ns/op 549 | arraySumScalarNoOpt·p1.00: 1950.000 ns/op 550 | 551 | Iteration 24: 15.586 ±(99.9%) 2.013 ns/op 552 | arraySumScalarNoOpt·p0.00: 12.000 ns/op 553 | arraySumScalarNoOpt·p0.50: 14.000 ns/op 554 | arraySumScalarNoOpt·p0.90: 17.000 ns/op 555 | arraySumScalarNoOpt·p0.95: 20.000 ns/op 556 | arraySumScalarNoOpt·p0.99: 24.000 ns/op 557 | arraySumScalarNoOpt·p0.999: 112.000 ns/op 558 | arraySumScalarNoOpt·p0.9999: 1573.882 ns/op 559 | arraySumScalarNoOpt·p1.00: 171264.000 ns/op 560 | 561 | Iteration 25: 14.967 ±(99.9%) 0.141 ns/op 562 | arraySumScalarNoOpt·p0.00: 12.000 ns/op 563 | arraySumScalarNoOpt·p0.50: 14.000 ns/op 564 | arraySumScalarNoOpt·p0.90: 17.000 ns/op 565 | arraySumScalarNoOpt·p0.95: 20.000 ns/op 566 | arraySumScalarNoOpt·p0.99: 23.000 ns/op 567 
| arraySumScalarNoOpt·p0.999: 111.358 ns/op 568 | arraySumScalarNoOpt·p0.9999: 1563.872 ns/op 569 | arraySumScalarNoOpt·p1.00: 1950.000 ns/op 570 | 571 | 572 | 573 | Result "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt": 574 | N = 7021000 575 | mean = 15.717 ±(99.9%) 0.444 ns/op 576 | 577 | Histogram, ns/op: 578 | [ 0.000, 25000.000) = 7020973 579 | [ 25000.000, 50000.000) = 0 580 | [ 50000.000, 75000.000) = 0 581 | [ 75000.000, 100000.000) = 0 582 | [100000.000, 125000.000) = 0 583 | [125000.000, 150000.000) = 0 584 | [150000.000, 175000.000) = 8 585 | [175000.000, 200000.000) = 18 586 | [200000.000, 225000.000) = 1 587 | [225000.000, 250000.000) = 0 588 | [250000.000, 275000.000) = 0 589 | 590 | Percentiles, ns/op: 591 | p(0.0000) = 12.000 ns/op 592 | p(50.0000) = 14.000 ns/op 593 | p(90.0000) = 17.000 ns/op 594 | p(95.0000) = 20.000 ns/op 595 | p(99.0000) = 23.000 ns/op 596 | p(99.9000) = 130.000 ns/op 597 | p(99.9900) = 1580.000 ns/op 598 | p(99.9990) = 1676.740 ns/op 599 | p(99.9999) = 191412.732 ns/op 600 | p(100.0000) = 205056.000 ns/op 601 | 602 | 603 | # JMH version: 1.29 604 | # VM version: JDK 17.0.2, OpenJDK 64-Bit Server VM, 17.0.2+8-86 605 | # VM invoker: /home/styp/.jdks/openjdk-17.0.2/bin/java 606 | # VM options: -Dfile.encoding=UTF-8 -Djava.io.tmpdir=/home/styp/Desktop/java-vbench/build/tmp/jmh -Duser.country=US -Duser.language=en -Duser.variant --add-modules jdk.incubator.vector -XX:-UseSuperWord 607 | # Blackhole mode: full + dont-inline hint 608 | # Warmup: 1 iterations, 1 s each, 10 calls per op 609 | # Measurement: 25 iterations, 10 s each 610 | # Timeout: 10 min per iteration 611 | # Threads: 1 thread, ***WARNING: Synchronize iterations are disabled!*** 612 | # Benchmark mode: Sampling time 613 | # Benchmark: ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt 614 | # Parameters: (LENGTH = 4095) 615 | 616 | # Run progress: 28.57% complete, ETA 00:20:57 617 | # Fork: 1 of 1 618 | WARNING: Using incubator modules: 
jdk.incubator.vector 619 | # Warmup Iteration 1: 4189.468 ±(99.9%) 468.385 ns/op 620 | Iteration 1: 286.168 ±(99.9%) 8.568 ns/op 621 | arraySumScalarNoOpt·p0.00: 228.000 ns/op 622 | arraySumScalarNoOpt·p0.50: 275.000 ns/op 623 | arraySumScalarNoOpt·p0.90: 295.000 ns/op 624 | arraySumScalarNoOpt·p0.95: 304.000 ns/op 625 | arraySumScalarNoOpt·p0.99: 349.000 ns/op 626 | arraySumScalarNoOpt·p0.999: 1628.276 ns/op 627 | arraySumScalarNoOpt·p0.9999: 2119.628 ns/op 628 | arraySumScalarNoOpt·p1.00: 342528.000 ns/op 629 | 630 | Iteration 2: 280.922 ±(99.9%) 3.818 ns/op 631 | arraySumScalarNoOpt·p0.00: 233.000 ns/op 632 | arraySumScalarNoOpt·p0.50: 274.000 ns/op 633 | arraySumScalarNoOpt·p0.90: 295.000 ns/op 634 | arraySumScalarNoOpt·p0.95: 306.000 ns/op 635 | arraySumScalarNoOpt·p0.99: 343.000 ns/op 636 | arraySumScalarNoOpt·p0.999: 1350.776 ns/op 637 | arraySumScalarNoOpt·p0.9999: 1919.478 ns/op 638 | arraySumScalarNoOpt·p1.00: 183808.000 ns/op 639 | 640 | Iteration 3: 293.114 ±(99.9%) 10.904 ns/op 641 | arraySumScalarNoOpt·p0.00: 230.000 ns/op 642 | arraySumScalarNoOpt·p0.50: 274.000 ns/op 643 | arraySumScalarNoOpt·p0.90: 295.000 ns/op 644 | arraySumScalarNoOpt·p0.95: 306.000 ns/op 645 | arraySumScalarNoOpt·p0.99: 356.000 ns/op 646 | arraySumScalarNoOpt·p0.999: 1368.468 ns/op 647 | arraySumScalarNoOpt·p0.9999: 2062.042 ns/op 648 | arraySumScalarNoOpt·p1.00: 195328.000 ns/op 649 | 650 | Iteration 4: 288.545 ±(99.9%) 8.595 ns/op 651 | arraySumScalarNoOpt·p0.00: 237.000 ns/op 652 | arraySumScalarNoOpt·p0.50: 274.000 ns/op 653 | arraySumScalarNoOpt·p0.90: 295.000 ns/op 654 | arraySumScalarNoOpt·p0.95: 306.000 ns/op 655 | arraySumScalarNoOpt·p0.99: 361.000 ns/op 656 | arraySumScalarNoOpt·p0.999: 1386.704 ns/op 657 | arraySumScalarNoOpt·p0.9999: 2016.642 ns/op 658 | arraySumScalarNoOpt·p1.00: 185856.000 ns/op 659 | 660 | Iteration 5: 290.601 ±(99.9%) 9.978 ns/op 661 | arraySumScalarNoOpt·p0.00: 221.000 ns/op 662 | arraySumScalarNoOpt·p0.50: 274.000 ns/op 663 | 
arraySumScalarNoOpt·p0.90: 296.000 ns/op 664 | arraySumScalarNoOpt·p0.95: 307.000 ns/op 665 | arraySumScalarNoOpt·p0.99: 363.000 ns/op 666 | arraySumScalarNoOpt·p0.999: 1393.520 ns/op 667 | arraySumScalarNoOpt·p0.9999: 1993.520 ns/op 668 | arraySumScalarNoOpt·p1.00: 223232.000 ns/op 669 | 670 | Iteration 6: 290.693 ±(99.9%) 10.288 ns/op 671 | arraySumScalarNoOpt·p0.00: 238.000 ns/op 672 | arraySumScalarNoOpt·p0.50: 273.000 ns/op 673 | arraySumScalarNoOpt·p0.90: 295.000 ns/op 674 | arraySumScalarNoOpt·p0.95: 305.000 ns/op 675 | arraySumScalarNoOpt·p0.99: 357.000 ns/op 676 | arraySumScalarNoOpt·p0.999: 1372.486 ns/op 677 | arraySumScalarNoOpt·p0.9999: 1971.383 ns/op 678 | arraySumScalarNoOpt·p1.00: 204800.000 ns/op 679 | 680 | Iteration 7: 281.694 ±(99.9%) 4.601 ns/op 681 | arraySumScalarNoOpt·p0.00: 229.000 ns/op 682 | arraySumScalarNoOpt·p0.50: 274.000 ns/op 683 | arraySumScalarNoOpt·p0.90: 295.000 ns/op 684 | arraySumScalarNoOpt·p0.95: 305.000 ns/op 685 | arraySumScalarNoOpt·p0.99: 341.000 ns/op 686 | arraySumScalarNoOpt·p0.999: 1358.000 ns/op 687 | arraySumScalarNoOpt·p0.9999: 1931.506 ns/op 688 | arraySumScalarNoOpt·p1.00: 185344.000 ns/op 689 | 690 | Iteration 8: 292.478 ±(99.9%) 10.583 ns/op 691 | arraySumScalarNoOpt·p0.00: 225.000 ns/op 692 | arraySumScalarNoOpt·p0.50: 274.000 ns/op 693 | arraySumScalarNoOpt·p0.90: 296.000 ns/op 694 | arraySumScalarNoOpt·p0.95: 309.000 ns/op 695 | arraySumScalarNoOpt·p0.99: 364.000 ns/op 696 | arraySumScalarNoOpt·p0.999: 1364.082 ns/op 697 | arraySumScalarNoOpt·p0.9999: 2106.597 ns/op 698 | arraySumScalarNoOpt·p1.00: 214016.000 ns/op 699 | 700 | Iteration 9: 293.647 ±(99.9%) 11.477 ns/op 701 | arraySumScalarNoOpt·p0.00: 235.000 ns/op 702 | arraySumScalarNoOpt·p0.50: 274.000 ns/op 703 | arraySumScalarNoOpt·p0.90: 294.000 ns/op 704 | arraySumScalarNoOpt·p0.95: 304.000 ns/op 705 | arraySumScalarNoOpt·p0.99: 355.000 ns/op 706 | arraySumScalarNoOpt·p0.999: 1358.000 ns/op 707 | arraySumScalarNoOpt·p0.9999: 2120.936 ns/op 708 | 
arraySumScalarNoOpt·p1.00: 210944.000 ns/op 709 | 710 | Iteration 10: 290.961 ±(99.9%) 10.305 ns/op 711 | arraySumScalarNoOpt·p0.00: 235.000 ns/op 712 | arraySumScalarNoOpt·p0.50: 274.000 ns/op 713 | arraySumScalarNoOpt·p0.90: 295.000 ns/op 714 | arraySumScalarNoOpt·p0.95: 306.000 ns/op 715 | arraySumScalarNoOpt·p0.99: 356.000 ns/op 716 | arraySumScalarNoOpt·p0.999: 1365.160 ns/op 717 | arraySumScalarNoOpt·p0.9999: 2005.160 ns/op 718 | arraySumScalarNoOpt·p1.00: 209152.000 ns/op 719 | 720 | Iteration 11: 293.038 ±(99.9%) 10.942 ns/op 721 | arraySumScalarNoOpt·p0.00: 232.000 ns/op 722 | arraySumScalarNoOpt·p0.50: 273.000 ns/op 723 | arraySumScalarNoOpt·p0.90: 295.000 ns/op 724 | arraySumScalarNoOpt·p0.95: 306.000 ns/op 725 | arraySumScalarNoOpt·p0.99: 369.000 ns/op 726 | arraySumScalarNoOpt·p0.999: 1358.852 ns/op 727 | arraySumScalarNoOpt·p0.9999: 2176.793 ns/op 728 | arraySumScalarNoOpt·p1.00: 205568.000 ns/op 729 | 730 | Iteration 12: 280.454 ±(99.9%) 4.851 ns/op 731 | arraySumScalarNoOpt·p0.00: 224.000 ns/op 732 | arraySumScalarNoOpt·p0.50: 273.000 ns/op 733 | arraySumScalarNoOpt·p0.90: 293.000 ns/op 734 | arraySumScalarNoOpt·p0.95: 303.000 ns/op 735 | arraySumScalarNoOpt·p0.99: 342.000 ns/op 736 | arraySumScalarNoOpt·p0.999: 1338.000 ns/op 737 | arraySumScalarNoOpt·p0.9999: 1899.290 ns/op 738 | arraySumScalarNoOpt·p1.00: 200704.000 ns/op 739 | 740 | Iteration 13: 280.572 ±(99.9%) 4.868 ns/op 741 | arraySumScalarNoOpt·p0.00: 226.000 ns/op 742 | arraySumScalarNoOpt·p0.50: 273.000 ns/op 743 | arraySumScalarNoOpt·p0.90: 293.000 ns/op 744 | arraySumScalarNoOpt·p0.95: 303.000 ns/op 745 | arraySumScalarNoOpt·p0.99: 336.000 ns/op 746 | arraySumScalarNoOpt·p0.999: 1344.000 ns/op 747 | arraySumScalarNoOpt·p0.9999: 1911.301 ns/op 748 | arraySumScalarNoOpt·p1.00: 192000.000 ns/op 749 | 750 | Iteration 14: 290.182 ±(99.9%) 10.008 ns/op 751 | arraySumScalarNoOpt·p0.00: 227.000 ns/op 752 | arraySumScalarNoOpt·p0.50: 273.000 ns/op 753 | arraySumScalarNoOpt·p0.90: 294.000 ns/op 
754 | arraySumScalarNoOpt·p0.95: 305.000 ns/op 755 | arraySumScalarNoOpt·p0.99: 351.000 ns/op 756 | arraySumScalarNoOpt·p0.999: 1346.000 ns/op 757 | arraySumScalarNoOpt·p0.9999: 1933.345 ns/op 758 | arraySumScalarNoOpt·p1.00: 192512.000 ns/op 759 | 760 | Iteration 15: 280.499 ±(99.9%) 4.882 ns/op 761 | arraySumScalarNoOpt·p0.00: 224.000 ns/op 762 | arraySumScalarNoOpt·p0.50: 273.000 ns/op 763 | arraySumScalarNoOpt·p0.90: 293.000 ns/op 764 | arraySumScalarNoOpt·p0.95: 302.000 ns/op 765 | arraySumScalarNoOpt·p0.99: 333.000 ns/op 766 | arraySumScalarNoOpt·p0.999: 1342.000 ns/op 767 | arraySumScalarNoOpt·p0.9999: 1904.000 ns/op 768 | arraySumScalarNoOpt·p1.00: 203264.000 ns/op 769 | 770 | Iteration 16: 292.752 ±(99.9%) 11.272 ns/op 771 | arraySumScalarNoOpt·p0.00: 239.000 ns/op 772 | arraySumScalarNoOpt·p0.50: 272.000 ns/op 773 | arraySumScalarNoOpt·p0.90: 294.000 ns/op 774 | arraySumScalarNoOpt·p0.95: 307.000 ns/op 775 | arraySumScalarNoOpt·p0.99: 370.000 ns/op 776 | arraySumScalarNoOpt·p0.999: 1342.000 ns/op 777 | arraySumScalarNoOpt·p0.9999: 2056.371 ns/op 778 | arraySumScalarNoOpt·p1.00: 205056.000 ns/op 779 | 780 | Iteration 17: 278.580 ±(99.9%) 4.954 ns/op 781 | arraySumScalarNoOpt·p0.00: 228.000 ns/op 782 | arraySumScalarNoOpt·p0.50: 270.000 ns/op 783 | arraySumScalarNoOpt·p0.90: 291.000 ns/op 784 | arraySumScalarNoOpt·p0.95: 302.000 ns/op 785 | arraySumScalarNoOpt·p0.99: 337.000 ns/op 786 | arraySumScalarNoOpt·p0.999: 1332.000 ns/op 787 | arraySumScalarNoOpt·p0.9999: 1926.027 ns/op 788 | arraySumScalarNoOpt·p1.00: 197376.000 ns/op 789 | 790 | Iteration 18: 277.732 ±(99.9%) 4.859 ns/op 791 | arraySumScalarNoOpt·p0.00: 224.000 ns/op 792 | arraySumScalarNoOpt·p0.50: 270.000 ns/op 793 | arraySumScalarNoOpt·p0.90: 290.000 ns/op 794 | arraySumScalarNoOpt·p0.95: 301.000 ns/op 795 | arraySumScalarNoOpt·p0.99: 332.000 ns/op 796 | arraySumScalarNoOpt·p0.999: 1330.780 ns/op 797 | arraySumScalarNoOpt·p0.9999: 1920.878 ns/op 798 | arraySumScalarNoOpt·p1.00: 196864.000 ns/op 
799 | 800 | Iteration 19: 286.832 ±(99.9%) 9.803 ns/op 801 | arraySumScalarNoOpt·p0.00: 222.000 ns/op 802 | arraySumScalarNoOpt·p0.50: 270.000 ns/op 803 | arraySumScalarNoOpt·p0.90: 291.000 ns/op 804 | arraySumScalarNoOpt·p0.95: 303.000 ns/op 805 | arraySumScalarNoOpt·p0.99: 350.000 ns/op 806 | arraySumScalarNoOpt·p0.999: 1328.000 ns/op 807 | arraySumScalarNoOpt·p0.9999: 2026.662 ns/op 808 | arraySumScalarNoOpt·p1.00: 198400.000 ns/op 809 | 810 | Iteration 20: 308.389 ±(99.9%) 16.652 ns/op 811 | arraySumScalarNoOpt·p0.00: 229.000 ns/op 812 | arraySumScalarNoOpt·p0.50: 270.000 ns/op 813 | arraySumScalarNoOpt·p0.90: 291.000 ns/op 814 | arraySumScalarNoOpt·p0.95: 302.000 ns/op 815 | arraySumScalarNoOpt·p0.99: 346.000 ns/op 816 | arraySumScalarNoOpt·p0.999: 1358.564 ns/op 817 | arraySumScalarNoOpt·p0.9999: 171336.858 ns/op 818 | arraySumScalarNoOpt·p1.00: 214784.000 ns/op 819 | 820 | Iteration 21: 287.381 ±(99.9%) 9.999 ns/op 821 | arraySumScalarNoOpt·p0.00: 229.000 ns/op 822 | arraySumScalarNoOpt·p0.50: 270.000 ns/op 823 | arraySumScalarNoOpt·p0.90: 292.000 ns/op 824 | arraySumScalarNoOpt·p0.95: 303.000 ns/op 825 | arraySumScalarNoOpt·p0.99: 352.000 ns/op 826 | arraySumScalarNoOpt·p0.999: 1347.716 ns/op 827 | arraySumScalarNoOpt·p0.9999: 1970.858 ns/op 828 | arraySumScalarNoOpt·p1.00: 216832.000 ns/op 829 | 830 | Iteration 22: 288.131 ±(99.9%) 10.547 ns/op 831 | arraySumScalarNoOpt·p0.00: 224.000 ns/op 832 | arraySumScalarNoOpt·p0.50: 270.000 ns/op 833 | arraySumScalarNoOpt·p0.90: 291.000 ns/op 834 | arraySumScalarNoOpt·p0.95: 301.000 ns/op 835 | arraySumScalarNoOpt·p0.99: 344.000 ns/op 836 | arraySumScalarNoOpt·p0.999: 1350.364 ns/op 837 | arraySumScalarNoOpt·p0.9999: 2193.673 ns/op 838 | arraySumScalarNoOpt·p1.00: 204544.000 ns/op 839 | 840 | Iteration 23: 290.716 ±(99.9%) 11.207 ns/op 841 | arraySumScalarNoOpt·p0.00: 230.000 ns/op 842 | arraySumScalarNoOpt·p0.50: 271.000 ns/op 843 | arraySumScalarNoOpt·p0.90: 291.000 ns/op 844 | arraySumScalarNoOpt·p0.95: 302.000 
ns/op 845 | arraySumScalarNoOpt·p0.99: 348.000 ns/op 846 | arraySumScalarNoOpt·p0.999: 1381.238 ns/op 847 | arraySumScalarNoOpt·p0.9999: 2199.695 ns/op 848 | arraySumScalarNoOpt·p1.00: 199424.000 ns/op 849 | 850 | Iteration 24: 288.227 ±(99.9%) 10.345 ns/op 851 | arraySumScalarNoOpt·p0.00: 230.000 ns/op 852 | arraySumScalarNoOpt·p0.50: 271.000 ns/op 853 | arraySumScalarNoOpt·p0.90: 292.000 ns/op 854 | arraySumScalarNoOpt·p0.95: 303.000 ns/op 855 | arraySumScalarNoOpt·p0.99: 350.000 ns/op 856 | arraySumScalarNoOpt·p0.999: 1339.782 ns/op 857 | arraySumScalarNoOpt·p0.9999: 1968.804 ns/op 858 | arraySumScalarNoOpt·p1.00: 205056.000 ns/op 859 | 860 | Iteration 25: 285.149 ±(99.9%) 9.383 ns/op 861 | arraySumScalarNoOpt·p0.00: 224.000 ns/op 862 | arraySumScalarNoOpt·p0.50: 270.000 ns/op 863 | arraySumScalarNoOpt·p0.90: 291.000 ns/op 864 | arraySumScalarNoOpt·p0.95: 301.000 ns/op 865 | arraySumScalarNoOpt·p0.99: 347.000 ns/op 866 | arraySumScalarNoOpt·p0.999: 1334.000 ns/op 867 | arraySumScalarNoOpt·p0.9999: 1960.883 ns/op 868 | arraySumScalarNoOpt·p1.00: 203008.000 ns/op 869 | 870 | 871 | 872 | Result "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt": 873 | N = 5591963 874 | mean = 287.897 ±(99.9%) 1.890 ns/op 875 | 876 | Histogram, ns/op: 877 | [ 0.000, 25000.000) = 5591670 878 | [ 25000.000, 50000.000) = 0 879 | [ 50000.000, 75000.000) = 0 880 | [ 75000.000, 100000.000) = 0 881 | [100000.000, 125000.000) = 0 882 | [125000.000, 150000.000) = 0 883 | [150000.000, 175000.000) = 66 884 | [175000.000, 200000.000) = 192 885 | [200000.000, 225000.000) = 32 886 | [225000.000, 250000.000) = 0 887 | [250000.000, 275000.000) = 0 888 | [275000.000, 300000.000) = 0 889 | [300000.000, 325000.000) = 1 890 | [325000.000, 350000.000) = 2 891 | [350000.000, 375000.000) = 0 892 | 893 | Percentiles, ns/op: 894 | p(0.0000) = 221.000 ns/op 895 | p(50.0000) = 272.000 ns/op 896 | p(90.0000) = 293.000 ns/op 897 | p(95.0000) = 304.000 ns/op 898 | p(99.0000) = 349.000 ns/op 899 | 
p(99.9000) = 1352.000 ns/op 900 | p(99.9900) = 1960.000 ns/op 901 | p(99.9990) = 197376.000 ns/op 902 | p(99.9999) = 215619.658 ns/op 903 | p(100.0000) = 342528.000 ns/op 904 | 905 | 906 | # JMH version: 1.29 907 | # VM version: JDK 17.0.2, OpenJDK 64-Bit Server VM, 17.0.2+8-86 908 | # VM invoker: /home/styp/.jdks/openjdk-17.0.2/bin/java 909 | # VM options: -Dfile.encoding=UTF-8 -Djava.io.tmpdir=/home/styp/Desktop/java-vbench/build/tmp/jmh -Duser.country=US -Duser.language=en -Duser.variant --add-modules jdk.incubator.vector -XX:-UseSuperWord 910 | # Blackhole mode: full + dont-inline hint 911 | # Warmup: 1 iterations, 1 s each, 10 calls per op 912 | # Measurement: 25 iterations, 10 s each 913 | # Timeout: 10 min per iteration 914 | # Threads: 1 thread, ***WARNING: Synchronize iterations are disabled!*** 915 | # Benchmark mode: Sampling time 916 | # Benchmark: ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt 917 | # Parameters: (LENGTH = 65535) 918 | 919 | # Run progress: 42.86% complete, ETA 00:16:45 920 | # Fork: 1 of 1 921 | WARNING: Using incubator modules: jdk.incubator.vector 922 | # Warmup Iteration 1: 61914.990 ±(99.9%) 4886.222 ns/op 923 | Iteration 1: 4098.910 ±(99.9%) 41.511 ns/op 924 | arraySumScalarNoOpt·p0.00: 3740.000 ns/op 925 | arraySumScalarNoOpt·p0.50: 3912.000 ns/op 926 | arraySumScalarNoOpt·p0.90: 4216.000 ns/op 927 | arraySumScalarNoOpt·p0.95: 4456.000 ns/op 928 | arraySumScalarNoOpt·p0.99: 5752.000 ns/op 929 | arraySumScalarNoOpt·p0.999: 10672.000 ns/op 930 | arraySumScalarNoOpt·p0.9999: 218298.470 ns/op 931 | arraySumScalarNoOpt·p1.00: 498176.000 ns/op 932 | 933 | Iteration 2: 4000.456 ±(99.9%) 29.579 ns/op 934 | arraySumScalarNoOpt·p0.00: 3744.000 ns/op 935 | arraySumScalarNoOpt·p0.50: 3864.000 ns/op 936 | arraySumScalarNoOpt·p0.90: 4080.000 ns/op 937 | arraySumScalarNoOpt·p0.95: 4320.000 ns/op 938 | arraySumScalarNoOpt·p0.99: 5504.000 ns/op 939 | arraySumScalarNoOpt·p0.999: 6422.768 ns/op 940 | arraySumScalarNoOpt·p0.9999: 
183644.518 ns/op 941 | arraySumScalarNoOpt·p1.00: 209920.000 ns/op 942 | 943 | Iteration 3: 4033.460 ±(99.9%) 28.849 ns/op 944 | arraySumScalarNoOpt·p0.00: 3744.000 ns/op 945 | arraySumScalarNoOpt·p0.50: 3880.000 ns/op 946 | arraySumScalarNoOpt·p0.90: 4192.000 ns/op 947 | arraySumScalarNoOpt·p0.95: 4568.000 ns/op 948 | arraySumScalarNoOpt·p0.99: 5536.000 ns/op 949 | arraySumScalarNoOpt·p0.999: 6671.640 ns/op 950 | arraySumScalarNoOpt·p0.9999: 181656.448 ns/op 951 | arraySumScalarNoOpt·p1.00: 211200.000 ns/op 952 | 953 | Iteration 4: 4038.582 ±(99.9%) 31.932 ns/op 954 | arraySumScalarNoOpt·p0.00: 3724.000 ns/op 955 | arraySumScalarNoOpt·p0.50: 3872.000 ns/op 956 | arraySumScalarNoOpt·p0.90: 4184.000 ns/op 957 | arraySumScalarNoOpt·p0.95: 4552.000 ns/op 958 | arraySumScalarNoOpt·p0.99: 5536.000 ns/op 959 | arraySumScalarNoOpt·p0.999: 8086.432 ns/op 960 | arraySumScalarNoOpt·p0.9999: 190859.571 ns/op 961 | arraySumScalarNoOpt·p1.00: 222976.000 ns/op 962 | 963 | Iteration 5: 4177.855 ±(99.9%) 32.732 ns/op 964 | arraySumScalarNoOpt·p0.00: 3728.000 ns/op 965 | arraySumScalarNoOpt·p0.50: 3956.000 ns/op 966 | arraySumScalarNoOpt·p0.90: 4768.000 ns/op 967 | arraySumScalarNoOpt·p0.95: 4960.000 ns/op 968 | arraySumScalarNoOpt·p0.99: 5600.000 ns/op 969 | arraySumScalarNoOpt·p0.999: 6935.400 ns/op 970 | arraySumScalarNoOpt·p0.9999: 183294.080 ns/op 971 | arraySumScalarNoOpt·p1.00: 205312.000 ns/op 972 | 973 | Iteration 6: 4032.514 ±(99.9%) 34.426 ns/op 974 | arraySumScalarNoOpt·p0.00: 3724.000 ns/op 975 | arraySumScalarNoOpt·p0.50: 3864.000 ns/op 976 | arraySumScalarNoOpt·p0.90: 4176.000 ns/op 977 | arraySumScalarNoOpt·p0.95: 4560.000 ns/op 978 | arraySumScalarNoOpt·p0.99: 5480.000 ns/op 979 | arraySumScalarNoOpt·p0.999: 6803.024 ns/op 980 | arraySumScalarNoOpt·p0.9999: 196212.122 ns/op 981 | arraySumScalarNoOpt·p1.00: 217088.000 ns/op 982 | 983 | Iteration 7: 4010.641 ±(99.9%) 31.213 ns/op 984 | arraySumScalarNoOpt·p0.00: 3724.000 ns/op 985 | arraySumScalarNoOpt·p0.50: 
3852.000 ns/op 986 | arraySumScalarNoOpt·p0.90: 4144.000 ns/op 987 | arraySumScalarNoOpt·p0.95: 4496.000 ns/op 988 | arraySumScalarNoOpt·p0.99: 5488.000 ns/op 989 | arraySumScalarNoOpt·p0.999: 6713.424 ns/op 990 | arraySumScalarNoOpt·p0.9999: 196244.506 ns/op 991 | arraySumScalarNoOpt·p1.00: 218368.000 ns/op 992 | 993 | Iteration 8: 4019.507 ±(99.9%) 31.191 ns/op 994 | arraySumScalarNoOpt·p0.00: 3720.000 ns/op 995 | arraySumScalarNoOpt·p0.50: 3860.000 ns/op 996 | arraySumScalarNoOpt·p0.90: 4192.000 ns/op 997 | arraySumScalarNoOpt·p0.95: 4712.000 ns/op 998 | arraySumScalarNoOpt·p0.99: 5496.000 ns/op 999 | arraySumScalarNoOpt·p0.999: 6640.000 ns/op 1000 | arraySumScalarNoOpt·p0.9999: 201081.856 ns/op 1001 | arraySumScalarNoOpt·p1.00: 223744.000 ns/op 1002 | 1003 | Iteration 9: 4014.470 ±(99.9%) 30.490 ns/op 1004 | arraySumScalarNoOpt·p0.00: 3720.000 ns/op 1005 | arraySumScalarNoOpt·p0.50: 3856.000 ns/op 1006 | arraySumScalarNoOpt·p0.90: 4176.000 ns/op 1007 | arraySumScalarNoOpt·p0.95: 4624.000 ns/op 1008 | arraySumScalarNoOpt·p0.99: 5504.000 ns/op 1009 | arraySumScalarNoOpt·p0.999: 6682.176 ns/op 1010 | arraySumScalarNoOpt·p0.9999: 190342.963 ns/op 1011 | arraySumScalarNoOpt·p1.00: 204032.000 ns/op 1012 | 1013 | Iteration 10: 4125.274 ±(99.9%) 34.851 ns/op 1014 | arraySumScalarNoOpt·p0.00: 3724.000 ns/op 1015 | arraySumScalarNoOpt·p0.50: 3928.000 ns/op 1016 | arraySumScalarNoOpt·p0.90: 4688.000 ns/op 1017 | arraySumScalarNoOpt·p0.95: 4912.000 ns/op 1018 | arraySumScalarNoOpt·p0.99: 5592.000 ns/op 1019 | arraySumScalarNoOpt·p0.999: 6947.360 ns/op 1020 | arraySumScalarNoOpt·p0.9999: 197842.176 ns/op 1021 | arraySumScalarNoOpt·p1.00: 221952.000 ns/op 1022 | 1023 | Iteration 11: 4085.842 ±(99.9%) 32.901 ns/op 1024 | arraySumScalarNoOpt·p0.00: 3728.000 ns/op 1025 | arraySumScalarNoOpt·p0.50: 3904.000 ns/op 1026 | arraySumScalarNoOpt·p0.90: 4264.000 ns/op 1027 | arraySumScalarNoOpt·p0.95: 4752.000 ns/op 1028 | arraySumScalarNoOpt·p0.99: 5576.000 ns/op 1029 | 
arraySumScalarNoOpt·p0.999: 7754.128 ns/op 1030 | arraySumScalarNoOpt·p0.9999: 197818.010 ns/op 1031 | arraySumScalarNoOpt·p1.00: 215040.000 ns/op 1032 | 1033 | Iteration 12: 4068.053 ±(99.9%) 31.139 ns/op 1034 | arraySumScalarNoOpt·p0.00: 3736.000 ns/op 1035 | arraySumScalarNoOpt·p0.50: 3904.000 ns/op 1036 | arraySumScalarNoOpt·p0.90: 4216.000 ns/op 1037 | arraySumScalarNoOpt·p0.95: 4744.000 ns/op 1038 | arraySumScalarNoOpt·p0.99: 5552.000 ns/op 1039 | arraySumScalarNoOpt·p0.999: 6767.728 ns/op 1040 | arraySumScalarNoOpt·p0.9999: 207126.989 ns/op 1041 | arraySumScalarNoOpt·p1.00: 215296.000 ns/op 1042 | 1043 | Iteration 13: 4000.297 ±(99.9%) 32.655 ns/op 1044 | arraySumScalarNoOpt·p0.00: 3724.000 ns/op 1045 | arraySumScalarNoOpt·p0.50: 3844.000 ns/op 1046 | arraySumScalarNoOpt·p0.90: 4104.000 ns/op 1047 | arraySumScalarNoOpt·p0.95: 4408.000 ns/op 1048 | arraySumScalarNoOpt·p0.99: 5472.000 ns/op 1049 | arraySumScalarNoOpt·p0.999: 6605.064 ns/op 1050 | arraySumScalarNoOpt·p0.9999: 199448.653 ns/op 1051 | arraySumScalarNoOpt·p1.00: 218624.000 ns/op 1052 | 1053 | Iteration 14: 4015.606 ±(99.9%) 35.377 ns/op 1054 | arraySumScalarNoOpt·p0.00: 3728.000 ns/op 1055 | arraySumScalarNoOpt·p0.50: 3864.000 ns/op 1056 | arraySumScalarNoOpt·p0.90: 4096.000 ns/op 1057 | arraySumScalarNoOpt·p0.95: 4288.000 ns/op 1058 | arraySumScalarNoOpt·p0.99: 5480.000 ns/op 1059 | arraySumScalarNoOpt·p0.999: 6589.800 ns/op 1060 | arraySumScalarNoOpt·p0.9999: 197603.840 ns/op 1061 | arraySumScalarNoOpt·p1.00: 219392.000 ns/op 1062 | 1063 | Iteration 15: 4002.243 ±(99.9%) 31.822 ns/op 1064 | arraySumScalarNoOpt·p0.00: 3728.000 ns/op 1065 | arraySumScalarNoOpt·p0.50: 3856.000 ns/op 1066 | arraySumScalarNoOpt·p0.90: 4104.000 ns/op 1067 | arraySumScalarNoOpt·p0.95: 4344.000 ns/op 1068 | arraySumScalarNoOpt·p0.99: 5480.000 ns/op 1069 | arraySumScalarNoOpt·p0.999: 6568.000 ns/op 1070 | arraySumScalarNoOpt·p0.9999: 199347.302 ns/op 1071 | arraySumScalarNoOpt·p1.00: 218112.000 ns/op 1072 | 1073 | 
Iteration 16: 4062.512 ±(99.9%) 34.673 ns/op 1074 | arraySumScalarNoOpt·p0.00: 3732.000 ns/op 1075 | arraySumScalarNoOpt·p0.50: 3900.000 ns/op 1076 | arraySumScalarNoOpt·p0.90: 4136.000 ns/op 1077 | arraySumScalarNoOpt·p0.95: 4384.000 ns/op 1078 | arraySumScalarNoOpt·p0.99: 5544.000 ns/op 1079 | arraySumScalarNoOpt·p0.999: 6608.000 ns/op 1080 | arraySumScalarNoOpt·p0.9999: 195905.690 ns/op 1081 | arraySumScalarNoOpt·p1.00: 215808.000 ns/op 1082 | 1083 | Iteration 17: 4008.664 ±(99.9%) 33.800 ns/op 1084 | arraySumScalarNoOpt·p0.00: 3720.000 ns/op 1085 | arraySumScalarNoOpt·p0.50: 3856.000 ns/op 1086 | arraySumScalarNoOpt·p0.90: 4084.000 ns/op 1087 | arraySumScalarNoOpt·p0.95: 4344.000 ns/op 1088 | arraySumScalarNoOpt·p0.99: 5488.000 ns/op 1089 | arraySumScalarNoOpt·p0.999: 6592.000 ns/op 1090 | arraySumScalarNoOpt·p0.9999: 190562.125 ns/op 1091 | arraySumScalarNoOpt·p1.00: 201216.000 ns/op 1092 | 1093 | Iteration 18: 4030.280 ±(99.9%) 30.917 ns/op 1094 | arraySumScalarNoOpt·p0.00: 3732.000 ns/op 1095 | arraySumScalarNoOpt·p0.50: 3888.000 ns/op 1096 | arraySumScalarNoOpt·p0.90: 4112.000 ns/op 1097 | arraySumScalarNoOpt·p0.95: 4344.000 ns/op 1098 | arraySumScalarNoOpt·p0.99: 5528.000 ns/op 1099 | arraySumScalarNoOpt·p0.999: 6496.000 ns/op 1100 | arraySumScalarNoOpt·p0.9999: 196154.163 ns/op 1101 | arraySumScalarNoOpt·p1.00: 213504.000 ns/op 1102 | 1103 | Iteration 19: 4225.737 ±(99.9%) 36.504 ns/op 1104 | arraySumScalarNoOpt·p0.00: 3732.000 ns/op 1105 | arraySumScalarNoOpt·p0.50: 4000.000 ns/op 1106 | arraySumScalarNoOpt·p0.90: 4872.000 ns/op 1107 | arraySumScalarNoOpt·p0.95: 5032.000 ns/op 1108 | arraySumScalarNoOpt·p0.99: 5712.000 ns/op 1109 | arraySumScalarNoOpt·p0.999: 7695.872 ns/op 1110 | arraySumScalarNoOpt·p0.9999: 199856.333 ns/op 1111 | arraySumScalarNoOpt·p1.00: 221952.000 ns/op 1112 | 1113 | Iteration 20: 3999.610 ±(99.9%) 35.478 ns/op 1114 | arraySumScalarNoOpt·p0.00: 3724.000 ns/op 1115 | arraySumScalarNoOpt·p0.50: 3844.000 ns/op 1116 | 
arraySumScalarNoOpt·p0.90: 4060.000 ns/op 1117 | arraySumScalarNoOpt·p0.95: 4272.000 ns/op 1118 | arraySumScalarNoOpt·p0.99: 5464.000 ns/op 1119 | arraySumScalarNoOpt·p0.999: 6521.336 ns/op 1120 | arraySumScalarNoOpt·p0.9999: 199488.128 ns/op 1121 | arraySumScalarNoOpt·p1.00: 225536.000 ns/op 1122 | 1123 | Iteration 21: 4004.522 ±(99.9%) 34.231 ns/op 1124 | arraySumScalarNoOpt·p0.00: 3724.000 ns/op 1125 | arraySumScalarNoOpt·p0.50: 3852.000 ns/op 1126 | arraySumScalarNoOpt·p0.90: 4088.000 ns/op 1127 | arraySumScalarNoOpt·p0.95: 4336.000 ns/op 1128 | arraySumScalarNoOpt·p0.99: 5464.000 ns/op 1129 | arraySumScalarNoOpt·p0.999: 6480.000 ns/op 1130 | arraySumScalarNoOpt·p0.9999: 197350.861 ns/op 1131 | arraySumScalarNoOpt·p1.00: 219136.000 ns/op 1132 | 1133 | Iteration 22: 4057.925 ±(99.9%) 35.808 ns/op 1134 | arraySumScalarNoOpt·p0.00: 3728.000 ns/op 1135 | arraySumScalarNoOpt·p0.50: 3892.000 ns/op 1136 | arraySumScalarNoOpt·p0.90: 4128.000 ns/op 1137 | arraySumScalarNoOpt·p0.95: 4376.000 ns/op 1138 | arraySumScalarNoOpt·p0.99: 5512.000 ns/op 1139 | arraySumScalarNoOpt·p0.999: 6535.640 ns/op 1140 | arraySumScalarNoOpt·p0.9999: 195736.448 ns/op 1141 | arraySumScalarNoOpt·p1.00: 223232.000 ns/op 1142 | 1143 | Iteration 23: 4081.490 ±(99.9%) 32.706 ns/op 1144 | arraySumScalarNoOpt·p0.00: 3724.000 ns/op 1145 | arraySumScalarNoOpt·p0.50: 3904.000 ns/op 1146 | arraySumScalarNoOpt·p0.90: 4328.000 ns/op 1147 | arraySumScalarNoOpt·p0.95: 4768.000 ns/op 1148 | arraySumScalarNoOpt·p0.99: 5560.000 ns/op 1149 | arraySumScalarNoOpt·p0.999: 6904.776 ns/op 1150 | arraySumScalarNoOpt·p0.9999: 196071.347 ns/op 1151 | arraySumScalarNoOpt·p1.00: 218624.000 ns/op 1152 | 1153 | Iteration 24: 4016.126 ±(99.9%) 34.381 ns/op 1154 | arraySumScalarNoOpt·p0.00: 3720.000 ns/op 1155 | arraySumScalarNoOpt·p0.50: 3852.000 ns/op 1156 | arraySumScalarNoOpt·p0.90: 4112.000 ns/op 1157 | arraySumScalarNoOpt·p0.95: 4408.000 ns/op 1158 | arraySumScalarNoOpt·p0.99: 5488.000 ns/op 1159 | 
arraySumScalarNoOpt·p0.999: 6608.000 ns/op 1160 | arraySumScalarNoOpt·p0.9999: 189519.437 ns/op 1161 | arraySumScalarNoOpt·p1.00: 221440.000 ns/op 1162 | 1163 | Iteration 25: 4045.321 ±(99.9%) 35.571 ns/op 1164 | arraySumScalarNoOpt·p0.00: 3728.000 ns/op 1165 | arraySumScalarNoOpt·p0.50: 3880.000 ns/op 1166 | arraySumScalarNoOpt·p0.90: 4112.000 ns/op 1167 | arraySumScalarNoOpt·p0.95: 4416.000 ns/op 1168 | arraySumScalarNoOpt·p0.99: 5496.000 ns/op 1169 | arraySumScalarNoOpt·p0.999: 6624.000 ns/op 1170 | arraySumScalarNoOpt·p0.9999: 199747.072 ns/op 1171 | arraySumScalarNoOpt·p1.00: 219136.000 ns/op 1172 | 1173 | 1174 | 1175 | Result "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt": 1176 | N = 3098411 1177 | mean = 4049.493 ±(99.9%) 6.696 ns/op 1178 | 1179 | Histogram, ns/op: 1180 | [ 0.000, 50000.000) = 3097254 1181 | [ 50000.000, 100000.000) = 1 1182 | [100000.000, 150000.000) = 1 1183 | [150000.000, 200000.000) = 936 1184 | [200000.000, 250000.000) = 211 1185 | [250000.000, 300000.000) = 0 1186 | [300000.000, 350000.000) = 1 1187 | [350000.000, 400000.000) = 6 1188 | [400000.000, 450000.000) = 0 1189 | 1190 | Percentiles, ns/op: 1191 | p(0.0000) = 3720.000 ns/op 1192 | p(50.0000) = 3884.000 ns/op 1193 | p(90.0000) = 4192.000 ns/op 1194 | p(95.0000) = 4720.000 ns/op 1195 | p(99.0000) = 5528.000 ns/op 1196 | p(99.9000) = 7024.000 ns/op 1197 | p(99.9900) = 193576.653 ns/op 1198 | p(99.9990) = 216836.065 ns/op 1199 | p(99.9999) = 363469.613 ns/op 1200 | p(100.0000) = 498176.000 ns/op 1201 | 1202 | 1203 | # JMH version: 1.29 1204 | # VM version: JDK 17.0.2, OpenJDK 64-Bit Server VM, 17.0.2+8-86 1205 | # VM invoker: /home/styp/.jdks/openjdk-17.0.2/bin/java 1206 | # VM options: -Dfile.encoding=UTF-8 -Djava.io.tmpdir=/home/styp/Desktop/java-vbench/build/tmp/jmh -Duser.country=US -Duser.language=en -Duser.variant --add-modules jdk.incubator.vector -XX:-UseSuperWord 1207 | # Blackhole mode: full + dont-inline hint 1208 | # Warmup: 1 iterations, 1 s each, 10 calls 
per op 1209 | # Measurement: 25 iterations, 10 s each 1210 | # Timeout: 10 min per iteration 1211 | # Threads: 1 thread, ***WARNING: Synchronize iterations are disabled!*** 1212 | # Benchmark mode: Sampling time 1213 | # Benchmark: ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt 1214 | # Parameters: (LENGTH = 1048575) 1215 | 1216 | # Run progress: 57.14% complete, ETA 00:12:34 1217 | # Fork: 1 of 1 1218 | WARNING: Using incubator modules: jdk.incubator.vector 1219 | # Warmup Iteration 1: 936056.471 ±(99.9%) 138786.755 ns/op 1220 | Iteration 1: 74664.920 ±(99.9%) 561.536 ns/op 1221 | arraySumScalarNoOpt·p0.00: 70656.000 ns/op 1222 | arraySumScalarNoOpt·p0.50: 72064.000 ns/op 1223 | arraySumScalarNoOpt·p0.90: 75904.000 ns/op 1224 | arraySumScalarNoOpt·p0.95: 77184.000 ns/op 1225 | arraySumScalarNoOpt·p0.99: 81024.000 ns/op 1226 | arraySumScalarNoOpt·p0.999: 317775.872 ns/op 1227 | arraySumScalarNoOpt·p0.9999: 664867.635 ns/op 1228 | arraySumScalarNoOpt·p1.00: 727040.000 ns/op 1229 | 1230 | Iteration 2: 74963.381 ±(99.9%) 499.542 ns/op 1231 | arraySumScalarNoOpt·p0.00: 71168.000 ns/op 1232 | arraySumScalarNoOpt·p0.50: 72704.000 ns/op 1233 | arraySumScalarNoOpt·p0.90: 76672.000 ns/op 1234 | arraySumScalarNoOpt·p0.95: 77952.000 ns/op 1235 | arraySumScalarNoOpt·p0.99: 81280.000 ns/op 1236 | arraySumScalarNoOpt·p0.999: 308076.032 ns/op 1237 | arraySumScalarNoOpt·p0.9999: 328744.858 ns/op 1238 | arraySumScalarNoOpt·p1.00: 331776.000 ns/op 1239 | 1240 | Iteration 3: 74285.564 ±(99.9%) 492.866 ns/op 1241 | arraySumScalarNoOpt·p0.00: 70784.000 ns/op 1242 | arraySumScalarNoOpt·p0.50: 71936.000 ns/op 1243 | arraySumScalarNoOpt·p0.90: 75776.000 ns/op 1244 | arraySumScalarNoOpt·p0.95: 76928.000 ns/op 1245 | arraySumScalarNoOpt·p0.99: 79872.000 ns/op 1246 | arraySumScalarNoOpt·p0.999: 302080.000 ns/op 1247 | arraySumScalarNoOpt·p0.9999: 317750.989 ns/op 1248 | arraySumScalarNoOpt·p1.00: 318976.000 ns/op 1249 | 1250 | Iteration 4: 74118.043 ±(99.9%) 487.796 ns/op 1251 | 
arraySumScalarNoOpt·p0.00: 70784.000 ns/op 1252 | arraySumScalarNoOpt·p0.50: 71808.000 ns/op 1253 | arraySumScalarNoOpt·p0.90: 75648.000 ns/op 1254 | arraySumScalarNoOpt·p0.95: 76800.000 ns/op 1255 | arraySumScalarNoOpt·p0.99: 79616.000 ns/op 1256 | arraySumScalarNoOpt·p0.999: 300032.000 ns/op 1257 | arraySumScalarNoOpt·p0.9999: 313837.926 ns/op 1258 | arraySumScalarNoOpt·p1.00: 314368.000 ns/op 1259 | 1260 | Iteration 5: 74096.597 ±(99.9%) 489.987 ns/op 1261 | arraySumScalarNoOpt·p0.00: 70784.000 ns/op 1262 | arraySumScalarNoOpt·p0.50: 71808.000 ns/op 1263 | arraySumScalarNoOpt·p0.90: 75648.000 ns/op 1264 | arraySumScalarNoOpt·p0.95: 76672.000 ns/op 1265 | arraySumScalarNoOpt·p0.99: 79744.000 ns/op 1266 | arraySumScalarNoOpt·p0.999: 304407.040 ns/op 1267 | arraySumScalarNoOpt·p0.9999: 314172.416 ns/op 1268 | arraySumScalarNoOpt·p1.00: 314880.000 ns/op 1269 | 1270 | Iteration 6: 74154.692 ±(99.9%) 503.733 ns/op 1271 | arraySumScalarNoOpt·p0.00: 70656.000 ns/op 1272 | arraySumScalarNoOpt·p0.50: 71808.000 ns/op 1273 | arraySumScalarNoOpt·p0.90: 75776.000 ns/op 1274 | arraySumScalarNoOpt·p0.95: 76928.000 ns/op 1275 | arraySumScalarNoOpt·p0.99: 79872.000 ns/op 1276 | arraySumScalarNoOpt·p0.999: 305152.000 ns/op 1277 | arraySumScalarNoOpt·p0.9999: 320142.080 ns/op 1278 | arraySumScalarNoOpt·p1.00: 321024.000 ns/op 1279 | 1280 | Iteration 7: 74270.536 ±(99.9%) 500.359 ns/op 1281 | arraySumScalarNoOpt·p0.00: 70784.000 ns/op 1282 | arraySumScalarNoOpt·p0.50: 72064.000 ns/op 1283 | arraySumScalarNoOpt·p0.90: 75776.000 ns/op 1284 | arraySumScalarNoOpt·p0.95: 76928.000 ns/op 1285 | arraySumScalarNoOpt·p0.99: 80000.000 ns/op 1286 | arraySumScalarNoOpt·p0.999: 306983.424 ns/op 1287 | arraySumScalarNoOpt·p0.9999: 321845.197 ns/op 1288 | arraySumScalarNoOpt·p1.00: 323072.000 ns/op 1289 | 1290 | Iteration 8: 74111.867 ±(99.9%) 498.588 ns/op 1291 | arraySumScalarNoOpt·p0.00: 70656.000 ns/op 1292 | arraySumScalarNoOpt·p0.50: 71680.000 ns/op 1293 | arraySumScalarNoOpt·p0.90: 
75648.000 ns/op 1294 | arraySumScalarNoOpt·p0.95: 76800.000 ns/op 1295 | arraySumScalarNoOpt·p0.99: 79488.000 ns/op 1296 | arraySumScalarNoOpt·p0.999: 304640.000 ns/op 1297 | arraySumScalarNoOpt·p0.9999: 314684.826 ns/op 1298 | arraySumScalarNoOpt·p1.00: 315392.000 ns/op 1299 | 1300 | Iteration 9: 74217.198 ±(99.9%) 506.445 ns/op 1301 | arraySumScalarNoOpt·p0.00: 70784.000 ns/op 1302 | arraySumScalarNoOpt·p0.50: 72064.000 ns/op 1303 | arraySumScalarNoOpt·p0.90: 75648.000 ns/op 1304 | arraySumScalarNoOpt·p0.95: 76800.000 ns/op 1305 | arraySumScalarNoOpt·p0.99: 79616.000 ns/op 1306 | arraySumScalarNoOpt·p0.999: 312609.792 ns/op 1307 | arraySumScalarNoOpt·p0.9999: 322720.358 ns/op 1308 | arraySumScalarNoOpt·p1.00: 323072.000 ns/op 1309 | 1310 | Iteration 10: 74292.377 ±(99.9%) 503.883 ns/op 1311 | arraySumScalarNoOpt·p0.00: 70656.000 ns/op 1312 | arraySumScalarNoOpt·p0.50: 72064.000 ns/op 1313 | arraySumScalarNoOpt·p0.90: 75776.000 ns/op 1314 | arraySumScalarNoOpt·p0.95: 77056.000 ns/op 1315 | arraySumScalarNoOpt·p0.99: 79872.000 ns/op 1316 | arraySumScalarNoOpt·p0.999: 309544.960 ns/op 1317 | arraySumScalarNoOpt·p0.9999: 320336.896 ns/op 1318 | arraySumScalarNoOpt·p1.00: 320512.000 ns/op 1319 | 1320 | Iteration 11: 74219.331 ±(99.9%) 507.269 ns/op 1321 | arraySumScalarNoOpt·p0.00: 70656.000 ns/op 1322 | arraySumScalarNoOpt·p0.50: 71808.000 ns/op 1323 | arraySumScalarNoOpt·p0.90: 75648.000 ns/op 1324 | arraySumScalarNoOpt·p0.95: 76800.000 ns/op 1325 | arraySumScalarNoOpt·p0.99: 80085.760 ns/op 1326 | arraySumScalarNoOpt·p0.999: 307712.000 ns/op 1327 | arraySumScalarNoOpt·p0.9999: 323553.382 ns/op 1328 | arraySumScalarNoOpt·p1.00: 324608.000 ns/op 1329 | 1330 | Iteration 12: 74441.988 ±(99.9%) 501.937 ns/op 1331 | arraySumScalarNoOpt·p0.00: 70784.000 ns/op 1332 | arraySumScalarNoOpt·p0.50: 72192.000 ns/op 1333 | arraySumScalarNoOpt·p0.90: 75904.000 ns/op 1334 | arraySumScalarNoOpt·p0.95: 77056.000 ns/op 1335 | arraySumScalarNoOpt·p0.99: 80000.000 ns/op 1336 | 
arraySumScalarNoOpt·p0.999: 308534.784 ns/op 1337 | arraySumScalarNoOpt·p0.9999: 321481.011 ns/op 1338 | arraySumScalarNoOpt·p1.00: 324608.000 ns/op 1339 | 1340 | Iteration 13: 74245.462 ±(99.9%) 512.068 ns/op 1341 | arraySumScalarNoOpt·p0.00: 70400.000 ns/op 1342 | arraySumScalarNoOpt·p0.50: 71936.000 ns/op 1343 | arraySumScalarNoOpt·p0.90: 75648.000 ns/op 1344 | arraySumScalarNoOpt·p0.95: 76800.000 ns/op 1345 | arraySumScalarNoOpt·p0.99: 80312.320 ns/op 1346 | arraySumScalarNoOpt·p0.999: 309028.864 ns/op 1347 | arraySumScalarNoOpt·p0.9999: 356515.021 ns/op 1348 | arraySumScalarNoOpt·p1.00: 369152.000 ns/op 1349 | 1350 | Iteration 14: 74173.400 ±(99.9%) 525.962 ns/op 1351 | arraySumScalarNoOpt·p0.00: 70528.000 ns/op 1352 | arraySumScalarNoOpt·p0.50: 71808.000 ns/op 1353 | arraySumScalarNoOpt·p0.90: 75520.000 ns/op 1354 | arraySumScalarNoOpt·p0.95: 76672.000 ns/op 1355 | arraySumScalarNoOpt·p0.99: 79744.000 ns/op 1356 | arraySumScalarNoOpt·p0.999: 307998.208 ns/op 1357 | arraySumScalarNoOpt·p0.9999: 576960.256 ns/op 1358 | arraySumScalarNoOpt·p1.00: 655360.000 ns/op 1359 | 1360 | Iteration 15: 74287.850 ±(99.9%) 504.466 ns/op 1361 | arraySumScalarNoOpt·p0.00: 70784.000 ns/op 1362 | arraySumScalarNoOpt·p0.50: 71808.000 ns/op 1363 | arraySumScalarNoOpt·p0.90: 75776.000 ns/op 1364 | arraySumScalarNoOpt·p0.95: 76928.000 ns/op 1365 | arraySumScalarNoOpt·p0.99: 80640.000 ns/op 1366 | arraySumScalarNoOpt·p0.999: 306473.472 ns/op 1367 | arraySumScalarNoOpt·p0.9999: 352198.195 ns/op 1368 | arraySumScalarNoOpt·p1.00: 368128.000 ns/op 1369 | 1370 | Iteration 16: 74834.291 ±(99.9%) 513.376 ns/op 1371 | arraySumScalarNoOpt·p0.00: 70656.000 ns/op 1372 | arraySumScalarNoOpt·p0.50: 73088.000 ns/op 1373 | arraySumScalarNoOpt·p0.90: 76416.000 ns/op 1374 | arraySumScalarNoOpt·p0.95: 77568.000 ns/op 1375 | arraySumScalarNoOpt·p0.99: 81280.000 ns/op 1376 | arraySumScalarNoOpt·p0.999: 307035.136 ns/op 1377 | arraySumScalarNoOpt·p0.9999: 381209.498 ns/op 1378 | arraySumScalarNoOpt·p1.00: 
413696.000 ns/op 1379 | 1380 | Iteration 17: 76210.087 ±(99.9%) 515.475 ns/op 1381 | arraySumScalarNoOpt·p0.00: 72576.000 ns/op 1382 | arraySumScalarNoOpt·p0.50: 73984.000 ns/op 1383 | arraySumScalarNoOpt·p0.90: 77696.000 ns/op 1384 | arraySumScalarNoOpt·p0.95: 78848.000 ns/op 1385 | arraySumScalarNoOpt·p0.99: 82325.760 ns/op 1386 | arraySumScalarNoOpt·p0.999: 309248.000 ns/op 1387 | arraySumScalarNoOpt·p0.9999: 320588.902 ns/op 1388 | arraySumScalarNoOpt·p1.00: 321536.000 ns/op 1389 | 1390 | Iteration 18: 75861.009 ±(99.9%) 507.004 ns/op 1391 | arraySumScalarNoOpt·p0.00: 72320.000 ns/op 1392 | arraySumScalarNoOpt·p0.50: 73600.000 ns/op 1393 | arraySumScalarNoOpt·p0.90: 77312.000 ns/op 1394 | arraySumScalarNoOpt·p0.95: 78336.000 ns/op 1395 | arraySumScalarNoOpt·p0.99: 81152.000 ns/op 1396 | arraySumScalarNoOpt·p0.999: 305152.000 ns/op 1397 | arraySumScalarNoOpt·p0.9999: 334832.998 ns/op 1398 | arraySumScalarNoOpt·p1.00: 343040.000 ns/op 1399 | 1400 | Iteration 19: 75932.283 ±(99.9%) 505.478 ns/op 1401 | arraySumScalarNoOpt·p0.00: 72448.000 ns/op 1402 | arraySumScalarNoOpt·p0.50: 73728.000 ns/op 1403 | arraySumScalarNoOpt·p0.90: 77440.000 ns/op 1404 | arraySumScalarNoOpt·p0.95: 78592.000 ns/op 1405 | arraySumScalarNoOpt·p0.99: 81625.600 ns/op 1406 | arraySumScalarNoOpt·p0.999: 307712.000 ns/op 1407 | arraySumScalarNoOpt·p0.9999: 326718.464 ns/op 1408 | arraySumScalarNoOpt·p1.00: 327680.000 ns/op 1409 | 1410 | Iteration 20: 75817.891 ±(99.9%) 494.813 ns/op 1411 | arraySumScalarNoOpt·p0.00: 72448.000 ns/op 1412 | arraySumScalarNoOpt·p0.50: 73472.000 ns/op 1413 | arraySumScalarNoOpt·p0.90: 77312.000 ns/op 1414 | arraySumScalarNoOpt·p0.95: 78464.000 ns/op 1415 | arraySumScalarNoOpt·p0.99: 81024.000 ns/op 1416 | arraySumScalarNoOpt·p0.999: 303027.712 ns/op 1417 | arraySumScalarNoOpt·p0.9999: 311513.856 ns/op 1418 | arraySumScalarNoOpt·p1.00: 312320.000 ns/op 1419 | 1420 | Iteration 21: 75881.202 ±(99.9%) 501.499 ns/op 1421 | arraySumScalarNoOpt·p0.00: 72448.000 ns/op 
1422 | arraySumScalarNoOpt·p0.50: 73600.000 ns/op 1423 | arraySumScalarNoOpt·p0.90: 77312.000 ns/op 1424 | arraySumScalarNoOpt·p0.95: 78592.000 ns/op 1425 | arraySumScalarNoOpt·p0.99: 81280.000 ns/op 1426 | arraySumScalarNoOpt·p0.999: 303544.832 ns/op 1427 | arraySumScalarNoOpt·p0.9999: 314909.850 ns/op 1428 | arraySumScalarNoOpt·p1.00: 315392.000 ns/op 1429 | 1430 | Iteration 22: 75954.449 ±(99.9%) 502.240 ns/op 1431 | arraySumScalarNoOpt·p0.00: 72448.000 ns/op 1432 | arraySumScalarNoOpt·p0.50: 73728.000 ns/op 1433 | arraySumScalarNoOpt·p0.90: 77312.000 ns/op 1434 | arraySumScalarNoOpt·p0.95: 78464.000 ns/op 1435 | arraySumScalarNoOpt·p0.99: 81408.000 ns/op 1436 | arraySumScalarNoOpt·p0.999: 306683.904 ns/op 1437 | arraySumScalarNoOpt·p0.9999: 321055.846 ns/op 1438 | arraySumScalarNoOpt·p1.00: 321536.000 ns/op 1439 | 1440 | Iteration 23: 75969.634 ±(99.9%) 507.068 ns/op 1441 | arraySumScalarNoOpt·p0.00: 72448.000 ns/op 1442 | arraySumScalarNoOpt·p0.50: 73856.000 ns/op 1443 | arraySumScalarNoOpt·p0.90: 77440.000 ns/op 1444 | arraySumScalarNoOpt·p0.95: 78464.000 ns/op 1445 | arraySumScalarNoOpt·p0.99: 81280.000 ns/op 1446 | arraySumScalarNoOpt·p0.999: 306112.512 ns/op 1447 | arraySumScalarNoOpt·p0.9999: 319424.512 ns/op 1448 | arraySumScalarNoOpt·p1.00: 321024.000 ns/op 1449 | 1450 | Iteration 24: 75807.684 ±(99.9%) 503.474 ns/op 1451 | arraySumScalarNoOpt·p0.00: 72448.000 ns/op 1452 | arraySumScalarNoOpt·p0.50: 73472.000 ns/op 1453 | arraySumScalarNoOpt·p0.90: 77312.000 ns/op 1454 | arraySumScalarNoOpt·p0.95: 78464.000 ns/op 1455 | arraySumScalarNoOpt·p0.99: 81152.000 ns/op 1456 | arraySumScalarNoOpt·p0.999: 306610.688 ns/op 1457 | arraySumScalarNoOpt·p0.9999: 313884.006 ns/op 1458 | arraySumScalarNoOpt·p1.00: 314368.000 ns/op 1459 | 1460 | Iteration 25: 75909.224 ±(99.9%) 517.476 ns/op 1461 | arraySumScalarNoOpt·p0.00: 72448.000 ns/op 1462 | arraySumScalarNoOpt·p0.50: 73472.000 ns/op 1463 | arraySumScalarNoOpt·p0.90: 77312.000 ns/op 1464 | 
arraySumScalarNoOpt·p0.95: 78336.000 ns/op 1465 | arraySumScalarNoOpt·p0.99: 81920.000 ns/op 1466 | arraySumScalarNoOpt·p0.999: 310066.176 ns/op 1467 | arraySumScalarNoOpt·p0.9999: 353976.832 ns/op 1468 | arraySumScalarNoOpt·p1.00: 367616.000 ns/op 1469 | 1470 | 1471 | 1472 | Result "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt": 1473 | N = 332714 1474 | mean = 74900.506 ±(99.9%) 101.429 ns/op 1475 | 1476 | Histogram, ns/op: 1477 | [ 0.000, 50000.000) = 0 1478 | [ 50000.000, 100000.000) = 330561 1479 | [100000.000, 150000.000) = 55 1480 | [150000.000, 200000.000) = 32 1481 | [200000.000, 250000.000) = 5 1482 | [250000.000, 300000.000) = 1249 1483 | [300000.000, 350000.000) = 802 1484 | [350000.000, 400000.000) = 4 1485 | [400000.000, 450000.000) = 2 1486 | [450000.000, 500000.000) = 1 1487 | [500000.000, 550000.000) = 1 1488 | [550000.000, 600000.000) = 0 1489 | [600000.000, 650000.000) = 0 1490 | [650000.000, 700000.000) = 1 1491 | [700000.000, 750000.000) = 1 1492 | 1493 | Percentiles, ns/op: 1494 | p(0.0000) = 70400.000 ns/op 1495 | p(50.0000) = 73088.000 ns/op 1496 | p(90.0000) = 76544.000 ns/op 1497 | p(95.0000) = 77696.000 ns/op 1498 | p(99.0000) = 80896.000 ns/op 1499 | p(99.9000) = 307200.000 ns/op 1500 | p(99.9900) = 321536.000 ns/op 1501 | p(99.9990) = 522414.413 ns/op 1502 | p(99.9999) = 727040.000 ns/op 1503 | p(100.0000) = 727040.000 ns/op 1504 | 1505 | 1506 | # JMH version: 1.29 1507 | # VM version: JDK 17.0.2, OpenJDK 64-Bit Server VM, 17.0.2+8-86 1508 | # VM invoker: /home/styp/.jdks/openjdk-17.0.2/bin/java 1509 | # VM options: -Dfile.encoding=UTF-8 -Djava.io.tmpdir=/home/styp/Desktop/java-vbench/build/tmp/jmh -Duser.country=US -Duser.language=en -Duser.variant --add-modules jdk.incubator.vector -XX:-UseSuperWord 1510 | # Blackhole mode: full + dont-inline hint 1511 | # Warmup: 1 iterations, 1 s each, 10 calls per op 1512 | # Measurement: 25 iterations, 10 s each 1513 | # Timeout: 10 min per iteration 1514 | # Threads: 1 thread, 
***WARNING: Synchronize iterations are disabled!*** 1515 | # Benchmark mode: Sampling time 1516 | # Benchmark: ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt 1517 | # Parameters: (LENGTH = 16777215) 1518 | 1519 | # Run progress: 71.43% complete, ETA 00:08:22 1520 | # Fork: 1 of 1 1521 | WARNING: Using incubator modules: jdk.incubator.vector 1522 | # Warmup Iteration 1: 23330816.000 ±(99.9%) 98949795.609 ns/op 1523 | Iteration 1: 1796350.575 ±(99.9%) 27752.144 ns/op 1524 | arraySumScalarNoOpt·p0.00: 1732608.000 ns/op 1525 | arraySumScalarNoOpt·p0.50: 1753088.000 ns/op 1526 | arraySumScalarNoOpt·p0.90: 1777664.000 ns/op 1527 | arraySumScalarNoOpt·p0.95: 2019328.000 ns/op 1528 | arraySumScalarNoOpt·p0.99: 3328409.600 ns/op 1529 | arraySumScalarNoOpt·p0.999: 3354624.000 ns/op 1530 | arraySumScalarNoOpt·p0.9999: 3354624.000 ns/op 1531 | arraySumScalarNoOpt·p1.00: 3354624.000 ns/op 1532 | 1533 | Iteration 2: 1775796.374 ±(99.9%) 9997.700 ns/op 1534 | arraySumScalarNoOpt·p0.00: 1732608.000 ns/op 1535 | arraySumScalarNoOpt·p0.50: 1755136.000 ns/op 1536 | arraySumScalarNoOpt·p0.90: 1791180.800 ns/op 1537 | arraySumScalarNoOpt·p0.95: 2002329.600 ns/op 1538 | arraySumScalarNoOpt·p0.99: 2028625.920 ns/op 1539 | arraySumScalarNoOpt·p0.999: 2082816.000 ns/op 1540 | arraySumScalarNoOpt·p0.9999: 2082816.000 ns/op 1541 | arraySumScalarNoOpt·p1.00: 2082816.000 ns/op 1542 | 1543 | Iteration 3: 1772776.557 ±(99.9%) 10005.827 ns/op 1544 | arraySumScalarNoOpt·p0.00: 1730560.000 ns/op 1545 | arraySumScalarNoOpt·p0.50: 1751040.000 ns/op 1546 | arraySumScalarNoOpt·p0.90: 1773568.000 ns/op 1547 | arraySumScalarNoOpt·p0.95: 1994035.200 ns/op 1548 | arraySumScalarNoOpt·p0.99: 2016317.440 ns/op 1549 | arraySumScalarNoOpt·p0.999: 2025472.000 ns/op 1550 | arraySumScalarNoOpt·p0.9999: 2025472.000 ns/op 1551 | arraySumScalarNoOpt·p1.00: 2025472.000 ns/op 1552 | 1553 | Iteration 4: 1774783.297 ±(99.9%) 10206.280 ns/op 1554 | arraySumScalarNoOpt·p0.00: 1732608.000 ns/op 1555 | 
arraySumScalarNoOpt·p0.50: 1751040.000 ns/op 1556 | arraySumScalarNoOpt·p0.90: 1783603.200 ns/op 1557 | arraySumScalarNoOpt·p0.95: 1998131.200 ns/op 1558 | arraySumScalarNoOpt·p0.99: 2014269.440 ns/op 1559 | arraySumScalarNoOpt·p0.999: 2021376.000 ns/op 1560 | arraySumScalarNoOpt·p0.9999: 2021376.000 ns/op 1561 | arraySumScalarNoOpt·p1.00: 2021376.000 ns/op 1562 | 1563 | Iteration 5: 1778637.270 ±(99.9%) 10260.370 ns/op 1564 | arraySumScalarNoOpt·p0.00: 1736704.000 ns/op 1565 | arraySumScalarNoOpt·p0.50: 1755136.000 ns/op 1566 | arraySumScalarNoOpt·p0.90: 1779712.000 ns/op 1567 | arraySumScalarNoOpt·p0.95: 2002944.000 ns/op 1568 | arraySumScalarNoOpt·p0.99: 2021376.000 ns/op 1569 | arraySumScalarNoOpt·p0.999: 2025472.000 ns/op 1570 | arraySumScalarNoOpt·p0.9999: 2025472.000 ns/op 1571 | arraySumScalarNoOpt·p1.00: 2025472.000 ns/op 1572 | 1573 | Iteration 6: 1807894.925 ±(99.9%) 12499.387 ns/op 1574 | arraySumScalarNoOpt·p0.00: 1757184.000 ns/op 1575 | arraySumScalarNoOpt·p0.50: 1783808.000 ns/op 1576 | arraySumScalarNoOpt·p0.90: 1817190.400 ns/op 1577 | arraySumScalarNoOpt·p0.95: 2025779.200 ns/op 1578 | arraySumScalarNoOpt·p0.99: 2041856.000 ns/op 1579 | arraySumScalarNoOpt·p0.999: 3006464.000 ns/op 1580 | arraySumScalarNoOpt·p0.9999: 3006464.000 ns/op 1581 | arraySumScalarNoOpt·p1.00: 3006464.000 ns/op 1582 | 1583 | Iteration 7: 1815532.824 ±(99.9%) 10632.596 ns/op 1584 | arraySumScalarNoOpt·p0.00: 1769472.000 ns/op 1585 | arraySumScalarNoOpt·p0.50: 1792000.000 ns/op 1586 | arraySumScalarNoOpt·p0.90: 1916928.000 ns/op 1587 | arraySumScalarNoOpt·p0.95: 2036224.000 ns/op 1588 | arraySumScalarNoOpt·p0.99: 2054144.000 ns/op 1589 | arraySumScalarNoOpt·p0.999: 2076672.000 ns/op 1590 | arraySumScalarNoOpt·p0.9999: 2076672.000 ns/op 1591 | arraySumScalarNoOpt·p1.00: 2076672.000 ns/op 1592 | 1593 | Iteration 8: 1806045.612 ±(99.9%) 10510.681 ns/op 1594 | arraySumScalarNoOpt·p0.00: 1759232.000 ns/op 1595 | arraySumScalarNoOpt·p0.50: 1781760.000 ns/op 1596 | 
arraySumScalarNoOpt·p0.90: 1877401.600 ns/op 1597 | arraySumScalarNoOpt·p0.95: 2025472.000 ns/op 1598 | arraySumScalarNoOpt·p0.99: 2045194.240 ns/op 1599 | arraySumScalarNoOpt·p0.999: 2052096.000 ns/op 1600 | arraySumScalarNoOpt·p0.9999: 2052096.000 ns/op 1601 | arraySumScalarNoOpt·p1.00: 2052096.000 ns/op 1602 | 1603 | Iteration 9: 1803052.335 ±(99.9%) 10554.621 ns/op 1604 | arraySumScalarNoOpt·p0.00: 1755136.000 ns/op 1605 | arraySumScalarNoOpt·p0.50: 1779712.000 ns/op 1606 | arraySumScalarNoOpt·p0.90: 1856716.800 ns/op 1607 | arraySumScalarNoOpt·p0.95: 2023424.000 ns/op 1608 | arraySumScalarNoOpt·p0.99: 2035712.000 ns/op 1609 | arraySumScalarNoOpt·p0.999: 2054144.000 ns/op 1610 | arraySumScalarNoOpt·p0.9999: 2054144.000 ns/op 1611 | arraySumScalarNoOpt·p1.00: 2054144.000 ns/op 1612 | 1613 | Iteration 10: 1814255.700 ±(99.9%) 10342.558 ns/op 1614 | arraySumScalarNoOpt·p0.00: 1767424.000 ns/op 1615 | arraySumScalarNoOpt·p0.50: 1789952.000 ns/op 1616 | arraySumScalarNoOpt·p0.90: 2001920.000 ns/op 1617 | arraySumScalarNoOpt·p0.95: 2027520.000 ns/op 1618 | arraySumScalarNoOpt·p0.99: 2043904.000 ns/op 1619 | arraySumScalarNoOpt·p0.999: 2050048.000 ns/op 1620 | arraySumScalarNoOpt·p0.9999: 2050048.000 ns/op 1621 | arraySumScalarNoOpt·p1.00: 2050048.000 ns/op 1622 | 1623 | Iteration 11: 1807065.791 ±(99.9%) 10853.978 ns/op 1624 | arraySumScalarNoOpt·p0.00: 1763328.000 ns/op 1625 | arraySumScalarNoOpt·p0.50: 1781760.000 ns/op 1626 | arraySumScalarNoOpt·p0.90: 2002739.200 ns/op 1627 | arraySumScalarNoOpt·p0.95: 2023424.000 ns/op 1628 | arraySumScalarNoOpt·p0.99: 2037002.240 ns/op 1629 | arraySumScalarNoOpt·p0.999: 2043904.000 ns/op 1630 | arraySumScalarNoOpt·p0.9999: 2043904.000 ns/op 1631 | arraySumScalarNoOpt·p1.00: 2043904.000 ns/op 1632 | 1633 | Iteration 12: 1819115.827 ±(99.9%) 11396.072 ns/op 1634 | arraySumScalarNoOpt·p0.00: 1767424.000 ns/op 1635 | arraySumScalarNoOpt·p0.50: 1792000.000 ns/op 1636 | arraySumScalarNoOpt·p0.90: 2014412.800 ns/op 1637 | 
arraySumScalarNoOpt·p0.95: 2027520.000 ns/op 1638 | arraySumScalarNoOpt·p0.99: 2043207.680 ns/op 1639 | arraySumScalarNoOpt·p0.999: 2486272.000 ns/op 1640 | arraySumScalarNoOpt·p0.9999: 2486272.000 ns/op 1641 | arraySumScalarNoOpt·p1.00: 2486272.000 ns/op 1642 | 1643 | Iteration 13: 1810757.383 ±(99.9%) 11064.149 ns/op 1644 | arraySumScalarNoOpt·p0.00: 1767424.000 ns/op 1645 | arraySumScalarNoOpt·p0.50: 1783808.000 ns/op 1646 | arraySumScalarNoOpt·p0.90: 2015232.000 ns/op 1647 | arraySumScalarNoOpt·p0.95: 2027520.000 ns/op 1648 | arraySumScalarNoOpt·p0.99: 2050621.440 ns/op 1649 | arraySumScalarNoOpt·p0.999: 2062336.000 ns/op 1650 | arraySumScalarNoOpt·p0.9999: 2062336.000 ns/op 1651 | arraySumScalarNoOpt·p1.00: 2062336.000 ns/op 1652 | 1653 | Iteration 14: 1824621.439 ±(99.9%) 10905.007 ns/op 1654 | arraySumScalarNoOpt·p0.00: 1775616.000 ns/op 1655 | arraySumScalarNoOpt·p0.50: 1798144.000 ns/op 1656 | arraySumScalarNoOpt·p0.90: 2021376.000 ns/op 1657 | arraySumScalarNoOpt·p0.95: 2039808.000 ns/op 1658 | arraySumScalarNoOpt·p0.99: 2052096.000 ns/op 1659 | arraySumScalarNoOpt·p0.999: 2062336.000 ns/op 1660 | arraySumScalarNoOpt·p0.9999: 2062336.000 ns/op 1661 | arraySumScalarNoOpt·p1.00: 2062336.000 ns/op 1662 | 1663 | Iteration 15: 1840147.468 ±(99.9%) 10768.291 ns/op 1664 | arraySumScalarNoOpt·p0.00: 1792000.000 ns/op 1665 | arraySumScalarNoOpt·p0.50: 1814528.000 ns/op 1666 | arraySumScalarNoOpt·p0.90: 2036326.400 ns/op 1667 | arraySumScalarNoOpt·p0.95: 2050048.000 ns/op 1668 | arraySumScalarNoOpt·p0.99: 2065326.080 ns/op 1669 | arraySumScalarNoOpt·p0.999: 2074624.000 ns/op 1670 | arraySumScalarNoOpt·p0.9999: 2074624.000 ns/op 1671 | arraySumScalarNoOpt·p1.00: 2074624.000 ns/op 1672 | 1673 | Iteration 16: 1818827.647 ±(99.9%) 11575.614 ns/op 1674 | arraySumScalarNoOpt·p0.00: 1765376.000 ns/op 1675 | arraySumScalarNoOpt·p0.50: 1792000.000 ns/op 1676 | arraySumScalarNoOpt·p0.90: 2010316.800 ns/op 1677 | arraySumScalarNoOpt·p0.95: 2029568.000 ns/op 1678 | 
arraySumScalarNoOpt·p0.99: 2047303.680 ns/op 1679 | arraySumScalarNoOpt·p0.999: 2519040.000 ns/op 1680 | arraySumScalarNoOpt·p0.9999: 2519040.000 ns/op 1681 | arraySumScalarNoOpt·p1.00: 2519040.000 ns/op 1682 | 1683 | Iteration 17: 1811741.189 ±(99.9%) 11300.289 ns/op 1684 | arraySumScalarNoOpt·p0.00: 1767424.000 ns/op 1685 | arraySumScalarNoOpt·p0.50: 1783808.000 ns/op 1686 | arraySumScalarNoOpt·p0.90: 2015232.000 ns/op 1687 | arraySumScalarNoOpt·p0.95: 2033664.000 ns/op 1688 | arraySumScalarNoOpt·p0.99: 2050048.000 ns/op 1689 | arraySumScalarNoOpt·p0.999: 2105344.000 ns/op 1690 | arraySumScalarNoOpt·p0.9999: 2105344.000 ns/op 1691 | arraySumScalarNoOpt·p1.00: 2105344.000 ns/op 1692 | 1693 | Iteration 18: 1825288.678 ±(99.9%) 11043.366 ns/op 1694 | arraySumScalarNoOpt·p0.00: 1775616.000 ns/op 1695 | arraySumScalarNoOpt·p0.50: 1798144.000 ns/op 1696 | arraySumScalarNoOpt·p0.90: 2027520.000 ns/op 1697 | arraySumScalarNoOpt·p0.95: 2041856.000 ns/op 1698 | arraySumScalarNoOpt·p0.99: 2056192.000 ns/op 1699 | arraySumScalarNoOpt·p0.999: 2070528.000 ns/op 1700 | arraySumScalarNoOpt·p0.9999: 2070528.000 ns/op 1701 | arraySumScalarNoOpt·p1.00: 2070528.000 ns/op 1702 | 1703 | Iteration 19: 1841560.821 ±(99.9%) 11858.458 ns/op 1704 | arraySumScalarNoOpt·p0.00: 1792000.000 ns/op 1705 | arraySumScalarNoOpt·p0.50: 1814528.000 ns/op 1706 | arraySumScalarNoOpt·p0.90: 2040422.400 ns/op 1707 | arraySumScalarNoOpt·p0.95: 2056192.000 ns/op 1708 | arraySumScalarNoOpt·p0.99: 2069975.040 ns/op 1709 | arraySumScalarNoOpt·p0.999: 2592768.000 ns/op 1710 | arraySumScalarNoOpt·p0.9999: 2592768.000 ns/op 1711 | arraySumScalarNoOpt·p1.00: 2592768.000 ns/op 1712 | 1713 | Iteration 20: 1817490.492 ±(99.9%) 11062.118 ns/op 1714 | arraySumScalarNoOpt·p0.00: 1767424.000 ns/op 1715 | arraySumScalarNoOpt·p0.50: 1789952.000 ns/op 1716 | arraySumScalarNoOpt·p0.90: 2015232.000 ns/op 1717 | arraySumScalarNoOpt·p0.95: 2033664.000 ns/op 1718 | arraySumScalarNoOpt·p0.99: 2051399.680 ns/op 1719 | 
arraySumScalarNoOpt·p0.999: 2058240.000 ns/op 1720 | arraySumScalarNoOpt·p0.9999: 2058240.000 ns/op 1721 | arraySumScalarNoOpt·p1.00: 2058240.000 ns/op 1722 | 1723 | Iteration 21: 1808456.597 ±(99.9%) 11305.127 ns/op 1724 | arraySumScalarNoOpt·p0.00: 1759232.000 ns/op 1725 | arraySumScalarNoOpt·p0.50: 1781760.000 ns/op 1726 | arraySumScalarNoOpt·p0.90: 2017280.000 ns/op 1727 | arraySumScalarNoOpt·p0.95: 2033664.000 ns/op 1728 | arraySumScalarNoOpt·p0.99: 2045194.240 ns/op 1729 | arraySumScalarNoOpt·p0.999: 2056192.000 ns/op 1730 | arraySumScalarNoOpt·p0.9999: 2056192.000 ns/op 1731 | arraySumScalarNoOpt·p1.00: 2056192.000 ns/op 1732 | 1733 | Iteration 22: 1806553.791 ±(99.9%) 11194.187 ns/op 1734 | arraySumScalarNoOpt·p0.00: 1757184.000 ns/op 1735 | arraySumScalarNoOpt·p0.50: 1779712.000 ns/op 1736 | arraySumScalarNoOpt·p0.90: 2013184.000 ns/op 1737 | arraySumScalarNoOpt·p0.95: 2027520.000 ns/op 1738 | arraySumScalarNoOpt·p0.99: 2043904.000 ns/op 1739 | arraySumScalarNoOpt·p0.999: 2056192.000 ns/op 1740 | arraySumScalarNoOpt·p0.9999: 2056192.000 ns/op 1741 | arraySumScalarNoOpt·p1.00: 2056192.000 ns/op 1742 | 1743 | Iteration 23: 1821853.835 ±(99.9%) 10850.301 ns/op 1744 | arraySumScalarNoOpt·p0.00: 1771520.000 ns/op 1745 | arraySumScalarNoOpt·p0.50: 1796096.000 ns/op 1746 | arraySumScalarNoOpt·p0.90: 2019328.000 ns/op 1747 | arraySumScalarNoOpt·p0.95: 2034380.800 ns/op 1748 | arraySumScalarNoOpt·p0.99: 2054164.480 ns/op 1749 | arraySumScalarNoOpt·p0.999: 2064384.000 ns/op 1750 | arraySumScalarNoOpt·p0.9999: 2064384.000 ns/op 1751 | arraySumScalarNoOpt·p1.00: 2064384.000 ns/op 1752 | 1753 | Iteration 24: 1841241.551 ±(99.9%) 10907.731 ns/op 1754 | arraySumScalarNoOpt·p0.00: 1792000.000 ns/op 1755 | arraySumScalarNoOpt·p0.50: 1816576.000 ns/op 1756 | arraySumScalarNoOpt·p0.90: 2038374.400 ns/op 1757 | arraySumScalarNoOpt·p0.95: 2058240.000 ns/op 1758 | arraySumScalarNoOpt·p0.99: 2071470.080 ns/op 1759 | arraySumScalarNoOpt·p0.999: 2076672.000 ns/op 1760 | 
arraySumScalarNoOpt·p0.9999: 2076672.000 ns/op 1761 | arraySumScalarNoOpt·p1.00: 2076672.000 ns/op 1762 | 1763 | Iteration 25: 1817340.638 ±(99.9%) 10851.495 ns/op 1764 | arraySumScalarNoOpt·p0.00: 1769472.000 ns/op 1765 | arraySumScalarNoOpt·p0.50: 1789952.000 ns/op 1766 | arraySumScalarNoOpt·p0.90: 2015232.000 ns/op 1767 | arraySumScalarNoOpt·p0.95: 2029568.000 ns/op 1768 | arraySumScalarNoOpt·p0.99: 2041856.000 ns/op 1769 | arraySumScalarNoOpt·p0.999: 2056192.000 ns/op 1770 | arraySumScalarNoOpt·p0.9999: 2056192.000 ns/op 1771 | arraySumScalarNoOpt·p1.00: 2056192.000 ns/op 1772 | 1773 | 1774 | 1775 | Result "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt": 1776 | N = 13380 1777 | mean = 1810092.963 ±(99.9%) 2458.668 ns/op 1778 | 1779 | Histogram, ns/op: 1780 | [1000000.000, 1250000.000) = 0 1781 | [1250000.000, 1500000.000) = 0 1782 | [1500000.000, 1750000.000) = 1033 1783 | [1750000.000, 2000000.000) = 11053 1784 | [2000000.000, 2250000.000) = 1281 1785 | [2250000.000, 2500000.000) = 2 1786 | [2500000.000, 2750000.000) = 3 1787 | [2750000.000, 3000000.000) = 0 1788 | [3000000.000, 3250000.000) = 2 1789 | [3250000.000, 3500000.000) = 6 1790 | [3500000.000, 3750000.000) = 0 1791 | 1792 | Percentiles, ns/op: 1793 | p(0.0000) = 1730560.000 ns/op 1794 | p(50.0000) = 1787904.000 ns/op 1795 | p(90.0000) = 1992704.000 ns/op 1796 | p(95.0000) = 2029568.000 ns/op 1797 | p(99.0000) = 2054144.000 ns/op 1798 | p(99.9000) = 2254356.480 ns/op 1799 | p(99.9900) = 3350469.427 ns/op 1800 | p(99.9990) = 3354624.000 ns/op 1801 | p(99.9999) = 3354624.000 ns/op 1802 | p(100.0000) = 3354624.000 ns/op 1803 | 1804 | 1805 | # JMH version: 1.29 1806 | # VM version: JDK 17.0.2, OpenJDK 64-Bit Server VM, 17.0.2+8-86 1807 | # VM invoker: /home/styp/.jdks/openjdk-17.0.2/bin/java 1808 | # VM options: -Dfile.encoding=UTF-8 -Djava.io.tmpdir=/home/styp/Desktop/java-vbench/build/tmp/jmh -Duser.country=US -Duser.language=en -Duser.variant --add-modules jdk.incubator.vector 
-XX:-UseSuperWord 1809 | # Blackhole mode: full + dont-inline hint 1810 | # Warmup: 1 iterations, 1 s each, 10 calls per op 1811 | # Measurement: 25 iterations, 10 s each 1812 | # Timeout: 10 min per iteration 1813 | # Threads: 1 thread, ***WARNING: Synchronize iterations are disabled!*** 1814 | # Benchmark mode: Sampling time 1815 | # Benchmark: ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt 1816 | # Parameters: (LENGTH = 268435455) 1817 | 1818 | # Run progress: 85.71% complete, ETA 00:04:11 1819 | # Fork: 1 of 1 1820 | WARNING: Using incubator modules: jdk.incubator.vector 1821 | # Warmup Iteration 1: 343408640.000 ns/op 1822 | Iteration 1: 28833912.471 ±(99.9%) 122272.898 ns/op 1823 | arraySumScalarNoOpt·p0.00: 28737536.000 ns/op 1824 | arraySumScalarNoOpt·p0.50: 28803072.000 ns/op 1825 | arraySumScalarNoOpt·p0.90: 28980019.200 ns/op 1826 | arraySumScalarNoOpt·p0.95: 29294592.000 ns/op 1827 | arraySumScalarNoOpt·p0.99: 29294592.000 ns/op 1828 | arraySumScalarNoOpt·p0.999: 29294592.000 ns/op 1829 | arraySumScalarNoOpt·p0.9999: 29294592.000 ns/op 1830 | arraySumScalarNoOpt·p1.00: 29294592.000 ns/op 1831 | 1832 | Iteration 2: 28795790.222 ±(99.9%) 46451.134 ns/op 1833 | arraySumScalarNoOpt·p0.00: 28737536.000 ns/op 1834 | arraySumScalarNoOpt·p0.50: 28786688.000 ns/op 1835 | arraySumScalarNoOpt·p0.90: 28875161.600 ns/op 1836 | arraySumScalarNoOpt·p0.95: 28934144.000 ns/op 1837 | arraySumScalarNoOpt·p0.99: 28934144.000 ns/op 1838 | arraySumScalarNoOpt·p0.999: 28934144.000 ns/op 1839 | arraySumScalarNoOpt·p0.9999: 28934144.000 ns/op 1840 | arraySumScalarNoOpt·p1.00: 28934144.000 ns/op 1841 | 1842 | Iteration 3: 28597361.778 ±(99.9%) 79169.271 ns/op 1843 | arraySumScalarNoOpt·p0.00: 28475392.000 ns/op 1844 | arraySumScalarNoOpt·p0.50: 28590080.000 ns/op 1845 | arraySumScalarNoOpt·p0.90: 28773580.800 ns/op 1846 | arraySumScalarNoOpt·p0.95: 28803072.000 ns/op 1847 | arraySumScalarNoOpt·p0.99: 28803072.000 ns/op 1848 | arraySumScalarNoOpt·p0.999: 28803072.000 
ns/op 1849 | arraySumScalarNoOpt·p0.9999: 28803072.000 ns/op 1850 | arraySumScalarNoOpt·p1.00: 28803072.000 ns/op 1851 | 1852 | Iteration 4: 28817635.556 ±(99.9%) 44839.619 ns/op 1853 | arraySumScalarNoOpt·p0.00: 28770304.000 ns/op 1854 | arraySumScalarNoOpt·p0.50: 28803072.000 ns/op 1855 | arraySumScalarNoOpt·p0.90: 28901376.000 ns/op 1856 | arraySumScalarNoOpt·p0.95: 28901376.000 ns/op 1857 | arraySumScalarNoOpt·p0.99: 28901376.000 ns/op 1858 | arraySumScalarNoOpt·p0.999: 28901376.000 ns/op 1859 | arraySumScalarNoOpt·p0.9999: 28901376.000 ns/op 1860 | arraySumScalarNoOpt·p1.00: 28901376.000 ns/op 1861 | 1862 | Iteration 5: 28657436.444 ±(99.9%) 228986.359 ns/op 1863 | arraySumScalarNoOpt·p0.00: 28409856.000 ns/op 1864 | arraySumScalarNoOpt·p0.50: 28606464.000 ns/op 1865 | arraySumScalarNoOpt·p0.90: 28891545.600 ns/op 1866 | arraySumScalarNoOpt·p0.95: 29392896.000 ns/op 1867 | arraySumScalarNoOpt·p0.99: 29392896.000 ns/op 1868 | arraySumScalarNoOpt·p0.999: 29392896.000 ns/op 1869 | arraySumScalarNoOpt·p0.9999: 29392896.000 ns/op 1870 | arraySumScalarNoOpt·p1.00: 29392896.000 ns/op 1871 | 1872 | Iteration 6: 28617386.667 ±(99.9%) 21008.316 ns/op 1873 | arraySumScalarNoOpt·p0.00: 28573696.000 ns/op 1874 | arraySumScalarNoOpt·p0.50: 28606464.000 ns/op 1875 | arraySumScalarNoOpt·p0.90: 28642508.800 ns/op 1876 | arraySumScalarNoOpt·p0.95: 28672000.000 ns/op 1877 | arraySumScalarNoOpt·p0.99: 28672000.000 ns/op 1878 | arraySumScalarNoOpt·p0.999: 28672000.000 ns/op 1879 | arraySumScalarNoOpt·p0.9999: 28672000.000 ns/op 1880 | arraySumScalarNoOpt·p1.00: 28672000.000 ns/op 1881 | 1882 | Iteration 7: 28817635.556 ±(99.9%) 21583.993 ns/op 1883 | arraySumScalarNoOpt·p0.00: 28770304.000 ns/op 1884 | arraySumScalarNoOpt·p0.50: 28803072.000 ns/op 1885 | arraySumScalarNoOpt·p0.90: 28839116.800 ns/op 1886 | arraySumScalarNoOpt·p0.95: 28868608.000 ns/op 1887 | arraySumScalarNoOpt·p0.99: 28868608.000 ns/op 1888 | arraySumScalarNoOpt·p0.999: 28868608.000 ns/op 1889 | 
arraySumScalarNoOpt·p0.9999: 28868608.000 ns/op 1890 | arraySumScalarNoOpt·p1.00: 28868608.000 ns/op 1891 | 1892 | Iteration 8: 28784867.556 ±(99.9%) 54805.359 ns/op 1893 | arraySumScalarNoOpt·p0.00: 28704768.000 ns/op 1894 | arraySumScalarNoOpt·p0.50: 28786688.000 ns/op 1895 | arraySumScalarNoOpt·p0.90: 28871884.800 ns/op 1896 | arraySumScalarNoOpt·p0.95: 28901376.000 ns/op 1897 | arraySumScalarNoOpt·p0.99: 28901376.000 ns/op 1898 | arraySumScalarNoOpt·p0.999: 28901376.000 ns/op 1899 | arraySumScalarNoOpt·p0.9999: 28901376.000 ns/op 1900 | arraySumScalarNoOpt·p1.00: 28901376.000 ns/op 1901 | 1902 | Iteration 9: 28586439.111 ±(99.9%) 149712.402 ns/op 1903 | arraySumScalarNoOpt·p0.00: 28475392.000 ns/op 1904 | arraySumScalarNoOpt·p0.50: 28540928.000 ns/op 1905 | arraySumScalarNoOpt·p0.90: 28724428.800 ns/op 1906 | arraySumScalarNoOpt·p0.95: 29196288.000 ns/op 1907 | arraySumScalarNoOpt·p0.99: 29196288.000 ns/op 1908 | arraySumScalarNoOpt·p0.999: 29196288.000 ns/op 1909 | arraySumScalarNoOpt·p0.9999: 29196288.000 ns/op 1910 | arraySumScalarNoOpt·p1.00: 29196288.000 ns/op 1911 | 1912 | Iteration 10: 28673820.444 ±(99.9%) 22213.804 ns/op 1913 | arraySumScalarNoOpt·p0.00: 28606464.000 ns/op 1914 | arraySumScalarNoOpt·p0.50: 28672000.000 ns/op 1915 | arraySumScalarNoOpt·p0.90: 28704768.000 ns/op 1916 | arraySumScalarNoOpt·p0.95: 28704768.000 ns/op 1917 | arraySumScalarNoOpt·p0.99: 28704768.000 ns/op 1918 | arraySumScalarNoOpt·p0.999: 28704768.000 ns/op 1919 | arraySumScalarNoOpt·p0.9999: 28704768.000 ns/op 1920 | arraySumScalarNoOpt·p1.00: 28704768.000 ns/op 1921 | 1922 | Iteration 11: 28750279.111 ±(99.9%) 31754.695 ns/op 1923 | arraySumScalarNoOpt·p0.00: 28672000.000 ns/op 1924 | arraySumScalarNoOpt·p0.50: 28753920.000 ns/op 1925 | arraySumScalarNoOpt·p0.90: 28803072.000 ns/op 1926 | arraySumScalarNoOpt·p0.95: 28803072.000 ns/op 1927 | arraySumScalarNoOpt·p0.99: 28803072.000 ns/op 1928 | arraySumScalarNoOpt·p0.999: 28803072.000 ns/op 1929 | arraySumScalarNoOpt·p0.9999: 
28803072.000 ns/op 1930 | arraySumScalarNoOpt·p1.00: 28803072.000 ns/op 1931 | 1932 | Iteration 12: 28559132.444 ±(99.9%) 49517.075 ns/op 1933 | arraySumScalarNoOpt·p0.00: 28508160.000 ns/op 1934 | arraySumScalarNoOpt·p0.50: 28540928.000 ns/op 1935 | arraySumScalarNoOpt·p0.90: 28619571.200 ns/op 1936 | arraySumScalarNoOpt·p0.95: 28737536.000 ns/op 1937 | arraySumScalarNoOpt·p0.99: 28737536.000 ns/op 1938 | arraySumScalarNoOpt·p0.999: 28737536.000 ns/op 1939 | arraySumScalarNoOpt·p0.9999: 28737536.000 ns/op 1940 | arraySumScalarNoOpt·p1.00: 28737536.000 ns/op 1941 | 1942 | Iteration 13: 28653795.556 ±(99.9%) 26201.973 ns/op 1943 | arraySumScalarNoOpt·p0.00: 28606464.000 ns/op 1944 | arraySumScalarNoOpt·p0.50: 28639232.000 ns/op 1945 | arraySumScalarNoOpt·p0.90: 28678553.600 ns/op 1946 | arraySumScalarNoOpt·p0.95: 28737536.000 ns/op 1947 | arraySumScalarNoOpt·p0.99: 28737536.000 ns/op 1948 | arraySumScalarNoOpt·p0.999: 28737536.000 ns/op 1949 | arraySumScalarNoOpt·p0.9999: 28737536.000 ns/op 1950 | arraySumScalarNoOpt·p1.00: 28737536.000 ns/op 1951 | 1952 | Iteration 14: 28730254.222 ±(99.9%) 37220.243 ns/op 1953 | arraySumScalarNoOpt·p0.00: 28672000.000 ns/op 1954 | arraySumScalarNoOpt·p0.50: 28737536.000 ns/op 1955 | arraySumScalarNoOpt·p0.90: 28803072.000 ns/op 1956 | arraySumScalarNoOpt·p0.95: 28803072.000 ns/op 1957 | arraySumScalarNoOpt·p0.99: 28803072.000 ns/op 1958 | arraySumScalarNoOpt·p0.999: 28803072.000 ns/op 1959 | arraySumScalarNoOpt·p0.9999: 28803072.000 ns/op 1960 | arraySumScalarNoOpt·p1.00: 28803072.000 ns/op 1961 | 1962 | Iteration 15: 28584618.667 ±(99.9%) 45786.563 ns/op 1963 | arraySumScalarNoOpt·p0.00: 28508160.000 ns/op 1964 | arraySumScalarNoOpt·p0.50: 28573696.000 ns/op 1965 | arraySumScalarNoOpt·p0.90: 28642508.800 ns/op 1966 | arraySumScalarNoOpt·p0.95: 28672000.000 ns/op 1967 | arraySumScalarNoOpt·p0.99: 28672000.000 ns/op 1968 | arraySumScalarNoOpt·p0.999: 28672000.000 ns/op 1969 | arraySumScalarNoOpt·p0.9999: 28672000.000 ns/op 1970 | 
arraySumScalarNoOpt·p1.00: 28672000.000 ns/op 1971 | 1972 | Iteration 16: 29050652.444 ±(99.9%) 1815434.016 ns/op 1973 | arraySumScalarNoOpt·p0.00: 28475392.000 ns/op 1974 | arraySumScalarNoOpt·p0.50: 28606464.000 ns/op 1975 | arraySumScalarNoOpt·p0.90: 29517414.400 ns/op 1976 | arraySumScalarNoOpt·p0.95: 36831232.000 ns/op 1977 | arraySumScalarNoOpt·p0.99: 36831232.000 ns/op 1978 | arraySumScalarNoOpt·p0.999: 36831232.000 ns/op 1979 | arraySumScalarNoOpt·p0.9999: 36831232.000 ns/op 1980 | arraySumScalarNoOpt·p1.00: 36831232.000 ns/op 1981 | 1982 | Iteration 17: 28753920.000 ±(99.9%) 31947.149 ns/op 1983 | arraySumScalarNoOpt·p0.00: 28704768.000 ns/op 1984 | arraySumScalarNoOpt·p0.50: 28753920.000 ns/op 1985 | arraySumScalarNoOpt·p0.90: 28806348.800 ns/op 1986 | arraySumScalarNoOpt·p0.95: 28835840.000 ns/op 1987 | arraySumScalarNoOpt·p0.99: 28835840.000 ns/op 1988 | arraySumScalarNoOpt·p0.999: 28835840.000 ns/op 1989 | arraySumScalarNoOpt·p0.9999: 28835840.000 ns/op 1990 | arraySumScalarNoOpt·p1.00: 28835840.000 ns/op 1991 | 1992 | Iteration 18: 28599182.222 ±(99.9%) 34127.289 ns/op 1993 | arraySumScalarNoOpt·p0.00: 28540928.000 ns/op 1994 | arraySumScalarNoOpt·p0.50: 28606464.000 ns/op 1995 | arraySumScalarNoOpt·p0.90: 28672000.000 ns/op 1996 | arraySumScalarNoOpt·p0.95: 28672000.000 ns/op 1997 | arraySumScalarNoOpt·p0.99: 28672000.000 ns/op 1998 | arraySumScalarNoOpt·p0.999: 28672000.000 ns/op 1999 | arraySumScalarNoOpt·p0.9999: 28672000.000 ns/op 2000 | arraySumScalarNoOpt·p1.00: 28672000.000 ns/op 2001 | 2002 | Iteration 19: 28803072.000 ±(99.9%) 36387.470 ns/op 2003 | arraySumScalarNoOpt·p0.00: 28737536.000 ns/op 2004 | arraySumScalarNoOpt·p0.50: 28803072.000 ns/op 2005 | arraySumScalarNoOpt·p0.90: 28868608.000 ns/op 2006 | arraySumScalarNoOpt·p0.95: 28868608.000 ns/op 2007 | arraySumScalarNoOpt·p0.99: 28868608.000 ns/op 2008 | arraySumScalarNoOpt·p0.999: 28868608.000 ns/op 2009 | arraySumScalarNoOpt·p0.9999: 28868608.000 ns/op 2010 | 
arraySumScalarNoOpt·p1.00: 28868608.000 ns/op 2011 | 2012 | Iteration 20: 28753920.000 ±(99.9%) 28283.311 ns/op 2013 | arraySumScalarNoOpt·p0.00: 28704768.000 ns/op 2014 | arraySumScalarNoOpt·p0.50: 28737536.000 ns/op 2015 | arraySumScalarNoOpt·p0.90: 28803072.000 ns/op 2016 | arraySumScalarNoOpt·p0.95: 28803072.000 ns/op 2017 | arraySumScalarNoOpt·p0.99: 28803072.000 ns/op 2018 | arraySumScalarNoOpt·p0.999: 28803072.000 ns/op 2019 | arraySumScalarNoOpt·p0.9999: 28803072.000 ns/op 2020 | arraySumScalarNoOpt·p1.00: 28803072.000 ns/op 2021 | 2022 | Iteration 21: 28610104.889 ±(99.9%) 31317.348 ns/op 2023 | arraySumScalarNoOpt·p0.00: 28540928.000 ns/op 2024 | arraySumScalarNoOpt·p0.50: 28606464.000 ns/op 2025 | arraySumScalarNoOpt·p0.90: 28642508.800 ns/op 2026 | arraySumScalarNoOpt·p0.95: 28672000.000 ns/op 2027 | arraySumScalarNoOpt·p0.99: 28672000.000 ns/op 2028 | arraySumScalarNoOpt·p0.999: 28672000.000 ns/op 2029 | arraySumScalarNoOpt·p0.9999: 28672000.000 ns/op 2030 | arraySumScalarNoOpt·p1.00: 28672000.000 ns/op 2031 | 2032 | Iteration 22: 28819456.000 ±(99.9%) 153033.000 ns/op 2033 | arraySumScalarNoOpt·p0.00: 28704768.000 ns/op 2034 | arraySumScalarNoOpt·p0.50: 28786688.000 ns/op 2035 | arraySumScalarNoOpt·p0.90: 28898099.200 ns/op 2036 | arraySumScalarNoOpt·p0.95: 29458432.000 ns/op 2037 | arraySumScalarNoOpt·p0.99: 29458432.000 ns/op 2038 | arraySumScalarNoOpt·p0.999: 29458432.000 ns/op 2039 | arraySumScalarNoOpt·p0.9999: 29458432.000 ns/op 2040 | arraySumScalarNoOpt·p1.00: 29458432.000 ns/op 2041 | 2042 | Iteration 23: 28768483.556 ±(99.9%) 22213.804 ns/op 2043 | arraySumScalarNoOpt·p0.00: 28737536.000 ns/op 2044 | arraySumScalarNoOpt·p0.50: 28770304.000 ns/op 2045 | arraySumScalarNoOpt·p0.90: 28803072.000 ns/op 2046 | arraySumScalarNoOpt·p0.95: 28803072.000 ns/op 2047 | arraySumScalarNoOpt·p0.99: 28803072.000 ns/op 2048 | arraySumScalarNoOpt·p0.999: 28803072.000 ns/op 2049 | arraySumScalarNoOpt·p0.9999: 28803072.000 ns/op 2050 | arraySumScalarNoOpt·p1.00: 
28803072.000 ns/op 2051 | 2052 | Iteration 24: 28630129.778 ±(99.9%) 27402.680 ns/op 2053 | arraySumScalarNoOpt·p0.00: 28573696.000 ns/op 2054 | arraySumScalarNoOpt·p0.50: 28639232.000 ns/op 2055 | arraySumScalarNoOpt·p0.90: 28672000.000 ns/op 2056 | arraySumScalarNoOpt·p0.95: 28672000.000 ns/op 2057 | arraySumScalarNoOpt·p0.99: 28672000.000 ns/op 2058 | arraySumScalarNoOpt·p0.999: 28672000.000 ns/op 2059 | arraySumScalarNoOpt·p0.9999: 28672000.000 ns/op 2060 | arraySumScalarNoOpt·p1.00: 28672000.000 ns/op 2061 | 2062 | Iteration 25: 28775765.333 ±(99.9%) 30170.897 ns/op 2063 | arraySumScalarNoOpt·p0.00: 28737536.000 ns/op 2064 | arraySumScalarNoOpt·p0.50: 28770304.000 ns/op 2065 | arraySumScalarNoOpt·p0.90: 28835840.000 ns/op 2066 | arraySumScalarNoOpt·p0.95: 28835840.000 ns/op 2067 | arraySumScalarNoOpt·p0.99: 28835840.000 ns/op 2068 | arraySumScalarNoOpt·p0.999: 28835840.000 ns/op 2069 | arraySumScalarNoOpt·p0.9999: 28835840.000 ns/op 2070 | arraySumScalarNoOpt·p1.00: 28835840.000 ns/op 2071 | 2072 | 2073 | 2074 | Result "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt": 2075 | N = 449 2076 | mean = 28720750.610 ±(99.9%) 62865.016 ns/op 2077 | 2078 | Histogram, ns/op: 2079 | [28000000.000, 28500000.000) = 11 2080 | [28500000.000, 29000000.000) = 433 2081 | [29000000.000, 29500000.000) = 4 2082 | [29500000.000, 30000000.000) = 0 2083 | [30000000.000, 30500000.000) = 0 2084 | [30500000.000, 31000000.000) = 0 2085 | [31000000.000, 31500000.000) = 0 2086 | [31500000.000, 32000000.000) = 0 2087 | [32000000.000, 32500000.000) = 0 2088 | [32500000.000, 33000000.000) = 0 2089 | [33000000.000, 33500000.000) = 0 2090 | [33500000.000, 34000000.000) = 0 2091 | [34000000.000, 34500000.000) = 0 2092 | [34500000.000, 35000000.000) = 0 2093 | [35000000.000, 35500000.000) = 0 2094 | [35500000.000, 36000000.000) = 0 2095 | [36000000.000, 36500000.000) = 0 2096 | 2097 | Percentiles, ns/op: 2098 | p(0.0000) = 28409856.000 ns/op 2099 | p(50.0000) = 28737536.000 ns/op 2100 
| p(90.0000) = 28835840.000 ns/op 2101 | p(95.0000) = 28835840.000 ns/op 2102 | p(99.0000) = 29245440.000 ns/op 2103 | p(99.9000) = 36831232.000 ns/op 2104 | p(99.9900) = 36831232.000 ns/op 2105 | p(99.9990) = 36831232.000 ns/op 2106 | p(99.9999) = 36831232.000 ns/op 2107 | p(100.0000) = 36831232.000 ns/op 2108 | 2109 | 2110 | # Run complete. Total time: 00:29:33 2111 | 2112 | REMEMBER: The numbers below are just data. To gain reusable insights, you need to follow up on 2113 | why the numbers are the way they are. Use profilers (see -prof, -lprof), design factorial 2114 | experiments, perform baseline and negative tests that provide experimental control, make sure 2115 | the benchmarking environment is safe on JVM/OS/HW level, ask for reviews from the domain experts. 2116 | Do not assume the numbers tell you what you want them to tell. 2117 | 2118 | Benchmark (LENGTH) Mode Cnt Score Error Units 2119 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt 15 sample 6527740 3.660 ± 0.090 ns/op 2120 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.00 15 sample 3.000 ns/op 2121 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.50 15 sample 3.000 ns/op 2122 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.90 15 sample 4.000 ns/op 2123 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.95 15 sample 5.000 ns/op 2124 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.99 15 sample 5.000 ns/op 2125 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.999 15 sample 17.000 ns/op 2126 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.9999 15 sample 1054.000 ns/op 2127 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p1.00 15 sample 174080.000 ns/op 2128 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt 255 sample 7021000 15.717 ± 0.444 ns/op 2129 | 
SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.00 255 sample 12.000 ns/op 2130 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.50 255 sample 14.000 ns/op 2131 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.90 255 sample 17.000 ns/op 2132 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.95 255 sample 20.000 ns/op 2133 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.99 255 sample 23.000 ns/op 2134 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.999 255 sample 130.000 ns/op 2135 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.9999 255 sample 1580.000 ns/op 2136 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p1.00 255 sample 205056.000 ns/op 2137 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt 4095 sample 5591963 287.897 ± 1.890 ns/op 2138 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.00 4095 sample 221.000 ns/op 2139 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.50 4095 sample 272.000 ns/op 2140 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.90 4095 sample 293.000 ns/op 2141 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.95 4095 sample 304.000 ns/op 2142 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.99 4095 sample 349.000 ns/op 2143 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.999 4095 sample 1352.000 ns/op 2144 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.9999 4095 sample 1960.000 ns/op 2145 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p1.00 4095 sample 342528.000 ns/op 2146 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt 65535 sample 3098411 4049.493 ± 6.696 ns/op 2147 | 
SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.00 65535 sample 3720.000 ns/op 2148 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.50 65535 sample 3884.000 ns/op 2149 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.90 65535 sample 4192.000 ns/op 2150 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.95 65535 sample 4720.000 ns/op 2151 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.99 65535 sample 5528.000 ns/op 2152 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.999 65535 sample 7024.000 ns/op 2153 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.9999 65535 sample 193576.653 ns/op 2154 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p1.00 65535 sample 498176.000 ns/op 2155 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt 1048575 sample 332714 74900.506 ± 101.429 ns/op 2156 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.00 1048575 sample 70400.000 ns/op 2157 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.50 1048575 sample 73088.000 ns/op 2158 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.90 1048575 sample 76544.000 ns/op 2159 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.95 1048575 sample 77696.000 ns/op 2160 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.99 1048575 sample 80896.000 ns/op 2161 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.999 1048575 sample 307200.000 ns/op 2162 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.9999 1048575 sample 321536.000 ns/op 2163 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p1.00 1048575 sample 727040.000 ns/op 2164 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt 16777215 sample 13380 
1810092.963 ± 2458.668 ns/op 2165 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.00 16777215 sample 1730560.000 ns/op 2166 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.50 16777215 sample 1787904.000 ns/op 2167 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.90 16777215 sample 1992704.000 ns/op 2168 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.95 16777215 sample 2029568.000 ns/op 2169 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.99 16777215 sample 2054144.000 ns/op 2170 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.999 16777215 sample 2254356.480 ns/op 2171 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.9999 16777215 sample 3350469.427 ns/op 2172 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p1.00 16777215 sample 3354624.000 ns/op 2173 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt 268435455 sample 449 28720750.610 ± 62865.016 ns/op 2174 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.00 268435455 sample 28409856.000 ns/op 2175 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.50 268435455 sample 28737536.000 ns/op 2176 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.90 268435455 sample 28835840.000 ns/op 2177 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.95 268435455 sample 28835840.000 ns/op 2178 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.99 268435455 sample 29245440.000 ns/op 2179 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.999 268435455 sample 36831232.000 ns/op 2180 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.9999 268435455 sample 36831232.000 ns/op 2181 | SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p1.00 
268435455 sample 36831232.000 ns/op 2182 | 2183 | Benchmark result is saved to /home/styp/Desktop/java-vbench/build/reports/jmh/results.txt 2184 | -------------------------------------------------------------------------------- /plots/data_post2/results_withSuperWord.txt: -------------------------------------------------------------------------------- 1 | "Benchmark","Mode","Threads","Samples","Score","Score Error (99.9%)","Unit","Param: LENGTH" 2 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt","sample",1,6909450,4.257500,0.116049,"ns/op",15 3 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.00","sample",1,1,2.000000,NaN,"ns/op",15 4 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.50","sample",1,1,4.000000,NaN,"ns/op",15 5 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.90","sample",1,1,5.000000,NaN,"ns/op",15 6 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.95","sample",1,1,5.000000,NaN,"ns/op",15 7 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.99","sample",1,1,6.000000,NaN,"ns/op",15 8 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.999","sample",1,1,17.000000,NaN,"ns/op",15 9 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.9999","sample",1,1,1050.000000,NaN,"ns/op",15 10 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p1.00","sample",1,1,176384.000000,NaN,"ns/op",15 11 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt","sample",1,6175667,9.175881,0.597834,"ns/op",255 12 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.00","sample",1,1,4.000000,NaN,"ns/op",255 13 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.50","sample",1,1,7.000000,NaN,"ns/op",255 14 | 
"ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.90","sample",1,1,10.000000,NaN,"ns/op",255 15 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.95","sample",1,1,14.000000,NaN,"ns/op",255 16 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.99","sample",1,1,21.000000,NaN,"ns/op",255 17 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.999","sample",1,1,105.000000,NaN,"ns/op",255 18 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.9999","sample",1,1,1592.000000,NaN,"ns/op",255 19 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p1.00","sample",1,1,210176.000000,NaN,"ns/op",255 20 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt","sample",1,9464599,166.780291,1.219007,"ns/op",4095 21 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.00","sample",1,1,99.000000,NaN,"ns/op",4095 22 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.50","sample",1,1,155.000000,NaN,"ns/op",4095 23 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.90","sample",1,1,177.000000,NaN,"ns/op",4095 24 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.95","sample",1,1,188.000000,NaN,"ns/op",4095 25 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.99","sample",1,1,224.000000,NaN,"ns/op",4095 26 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.999","sample",1,1,1184.000000,NaN,"ns/op",4095 27 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.9999","sample",1,1,1818.000000,NaN,"ns/op",4095 28 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p1.00","sample",1,1,350720.000000,NaN,"ns/op",4095 29 | 
"ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt","sample",1,4931487,2543.202443,4.701872,"ns/op",65535 30 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.00","sample",1,1,2208.000000,NaN,"ns/op",65535 31 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.50","sample",1,1,2448.000000,NaN,"ns/op",65535 32 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.90","sample",1,1,2596.000000,NaN,"ns/op",65535 33 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.95","sample",1,1,2704.000000,NaN,"ns/op",65535 34 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.99","sample",1,1,3572.000000,NaN,"ns/op",65535 35 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.999","sample",1,1,4664.000000,NaN,"ns/op",65535 36 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.9999","sample",1,1,190976.000000,NaN,"ns/op",65535 37 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p1.00","sample",1,1,381440.000000,NaN,"ns/op",65535 38 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt","sample",1,360731,69073.419030,92.664304,"ns/op",1048575 39 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.00","sample",1,1,65344.000000,NaN,"ns/op",1048575 40 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.50","sample",1,1,67328.000000,NaN,"ns/op",1048575 41 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.90","sample",1,1,69632.000000,NaN,"ns/op",1048575 42 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.95","sample",1,1,70400.000000,NaN,"ns/op",1048575 43 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.99","sample",1,1,74496.000000,NaN,"ns/op",1048575 
44 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.999","sample",1,1,295424.000000,NaN,"ns/op",1048575 45 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.9999","sample",1,1,313818.521600,NaN,"ns/op",1048575 46 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p1.00","sample",1,1,726016.000000,NaN,"ns/op",1048575 47 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt","sample",1,14200,1705668.723380,2336.679427,"ns/op",16777215 48 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.00","sample",1,1,1642496.000000,NaN,"ns/op",16777215 49 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.50","sample",1,1,1681408.000000,NaN,"ns/op",16777215 50 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.90","sample",1,1,1722163.200000,NaN,"ns/op",16777215 51 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.95","sample",1,1,1943552.000000,NaN,"ns/op",16777215 52 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.99","sample",1,1,1970176.000000,NaN,"ns/op",16777215 53 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.999","sample",1,1,2158571.520000,NaN,"ns/op",16777215 54 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.9999","sample",1,1,3283271.270400,NaN,"ns/op",16777215 55 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p1.00","sample",1,1,3284992.000000,NaN,"ns/op",16777215 56 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt","sample",1,449,28388107.973274,19523.532487,"ns/op",268435455 57 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.00","sample",1,1,28246016.000000,NaN,"ns/op",268435455 58 | 
"ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.50","sample",1,1,28344320.000000,NaN,"ns/op",268435455 59 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.90","sample",1,1,28540928.000000,NaN,"ns/op",268435455 60 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.95","sample",1,1,28672000.000000,NaN,"ns/op",268435455 61 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.99","sample",1,1,28835840.000000,NaN,"ns/op",268435455 62 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.999","sample",1,1,29458432.000000,NaN,"ns/op",268435455 63 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.9999","sample",1,1,29458432.000000,NaN,"ns/op",268435455 64 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p1.00","sample",1,1,29458432.000000,NaN,"ns/op",268435455 65 | -------------------------------------------------------------------------------- /plots/data_post2/results_withoutSuperWord.txt: -------------------------------------------------------------------------------- 1 | "Benchmark","Mode","Threads","Samples","Score","Score Error (99.9%)","Unit","Param: LENGTH" 2 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt","sample",1,6527740,3.659780,0.090272,"ns/op",15 3 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.00","sample",1,1,3.000000,NaN,"ns/op",15 4 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.50","sample",1,1,3.000000,NaN,"ns/op",15 5 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.90","sample",1,1,4.000000,NaN,"ns/op",15 6 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.95","sample",1,1,5.000000,NaN,"ns/op",15 7 | 
"ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.99","sample",1,1,5.000000,NaN,"ns/op",15 8 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.999","sample",1,1,17.000000,NaN,"ns/op",15 9 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.9999","sample",1,1,1054.000000,NaN,"ns/op",15 10 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p1.00","sample",1,1,174080.000000,NaN,"ns/op",15 11 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt","sample",1,7021000,15.716509,0.444179,"ns/op",255 12 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.00","sample",1,1,12.000000,NaN,"ns/op",255 13 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.50","sample",1,1,14.000000,NaN,"ns/op",255 14 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.90","sample",1,1,17.000000,NaN,"ns/op",255 15 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.95","sample",1,1,20.000000,NaN,"ns/op",255 16 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.99","sample",1,1,23.000000,NaN,"ns/op",255 17 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.999","sample",1,1,130.000000,NaN,"ns/op",255 18 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.9999","sample",1,1,1580.000000,NaN,"ns/op",255 19 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p1.00","sample",1,1,205056.000000,NaN,"ns/op",255 20 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt","sample",1,5591963,287.897336,1.889840,"ns/op",4095 21 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.00","sample",1,1,221.000000,NaN,"ns/op",4095 22 | 
"ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.50","sample",1,1,272.000000,NaN,"ns/op",4095 23 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.90","sample",1,1,293.000000,NaN,"ns/op",4095 24 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.95","sample",1,1,304.000000,NaN,"ns/op",4095 25 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.99","sample",1,1,349.000000,NaN,"ns/op",4095 26 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.999","sample",1,1,1352.000000,NaN,"ns/op",4095 27 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.9999","sample",1,1,1960.000000,NaN,"ns/op",4095 28 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p1.00","sample",1,1,342528.000000,NaN,"ns/op",4095 29 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt","sample",1,3098411,4049.493075,6.695989,"ns/op",65535 30 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.00","sample",1,1,3720.000000,NaN,"ns/op",65535 31 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.50","sample",1,1,3884.000000,NaN,"ns/op",65535 32 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.90","sample",1,1,4192.000000,NaN,"ns/op",65535 33 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.95","sample",1,1,4720.000000,NaN,"ns/op",65535 34 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.99","sample",1,1,5528.000000,NaN,"ns/op",65535 35 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.999","sample",1,1,7024.000000,NaN,"ns/op",65535 36 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.9999","sample",1,1,193576.652800,NaN,"ns/op",65535 37 | 
"ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p1.00","sample",1,1,498176.000000,NaN,"ns/op",65535 38 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt","sample",1,332714,74900.506068,101.428617,"ns/op",1048575 39 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.00","sample",1,1,70400.000000,NaN,"ns/op",1048575 40 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.50","sample",1,1,73088.000000,NaN,"ns/op",1048575 41 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.90","sample",1,1,76544.000000,NaN,"ns/op",1048575 42 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.95","sample",1,1,77696.000000,NaN,"ns/op",1048575 43 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.99","sample",1,1,80896.000000,NaN,"ns/op",1048575 44 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.999","sample",1,1,307200.000000,NaN,"ns/op",1048575 45 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.9999","sample",1,1,321536.000000,NaN,"ns/op",1048575 46 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p1.00","sample",1,1,727040.000000,NaN,"ns/op",1048575 47 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt","sample",1,13380,1810092.962631,2458.667765,"ns/op",16777215 48 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.00","sample",1,1,1730560.000000,NaN,"ns/op",16777215 49 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.50","sample",1,1,1787904.000000,NaN,"ns/op",16777215 50 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.90","sample",1,1,1992704.000000,NaN,"ns/op",16777215 51 | 
"ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.95","sample",1,1,2029568.000000,NaN,"ns/op",16777215 52 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.99","sample",1,1,2054144.000000,NaN,"ns/op",16777215 53 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.999","sample",1,1,2254356.480000,NaN,"ns/op",16777215 54 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.9999","sample",1,1,3350469.427200,NaN,"ns/op",16777215 55 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p1.00","sample",1,1,3354624.000000,NaN,"ns/op",16777215 56 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt","sample",1,449,28720750.610245,62865.016445,"ns/op",268435455 57 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.00","sample",1,1,28409856.000000,NaN,"ns/op",268435455 58 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.50","sample",1,1,28737536.000000,NaN,"ns/op",268435455 59 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.90","sample",1,1,28835840.000000,NaN,"ns/op",268435455 60 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.95","sample",1,1,28835840.000000,NaN,"ns/op",268435455 61 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.99","sample",1,1,29245440.000000,NaN,"ns/op",268435455 62 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.999","sample",1,1,36831232.000000,NaN,"ns/op",268435455 63 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p0.9999","sample",1,1,36831232.000000,NaN,"ns/op",268435455 64 | "ch.styp.SumArrayBenchNoSuperVectorOpt.arraySumScalarNoOpt:arraySumScalarNoOpt·p1.00","sample",1,1,36831232.000000,NaN,"ns/op",268435455 65 | 
-------------------------------------------------------------------------------- /settings.gradle: -------------------------------------------------------------------------------- 1 | rootProject.name = 'vbench' 2 | 3 | -------------------------------------------------------------------------------- /src/jmh/java/ch/styp/BooleanPoolingJMH.java: -------------------------------------------------------------------------------- 1 | //package ch.styp; 2 | // 3 | //import org.openjdk.jmh.annotations.Benchmark; 4 | //import org.openjdk.jmh.annotations.BenchmarkMode; 5 | //import org.openjdk.jmh.annotations.Mode; 6 | //import org.openjdk.jmh.annotations.OutputTimeUnit; 7 | //import org.openjdk.jmh.annotations.Scope; 8 | //import org.openjdk.jmh.annotations.State; 9 | //import org.openjdk.jmh.infra.Blackhole; 10 | //import java.util.concurrent.TimeUnit; 11 | //@OutputTimeUnit(TimeUnit.MICROSECONDS) 12 | //@BenchmarkMode(Mode.AverageTime) 13 | //public class BooleanPoolingJMH 14 | //{ 15 | // @State(Scope.Benchmark) 16 | // public static class MyBenchmarkState 17 | // { 18 | // final Boolean[] testArray = new Boolean[10_000_000]; 19 | // } 20 | // @Benchmark 21 | // public void newlyCreatedBooleans(MyBenchmarkState state, 22 | // Blackhole blackhole) 23 | // { 24 | // for (int i = 0; i < state.testArray.length; i++) 25 | // { 26 | // state.testArray[i] = new Boolean(true); 27 | // } 28 | // blackhole.consume(state.testArray); 29 | // } 30 | // @Benchmark 31 | // public void populateWithPooledValue(MyBenchmarkState state, 32 | // Blackhole blackhole) 33 | // { 34 | // for (int i = 0; i < state.testArray.length; i++) 35 | // { 36 | // state.testArray[i] = Boolean.TRUE; 37 | // } 38 | // blackhole.consume(state.testArray); 39 | // } 40 | //} -------------------------------------------------------------------------------- /src/jmh/java/ch/styp/FloatMatrixMatrixMultiplication.java: -------------------------------------------------------------------------------- 1 | package 
ch.styp; 2 | 3 | import jdk.incubator.vector.VectorSpecies; 4 | import org.openjdk.jmh.annotations.*; 5 | import org.openjdk.jmh.infra.Blackhole; 6 | 7 | import static ch.styp.GeneratorHelpers.newFloatRowMajorMatrix; 8 | import jdk.incubator.vector.FloatVector; 9 | 10 | 11 | 12 | //@BenchmarkMode(Mode.Throughput) 13 | @State(Scope.Benchmark) 14 | @Fork(jvmArgsPrepend = {"--add-modules=jdk.incubator.vector", 15 | "-XX:-TieredCompilation", 16 | "-Djdk.incubator.vector.VECTOR_ACCESS_OOB_CHECK=0"}) 17 | public class FloatMatrixMatrixMultiplication { 18 | 19 | @Param({"32", "64", "128", "256", "512", "1024", "2048"}) 20 | // @Param({"128"}) 21 | int size; 22 | 23 | 24 | private float[] left; 25 | private float[] right; 26 | 27 | @Setup(Level.Iteration) 28 | public void init() { 29 | this.left = newFloatRowMajorMatrix(size * size); 30 | this.right = newFloatRowMajorMatrix(size * size); 31 | } 32 | 33 | @Benchmark 34 | public void mmBaseline(Blackhole bh) { 35 | var matrixMul = new MatrixMul(); 36 | bh.consume(matrixMul.baseline(left, right, size)); 37 | } 38 | 39 | @Benchmark 40 | public void mmBlocked(Blackhole bh) { 41 | var matrixMul = new MatrixMul(); 42 | int blocksize = 16; 43 | bh.consume(matrixMul.blocked(left, right, size, blocksize)); 44 | } 45 | 46 | @Benchmark 47 | public void mmSimpleFma(Blackhole bh) { 48 | var matrixMul = new MatrixMul(); 49 | bh.consume(matrixMul.simpleFMA(left, right, size)); 50 | } 51 | 52 | public void mmSimpleVectorPrefered(Blackhole bh) { 53 | var matrixMul = new MatrixMul(); 54 | bh.consume(matrixMul.simpleVectorPrefered(left, right, size)); 55 | } 56 | 57 | // @Benchmark 58 | // public void mmSimpleVectorAVX256(Blackhole bh) { 59 | // var matrixMul = new MatrixMul(); 60 | // bh.consume(matrixMul.simpleVectorAVX256(left, right, size)); 61 | // } 62 | // 63 | // @Benchmark 64 | // public void mmSimpleVectorAVX512(Blackhole bh) { 65 | // var matrixMul = new MatrixMul(); 66 | // bh.consume(matrixMul.simpleVectorAVX512(left, right, 
size)); 67 | // } 68 | 69 | @Benchmark 70 | public void mmBlockedVectorPrefered(Blackhole bh){ 71 | var matrixMul = new MatrixMul(); 72 | bh.consume(matrixMul.blockedVectorPrefered(left, right, size)); 73 | } 74 | // @Benchmark 75 | // public void mmBlockedVectorAVX256(Blackhole bh){ 76 | // var matrixMul = new MatrixMul(); 77 | // bh.consume(matrixMul.blockedVectorAVX256(left, right, size)); 78 | // } 79 | // 80 | // @Benchmark 81 | // public void mmBlockedVectorAVX512(Blackhole bh){ 82 | // var matrixMul = new MatrixMul(); 83 | // bh.consume(matrixMul.blockedVectorAVX512(left, right, size)); 84 | // } 85 | // 86 | // @Benchmark 87 | // public void mmBlockedVectorUnrolledAVX256(Blackhole bh){ 88 | // var matrixMul = new MatrixMul(); 89 | // bh.consume(matrixMul.blockedVectorUnrolledAVX256(left, right, size)); 90 | // } 91 | // 92 | // @Benchmark 93 | // public void mmBlockedVectorUnrolledAVX512(Blackhole bh){ 94 | // var matrixMul = new MatrixMul(); 95 | // bh.consume(matrixMul.blockedVectorUnrolledAVX512(left, right, size)); 96 | // } 97 | } 98 | -------------------------------------------------------------------------------- /src/jmh/java/ch/styp/FmaArrayBenchmark.java: -------------------------------------------------------------------------------- 1 | //package ch.styp; 2 | // 3 | //import org.openjdk.jmh.annotations.*; 4 | //import org.openjdk.jmh.infra.Blackhole; 5 | // 6 | //import java.util.concurrent.TimeUnit; 7 | // 8 | //@State(Scope.Benchmark) 9 | //@Fork(jvmArgsPrepend = {"--add-modules=jdk.incubator.vector", 10 | // "-XX:-TieredCompilation", 11 | // "-XX:+UseVectorCmov", 12 | // "-XX:+UseCMoveUnconditionally", 13 | // "-Djdk.incubator.vector.VECTOR_ACCESS_OOB_CHECK=0"}) 14 | // 15 | //public class FmaArrayBenchmark { 16 | // 17 | // // All these numbers are 2^n-1 to avoid memory alignment! 
18 | // @Param({"15", "255", "4095", "65535", "1048575", "16777215", "268435455"}) 19 | // private int LENGTH; 20 | // private float[] a; 21 | // private float[] b; 22 | // 23 | // 24 | // @Setup(Level.Iteration) 25 | // public void init(){ 26 | // this.a = GeneratorHelpers.initFloatArray(LENGTH); 27 | // this.b = GeneratorHelpers.initFloatArray(LENGTH); 28 | // } 29 | // 30 | // 31 | // @Benchmark 32 | // public void arrayFmaScalar(Blackhole bh){ 33 | // bh.consume(FmaArray.scalarFMA(a, b)); 34 | // } 35 | // 36 | // @Benchmark 37 | // public void arrayFmaVector(Blackhole bh){ 38 | // bh.consume(FmaArray.vectorFMA(a, b)); 39 | // } 40 | // 41 | //} 42 | -------------------------------------------------------------------------------- /src/jmh/java/ch/styp/SumArrayBenchManual.java: -------------------------------------------------------------------------------- 1 | /* 2 | package ch.styp; 3 | 4 | import org.openjdk.jmh.annotations.*; 5 | import org.openjdk.jmh.infra.Blackhole; 6 | 7 | import jdk.incubator.vector.IntVector; 8 | import jdk.incubator.vector.VectorSpecies; 9 | 10 | 11 | @State(Scope.Benchmark) 12 | @Fork(jvmArgsAppend = "--add-modules jdk.incubator.vector") 13 | public class SumArrayBenchManual { 14 | 15 | // All these numbers are 2^n-1 to avoid memory alignment! 
16 | @Param({"15", "255", "4095", "65535", "1048575", "16777215", "268435455"}) 17 | private int LENGTH; 18 | private int[] a; 19 | private int[] b; 20 | 21 | 22 | @Setup(Level.Iteration) 23 | public void init(){ 24 | this.a = GeneratorHelpers.initIntArray(LENGTH); 25 | this.b = GeneratorHelpers.initIntArray(LENGTH); 26 | } 27 | 28 | @Benchmark 29 | public void arraySumVector128(Blackhole bh){ 30 | bh.consume(SumArray.vectorComputation(a, b, IntVector.SPECIES_128)); 31 | } 32 | 33 | @Benchmark 34 | public void arraySumVector256(Blackhole bh){ 35 | bh.consume(SumArray.vectorComputation(a, b, IntVector.SPECIES_256)); 36 | } 37 | 38 | @Benchmark 39 | public void arraySumVector512(Blackhole bh){ 40 | bh.consume(SumArray.vectorComputation(a, b, IntVector.SPECIES_512)); 41 | } 42 | 43 | @Benchmark 44 | public void arraySumVectorAuto(Blackhole bh){ 45 | bh.consume(SumArray.vectorComputation(a, b, IntVector.SPECIES_PREFERRED)); 46 | } 47 | 48 | @Benchmark 49 | public void arraySumScalar(Blackhole bh) { 50 | bh.consume(SumArray.scalarComputation(a, b)); 51 | } 52 | 53 | } 54 | */ 55 | -------------------------------------------------------------------------------- /src/jmh/java/ch/styp/SumArrayBenchNoSuperVectorOpt.java: -------------------------------------------------------------------------------- 1 | //package ch.styp; 2 | // 3 | //import org.openjdk.jmh.annotations.*; 4 | //import org.openjdk.jmh.infra.Blackhole; 5 | // 6 | //import jdk.incubator.vector.IntVector; 7 | // 8 | // 9 | //@State(Scope.Benchmark) 10 | //@Fork(jvmArgsAppend = "--add-modules jdk.incubator.vector, -XX:+UseSuperWord") 11 | //public class SumArrayBenchNoSuperVectorOpt { 12 | // 13 | // // All these numbers are 2^n-1 to avoid memory alignment! 
14 | // @Param({"15", "255", "4095", "65535", "1048575", "16777215", "268435455"}) 15 | // private int LENGTH; 16 | // private int[] a; 17 | // private int[] b; 18 | // 19 | // 20 | // @Setup(Level.Iteration) 21 | // public void init(){ 22 | // this.a = GeneratorHelpers.initIntArray(LENGTH); 23 | // this.b = GeneratorHelpers.initIntArray(LENGTH); 24 | // } 25 | // 26 | // @Benchmark 27 | // public void arraySumScalarNoOpt(Blackhole bh) { 28 | // bh.consume(SumArray.scalarComputation(a, b)); 29 | // } 30 | // 31 | //} 32 | -------------------------------------------------------------------------------- /src/jmh/java/ch/styp/SumArrayBenchmark.java: -------------------------------------------------------------------------------- 1 | //package ch.styp; 2 | // 3 | //import org.openjdk.jmh.annotations.*; 4 | //import org.openjdk.jmh.infra.Blackhole; 5 | // 6 | //import java.util.Random; 7 | //import java.util.concurrent.TimeUnit; 8 | // 9 | // 10 | //@State(Scope.Benchmark) 11 | //@Fork(jvmArgsPrepend = {"--add-modules=jdk.incubator.vector", 12 | // "-XX:-TieredCompilation", 13 | // "-XX:+UseVectorCmov", 14 | // "-XX:+UseCMoveUnconditionally", 15 | // "-Djdk.incubator.vector.VECTOR_ACCESS_OOB_CHECK=0"}) 16 | // 17 | //public class SumArrayBenchmark { 18 | // 19 | // // All these numbers are 2^n-1 to avoid memory alignment! 
20 | // @Param({"15", "255", "4095", "65535", "1048575", "16777215", "268435455"}) 21 | // 22 | //// @Param({"53", "541", "5381"}) 23 | // private int LENGTH; 24 | // private int[] a; 25 | // private int[] b; 26 | // 27 | // 28 | // @Setup(Level.Iteration) 29 | // public void init() { 30 | // this.a = GeneratorHelpers.initIntArray(LENGTH); 31 | // this.b = GeneratorHelpers.initIntArray(LENGTH); 32 | // } 33 | // 34 | // @Benchmark 35 | // public void arraySumScalar(Blackhole bh) { 36 | // bh.consume(SumArray.scalarComputation(a, b)); 37 | // } 38 | // 39 | // @Benchmark 40 | // public void arraySumVector(Blackhole bh) { 41 | // bh.consume(SumArray.vectorComputation(a, b)); 42 | // } 43 | // 44 | //// @Benchmark 45 | //// public void arrayFmaScalar(Blackhole bh){ 46 | //// bh.consume(SumArray.scalarFMA(a, b)); 47 | //// } 48 | //// 49 | //// @Benchmark 50 | //// public void arrayFmaVector(Blackhole bh){ 51 | //// bh.consume(SumArray.vectorFMA(a, b)); 52 | //// } 53 | // 54 | //} -------------------------------------------------------------------------------- /src/main/java/ch/styp/FmaArray.java: -------------------------------------------------------------------------------- 1 | package ch.styp; 2 | 3 | import jdk.incubator.vector.FloatVector; 4 | import jdk.incubator.vector.IntVector; 5 | import jdk.incubator.vector.VectorOperators; 6 | import jdk.incubator.vector.VectorSpecies; 7 | 8 | public class FmaArray { 9 | 10 | private static final VectorSpecies SPECIES = FloatVector.SPECIES_PREFERRED; 11 | 12 | // FMA: Fused Multiply Add: c = c + (a * b) 13 | public static float scalarFMA(float[] a, float[] b){ 14 | var c = 0.0f; 15 | 16 | for(var i=0; i < a.length; i++){ 17 | c = Math.fma(a[i], b[i], c); 18 | } 19 | return c; 20 | } 21 | 22 | public static float vectorFMA(float[] a, float[] b){ 23 | var upperBound = SPECIES.loopBound(a.length); 24 | var sum = FloatVector.zero(SPECIES); 25 | 26 | var i = 0; 27 | for (; i < upperBound; i += SPECIES.length()) { 28 | // 
FloatVector va, vb, vc 29 | var va = FloatVector.fromArray(SPECIES, a, i); 30 | var vb = FloatVector.fromArray(SPECIES, b, i); 31 | sum = va.fma(vb, sum); 32 | } 33 | var c = sum.reduceLanes(VectorOperators.ADD); 34 | 35 | for (; i < a.length; i++) { // Cleanup loop 36 | c += a[i] * b[i]; 37 | } 38 | return c; 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /src/main/java/ch/styp/GeneratorHelpers.java: -------------------------------------------------------------------------------- 1 | package ch.styp; 2 | 3 | import java.util.Random; 4 | import java.util.concurrent.ThreadLocalRandom; 5 | 6 | public class GeneratorHelpers { 7 | 8 | public static float[] initFloatArray(int length){ 9 | var floatArray = new float[length]; 10 | 11 | Random rand = new Random(); 12 | for(var i = 0; i SPECIES = FloatVector.SPECIES_PREFERRED; 12 | 13 | public float[] baseline(float[] a, float[] b, int n) { 14 | float[] c = new float[n * n]; 15 | 16 | for (int i = 0; i < n; i++) { 17 | for (int j = 0; j < n; j++) { 18 | float sum = 0.0f; 19 | for (int k = 0; k < n; k++) { 20 | sum += a[i * n + k] * b[k * n + j]; 21 | } 22 | c[i * n + j] = sum; 23 | } 24 | } 25 | return c; 26 | } 27 | 28 | public float[] blocked(float[] a, float[] b, int n, final int blocksize) { 29 | float[] c = new float[n * n]; 30 | 31 | for (int kk = 0; kk < n; kk += blocksize) { 32 | for (int jj = 0; jj < n; jj += blocksize) { 33 | for (int i = 0; i < n; i++) { 34 | for (int j = jj; j < jj + blocksize; ++j) { 35 | float sum = c[i * n + j]; 36 | for (int k = kk; k < kk + blocksize; ++k) { 37 | sum += a[i * n + k] * b[k * n + j]; 38 | } 39 | c[i * n + j] = sum; 40 | } 41 | } 42 | } 43 | } 44 | return c; 45 | } 46 | 47 | public float[] simpleFMA(float[] a, float[] b, int n) { 48 | float[] c = new float[n * n]; 49 | 50 | for (int i = 0; i < n; i++) { 51 | for (int k = 0; k < n; k++) { 52 | float aik = a[i * n + k]; 53 | for (int j = 0; j < n; j++) { 54 | c[i * n + j] 
= Math.fma(aik, b[k * n + j], c[i * n + j]); 55 | } 56 | } 57 | } 58 | return c; 59 | } 60 | 61 | public float[] simpleVectorPrefered(float[] a, float[] b, int n) { 62 | final VectorSpecies SPECIES = FloatVector.SPECIES_PREFERRED; 63 | 64 | final int upperBound = SPECIES.loopBound(n); 65 | float[] c = new float[n * n]; 66 | 67 | for (int i = 0; i < n; i++) { 68 | for (int k = 0; k < n; k++) { 69 | float aik = a[i * n + k]; 70 | FloatVector vaik = FloatVector.broadcast(SPECIES, aik); 71 | for (int j = 0; j < upperBound; j += SPECIES.length()) { 72 | FloatVector vb = FloatVector.fromArray(SPECIES, b, k * n + j); 73 | FloatVector vc = FloatVector.fromArray(SPECIES, c, i * n + j); 74 | vc = vaik.fma(vb, vc); 75 | vc.intoArray(c, i * n + j); 76 | } 77 | } 78 | } 79 | return c; 80 | } 81 | 82 | public float[] simpleVectorAVX256(float[] a, float[] b, int n) { 83 | final VectorSpecies SPECIES = FloatVector.SPECIES_256; 84 | 85 | final int upperBound = SPECIES.loopBound(n); 86 | float[] c = new float[n * n]; 87 | 88 | for (int i = 0; i < n; i++) { 89 | for (int k = 0; k < n; k++) { 90 | float aik = a[i * n + k]; 91 | FloatVector vaik = FloatVector.broadcast(SPECIES, aik); 92 | for (int j = 0; j < upperBound; j += SPECIES.length()) { 93 | FloatVector vb = FloatVector.fromArray(SPECIES, b, k * n + j); 94 | FloatVector vc = FloatVector.fromArray(SPECIES, c, i * n + j); 95 | vc = vaik.fma(vb, vc); 96 | vc.intoArray(c, i * n + j); 97 | } 98 | } 99 | } 100 | return c; 101 | } 102 | 103 | public float[] simpleVectorAVX512(float[] a, float[] b, int n) { 104 | final VectorSpecies SPECIES = FloatVector.SPECIES_512; 105 | 106 | final int upperBound = SPECIES.loopBound(n); 107 | float[] c = new float[n * n]; 108 | 109 | for (int i = 0; i < n; i++) { 110 | for (int k = 0; k < n; k++) { 111 | float aik = a[i * n + k]; 112 | FloatVector vaik = FloatVector.broadcast(SPECIES, aik); 113 | for (int j = 0; j < upperBound; j += SPECIES.length()) { 114 | FloatVector vb = 
FloatVector.fromArray(SPECIES, b, k * n + j); 115 | FloatVector vc = FloatVector.fromArray(SPECIES, c, i * n + j); 116 | vc = vaik.fma(vb, vc); 117 | vc.intoArray(c, i * n + j); 118 | } 119 | } 120 | } 121 | return c; 122 | } 123 | 124 | public float[] blockedVectorPrefered(float[] a, float[] b, int n) { 125 | final VectorSpecies SPECIES = FloatVector.SPECIES_PREFERRED; 126 | 127 | float[] c = new float[n * n]; 128 | final int blockWidth = n >= 256 ? 512 : 256; 129 | final int blockHeight = n >= 512 ? 8 : n >= 256 ? 16 : 32; 130 | 131 | for (int rowOffset = 0; rowOffset < n; rowOffset += blockHeight) { 132 | for (int columnOffset = 0; columnOffset < n; columnOffset += blockWidth) { 133 | for (int i = 0; i < n; i++) { 134 | for (int j = columnOffset; j < columnOffset + blockWidth && j < n; j += SPECIES.length()) { 135 | FloatVector sum = FloatVector.fromArray(SPECIES, c, i * n + j); 136 | for (int k = rowOffset; k < rowOffset + blockHeight && k < n; k++) { 137 | FloatVector multiplier = FloatVector.broadcast(SPECIES, a[i * n + k]); 138 | sum = multiplier.fma(FloatVector.fromArray(SPECIES, b, k * n + j), sum); 139 | } 140 | sum.intoArray(c, i * n + j); 141 | } 142 | } 143 | } 144 | } 145 | return c; 146 | } 147 | 148 | public float[] blockedVectorAVX256(float[] a, float[] b, int n) { 149 | final VectorSpecies SPECIES = FloatVector.SPECIES_256; 150 | 151 | float[] c = new float[n * n]; 152 | final int blockWidth = n >= 256 ? 512 : 256; 153 | final int blockHeight = n >= 512 ? 8 : n >= 256 ? 
16 : 32; 154 | 155 | for (int rowOffset = 0; rowOffset < n; rowOffset += blockHeight) { 156 | for (int columnOffset = 0; columnOffset < n; columnOffset += blockWidth) { 157 | for (int i = 0; i < n; i++) { 158 | for (int j = columnOffset; j < columnOffset + blockWidth && j < n; j += SPECIES.length()) { 159 | FloatVector sum = FloatVector.fromArray(SPECIES, c, i * n + j); 160 | for (int k = rowOffset; k < rowOffset + blockHeight && k < n; k++) { 161 | FloatVector multiplier = FloatVector.broadcast(SPECIES, a[i * n + k]); 162 | sum = multiplier.fma(FloatVector.fromArray(SPECIES, b, k * n + j), sum); 163 | } 164 | sum.intoArray(c, i * n + j); 165 | } 166 | } 167 | } 168 | } 169 | return c; 170 | } 171 | 172 | public float[] blockedVectorAVX512(float[] a, float[] b, int n) { 173 | final VectorSpecies SPECIES = FloatVector.SPECIES_512; 174 | 175 | float[] c = new float[n * n]; 176 | final int blockWidth = n >= 256 ? 512 : 256; 177 | final int blockHeight = n >= 512 ? 8 : n >= 256 ? 16 : 32; 178 | 179 | for (int rowOffset = 0; rowOffset < n; rowOffset += blockHeight) { 180 | for (int columnOffset = 0; columnOffset < n; columnOffset += blockWidth) { 181 | for (int i = 0; i < n; i++) { 182 | for (int j = columnOffset; j < columnOffset + blockWidth && j < n; j += SPECIES.length()) { 183 | FloatVector sum = FloatVector.fromArray(SPECIES, c, i * n + j); 184 | for (int k = rowOffset; k < rowOffset + blockHeight && k < n; k++) { 185 | FloatVector multiplier = FloatVector.broadcast(SPECIES, a[i * n + k]); 186 | sum = multiplier.fma(FloatVector.fromArray(SPECIES, b, k * n + j), sum); 187 | } 188 | sum.intoArray(c, i * n + j); 189 | } 190 | } 191 | } 192 | } 193 | return c; 194 | } 195 | 196 | public float[] blockedVectorUnrolledAVX512(float[] a, float[] b, int n) { 197 | float[] c = new float[n * n]; 198 | final int blockWidth = n >= 256 ? 512 : 256; 199 | final int blockHeight = n >= 512 ? 8 : n >= 256 ? 
16 : 32; 200 | 201 | for (int rowOffset = 0; rowOffset < n; rowOffset += blockHeight) { 202 | for (int columnOffset = 0; columnOffset < n; columnOffset += blockWidth) { 203 | for (int i = 0; i < n; i++) { 204 | for (int j = columnOffset; j < columnOffset + blockWidth && j < n; j += 64) { 205 | FloatVector sum1 = FloatVector.fromArray(SPECIES, c, i * n + j); 206 | FloatVector sum2 = FloatVector.fromArray(SPECIES, c, i * n + j + 16); 207 | FloatVector sum3 = FloatVector.fromArray(SPECIES, c, i * n + j + 32); 208 | FloatVector sum4 = FloatVector.fromArray(SPECIES, c, i * n + j + 48); 209 | for (int k = rowOffset; k < rowOffset + blockHeight && k < n; ++k) { 210 | FloatVector multiplier = FloatVector.broadcast(SPECIES, a[i * n + k]); 211 | sum1 = multiplier.fma(FloatVector.fromArray(SPECIES, b, k * n + j), sum1); 212 | sum2 = multiplier.fma(FloatVector.fromArray(SPECIES, b, k * n + j + 16), sum2); 213 | sum3 = multiplier.fma(FloatVector.fromArray(SPECIES, b, k * n + j + 32), sum3); 214 | sum4 = multiplier.fma(FloatVector.fromArray(SPECIES, b, k * n + j + 48), sum4); 215 | } 216 | sum1.intoArray(c, i * n + j); 217 | sum2.intoArray(c, i * n + j + 16); 218 | sum3.intoArray(c, i * n + j + 32); 219 | sum4.intoArray(c, i * n + j + 48); 220 | } 221 | } 222 | } 223 | } 224 | return c; 225 | } 226 | 227 | public float[] blockedVectorUnrolledAVX256(float[] a, float[] b, int n) { 228 | float[] c = new float[n * n]; 229 | final int blockWidth = n >= 256 ? 512 : 256; 230 | final int blockHeight = n >= 512 ? 8 : n >= 256 ? 
16 : 32; 231 | 232 | for (int rowOffset = 0; rowOffset < n; rowOffset += blockHeight) { 233 | for (int columnOffset = 0; columnOffset < n; columnOffset += blockWidth) { 234 | for (int i = 0; i < n; i++) { 235 | for (int j = columnOffset; j < columnOffset + blockWidth && j < n; j += 64) { 236 | FloatVector sum1 = FloatVector.fromArray(SPECIES, c, i * n + j); 237 | FloatVector sum2 = FloatVector.fromArray(SPECIES, c, i * n + j + 8); 238 | FloatVector sum3 = FloatVector.fromArray(SPECIES, c, i * n + j + 16); 239 | FloatVector sum4 = FloatVector.fromArray(SPECIES, c, i * n + j + 24); 240 | FloatVector sum5 = FloatVector.fromArray(SPECIES, c, i * n + j + 32); 241 | FloatVector sum6 = FloatVector.fromArray(SPECIES, c, i * n + j + 40); 242 | FloatVector sum7 = FloatVector.fromArray(SPECIES, c, i * n + j + 48); 243 | FloatVector sum8 = FloatVector.fromArray(SPECIES, c, i * n + j + 56); 244 | for (int k = rowOffset; k < rowOffset + blockHeight && k < n; ++k) { 245 | FloatVector multiplier = FloatVector.broadcast(SPECIES, a[i * n + k]); 246 | sum1 = multiplier.fma(FloatVector.fromArray(SPECIES, b, k * n + j), sum1); 247 | sum2 = multiplier.fma(FloatVector.fromArray(SPECIES, b, k * n + j + 8), sum2); 248 | sum3 = multiplier.fma(FloatVector.fromArray(SPECIES, b, k * n + j + 16), sum3); 249 | sum4 = multiplier.fma(FloatVector.fromArray(SPECIES, b, k * n + j + 24), sum4); 250 | sum5 = multiplier.fma(FloatVector.fromArray(SPECIES, b, k * n + j + 32), sum5); 251 | sum6 = multiplier.fma(FloatVector.fromArray(SPECIES, b, k * n + j + 40), sum6); 252 | sum7 = multiplier.fma(FloatVector.fromArray(SPECIES, b, k * n + j + 48), sum7); 253 | sum8 = multiplier.fma(FloatVector.fromArray(SPECIES, b, k * n + j + 56), sum8); 254 | } 255 | sum1.intoArray(c, i * n + j); 256 | sum2.intoArray(c, i * n + j + 8); 257 | sum3.intoArray(c, i * n + j + 16); 258 | sum4.intoArray(c, i * n + j + 24); 259 | sum5.intoArray(c, i * n + j + 32); 260 | sum6.intoArray(c, i * n + j + 40); 261 | sum7.intoArray(c, i 
* n + j + 48); 262 | sum8.intoArray(c, i * n + j + 56); 263 | } 264 | } 265 | } 266 | } 267 | return c; 268 | } 269 | 270 | } 271 | -------------------------------------------------------------------------------- /src/main/java/ch/styp/SumArray.java: -------------------------------------------------------------------------------- 1 | package ch.styp; 2 | 3 | import jdk.incubator.vector.FloatVector; 4 | import jdk.incubator.vector.IntVector; 5 | import jdk.incubator.vector.VectorSpecies; 6 | 7 | 8 | public class SumArray { 9 | 10 | private static final VectorSpecies SPECIES = IntVector.SPECIES_PREFERRED; 11 | public static int[] scalarComputation(int[] a, int[] b) { 12 | var c = new int[a.length]; 13 | 14 | for (var i = 0; i < a.length; i++) { 15 | c[i] = a[i] + b[i]; 16 | } 17 | 18 | return c; 19 | } 20 | 21 | public static int[] vectorComputation(int[] a, int[] b, VectorSpecies species) { 22 | var c = new int[a.length]; 23 | var upperBound = species.loopBound(a.length); 24 | 25 | var i = 0; 26 | for (; i < upperBound; i += species.length()) { 27 | // FloatVector va, vb, vc 28 | var va = IntVector.fromArray(species, a, i); 29 | var vb = IntVector.fromArray(species, b, i); 30 | var vc = va.add(vb); 31 | vc.intoArray(c, i); 32 | } 33 | 34 | for (; i < a.length; i++) { // Cleanup loop 35 | c[i] = a[i] + b[i]; 36 | } 37 | 38 | return c; 39 | } 40 | 41 | public static int[] vectorComputation(int[] a, int[] b) { 42 | var c = new int[a.length]; 43 | var upperBound = SPECIES.loopBound(a.length); 44 | 45 | var i = 0; 46 | for (; i < upperBound; i += SPECIES.length()) { 47 | // FloatVector va, vb, vc 48 | var va = IntVector.fromArray(SPECIES, a, i); 49 | var vb = IntVector.fromArray(SPECIES, b, i); 50 | var vc = va.add(vb); 51 | vc.intoArray(c, i); 52 | } 53 | 54 | for (; i < a.length; i++) { // Cleanup loop 55 | c[i] = a[i] + b[i]; 56 | } 57 | 58 | return c; 59 | 60 | } 61 | 62 | 63 | 64 | } 65 | 
-------------------------------------------------------------------------------- /src/main/java/ch/styp/TestMain.java: -------------------------------------------------------------------------------- 1 | package ch.styp; 2 | 3 | import java.util.Random; 4 | 5 | public class TestMain { 6 | 7 | public static void main(String... args) { 8 | TestMain testMain = new TestMain(); 9 | int[] a = testMain.initIntArray(8192); 10 | int[] b = testMain.initIntArray(8192); 11 | long timeNow = System.currentTimeMillis(); 12 | for(int i = 0; i <= 100000; i++) { 13 | testMain.addArray(a, b); 14 | } 15 | System.out.print(System.currentTimeMillis() - timeNow); 16 | } 17 | 18 | public int[] initIntArray(int length){ 19 | var intArray = new int[length]; 20 | 21 | Random rand = new Random(); 22 | for(var i = 0; i SPECIES = IntVector.SPECIES_PREFERRED; 35 | System.out.print(SPECIES); 36 | } 37 | 38 | @Test 39 | void fmaArrayVectorVsScalar(){ 40 | // Prime Number, that doesn't make the registers align by accident! 41 | final int PRIME_NUMBER = 919; 42 | 43 | var a = GeneratorHelpers.initFloatArray(PRIME_NUMBER); 44 | var b = GeneratorHelpers.initFloatArray(PRIME_NUMBER); 45 | 46 | var c_scalar = FmaArray.scalarFMA(a, b); 47 | var c_vector = FmaArray.vectorFMA(a, b); 48 | 49 | assertEquals(c_scalar, c_vector, 0.0001f); 50 | 51 | } 52 | 53 | } 54 | --------------------------------------------------------------------------------