├── .gitignore ├── LICENSE ├── README.md ├── build.gradle ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── gradlew ├── gradlew.bat ├── inverse32.py ├── inverse64.py ├── results ├── 32bit │ ├── benchmark-results.txt │ └── img │ │ ├── add │ │ ├── 0.5LF.svg │ │ ├── 0.75LF.svg │ │ ├── 0.95LF.svg │ │ ├── 0.99LF.svg │ │ └── 0.9LF.svg │ │ ├── contains │ │ ├── successful │ │ │ ├── avg │ │ │ │ ├── 0.5LF.svg │ │ │ │ ├── 0.75LF.svg │ │ │ │ ├── 0.95LF.svg │ │ │ │ ├── 0.99LF.svg │ │ │ │ └── 0.9LF.svg │ │ │ └── pct │ │ │ │ ├── 99 │ │ │ │ ├── 0.5LF.svg │ │ │ │ ├── 0.75LF.svg │ │ │ │ ├── 0.95LF.svg │ │ │ │ ├── 0.99LF.svg │ │ │ │ └── 0.9LF.svg │ │ │ │ └── 99.9 │ │ │ │ ├── 0.5LF.svg │ │ │ │ ├── 0.75LF.svg │ │ │ │ ├── 0.95LF.svg │ │ │ │ ├── 0.99LF.svg │ │ │ │ └── 0.9LF.svg │ │ └── unsuccessful │ │ │ ├── avg │ │ │ ├── 0.5LF.svg │ │ │ ├── 0.75LF.svg │ │ │ ├── 0.95LF.svg │ │ │ ├── 0.99LF.svg │ │ │ └── 0.9LF.svg │ │ │ └── pct │ │ │ ├── 99 │ │ │ ├── 0.5LF.svg │ │ │ ├── 0.75LF.svg │ │ │ ├── 0.95LF.svg │ │ │ ├── 0.99LF.svg │ │ │ └── 0.9LF.svg │ │ │ └── 99.9 │ │ │ ├── 0.5LF.svg │ │ │ ├── 0.75LF.svg │ │ │ ├── 0.95LF.svg │ │ │ ├── 0.99LF.svg │ │ │ └── 0.9LF.svg │ │ └── remove │ │ ├── 0.5LF.svg │ │ ├── 0.75LF.svg │ │ ├── 0.95LF.svg │ │ ├── 0.99LF.svg │ │ └── 0.9LF.svg └── 64bit │ ├── benchmarks-results.txt │ └── img │ ├── add │ ├── 0.5LF.svg │ ├── 0.75LF.svg │ ├── 0.95LF.svg │ ├── 0.99LF.svg │ └── 0.9LF.svg │ ├── contains │ ├── successful │ │ ├── avg │ │ │ ├── 0.5LF.svg │ │ │ ├── 0.75LF.svg │ │ │ ├── 0.95LF.svg │ │ │ ├── 0.99LF.svg │ │ │ └── 0.9LF.svg │ │ └── pct │ │ │ ├── 99 │ │ │ ├── 0.5LF.svg │ │ │ ├── 0.75LF.svg │ │ │ ├── 0.95LF.svg │ │ │ ├── 0.99LF.svg │ │ │ └── 0.9LF.svg │ │ │ └── 99.9 │ │ │ ├── 0.5LF.svg │ │ │ ├── 0.75LF.svg │ │ │ ├── 0.95LF.svg │ │ │ ├── 0.99LF.svg │ │ │ └── 0.9LF.svg │ └── unsuccessful │ │ ├── avg │ │ ├── 0.5LF.svg │ │ ├── 0.75LF.svg │ │ ├── 0.95LF.svg │ │ ├── 0.99LF.svg │ │ └── 0.9LF.svg │ │ └── pct │ │ ├── 99 │ │ ├── 0.5LF.svg │ │ 
├── 0.75LF.svg │ │ ├── 0.95LF.svg │ │ ├── 0.99LF.svg │ │ └── 0.9LF.svg │ │ └── 99.9 │ │ ├── 0.5LF.svg │ │ ├── 0.75LF.svg │ │ ├── 0.95LF.svg │ │ ├── 0.99LF.svg │ │ └── 0.9LF.svg │ └── remove │ ├── 0.5LF.svg │ ├── 0.75LF.svg │ ├── 0.95LF.svg │ ├── 0.99LF.svg │ └── 0.9LF.svg ├── settings.gradle └── src ├── jmh └── java │ ├── hash │ ├── int32 │ │ └── Benchmarks.java │ └── int64 │ │ └── Benchmarks.java │ └── set │ ├── int32 │ └── Benchmarks.java │ └── int64 │ └── Benchmarks.java ├── main └── java │ ├── Utils.java │ ├── hash │ ├── Speck32Cipher.java │ ├── Speck64Cipher.java │ ├── SpeckCipher.java │ ├── int32 │ │ ├── H2IntHasher.java │ │ ├── IdentityIntHasher.java │ │ ├── IntHasher.java │ │ ├── Murmur3IntHasher.java │ │ ├── PhiIntHasher.java │ │ ├── Prospector2RoundIntHasher.java │ │ ├── Prospector3RoundIntHasher.java │ │ └── SpeckIntHasher.java │ └── int64 │ │ ├── DegskiLongHasher.java │ │ ├── IdentityLongHasher.java │ │ ├── LongHasher.java │ │ ├── Murmur3LongHasher.java │ │ ├── PhiLongHasher.java │ │ ├── SpeckLongHasher.java │ │ ├── Variant13LongHasher.java │ │ └── WangLongHasher.java │ └── set │ ├── int32 │ ├── BLPIntHashSet.java │ ├── IntSet.java │ ├── LCFSIntHashSet.java │ ├── LPIntHashSet.java │ └── RHIntHashSet.java │ └── int64 │ ├── BLPLongHashSet.java │ ├── LCFSLongHashSet.java │ ├── LPLongHashSet.java │ ├── LongSet.java │ └── RHLongHashSet.java └── test └── java ├── hash ├── int32 │ └── VerifyHashInverses.java └── int64 │ └── VerifyHashInverses.java └── set ├── int32 └── VerifyHashSet.java └── int64 └── VerifyHashSet.java /.gitignore: -------------------------------------------------------------------------------- 1 | # https://github.com/gradle/gradle/blob/master/.gitignore 2 | 3 | # Gradle 4 | # ------ 5 | .gradle 6 | /build 7 | /buildSrc/build 8 | /buildSrc/subprojects/*/build 9 | /subprojects/*/build 10 | /subprojects/docs/src/samples/*/*/build 11 | /subprojects/internal-android-performance-testing/build-android-libs 12 | 13 | # IDEA 14 | # ---- 15 | .idea 
16 | .shelf 17 | /*.iml 18 | /*.ipr 19 | /*.iws 20 | /buildSrc/.idea 21 | /buildSrc/.shelf 22 | /buildSrc/*.iml 23 | /buildSrc/*.ipr 24 | /buildSrc/*.iws 25 | /buildSrc/out 26 | /buildSrc/subprojects/*/*.iml 27 | /buildSrc/subprojects/*/out 28 | /out 29 | /subprojects/*/*.iml 30 | /subprojects/*/out 31 | /.teamcity/*.iml 32 | /.teamcity/target 33 | 34 | # Eclipse 35 | # ------- 36 | *.classpath 37 | *.project 38 | *.settings 39 | /bin 40 | /subprojects/*/bin 41 | atlassian-ide-plugin.xml 42 | .metadata/ 43 | 44 | # NetBeans 45 | # -------- 46 | .nb-gradle 47 | .nb-gradle-properties 48 | 49 | # Vim 50 | # --- 51 | *.sw[op] 52 | 53 | # Emacs 54 | # ----- 55 | *~ 56 | \#*\# 57 | .\#* 58 | 59 | # Textmate 60 | # -------- 61 | .textmate 62 | 63 | # Sublime Text 64 | # ------------ 65 | *.sublime-* 66 | 67 | # jEnv 68 | # ---- 69 | .java-version 70 | 71 | # macOS 72 | # ---- 73 | .DS_Store 74 | 75 | # HPROF 76 | # ----- 77 | *.hprof 78 | 79 | # Work dirs 80 | # --------- 81 | /incoming-distributions 82 | /intTestHomeDir 83 | 84 | # Logs 85 | # ---- 86 | /*.log 87 | 88 | # oh-my-zsh gradle plugin 89 | .gradletasknamecache 90 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # hashtable-benchmarks 2 | 3 | ## An Evaluation of Linear Probing Hashtable Algorithms 4 | This repository contains implementations, unit and property tests, and benchmark code for 4 linear probing algorithms: standard linear probing, Last-Come First-Served (LCFS), Robin Hood (RH), and Bidirectional Linear Probing (BLP). The latter was published by Knuth in 1973 but has received little attention since. It outperforms all other linear probing variants tested, including the recently popular "Robin Hood" variant. 
5 | 6 | The current implementations only accept nonzero 32- or 64-bit integer keys, with no values (I use invertible hash functions, so there's no need to separately store hash codes). Deletions are tombstone-free, so there's no need to rehash after several deletions. Dynamic resizing is not currently supported (I have a separate project on incrementally resizing open-addressed hash tables which isn't ready to publish yet). I plan to implement `IntInt`, `LongLong`, `IntLong`, `LongInt` maps at some point. Eventually I want to implement a generic Java hash table using bidirectional linear probing and mapping hash codes to offsets in an array of object references (similar to CPython's `dict` implementation), but it will likely be a while before I have time for this. 7 | 8 | Additionally, I've collected a number of 32- and 64-bit invertible hash functions which may be of independent interest (I had to calculate most of the inverses myself, using the [inverse32.py](https://github.com/senderista/hashtable-benchmarks/blob/master/inverse32.py) and [inverse64.py](https://github.com/senderista/hashtable-benchmarks/blob/master/inverse64.py) scripts). The implementations can be found in the [src/main/java/hash/int32](https://github.com/senderista/hashtable-benchmarks/tree/master/src/main/java/hash/int32) and [src/main/java/hash/int64](https://github.com/senderista/hashtable-benchmarks/tree/master/src/main/java/hash/int64) directories. (I started work on a cryptographically strong invertible hash function based on the [Speck](https://github.com/inmcm/Simon_Speck_Ciphers) cipher as a simulation baseline, but didn't complete it, since simulation results didn't seem as practically important as performance results.) 9 | 10 | All implementations are in Java, and are benchmarked using the JMH benchmark framework. Property-based tests are implemented using the junit-quickcheck library. 
11 | 12 | If you have a [JDK](https://www.oracle.com/java/technologies/downloads/) and [Gradle](https://gradle.org/install/) installed, you can run unit and property-based tests from the repository root directory by typing `gradle test`. 13 | 14 | If you have a [JDK](https://www.oracle.com/java/technologies/downloads/) and [Gradle](https://gradle.org/install/) installed, you can run benchmarks from the repository root directory by typing `gradle jmh`. 15 | 16 | Javadoc (with extensive documentation of the algorithms involved) is here: 17 | https://senderista.github.io/hashtable-benchmarks/. 18 | 19 | Benchmark results are here: 20 | 21 | https://github.com/senderista/hashtable-benchmarks/wiki/32-bit-benchmarks 22 | 23 | https://github.com/senderista/hashtable-benchmarks/wiki/64-bit-benchmarks 24 | -------------------------------------------------------------------------------- /build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * This file was generated by the Gradle 'init' task. 3 | * 4 | * This generated file contains a sample Java Library project to get you started. 
5 | * For more details take a look at the Java Libraries chapter in the Gradle 6 | * user guide available at https://docs.gradle.org/4.10/userguide/java_library_plugin.html 7 | */ 8 | 9 | plugins { 10 | id 'java-library' 11 | id "me.champeau.gradle.jmh" version "0.5.0" 12 | id 'com.adarshr.test-logger' version '1.5.0' 13 | } 14 | 15 | dependencies { 16 | // Use JUnit 5 test framework 17 | testImplementation 'org.junit.jupiter:junit-jupiter-api:5.1.0' 18 | testRuntimeOnly 'org.junit.jupiter:junit-jupiter-engine:5.1.0' 19 | // QuickCheck tests depend on JUnit 4 20 | testCompileOnly 'junit:junit:4.12' 21 | testRuntimeOnly 'org.junit.vintage:junit-vintage-engine:5.1.0' 22 | // QuickCheck 23 | testCompile 'com.pholser:junit-quickcheck-core:0.9' 24 | testCompile 'com.pholser:junit-quickcheck-generators:0.9' 25 | // JMH 26 | jmh 'org.openjdk.jmh:jmh-core:1.21' 27 | jmh 'org.openjdk.jmh:jmh-generator-annprocess:1.21' 28 | } 29 | 30 | repositories { 31 | mavenCentral() 32 | } 33 | 34 | sourceSets { 35 | // Remove when Speck cipher/hasher is ready 36 | main { 37 | java { 38 | exclude '**/Speck*.java' 39 | } 40 | } 41 | sim { 42 | compileClasspath += sourceSets.main.output 43 | runtimeClasspath += sourceSets.main.output 44 | } 45 | } 46 | 47 | test { 48 | useJUnitPlatform() 49 | minHeapSize = "1g" 50 | maxHeapSize = "2g" 51 | } 52 | 53 | tasks.withType(JavaCompile) { 54 | options.compilerArgs += ["-Xlint:unchecked"] 55 | } 56 | 57 | jmh { 58 | // enable assertions 59 | jvmArgsAppend = '-ea -Xms1g -Xmx8g' 60 | } 61 | 62 | javadoc { 63 | source = sourceSets.main.allJava 64 | classpath = configurations.compile 65 | } 66 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/senderista/hashtable-benchmarks/bb0301a080bd0e14f4b9ae3f2b962f83f254e6d5/gradle/wrapper/gradle-wrapper.jar 
-------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-5.5.1-bin.zip 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | -------------------------------------------------------------------------------- /gradlew: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | # 4 | # Copyright 2015 the original author or authors. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # https://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | ############################################################################## 20 | ## 21 | ## Gradle start up script for UN*X 22 | ## 23 | ############################################################################## 24 | 25 | # Attempt to set APP_HOME 26 | # Resolve links: $0 may be a link 27 | PRG="$0" 28 | # Need this for relative symlinks. 
29 | while [ -h "$PRG" ] ; do 30 | ls=`ls -ld "$PRG"` 31 | link=`expr "$ls" : '.*-> \(.*\)$'` 32 | if expr "$link" : '/.*' > /dev/null; then 33 | PRG="$link" 34 | else 35 | PRG=`dirname "$PRG"`"/$link" 36 | fi 37 | done 38 | SAVED="`pwd`" 39 | cd "`dirname \"$PRG\"`/" >/dev/null 40 | APP_HOME="`pwd -P`" 41 | cd "$SAVED" >/dev/null 42 | 43 | APP_NAME="Gradle" 44 | APP_BASE_NAME=`basename "$0"` 45 | 46 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 47 | DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' 48 | 49 | # Use the maximum available, or set MAX_FD != -1 to use that value. 50 | MAX_FD="maximum" 51 | 52 | warn () { 53 | echo "$*" 54 | } 55 | 56 | die () { 57 | echo 58 | echo "$*" 59 | echo 60 | exit 1 61 | } 62 | 63 | # OS specific support (must be 'true' or 'false'). 64 | cygwin=false 65 | msys=false 66 | darwin=false 67 | nonstop=false 68 | case "`uname`" in 69 | CYGWIN* ) 70 | cygwin=true 71 | ;; 72 | Darwin* ) 73 | darwin=true 74 | ;; 75 | MINGW* ) 76 | msys=true 77 | ;; 78 | NONSTOP* ) 79 | nonstop=true 80 | ;; 81 | esac 82 | 83 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 84 | 85 | # Determine the Java command to use to start the JVM. 86 | if [ -n "$JAVA_HOME" ] ; then 87 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 88 | # IBM's JDK on AIX uses strange locations for the executables 89 | JAVACMD="$JAVA_HOME/jre/sh/java" 90 | else 91 | JAVACMD="$JAVA_HOME/bin/java" 92 | fi 93 | if [ ! -x "$JAVACMD" ] ; then 94 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 95 | 96 | Please set the JAVA_HOME variable in your environment to match the 97 | location of your Java installation." 98 | fi 99 | else 100 | JAVACMD="java" 101 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 102 | 103 | Please set the JAVA_HOME variable in your environment to match the 104 | location of your Java installation." 
105 | fi 106 | 107 | # Increase the maximum file descriptors if we can. 108 | if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then 109 | MAX_FD_LIMIT=`ulimit -H -n` 110 | if [ $? -eq 0 ] ; then 111 | if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then 112 | MAX_FD="$MAX_FD_LIMIT" 113 | fi 114 | ulimit -n $MAX_FD 115 | if [ $? -ne 0 ] ; then 116 | warn "Could not set maximum file descriptor limit: $MAX_FD" 117 | fi 118 | else 119 | warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" 120 | fi 121 | fi 122 | 123 | # For Darwin, add options to specify how the application appears in the dock 124 | if $darwin; then 125 | GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" 126 | fi 127 | 128 | # For Cygwin, switch paths to Windows format before running java 129 | if $cygwin ; then 130 | APP_HOME=`cygpath --path --mixed "$APP_HOME"` 131 | CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` 132 | JAVACMD=`cygpath --unix "$JAVACMD"` 133 | 134 | # We build the pattern for arguments to be converted via cygpath 135 | ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` 136 | SEP="" 137 | for dir in $ROOTDIRSRAW ; do 138 | ROOTDIRS="$ROOTDIRS$SEP$dir" 139 | SEP="|" 140 | done 141 | OURCYGPATTERN="(^($ROOTDIRS))" 142 | # Add a user-defined pattern to the cygpath arguments 143 | if [ "$GRADLE_CYGPATTERN" != "" ] ; then 144 | OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" 145 | fi 146 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 147 | i=0 148 | for arg in "$@" ; do 149 | CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` 150 | CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option 151 | 152 | if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition 153 | eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` 154 | else 155 | eval `echo args$i`="\"$arg\"" 156 | fi 157 | i=$((i+1)) 158 | done 159 | case $i in 160 | (0) set -- ;; 
161 | (1) set -- "$args0" ;; 162 | (2) set -- "$args0" "$args1" ;; 163 | (3) set -- "$args0" "$args1" "$args2" ;; 164 | (4) set -- "$args0" "$args1" "$args2" "$args3" ;; 165 | (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; 166 | (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; 167 | (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; 168 | (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; 169 | (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; 170 | esac 171 | fi 172 | 173 | # Escape application args 174 | save () { 175 | for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done 176 | echo " " 177 | } 178 | APP_ARGS=$(save "$@") 179 | 180 | # Collect all arguments for the java command, following the shell quoting and substitution rules 181 | eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" 182 | 183 | # by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong 184 | if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then 185 | cd "$(dirname "$0")" 186 | fi 187 | 188 | exec "$JAVACMD" "$@" 189 | -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @rem 2 | @rem Copyright 2015 the original author or authors. 3 | @rem 4 | @rem Licensed under the Apache License, Version 2.0 (the "License"); 5 | @rem you may not use this file except in compliance with the License. 
6 | @rem You may obtain a copy of the License at 7 | @rem 8 | @rem https://www.apache.org/licenses/LICENSE-2.0 9 | @rem 10 | @rem Unless required by applicable law or agreed to in writing, software 11 | @rem distributed under the License is distributed on an "AS IS" BASIS, 12 | @rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | @rem See the License for the specific language governing permissions and 14 | @rem limitations under the License. 15 | @rem 16 | 17 | @if "%DEBUG%" == "" @echo off 18 | @rem ########################################################################## 19 | @rem 20 | @rem Gradle startup script for Windows 21 | @rem 22 | @rem ########################################################################## 23 | 24 | @rem Set local scope for the variables with windows NT shell 25 | if "%OS%"=="Windows_NT" setlocal 26 | 27 | set DIRNAME=%~dp0 28 | if "%DIRNAME%" == "" set DIRNAME=. 29 | set APP_BASE_NAME=%~n0 30 | set APP_HOME=%DIRNAME% 31 | 32 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 33 | set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" 34 | 35 | @rem Find java.exe 36 | if defined JAVA_HOME goto findJavaFromJavaHome 37 | 38 | set JAVA_EXE=java.exe 39 | %JAVA_EXE% -version >NUL 2>&1 40 | if "%ERRORLEVEL%" == "0" goto init 41 | 42 | echo. 43 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 44 | echo. 45 | echo Please set the JAVA_HOME variable in your environment to match the 46 | echo location of your Java installation. 47 | 48 | goto fail 49 | 50 | :findJavaFromJavaHome 51 | set JAVA_HOME=%JAVA_HOME:"=% 52 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 53 | 54 | if exist "%JAVA_EXE%" goto init 55 | 56 | echo. 57 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 58 | echo. 59 | echo Please set the JAVA_HOME variable in your environment to match the 60 | echo location of your Java installation. 
import sys


def inverse(a, bits=32):
    """Return the multiplicative inverse of the odd integer ``a`` modulo ``2**bits``.

    Uses the Newton (Hensel lifting) iteration ``x <- 2*x - a*x*x``, from
    https://groups.google.com/forum/m/#!msg/sci.crypt/UI-UMbUnYGk/hX2-wQVyE3oJ
    Every step doubles the number of low-order bits for which ``a * x == 1``.

    :param a: odd integer to invert; only its low ``bits`` bits are significant.
    :param bits: width of the modulus in bits (defaults to 32).
    :return: ``x`` in ``[0, 2**bits)`` such that ``(a * x) % 2**bits == 1``.
    :raises AssertionError: if ``a`` is even (an even number has no inverse
        modulo a power of two).
    """
    mask = (1 << bits) - 1
    a &= mask
    # The square of any odd number is 1 mod 8, so ``a`` is already its own
    # inverse in the low 3 bits; this also rejects even input.
    x = a
    assert (x * a & 0x7) == 1, "a must be odd"
    correct_bits = 3
    while correct_bits < bits:
        # Masking keeps intermediates small without changing the result mod 2**bits.
        x = (x + x - a * x * x) & mask
        correct_bits *= 2
    assert (x * a & mask) == 1
    return x


if __name__ == "__main__":
    # The argument is parsed as hexadecimal and the inverse is printed as hexadecimal.
    arg = int(sys.argv[1], 16)
    print(hex(inverse(arg)))
import sys


def inverse(a, bits=64):
    """Return the multiplicative inverse of the odd integer ``a`` modulo ``2**bits``.

    Uses the Newton (Hensel lifting) iteration ``x <- 2*x - a*x*x``, from
    https://groups.google.com/forum/m/#!msg/sci.crypt/UI-UMbUnYGk/hX2-wQVyE3oJ
    Every step doubles the number of low-order bits for which ``a * x == 1``.

    :param a: odd integer to invert; only its low ``bits`` bits are significant.
    :param bits: width of the modulus in bits (defaults to 64).
    :return: ``x`` in ``[0, 2**bits)`` such that ``(a * x) % 2**bits == 1``.
    :raises AssertionError: if ``a`` is even (an even number has no inverse
        modulo a power of two).
    """
    mask = (1 << bits) - 1
    a &= mask
    # The square of any odd number is 1 mod 8, so ``a`` is already its own
    # inverse in the low 3 bits; this also rejects even input.
    x = a
    assert (x * a & 0x7) == 1, "a must be odd"
    correct_bits = 3
    while correct_bits < bits:
        # Masking keeps intermediates small without changing the result mod 2**bits.
        x = (x + x - a * x * x) & mask
        correct_bits *= 2
    assert (x * a & mask) == 1
    return x


if __name__ == "__main__":
    # The argument is parsed as a decimal integer and the inverse is printed in decimal.
    arg = int(sys.argv[1])
    print(inverse(arg))
10k100k1M10M100M150200250300350ClassName=BLPIntHashSetClassName=LCFSIntHashSetClassName=LPIntHashSetClassName=RHIntHashSet99.9th percentile time for successful lookup (90% load factor)Input Sizens/op -------------------------------------------------------------------------------- /results/32bit/img/contains/successful/pct/99/0.99LF.svg: -------------------------------------------------------------------------------- 1 | 10k100k1M10M100M100150200250300ClassName=BLPIntHashSetClassName=LCFSIntHashSetClassName=LPIntHashSetClassName=RHIntHashSet99th percentile time for successful lookup (99% load factor)Input Sizens/op -------------------------------------------------------------------------------- /results/32bit/img/contains/unsuccessful/avg/0.5LF.svg: -------------------------------------------------------------------------------- 1 | 10k100k1M10M100M810121416ClassName=LPIntHashSetClassName=LCFSIntHashSetClassName=RHIntHashSetClassName=BLPIntHashSetAverage time for unsuccessful lookup (50% load factor)Input Sizens/op -------------------------------------------------------------------------------- /results/32bit/img/contains/unsuccessful/avg/0.95LF.svg: -------------------------------------------------------------------------------- 1 | 10k100k1M10M100M20406080100ClassName=LPIntHashSetClassName=LCFSIntHashSetClassName=RHIntHashSetClassName=BLPIntHashSetAverage time for unsuccessful lookup (95% load factor)Input Sizens/op -------------------------------------------------------------------------------- /results/32bit/img/contains/unsuccessful/pct/99/0.75LF.svg: -------------------------------------------------------------------------------- 1 | 10k100k1M10M100M65707580ClassName=BLPIntHashSetClassName=LCFSIntHashSetClassName=LPIntHashSetClassName=RHIntHashSet99th percentile time for unsuccessful lookup (75% load factor)Input Sizens/op -------------------------------------------------------------------------------- /results/32bit/img/remove/0.75LF.svg: 
-------------------------------------------------------------------------------- 1 | 10k100k1M10M100M300400500600700ClassName=LPIntHashSetClassName=LCFSIntHashSetClassName=RHIntHashSetClassName=BLPIntHashSetTime to remove 1K batch (75% load factor)Table Sizeμs -------------------------------------------------------------------------------- /results/64bit/img/contains/unsuccessful/avg/0.75LF.svg: -------------------------------------------------------------------------------- 1 | 10k100k1M10M100M10152025ClassName=LPLongHashSetClassName=LCFSLongHashSetClassName=RHLongHashSetClassName=BLPLongHashSetAverage time for unsuccessful lookup (75% load factor)Input Sizens/op -------------------------------------------------------------------------------- /results/64bit/img/remove/0.99LF.svg: -------------------------------------------------------------------------------- 1 | 10k100k1M10M100M05k10k15k20kClassName=LPLongHashSetClassName=RHLongHashSetClassName=BLPLongHashSetClassName=LCFSLongHashSetTime to remove 1K batch (99% load factor)Table Sizeμs -------------------------------------------------------------------------------- /settings.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * This file was generated by the Gradle 'init' task. 3 | * 4 | * The settings file is used to specify which projects to include in your build. 
5 | * 6 | * Detailed information about configuring a multi-project build in Gradle can be found 7 | * in the user guide at https://docs.gradle.org/4.10/userguide/multi_project_builds.html 8 | */ 9 | 10 | rootProject.name = 'hashtable-benchmarks' 11 | -------------------------------------------------------------------------------- /src/jmh/java/hash/int32/Benchmarks.java: -------------------------------------------------------------------------------- 1 | package hash.int32; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | 5 | import java.lang.reflect.InvocationTargetException; 6 | 7 | import java.util.Random; 8 | import java.util.concurrent.TimeUnit; 9 | 10 | import java.security.SecureRandom; 11 | 12 | @State(Scope.Thread) 13 | @OutputTimeUnit(TimeUnit.NANOSECONDS) 14 | @Fork(1) 15 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 16 | @Measurement(iterations = 5, time = 2, timeUnit = TimeUnit.SECONDS) 17 | @BenchmarkMode(Mode.AverageTime) 18 | public class Benchmarks { 19 | private static final Random RND = new SecureRandom(); 20 | @Param({ 21 | IdentityIntHasher.NAME, 22 | PhiIntHasher.NAME, 23 | H2IntHasher.NAME, 24 | Murmur3IntHasher.NAME, 25 | Prospector2RoundIntHasher.NAME, 26 | Prospector3RoundIntHasher.NAME, 27 | // SpeckIntHasher.NAME, 28 | }) 29 | private String hasherClassName; 30 | private IntHasher hasher; 31 | private int randomInteger; 32 | 33 | @Setup(Level.Iteration) 34 | public void setup() throws ClassNotFoundException, InstantiationException, IllegalAccessException, 35 | NoSuchMethodException, InvocationTargetException { 36 | this.hasher = (IntHasher) Class.forName(hasherClassName).getDeclaredConstructor().newInstance(); 37 | this.randomInteger = RND.nextInt(); 38 | } 39 | 40 | @Benchmark 41 | public long measure() { 42 | return this.hasher.hash(this.randomInteger); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/jmh/java/hash/int64/Benchmarks.java: 
-------------------------------------------------------------------------------- 1 | package hash.int64; 2 | 3 | import org.openjdk.jmh.annotations.*; 4 | 5 | import java.lang.reflect.InvocationTargetException; 6 | 7 | import java.util.Random; 8 | import java.util.concurrent.TimeUnit; 9 | 10 | import java.security.SecureRandom; 11 | 12 | @State(Scope.Thread) 13 | @OutputTimeUnit(TimeUnit.NANOSECONDS) 14 | @Fork(1) 15 | @Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) 16 | @Measurement(iterations = 5, time = 2, timeUnit = TimeUnit.SECONDS) 17 | @BenchmarkMode(Mode.AverageTime) 18 | public class Benchmarks { 19 | private static final Random RND = new SecureRandom(); 20 | @Param({ 21 | IdentityLongHasher.NAME, 22 | PhiLongHasher.NAME, 23 | DegskiLongHasher.NAME, 24 | Murmur3LongHasher.NAME, 25 | Variant13LongHasher.NAME, 26 | WangLongHasher.NAME, 27 | // SpeckLongHasher.NAME, 28 | }) 29 | private String hasherClassName; 30 | private LongHasher hasher; 31 | private long randomLong; 32 | 33 | @Setup(Level.Iteration) 34 | public void setup() throws ClassNotFoundException, InstantiationException, IllegalAccessException, 35 | NoSuchMethodException, InvocationTargetException { 36 | this.hasher = (LongHasher) Class.forName(hasherClassName).getDeclaredConstructor().newInstance(); 37 | this.randomLong = RND.nextLong(); 38 | } 39 | 40 | @Benchmark 41 | public long measure() { 42 | return this.hasher.hash(this.randomLong); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/jmh/java/set/int32/Benchmarks.java: -------------------------------------------------------------------------------- 1 | package set.int32; 2 | 3 | import hash.int32.IntHasher; 4 | import hash.int32.PhiIntHasher; 5 | 6 | import java.util.Arrays; 7 | import java.util.Random; 8 | import java.util.concurrent.ThreadLocalRandom; 9 | import java.util.concurrent.TimeUnit; 10 | import java.util.stream.IntStream; 11 | import 
java.lang.reflect.InvocationTargetException; 12 | 13 | import org.openjdk.jmh.annotations.*; 14 | 15 | public class Benchmarks { 16 | 17 | private static final int FORKS = 1; 18 | private static final int ITERATIONS = 5; 19 | private static final int SINGLE_SHOT_MULTIPLIER = 1; 20 | private static final int BATCH_SIZE = 1000; 21 | 22 | // implements Fisher–Yates shuffle over range of array, inexplicably missing 23 | // from java.util.Arrays 24 | private static void shuffleArray(int[] arr, int start, int end) { 25 | Random rnd = ThreadLocalRandom.current(); 26 | for (int i = end - start - 1; i > 0; i--) { 27 | int index = rnd.nextInt(i + 1); 28 | int randomIndex = start + index; 29 | int currentIndex = start + i; 30 | assert start <= randomIndex && randomIndex < end; 31 | assert start <= currentIndex && currentIndex < end; 32 | // Simple swap 33 | int a = arr[randomIndex]; 34 | arr[randomIndex] = arr[currentIndex]; 35 | arr[currentIndex] = a; 36 | } 37 | } 38 | 39 | @State(Scope.Benchmark) 40 | public static class BenchmarkState { 41 | @Param({ 42 | LPIntHashSet.NAME, 43 | LCFSIntHashSet.NAME, 44 | RHIntHashSet.NAME, 45 | BLPIntHashSet.NAME, 46 | }) 47 | private String setClassName; 48 | 49 | @Param({ 50 | "10000", 51 | "100000", 52 | "1000000", 53 | "10000000", 54 | "100000000", 55 | }) 56 | private int setSize; 57 | 58 | @Param({ 59 | "0.5", 60 | "0.75", 61 | "0.9", 62 | "0.95", 63 | "0.99", 64 | // "1.0", 65 | }) 66 | private double loadFactor; 67 | 68 | public IntSet hashSetTemplate; 69 | public int[] newTestData; 70 | public int[] oldTestData; 71 | // we want a random permutation on test data, not an RNG, to avoid duplicate 72 | // keys, and the Phi hash has quasi-uniform behavior on sequential integers 73 | private final IntHasher hasher = new PhiIntHasher(); 74 | 75 | @Setup(Level.Trial) 76 | public void initBenchmarkState() throws ClassNotFoundException, InstantiationException, IllegalAccessException, 77 | NoSuchMethodException, InvocationTargetException { 
78 | // generate array of random ints, using random permutation rather than RNG 79 | // to avoid duplicates. 80 | int[] testData = IntStream.rangeClosed(1, setSize).map(hasher::hash).toArray(); 81 | // populate hash set under test with all test data except the last BATCH_SIZE elements, 82 | // to leave some data free for testing deletions and unsuccessful lookups. 83 | int templateSize = testData.length - BATCH_SIZE; 84 | this.hashSetTemplate = (IntSet) Class.forName(setClassName).getDeclaredConstructor(int.class, double.class) 85 | .newInstance(setSize, loadFactor); 86 | for (int i = 0; i < templateSize; ++i) { 87 | this.hashSetTemplate.add(testData[i]); 88 | } 89 | // now generate a random sample of BATCH_SIZE test data that are not present in the hash map 90 | this.newTestData = Arrays.copyOfRange(testData, testData.length - BATCH_SIZE, testData.length); 91 | shuffleArray(testData, 0, templateSize); 92 | this.oldTestData = Arrays.copyOfRange(testData, 0, BATCH_SIZE); 93 | } 94 | } 95 | 96 | @State(Scope.Thread) 97 | public static class IterationState { 98 | public IntSet hashSet; 99 | public int testDataIndex; 100 | 101 | public int getDataIndex() { 102 | int ret; 103 | if (this.testDataIndex == BATCH_SIZE) { 104 | ret = 0; 105 | } else { 106 | ret = this.testDataIndex; 107 | ++this.testDataIndex; 108 | } 109 | assert this.testDataIndex <= BATCH_SIZE; 110 | return ret; 111 | } 112 | 113 | @Setup(Level.Iteration) 114 | public void initIterationState(BenchmarkState bs) throws CloneNotSupportedException { 115 | this.hashSet = bs.hashSetTemplate.cloneSet(); 116 | this.testDataIndex = 0; 117 | } 118 | 119 | @TearDown(Level.Iteration) 120 | public void destroyIterationState() { 121 | this.hashSet.clear(); 122 | assert this.testDataIndex <= BATCH_SIZE; 123 | } 124 | } 125 | 126 | @Benchmark 127 | @Fork(FORKS) 128 | @Warmup(iterations = ITERATIONS * SINGLE_SHOT_MULTIPLIER, batchSize = BATCH_SIZE) 129 | @Measurement(iterations = ITERATIONS * SINGLE_SHOT_MULTIPLIER, 
batchSize = BATCH_SIZE) 130 | @BenchmarkMode(Mode.SingleShotTime) 131 | @OutputTimeUnit(TimeUnit.MICROSECONDS) 132 | public boolean measureAdd(BenchmarkState bs, IterationState is) { 133 | boolean notPresent = is.hashSet.add(bs.newTestData[is.getDataIndex()]); 134 | assert notPresent; 135 | return notPresent; 136 | } 137 | 138 | @Benchmark 139 | @Fork(FORKS) 140 | @Warmup(iterations = ITERATIONS * SINGLE_SHOT_MULTIPLIER, batchSize = BATCH_SIZE) 141 | @Measurement(iterations = ITERATIONS * SINGLE_SHOT_MULTIPLIER, batchSize = BATCH_SIZE) 142 | @BenchmarkMode(Mode.SingleShotTime) 143 | @OutputTimeUnit(TimeUnit.MICROSECONDS) 144 | public boolean measureRemove(BenchmarkState bs, IterationState is) { 145 | boolean present = is.hashSet.remove(bs.oldTestData[is.getDataIndex()]); 146 | assert present; 147 | return present; 148 | } 149 | 150 | @Benchmark 151 | @Fork(FORKS) 152 | @Warmup(iterations = ITERATIONS) 153 | @Measurement(iterations = ITERATIONS) 154 | @BenchmarkMode({Mode.AverageTime, Mode.SampleTime}) 155 | @OutputTimeUnit(TimeUnit.NANOSECONDS) 156 | public boolean measureSuccessfulContains(BenchmarkState bs, IterationState is) { 157 | boolean present = is.hashSet.contains(bs.oldTestData[is.getDataIndex()]); 158 | assert present; 159 | return present; 160 | } 161 | 162 | @Benchmark 163 | @Fork(FORKS) 164 | @Warmup(iterations = ITERATIONS) 165 | @Measurement(iterations = ITERATIONS) 166 | @BenchmarkMode({Mode.AverageTime, Mode.SampleTime}) 167 | @OutputTimeUnit(TimeUnit.NANOSECONDS) 168 | public boolean measureUnsuccessfulContains(BenchmarkState bs, IterationState is) { 169 | boolean present = is.hashSet.contains(bs.newTestData[is.getDataIndex()]); 170 | assert !present; 171 | return present; 172 | } 173 | } 174 | -------------------------------------------------------------------------------- /src/jmh/java/set/int64/Benchmarks.java: -------------------------------------------------------------------------------- 1 | package set.int64; 2 | 3 | import 
hash.int64.LongHasher; 4 | import hash.int64.PhiLongHasher; 5 | 6 | import java.util.Arrays; 7 | import java.util.Random; 8 | import java.util.concurrent.ThreadLocalRandom; 9 | import java.util.concurrent.TimeUnit; 10 | import java.util.stream.LongStream; 11 | import java.lang.reflect.InvocationTargetException; 12 | 13 | import org.openjdk.jmh.annotations.*; 14 | 15 | public class Benchmarks { 16 | 17 | private static final int FORKS = 1; 18 | private static final int ITERATIONS = 5; 19 | private static final int SINGLE_SHOT_MULTIPLIER = 1; 20 | private static final int BATCH_SIZE = 1000; 21 | 22 | // implements Fisher–Yates shuffle over range of array, inexplicably missing 23 | // from java.util.Arrays 24 | private static void shuffleArray(long[] arr, int start, int end) { 25 | Random rnd = ThreadLocalRandom.current(); 26 | for (int i = end - start - 1; i > 0; i--) { 27 | int index = rnd.nextInt(i + 1); 28 | int randomIndex = start + index; 29 | int currentIndex = start + i; 30 | assert start <= randomIndex && randomIndex < end; 31 | assert start <= currentIndex && currentIndex < end; 32 | // Simple swap 33 | long a = arr[randomIndex]; 34 | arr[randomIndex] = arr[currentIndex]; 35 | arr[currentIndex] = a; 36 | } 37 | } 38 | 39 | @State(Scope.Benchmark) 40 | public static class BenchmarkState { 41 | @Param({ 42 | LPLongHashSet.NAME, 43 | LCFSLongHashSet.NAME, 44 | RHLongHashSet.NAME, 45 | BLPLongHashSet.NAME, 46 | }) 47 | private String setClassName; 48 | 49 | @Param({ 50 | "10000", 51 | "100000", 52 | "1000000", 53 | "10000000", 54 | "100000000", 55 | }) 56 | private int setSize; 57 | 58 | @Param({ 59 | "0.5", 60 | "0.75", 61 | "0.9", 62 | "0.95", 63 | "0.99", 64 | // "1.0", 65 | }) 66 | private double loadFactor; 67 | 68 | public LongSet hashSetTemplate; 69 | public long[] newTestData; 70 | public long[] oldTestData; 71 | // we want a random permutation on test data, not an RNG, to avoid duplicate 72 | // keys, and the Phi hash has quasi-uniform behavior on 
sequential integers 73 | private final LongHasher hasher = new PhiLongHasher(); 74 | 75 | @Setup(Level.Trial) 76 | public void initBenchmarkState() throws ClassNotFoundException, InstantiationException, IllegalAccessException, 77 | NoSuchMethodException, InvocationTargetException { 78 | // generate array of random ints, using random permutation rather than RNG 79 | // to avoid duplicates. 80 | long[] testData = LongStream.rangeClosed(1, setSize).map(hasher::hash).toArray(); 81 | // populate hash set under test with all test data except the last BATCH_SIZE elements, 82 | // to leave some data free for testing deletions and unsuccessful lookups. 83 | int templateSize = testData.length - BATCH_SIZE; 84 | this.hashSetTemplate = (LongSet) Class.forName(setClassName).getDeclaredConstructor(int.class, double.class) 85 | .newInstance(setSize, loadFactor); 86 | for (int i = 0; i < templateSize; ++i) { 87 | this.hashSetTemplate.add(testData[i]); 88 | } 89 | // now generate a random sample of BATCH_SIZE test data that are not present in the hash map 90 | this.newTestData = Arrays.copyOfRange(testData, testData.length - BATCH_SIZE, testData.length); 91 | shuffleArray(testData, 0, templateSize); 92 | this.oldTestData = Arrays.copyOfRange(testData, 0, BATCH_SIZE); 93 | } 94 | } 95 | 96 | @State(Scope.Thread) 97 | public static class IterationState { 98 | public LongSet hashSet; 99 | public int testDataIndex; 100 | 101 | public int getDataIndex() { 102 | int ret; 103 | if (this.testDataIndex == BATCH_SIZE) { 104 | ret = 0; 105 | } else { 106 | ret = this.testDataIndex; 107 | ++this.testDataIndex; 108 | } 109 | assert this.testDataIndex <= BATCH_SIZE; 110 | return ret; 111 | } 112 | 113 | @Setup(Level.Iteration) 114 | public void initIterationState(BenchmarkState bs) throws CloneNotSupportedException { 115 | this.hashSet = bs.hashSetTemplate.cloneSet(); 116 | this.testDataIndex = 0; 117 | } 118 | 119 | @TearDown(Level.Iteration) 120 | public void destroyIterationState() { 121 | 
this.hashSet.clear(); 122 | assert this.testDataIndex <= BATCH_SIZE; 123 | } 124 | } 125 | 126 | @Benchmark 127 | @Fork(FORKS) 128 | @Warmup(iterations = ITERATIONS * SINGLE_SHOT_MULTIPLIER, batchSize = BATCH_SIZE) 129 | @Measurement(iterations = ITERATIONS * SINGLE_SHOT_MULTIPLIER, batchSize = BATCH_SIZE) 130 | @BenchmarkMode(Mode.SingleShotTime) 131 | @OutputTimeUnit(TimeUnit.MICROSECONDS) 132 | public boolean measureAdd(BenchmarkState bs, IterationState is) { 133 | boolean notPresent = is.hashSet.add(bs.newTestData[is.getDataIndex()]); 134 | assert notPresent; 135 | return notPresent; 136 | } 137 | 138 | @Benchmark 139 | @Fork(FORKS) 140 | @Warmup(iterations = ITERATIONS * SINGLE_SHOT_MULTIPLIER, batchSize = BATCH_SIZE) 141 | @Measurement(iterations = ITERATIONS * SINGLE_SHOT_MULTIPLIER, batchSize = BATCH_SIZE) 142 | @BenchmarkMode(Mode.SingleShotTime) 143 | @OutputTimeUnit(TimeUnit.MICROSECONDS) 144 | public boolean measureRemove(BenchmarkState bs, IterationState is) { 145 | boolean present = is.hashSet.remove(bs.oldTestData[is.getDataIndex()]); 146 | assert present; 147 | return present; 148 | } 149 | 150 | @Benchmark 151 | @Fork(FORKS) 152 | @Warmup(iterations = ITERATIONS) 153 | @Measurement(iterations = ITERATIONS) 154 | @BenchmarkMode({Mode.AverageTime, Mode.SampleTime}) 155 | @OutputTimeUnit(TimeUnit.NANOSECONDS) 156 | public boolean measureSuccessfulContains(BenchmarkState bs, IterationState is) { 157 | boolean present = is.hashSet.contains(bs.oldTestData[is.getDataIndex()]); 158 | assert present; 159 | return present; 160 | } 161 | 162 | @Benchmark 163 | @Fork(FORKS) 164 | @Warmup(iterations = ITERATIONS) 165 | @Measurement(iterations = ITERATIONS) 166 | @BenchmarkMode({Mode.AverageTime, Mode.SampleTime}) 167 | @OutputTimeUnit(TimeUnit.NANOSECONDS) 168 | public boolean measureUnsuccessfulContains(BenchmarkState bs, IterationState is) { 169 | boolean present = is.hashSet.contains(bs.newTestData[is.getDataIndex()]); 170 | assert !present; 171 | return 
// originally copied from https://raw.githubusercontent.com/vigna/fastutil/master/src/it/unimi/dsi/fastutil/HashCommon.java

/*
 * Copyright (C) 2002-2017 Sebastiano Vigna
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/** Common code for all hash-based classes. */
public class Utils {

    protected Utils() {}

    /** Returns the raw IEEE 754 bit pattern of a float.
     *
     * <p>This equals {@link Float#hashCode()} for every value except NaNs with a
     * non-canonical payload, which are not collapsed to a single code here.
     *
     * @param f a float.
     * @return {@link Float#floatToRawIntBits(float) Float.floatToRawIntBits(f)}.
     */
    public static int float2int(final float f) {
        return Float.floatToRawIntBits(f);
    }

    /** Folds the raw IEEE 754 bit pattern of a double into an int.
     *
     * <p>This equals {@link Double#hashCode()} for every value except NaNs with a
     * non-canonical payload, which are not collapsed to a single code here.
     *
     * @param d a double.
     * @return the XOR of the upper and lower 32 bits of the raw bit pattern of {@code d}.
     */
    public static int double2int(final double d) {
        final long l = Double.doubleToRawLongBits(d);
        return (int)(l ^ (l >>> 32));
    }

    /** Returns the hash code that would be returned by {@link Long#hashCode()}.
     *
     * @param l a long.
     * @return the same code as {@link Long#hashCode(long) Long.hashCode(l)}.
     */
    public static int long2int(final long l) {
        return (int)(l ^ (l >>> 32));
    }

    /** Returns the least power of two greater than or equal to the specified value.
     *
     * <p>Note that this function will return 1 when the argument is 0.
     *
     * @param x an integer smaller than or equal to 2<sup>30</sup>.
     * @return the least power of two greater than or equal to the specified value.
     */
    public static int nextPowerOfTwo(int x) {
        if (x == 0) return 1;
        // Smear the highest set bit of x - 1 into every lower position, then add one.
        x--;
        x |= x >> 1;
        x |= x >> 2;
        x |= x >> 4;
        x |= x >> 8;
        return (x | x >> 16) + 1;
    }

    /** Returns the least power of two greater than or equal to the specified value.
     *
     * <p>Note that this function will return 1 when the argument is 0.
     *
     * @param x a long integer smaller than or equal to 2<sup>62</sup>.
     * @return the least power of two greater than or equal to the specified value.
     */
    public static long nextPowerOfTwo(long x) {
        if (x == 0) return 1;
        // Smear the highest set bit of x - 1 into every lower position, then add one.
        x--;
        x |= x >> 1;
        x |= x >> 2;
        x |= x >> 4;
        x |= x >> 8;
        x |= x >> 16;
        return (x | x >> 32) + 1;
    }

    /** Returns the maximum number of entries that can be filled before rehashing.
     *
     * @param n the size of the backing array.
     * @param f the load factor.
     * @return the maximum number of entries before rehashing.
     */
    public static int maxFill(final int n, final float f) {
        /* We must guarantee that there is always at least
         * one free entry (even with pathological load factors). */
        return Math.min((int)Math.ceil(n * f), n - 1);
    }

    /** Returns the maximum number of entries that can be filled before rehashing.
     *
     * @param n the size of the backing array.
     * @param f the load factor.
     * @return the maximum number of entries before rehashing.
     */
    public static long maxFill(final long n, final float f) {
        /* We must guarantee that there is always at least
         * one free entry (even with pathological load factors). */
        return Math.min((long)Math.ceil(n * f), n - 1);
    }

    /** Returns the least power of two smaller than or equal to 2<sup>30</sup> and larger than or equal to {@code Math.ceil(expected / f)}.
     *
     * <p>The quotient is computed in {@code double} precision: a {@code float} division would
     * first round {@code expected} to 24 significant bits and could yield a size smaller than
     * {@code expected} itself.
     *
     * @param expected the expected number of elements in a hash table.
     * @param f the load factor.
     * @return the minimum possible size for a backing array (never less than 2).
     * @throws IllegalArgumentException if the necessary size is larger than 2<sup>30</sup>.
     */
    public static int arraySize(final int expected, final float f) {
        final long s = Math.max(2, nextPowerOfTwo((long)Math.ceil(expected / (double)f)));
        if (s > (1 << 30)) throw new IllegalArgumentException("Too large (" + expected + " expected elements with load factor " + f + ")");
        return (int)s;
    }

    /** Returns the least power of two larger than or equal to {@code Math.ceil(expected / f)}.
     *
     * <p>The quotient is computed in {@code double} precision to avoid rounding
     * {@code expected} to 24 significant bits.
     *
     * @param expected the expected number of elements in a hash table.
     * @param f the load factor.
     * @return the minimum possible size for a backing big array.
     */
    public static long bigArraySize(final long expected, final float f) {
        return nextPowerOfTwo((long)Math.ceil(expected / (double)f));
    }
}

6 | * 20 base rounds (hypothetical) 7 | *

8 | * 64 bit key/22 rounds. 9 | */ 10 | public final class Speck32Cipher 11 | extends SpeckIntCipher 12 | { 13 | 14 | public Speck32Cipher() 15 | { 16 | this(20); 17 | } 18 | 19 | public Speck32Cipher(int rounds) 20 | { 21 | super(2, rounds, 7, 2); 22 | } 23 | 24 | @Override 25 | protected int mask(int val) 26 | { 27 | return (val & 0xffff); 28 | } 29 | 30 | @Override 31 | protected void checkKeySize(int keySizeBytes) 32 | { 33 | if (keySizeBytes != 8) 34 | { 35 | throw new IllegalArgumentException("Speck32 requires a key of 64 bits."); 36 | } 37 | } 38 | 39 | public int encrypt(int value) 40 | { 41 | // extract the left and right 16 bits of this int and put each of them in the low bits of an int 42 | int low = value & 0x0000ffff; 43 | int high = value & 0xffff0000; 44 | long encryptedBlob = this.encryptValue(low, high); 45 | // extract the low 16 bits from each 32-bit word in this long and concatenate them into an int 46 | high = (int) ((encryptedBlob & 0x0000ffff00000000L) >>> 32); 47 | low = (int) (encryptedBlob & 0x000000000000ffffL); 48 | return (high << 16) | low; 49 | } 50 | 51 | public int decrypt(int value) 52 | { 53 | // extract the left and right 16 bits of this int and put each of them in the low bits of an int 54 | int low = value & 0x0000ffff; 55 | int high = value & 0xffff0000; 56 | long decryptedBlob = this.decryptValue(low, high); 57 | // extract the low 16 bits from each 32-bit word in this long and concatenate them into an int 58 | high = (int) ((decryptedBlob & 0x0000ffff00000000L) >>> 32); 59 | low = (int) (decryptedBlob & 0x000000000000ffffL); 60 | return (high << 16) | low; 61 | } 62 | 63 | } 64 | -------------------------------------------------------------------------------- /src/main/java/hash/Speck64Cipher.java: -------------------------------------------------------------------------------- 1 | package hash; 2 | 3 | /** 4 | * Speck64: 4 byte words, 8/3 rotation constants. 5 | *

6 | * 25 base rounds (hypothetical) 7 | *

8 | * 96 bit key/26 rounds.
9 | * 128 bit key/27 rounds. 10 | */ 11 | public final class Speck64Cipher 12 | extends SpeckIntCipher 13 | { 14 | 15 | public Speck32Cipher() 16 | { 17 | this(25); 18 | } 19 | 20 | public Speck32Cipher(int rounds) 21 | { 22 | super(4, rounds); 23 | } 24 | 25 | @Override 26 | protected int mask(int val) 27 | { 28 | return val; 29 | } 30 | 31 | @Override 32 | protected void checkKeySize(int keySizeBytes) 33 | { 34 | if (keySizeBytes != 12 && keySizeBytes != 16) 35 | { 36 | throw new IllegalArgumentException("Speck64 requires a key of 96 or 128 bits."); 37 | } 38 | } 39 | 40 | public int encrypt(long value) 41 | { 42 | // extract the left and right 32 bits of this int 43 | int low = value & 0x00000000ffffffffL; 44 | int high = value & 0xffffffff00000000L; 45 | return this.encryptValue(low, high); 46 | } 47 | 48 | public int decrypt(int value) 49 | { 50 | // extract the left and right 32 bits of this int 51 | int low = value & 0x00000000ffffffffL; 52 | int high = value & 0xffffffff00000000L; 53 | return this.decryptValue(low, high); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/hash/int32/H2IntHasher.java: -------------------------------------------------------------------------------- 1 | package hash.int32; 2 | 3 | /** 4 | * Implements hash function from https://github.com/h2database/h2database. 
5 | * 6 | * @author tdbaker 7 | */ 8 | public class H2IntHasher implements IntHasher { 9 | public static final String NAME = "hash.int32.H2IntHasher"; 10 | @Override 11 | public int hash(int x) { 12 | if (x == 0) { 13 | throw new IllegalArgumentException("Hashing 0 is a no-op"); 14 | } 15 | x ^= x >>> 16; 16 | x *= 0x45d9f3b; 17 | x ^= x >>> 16; 18 | x *= 0x45d9f3b; 19 | x ^= x >>> 16; 20 | return x; 21 | } 22 | 23 | @Override 24 | public int unhash(int x) { 25 | if (x == 0) { 26 | throw new IllegalArgumentException("Hashing 0 is a no-op"); 27 | } 28 | x ^= x >>> 16; 29 | x *= 0x119de1f3; 30 | x ^= x >>> 16; 31 | x *= 0x119de1f3; 32 | x ^= x >>> 16; 33 | return x; 34 | } 35 | 36 | @Override 37 | public IntHasher cloneHasher() { 38 | // stateless 39 | return new H2IntHasher(); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/main/java/hash/int32/IdentityIntHasher.java: -------------------------------------------------------------------------------- 1 | package hash.int32; 2 | 3 | /** 4 | * Implements identity hash function. 5 | * 6 | * @author tdbaker 7 | */ 8 | public class IdentityIntHasher implements IntHasher { 9 | public static final String NAME = "hash.int32.IdentityIntHasher"; 10 | 11 | @Override 12 | public int hash(int x) { 13 | return x; 14 | } 15 | 16 | @Override 17 | public int unhash(int x) { 18 | return x; 19 | } 20 | 21 | @Override 22 | public IntHasher cloneHasher() { 23 | // stateless 24 | return new IdentityIntHasher(); 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/main/java/hash/int32/IntHasher.java: -------------------------------------------------------------------------------- 1 | package hash.int32; 2 | 3 | /** 4 | * A 32-bit hash function. 5 | * 6 | * @author tdbaker 7 | */ 8 | public interface IntHasher { 9 | /** 10 | * Apply the hash function to a value. 
11 | * @param x the 32-bit integer to hash 12 | * @return the hashed value 13 | */ 14 | public int hash(int x); 15 | /** 16 | * Invert the hash function. 17 | * @param x the 32-bit integer to unhash 18 | * @return the unhashed value 19 | */ 20 | public int unhash(int x); 21 | /** 22 | * Clone the hash function object. 23 | * 24 | * @return the cloned hasher 25 | */ 26 | public IntHasher cloneHasher(); 27 | } 28 | -------------------------------------------------------------------------------- /src/main/java/hash/int32/Murmur3IntHasher.java: -------------------------------------------------------------------------------- 1 | package hash.int32; 2 | 3 | /** 4 | * Implements Murmur3 32-bit finalizer (https://github.com/aappleby/smhasher/wiki/MurmurHash3) 5 | * 6 | * @author tdbaker 7 | */ 8 | public class Murmur3IntHasher implements IntHasher { 9 | public static final String NAME = "hash.int32.Murmur3IntHasher"; 10 | @Override 11 | public int hash(int x) { 12 | if (x == 0) { 13 | throw new IllegalArgumentException("Hashing 0 is a no-op"); 14 | } 15 | x ^= x >>> 16; 16 | x *= 0x85ebca6b; 17 | x ^= x >>> 13; 18 | x *= 0xc2b2ae35; 19 | x ^= x >>> 16; 20 | return x; 21 | } 22 | 23 | @Override 24 | public int unhash(int x) { 25 | if (x == 0) { 26 | throw new IllegalArgumentException("Hashing 0 is a no-op"); 27 | } 28 | x ^= x >>> 16; 29 | x *= 0x7ed1b41d; 30 | x ^= x >>> 13 ^ x >>> 26; 31 | x *= 0xa5cb9243; 32 | x ^= x >>> 16; 33 | return x; 34 | } 35 | 36 | @Override 37 | public IntHasher cloneHasher() { 38 | // stateless 39 | return new Murmur3IntHasher(); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/main/java/hash/int32/PhiIntHasher.java: -------------------------------------------------------------------------------- 1 | package hash.int32; 2 | 3 | /** 4 | * Multiply by the golden ratio and mix high bits into low bits. 
5 | * Based on https://raw.githubusercontent.com/vigna/fastutil/master/src/it/unimi/dsi/fastutil/HashCommon.java. 6 | * 7 | * @author tdbaker 8 | */ 9 | public class PhiIntHasher implements IntHasher { 10 | public static final String NAME = "hash.int32.PhiIntHasher"; 11 | 12 | private static final int INT_PHI = 0x9e3779b9; 13 | private static final int INV_INT_PHI = 0x144cbc89; 14 | 15 | @Override 16 | public int hash(int x) { 17 | if (x == 0) { 18 | throw new IllegalArgumentException("Hashing 0 is a no-op"); 19 | } 20 | x *= INT_PHI; 21 | x ^= x >>> 16; 22 | return x; 23 | } 24 | 25 | @Override 26 | public int unhash(int x) { 27 | if (x == 0) { 28 | throw new IllegalArgumentException("Hashing 0 is a no-op"); 29 | } 30 | x ^= x >>> 16; 31 | x *= INV_INT_PHI; 32 | return x; 33 | } 34 | 35 | @Override 36 | public IntHasher cloneHasher() { 37 | // stateless 38 | return new PhiIntHasher(); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/hash/int32/Prospector2RoundIntHasher.java: -------------------------------------------------------------------------------- 1 | package hash.int32; 2 | 3 | /** 4 | * Based on https://github.com/skeeto/hash-prospector#two-round-functions. 
5 | * 6 | * @author tdbaker 7 | */ 8 | public class Prospector2RoundIntHasher implements IntHasher { 9 | public static final String NAME = "hash.int32.Prospector2RoundIntHasher"; 10 | @Override 11 | public int hash(int x) { 12 | if (x == 0) { 13 | throw new IllegalArgumentException("Hashing 0 is a no-op"); 14 | } 15 | x ^= x >>> 16; 16 | x *= 0x7feb352d; 17 | x ^= x >>> 15; 18 | x *= 0x846ca68b; 19 | x ^= x >>> 16; 20 | return x; 21 | } 22 | 23 | @Override 24 | public int unhash(int x) { 25 | if (x == 0) { 26 | throw new IllegalArgumentException("Hashing 0 is a no-op"); 27 | } 28 | x ^= x >>> 16; 29 | x *= 0x43021123; 30 | x ^= x >>> 15 ^ x >>> 30; 31 | x *= 0x1d69e2a5; 32 | x ^= x >>> 16; 33 | return x; 34 | } 35 | 36 | @Override 37 | public IntHasher cloneHasher() { 38 | // stateless 39 | return new Prospector2RoundIntHasher(); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/main/java/hash/int32/Prospector3RoundIntHasher.java: -------------------------------------------------------------------------------- 1 | package hash.int32; 2 | 3 | /** 4 | * Based on https://github.com/skeeto/hash-prospector#three-round-functions. 
5 | * 6 | * @author tdbaker 7 | */ 8 | public class Prospector3RoundIntHasher implements IntHasher { 9 | public static final String NAME = "hash.int32.Prospector3RoundIntHasher"; 10 | @Override 11 | public int hash(int x) { 12 | if (x == 0) { 13 | throw new IllegalArgumentException("Hashing 0 is a no-op"); 14 | } 15 | x ^= x >>> 17; 16 | x *= 0xed5ad4bb; 17 | x ^= x >>> 11; 18 | x *= 0xac4c1b51; 19 | x ^= x >>> 15; 20 | x *= 0x31848bab; 21 | x ^= x >>> 14; 22 | return x; 23 | } 24 | 25 | @Override 26 | public int unhash(int x) { 27 | if (x == 0) { 28 | throw new IllegalArgumentException("Hashing 0 is a no-op"); 29 | } 30 | x ^= x >>> 14 ^ x >>> 28; 31 | x *= 0x32b21703; 32 | x ^= x >>> 15 ^ x >>> 30; 33 | x *= 0x469e0db1; 34 | x ^= x >>> 11 ^ x >>> 22; 35 | x *= 0x79a85073; 36 | x ^= x >>> 17; 37 | return x; 38 | } 39 | 40 | @Override 41 | public IntHasher cloneHasher() { 42 | // stateless 43 | return new Prospector3RoundIntHasher(); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/main/java/hash/int32/SpeckIntHasher.java: -------------------------------------------------------------------------------- 1 | package hash.int32; 2 | 3 | public class SpeckIntHasher implements IntHasher { 4 | public static final String NAME = "hash.int32.SpeckIntHasher"; 5 | // 32-bit Speck block cipher requires 64-bit key, default 20 rounds 6 | private static final long defaultKey = 12235564383205437408L; 7 | private final Speck32Cipher encryptor; 8 | private final Speck32Cipher decryptor; 9 | private final byte[] key; 10 | private final int rounds; 11 | 12 | public SpeckIntHasher(long key, int rounds) { 13 | this.encryptor = new Speck32Cipher(rounds); 14 | this.decryptor = new Speck32Cipher(rounds); 15 | encryptor.init(true, key); 16 | decryptor.init(false, key); 17 | } 18 | 19 | public SpeckIntHasher() { 20 | this(defaultKey, 20); 21 | } 22 | 23 | @Override 24 | public int hash(int x) { 25 | return encryptor.processBlock(x); 26 
| } 27 | 28 | @Override 29 | public int unhash(int x){ 30 | return decryptor.processBlock(x); 31 | } 32 | 33 | @Override 34 | public IntHasher cloneHasher() { 35 | // NYI 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/main/java/hash/int64/DegskiLongHasher.java: -------------------------------------------------------------------------------- 1 | package hash.int64; 2 | 3 | /** 4 | * Based on https://gist.github.com/degski/6e2069d6035ae04d5d6f64981c995ec2. 5 | * 6 | * @author tdbaker 7 | */ 8 | public class DegskiLongHasher implements LongHasher { 9 | public static final String NAME = "hash.int64.DegskiLongHasher"; 10 | @Override 11 | public long hash(long x) { 12 | if (x == 0) { 13 | throw new IllegalArgumentException("Hashing 0 is a no-op"); 14 | } 15 | x ^= x >>> 32; 16 | x *= 0xD6E8FEB86659FD93L; 17 | x ^= x >>> 32; 18 | x *= 0xD6E8FEB86659FD93L; 19 | x ^= x >>> 32; 20 | return x; 21 | } 22 | 23 | @Override 24 | public long unhash(long x) { 25 | if (x == 0) { 26 | throw new IllegalArgumentException("Hashing 0 is a no-op"); 27 | } 28 | x ^= x >>> 32; 29 | x *= 0xCFEE444D8B59A89BL; 30 | x ^= x >>> 32; 31 | x *= 0xCFEE444D8B59A89BL; 32 | x ^= x >>> 32; 33 | return x; 34 | } 35 | 36 | @Override 37 | public LongHasher cloneHasher() { 38 | // stateless 39 | return new DegskiLongHasher(); 40 | } 41 | } -------------------------------------------------------------------------------- /src/main/java/hash/int64/IdentityLongHasher.java: -------------------------------------------------------------------------------- 1 | package hash.int64; 2 | 3 | /** 4 | * Implements identity hash function. 
5 | * 6 | * @author tdbaker 7 | */ 8 | public class IdentityLongHasher implements LongHasher { 9 | public static final String NAME = "hash.int64.IdentityLongHasher"; 10 | 11 | @Override 12 | public long hash(long x) { 13 | return x; 14 | } 15 | 16 | @Override 17 | public long unhash(long x) { 18 | return x; 19 | } 20 | 21 | @Override 22 | public LongHasher cloneHasher() { 23 | // stateless 24 | return new IdentityLongHasher(); 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/main/java/hash/int64/LongHasher.java: -------------------------------------------------------------------------------- 1 | package hash.int64; 2 | 3 | /** 4 | * A 64-bit hash function. 5 | * 6 | * @author tdbaker 7 | */ 8 | public interface LongHasher { 9 | /** 10 | * Apply the hash function to a value. 11 | * @param x the 64-bit integer to hash 12 | * @return the hashed value 13 | */ 14 | public long hash(long x); 15 | /** 16 | * Invert the hash function. 17 | * @param x the 64-bit integer to hash 18 | * @return the unhashed value 19 | */ 20 | public long unhash(long x); 21 | /** 22 | * Clone the hash function object. 
23 | * 24 | * @return the cloned hasher 25 | */ 26 | public LongHasher cloneHasher(); 27 | } 28 | -------------------------------------------------------------------------------- /src/main/java/hash/int64/Murmur3LongHasher.java: -------------------------------------------------------------------------------- 1 | package hash.int64; 2 | 3 | /** 4 | * Implements Murmur3 64-bit finalizer (https://github.com/aappleby/smhasher/wiki/MurmurHash3) 5 | * 6 | * @author tdbaker 7 | */ 8 | public class Murmur3LongHasher implements LongHasher { 9 | public static final String NAME = "hash.int64.Murmur3LongHasher"; 10 | @Override 11 | public long hash(long x) { 12 | if (x == 0) { 13 | throw new IllegalArgumentException("Hashing 0 is a no-op"); 14 | } 15 | x ^= x >>> 33; 16 | x *= 0xff51afd7ed558ccdL; 17 | x ^= x >>> 33; 18 | x *= 0xc4ceb9fe1a85ec53L; 19 | x ^= x >>> 33; 20 | return x; 21 | } 22 | 23 | @Override 24 | public long unhash(long x) { 25 | if (x == 0) { 26 | throw new IllegalArgumentException("Hashing 0 is a no-op"); 27 | } 28 | x ^= x >>> 33; 29 | x *= 0x9cb4b2f8129337dbL; 30 | x ^= x >>> 33; 31 | x *= 0x4f74430c22a54005L; 32 | x ^= x >>> 33; 33 | return x; 34 | } 35 | 36 | @Override 37 | public LongHasher cloneHasher() { 38 | // stateless 39 | return new Murmur3LongHasher(); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/main/java/hash/int64/PhiLongHasher.java: -------------------------------------------------------------------------------- 1 | package hash.int64; 2 | 3 | /** 4 | * Multiply by the golden ratio and mix high bits into low bits. 5 | * Based on https://raw.githubusercontent.com/vigna/fastutil/master/src/it/unimi/dsi/fastutil/HashCommon.java. 
6 | * 7 | * @author tdbaker 8 | */ 9 | public class PhiLongHasher implements LongHasher { 10 | public static final String NAME = "hash.int64.PhiLongHasher"; 11 | private static final long LONG_PHI = 0x9e3779b97f4a7c15l; 12 | private static final long INV_LONG_PHI = 0xf1de83e19937733dL; 13 | 14 | @Override 15 | public long hash(long x) { 16 | if (x == 0) { 17 | throw new IllegalArgumentException("Hashing 0 is a no-op"); 18 | } 19 | x *= LONG_PHI; 20 | x ^= x >>> 32; 21 | x ^= x >>> 16; 22 | return x; 23 | } 24 | 25 | @Override 26 | public long unhash(long x) { 27 | if (x == 0) { 28 | throw new IllegalArgumentException("Hashing 0 is a no-op"); 29 | } 30 | x ^= x >>> 32; 31 | x ^= x >>> 16; 32 | x ^= x >>> 32; 33 | x *= INV_LONG_PHI; 34 | return x; 35 | } 36 | 37 | @Override 38 | public LongHasher cloneHasher() { 39 | // stateless 40 | return new PhiLongHasher(); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/main/java/hash/int64/SpeckLongHasher.java: -------------------------------------------------------------------------------- 1 | package hash.int64; 2 | 3 | public class SpeckLongHasher implements LongHasher { 4 | public static final String NAME = "hash.int64.SpeckLongHasher"; 5 | // 64-bit Speck block cipher requires 128-bit key, default 25 rounds 6 | private final Speck64Cipher encryptor; 7 | private final Speck64Cipher decryptor; 8 | private final byte[] key; 9 | private final int rounds; 10 | 11 | public SpeckLongHasher(byte[] key, int rounds) { 12 | this.encryptor = new Speck64Cipher(rounds); 13 | this.decryptor = new Speck64Cipher(rounds); 14 | encryptor.init(true, key); 15 | decryptor.init(false, key); 16 | } 17 | 18 | @Override 19 | public long hash(long x) { 20 | return encryptor.processBlock(x); 21 | } 22 | @Override 23 | public long unhash(long x){ 24 | return decryptor.processBlock(x); 25 | } 26 | 27 | @Override 28 | public LongHasher cloneHasher() { 29 | // NYI 30 | } 31 | } 32 | 
-------------------------------------------------------------------------------- /src/main/java/hash/int64/Variant13LongHasher.java: -------------------------------------------------------------------------------- 1 | package hash.int64; 2 | 3 | /** 4 | * Variant 13 of Murmur3 64-bit finalizer (http://zimbry.blogspot.com/2011/09/better-bit-mixing-improving-on.html) 5 | * 6 | * @author tdbaker 7 | */ 8 | public class Variant13LongHasher implements LongHasher { 9 | public static final String NAME = "hash.int64.Variant13LongHasher"; 10 | @Override 11 | public long hash(long x) { 12 | if (x == 0) { 13 | throw new IllegalArgumentException("Hashing 0 is a no-op"); 14 | } 15 | x ^= x >>> 30; 16 | x *= 0xbf58476d1ce4e5b9L; 17 | x ^= x >>> 27; 18 | x *= 0x94d049bb133111ebL; 19 | x ^= x >>> 31; 20 | return x; 21 | } 22 | 23 | @Override 24 | public long unhash(long x) { 25 | if (x == 0) { 26 | throw new IllegalArgumentException("Hashing 0 is a no-op"); 27 | } 28 | x ^= x >>> 31 ^ x >>> 62; 29 | x *= 0x319642b2d24d8ec3L; 30 | x ^= x >>> 27 ^ x >>> 54; 31 | x *= 0x96de1b173f119089L; 32 | x ^= x >>> 30 ^ x >>> 60; 33 | return x; 34 | } 35 | 36 | @Override 37 | public LongHasher cloneHasher() { 38 | // stateless 39 | return new Variant13LongHasher(); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/main/java/hash/int64/WangLongHasher.java: -------------------------------------------------------------------------------- 1 | package hash.int64; 2 | 3 | /** 4 | * Based on https://naml.us/post/inverse-of-a-hash-function/. 
5 | * 6 | * @author tdbaker 7 | */ 8 | public class WangLongHasher implements LongHasher { 9 | public static final String NAME = "hash.int64.WangLongHasher"; 10 | @Override 11 | public long hash(long x) { 12 | if (x == 0) { 13 | throw new IllegalArgumentException("Hashing 0 is a no-op"); 14 | } 15 | x = (~x) + (x << 21); // x = (x << 21) - x - 1; 16 | x = x ^ (x >>> 24); 17 | x = (x + (x << 3)) + (x << 8); // x * 265 18 | x = x ^ (x >>> 14); 19 | x = (x + (x << 2)) + (x << 4); // x * 21 20 | x = x ^ (x >>> 28); 21 | x = x + (x << 31); 22 | return x; 23 | } 24 | 25 | @Override 26 | public long unhash(long x) { 27 | if (x == 0) { 28 | throw new IllegalArgumentException("Hashing 0 is a no-op"); 29 | } 30 | 31 | long tmp; 32 | 33 | // Invert x = x + (x << 31) 34 | tmp = x - (x << 31); 35 | x = x - (tmp << 31); 36 | 37 | // Invert x = x ^ (x >> 28) 38 | tmp = x ^ x >>> 28; 39 | x = x ^ tmp >>> 28; 40 | 41 | // Invert x *= 21 42 | x *= -3513665537849438403L; 43 | 44 | // Invert x = x ^ (x >> 14) 45 | tmp = x ^ x >>> 14; 46 | tmp = x ^ tmp >>> 14; 47 | tmp = x ^ tmp >>> 14; 48 | x = x ^ tmp >>> 14; 49 | 50 | // Invert x *= 265 51 | x *= -3202076329775997639L; 52 | 53 | // Invert x = x ^ (x >> 24) 54 | tmp = x ^ x >>> 24; 55 | x = x ^ tmp >>> 24; 56 | 57 | // Invert x = (~x) + (x << 21) 58 | tmp = ~x; 59 | tmp = ~(x - (tmp << 21)); 60 | tmp = ~(x - (tmp << 21)); 61 | x = ~(x - (tmp << 21)); 62 | 63 | return x; 64 | } 65 | 66 | @Override 67 | public LongHasher cloneHasher() { 68 | // stateless 69 | return new WangLongHasher(); 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /src/main/java/set/int32/IntSet.java: -------------------------------------------------------------------------------- 1 | package set.int32; 2 | 3 | /** 4 | * A 32-bit hash set. 5 | * 6 | * @author tdbaker 7 | */ 8 | public interface IntSet { 9 | 10 | /** 11 | * Query the number of elements in the table. 
12 | * 13 | * @return the number of elements in the table 14 | */ 15 | public int size(); 16 | 17 | /** 18 | * Query the table for a value. 19 | * 20 | * @param value the 32-bit integer to query the table for 21 | * @return {@code true} if {@code value} is present in the table, {@code false} otherwise 22 | */ 23 | public boolean contains(int value); 24 | 25 | /** 26 | * Add an element to the table. 27 | * 28 | * @param element the 32-bit integer to add to the table 29 | * @return {@code false} if {@code element} was already present in the table, {@code true} otherwise 30 | */ 31 | public boolean add(int element); 32 | 33 | /** 34 | * Remove an element from the table. 35 | * 36 | * @param value the 32-bit integer to remove from the table 37 | * @return {@code false} if {@code value} was not present in the table, {@code true} otherwise 38 | */ 39 | public boolean remove(int value); 40 | 41 | /** 42 | * Remove all elements from the table. 43 | */ 44 | public void clear(); 45 | 46 | /** 47 | * Return deep copy of the table. 48 | * @return the cloned table 49 | */ 50 | public IntSet cloneSet() throws CloneNotSupportedException; 51 | 52 | } 53 | -------------------------------------------------------------------------------- /src/main/java/set/int32/LCFSIntHashSet.java: -------------------------------------------------------------------------------- 1 | package set.int32; 2 | 3 | import hash.int32.IntHasher; 4 | 5 | 6 | /** 7 | * An implementation of a simple linear probing hash table, 8 | * with a tombstone-free deletion algorithm taken from 9 | * _Algorithm Design and Applications_, Section 6.3.3, and the Last-Come-First-Served 11 | * insertion heuristic. 
The LCFS heuristic dramatically reduces the 12 | * variance of probe length for successful lookups, though it has no effect 13 | * on its expected value, nor does it have any effect at all on unsuccessful 14 | * lookups (note that because we do not use tombstones, we cannot use the 15 | * maximum insertion probe length to bound the probe length of unsuccessful 16 | * lookups). The keys (there are no stored values) must be 32-bit integers, 17 | * which are permuted to form the hash codes (i.e., the "hash function" 18 | * is reversible). This obviates the need to separately store hash codes 19 | * or rehash the keys to perform operations which use hash codes. 20 | * 21 | * @author tdbaker 22 | */ 23 | public class LCFSIntHashSet extends LPIntHashSet { 24 | public static final String NAME = "set.int32.LCFSIntHashSet"; 25 | 26 | public LCFSIntHashSet(int maxEntries, double loadFactor, IntHasher hasher) { 27 | super(maxEntries, loadFactor, hasher); 28 | } 29 | 30 | public LCFSIntHashSet(int maxEntries, double loadFactor) { 31 | super(maxEntries, loadFactor); 32 | } 33 | 34 | public LCFSIntHashSet(LCFSIntHashSet other) throws CloneNotSupportedException { 35 | super(other); 36 | } 37 | 38 | /** 39 | * Return deep copy of the table. 40 | */ 41 | @Override 42 | public IntSet cloneSet() throws CloneNotSupportedException { 43 | return new LCFSIntHashSet(this); 44 | } 45 | 46 | /** 47 | * Add an element to the table. 
48 | * 49 | * @param element the 32-bit integer to add to the table 50 | * @return {@code false} if {@code element} was already present in the table, {@code true} otherwise 51 | */ 52 | @Override 53 | public boolean add(int element) { 54 | int hash = hash(element); 55 | int bucket = lookupByHash(hash); 56 | if (bucket == -1) { 57 | // table full 58 | throw new RuntimeException("Couldn't insert into table"); 59 | } 60 | if (!isEmpty(bucket)) { 61 | return false; 62 | } 63 | int preferredBucket = findPreferredBucket(hash); 64 | int emptyBucket = findFirstEmptyBucket(preferredBucket); 65 | // This is the "Last-Come-First-Served" heuristic: we always insert the 66 | // element in its preferred bucket and shift the chain starting at that 67 | // bucket one space to the right. 68 | moveEmptyBucketToInsertionPoint(emptyBucket, preferredBucket); 69 | this.arr[preferredBucket] = hash; 70 | ++this.size; 71 | return true; 72 | } 73 | 74 | private int findFirstEmptyBucket(int startBucket) { 75 | assert startBucket >= 0 && startBucket < this.arr.length; 76 | int bucket = startBucket; 77 | int probeLength = 0; 78 | while (!isEmpty(bucket)) { 79 | if (probeLength == this.arr.length) { 80 | return -1; 81 | } 82 | bucket = wrap(bucket + 1); 83 | ++probeLength; 84 | } 85 | return bucket; 86 | } 87 | 88 | private void moveEmptyBucketToInsertionPoint(int startBucket, int endBucket) { 89 | assert startBucket >= 0 && startBucket < this.arr.length; 90 | assert isEmpty(startBucket); 91 | int bucket = startBucket; 92 | while (bucket != endBucket) { 93 | this.arr[bucket] = this.arr[wrap(bucket - 1)]; 94 | bucket = wrap(bucket - 1); 95 | } 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /src/main/java/set/int32/LPIntHashSet.java: -------------------------------------------------------------------------------- 1 | package set.int32; 2 | 3 | import hash.int32.IntHasher; 4 | import hash.int32.Murmur3IntHasher; 5 | 6 | import java.util.Arrays; 
7 | 8 | 9 | /** 10 | * An implementation of a simple linear probing hash table, with a 11 | * tombstone-free deletion algorithm taken from _Algorithm Design and 12 | * Applications_, Section 6.3.3. (Note that because we do not use tombstones, 13 | * we cannot use the maximum insertion probe length to bound the probe length 14 | * of unsuccessful lookups.) The keys (there are no stored values) must be 15 | * 32-bit integers, which are permuted to form the hash codes (i.e., the 16 | * "hash function" is reversible). This obviates the need to separately store 17 | * hash codes or rehash the keys to perform operations which use hash codes. 18 | * 19 | * @author tdbaker 20 | */ 21 | public class LPIntHashSet implements IntSet { 22 | public static final String NAME = "set.int32.LPIntHashSet"; 23 | 24 | protected final int[] arr; 25 | protected int size = 0; 26 | protected final IntHasher hasher; 27 | 28 | public LPIntHashSet(int maxEntries, double loadFactor, IntHasher hasher) { 29 | assert maxEntries > 0; 30 | assert loadFactor > 0 && loadFactor <= 1.0; 31 | int arrSize = (int) (maxEntries / loadFactor); 32 | this.arr = new int[arrSize]; 33 | this.hasher = hasher; 34 | } 35 | 36 | public LPIntHashSet(int maxEntries, double loadFactor) { 37 | this(maxEntries, loadFactor, new Murmur3IntHasher()); 38 | } 39 | 40 | public LPIntHashSet(LPIntHashSet other) { 41 | this.arr = other.arr.clone(); 42 | this.size = other.size; 43 | this.hasher = other.hasher.cloneHasher(); 44 | } 45 | 46 | /** 47 | * Return deep copy of the table. 48 | */ 49 | public IntSet cloneSet() throws CloneNotSupportedException { 50 | return new LPIntHashSet(this); 51 | } 52 | 53 | /** 54 | * Query the size of the table's backing array. 55 | * 56 | * @return the size of the backing array 57 | */ 58 | public int capacity() { 59 | return this.arr.length; 60 | } 61 | 62 | /** 63 | * Query the number of elements in the table. 
64 | * 65 | * @return the number of elements in the table 66 | */ 67 | public int size() { 68 | assert this.size >= 0; 69 | return this.size; 70 | } 71 | 72 | /** 73 | * Query the table for a value. 74 | * 75 | * @param value the 32-bit integer to query the table for 76 | * @return {@code true} if {@code value} is present in the table, {@code false} otherwise 77 | */ 78 | public boolean contains(int value) { 79 | int hash = hash(value); 80 | int bucket = lookupByHash(hash); 81 | if (bucket == -1 || isEmpty(bucket)) { 82 | return false; 83 | } 84 | return true; 85 | } 86 | 87 | /** 88 | * Add an element to the table. 89 | * 90 | * @param element the 32-bit integer to add to the table 91 | * @return {@code false} if {@code element} was already present in the table, {@code true} otherwise 92 | */ 93 | public boolean add(int element) { 94 | int hash = hash(element); 95 | int bucket = lookupByHash(hash); 96 | if (bucket == -1) { 97 | // table full 98 | throw new RuntimeException("Couldn't insert into table"); 99 | } 100 | if (!isEmpty(bucket)) { 101 | return false; 102 | } 103 | this.arr[bucket] = hash; 104 | ++this.size; 105 | return true; 106 | } 107 | 108 | /** 109 | * Remove an element from the table. 110 | * 111 | * @param value the 32-bit integer to remove from the table 112 | * @return {@code false} if {@code value} was not present in the table, {@code true} otherwise 113 | */ 114 | public boolean remove(int value) { 115 | int hash = hash(value); 116 | int bucket = lookupByHash(hash); 117 | if (bucket == -1 || isEmpty(bucket)) { 118 | return false; 119 | } 120 | this.arr[bucket] = 0; 121 | shift(bucket); 122 | --this.size; 123 | return true; 124 | } 125 | 126 | /** 127 | * Remove all elements from the table. 128 | */ 129 | public void clear() { 130 | Arrays.fill(this.arr, 0); 131 | } 132 | 133 | protected boolean isEmpty(int bucket) { 134 | return (this.arr[bucket] == 0); 135 | } 136 | 137 | protected int contents(int bucket) { 138 | return isEmpty(bucket) ? 
0 : unhash(this.arr[bucket]); 139 | } 140 | 141 | // https://github.com/lemire/fastrange 142 | protected int findPreferredBucket(int hash) { 143 | if (hash == 0) { 144 | return -1; 145 | } 146 | return (int) ((Integer.toUnsignedLong(hash) * Integer.toUnsignedLong(this.arr.length)) >>> 32); 147 | } 148 | 149 | protected int wrap(int pos) { 150 | if (pos < 0) { 151 | return this.arr.length + pos; 152 | } 153 | if (pos > this.arr.length - 1) { 154 | return pos - this.arr.length; 155 | } 156 | return pos; 157 | } 158 | 159 | protected int hash(int x) { 160 | return this.hasher.hash(x); 161 | } 162 | 163 | protected int unhash(int x) { 164 | return this.hasher.unhash(x); 165 | } 166 | 167 | protected int lookupByHash(int hash) { 168 | int bucket = findPreferredBucket(hash); 169 | int probeLength = 0; 170 | while (!isEmpty(bucket) && this.arr[bucket] != hash) { 171 | if (probeLength == this.arr.length) { 172 | return -1; 173 | } 174 | bucket = wrap(bucket + 1); 175 | ++probeLength; 176 | } 177 | return bucket; 178 | } 179 | 180 | // uses pseudocode from _Algorithm Design and Applications_, Section 6.3.3 181 | protected void shift(int startBucket) { 182 | int dst = startBucket; 183 | int shift = 1; 184 | int src = wrap(dst + shift); 185 | while (!isEmpty(src)) { 186 | int preferredBucket = findPreferredBucket(this.arr[src]); 187 | // we can only move a key if its destination can be reached from its preferred bucket 188 | boolean reachable; 189 | if (src <= dst) { 190 | reachable = (preferredBucket <= dst && preferredBucket > src); 191 | } else { 192 | reachable = (preferredBucket <= dst || preferredBucket > src); 193 | } 194 | if (reachable) { 195 | this.arr[dst] = this.arr[src]; // fill the hole 196 | this.arr[src] = 0; // move the hole 197 | dst = wrap(dst + shift); 198 | shift = 1; 199 | } else { 200 | ++shift; 201 | } 202 | src = wrap(dst + shift); 203 | } 204 | } 205 | 206 | protected void dump() { 207 | for (int i = 0; i < this.arr.length; ++i) { 208 | 
System.out.format("%d\t%d\t%s\t%d\n", i, contents(i), Integer.toUnsignedString(this.arr[i]), findPreferredBucket(this.arr[i])); 209 | } 210 | } 211 | } 212 | -------------------------------------------------------------------------------- /src/main/java/set/int32/RHIntHashSet.java: -------------------------------------------------------------------------------- 1 | package set.int32; 2 | 3 | import hash.int32.IntHasher; 4 | 5 | 6 | /** 7 | * An implementation of a simple linear probing hash table, with 8 | * the "Robin 9 | * Hood" insertion heuristic, and a tombstone-free 11 | * deletion algorithm (note that because we do not use tombstones, we 12 | * cannot use the maximum insertion probe length to bound the probe length 13 | * of unsuccessful lookups; instead we use a Robin Hood-specific early 14 | * termination heuristic). The "Robin Hood" insertion heuristic dramatically 15 | * reduces the variance of probe length for successful lookups, and our 16 | * version also dramatically reduces both the expected value and variance 17 | * of unsuccessful lookups. The keys (there are no stored values) must be 18 | * 32-bit integers, which are permuted to form the hash codes (i.e., the 19 | * "hash function" is reversible). This obviates the need to separately store 20 | * hash codes or rehash the keys to perform operations which use hash codes. 21 | * 22 | * @author tdbaker 23 | */ 24 | public class RHIntHashSet extends LPIntHashSet { 25 | public static final String NAME = "set.int32.RHIntHashSet"; 26 | 27 | public RHIntHashSet(int maxEntries, double loadFactor, IntHasher hasher) { 28 | super(maxEntries, loadFactor, hasher); 29 | } 30 | 31 | public RHIntHashSet(int maxEntries, double loadFactor) { 32 | super(maxEntries, loadFactor); 33 | } 34 | 35 | public RHIntHashSet(RHIntHashSet other) throws CloneNotSupportedException { 36 | super(other); 37 | } 38 | 39 | /** 40 | * Return deep copy of the table. 
41 | */ 42 | @Override 43 | public IntSet cloneSet() throws CloneNotSupportedException { 44 | return new RHIntHashSet(this); 45 | } 46 | 47 | /** 48 | * Add an element to the table. 49 | * 50 | * @param element the 32-bit integer to add to the table 51 | * @return {@code false} if {@code element} was already present in the table, {@code true} otherwise 52 | */ 53 | @Override 54 | public boolean add(int element) { 55 | int hash = hash(element); 56 | if (lookupByHash(hash) != -1) { 57 | return false; 58 | } 59 | // If the current element is closer to its preferred bucket than 60 | // the element we're trying to insert is to its preferred bucket, 61 | // swap the inserted element into the current element's bucket and 62 | // continue probing with the swapped element, swapping it with the 63 | // next such element, and so on, until we hit an empty bucket. 64 | int bucket = findPreferredBucket(hash); 65 | int insertElemProbeDist = 0; 66 | int totalProbeLen = 0; 67 | while (!isEmpty(bucket)) { 68 | int currElemProbeDist = probeDistance(this.arr[bucket], bucket); 69 | if (currElemProbeDist < insertElemProbeDist) { 70 | int currElemHash = this.arr[bucket]; 71 | this.arr[bucket] = hash; 72 | hash = currElemHash; 73 | insertElemProbeDist = currElemProbeDist; 74 | } 75 | bucket = wrap(bucket + 1); 76 | ++insertElemProbeDist; 77 | ++totalProbeLen; 78 | if (totalProbeLen == this.arr.length) { 79 | throw new RuntimeException("Couldn't insert into table: " + element); 80 | } 81 | } 82 | this.arr[bucket] = hash; 83 | ++this.size; 84 | return true; 85 | } 86 | 87 | /** 88 | * Remove an element from the table. 
89 | * 90 | * @param value the 32-bit integer to remove from the table 91 | * @return {@code false} if {@code value} was not present in the table, {@code true} otherwise 92 | */ 93 | @Override 94 | public boolean remove(int value) { 95 | int hash = hash(value); 96 | int bucket = lookupByHash(hash); 97 | if (bucket == -1) { 98 | return false; 99 | } 100 | // find the chain from the deleted bucket to the nearest empty bucket 101 | // or entry in its preferred bucket and shift it one space to the left 102 | int endBucket = findMoveBoundary(wrap(bucket + 1)); 103 | int currBucket = bucket; 104 | while (endBucket != wrap(currBucket + 1)) { 105 | this.arr[currBucket] = this.arr[wrap(currBucket + 1)]; 106 | currBucket = wrap(currBucket + 1); 107 | } 108 | this.arr[currBucket] = 0; 109 | --this.size; 110 | return true; 111 | } 112 | 113 | @Override 114 | protected int lookupByHash(int hash) { 115 | int bucket = findPreferredBucket(hash); 116 | int probeLength = 0; 117 | while (!isEmpty(bucket)) { 118 | if (this.arr[bucket] == hash) { 119 | return bucket; 120 | } 121 | // If we're further from our element's preferred bucket than the 122 | // current element's distance from its preferred bucket, we know 123 | // the element is absent, since it would have been swapped with 124 | // the current element otherwise. 
125 | if (probeLength == this.arr.length || 126 | probeLength > probeDistance(this.arr[bucket], bucket)) { 127 | break; 128 | } 129 | bucket = wrap(bucket + 1); 130 | ++probeLength; 131 | } 132 | return -1; 133 | } 134 | 135 | private int probeDistance(int hash, int bucket) { 136 | int preferredBucket = findPreferredBucket(hash); 137 | int distance; 138 | if (preferredBucket > bucket) { // wraparound 139 | distance = this.arr.length - preferredBucket + bucket; 140 | } else { 141 | distance = bucket - preferredBucket; 142 | } 143 | return distance; 144 | } 145 | 146 | // Any bucket to the right of its preferred bucket can be shifted left, 147 | // since its preferred bucket will be shifted along with it, ensuring 148 | // it is still reachable from its preferred bucket. 149 | private int findMoveBoundary(int startBucket) { 150 | int bucket = startBucket; 151 | assert startBucket < this.arr.length; 152 | while (!isEmpty(bucket) && 153 | bucket != findPreferredBucket(this.arr[bucket])) { 154 | bucket = wrap(bucket + 1); 155 | } 156 | return bucket; 157 | } 158 | } 159 | -------------------------------------------------------------------------------- /src/main/java/set/int64/LCFSLongHashSet.java: -------------------------------------------------------------------------------- 1 | package set.int64; 2 | 3 | import hash.int64.LongHasher; 4 | 5 | 6 | /** 7 | * An implementation of a simple linear probing hash table, 8 | * with a tombstone-free deletion algorithm taken from 9 | * _Algorithm Design and Applications_, Section 6.3.3, and the Last-Come-First-Served 11 | * insertion heuristic. The LCFS heuristic dramatically reduces the 12 | * variance of probe length for successful lookups, though it has no effect 13 | * on its expected value, nor does it have any effect at all on unsuccessful 14 | * lookups (note that because we do not use tombstones, we cannot use the 15 | * maximum insertion probe length to bound the probe length of unsuccessful 16 | * lookups). 
The keys (there are no stored values) must be 64-bit integers, 17 | * which are permuted to form the hash codes (i.e., the "hash function" 18 | * is reversible). This obviates the need to separately store hash codes 19 | * or rehash the keys to perform operations which use hash codes. 20 | * 21 | * @author tdbaker 22 | */ 23 | public class LCFSLongHashSet extends LPLongHashSet { 24 | public static final String NAME = "set.int64.LCFSLongHashSet"; 25 | 26 | public LCFSLongHashSet(int maxEntries, double loadFactor, LongHasher hasher) { 27 | super(maxEntries, loadFactor, hasher); 28 | } 29 | 30 | public LCFSLongHashSet(int maxEntries, double loadFactor) { 31 | super(maxEntries, loadFactor); 32 | } 33 | 34 | public LCFSLongHashSet(LCFSLongHashSet other) throws CloneNotSupportedException { 35 | super(other); 36 | } 37 | 38 | /** 39 | * Return deep copy of the table. 40 | */ 41 | @Override 42 | public LongSet cloneSet() throws CloneNotSupportedException { 43 | return new LCFSLongHashSet(this); 44 | } 45 | 46 | /** 47 | * Add an element to the table. 48 | * 49 | * @param element the 64-bit integer to add to the table 50 | * @return {@code false} if {@code element} was already present in the table, {@code true} otherwise 51 | */ 52 | @Override 53 | public boolean add(long element) { 54 | long hash = hash(element); 55 | int bucket = lookupByHash(hash); 56 | if (bucket == -1) { 57 | // table full 58 | throw new RuntimeException("Couldn't insert into table"); 59 | } 60 | if (!isEmpty(bucket)) { 61 | return false; 62 | } 63 | int preferredBucket = findPreferredBucket(hash); 64 | int emptyBucket = findFirstEmptyBucket(preferredBucket); 65 | // This is the "Last-Come-First-Served" heuristic: we always insert the 66 | // element in its preferred bucket and shift the chain starting at that 67 | // bucket one space to the right. 
68 | moveEmptyBucketToInsertionPoint(emptyBucket, preferredBucket); 69 | this.arr[preferredBucket] = hash; 70 | ++this.size; 71 | return true; 72 | } 73 | 74 | private int findFirstEmptyBucket(int startBucket) { 75 | assert startBucket >= 0 && startBucket < this.arr.length; 76 | int bucket = startBucket; 77 | int probeLength = 0; 78 | while (!isEmpty(bucket)) { 79 | if (probeLength == this.arr.length) { 80 | return -1; 81 | } 82 | bucket = wrap(bucket + 1); 83 | ++probeLength; 84 | } 85 | return bucket; 86 | } 87 | 88 | private void moveEmptyBucketToInsertionPoint(int startBucket, int endBucket) { 89 | assert startBucket >= 0 && startBucket < this.arr.length; 90 | assert isEmpty(startBucket); 91 | int bucket = startBucket; 92 | while (bucket != endBucket) { 93 | this.arr[bucket] = this.arr[wrap(bucket - 1)]; 94 | bucket = wrap(bucket - 1); 95 | } 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /src/main/java/set/int64/LPLongHashSet.java: -------------------------------------------------------------------------------- 1 | package set.int64; 2 | 3 | import hash.int64.LongHasher; 4 | import hash.int64.Murmur3LongHasher; 5 | 6 | import java.util.Arrays; 7 | 8 | 9 | /** 10 | * An implementation of a simple linear probing hash table, with a 11 | * tombstone-free deletion algorithm taken from _Algorithm Design and 12 | * Applications_, Section 6.3.3. (Note that because we do not use tombstones, 13 | * we cannot use the maximum insertion probe length to bound the probe length 14 | * of unsuccessful lookups.) The keys (there are no stored values) must be 15 | * 64-bit integers, which are permuted to form the hash codes (i.e., the 16 | * "hash function" is reversible). This obviates the need to separately store 17 | * hash codes or rehash the keys to perform operations which use hash codes. 
18 | * 19 | * @author tdbaker 20 | */ 21 | public class LPLongHashSet implements LongSet { 22 | public static final String NAME = "set.int64.LPLongHashSet"; 23 | 24 | protected final long[] arr; 25 | protected int size = 0; 26 | protected final LongHasher hasher; 27 | 28 | public LPLongHashSet(int maxEntries, double loadFactor, LongHasher hasher) { 29 | assert maxEntries > 0; 30 | assert loadFactor > 0 && loadFactor <= 1.0; 31 | int arrSize = (int) (maxEntries / loadFactor); 32 | this.arr = new long[arrSize]; 33 | this.hasher = hasher; 34 | } 35 | 36 | public LPLongHashSet(int maxEntries, double loadFactor) { 37 | this(maxEntries, loadFactor, new Murmur3LongHasher()); 38 | } 39 | 40 | public LPLongHashSet(LPLongHashSet other) { 41 | this.arr = other.arr.clone(); 42 | this.size = other.size; 43 | this.hasher = other.hasher.cloneHasher(); 44 | } 45 | 46 | /** 47 | * Return deep copy of the table. 48 | */ 49 | public LongSet cloneSet() throws CloneNotSupportedException { 50 | return new LPLongHashSet(this); 51 | } 52 | 53 | /** 54 | * Query the size of the table's backing array. 55 | * 56 | * @return the size of the backing array 57 | */ 58 | public int capacity() { 59 | return this.arr.length; 60 | } 61 | 62 | /** 63 | * Query the number of elements in the table. 64 | * 65 | * @return the number of elements in the table 66 | */ 67 | public int size() { 68 | assert this.size >= 0; 69 | return this.size; 70 | } 71 | 72 | /** 73 | * Query the table for a value. 74 | * 75 | * @param value the 64-bit integer to query the table for 76 | * @return {@code true} if {@code value} is present in the table, {@code false} otherwise 77 | */ 78 | public boolean contains(long value) { 79 | long hash = hash(value); 80 | int bucket = lookupByHash(hash); 81 | if (bucket == -1 || isEmpty(bucket)) { 82 | return false; 83 | } 84 | return true; 85 | } 86 | 87 | /** 88 | * Add an element to the table. 
89 | * 90 | * @param element the 64-bit integer to add to the table 91 | * @return {@code false} if {@code element} was already present in the table, {@code true} otherwise 92 | */ 93 | public boolean add(long element) { 94 | long hash = hash(element); 95 | int bucket = lookupByHash(hash); 96 | if (bucket == -1) { 97 | // table full 98 | throw new RuntimeException("Couldn't insert into table"); 99 | } 100 | if (!isEmpty(bucket)) { 101 | return false; 102 | } 103 | this.arr[bucket] = hash; 104 | ++this.size; 105 | return true; 106 | } 107 | 108 | /** 109 | * Remove an element from the table. 110 | * 111 | * @param value the 64-bit integer to remove from the table 112 | * @return {@code false} if {@code value} was not present in the table, {@code true} otherwise 113 | */ 114 | public boolean remove(long value) { 115 | long hash = hash(value); 116 | int bucket = lookupByHash(hash); 117 | if (bucket == -1 || isEmpty(bucket)) { 118 | return false; 119 | } 120 | this.arr[bucket] = 0; 121 | shift(bucket); 122 | --this.size; 123 | return true; 124 | } 125 | 126 | /** 127 | * Remove all elements from the table. 128 | */ 129 | public void clear() { 130 | Arrays.fill(this.arr, 0); 131 | } 132 | 133 | protected boolean isEmpty(int bucket) { 134 | return (this.arr[bucket] == 0); 135 | } 136 | 137 | protected long contents(int bucket) { 138 | return isEmpty(bucket) ? 0 : unhash(this.arr[bucket]); 139 | } 140 | 141 | // https://github.com/lemire/fastrange 142 | // for this 64-bit version, we just use the high 32 bits of the hash 143 | // to calculate the bucket index. as long as we're using good hash 144 | // functions, this should be fine. 
145 | protected int findPreferredBucket(long hash) { 146 | if (hash == 0) { 147 | return -1; 148 | } 149 | return (int) (((hash >>> 32) * Integer.toUnsignedLong(this.arr.length)) >>> 32); 150 | } 151 | 152 | protected int wrap(int pos) { 153 | if (pos < 0) { 154 | return this.arr.length + pos; 155 | } 156 | if (pos > this.arr.length - 1) { 157 | return pos - this.arr.length; 158 | } 159 | return pos; 160 | } 161 | 162 | protected long hash(long x) { 163 | return this.hasher.hash(x); 164 | } 165 | 166 | protected long unhash(long x) { 167 | return this.hasher.unhash(x); 168 | } 169 | 170 | protected int lookupByHash(long hash) { 171 | int bucket = findPreferredBucket(hash); 172 | int probeLength = 0; 173 | while (!isEmpty(bucket) && this.arr[bucket] != hash) { 174 | if (probeLength == this.arr.length) { 175 | return -1; 176 | } 177 | bucket = wrap(bucket + 1); 178 | ++probeLength; 179 | } 180 | return bucket; 181 | } 182 | 183 | // uses pseudocode from _Algorithm Design and Applications_, Section 6.3.3 184 | protected void shift(int startBucket) { 185 | int dst = startBucket; 186 | int shift = 1; 187 | int src = wrap(dst + shift); 188 | while (!isEmpty(src)) { 189 | int preferredBucket = findPreferredBucket(this.arr[src]); 190 | // we can only move a key if its destination can be reached from its preferred bucket 191 | boolean reachable; 192 | if (src <= dst) { 193 | reachable = (preferredBucket <= dst && preferredBucket > src); 194 | } else { 195 | reachable = (preferredBucket <= dst || preferredBucket > src); 196 | } 197 | if (reachable) { 198 | this.arr[dst] = this.arr[src]; // fill the hole 199 | this.arr[src] = 0; // move the hole 200 | dst = wrap(dst + shift); 201 | shift = 1; 202 | } else { 203 | ++shift; 204 | } 205 | src = wrap(dst + shift); 206 | } 207 | } 208 | 209 | protected void dump() { 210 | for (int i = 0; i < this.arr.length; ++i) { 211 | System.out.format("%d\t%d\t%s\t%d\n", i, contents(i), Long.toUnsignedString(this.arr[i]), 
findPreferredBucket(this.arr[i])); 212 | } 213 | } 214 | } 215 | -------------------------------------------------------------------------------- /src/main/java/set/int64/LongSet.java: -------------------------------------------------------------------------------- 1 | package set.int64; 2 | 3 | /** 4 | * A 64-bit hash set. 5 | * 6 | * @author tdbaker 7 | */ 8 | public interface LongSet { 9 | 10 | /** 11 | * Query the number of elements in the table. 12 | * 13 | * @return the number of elements in the table 14 | */ 15 | public int size(); 16 | 17 | /** 18 | * Query the table for a value. 19 | * 20 | * @param value the 64-bit integer to query the table for 21 | * @return {@code true} if {@code value} is present in the table, {@code false} otherwise 22 | */ 23 | public boolean contains(long value); 24 | 25 | /** 26 | * Add an element to the table. 27 | * 28 | * @param element the 64-bit integer to add to the table 29 | * @return {@code false} if {@code element} was already present in the table, {@code true} otherwise 30 | */ 31 | public boolean add(long element); 32 | 33 | /** 34 | * Remove an element from the table. 35 | * 36 | * @param value the 64-bit integer to remove from the table 37 | * @return {@code false} if {@code value} was not present in the table, {@code true} otherwise 38 | */ 39 | public boolean remove(long value); 40 | 41 | /** 42 | * Remove all elements from the table. 43 | */ 44 | public void clear(); 45 | 46 | /** 47 | * Return deep copy of the table. 
48 | * @return the cloned table 49 | */ 50 | public LongSet cloneSet() throws CloneNotSupportedException; 51 | 52 | } 53 | -------------------------------------------------------------------------------- /src/main/java/set/int64/RHLongHashSet.java: -------------------------------------------------------------------------------- 1 | package set.int64; 2 | 3 | import hash.int64.LongHasher; 4 | 5 | 6 | /** 7 | * An implementation of a simple linear probing hash table, with 8 | * the "Robin 9 | * Hood" insertion heuristic, and a tombstone-free 11 | * deletion algorithm (note that because we do not use tombstones, we 12 | * cannot use the maximum insertion probe length to bound the probe length 13 | * of unsuccessful lookups; instead we use a Robin Hood-specific early 14 | * termination heuristic). The "Robin Hood" insertion heuristic dramatically 15 | * reduces the variance of probe length for successful lookups, and our 16 | * version also dramatically reduces both the expected value and variance 17 | * of unsuccessful lookups. The keys (there are no stored values) must be 18 | * 64-bit integers, which are permuted to form the hash codes (i.e., the 19 | * "hash function" is reversible). This obviates the need to separately store 20 | * hash codes or rehash the keys to perform operations which use hash codes. 21 | * 22 | * @author tdbaker 23 | */ 24 | public class RHLongHashSet extends LPLongHashSet { 25 | public static final String NAME = "set.int64.RHLongHashSet"; 26 | 27 | public RHLongHashSet(int maxEntries, double loadFactor, LongHasher hasher) { 28 | super(maxEntries, loadFactor, hasher); 29 | } 30 | 31 | public RHLongHashSet(int maxEntries, double loadFactor) { 32 | super(maxEntries, loadFactor); 33 | } 34 | 35 | public RHLongHashSet(RHLongHashSet other) throws CloneNotSupportedException { 36 | super(other); 37 | } 38 | 39 | /** 40 | * Return deep copy of the table. 
41 | */ 42 | @Override 43 | public LongSet cloneSet() throws CloneNotSupportedException { 44 | return new RHLongHashSet(this); 45 | } 46 | 47 | /** 48 | * Add an element to the table. 49 | * 50 | * @param element the 64-bit integer to add to the table 51 | * @return {@code false} if {@code element} was already present in the table, {@code true} otherwise 52 | */ 53 | @Override 54 | public boolean add(long element) { 55 | long hash = hash(element); 56 | if (lookupByHash(hash) != -1) { 57 | return false; 58 | } 59 | // If the current element is closer to its preferred bucket than 60 | // the element we're trying to insert is to its preferred bucket, 61 | // swap the inserted element into the current element's bucket and 62 | // continue probing with the swapped element, swapping it with the 63 | // next such element, and so on, until we hit an empty bucket. 64 | int bucket = findPreferredBucket(hash); 65 | int insertElemProbeDist = 0; 66 | int totalProbeLen = 0; 67 | while (!isEmpty(bucket)) { 68 | int currElemProbeDist = probeDistance(this.arr[bucket], bucket); 69 | if (currElemProbeDist < insertElemProbeDist) { 70 | long currElemHash = this.arr[bucket]; 71 | this.arr[bucket] = hash; 72 | hash = currElemHash; 73 | insertElemProbeDist = currElemProbeDist; 74 | } 75 | bucket = wrap(bucket + 1); 76 | ++insertElemProbeDist; 77 | ++totalProbeLen; 78 | if (totalProbeLen == this.arr.length) { 79 | throw new RuntimeException("Couldn't insert into table: " + element); 80 | } 81 | } 82 | this.arr[bucket] = hash; 83 | ++this.size; 84 | return true; 85 | } 86 | 87 | /** 88 | * Remove an element from the table. 
89 | * 90 | * @param value the 64-bit integer to remove from the table 91 | * @return {@code false} if {@code value} was not present in the table, {@code true} otherwise 92 | */ 93 | @Override 94 | public boolean remove(long value) { 95 | long hash = hash(value); 96 | int bucket = lookupByHash(hash); 97 | if (bucket == -1) { 98 | return false; 99 | } 100 | // find the chain from the deleted bucket to the nearest empty bucket 101 | // or entry in its preferred bucket and shift it one space to the left 102 | int endBucket = findMoveBoundary(wrap(bucket + 1)); 103 | int currBucket = bucket; 104 | while (endBucket != wrap(currBucket + 1)) { 105 | this.arr[currBucket] = this.arr[wrap(currBucket + 1)]; 106 | currBucket = wrap(currBucket + 1); 107 | } 108 | this.arr[currBucket] = 0; 109 | --this.size; 110 | return true; 111 | } 112 | 113 | @Override 114 | protected int lookupByHash(long hash) { 115 | int bucket = findPreferredBucket(hash); 116 | int probeLength = 0; 117 | while (!isEmpty(bucket)) { 118 | if (this.arr[bucket] == hash) { 119 | return bucket; 120 | } 121 | // If we're further from our element's preferred bucket than the 122 | // current element's distance from its preferred bucket, we know 123 | // the element is absent, since it would have been swapped with 124 | // the current element otherwise. 
125 | if (probeLength == this.arr.length || 126 | probeLength > probeDistance(this.arr[bucket], bucket)) { 127 | break; 128 | } 129 | bucket = wrap(bucket + 1); 130 | ++probeLength; 131 | } 132 | return -1; 133 | } 134 | 135 | private int probeDistance(long hash, int bucket) { 136 | int preferredBucket = findPreferredBucket(hash); 137 | int distance; 138 | if (preferredBucket > bucket) { // wraparound 139 | distance = this.arr.length - preferredBucket + bucket; 140 | } else { 141 | distance = bucket - preferredBucket; 142 | } 143 | return distance; 144 | } 145 | 146 | // Any bucket to the right of its preferred bucket can be shifted left, 147 | // since its preferred bucket will be shifted along with it, ensuring 148 | // it is still reachable from its preferred bucket. 149 | private int findMoveBoundary(int startBucket) { 150 | int bucket = startBucket; 151 | assert startBucket < this.arr.length; 152 | while (!isEmpty(bucket) && 153 | bucket != findPreferredBucket(this.arr[bucket])) { 154 | bucket = wrap(bucket + 1); 155 | } 156 | return bucket; 157 | } 158 | } 159 | -------------------------------------------------------------------------------- /src/test/java/hash/int32/VerifyHashInverses.java: -------------------------------------------------------------------------------- 1 | package hash.int32; 2 | 3 | import static org.junit.jupiter.api.Assertions.assertEquals; 4 | import static org.junit.jupiter.api.Assertions.assertThrows; 5 | import org.junit.jupiter.api.Test; 6 | 7 | import java.util.Random; 8 | 9 | import java.lang.reflect.InvocationTargetException; 10 | 11 | class VerifyHashInverses { 12 | 13 | private static final int SAMPLE_SIZE = 1 << 10; 14 | private static final long SEED = 0xdeadbeefcafebabeL; 15 | 16 | private static final Class[] classes = { 17 | H2IntHasher.class, 18 | IdentityIntHasher.class, 19 | Murmur3IntHasher.class, 20 | PhiIntHasher.class, 21 | Prospector2RoundIntHasher.class, 22 | Prospector3RoundIntHasher.class, 23 | // 
SpeckIntHasher.class, 24 | }; 25 | 26 | @Test 27 | void test() throws InstantiationException, IllegalAccessException, NoSuchMethodException, InvocationTargetException { 28 | for (Class cls : classes) { 29 | IntHasher hasher = (IntHasher) cls.getDeclaredConstructor().newInstance(); 30 | new Random(SEED).ints(SAMPLE_SIZE).forEach(i -> assertEquals(i, hasher.unhash(hasher.hash(i)), 31 | String.format("%s: inverting hash for value %d", cls.getName(), i))); 32 | // all current hash functions map 0 to itself 33 | if (!cls.equals(IdentityIntHasher.class)) { 34 | assertThrows(IllegalArgumentException.class, () -> hasher.hash(0), cls.getName()); 35 | } 36 | } 37 | } 38 | 39 | } 40 | -------------------------------------------------------------------------------- /src/test/java/hash/int64/VerifyHashInverses.java: -------------------------------------------------------------------------------- 1 | package hash.int64; 2 | 3 | import static org.junit.jupiter.api.Assertions.assertEquals; 4 | import static org.junit.jupiter.api.Assertions.assertThrows; 5 | import org.junit.jupiter.api.Test; 6 | 7 | import java.util.Random; 8 | 9 | import java.lang.reflect.InvocationTargetException; 10 | 11 | class VerifyHashInverses { 12 | 13 | private static final long SAMPLE_SIZE = 1L << 10; 14 | private static final long SEED = 0xdeadbeefcafebabeL; 15 | 16 | private static final Class[] classes = { 17 | DegskiLongHasher.class, 18 | IdentityLongHasher.class, 19 | Murmur3LongHasher.class, 20 | PhiLongHasher.class, 21 | // SpeckLongHasher.class, 22 | Variant13LongHasher.class, 23 | WangLongHasher.class, 24 | }; 25 | 26 | @Test 27 | void test() 28 | throws InstantiationException, IllegalAccessException, NoSuchMethodException, InvocationTargetException { 29 | for (Class cls : classes) { 30 | LongHasher hasher = (LongHasher) cls.getDeclaredConstructor().newInstance(); 31 | new Random(SEED).longs(SAMPLE_SIZE).forEach(i -> assertEquals(i, hasher.unhash(hasher.hash(i)), 32 | String.format("%s: 
inverting hash for value %d", cls.getName(), i))); 33 | // all current hash functions map 0 to itself 34 | if (!cls.equals(IdentityLongHasher.class)) { 35 | assertThrows(IllegalArgumentException.class, () -> hasher.hash(0L), cls.getName()); 36 | } 37 | } 38 | } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /src/test/java/set/int32/VerifyHashSet.java: -------------------------------------------------------------------------------- 1 | package set.int32; 2 | 3 | import hash.int32.*; 4 | 5 | import static org.junit.jupiter.api.Assertions.*; 6 | import org.junit.runner.RunWith; 7 | 8 | import com.pholser.junit.quickcheck.runner.JUnitQuickcheck; 9 | import com.pholser.junit.quickcheck.Property; 10 | import com.pholser.junit.quickcheck.generator.*; 11 | 12 | import java.lang.reflect.Constructor; 13 | import java.lang.reflect.InvocationTargetException; 14 | import java.util.stream.IntStream; 15 | 16 | @RunWith(JUnitQuickcheck.class) 17 | public class VerifyHashSet { 18 | 19 | private static final int SAMPLE_SIZE_LIMIT = 1 << 20; 20 | 21 | private static final Class[] classes = { 22 | BLPIntHashSet.class, 23 | LCFSIntHashSet.class, 24 | LPIntHashSet.class, 25 | RHIntHashSet.class, 26 | }; 27 | 28 | @Property 29 | public void testSequentialKeys(@InRange(minInt = 0, maxInt = SAMPLE_SIZE_LIMIT) int sampleSize, 30 | @InRange(minDouble = 0.0, maxDouble = 1.0) double loadFactor) 31 | throws InstantiationException, IllegalAccessException, NoSuchMethodException, InvocationTargetException { 32 | for (Class cls : classes) { 33 | Constructor constructor = cls.getConstructor(int.class, double.class); 34 | IntSet set = (IntSet) constructor.newInstance(sampleSize, loadFactor); 35 | int[] ints = IntStream.rangeClosed(1, sampleSize).toArray(); 36 | for (int i : ints) { 37 | assertTrue(set.add(i)); 38 | } 39 | assertEquals(sampleSize, set.size()); 40 | for (int i : ints) { 41 | assertTrue(set.contains(i)); 42 | } 43 | for (int i : ints) { 
44 | assertTrue(set.remove(i)); 45 | } 46 | assertEquals(0, set.size()); 47 | for (int i : ints) { 48 | assertFalse(set.contains(i)); 49 | } 50 | } 51 | } 52 | 53 | @Property 54 | public void testRandomKeys(@InRange(minInt = 0, maxInt = SAMPLE_SIZE_LIMIT) int sampleSize, 55 | @InRange(minDouble = 0.0, maxDouble = 1.0) double loadFactor) 56 | throws InstantiationException, IllegalAccessException, NoSuchMethodException, InvocationTargetException { 57 | for (Class cls : classes) { 58 | Constructor constructor = cls.getConstructor(int.class, double.class); 59 | IntSet set = (IntSet) constructor.newInstance(sampleSize, loadFactor); 60 | // we want a random permutation, not an RNG, to avoid duplicate keys, 61 | // and the Phi hash has quasi-uniform behavior on sequential integers 62 | IntHasher hasher = new PhiIntHasher(); 63 | int[] ints = IntStream.rangeClosed(1, sampleSize).map(hasher::hash).toArray(); 64 | for (int i : ints) { 65 | assertTrue(set.add(i)); 66 | } 67 | assertEquals(sampleSize, set.size()); 68 | for (int i : ints) { 69 | assertTrue(set.contains(i)); 70 | } 71 | for (int i : ints) { 72 | assertTrue(set.remove(i)); 73 | } 74 | assertEquals(0, set.size()); 75 | for (int i : ints) { 76 | assertFalse(set.contains(i)); 77 | } 78 | } 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /src/test/java/set/int64/VerifyHashSet.java: -------------------------------------------------------------------------------- 1 | package set.int64; 2 | 3 | import hash.int64.*; 4 | 5 | import static org.junit.jupiter.api.Assertions.*; 6 | import org.junit.runner.RunWith; 7 | 8 | import com.pholser.junit.quickcheck.runner.JUnitQuickcheck; 9 | import com.pholser.junit.quickcheck.Property; 10 | import com.pholser.junit.quickcheck.generator.*; 11 | 12 | import java.lang.reflect.Constructor; 13 | import java.lang.reflect.InvocationTargetException; 14 | import java.util.stream.LongStream; 15 | 16 | @RunWith(JUnitQuickcheck.class) 17 | 
public class VerifyHashSet { 18 | 19 | private static final int SAMPLE_SIZE_LIMIT = 1 << 20; 20 | 21 | private static final Class[] classes = { 22 | BLPLongHashSet.class, 23 | LCFSLongHashSet.class, 24 | LPLongHashSet.class, 25 | RHLongHashSet.class, 26 | }; 27 | 28 | @Property 29 | public void testSequentialKeys(@InRange(minInt = 0, maxInt = SAMPLE_SIZE_LIMIT) int sampleSize, 30 | @InRange(minDouble = 0.0, maxDouble = 1.0) double loadFactor) 31 | throws InstantiationException, IllegalAccessException, NoSuchMethodException, InvocationTargetException { 32 | for (Class cls : classes) { 33 | Constructor constructor = cls.getConstructor(int.class, double.class); 34 | LongSet set = (LongSet) constructor.newInstance(sampleSize, loadFactor); 35 | long[] longs = LongStream.rangeClosed(1, sampleSize).toArray(); 36 | for (long i : longs) { 37 | assertTrue(set.add(i)); 38 | } 39 | assertEquals(sampleSize, set.size()); 40 | for (long i : longs) { 41 | assertTrue(set.contains(i)); 42 | } 43 | for (long i : longs) { 44 | assertTrue(set.remove(i)); 45 | } 46 | assertEquals(0, set.size()); 47 | for (long i : longs) { 48 | assertFalse(set.contains(i)); 49 | } 50 | } 51 | } 52 | 53 | @Property 54 | public void testRandomKeys(@InRange(minInt = 0, maxInt = SAMPLE_SIZE_LIMIT) int sampleSize, 55 | @InRange(minDouble = 0.0, maxDouble = 1.0) double loadFactor) 56 | throws InstantiationException, IllegalAccessException, NoSuchMethodException, InvocationTargetException { 57 | for (Class cls : classes) { 58 | Constructor constructor = cls.getConstructor(int.class, double.class); 59 | LongSet set = (LongSet) constructor.newInstance(sampleSize, loadFactor); 60 | // we want a random permutation, not an RNG, to avoid duplicate keys, 61 | // and the Phi hash has quasi-uniform behavior on sequential integers 62 | LongHasher hasher = new PhiLongHasher(); 63 | long[] longs = LongStream.rangeClosed(1, sampleSize).map(hasher::hash).toArray(); 64 | for (long i : longs) { 65 | assertTrue(set.add(i)); 66 
| } 67 | assertEquals(sampleSize, set.size()); 68 | for (long i : longs) { 69 | assertTrue(set.contains(i)); 70 | } 71 | for (long i : longs) { 72 | assertTrue(set.remove(i)); 73 | } 74 | assertEquals(0, set.size()); 75 | for (long i : longs) { 76 | assertFalse(set.contains(i)); 77 | } 78 | } 79 | } 80 | } 81 | --------------------------------------------------------------------------------