├── .circleci └── config.yml ├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── build.gradle ├── gradle.properties ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── gradlew ├── gradlew.bat ├── settings.gradle └── src ├── jmh └── java │ └── me │ └── k11i │ └── xorfilter │ ├── FppAndSerializationSize.java │ └── QueryBenchmark.java ├── main └── java │ └── me │ └── k11i │ └── xorfilter │ ├── MurmurHashFinalizer.java │ └── XorFilter.java └── test └── java └── me └── k11i └── xorfilter └── XorFilterTest.java /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | 3 | executors: 4 | builder: 5 | docker: 6 | - image: circleci/openjdk:8-jdk 7 | 8 | environment: 9 | JVM_OPTS: -Xmx3200m 10 | TERM: dumb 11 | 12 | working_directory: /tmp/workspace 13 | 14 | jobs: 15 | build: 16 | executor: builder 17 | steps: 18 | - checkout 19 | 20 | - restore_cache: 21 | keys: 22 | - xor-filter-dependencies-{{ .Branch }}-{{ checksum "build.gradle" }} 23 | - xor-filter-dependencies-{{ .Branch }}- 24 | - xor-filter-dependencies- 25 | 26 | - run: 27 | name: Resolve dependencies 28 | command: ./gradlew dependencies 29 | 30 | - save_cache: 31 | paths: 32 | - ~/.gradle 33 | key: xor-filter-dependencies-{{ .Branch }}-{{ checksum "build.gradle" }} 34 | 35 | - run: 36 | name: Test 37 | command: ./gradlew test --stacktrace 38 | 39 | - run: 40 | name: Save test results 41 | command: | 42 | mkdir -p ~/test-results/junit/ 43 | find . 
-type f -regex ".*/build/test-results/.*xml" -exec cp {} ~/test-results/junit/ \; 44 | when: always 45 | 46 | - run: 47 | name: Build artifacts 48 | command: ./gradlew build --stacktrace 49 | 50 | - store_test_results: 51 | path: ~/test-results 52 | 53 | - persist_to_workspace: 54 | root: /tmp/workspace 55 | paths: 56 | - .gradle/* 57 | - build/* 58 | 59 | release: 60 | executor: builder 61 | steps: 62 | - add_ssh_keys: 63 | fingerprints: 64 | - "6e:f7:3b:3a:ab:a4:99:92:ff:e0:5c:63:a5:8e:1f:d3" 65 | 66 | - checkout 67 | 68 | - attach_workspace: 69 | at: /tmp/workspace 70 | 71 | - restore_cache: 72 | keys: 73 | - xor-filter-dependencies-{{ .Branch }}-{{ checksum "build.gradle" }} 74 | - xor-filter-dependencies-{{ .Branch }}- 75 | - xor-filter-dependencies- 76 | 77 | - run: 78 | name: Publish 79 | command: ./gradlew release 80 | 81 | workflows: 82 | build-release: 83 | jobs: 84 | - build 85 | - release: 86 | requires: 87 | - build 88 | filters: 89 | branches: 90 | only: master 91 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .gradle/ 2 | build/ 3 | 4 | .idea/ 5 | out/ 6 | 7 | .envrc 8 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 0.1.1 4 | 5 | * Build with Java 8 (not Java 11). 6 | 7 | ## 0.1.0 8 | 9 | * Initial release. 
10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 KOMIYA Atsushi 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | xor-filter 2 | ========== 3 | 4 | [![CircleCI](https://circleci.com/gh/komiya-atsushi/xor-filter/tree/develop.svg?style=svg)](https://circleci.com/gh/komiya-atsushi/xor-filter/tree/develop) 5 | [ ![Download](https://api.bintray.com/packages/komiya-atsushi/maven/xor-filter/images/download.svg) ](https://bintray.com/komiya-atsushi/maven/xor-filter/_latestVersion) 6 | 7 | Yet another Java implementation of the [Xor Filters](https://arxiv.org/abs/1912.08258). 
8 | 9 | 10 | How to use 11 | ---------- 12 | 13 | ```gradle 14 | plugins { 15 | id 'java' 16 | } 17 | 18 | sourceCompatibility = 11 19 | 20 | repositories { 21 | mavenCentral() 22 | maven { 23 | url "https://dl.bintray.com/komiya-atsushi/maven" 24 | } 25 | } 26 | 27 | dependencies { 28 | implementation 'com.google.guava:guava:28.2-jre' 29 | implementation 'me.k11i:xor-filter:0.1.1' 30 | } 31 | ``` 32 | 33 | ```java 34 | package me.k11i.demo; 35 | 36 | import com.google.common.hash.Funnels; 37 | import me.k11i.xorfilter.XorFilter; 38 | 39 | import java.io.*; 40 | import java.nio.charset.StandardCharsets; 41 | import java.util.List; 42 | 43 | @SuppressWarnings("UnstableApiUsage") 44 | public class XorFilterDemo { 45 | public static void main(String[] args) throws IOException { 46 | List strings = List.of( 47 | "1235df54-e42e-457b-9d9d-de0c46ecdfe7", 48 | "4ea534e3-6552-4975-b520-196ac5124423", 49 | "8b0fbde6-aaab-4a53-a546-be4cccdae3e9", 50 | "82570f67-7378-4772-ada2-ac7f1b44dbcf" 51 | ); 52 | 53 | // Build Xor filter from the collection. 54 | XorFilter filter = XorFilter.build( 55 | Funnels.stringFunnel(StandardCharsets.UTF_8), 56 | strings, 57 | XorFilter.Strategy.MURMUR128_XOR8); 58 | 59 | // Query whether the filter contains the element. 60 | boolean result = filter.mightContain(strings.get(0)); 61 | System.out.println(result); // => true 62 | 63 | // XorFilter implements java.util.function.Predicate interface. 64 | result = filter.test("8d05d410-1a0c-467b-9554-89c453a5"); 65 | System.out.println(result); // => false 66 | 67 | // Serialize the filter. 68 | byte[] serialized; 69 | try (ByteArrayOutputStream out = new ByteArrayOutputStream()) { 70 | filter.writeTo(out); 71 | serialized = out.toByteArray(); 72 | } 73 | 74 | // Deserialize and use the filter. 
75 | XorFilter deserialized = XorFilter.readFrom( 76 | new ByteArrayInputStream(serialized), 77 | Funnels.stringFunnel(StandardCharsets.UTF_8)); 78 | 79 | System.out.println(deserialized.mightContain(strings.get(0))); // => true 80 | System.out.println(deserialized.mightContain("8d05d410-1a0c-467b-9554-89c453a5")); // => false 81 | } 82 | } 83 | ``` 84 | 85 | ## Benchmark 86 | 87 | ### Throughput (queries/ms) 88 | 89 | | Type of the element | Algorithm | Queries per ms | 90 | | :----- | :-------------------------- | ---------: | 91 | | Long | Bloom filter (fpp = 0.389%) | 8,879.024 | 92 | | | Bloom filter (fpp = 0.002%) | 6,534.761 | 93 | | | Xor filter (8 bit) | 17,458.015 | 94 | | | Xor filter (16 bit) | 16,975.909 | 95 | | String | Bloom filter (fpp = 0.389%) | 5,318.409 | 96 | | | Bloom filter (fpp = 0.002%) | 4,449.762 | 97 | | | Xor filter (8 bit) | 7,121.787 | 98 | | | Xor filter (16 bit) | 7,128.727 | 99 | 100 | ### Serialization size 101 | 102 | The serialization sizes of the filters are approximately equal to their memory footprints.
103 | 104 | | Algorithm | Serialization size (byte) | Bit per entry | 105 | | --- | ---: | ---: | 106 | | Bloom filter (fpp = 0.389%) | 144,390 | 11.551 | 107 | | Bloom filter (fpp = 0.002%) | 281,510 | 22.521 | 108 | | Xor filter (8 bit) | 123,042 | 9.843 | 109 | | Xor filter (16 bit) | 246,075 | 19.686 | -------------------------------------------------------------------------------- /build.gradle: -------------------------------------------------------------------------------- 1 | plugins { 2 | id 'java' 3 | id 'maven-publish' 4 | id 'me.champeau.gradle.jmh' version '0.5.0' 5 | id 'net.researchgate.release' version '2.8.1' 6 | id 'com.jfrog.bintray' version '1.8.4' 7 | } 8 | 9 | group 'me.k11i' 10 | 11 | sourceCompatibility = 8 12 | 13 | repositories { 14 | mavenCentral() 15 | } 16 | 17 | dependencies { 18 | implementation group: 'com.google.guava', name: 'guava', version: '28.2-jre' 19 | testImplementation group: 'org.junit.jupiter', name: 'junit-jupiter', version: '5.4.2' 20 | } 21 | 22 | test { 23 | useJUnitPlatform() 24 | 25 | testLogging { 26 | events 'PASSED', 'FAILED', 'SKIPPED' 27 | } 28 | 29 | afterSuite { desc, result -> 30 | if (!desc.parent) { 31 | println "\nTest result: ${result.resultType}" 32 | println "Test summary: ${result.testCount} tests, " + 33 | "${result.successfulTestCount} succeeded, " + 34 | "${result.failedTestCount} failed, " + 35 | "${result.skippedTestCount} skipped" 36 | } 37 | } 38 | } 39 | 40 | jmh { 41 | timeUnit = 'ms' 42 | resultFormat = 'CSV' 43 | } 44 | 45 | javadoc { 46 | options.locale = 'en_US' 47 | } 48 | 49 | task sourcesJar(type: Jar, dependsOn: classes) { 50 | archiveClassifier = 'sources' 51 | from sourceSets.main.allSource 52 | } 53 | 54 | task javadocJar(type: Jar, dependsOn: javadoc) { 55 | archiveClassifier = 'javadoc' 56 | from javadoc.destinationDir 57 | } 58 | 59 | artifacts { 60 | archives jar 61 | archives sourcesJar 62 | archives javadocJar 63 | } 64 | 65 | publishing { 66 | publications { 67 | 
mavenJava(MavenPublication) { 68 | from components.java 69 | artifact sourcesJar 70 | artifact javadocJar 71 | } 72 | } 73 | } 74 | 75 | release { 76 | preTagCommitMessage = '[skip ci] [Gradle Release Plugin] - pre tag commit: ' 77 | newVersionCommitMessage = '[skip ci] [Gradle Release Plugin] - new version commit: ' 78 | 79 | git { 80 | requireBranch = 'master' 81 | } 82 | } 83 | 84 | afterReleaseBuild.dependsOn(bintrayUpload) 85 | 86 | bintray { 87 | user = System.getenv('BINTRAY_USER') 88 | key = System.getenv('BINTRAY_KEY') 89 | publications = ['mavenJava'] 90 | 91 | pkg { 92 | repo = 'maven' 93 | name = 'xor-filter' 94 | userOrg = 'komiya-atsushi' 95 | licenses = ['MIT'] 96 | 97 | websiteUrl = 'https://github.com/komiya-atsushi/xor-filter' 98 | issueTrackerUrl = 'https://github.com/komiya-atsushi/xor-filter/issues' 99 | vcsUrl = 'https://github.com/komiya-atsushi/xor-filter.git' 100 | 101 | version { 102 | name = project.version 103 | desc = 'Yet another Java implementation of the Xor Filters' 104 | } 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /gradle.properties: -------------------------------------------------------------------------------- 1 | release.useAutomaticVersion = true 2 | version = 0.1.2-SNAPSHOT -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/komiya-atsushi/xor-filter/7eebb2836a60d2f7ae1a1671935f62aacc8d3cc1/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | #Mon Jan 06 23:54:20 JST 2020 2 | distributionUrl=https\://services.gradle.org/distributions/gradle-6.0.1-all.zip 3 | distributionBase=GRADLE_USER_HOME 4 | 
distributionPath=wrapper/dists 5 | zipStorePath=wrapper/dists 6 | zipStoreBase=GRADLE_USER_HOME 7 | -------------------------------------------------------------------------------- /gradlew: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | ############################################################################## 4 | ## 5 | ## Gradle start up script for UN*X 6 | ## 7 | ############################################################################## 8 | 9 | # Attempt to set APP_HOME 10 | # Resolve links: $0 may be a link 11 | PRG="$0" 12 | # Need this for relative symlinks. 13 | while [ -h "$PRG" ] ; do 14 | ls=`ls -ld "$PRG"` 15 | link=`expr "$ls" : '.*-> \(.*\)$'` 16 | if expr "$link" : '/.*' > /dev/null; then 17 | PRG="$link" 18 | else 19 | PRG=`dirname "$PRG"`"/$link" 20 | fi 21 | done 22 | SAVED="`pwd`" 23 | cd "`dirname \"$PRG\"`/" >/dev/null 24 | APP_HOME="`pwd -P`" 25 | cd "$SAVED" >/dev/null 26 | 27 | APP_NAME="Gradle" 28 | APP_BASE_NAME=`basename "$0"` 29 | 30 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 31 | DEFAULT_JVM_OPTS='"-Xmx64m"' 32 | 33 | # Use the maximum available, or set MAX_FD != -1 to use that value. 34 | MAX_FD="maximum" 35 | 36 | warn () { 37 | echo "$*" 38 | } 39 | 40 | die () { 41 | echo 42 | echo "$*" 43 | echo 44 | exit 1 45 | } 46 | 47 | # OS specific support (must be 'true' or 'false'). 48 | cygwin=false 49 | msys=false 50 | darwin=false 51 | nonstop=false 52 | case "`uname`" in 53 | CYGWIN* ) 54 | cygwin=true 55 | ;; 56 | Darwin* ) 57 | darwin=true 58 | ;; 59 | MINGW* ) 60 | msys=true 61 | ;; 62 | NONSTOP* ) 63 | nonstop=true 64 | ;; 65 | esac 66 | 67 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 68 | 69 | # Determine the Java command to use to start the JVM. 
70 | if [ -n "$JAVA_HOME" ] ; then 71 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 72 | # IBM's JDK on AIX uses strange locations for the executables 73 | JAVACMD="$JAVA_HOME/jre/sh/java" 74 | else 75 | JAVACMD="$JAVA_HOME/bin/java" 76 | fi 77 | if [ ! -x "$JAVACMD" ] ; then 78 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 79 | 80 | Please set the JAVA_HOME variable in your environment to match the 81 | location of your Java installation." 82 | fi 83 | else 84 | JAVACMD="java" 85 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 86 | 87 | Please set the JAVA_HOME variable in your environment to match the 88 | location of your Java installation." 89 | fi 90 | 91 | # Increase the maximum file descriptors if we can. 92 | if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then 93 | MAX_FD_LIMIT=`ulimit -H -n` 94 | if [ $? -eq 0 ] ; then 95 | if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then 96 | MAX_FD="$MAX_FD_LIMIT" 97 | fi 98 | ulimit -n $MAX_FD 99 | if [ $? 
-ne 0 ] ; then 100 | warn "Could not set maximum file descriptor limit: $MAX_FD" 101 | fi 102 | else 103 | warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" 104 | fi 105 | fi 106 | 107 | # For Darwin, add options to specify how the application appears in the dock 108 | if $darwin; then 109 | GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" 110 | fi 111 | 112 | # For Cygwin, switch paths to Windows format before running java 113 | if $cygwin ; then 114 | APP_HOME=`cygpath --path --mixed "$APP_HOME"` 115 | CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` 116 | JAVACMD=`cygpath --unix "$JAVACMD"` 117 | 118 | # We build the pattern for arguments to be converted via cygpath 119 | ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` 120 | SEP="" 121 | for dir in $ROOTDIRSRAW ; do 122 | ROOTDIRS="$ROOTDIRS$SEP$dir" 123 | SEP="|" 124 | done 125 | OURCYGPATTERN="(^($ROOTDIRS))" 126 | # Add a user-defined pattern to the cygpath arguments 127 | if [ "$GRADLE_CYGPATTERN" != "" ] ; then 128 | OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" 129 | fi 130 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 131 | i=0 132 | for arg in "$@" ; do 133 | CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` 134 | CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option 135 | 136 | if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition 137 | eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` 138 | else 139 | eval `echo args$i`="\"$arg\"" 140 | fi 141 | i=$((i+1)) 142 | done 143 | case $i in 144 | (0) set -- ;; 145 | (1) set -- "$args0" ;; 146 | (2) set -- "$args0" "$args1" ;; 147 | (3) set -- "$args0" "$args1" "$args2" ;; 148 | (4) set -- "$args0" "$args1" "$args2" "$args3" ;; 149 | (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; 150 | (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; 151 | (7) set -- "$args0" "$args1" "$args2" "$args3" 
"$args4" "$args5" "$args6" ;; 152 | (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; 153 | (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; 154 | esac 155 | fi 156 | 157 | # Escape application args 158 | save () { 159 | for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done 160 | echo " " 161 | } 162 | APP_ARGS=$(save "$@") 163 | 164 | # Collect all arguments for the java command, following the shell quoting and substitution rules 165 | eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" 166 | 167 | # by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong 168 | if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then 169 | cd "$(dirname "$0")" 170 | fi 171 | 172 | exec "$JAVACMD" "$@" 173 | -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @if "%DEBUG%" == "" @echo off 2 | @rem ########################################################################## 3 | @rem 4 | @rem Gradle startup script for Windows 5 | @rem 6 | @rem ########################################################################## 7 | 8 | @rem Set local scope for the variables with windows NT shell 9 | if "%OS%"=="Windows_NT" setlocal 10 | 11 | set DIRNAME=%~dp0 12 | if "%DIRNAME%" == "" set DIRNAME=. 13 | set APP_BASE_NAME=%~n0 14 | set APP_HOME=%DIRNAME% 15 | 16 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 
17 | set DEFAULT_JVM_OPTS="-Xmx64m" 18 | 19 | @rem Find java.exe 20 | if defined JAVA_HOME goto findJavaFromJavaHome 21 | 22 | set JAVA_EXE=java.exe 23 | %JAVA_EXE% -version >NUL 2>&1 24 | if "%ERRORLEVEL%" == "0" goto init 25 | 26 | echo. 27 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 28 | echo. 29 | echo Please set the JAVA_HOME variable in your environment to match the 30 | echo location of your Java installation. 31 | 32 | goto fail 33 | 34 | :findJavaFromJavaHome 35 | set JAVA_HOME=%JAVA_HOME:"=% 36 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 37 | 38 | if exist "%JAVA_EXE%" goto init 39 | 40 | echo. 41 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 42 | echo. 43 | echo Please set the JAVA_HOME variable in your environment to match the 44 | echo location of your Java installation. 45 | 46 | goto fail 47 | 48 | :init 49 | @rem Get command-line arguments, handling Windows variants 50 | 51 | if not "%OS%" == "Windows_NT" goto win9xME_args 52 | 53 | :win9xME_args 54 | @rem Slurp the command line arguments. 55 | set CMD_LINE_ARGS= 56 | set _SKIP=2 57 | 58 | :win9xME_args_slurp 59 | if "x%~1" == "x" goto execute 60 | 61 | set CMD_LINE_ARGS=%* 62 | 63 | :execute 64 | @rem Setup the command line 65 | 66 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 67 | 68 | @rem Execute Gradle 69 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% 70 | 71 | :end 72 | @rem End local scope for the variables with windows NT shell 73 | if "%ERRORLEVEL%"=="0" goto mainEnd 74 | 75 | :fail 76 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 77 | rem the _cmd.exe /c_ return code! 
78 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 79 | exit /b 1 80 | 81 | :mainEnd 82 | if "%OS%"=="Windows_NT" endlocal 83 | 84 | :omega 85 | -------------------------------------------------------------------------------- /settings.gradle: -------------------------------------------------------------------------------- 1 | rootProject.name = 'xor-filter' 2 | 3 | -------------------------------------------------------------------------------- /src/jmh/java/me/k11i/xorfilter/FppAndSerializationSize.java: -------------------------------------------------------------------------------- 1 | package me.k11i.xorfilter; 2 | 3 | import com.google.common.hash.BloomFilter; 4 | import com.google.common.hash.Funnels; 5 | import me.k11i.xorfilter.QueryBenchmark.FilterFactory; 6 | 7 | import java.io.ByteArrayOutputStream; 8 | import java.io.IOException; 9 | import java.util.function.Predicate; 10 | import java.util.stream.Collectors; 11 | import java.util.stream.LongStream; 12 | import java.util.stream.Stream; 13 | 14 | @SuppressWarnings("UnstableApiUsage") 15 | public class FppAndSerializationSize { 16 | public static void main(String[] args) { 17 | for (FilterFactory filterFactory : FilterFactory.values()) { 18 | test(filterFactory); 19 | } 20 | } 21 | 22 | static void test(FilterFactory filterFactory) { 23 | final int numElements = 100_000; 24 | final int numQueries = numElements * 100; 25 | 26 | Predicate filter = filterFactory.buildFilter( 27 | Funnels.longFunnel(), 28 | LongStream.range(0, numElements) 29 | .boxed() 30 | .collect(Collectors.toList())); 31 | 32 | long falsePositiveCount = LongStream.range(numElements, numElements + numQueries) 33 | .filter(filter::test) 34 | .count(); 35 | 36 | int serializedSize; 37 | try (ByteArrayOutputStream out = new ByteArrayOutputStream()) { 38 | if (filter instanceof BloomFilter) { 39 | ((BloomFilter) filter).writeTo(out); 40 | } else { 41 | ((XorFilter) filter).writeTo(out); 42 | } 43 | serializedSize = out.size(); 44 | } catch 
(IOException e) { 45 | throw new RuntimeException(e); 46 | } 47 | 48 | String line = Stream.of( 49 | filterFactory, 50 | falsePositiveCount, 51 | 100.0 * falsePositiveCount / numQueries, 52 | serializedSize, 53 | 8.0 * serializedSize / numElements) 54 | .map(Object::toString) 55 | .collect(Collectors.joining("\t")); 56 | System.out.println(line); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/jmh/java/me/k11i/xorfilter/QueryBenchmark.java: -------------------------------------------------------------------------------- 1 | package me.k11i.xorfilter; 2 | 3 | import com.google.common.hash.BloomFilter; 4 | import com.google.common.hash.Funnel; 5 | import com.google.common.hash.Funnels; 6 | import org.openjdk.jmh.annotations.Benchmark; 7 | import org.openjdk.jmh.annotations.Param; 8 | import org.openjdk.jmh.annotations.Scope; 9 | import org.openjdk.jmh.annotations.Setup; 10 | import org.openjdk.jmh.annotations.State; 11 | 12 | import java.nio.charset.StandardCharsets; 13 | import java.util.List; 14 | import java.util.SplittableRandom; 15 | import java.util.function.Predicate; 16 | import java.util.stream.Collectors; 17 | import java.util.stream.IntStream; 18 | import java.util.stream.LongStream; 19 | 20 | @SuppressWarnings({"UnstableApiUsage", "unused"}) 21 | public class QueryBenchmark { 22 | private static final int NUM_ELEMENTS = 1 << 14; 23 | private static final int INDEX_MOD_MASK = NUM_ELEMENTS - 1; 24 | 25 | public enum FilterFactory { 26 | BLOOM_FILTER_FPP00389 { 27 | @Override 28 | Predicate buildFilter(Funnel funnel, List elements) { 29 | BloomFilter filter = BloomFilter.create(funnel, elements.size(), 0.00389); 30 | elements.forEach(filter::put); 31 | return filter; 32 | } 33 | }, 34 | 35 | BLOOM_FILTER_FPP00002 { 36 | @Override 37 | Predicate buildFilter(Funnel funnel, List elements) { 38 | BloomFilter filter = BloomFilter.create(funnel, elements.size(), 0.00002); 39 | elements.forEach(filter::put); 
40 | return filter; 41 | } 42 | }, 43 | 44 | XOR_8 { 45 | @Override 46 | Predicate buildFilter(Funnel funnel, List elements) { 47 | return XorFilter.build(funnel, elements, XorFilter.Strategy.MURMUR128_XOR8); 48 | } 49 | }, 50 | 51 | XOR_16 { 52 | @Override 53 | Predicate buildFilter(Funnel funnel, List elements) { 54 | return XorFilter.build(funnel, elements, XorFilter.Strategy.MURMUR128_XOR16); 55 | } 56 | }; 57 | 58 | abstract Predicate buildFilter(Funnel funnel, List elements); 59 | } 60 | 61 | @State(Scope.Benchmark) 62 | public static class QueryStringBenchmark { 63 | private static final Funnel FUNNEL = Funnels.stringFunnel(StandardCharsets.ISO_8859_1); 64 | 65 | @Param(value = { 66 | "BLOOM_FILTER_FPP00389", 67 | "BLOOM_FILTER_FPP00002", 68 | "XOR_8", 69 | "XOR_16", 70 | }) 71 | private FilterFactory filterFactory; 72 | 73 | private Predicate filter; 74 | private String[] strings; 75 | private int index; 76 | 77 | @Setup 78 | public void setUp() { 79 | strings = LongStream.range(0, NUM_ELEMENTS) 80 | .map(l -> l * 0x9e3779b97f4a7c15L) 81 | .mapToObj(l -> String.format("%020d", l)) 82 | .toArray(String[]::new); 83 | 84 | filter = filterFactory.buildFilter( 85 | FUNNEL, 86 | IntStream.range(0, strings.length / 2) 87 | .map(i -> i * 2) 88 | .mapToObj(i -> strings[i]) 89 | .collect(Collectors.toList())); 90 | } 91 | 92 | @Benchmark 93 | public boolean benchmark() { 94 | String query = strings[index]; 95 | index = (index + 1) & INDEX_MOD_MASK; 96 | return filter.test(query); 97 | } 98 | } 99 | 100 | @State(Scope.Benchmark) 101 | public static class QueryLongBenchmark { 102 | private static final Funnel FUNNEL = Funnels.longFunnel(); 103 | 104 | @Param(value = { 105 | "BLOOM_FILTER_FPP00389", 106 | "BLOOM_FILTER_FPP00002", 107 | "XOR_8", 108 | "XOR_16", 109 | }) 110 | private FilterFactory filterFactory; 111 | 112 | private Predicate filter; 113 | private Long[] longs; 114 | private int index; 115 | 116 | @Setup 117 | public void setUp() { 118 | longs = new 
SplittableRandom(0).longs() 119 | .limit(NUM_ELEMENTS) 120 | .boxed() 121 | .toArray(Long[]::new); 122 | 123 | filter = filterFactory.buildFilter( 124 | FUNNEL, 125 | IntStream.range(0, longs.length / 2) 126 | .map(i -> i * 2) 127 | .mapToObj(i -> longs[i]) 128 | .collect(Collectors.toList())); 129 | } 130 | 131 | @Benchmark 132 | public boolean benchmark() { 133 | Long query = longs[index]; 134 | index = (index + 1) & INDEX_MOD_MASK; 135 | return filter.test(query); 136 | } 137 | } 138 | } 139 | -------------------------------------------------------------------------------- /src/main/java/me/k11i/xorfilter/MurmurHashFinalizer.java: -------------------------------------------------------------------------------- 1 | package me.k11i.xorfilter; 2 | 3 | class MurmurHashFinalizer { 4 | static long hash(long seed, long x) { 5 | long h = seed + x; 6 | h = (h ^ (h >>> 33)) * 0xff51afd7ed558ccdL; 7 | h = (h ^ (h >>> 33)) * 0xc4ceb9fe1a85ec53L; 8 | return h ^ (h >>> 33); 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /src/main/java/me/k11i/xorfilter/XorFilter.java: -------------------------------------------------------------------------------- 1 | package me.k11i.xorfilter; 2 | 3 | import com.google.common.hash.Funnel; 4 | import com.google.common.hash.HashFunction; 5 | import com.google.common.hash.Hashing; 6 | 7 | import java.io.DataInputStream; 8 | import java.io.DataOutputStream; 9 | import java.io.IOException; 10 | import java.io.InputStream; 11 | import java.io.OutputStream; 12 | import java.util.Arrays; 13 | import java.util.Collection; 14 | import java.util.Objects; 15 | import java.util.SplittableRandom; 16 | import java.util.concurrent.ThreadLocalRandom; 17 | import java.util.function.IntConsumer; 18 | import java.util.function.Predicate; 19 | import java.util.function.ToLongFunction; 20 | 21 | @SuppressWarnings("UnstableApiUsage") 22 | public class XorFilter implements Predicate { 23 | public enum Strategy { 
24 | MURMUR128_XOR8 { 25 | @Override 26 | HashFunction newHashFunction(int seed) { 27 | return Hashing.murmur3_128(seed); 28 | } 29 | 30 | @Override 31 | KBitValueArray newArray(int capacity) { 32 | return new KBitValueArray._8(capacity); 33 | } 34 | }, 35 | 36 | MURMUR128_XOR16 { 37 | @Override 38 | HashFunction newHashFunction(int seed) { 39 | return Hashing.murmur3_128(seed); 40 | } 41 | 42 | @Override 43 | KBitValueArray newArray(int capacity) { 44 | return new KBitValueArray._16(capacity); 45 | } 46 | }; 47 | 48 | abstract HashFunction newHashFunction(int seed); 49 | 50 | abstract KBitValueArray newArray(int capacity); 51 | } 52 | 53 | private static abstract class KBitValueArray { 54 | static class _8 extends KBitValueArray { 55 | private final byte[] b; 56 | 57 | _8(int capacity) { 58 | super(capacity); 59 | b = new byte[capacity]; 60 | } 61 | 62 | private byte fingerprint(long x) { 63 | return (byte) (x >>> 56); 64 | } 65 | 66 | @Override 67 | public void put(int index, long x, int h0, int h1, int h2) { 68 | b[index] = 0; 69 | b[index] = (byte) (fingerprint(x) ^ b[h0] ^ b[h1] ^ b[h2]); 70 | } 71 | 72 | @Override 73 | boolean contains(long x, int h0, int h1, int h2) { 74 | return fingerprint(x) == (b[h0] ^ b[h1] ^ b[h2]); 75 | } 76 | 77 | @Override 78 | void writeTo(DataOutputStream out) throws IOException { 79 | for (byte value : b) { 80 | out.writeByte(value); 81 | } 82 | } 83 | 84 | @Override 85 | void readFrom(DataInputStream in) throws IOException { 86 | for (int i = 0; i < b.length; i++) { 87 | b[i] = in.readByte(); 88 | } 89 | } 90 | 91 | @Override 92 | public boolean equals(Object o) { 93 | if (this == o) return true; 94 | if (o == null || getClass() != o.getClass()) return false; 95 | _8 that = (_8) o; 96 | return Arrays.equals(b, that.b); 97 | } 98 | 99 | @Override 100 | public int hashCode() { 101 | return Arrays.hashCode(b); 102 | } 103 | } 104 | 105 | static class _16 extends KBitValueArray { 106 | private final short[] b; 107 | 108 | _16(int 
capacity) { 109 | super(capacity); 110 | b = new short[capacity]; 111 | } 112 | 113 | private short fingerprint(long x) { 114 | return (short) (x >>> 48); 115 | } 116 | 117 | @Override 118 | void put(int index, long x, int h0, int h1, int h2) { 119 | b[index] = 0; 120 | b[index] = (short) (fingerprint(x) ^ b[h0] ^ b[h1] ^ b[h2]); 121 | } 122 | 123 | @Override 124 | boolean contains(long x, int h0, int h1, int h2) { 125 | return fingerprint(x) == (b[h0] ^ b[h1] ^ b[h2]); 126 | } 127 | 128 | @Override 129 | void writeTo(DataOutputStream out) throws IOException { 130 | for (short value : b) { 131 | out.writeShort(value); 132 | } 133 | } 134 | 135 | @Override 136 | void readFrom(DataInputStream in) throws IOException { 137 | for (int i = 0; i < b.length; i++) { 138 | b[i] = in.readShort(); 139 | } 140 | } 141 | 142 | @Override 143 | public boolean equals(Object o) { 144 | if (this == o) return true; 145 | if (o == null || getClass() != o.getClass()) return false; 146 | _16 that = (_16) o; 147 | return Arrays.equals(b, that.b); 148 | } 149 | 150 | @Override 151 | public int hashCode() { 152 | return Arrays.hashCode(b); 153 | } 154 | } 155 | 156 | final int capacity; 157 | private final int blockLength; 158 | 159 | KBitValueArray(int capacity) { 160 | this.capacity = capacity; 161 | this.blockLength = capacity / 3; 162 | } 163 | 164 | void put(int index, long x) { 165 | long x2 = MurmurHashFinalizer.hash(0, x); 166 | put(index, x, h0(x, blockLength), h1(x2, blockLength), h2(x2, blockLength)); 167 | } 168 | 169 | boolean contains(long x) { 170 | long x2 = MurmurHashFinalizer.hash(0, x); 171 | return contains(x, h0(x, blockLength), h1(x2, blockLength), h2(x2, blockLength)); 172 | } 173 | 174 | abstract void put(int index, long x, int h0, int h1, int h2); 175 | 176 | abstract boolean contains(long x, int h0, int h1, int h2); 177 | 178 | abstract void writeTo(DataOutputStream out) throws IOException; 179 | 180 | abstract void readFrom(DataInputStream in) throws IOException; 
181 | 182 | @Override 183 | public String toString() { 184 | return getClass().getSimpleName() + "{" + 185 | "capacity=" + capacity + 186 | '}'; 187 | } 188 | } 189 | 190 | // --- 191 | 192 | public static XorFilter build(Funnel funnel, Collection elements, Strategy strategy) { 193 | return build(funnel, elements, strategy, ThreadLocalRandom.current().nextInt()); 194 | } 195 | 196 | public static XorFilter build(Funnel funnel, Collection elements, Strategy strategy, int rngSeed) { 197 | final int capacity = (int) ((1.23 * elements.size() + 32 + 2) / 3) * 3; 198 | Mapping mapping = buildMapping(funnel, elements, strategy, capacity, rngSeed); 199 | KBitValueArray b = strategy.newArray(capacity); 200 | assign(b, mapping.stack); 201 | return new XorFilter<>(strategy, mapping.seed, funnel, b); 202 | } 203 | 204 | private static Mapping buildMapping( 205 | Funnel funnel, 206 | Collection elements, 207 | Strategy strategy, 208 | int capacity, 209 | int rngSeed) { 210 | 211 | int blockLength = capacity / 3; 212 | HashedElements hashedElements = new HashedElements(elements.size()); 213 | HashedElementSets h = new HashedElementSets(capacity); 214 | IntQueue q = new IntQueue(elements.size()); 215 | PairStack s = new PairStack(elements.size()); 216 | SplittableRandom r = new SplittableRandom(rngSeed); 217 | 218 | do { 219 | int seed = r.nextInt(); 220 | HashFunction hashFunction = strategy.newHashFunction(seed); 221 | hashedElements.hashAll(o -> hashFunction.hashObject(o, funnel).asLong(), elements); 222 | 223 | for (int i = 0; i < hashedElements.size(); i++) { 224 | long x = hashedElements.get(i); 225 | long x2 = MurmurHashFinalizer.hash(0, x); 226 | 227 | h.append(h0(x, blockLength), x); 228 | h.append(h1(x2, blockLength), x); 229 | h.append(h2(x2, blockLength), x); 230 | } 231 | 232 | for (int i = 0; i < capacity; i++) { 233 | if (h.containsOnlyOneValue(i)) { 234 | q.enqueue(i); 235 | } 236 | } 237 | 238 | while (q.isNotEmpty()) { 239 | int i = q.dequeue(); 240 | if 
(h.containsOnlyOneValue(i)) { 241 | long x = h.getSoleValue(i); 242 | long x2 = MurmurHashFinalizer.hash(0, x); 243 | s.push(i, x); 244 | 245 | h.remove(h0(x, blockLength), x, q::enqueue); 246 | h.remove(h1(x2, blockLength), x, q::enqueue); 247 | h.remove(h2(x2, blockLength), x, q::enqueue); 248 | } 249 | } 250 | 251 | if (s.size() == hashedElements.size()) { 252 | return new Mapping(seed, s); 253 | } 254 | 255 | h.clear(); 256 | q.clear(); 257 | s.clear(); 258 | 259 | } while (true); 260 | } 261 | 262 | private static void assign(KBitValueArray b, PairStack s) { 263 | while (s.isNotEmpty()) { 264 | int index = s.peekIndex(); 265 | long x = s.popHashedElements(); 266 | b.put(index, x); 267 | } 268 | } 269 | 270 | private static int h0(long x, int blockLength) { 271 | return (int) ((x & 0xffffffffL) * blockLength >>> 32); 272 | } 273 | 274 | private static int h1(long x2, int blockLength) { 275 | return (int) (((x2 & 0xffffffffL) * blockLength >>> 32) + blockLength); 276 | } 277 | 278 | private static int h2(long x2, int blockLength) { 279 | return (int) (((x2 >>> 32) * blockLength >>> 32) + blockLength + blockLength); 280 | } 281 | 282 | // --- 283 | 284 | private final Strategy strategy; 285 | private final int seed; 286 | private final HashFunction hashFunction; 287 | private final Funnel funnel; 288 | private final KBitValueArray b; 289 | 290 | private XorFilter(Strategy strategy, int seed, Funnel funnel, KBitValueArray b) { 291 | this.strategy = strategy; 292 | this.seed = seed; 293 | this.hashFunction = strategy.newHashFunction(seed); 294 | this.funnel = funnel; 295 | this.b = b; 296 | } 297 | 298 | public boolean mightContain(T element) { 299 | long x = hashFunction.hashObject(element, funnel).asLong(); 300 | return b.contains(x); 301 | } 302 | 303 | @Override 304 | public boolean test(T t) { 305 | return mightContain(t); 306 | } 307 | 308 | public void writeTo(OutputStream out) throws IOException { 309 | try (DataOutputStream dos = new DataOutputStream(out)) 
{ 310 | dos.writeByte(strategy.ordinal()); 311 | dos.writeInt(seed); 312 | dos.writeInt(b.capacity); 313 | b.writeTo(dos); 314 | } 315 | } 316 | 317 | public static XorFilter readFrom(InputStream in, Funnel funnel) throws IOException { 318 | try (DataInputStream dis = new DataInputStream(in)) { 319 | Strategy strategy = Strategy.values()[dis.readByte()]; 320 | int seed = dis.readInt(); 321 | int capacity = dis.readInt(); 322 | KBitValueArray b = strategy.newArray(capacity); 323 | b.readFrom(dis); 324 | return new XorFilter<>(strategy, seed, funnel, b); 325 | } 326 | } 327 | 328 | @Override 329 | public boolean equals(Object o) { 330 | if (this == o) return true; 331 | if (o == null || getClass() != o.getClass()) return false; 332 | XorFilter xorFilter = (XorFilter) o; 333 | return seed == xorFilter.seed && 334 | strategy == xorFilter.strategy && 335 | Objects.equals(hashFunction, xorFilter.hashFunction) && 336 | Objects.equals(funnel, xorFilter.funnel) && 337 | Objects.equals(b, xorFilter.b); 338 | } 339 | 340 | @Override 341 | public int hashCode() { 342 | return Objects.hash(strategy, seed, hashFunction, funnel, b); 343 | } 344 | 345 | @Override 346 | public String toString() { 347 | return "XorFilter{" + 348 | "strategy=" + strategy + 349 | ", seed=" + seed + 350 | ", hashFunction=" + hashFunction + 351 | ", funnel=" + funnel + 352 | ", b=" + b + 353 | '}'; 354 | } 355 | } 356 | 357 | class HashedElements { 358 | private final long[] hashedElements; 359 | private int actualSize; 360 | 361 | HashedElements(int size) { 362 | this.hashedElements = new long[size]; 363 | } 364 | 365 | void hashAll(ToLongFunction hashFunction, Collection elements) { 366 | int i0 = 0; 367 | for (T o : elements) { 368 | hashedElements[i0++] = hashFunction.applyAsLong(o); 369 | } 370 | 371 | Arrays.sort(hashedElements); 372 | 373 | int p = 0; 374 | for (int i1 = 1; i1 < hashedElements.length; i1++) { 375 | if (hashedElements[p] != hashedElements[i1]) { 376 | hashedElements[++p] = 
hashedElements[i1]; 377 | } 378 | } 379 | 380 | actualSize = p + 1; 381 | } 382 | 383 | int size() { 384 | return actualSize; 385 | } 386 | 387 | long get(int index) { 388 | return hashedElements[index]; 389 | } 390 | } 391 | 392 | class HashedElementSets { 393 | private final long[] xorValues; 394 | private final byte[] counts; 395 | 396 | HashedElementSets(int capacity) { 397 | this.xorValues = new long[capacity]; 398 | this.counts = new byte[capacity]; 399 | } 400 | 401 | void clear() { 402 | Arrays.fill(xorValues, 0); 403 | Arrays.fill(counts, (byte) 0); 404 | } 405 | 406 | void append(int index, long x) { 407 | xorValues[index] ^= x; 408 | counts[index]++; 409 | } 410 | 411 | void remove(int index, long x, IntConsumer consumer) { 412 | xorValues[index] ^= x; 413 | counts[index]--; 414 | if (counts[index] == 1) { 415 | consumer.accept(index); 416 | } 417 | } 418 | 419 | boolean containsOnlyOneValue(int index) { 420 | return counts[index] == 1; 421 | } 422 | 423 | long getSoleValue(int index) { 424 | return xorValues[index]; 425 | } 426 | } 427 | 428 | class IntQueue { 429 | private final int[] queue; 430 | private int putIndex; 431 | private int takeIndex; 432 | 433 | IntQueue(int size) { 434 | queue = new int[size + 1]; 435 | } 436 | 437 | boolean isNotEmpty() { 438 | return putIndex != takeIndex; 439 | } 440 | 441 | void clear() { 442 | putIndex = takeIndex = 0; 443 | } 444 | 445 | void enqueue(int value) { 446 | queue[putIndex++] = value; 447 | if (putIndex == queue.length) { 448 | putIndex = 0; 449 | } 450 | } 451 | 452 | int dequeue() { 453 | int result = queue[takeIndex++]; 454 | if (takeIndex == queue.length) { 455 | takeIndex = 0; 456 | } 457 | return result; 458 | } 459 | } 460 | 461 | class PairStack { 462 | private final int[] indexes; 463 | private final long[] hashedElements; 464 | private int stackIndex; 465 | 466 | PairStack(int size) { 467 | this.indexes = new int[size]; 468 | this.hashedElements = new long[size]; 469 | } 470 | 471 | int size() 
{
        return stackIndex;
    }

    boolean isNotEmpty() {
        return stackIndex > 0;
    }

    void clear() {
        stackIndex = 0;
    }

    /** Pushes the pair ({@code index}, {@code x}). */
    void push(int index, long x) {
        indexes[stackIndex] = index;
        hashedElements[stackIndex] = x;
        stackIndex++;
    }

    /** Returns the slot index of the top pair without removing the pair. */
    int peekIndex() {
        return indexes[stackIndex - 1];
    }

    /**
     * Removes the top pair and returns its hashed element. Call {@link #peekIndex()}
     * beforehand if the pair's slot index is needed.
     */
    long popHashedElements() {
        return hashedElements[--stackIndex];
    }
}

/** Result of a successful construction attempt: the hash seed and the peeling order. */
class Mapping {
    final int seed;
    final PairStack stack;

    Mapping(int seed, PairStack stack) {
        this.seed = seed;
        this.stack = stack;
    }
}

--------------------------------------------------------------------------------
/src/test/java/me/k11i/xorfilter/XorFilterTest.java:
--------------------------------------------------------------------------------
package me.k11i.xorfilter;

import com.google.common.hash.Funnel;
import com.google.common.hash.Funnels;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.MethodSource;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;

import static org.junit.jupiter.api.Assertions.*;

@SuppressWarnings("UnstableApiUsage")
class XorFilterTest {
    private static final Funnel<CharSequence> FUNNEL = Funnels.stringFunnel(StandardCharsets.ISO_8859_1);

    /** Supplies every strategy to the parameterized tests. */
    static Stream<XorFilter.Strategy> strategies() {
        return Stream.of(XorFilter.Strategy.values());
    }

    /** Every element the filter was built from must be reported as contained. */
    @ParameterizedTest
    @MethodSource("strategies")
    void testMightContainShouldReturnTrue(XorFilter.Strategy strategy) {
        final int numEntries = 10000;
        List<String> elements = IntStream.range(0, numEntries)
                .mapToObj(String::valueOf)
                .collect(Collectors.toList());

        XorFilter<String> filter = XorFilter.build(
                FUNNEL,
                elements,
                strategy);

        for (int i = 0; i < elements.size(); i++) {
            String element = elements.get(i);
            assertTrue(
                    filter.mightContain(element),
                    String.format("[%d]: %s", i, element));
        }
    }

    /** Elements that were never added must be accepted at a rate below 1%. */
    @ParameterizedTest
    @MethodSource("strategies")
    void testMightContainShouldNotReturnTrue(XorFilter.Strategy strategy) {
        final int numEntries = 10000;
        List<String> elements = IntStream.range(0, numEntries)
                .mapToObj(String::valueOf)
                .collect(Collectors.toList());

        XorFilter<String> filter = XorFilter.build(
                FUNNEL,
                elements,
                strategy);

        final double expectedMaxFalsePositiveRate = 0.01;

        System.out.print("False positives:");
        // Probe with the next numEntries integers, none of which were added.
        long falsePositiveCount = IntStream.range(numEntries, numEntries * 2)
                .mapToObj(String::valueOf)
                .filter(filter::mightContain)
                .peek(s -> System.out.printf(" %s", s))
                .count();

        double falsePositiveRate = falsePositiveCount / (double) numEntries;
        System.out.printf("%n# of false positives: %d%n", falsePositiveCount);
        System.out.printf("False positive rate: %f%n", falsePositiveRate);

        assertTrue(falsePositiveRate < expectedMaxFalsePositiveRate);
    }

    /** A filter must survive a writeTo/readFrom round trip unchanged. */
    @ParameterizedTest
    @MethodSource("strategies")
    void testSerialization(XorFilter.Strategy strategy) throws IOException {
        final int numEntries = 10000;
        List<String> elements = IntStream.range(0, numEntries)
                .mapToObj(String::valueOf)
                .collect(Collectors.toList());

        // Build from the first half only, so the comparison below covers both
        // added and never-added elements.
        XorFilter<String> filter = XorFilter.build(
                FUNNEL,
                elements.stream().limit(numEntries / 2).collect(Collectors.toList()),
                strategy);

        XorFilter<String> deserialized;
        try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
            filter.writeTo(out);
            deserialized = XorFilter.readFrom(new ByteArrayInputStream(out.toByteArray()), FUNNEL);
        }

        assertEquals(filter, deserialized);

        for (int i = 0; i < elements.size(); i++) {
            String element = elements.get(i);
            assertEquals(
                    filter.mightContain(element),
                    deserialized.mightContain(element),
                    String.format("[%d]: %s", i, element));
        }
    }
}

--------------------------------------------------------------------------------