├── .github └── workflows │ ├── maven.yml │ └── release.yml ├── LICENSE ├── README.md ├── docs ├── code-of-conduct.md └── contributing.md ├── examples └── cuckoofilter │ ├── README.md │ ├── pom.xml │ └── src │ └── CuckooFilterExample.java ├── pom.xml ├── setfilters-tests ├── pom.xml └── test │ └── com │ └── google │ └── setfilters │ └── cuckoofilter │ ├── CuckooFilterArrayTest.java │ ├── CuckooFilterConfigTest.java │ ├── CuckooFilterHashFunctionsTest.java │ ├── CuckooFilterLargeTest.java │ ├── CuckooFilterStrategiesTest.java │ ├── CuckooFilterTableTest.java │ ├── CuckooFilterTest.java │ ├── SemiSortedCuckooFilterTableTest.java │ └── SerializedCuckooFilterTableTest.java └── setfilters ├── pom.xml └── src └── com └── google └── setfilters └── cuckoofilter ├── CuckooFilter.java ├── CuckooFilterArray.java ├── CuckooFilterConfig.java ├── CuckooFilterHashFunctions.java ├── CuckooFilterStrategies.java ├── CuckooFilterTable.java ├── SemiSortedCuckooFilterTable.java ├── SerializedCuckooFilterTable.java └── UncompressedCuckooFilterTable.java /.github/workflows/maven.yml: -------------------------------------------------------------------------------- 1 | # This workflow will build a Java project with Maven, and cache/restore any dependencies to improve the workflow execution time 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-java-with-maven 3 | 4 | # This workflow uses actions that are not certified by GitHub. 5 | # They are provided by a third-party and are governed by 6 | # separate terms of service, privacy policy, and support 7 | # documentation. 
8 | 9 | name: Java CI with Maven 10 | 11 | on: 12 | push: 13 | branches: [ "master" ] 14 | pull_request: 15 | branches: [ "master" ] 16 | 17 | jobs: 18 | build: 19 | 20 | runs-on: ubuntu-latest 21 | 22 | steps: 23 | - uses: actions/checkout@v4 24 | - name: Set up JDK 17 25 | uses: actions/setup-java@v3 26 | with: 27 | java-version: '17' 28 | distribution: 'temurin' 29 | cache: maven 30 | - name: Build with Maven 31 | run: mvn -B package --file pom.xml 32 | 33 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: setfilters release action 2 | run-name: ${{ github.actor }} is publishing release ${{ github.ref_name }} 3 | on: 4 | release: 5 | types: [published] 6 | jobs: 7 | sha256: 8 | name: sha256 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: zip url 12 | run: echo "${{ github.server_url }}/${{ github.repository }}/archive/refs/tags/${{ github.ref_name }}.zip" 13 | - name: Create zip SHA256 14 | run: curl -sL "${{ github.server_url }}/${{ github.repository }}/archive/refs/tags/${{ github.ref_name }}.zip" | shasum -a 256 | cut -d " " -f 1 15 | - name: Tarball url 16 | run: echo "${{ github.server_url }}/${{ github.repository }}/archive/refs/tags/${{ github.ref_name }}.tar.gz" 17 | - name: Create tarball SHA256 18 | run: curl -sL "${{ github.server_url }}/${{ github.repository }}/archive/refs/tags/${{ github.ref_name }}.tar.gz" | shasum -a 256 | cut -d " " -f 1 19 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 
9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. 
For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://github.com/google/setfilters/workflows/CI/badge.svg?branch=master)](https://github.com/google/setfilters/actions) 2 | 3 | # Setfilters Library 4 | 5 | This repository contains implementations of a collection of set filter data structures, also commonly referred to as approximate membership query data structures. We will use the name "Setfilters" to refer to the library. 6 | 7 | ## Adding Setfilters library to your Java project 8 | 9 | ### Maven 10 | 11 | Setfilters' Maven group ID is `com.google.setfilters`, and its artifact ID is `setfilters`. To add the dependency using Maven, add the following lines to your project's `pom.xml`: 12 | 13 | ```xml 14 | <dependency> 15 | <groupId>com.google.setfilters</groupId> 16 | <artifactId>setfilters</artifactId> 17 | <version>1.0.0</version> 18 | </dependency> 19 | ``` 20 | 21 | ## Supported Data Structures 22 | 23 | ### Cuckoo Filter 24 | Cuckoo filter is a space-efficient, approximate membership query data structure that supports insertions and deletions. 
False positives are allowed (e.g. a non-member element may incorrectly be labeled as a member), but false negatives are not. The code for the cuckoo filter is located in [setfilters/src/com/google/setfilters/cuckoofilter/](https://github.com/google/setfilters/tree/master/setfilters/src/com/google/setfilters/cuckoofilter) directory. For example code on how to use the library, please see [examples/cuckoofilter/](https://github.com/google/setfilters/tree/master/examples/cuckoofilter). 25 | 26 | ## Note 27 | 28 | This is not an officially supported Google product. 29 | -------------------------------------------------------------------------------- /docs/code-of-conduct.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, gender identity and expression, level of 9 | experience, education, socio-economic status, nationality, personal appearance, 10 | race, religion, or sexual identity and orientation. 
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or reject 41 | comments, commits, code, wiki edits, issues, and other contributions that are 42 | not aligned to this Code of Conduct, or to ban temporarily or permanently any 43 | contributor for other behaviors that they deem inappropriate, threatening, 44 | offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. 
Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | This Code of Conduct also applies outside the project spaces when the Project 56 | Steward has a reasonable belief that an individual's behavior may have a 57 | negative impact on the project or its community. 58 | 59 | ## Conflict Resolution 60 | 61 | We do not believe that all conflict is bad; healthy debate and disagreement 62 | often yield positive results. However, it is never okay to be disrespectful or 63 | to engage in behavior that violates the project’s code of conduct. 64 | 65 | If you see someone violating the code of conduct, you are encouraged to address 66 | the behavior directly with those involved. Many issues can be resolved quickly 67 | and easily, and this gives people more control over the outcome of their 68 | dispute. If you are unable to resolve the matter for any reason, or if the 69 | behavior is threatening or harassing, report it. We are dedicated to providing 70 | an environment where participants feel welcome and safe. 71 | 72 | Reports should be directed to *[PROJECT STEWARD NAME(s) AND EMAIL(s)]*, the 73 | Project Steward(s) for *[PROJECT NAME]*. It is the Project Steward’s duty to 74 | receive and address reported violations of the code of conduct. They will then 75 | work with a committee consisting of representatives from the Open Source 76 | Programs Office and the Google Open Source Strategy team. If for any reason you 77 | are uncomfortable reaching out to the Project Steward, please email 78 | opensource@google.com. 79 | 80 | We will investigate every complaint, but you may not receive a direct response. 81 | We will use our discretion in determining when and how to follow up on reported 82 | incidents, which may range from not taking action to permanent expulsion from 83 | the project and project-sponsored spaces. 
We will notify the accused of the 84 | report and provide them an opportunity to discuss it before any action is taken. 85 | The identity of the reporter will be omitted from the details of the report 86 | supplied to the accused. In potentially harmful situations, such as ongoing 87 | harassment or threats to anyone's safety, we may take action without notice. 88 | 89 | ## Attribution 90 | 91 | This Code of Conduct is adapted from the Contributor Covenant, version 1.4, 92 | available at 93 | https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 94 | -------------------------------------------------------------------------------- /docs/contributing.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are 4 | just a few small guidelines you need to follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement. You (or your employer) retain the copyright to your contribution; 10 | this simply gives us permission to use and redistribute your contributions as 11 | part of the project. Head over to <https://cla.developers.google.com/> to see 12 | your current agreements on file or to sign a new one. 13 | 14 | You generally only need to submit a CLA once, so if you've already submitted one 15 | (even if it was for a different project), you probably don't need to do it 16 | again. 17 | 18 | ## Code Reviews 19 | 20 | All submissions, including submissions by project members, require review. We 21 | use GitHub pull requests for this purpose. Consult 22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 23 | information on using pull requests. 24 | 25 | ## Community Guidelines 26 | 27 | This project follows [Google's Open Source Community 28 | Guidelines](https://opensource.google/conduct/). 
29 | -------------------------------------------------------------------------------- /examples/cuckoofilter/README.md: -------------------------------------------------------------------------------- 1 | # Cuckoo Filter Example Code 2 | 3 | To run the code: 4 | 5 | ``` 6 | mvn package 7 | java -cp target/cuckoofilter-example-HEAD-jre-SNAPSHOT.jar com.google.setfilters.examples.cuckoofilter.CuckooFilterExample 8 | ``` 9 | -------------------------------------------------------------------------------- /examples/cuckoofilter/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.google.setfilters 7 | cuckoofilter-example 8 | HEAD-jre-SNAPSHOT 9 | https://github.com/google/setfilters 10 | 11 | Cuckoo Filter Example 12 | 13 | 14 | 15 | Apache License, Version 2.0 16 | http://www.apache.org/licenses/LICENSE-2.0.txt 17 | repo 18 | 19 | 20 | 21 | 22 | src 23 | test 24 | 25 | 26 | .. 27 | 28 | LICENSE 29 | 30 | META-INF 31 | 32 | 33 | 34 | 35 | org.apache.maven.plugins 36 | maven-shade-plugin 37 | 3.5.3 38 | 39 | 40 | package 41 | 42 | shade 43 | 44 | 45 | 46 | 47 | 48 | com.google.setfilters.examples.cuckoofilter.CuckooFilterExample 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | maven-compiler-plugin 58 | 3.8.1 59 | 60 | 1.8 61 | 1.8 62 | UTF-8 63 | true 64 | 65 | -sourcepath 66 | doesnotexist 67 | 68 | -XDcompilePolicy=simple 69 | 70 | 71 | true 72 | 73 | 74 | 75 | maven-jar-plugin 76 | 3.2.0 77 | 78 | 79 | org.apache.maven.plugins 80 | maven-source-plugin 81 | 2.2.1 82 | 83 | 84 | attach-sources 85 | 86 | jar-no-fork 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | com.google.setfilters 97 | setfilters 98 | 1.0.0 99 | 100 | 101 | com.google.guava 102 | guava 103 | 32.0.0-jre 104 | 105 | 106 | 107 | -------------------------------------------------------------------------------- /examples/cuckoofilter/src/CuckooFilterExample.java: 
-------------------------------------------------------------------------------- 1 | package com.google.setfilters.examples.cuckoofilter; 2 | 3 | import com.google.common.hash.Funnels; 4 | import com.google.setfilters.cuckoofilter.CuckooFilter; 5 | import com.google.setfilters.cuckoofilter.CuckooFilterConfig; 6 | import com.google.setfilters.cuckoofilter.CuckooFilterConfig.Size; 7 | import com.google.setfilters.cuckoofilter.CuckooFilterHashFunctions; 8 | import com.google.setfilters.cuckoofilter.CuckooFilterStrategies; 9 | import com.google.setfilters.cuckoofilter.SerializedCuckooFilterTable; 10 | import java.util.HashSet; 11 | import java.util.List; 12 | import java.util.Random; 13 | 14 | public class CuckooFilterExample { 15 | 16 | /** 17 | * In this example code, we create a new cuckoo filter sized for 1,000,000 integers, configure the 18 | * target false positive probability as 0.01, and round-trip the filter through serialization. 19 | */ 20 | public static void simpleExample() { 21 | // Create a new cuckoo filter with 1,000,000 elements. 22 | int numElements = 1000000; 23 | CuckooFilterConfig config = CuckooFilterConfig.newBuilder() 24 | .setSize(Size.computeEfficientSize(0.01, numElements)) 25 | .setHashFunction(CuckooFilterHashFunctions.MURMUR3_128) 26 | .setStrategy(CuckooFilterStrategies.SIMPLE_MOD) 27 | .build(); 28 | CuckooFilter<Integer> cuckooFilter = CuckooFilter.createNew(config, Funnels.integerFunnel()); 29 | 30 | // Insert 1,000,000 integers to the empty cuckoo filter. 31 | HashSet<Integer> elements = new HashSet<>(); 32 | for (int i = 0; i < numElements; i++) { 33 | elements.add(i); 34 | } 35 | for (int element : elements) { 36 | if (!cuckooFilter.insert(element)) { 37 | // This should not print. 38 | System.out.println("Element " + element + " could not be inserted!"); 39 | } 40 | } 41 | 42 | // Verifies that all inserted elements are in the cuckoo filter, i.e. no false negatives. 
43 | if (hasFalseNegative(cuckooFilter, elements)) { 44 | System.out.println("False negative in the cuckoo filter!"); 45 | } 46 | 47 | // Computes (approximate) false positive rate. The printed false positive rate should be 48 | // < 0.01, or approximately equal to it. 49 | System.out.println("Estimated false positive rate: " 50 | + computeFalsePositiveRate(cuckooFilter, elements, /* numRuns= */100000)); 51 | 52 | // Serialize the cuckoo filter. 53 | SerializedCuckooFilterTable table = cuckooFilter.serializeTable(); 54 | byte[] rawTableBytes = table.asByteArray(); 55 | System.out.println("Serialized cuckoo filter size in bytes: " + rawTableBytes.length); 56 | 57 | // Deserialize the serialized cuckoo filter. 58 | SerializedCuckooFilterTable table2 = 59 | SerializedCuckooFilterTable.createFromByteArray(rawTableBytes); 60 | // Note that the hash function, strategy, and funnel objects are NOT part of the serialization. 61 | // The same hash function, strategy, and funnel that were used to create the original cuckoo 62 | // filter object must be supplied. 63 | CuckooFilter<Integer> cuckooFilter2 = 64 | CuckooFilter.createFromSerializedTable(table2, config.hashFunction(), config.strategy(), 65 | Funnels.integerFunnel()); 66 | 67 | // Verify correctness of the deserialized filter. 68 | // Verifies that all inserted elements are in the cuckoo filter, i.e. no false negatives. 69 | if (hasFalseNegative(cuckooFilter2, elements)) { 70 | System.out.println("False negative in the cuckoo filter!"); 71 | } 72 | 73 | // Computes (approximate) false positive rate. The printed false positive rate should be 74 | // < 0.01, or approximately equal to it. 75 | System.out.println("Estimated false positive rate of deserialized cuckoo filter: " 76 | + computeFalsePositiveRate(cuckooFilter2, elements, /* numRuns= */100000)); 77 | } 78 | 79 | // Returns whether the given cuckoo filter has false negatives, with original elements 80 | // as {@code elements}. 
81 | private static boolean hasFalseNegative(CuckooFilter<Integer> cuckooFilter, 82 | HashSet<Integer> elements) { 83 | for (int element : elements) { 84 | if (!cuckooFilter.contains(element)) { 85 | return true; 86 | } 87 | } 88 | return false; 89 | } 90 | 91 | // Computes an estimated false positive rate of the given cuckoo filter by querying 92 | // random non-member elements {@code numRuns} times. 93 | private static double computeFalsePositiveRate(CuckooFilter<Integer> cuckooFilter, 94 | HashSet<Integer> elements, int numRuns) { 95 | Random random = new Random(); 96 | int falsePositiveCount = 0; 97 | for (int i = 0; i < numRuns; i++) { 98 | int randomElement; 99 | do { 100 | randomElement = random.nextInt(); 101 | } while (elements.contains(randomElement)); 102 | if (cuckooFilter.contains(randomElement)) { 103 | falsePositiveCount++; 104 | } 105 | } 106 | return (falsePositiveCount + 0.0) / numRuns; 107 | } 108 | 109 | public static void main(String[] args) { 110 | simpleExample(); 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.google.setfilters 7 | setfilters-parent 8 | HEAD-jre-SNAPSHOT 9 | pom 10 | https://github.com/google/setfilters 11 | 12 | Setfilters Main Parent 13 | 14 | 15 | 2.26.1 16 | 32.0.0-jre 17 | 1.1 18 | 9+181-r4173-1 19 | 20 | 21 | 22 | 23 | Apache License, Version 2.0 24 | http://www.apache.org/licenses/LICENSE-2.0.txt 25 | repo 26 | 27 | 28 | 29 | 30 | setfilters 31 | setfilters-tests 32 | 33 | 34 | 35 | 36 | ossrh 37 | https://s01.oss.sonatype.org/content/repositories/snapshots 38 | 39 | 40 | ossrh 41 | https://s01.oss.sonatype.org/service/local/staging/deploy/maven2/ 42 | 43 | 44 | 45 | 46 | src 47 | test 48 | 49 | 50 | .. 
51 | 52 | LICENSE 53 | 54 | META-INF 55 | 56 | 57 | 58 | 59 | 60 | maven-compiler-plugin 61 | 3.8.1 62 | 63 | 1.8 64 | 1.8 65 | UTF-8 66 | true 67 | 68 | -sourcepath 69 | doesnotexist 70 | 71 | -XDcompilePolicy=simple 72 | 73 | 74 | 75 | 76 | com.google.errorprone 77 | error_prone_core 78 | 2.23.0 79 | 80 | 81 | true 82 | 83 | 84 | 85 | maven-jar-plugin 86 | 3.2.0 87 | 88 | 89 | org.apache.maven.plugins 90 | maven-source-plugin 91 | 2.2.1 92 | 93 | 94 | attach-sources 95 | 96 | jar-no-fork 97 | 98 | 99 | 100 | 101 | 102 | org.apache.maven.plugins 103 | maven-javadoc-plugin 104 | 3.5.0 105 | 106 | 107 | attach-javadocs 108 | 109 | jar 110 | 111 | 112 | 113 | 114 | ${java.home}/bin/javadoc 115 | 116 | 117 | 118 | maven-dependency-plugin 119 | 3.1.1 120 | 121 | 122 | org.apache.maven.plugins 123 | maven-gpg-plugin 124 | 1.5 125 | 126 | 127 | sign-artifacts 128 | verify 129 | 130 | sign 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | org.sonatype.plugins 140 | nexus-staging-maven-plugin 141 | 1.6.7 142 | true 143 | 144 | ossrh 145 | https://s01.oss.sonatype.org/ 146 | true 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | com.google.errorprone 156 | error_prone_annotations 157 | ${errorprone.version} 158 | 159 | 160 | com.google.guava 161 | guava 162 | ${guava.version} 163 | 164 | 165 | 166 | 167 | -------------------------------------------------------------------------------- /setfilters-tests/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.google.setfilters 7 | setfilters-parent 8 | HEAD-jre-SNAPSHOT 9 | 10 | 11 | setfilters-tests 12 | Setfilters Unit Tests 13 | 14 | 15 | 16 | ${project.groupId} 17 | setfilters 18 | ${project.version} 19 | 20 | 21 | com.google.guava 22 | guava 23 | 24 | 25 | junit 26 | junit 27 | 4.13.2 28 | test 29 | 30 | 31 | org.mockito 32 | mockito-core 33 | 4.11.0 34 | test 35 | 36 | 37 | com.google.truth 38 | truth 39 | ${truth.version} 40 | test 41 | 42 | 43 | 
com.google.truth.extensions 44 | truth-java8-extension 45 | ${truth.version} 46 | test 47 | 48 | 49 | 50 | 51 | 52 | 53 | .. 54 | 55 | LICENSE 56 | proguard/* 57 | 58 | META-INF 59 | 60 | 61 | 62 | 63 | maven-compiler-plugin 64 | 65 | 66 | maven-source-plugin 67 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /setfilters-tests/test/com/google/setfilters/cuckoofilter/CuckooFilterArrayTest.java: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // https://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
package com.google.setfilters.cuckoofilter;

import static com.google.common.truth.Truth.assertThat;
import static org.junit.Assert.assertThrows;

import java.util.Random;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

/**
 * Tests for {@link CuckooFilterArray}: constructor argument validation, round-tripping through
 * {@code toByteArray()}, the simple accessors, and set/get behavior for every element width from 1
 * to 64 bits.
 */
@RunWith(JUnit4.class)
public final class CuckooFilterArrayTest {

  /** Message expected from both constructors when {@code bitsPerElement} is rejected. */
  private static final String INVALID_BITS_PER_ELEMENT_MESSAGE =
      "bitsPerElement must be in range [1, 64].";

  @Test
  public void createsNewArray_invalidLength() {
    IllegalArgumentException thrown =
        assertThrows(IllegalArgumentException.class, () -> new CuckooFilterArray(0, 20));

    assertThat(thrown.getMessage()).isEqualTo(invalidLengthMessage());
  }

  @Test
  public void createsNewArray_invalidBitsPerElement() {
    IllegalArgumentException tooFewBits =
        assertThrows(IllegalArgumentException.class, () -> new CuckooFilterArray(5, 0));
    assertThat(tooFewBits.getMessage()).isEqualTo(INVALID_BITS_PER_ELEMENT_MESSAGE);

    IllegalArgumentException tooManyBits =
        assertThrows(IllegalArgumentException.class, () -> new CuckooFilterArray(5, 65));
    assertThat(tooManyBits.getMessage()).isEqualTo(INVALID_BITS_PER_ELEMENT_MESSAGE);
  }

  @Test
  public void createsNewArray_tooLarge() {
    long requestedLength = (long) Integer.MAX_VALUE * 63;

    IllegalArgumentException thrown =
        assertThrows(
            IllegalArgumentException.class, () -> new CuckooFilterArray(requestedLength, 20));

    String expected =
        String.format(
            "Too large: could not create CuckooFilterArray with length %s"
                + " and bitsPerElement 20.",
            requestedLength);
    assertThat(thrown.getMessage()).isEqualTo(expected);
  }

  @Test
  public void createsExistingArray_invalidLength() {
    IllegalArgumentException thrown =
        assertThrows(
            IllegalArgumentException.class, () -> new CuckooFilterArray(0, 20, new byte[1]));

    assertThat(thrown.getMessage()).isEqualTo(invalidLengthMessage());
  }

  @Test
  public void createsExistingArray_invalidBitsPerElement() {
    IllegalArgumentException tooFewBits =
        assertThrows(
            IllegalArgumentException.class, () -> new CuckooFilterArray(5, 0, new byte[1]));
    assertThat(tooFewBits.getMessage()).isEqualTo(INVALID_BITS_PER_ELEMENT_MESSAGE);

    IllegalArgumentException tooManyBits =
        assertThrows(
            IllegalArgumentException.class, () -> new CuckooFilterArray(5, 65, new byte[1]));
    assertThat(tooManyBits.getMessage()).isEqualTo(INVALID_BITS_PER_ELEMENT_MESSAGE);
  }

  @Test
  public void creatExistingArray() {
    CuckooFilterArray source = new CuckooFilterArray(100, 20);
    source.set(0, 1);
    source.set(1, 2);
    byte[] bytes = source.toByteArray();

    CuckooFilterArray restored = new CuckooFilterArray(100, 20, bytes);

    assertThat(restored.getAsLong(0)).isEqualTo(1);
    assertThat(restored.getAsLong(1)).isEqualTo(2);
    // Every slot that was never written must read back as zero.
    for (int index = 2; index < restored.length(); index++) {
      assertThat(restored.getAsLong(index)).isEqualTo(0);
    }
  }

  @Test
  public void length() {
    assertThat(new CuckooFilterArray(100, 20).length()).isEqualTo(100);
  }

  @Test
  public void bitsPerElement() {
    assertThat(new CuckooFilterArray(100, 20).bitsPerElement()).isEqualTo(20);
  }

  @Test
  public void getAsLong_indexOutOfBounds() {
    CuckooFilterArray array = new CuckooFilterArray(100, 20);

    IllegalArgumentException belowRange =
        assertThrows(IllegalArgumentException.class, () -> array.getAsLong(-1));
    assertThat(belowRange.getMessage()).isEqualTo("Index is out of bounds: -1.");

    IllegalArgumentException aboveRange =
        assertThrows(IllegalArgumentException.class, () -> array.getAsLong(100));
    assertThat(aboveRange.getMessage()).isEqualTo("Index is out of bounds: 100.");
  }

  @Test
  public void set_indexOutOfBounds() {
    CuckooFilterArray array = new CuckooFilterArray(100, 20);

    IllegalArgumentException belowRange =
        assertThrows(IllegalArgumentException.class, () -> array.set(-1, 20));
    assertThat(belowRange.getMessage()).isEqualTo("Index is out of bounds: -1.");

    IllegalArgumentException aboveRange =
        assertThrows(IllegalArgumentException.class, () -> array.set(100, 20));
    assertThat(aboveRange.getMessage()).isEqualTo("Index is out of bounds: 100.");
  }

  @Test
  public void setAndGet() {
    for (int bitsPerElement = 1; bitsPerElement <= 64; bitsPerElement++) {
      CuckooFilterArray array = new CuckooFilterArray(100, bitsPerElement);
      long keptBits = mask(bitsPerElement);

      for (int index = 0; index < array.length(); index++) {
        array.set(index, -1L - index);
      }

      // Only the low bitsPerElement bits of each written value are expected to be read back.
      for (int index = 0; index < array.length(); index++) {
        assertThat(array.getAsLong(index)).isEqualTo((-1L - index) & keptBits);
      }
    }
  }

  @Test
  public void setAndGet2() {
    for (int bitsPerElement = 1; bitsPerElement <= 64; bitsPerElement++) {
      CuckooFilterArray array = new CuckooFilterArray(10000, bitsPerElement);
      Random rand = new Random();
      long[] expected = new long[(int) array.length()];

      // Write every slot twice so that the second pass overwrites non-zero contents.
      fillWithRandomValues(array, expected, rand, bitsPerElement);
      fillWithRandomValues(array, expected, rand, bitsPerElement);

      // Clear every even slot.
      for (int index = 0; index < array.length(); index += 2) {
        expected[index] = 0;
        array.set(index, 0);
      }

      for (int index = 0; index < array.length(); index++) {
        assertThat(array.getAsLong(index)).isEqualTo(expected[index]);
      }
    }
  }

  /**
   * Stores a fresh random value, truncated to {@code bitsPerElement} bits, in every slot of {@code
   * array} and records the same value in {@code expected}.
   */
  private static void fillWithRandomValues(
      CuckooFilterArray array, long[] expected, Random rand, int bitsPerElement) {
    for (int index = 0; index < array.length(); index++) {
      long value = rand.nextLong() & mask(bitsPerElement);
      expected[index] = value;
      array.set(index, value);
    }
  }

  /** Builds the message expected from both constructors when {@code length} is rejected. */
  private static String invalidLengthMessage() {
    long bound = (long) Integer.MAX_VALUE * Long.SIZE;
    return String.format("length must be in range (0, %s).", bound);
  }

  /** Returns a value whose low {@code length} bits are set; all 64 bits when length is 64. */
  private static long mask(int length) {
    // 1L << 64 wraps around to 1L in Java, so the full-width mask needs its own case.
    return length == 64 ? -1L : (1L << length) - 1;
  }
}

--------------------------------------------------------------------------------
/setfilters-tests/test/com/google/setfilters/cuckoofilter/CuckooFilterConfigTest.java:
--------------------------------------------------------------------------------
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.setfilters.cuckoofilter;

import static com.google.common.truth.Truth.assertThat;
import static org.junit.Assert.assertThrows;

import com.google.common.hash.Funnel;
import com.google.common.hash.Funnels;
import com.google.common.hash.HashCode;
import com.google.common.hash.Hashing;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

/**
 * Tests for {@link CuckooFilterConfig} and {@link CuckooFilterConfig.Size}: a successful build,
 * the error raised for each missing or out-of-range builder field, and the argument checks of
 * {@code Size.computeEfficientSize}.
 */
@RunWith(JUnit4.class)
public final class CuckooFilterConfigTest {

  /** Message asserted whenever {@code computeEfficientSize} cannot produce a size. */
  private static final String NO_SUITABLE_SIZE_MESSAGE =
      "Could not compute suitable cuckoo filter size based on the given input. Either the"
          + " target false positive rate is too low, or the computed size is too big.";

  /** Hash function used by the tests; delegates to Guava's murmur3_128. */
  public static final class TestHashFunction implements CuckooFilterConfig.HashFunction {
    // NOTE(review): the type parameters were missing from this copy of the file. They are restored
    // to mirror Guava's HashFunction#hashObject(T, Funnel<? super T>); confirm they match the
    // declaration of CuckooFilterConfig.HashFunction#hash.
    @Override
    public <T> HashCode hash(T element, Funnel<? super T> funnel) {
      return Hashing.murmur3_128().hashObject(element, funnel);
    }
  }

  /** Strategy stub returning fixed values so that the test can recognise it after the build. */
  public static final class TestStrategy implements CuckooFilterConfig.Strategy {
    @Override
    public long computeFingerprint(HashCode hash, int fingerprintLength) {
      return 20;
    }

    @Override
    public int computeBucketIndex(HashCode hash, int bucketCount) {
      return 0;
    }

    @Override
    public int computeOtherBucketIndex(
        long fingerprint,
        int bucketIndex,
        int bucketCount,
        CuckooFilterConfig.HashFunction hashFunction) {
      return 1;
    }
  }

  /** Builds the valid size (100 buckets, capacity 4, 16-bit fingerprints) shared by the tests. */
  private static CuckooFilterConfig.Size validSize() {
    return CuckooFilterConfig.Size.newBuilder()
        .setBucketCount(100)
        .setBucketCapacity(4)
        .setFingerprintLength(16)
        .build();
  }

  @Test
  public void build_buildsCuckooFilterConfig() {
    CuckooFilterConfig config =
        CuckooFilterConfig.newBuilder()
            .setSize(validSize())
            .setHashFunction(new TestHashFunction())
            .setStrategy(new TestStrategy())
            .setUseSpaceOptimization(true)
            .build();

    CuckooFilterConfig.Size size = config.size();
    assertThat(size.bucketCount()).isEqualTo(100);
    assertThat(size.bucketCapacity()).isEqualTo(4);
    assertThat(size.fingerprintLength()).isEqualTo(16);

    Funnel<Long> funnel = Funnels.longFunnel();
    CuckooFilterConfig.HashFunction hashFunction = config.hashFunction();
    assertThat(hashFunction.hash(100L, funnel))
        .isEqualTo(Hashing.murmur3_128().hashObject(100L, funnel));

    CuckooFilterConfig.Strategy strategy = config.strategy();
    HashCode randomHash = HashCode.fromLong(100L);
    assertThat(strategy.computeFingerprint(randomHash, 16)).isEqualTo(20);
    assertThat(strategy.computeBucketIndex(randomHash, 100)).isEqualTo(0);
    assertThat(strategy.computeOtherBucketIndex(0, 5, 100, config.hashFunction())).isEqualTo(1);
    // TestStrategy does not override maxReplacementCount(), so this value comes from Strategy.
    assertThat(strategy.maxReplacementCount()).isEqualTo(500);

    assertThat(config.useSpaceOptimization()).isTrue();
  }

  @Test
  public void build_failsWithUnsetSize() {
    String message =
        assertThrows(IllegalArgumentException.class, () -> CuckooFilterConfig.newBuilder().build())
            .getMessage();

    assertThat(message).isEqualTo("Size must be set.");
  }

  @Test
  public void build_failsWithUnsetHashFunction() {
    String message =
        assertThrows(
                IllegalArgumentException.class,
                () -> CuckooFilterConfig.newBuilder().setSize(validSize()).build())
            .getMessage();

    assertThat(message).isEqualTo("Hash function must be set.");
  }

  @Test
  public void build_failsWithUnsetStrategy() {
    String message =
        assertThrows(
                IllegalArgumentException.class,
                () ->
                    CuckooFilterConfig.newBuilder()
                        .setSize(validSize())
                        .setHashFunction(new TestHashFunction())
                        .build())
            .getMessage();

    assertThat(message).isEqualTo("Strategy must be set.");
  }

  @Test
  public void buildSize_failsWithInvalidBucketCount() {
    String message =
        assertThrows(
                IllegalArgumentException.class,
                () -> CuckooFilterConfig.Size.newBuilder().setBucketCount(0).build())
            .getMessage();

    assertThat(message).isEqualTo("bucketCount must be > 0: 0 given instead.");
  }

  @Test
  public void buildSize_failsWithInvalidBucketCapacity() {
    String messageLower =
        assertThrows(
                IllegalArgumentException.class,
                () ->
                    CuckooFilterConfig.Size.newBuilder()
                        .setBucketCount(1)
                        .setBucketCapacity(0)
                        .build())
            .getMessage();

    assertThat(messageLower)
        .isEqualTo("bucketCapacity must be in range (0, 128]: 0 given instead.");

    String messageHigher =
        assertThrows(
                IllegalArgumentException.class,
                () ->
                    CuckooFilterConfig.Size.newBuilder()
                        .setBucketCount(1)
                        .setBucketCapacity(129)
                        .build())
            .getMessage();

    assertThat(messageHigher)
        .isEqualTo("bucketCapacity must be in range (0, 128]: 129 given instead.");
  }

  @Test
  public void buildSize_failsWithInvalidFingerprintLength() {
    String messageLower =
        assertThrows(
                IllegalArgumentException.class,
                () ->
                    CuckooFilterConfig.Size.newBuilder()
                        .setBucketCount(1)
                        .setBucketCapacity(1)
                        .setFingerprintLength(0)
                        .build())
            .getMessage();

    assertThat(messageLower)
        .isEqualTo("fingerprintLength must be in range (0, 64]: 0 given instead.");

    String messageHigher =
        assertThrows(
                IllegalArgumentException.class,
                () ->
                    CuckooFilterConfig.Size.newBuilder()
                        .setBucketCount(1)
                        .setBucketCapacity(1)
                        .setFingerprintLength(65)
                        .build())
            .getMessage();

    assertThat(messageHigher)
        .isEqualTo("fingerprintLength must be in range (0, 64]: 65 given instead.");
  }

  @Test
  public void computeEfficientSize_failsWithInvalidFalsePositiveRate() {
    String messageLower =
        assertThrows(
                IllegalArgumentException.class,
                () -> CuckooFilterConfig.Size.computeEfficientSize(0, 5))
            .getMessage();

    assertThat(messageLower)
        .isEqualTo("targetFalsePositiveRate must be in range (0, 1): 0.0 given.");

    String messageHigher =
        assertThrows(
                IllegalArgumentException.class,
                () -> CuckooFilterConfig.Size.computeEfficientSize(1, 5))
            .getMessage();

    assertThat(messageHigher)
        .isEqualTo("targetFalsePositiveRate must be in range (0, 1): 1.0 given.");
  }

  @Test
  public void computeEfficientSize_failsWithInvalidElementsCountUpperBound() {
    String message =
        assertThrows(
                IllegalArgumentException.class,
                () -> CuckooFilterConfig.Size.computeEfficientSize(0.5, 0))
            .getMessage();

    assertThat(message).isEqualTo("elementsCountUpperBound must be > 0: 0 given.");
  }

  @Test
  public void computeEfficientSize_failsIfElementsCountUpperBoundTooBig() {
    String message =
        assertThrows(
                IllegalArgumentException.class,
                () -> CuckooFilterConfig.Size.computeEfficientSize(0.5, 5000L * Integer.MAX_VALUE))
            .getMessage();

    assertThat(message).isEqualTo(NO_SUITABLE_SIZE_MESSAGE);
  }

  @Test
  public void computeEfficientSize_failsIfFalsePositiveRateTooLow() {
    String message =
        assertThrows(
                IllegalArgumentException.class,
                () -> CuckooFilterConfig.Size.computeEfficientSize(Double.MIN_NORMAL, 100))
            .getMessage();

    assertThat(message).isEqualTo(NO_SUITABLE_SIZE_MESSAGE);
  }
}

--------------------------------------------------------------------------------
/setfilters-tests/test/com/google/setfilters/cuckoofilter/CuckooFilterHashFunctionsTest.java:
--------------------------------------------------------------------------------
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14 | 15 | package com.google.setfilters.cuckoofilter; 16 | 17 | import static com.google.common.truth.Truth.assertThat; 18 | 19 | import com.google.common.hash.Funnels; 20 | import com.google.common.hash.Hashing; 21 | import org.junit.Test; 22 | import org.junit.runner.RunWith; 23 | import org.junit.runners.JUnit4; 24 | 25 | @RunWith(JUnit4.class) 26 | public final class CuckooFilterHashFunctionsTest { 27 | 28 | @Test 29 | public void murmur3_128() { 30 | assertThat(CuckooFilterHashFunctions.MURMUR3_128.hash(100L, Funnels.longFunnel())) 31 | .isEqualTo(Hashing.murmur3_128().hashObject(100L, Funnels.longFunnel())); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /setfilters-tests/test/com/google/setfilters/cuckoofilter/CuckooFilterLargeTest.java: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // https://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 

package com.google.setfilters.cuckoofilter;

import static com.google.common.truth.Truth.assertThat;

import com.google.common.hash.Funnel;
import com.google.common.hash.Funnels;
import com.google.common.hash.HashCode;
import com.google.common.hash.Hashing;
import java.util.Arrays;
import java.util.List;
import java.util.Random;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameter;
import org.junit.runners.Parameterized.Parameters;

/**
 * Large, randomized tests for {@link CuckooFilter}. Every test runs once per parameter set
 * returned by {@link #data()}: a hash function paired with a space-optimization flag.
 *
 * <p>NOTE(review): {@code CuckooFilter} is written as {@code CuckooFilter<Long>} here because the
 * type arguments were missing from this copy of the file; confirm against the class declaration.
 */
@RunWith(Parameterized.class)
public final class CuckooFilterLargeTest {

  /** Hash function backed by Guava's {@code Hashing.goodFastHash(128)}. */
  private static class GoodFastHashFunction implements CuckooFilterConfig.HashFunction {

    // NOTE(review): type parameters restored to mirror Guava's
    // HashFunction#hashObject(T, Funnel<? super T>); confirm they match
    // CuckooFilterConfig.HashFunction#hash.
    @Override
    public <T> HashCode hash(T element, Funnel<? super T> funnel) {
      return Hashing.goodFastHash(128).hashObject(element, funnel);
    }
  }

  /** Parameter sets: {hash function, useSpaceOptimization}. */
  @Parameters
  public static List<Object[]> data() {
    return Arrays.asList(new Object[][]{{new GoodFastHashFunction(), false},
        {CuckooFilterHashFunctions.MURMUR3_128, true}});
  }

  @Parameter(0)
  public CuckooFilterConfig.HashFunction hashFunction;

  @Parameter(1)
  public boolean useSpaceOptimization;

  @Test
  public void serializeAndDeserialize() {
    final int insertedElementsCount = 100000000;
    final double targetFalsePositiveRate = 0.001;

    CuckooFilterConfig config =
        CuckooFilterConfig.newBuilder()
            .setSize(CuckooFilterConfig.Size.computeEfficientSize(
                targetFalsePositiveRate, insertedElementsCount))
            .setHashFunction(hashFunction)
            .setStrategy(CuckooFilterStrategies.SIMPLE_MOD)
            .setUseSpaceOptimization(useSpaceOptimization)
            .build();

    CuckooFilter<Long> cuckooFilter = CuckooFilter.createNew(config, Funnels.longFunnel());

    for (int i = 0; i < insertedElementsCount; i++) {
      assertThat(cuckooFilter.insert((long) i)).isTrue();
    }

    SerializedCuckooFilterTable serializedTable = cuckooFilter.serializeTable();

    CuckooFilter<Long> anotherCuckooFilter =
        CuckooFilter.createFromSerializedTable(
            serializedTable, config.hashFunction(), config.strategy(), Funnels.longFunnel());

    // Every inserted element must still be reported after the round trip.
    for (int i = 0; i < insertedElementsCount; i++) {
      assertThat(anotherCuckooFilter.contains((long) i)).isTrue();
    }
    assertThat(anotherCuckooFilter.contains((long) insertedElementsCount)).isFalse();
  }

  @Test
  public void loadIsHigh() {
    Random random = new Random();

    final int[] bucketCounts = {1000, 10000, 100000, 1000000};
    final int[] bucketCapacities = {4, 5, 6, 7, 8};
    final int fingerprintLength = 16;

    for (int bucketCount : bucketCounts) {
      for (int bucketCapacity : bucketCapacities) {
        CuckooFilter<Long> cuckooFilter =
            newFilter(
                CuckooFilterConfig.Size.newBuilder()
                    .setBucketCount(bucketCount)
                    .setBucketCapacity(bucketCapacity)
                    .setFingerprintLength(fingerprintLength)
                    .build());

        insertRandomElementsUntilRejected(cuckooFilter, random);

        assertThat(cuckooFilter.load()).isAtLeast(0.95);
      }
    }
  }

  @Test
  public void computeEfficientSize_achievesTargetFalsePositiveRateAndCapacity() {
    Random random = new Random();

    final double[] targetFalsePositiveRates = {0.05, 0.01, 0.001};
    final long[] elementsCountUpperBounds = {1, 5, 10, 50, 100, 500, 1000, 5000, 10000};

    for (double targetFalsePositiveRate : targetFalsePositiveRates) {
      for (long elementsCountUpperBound : elementsCountUpperBounds) {
        CuckooFilter<Long> cuckooFilter =
            newFilter(
                CuckooFilterConfig.Size.computeEfficientSize(
                    targetFalsePositiveRate, elementsCountUpperBound));

        insertRandomElementsUntilRejected(cuckooFilter, random);

        assertThat(computeFalsePositiveRate(cuckooFilter, 2000000))
            .isAtMost(targetFalsePositiveRate);
        assertThat(cuckooFilter.count()).isAtLeast(minimumExpectedCount(elementsCountUpperBound));
      }
    }
  }

  @Test
  public void closeToTheoreticalFalsePositiveRate() {
    Random random = new Random();

    final int bucketCount = 1000;
    final int[] bucketCapacities = {2, 3, 4, 5, 6, 7, 8};
    for (int bucketCapacity : bucketCapacities) {
      // Due to time out issue, we only go up to 12 bits (otherwise we have to sample too many times
      // to get a reliable measurement).
      // TODO: Add a separate benchmark to test for longer fingerprint length.
      for (int fingerprintLength = 8; fingerprintLength <= 12; fingerprintLength++) {
        CuckooFilter<Long> cuckooFilter =
            newFilter(
                CuckooFilterConfig.Size.newBuilder()
                    .setBucketCount(bucketCount)
                    .setBucketCapacity(bucketCapacity)
                    .setFingerprintLength(fingerprintLength)
                    .build());

        insertRandomElementsUntilRejected(cuckooFilter, random);

        // Let f = fingerprintLength. A random element not in the cuckoo filter has 1 / (2^f - 1)
        // probability of matching a random fingerprint, and the probability it matches at least one
        // of the x fingerprints is 1 - (1 - 1 / (2^f - 1))^x which is approximately x / (2^f - 1)
        // when x << 2^f - 1.
        //
        // If X is a random variable denoting number of fingerprints in a randomly chosen two
        // buckets, false positive probability is roughly E[X / (2^f - 1)] = E[X] / (2^f - 1).
        // Let a be the cuckoo filter's load and b be the bucketCapacity. Then E[X] = a * 2b.
        // Thus, theoretical false positive rate is ~ a * 2b / (2^f - 1).
        double load = cuckooFilter.load();
        double theoreticalFalsePositiveRate =
            load * 2 * bucketCapacity / ((1 << fingerprintLength) - 1);

        double relativeDiff =
            Math.abs(computeFalsePositiveRate(cuckooFilter, 2000000) - theoreticalFalsePositiveRate)
                / theoreticalFalsePositiveRate;
        assertThat(relativeDiff).isAtMost(0.04);
      }
    }
  }

  /**
   * Creates an empty filter of the given size using the hash function and space-optimization flag
   * of the current parameter set and the SIMPLE_MOD strategy.
   */
  private CuckooFilter<Long> newFilter(CuckooFilterConfig.Size size) {
    return CuckooFilter.createNew(
        CuckooFilterConfig.newBuilder()
            .setSize(size)
            .setHashFunction(hashFunction)
            .setStrategy(CuckooFilterStrategies.SIMPLE_MOD)
            .setUseSpaceOptimization(useSpaceOptimization)
            .build(),
        Funnels.longFunnel());
  }

  /** Inserts random non-negative elements until {@code insert} first returns false. */
  private static void insertRandomElementsUntilRejected(
      CuckooFilter<Long> cuckooFilter, Random random) {
    boolean inserted;
    do {
      inserted = cuckooFilter.insert(nextNonNegativeLong(random));
    } while (inserted);
  }

  /**
   * Returns a random value in {@code [0, Long.MAX_VALUE]}.
   *
   * <p>Clearing the sign bit is used instead of {@code Math.abs}, because {@code
   * Math.abs(Long.MIN_VALUE)} is still negative and {@link #computeFalsePositiveRate} relies on
   * negative values never having been inserted.
   */
  private static long nextNonNegativeLong(Random random) {
    return random.nextLong() & Long.MAX_VALUE;
  }

  /**
   * Returns the smallest element count a filter sized for {@code elementsCountUpperBound} is
   * required to hold before rejecting an insertion. Bounds up to 100 are allowed to fall short of
   * the requested capacity; larger bounds must be met in full.
   */
  private static long minimumExpectedCount(long elementsCountUpperBound) {
    if (elementsCountUpperBound < 10) {
      return (int) Math.ceil(0.5 * elementsCountUpperBound);
    }
    if (elementsCountUpperBound < 100) {
      return (int) Math.ceil(0.70 * elementsCountUpperBound);
    }
    if (elementsCountUpperBound == 100) {
      return (int) Math.ceil(0.95 * elementsCountUpperBound);
    }
    return elementsCountUpperBound;
  }

  /**
   * Estimates the false positive rate by querying the {@code sampleCount} values -1, -2, ...,
   * -sampleCount, none of which the tests insert, and returning the fraction reported as present.
   */
  private static double computeFalsePositiveRate(
      CuckooFilter<Long> cuckooFilter, int sampleCount) {
    int falsePositiveCount = 0;
    for (int i = 0; i < sampleCount; i++) {
      if (cuckooFilter.contains((long) (-i - 1))) {
        falsePositiveCount++;
      }
    }
    return (double) falsePositiveCount / sampleCount;
  }
}

--------------------------------------------------------------------------------
/setfilters-tests/test/com/google/setfilters/cuckoofilter/CuckooFilterStrategiesTest.java:
--------------------------------------------------------------------------------
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14 | 15 | package com.google.setfilters.cuckoofilter; 16 | 17 | import static com.google.common.truth.Truth.assertThat; 18 | 19 | import com.google.common.hash.HashCode; 20 | import org.junit.Test; 21 | import org.junit.runner.RunWith; 22 | import org.junit.runners.JUnit4; 23 | 24 | @RunWith(JUnit4.class) 25 | public final class CuckooFilterStrategiesTest { 26 | 27 | private static final int FINGERPRINT_LENGTH = 16; 28 | private static final int MAX_FINGERPRINT_LENGTH = 64; 29 | private static final int BUCKET_COUNT = 100; 30 | 31 | @Test 32 | public void simpleModStrategy_computeFingerprint_zeroMapsToOne() { 33 | assertThat( 34 | CuckooFilterStrategies.SIMPLE_MOD.computeFingerprint( 35 | HashCode.fromLong(0L), FINGERPRINT_LENGTH)) 36 | .isEqualTo(1L); 37 | assertThat( 38 | CuckooFilterStrategies.SIMPLE_MOD.computeFingerprint( 39 | HashCode.fromLong(1L << (FINGERPRINT_LENGTH + 1)), FINGERPRINT_LENGTH)) 40 | .isEqualTo(1L); 41 | assertThat( 42 | CuckooFilterStrategies.SIMPLE_MOD.computeFingerprint( 43 | HashCode.fromLong(0L), MAX_FINGERPRINT_LENGTH)) 44 | .isEqualTo(1L); 45 | } 46 | 47 | @Test 48 | public void simpleModStrategy_computeFingerprint_mostSignificantBits() { 49 | assertThat( 50 | CuckooFilterStrategies.SIMPLE_MOD.computeFingerprint( 51 | HashCode.fromLong(-1L), FINGERPRINT_LENGTH)) 52 | .isEqualTo((1L << 16) - 1); 53 | assertThat( 54 | CuckooFilterStrategies.SIMPLE_MOD.computeFingerprint( 55 | HashCode.fromLong(-1L), MAX_FINGERPRINT_LENGTH)) 56 | .isEqualTo(-1L); 57 | } 58 | 59 | @Test 60 | public void simpleModStrategy_computeBucketIndex_smallerThanDivisorStaysUnchanged() { 61 | assertThat( 62 | CuckooFilterStrategies.SIMPLE_MOD.computeBucketIndex( 63 | HashCode.fromLong(0L), BUCKET_COUNT)) 64 | .isEqualTo(0); 65 | assertThat( 66 | CuckooFilterStrategies.SIMPLE_MOD.computeBucketIndex( 67 | HashCode.fromLong(99L), BUCKET_COUNT)) 68 | .isEqualTo(99); 69 | } 70 | 71 | @Test 72 | public void 
simpleModStrategy_computeBucketIndex_largerThanDivisorUsesRemainder() { 73 | assertThat( 74 | CuckooFilterStrategies.SIMPLE_MOD.computeBucketIndex( 75 | HashCode.fromLong(100), BUCKET_COUNT)) 76 | .isEqualTo(0); 77 | assertThat( 78 | CuckooFilterStrategies.SIMPLE_MOD.computeBucketIndex( 79 | HashCode.fromLong(199L), BUCKET_COUNT)) 80 | .isEqualTo(99); 81 | } 82 | 83 | @Test 84 | public void simpleModStrategy_computeOtherBucketIndex_involution() { 85 | for (long fingerprint = 1; fingerprint < 1000; fingerprint += 10) { 86 | for (int bucketIndex = 0; bucketIndex < BUCKET_COUNT; bucketIndex++) { 87 | int otherBucketIndex = 88 | CuckooFilterStrategies.SIMPLE_MOD.computeOtherBucketIndex( 89 | fingerprint, bucketIndex, BUCKET_COUNT, CuckooFilterHashFunctions.MURMUR3_128); 90 | 91 | assertThat(otherBucketIndex).isAtLeast(0); 92 | assertThat(otherBucketIndex).isLessThan(BUCKET_COUNT); 93 | assertThat( 94 | CuckooFilterStrategies.SIMPLE_MOD.computeOtherBucketIndex( 95 | fingerprint, 96 | otherBucketIndex, 97 | BUCKET_COUNT, 98 | CuckooFilterHashFunctions.MURMUR3_128)) 99 | .isEqualTo(bucketIndex); 100 | } 101 | } 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /setfilters-tests/test/com/google/setfilters/cuckoofilter/CuckooFilterTableTest.java: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // https://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.setfilters.cuckoofilter;

import static com.google.common.truth.Truth.assertThat;
import static com.google.common.truth.Truth8.assertThat;
import static org.junit.Assert.assertThrows;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
import static org.mockito.Mockito.withSettings;

import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.Random;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameter;
import org.junit.runners.Parameterized.Parameters;

/**
 * Shared behavioural tests for {@link CuckooFilterTable}.
 *
 * <p>The suite is parameterized over a factory, so every test runs once against {@link
 * SemiSortedCuckooFilterTable} and once against {@link UncompressedCuckooFilterTable}.
 */
@RunWith(Parameterized.class)
public final class CuckooFilterTableTest {
  private static final int BUCKET_COUNT = 10000;
  private static final int BUCKET_CAPACITY = 4;
  private static final int FINGERPRINT_LENGTH = 16;

  /** Mocked source of randomness handed to the table under test. */
  private Random random;

  private CuckooFilterTable table;

  /** Creates the table implementation exercised by one parameterized run. */
  private interface CuckooFilterTableFactory {
    /** Builds an empty table of the given size. */
    CuckooFilterTable create(CuckooFilterConfig.Size size, Random random);

    /** Rebuilds a table from its serialized form. */
    default CuckooFilterTable createExisting(
        SerializedCuckooFilterTable serializedTable, Random random) {
      return CuckooFilterTable.createFromSerialization(serializedTable, random);
    }
  }

  private static class SemiSortedCuckooFilterTableFactory implements CuckooFilterTableFactory {
    @Override
    public CuckooFilterTable create(CuckooFilterConfig.Size size, Random random) {
      return new SemiSortedCuckooFilterTable(size, random);
    }
  }

  private static class UncompressedCuckooFilterTableFactory implements CuckooFilterTableFactory {
    @Override
    public CuckooFilterTable create(CuckooFilterConfig.Size size, Random random) {
      return new UncompressedCuckooFilterTable(size, random);
    }
  }

  @Parameters
  public static List<CuckooFilterTableFactory> data() {
    return Arrays.asList(
        new SemiSortedCuckooFilterTableFactory(), new UncompressedCuckooFilterTableFactory());
  }

  @Parameter public CuckooFilterTableFactory tableFactory;

  @Before
  public void setUp() {
    random = mock(Random.class, withSettings().withoutAnnotations());
    CuckooFilterConfig.Size size =
        CuckooFilterConfig.Size.newBuilder()
            .setBucketCount(BUCKET_COUNT)
            .setBucketCapacity(BUCKET_CAPACITY)
            .setFingerprintLength(FINGERPRINT_LENGTH)
            .build();
    table = tableFactory.create(size, random);
  }

  @Test
  public void insertWithReplacement() {
    for (int i = 0; i < BUCKET_COUNT; i++) {
      long offset = (long) i * BUCKET_CAPACITY;
      // Fill bucket i with fingerprints offset + 1 .. offset + BUCKET_CAPACITY.
      for (int j = 0; j < BUCKET_CAPACITY; j++) {
        assertThat(table.insertWithReplacement(i, offset + j + 1)).isEmpty();
      }
      when(random.nextInt(BUCKET_CAPACITY)).thenReturn(0);

      // The bucket is full, so one more insertion has to evict something.
      Optional<Long> replaced = table.insertWithReplacement(i, offset + BUCKET_CAPACITY + 1);

      // Fail with a readable assertion instead of NoSuchElementException if nothing was evicted.
      assertThat(replaced).isPresent();
      long evicted = replaced.get();

      // The evicted fingerprint must be one of the original occupants of the bucket.
      boolean evictedWasOccupant = evicted > offset && evicted <= offset + BUCKET_CAPACITY;
      assertThat(evictedWasOccupant).isTrue();
      assertThat(table.contains(i, evicted)).isFalse();

      // Every other occupant, plus the newly inserted fingerprint, is still present.
      for (long fingerprint = offset + 1;
          fingerprint < offset + BUCKET_CAPACITY + 2;
          fingerprint++) {
        if (fingerprint != evicted) {
          assertThat(table.contains(i, fingerprint)).isTrue();
        }
      }
    }
  }

  @Test
  public void contains_containsFingerprint() {
    assertThat(table.insertWithReplacement(0, 1L)).isEmpty();

    assertThat(table.contains(0, 1L)).isTrue();
  }

  @Test
  public void contains_doesNotContainFingerprint() {
    assertThat(table.contains(0, 1L)).isFalse();
  }

  @Test
  public void delete_deletesExistingFingerprint() {
    assertThat(table.insertWithReplacement(0, 1L)).isEmpty();
    assertThat(table.contains(0, 1L)).isTrue();

    assertThat(table.delete(0, 1L)).isTrue();
    assertThat(table.contains(0, 1L)).isFalse();
  }

  @Test
  public void delete_deletesOneFingerprintAtATime() {
    // Two copies of the same fingerprint require two deletions.
    assertThat(table.insertWithReplacement(0, 1L)).isEmpty();
    assertThat(table.insertWithReplacement(0, 1L)).isEmpty();
    assertThat(table.contains(0, 1L)).isTrue();

    assertThat(table.delete(0, 1L)).isTrue();
    assertThat(table.contains(0, 1L)).isTrue();
    assertThat(table.delete(0, 1L)).isTrue();
    assertThat(table.contains(0, 1L)).isFalse();
  }

  @Test
  public void delete_deletesNonExistingFingerprint() {
    assertThat(table.delete(0, 1L)).isFalse();
  }

  @Test
  public void isFull() {
    for (int j = 0; j < BUCKET_CAPACITY; j++) {
      assertThat(table.isFull(0)).isFalse();
      assertThat(table.insertWithReplacement(0, j + 1)).isEmpty();
    }
    assertThat(table.isFull(0)).isTrue();
  }

  @Test
  public void size() {
    CuckooFilterConfig.Size size = table.size();

    assertThat(size.bucketCount()).isEqualTo(BUCKET_COUNT);
    assertThat(size.bucketCapacity()).isEqualTo(BUCKET_CAPACITY);
    assertThat(size.fingerprintLength()).isEqualTo(FINGERPRINT_LENGTH);
  }

  @Test
  public void serializeAndDeserialize() {
    // Only the first BUCKET_CAPACITY buckets are populated; that is enough for a round trip.
    for (int i = 0; i < BUCKET_CAPACITY; i++) {
      long offset = (long) i * BUCKET_CAPACITY;
      for (int j = 0; j < BUCKET_CAPACITY; j++) {
        assertThat(table.insertWithReplacement(i, offset + j + 1)).isEmpty();
      }
    }

    SerializedCuckooFilterTable serializedTable = table.serialize();
    CuckooFilterTable existingTable = tableFactory.createExisting(serializedTable, new Random());

    for (int i = 0; i < BUCKET_CAPACITY; i++) {
      long offset = (long) i * BUCKET_CAPACITY;
      for (int j = 0; j < BUCKET_CAPACITY; j++) {
        assertThat(existingTable.contains(i, offset + j + 1)).isTrue();
      }
    }
  }

  @Test
  public void deserialize_failsWithInvalidSerialization() {
    SerializedCuckooFilterTable serializedTable =
        SerializedCuckooFilterTable.createFromByteArray(new byte[12]);

    String message =
        assertThrows(
                IllegalArgumentException.class,
                () -> tableFactory.createExisting(serializedTable, new Random()))
            .getMessage();
    assertThat(message).isEqualTo("Unable to parse the SerializedCuckooFilterTable.");
  }
}

--------------------------------------------------------------------------------
/setfilters-tests/test/com/google/setfilters/cuckoofilter/CuckooFilterTest.java:
--------------------------------------------------------------------------------
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.setfilters.cuckoofilter;

import static com.google.common.truth.Truth.assertThat;

import com.google.common.hash.Funnel;
import com.google.common.hash.Funnels;
import com.google.common.hash.HashCode;
import com.google.common.hash.Hashing;
import java.util.Arrays;
import java.util.List;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameter;
import org.junit.runners.Parameterized.Parameters;

/**
 * End-to-end tests for {@link CuckooFilter}.
 *
 * <p>Each test runs twice: with a SHA-256 hash function and space optimization enabled, and with
 * {@link CuckooFilterHashFunctions#MURMUR3_128} and space optimization disabled.
 */
@RunWith(Parameterized.class)
public final class CuckooFilterTest {

  /** Hash function backed by Guava's SHA-256 implementation. */
  private static class Sha256HashFunction implements CuckooFilterConfig.HashFunction {
    @Override
    public <T> HashCode hash(T element, Funnel<? super T> funnel) {
      return Hashing.sha256().hashObject(element, funnel);
    }
  }

  @Parameters
  public static List<Object[]> data() {
    return Arrays.asList(
        new Object[][] {
          {new Sha256HashFunction(), true},
          {CuckooFilterHashFunctions.MURMUR3_128, false}
        });
  }

  @Parameter(0)
  public CuckooFilterConfig.HashFunction hashFunction;

  @Parameter(1)
  public boolean useSpaceOptimization;

  private CuckooFilterConfig config;
  private CuckooFilter<Integer> cuckooFilter;

  @Before
  public void setUp() {
    CuckooFilterConfig.Size size =
        CuckooFilterConfig.Size.newBuilder()
            .setBucketCount(100)
            .setBucketCapacity(4)
            .setFingerprintLength(16)
            .build();
    config =
        CuckooFilterConfig.newBuilder()
            .setSize(size)
            .setHashFunction(hashFunction)
            .setStrategy(CuckooFilterStrategies.SIMPLE_MOD)
            .setUseSpaceOptimization(useSpaceOptimization)
            .build();
    cuckooFilter = CuckooFilter.createNew(config, Funnels.integerFunnel());
  }

  @Test
  public void insertAndContains() {
    final int insertedElementsCount = 380;

    for (int i = 0; i < insertedElementsCount; i++) {
      assertThat(cuckooFilter.insert(i)).isTrue();
    }

    for (int i = 0; i < insertedElementsCount; i++) {
      assertThat(cuckooFilter.contains(i)).isTrue();
    }

    final int testCountNonExistentElements = 300;

    for (int i = 0; i < testCountNonExistentElements; i++) {
      assertThat(cuckooFilter.contains(i + insertedElementsCount)).isFalse();
    }
  }

  @Test
  public void insert_failsWhenFull_insertSameElements() {
    // Exhaust two buckets that element 0 can belong to.
    for (int i = 0; i < 2 * config.size().bucketCapacity(); i++) {
      assertThat(cuckooFilter.insert(0)).isTrue();
    }

    assertThat(cuckooFilter.insert(0)).isFalse();
  }

  @Test
  public void insert_insertFailureReversesTheReplacements() {
    // Insert 0, 1, 2, ... until the first failure; insertedCount is the element that failed.
    int insertedCount = 0;
    while (cuckooFilter.insert(insertedCount)) {
      insertedCount++;
    }

    // A failed insertion must leave every previously inserted element in place.
    for (int i = 0; i < insertedCount; i++) {
      assertThat(cuckooFilter.contains(i)).isTrue();
    }
    assertThat(cuckooFilter.contains(insertedCount)).isFalse();
  }

  @Test
  public void delete_deletesExistingElements() {
    final int insertedElementsCount = 150;

    // Every element is inserted twice, so it can be deleted twice.
    for (int i = 0; i < insertedElementsCount; i++) {
      assertThat(cuckooFilter.insert(i)).isTrue();
      assertThat(cuckooFilter.insert(i)).isTrue();
    }

    for (int i = 0; i < insertedElementsCount; i++) {
      assertThat(cuckooFilter.delete(i)).isTrue();
      assertThat(cuckooFilter.delete(i)).isTrue();
    }
  }

  @Test
  public void delete_deletingNonExistingElementsFails() {
    final int insertedElementsCount = 150;

    for (int i = 0; i < insertedElementsCount; i++) {
      assertThat(cuckooFilter.delete(i)).isFalse();
    }
  }

  @Test
  public void size() {
    assertThat(cuckooFilter.size()).isEqualTo(config.size());
  }

  @Test
  public void count() {
    final int insertedElementsCount = 300;
    final int deletedElementCount = 150;

    for (int i = 0; i < insertedElementsCount; i++) {
      assertThat(cuckooFilter.insert(i)).isTrue();
    }
    assertThat(cuckooFilter.count()).isEqualTo(insertedElementsCount);

    for (int i = 0; i < deletedElementCount; i++) {
      assertThat(cuckooFilter.delete(i)).isTrue();
    }
    assertThat(cuckooFilter.count()).isEqualTo(insertedElementsCount - deletedElementCount);

    // Attempt to delete non existing elements.
    for (int i = 0; i < deletedElementCount; i++) {
      assertThat(cuckooFilter.delete(insertedElementsCount + i)).isFalse();
    }
    assertThat(cuckooFilter.count()).isEqualTo(insertedElementsCount - deletedElementCount);
  }

  @Test
  public void serializeAndDeserialize() {
    final int insertedElementsCount = 300;

    for (int i = 0; i < insertedElementsCount; i++) {
      assertThat(cuckooFilter.insert(i)).isTrue();
    }

    SerializedCuckooFilterTable serializedTable = cuckooFilter.serializeTable();

    CuckooFilter<Integer> anotherCuckooFilter =
        CuckooFilter.createFromSerializedTable(
            serializedTable, config.hashFunction(), config.strategy(), Funnels.integerFunnel());

    for (int i = 0; i < insertedElementsCount; i++) {
      assertThat(anotherCuckooFilter.contains(i)).isTrue();
    }
    assertThat(anotherCuckooFilter.contains(insertedElementsCount)).isFalse();
  }
}

--------------------------------------------------------------------------------
/setfilters-tests/test/com/google/setfilters/cuckoofilter/SemiSortedCuckooFilterTableTest.java:
--------------------------------------------------------------------------------
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.setfilters.cuckoofilter;

import static com.google.common.truth.Truth.assertThat;
import static org.junit.Assert.assertThrows;

import java.util.Random;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

/** Tests the constructor argument validation of {@link SemiSortedCuckooFilterTable}. */
@RunWith(JUnit4.class)
public final class SemiSortedCuckooFilterTableTest {

  @Test
  public void creation_failsWithInvalidBucketCapacity() {
    // Bucket capacity 5 with an otherwise acceptable fingerprint length.
    IllegalArgumentException thrown =
        assertThrows(IllegalArgumentException.class, () -> createTable(5, 4));

    assertThat(thrown.getMessage())
        .isEqualTo("SemiSortedCuckooFilterTable only supports bucket capacity of 4.");
  }

  @Test
  public void creation_failsWithInvalidFingerprintLength() {
    // Fingerprint length 3 with an otherwise acceptable bucket capacity.
    IllegalArgumentException thrown =
        assertThrows(IllegalArgumentException.class, () -> createTable(4, 3));

    assertThat(thrown.getMessage())
        .isEqualTo("SemiSortedCuckooFilterTable only supports fingerprint length >= 4.");
  }

  /** Builds a 100-bucket semi-sorted table with the given capacity and fingerprint length. */
  private static SemiSortedCuckooFilterTable createTable(
      int bucketCapacity, int fingerprintLength) {
    CuckooFilterConfig.Size size =
        CuckooFilterConfig.Size.newBuilder()
            .setBucketCount(100)
            .setBucketCapacity(bucketCapacity)
            .setFingerprintLength(fingerprintLength)
            .build();
    return new SemiSortedCuckooFilterTable(size, new Random());
  }
}

--------------------------------------------------------------------------------
/setfilters-tests/test/com/google/setfilters/cuckoofilter/SerializedCuckooFilterTableTest.java:
--------------------------------------------------------------------------------
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.setfilters.cuckoofilter;

import static com.google.common.truth.Truth.assertThat;

import java.util.Arrays;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

/** Verifies that {@link SerializedCuckooFilterTable} never shares its bytes with callers. */
@RunWith(JUnit4.class)
public final class SerializedCuckooFilterTableTest {

  @Test
  public void construct_byteArrayCopied() {
    byte[] input = new byte[] {0, 1, 2, 3, 4};
    byte[] snapshot = Arrays.copyOf(input, input.length);
    SerializedCuckooFilterTable serializedTable =
        SerializedCuckooFilterTable.createFromByteArray(input);

    // Mutating the caller's array after construction must not leak into the table.
    input[0] = 2;

    assertThat(serializedTable.asByteArray()).isEqualTo(snapshot);
  }

  @Test
  public void asByteArray_byteArrayCopied() {
    byte[] input = new byte[] {0, 1, 2, 3, 4};
    SerializedCuckooFilterTable serializedTable =
        SerializedCuckooFilterTable.createFromByteArray(input);

    // Mutating a returned array must not affect what later calls return.
    byte[] firstView = serializedTable.asByteArray();
    firstView[0] = 1;

    assertThat(serializedTable.asByteArray()).isEqualTo(input);
  }
}

-------------------------------------------------------------------------------- /setfilters/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.google.setfilters 7 | setfilters-parent 8 | HEAD-jre-SNAPSHOT 9 | 10 | 11 | setfilters 12 | Setfilters Main 13 | https://github.com/google/setfilters 14 | 15 | 16 | 17 | com.google.errorprone 18 | error_prone_annotations 19 | 20 | 21 | com.google.guava 22 | guava 23 | 24 | 25 | 26 | 27 | 28 | 29 | .. 30 | 31 | LICENSE 32 | proguard/* 33 | 34 | META-INF 35 | 36 | 37 | 38 | 39 | maven-compiler-plugin 40 | 41 | 42 | maven-source-plugin 43 | 44 | 45 | maven-javadoc-plugin 46 | 47 | 48 | maven-gpg-plugin 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /setfilters/src/com/google/setfilters/cuckoofilter/CuckooFilter.java: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // https://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package com.google.setfilters.cuckoofilter; 16 | 17 | import com.google.common.hash.Funnel; 18 | import com.google.common.hash.HashCode; 19 | import java.util.ArrayList; 20 | import java.util.List; 21 | import java.util.Optional; 22 | import java.util.Random; 23 | 24 | /** 25 | * A space efficient, probabilistic multiset data structure that supports membership check, 26 | * insertion, and deletion of the elements. 27 | * 28 | *

Cuckoo filter enables tradeoffs between its space efficiency and the false positive 29 | * probability of the membership check. 30 | * 31 | *

See the original paper https://www.cs.cmu.edu/~dga/papers/cuckoo-conext2014.pdf for more 32 | * details. 33 | * 34 | *

This class is not thread-safe. 35 | */ 36 | public final class CuckooFilter { 37 | private final CuckooFilterConfig config; 38 | private final CuckooFilterTable table; 39 | private final Funnel funnel; 40 | private final Random random; 41 | 42 | /** Counts the total number of elements in the cuckoo filter. */ 43 | private long count; 44 | 45 | /** Instantiates a new cuckoo filter. */ 46 | public static CuckooFilter createNew(CuckooFilterConfig config, Funnel funnel) { 47 | Random random = new Random(); 48 | CuckooFilterTable table = 49 | CuckooFilterTable.create(config.size(), config.useSpaceOptimization(), random); 50 | return new CuckooFilter(config, table, funnel, random); 51 | } 52 | 53 | /** 54 | * Instantiates a cuckoo filter from serialized cuckoo filter table. 55 | * 56 | *

Note that {@link SerializedCuckooFilterTable} does not contain any data on {@link 57 | * CuckooFilterConfig.HashFunction}, {@link CuckooFilterConfig.Strategy}, or {@link Funnel} used, 58 | * so it is up to the user to supply appropriate hash function, strategy, and funnel that were 59 | * used to generate the {@link SerializedCuckooFilterTable}. 60 | */ 61 | public static CuckooFilter createFromSerializedTable( 62 | SerializedCuckooFilterTable serializedTable, 63 | CuckooFilterConfig.HashFunction hashFunction, 64 | CuckooFilterConfig.Strategy strategy, 65 | Funnel funnel) { 66 | Random random = new Random(); 67 | CuckooFilterTable table = CuckooFilterTable.createFromSerialization(serializedTable, random); 68 | return new CuckooFilter( 69 | CuckooFilterConfig.newBuilder() 70 | .setSize(table.size()) 71 | .setHashFunction(hashFunction) 72 | .setStrategy(strategy) 73 | .build(), 74 | table, 75 | funnel, 76 | random); 77 | } 78 | 79 | private CuckooFilter( 80 | CuckooFilterConfig config, CuckooFilterTable table, Funnel funnel, Random random) { 81 | this.config = config; 82 | this.table = table; 83 | this.funnel = funnel; 84 | this.random = random; 85 | count = 0; 86 | } 87 | 88 | /** 89 | * Returns true if {@code element} is in the cuckoo filter. 90 | * 91 | *

By the probabilistic nature of the cuckoo filter data structure, this method may return a 92 | * false positive result. In other words, this method may incorrectly return true for an element 93 | * that was actually never inserted. This probability can depend on various factors, including the 94 | * size of the cuckoo filter and the hash function used. 95 | * 96 | *

However, it is guaranteed that this method never returns a false negative result, as long as 97 | * {@code delete} method is called on an element that exists in the filter. Please see {@code 98 | * delete} method for more details. 99 | */ 100 | public boolean contains(T element) { 101 | HashCode hash = config.hashFunction().hash(element, funnel); 102 | long fingerprint = 103 | config.strategy().computeFingerprint(hash, config.size().fingerprintLength()); 104 | int bucketIndex = config.strategy().computeBucketIndex(hash, config.size().bucketCount()); 105 | int otherBucketIndex = 106 | config 107 | .strategy() 108 | .computeOtherBucketIndex( 109 | fingerprint, bucketIndex, config.size().bucketCount(), config.hashFunction()); 110 | return table.contains(bucketIndex, fingerprint) 111 | || table.contains(otherBucketIndex, fingerprint); 112 | } 113 | 114 | /** 115 | * Inserts {@code element} to the cuckoo filter, returning true if the element was inserted 116 | * successfully. 117 | * 118 | *

Insertion of {@code element} will fail if there is no room for {@code element}. Note that 119 | * even when the insertion of {@code element} fails, it is possible for another element to be 120 | * inserted successfully. Even then, the insertion failure should be a good indicator that the 121 | * filter is getting close to its maximum capacity. 122 | */ 123 | public boolean insert(T element) { 124 | HashCode hash = config.hashFunction().hash(element, funnel); 125 | long fingerprint = 126 | config.strategy().computeFingerprint(hash, config.size().fingerprintLength()); 127 | int bucketIndex = config.strategy().computeBucketIndex(hash, config.size().bucketCount()); 128 | int otherBucketIndex = 129 | config 130 | .strategy() 131 | .computeOtherBucketIndex( 132 | fingerprint, bucketIndex, config.size().bucketCount(), config.hashFunction()); 133 | 134 | // First attempt to insert the fingerprint to one of the two assigned buckets. 135 | if (attemptInsertion(fingerprint, bucketIndex, otherBucketIndex)) { 136 | count++; 137 | return true; 138 | } 139 | 140 | // If both buckets are full, execute insertion with repeated replacements algorithm. 141 | int startBucketIndex = (random.nextInt(2) == 0) ? bucketIndex : otherBucketIndex; 142 | boolean inserted = insertWithRepeatedReplacements(fingerprint, startBucketIndex); 143 | if (inserted) { 144 | count++; 145 | } 146 | return inserted; 147 | } 148 | 149 | /** 150 | * Deletes {@code element} from the cuckoo filter, returning true if the element was deleted 151 | * successfully. 152 | * 153 | *

It is critical for {@code delete} to be called on an already existing element. Otherwise, 154 | * the filter may incorrectly delete a wrong element. When this happens, it is possible for {@code 155 | * contains} method to return a false negative result. 156 | */ 157 | public boolean delete(T element) { 158 | HashCode hash = config.hashFunction().hash(element, funnel); 159 | long fingerprint = 160 | config.strategy().computeFingerprint(hash, config.size().fingerprintLength()); 161 | int bucketIndex = config.strategy().computeBucketIndex(hash, config.size().bucketCount()); 162 | int otherBucketIndex = 163 | config 164 | .strategy() 165 | .computeOtherBucketIndex( 166 | fingerprint, bucketIndex, config.size().bucketCount(), config.hashFunction()); 167 | boolean deleted = 168 | table.delete(bucketIndex, fingerprint) || table.delete(otherBucketIndex, fingerprint); 169 | if (deleted) { 170 | count--; 171 | } 172 | return deleted; 173 | } 174 | 175 | /** Returns the size of the cuckoo filter. */ 176 | public CuckooFilterConfig.Size size() { 177 | return config.size(); 178 | } 179 | 180 | /** Returns the count of the elements in the cuckoo filter. */ 181 | public long count() { 182 | return count; 183 | } 184 | 185 | /** 186 | * Returns the ratio of the total number of elements in the cuckoo filter and the theoretical max 187 | * capacity. 188 | * 189 | *

The returned value is in range [0, 1]. 190 | */ 191 | public double load() { 192 | return count / ((double) config.size().bucketCount() * config.size().bucketCapacity()); 193 | } 194 | 195 | /** 196 | * Serializes the state of the cuckoo filter table. 197 | * 198 | *

Note that this method does not serialize hash function, strategy, and funnel. When 199 | * instantiating a cuckoo filter from the returned {@link SerializedCuckooFilterTable}, it is up 200 | * to the user to supply appropriate hash function, strategy, and funnel that were used. 201 | */ 202 | public SerializedCuckooFilterTable serializeTable() { 203 | return table.serialize(); 204 | } 205 | 206 | /** 207 | * Attempts to insert {@code fingerprint} to one of the buckets with indices {@code bucketIndex} 208 | * and {@code otherBucketIndex}, returning true when successful. Returns false if both buckets are 209 | * full and the insertion failed. 210 | */ 211 | private boolean attemptInsertion(long fingerprint, int bucketIndex, int otherBucketIndex) { 212 | if (!table.isFull(bucketIndex)) { 213 | table.insertWithReplacement(bucketIndex, fingerprint); 214 | return true; 215 | } 216 | if (!table.isFull(otherBucketIndex)) { 217 | table.insertWithReplacement(otherBucketIndex, fingerprint); 218 | return true; 219 | } 220 | return false; 221 | } 222 | 223 | /** 224 | * Randomly traverses the cuckoo graph to find an available bucket for insertion. 225 | * 226 | *

At a high level, this algorithm starts at vertex {@code bucketIndex} and performs a random 227 | * walk of length at most {@link CuckooFilterConfig.Strategy#maxReplacementCount}. If an available 228 | * bucket is found, the algorithm "pushes" all the fingerprints (edges) that are visited (note 229 | * that in the cuckoo graph, the edges are the fingerprints) to their alternate buckets, and make 230 | * room for {@code fingerprint} to be inserted. 231 | * 232 | *

If during the random walk an available bucket is not found, the insertion fails and the 233 | * method returns false. 234 | * 235 | *

Note that it is possible to deterministically find an available bucket by performing breadth 236 | * first search in the cuckoo graph, but this is usually slower and the extra chance of successful 237 | * insertion is negligibly small in practice. 238 | */ 239 | private boolean insertWithRepeatedReplacements(long fingerprint, int bucketIndex) { 240 | List visitedBucketIndices = new ArrayList<>(); 241 | List replacedFingerprints = new ArrayList<>(); 242 | 243 | long currFingerprint = fingerprint; 244 | int currBucketIndex = bucketIndex; 245 | visitedBucketIndices.add(-1); // Just for index alignment purpose. 246 | replacedFingerprints.add(currFingerprint); 247 | for (int i = 0; i < config.strategy().maxReplacementCount(); i++) { 248 | Optional replacedFingerprint = 249 | table.insertWithReplacement(currBucketIndex, currFingerprint); 250 | // Found an available bucket, and the insertion is successful. 251 | if (replacedFingerprint.isEmpty()) { 252 | return true; 253 | } 254 | 255 | visitedBucketIndices.add(currBucketIndex); 256 | replacedFingerprints.add(replacedFingerprint.get()); 257 | 258 | currFingerprint = replacedFingerprint.get(); 259 | currBucketIndex = 260 | config 261 | .strategy() 262 | .computeOtherBucketIndex( 263 | currFingerprint, 264 | currBucketIndex, 265 | config.size().bucketCount(), 266 | config.hashFunction()); 267 | } 268 | 269 | // Failed to find a bucket to insert. Reverse the replacements and declare that the insertion 270 | // failed. 
271 | for (int i = visitedBucketIndices.size() - 1; i > 0; i--) { 272 | int previousBucketIndex = visitedBucketIndices.get(i); 273 | table.delete(previousBucketIndex, replacedFingerprints.get(i - 1)); 274 | table.insertWithReplacement(previousBucketIndex, replacedFingerprints.get(i)); 275 | } 276 | return false; 277 | } 278 | } 279 | -------------------------------------------------------------------------------- /setfilters/src/com/google/setfilters/cuckoofilter/CuckooFilterArray.java: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // https://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package com.google.setfilters.cuckoofilter; 16 | 17 | import static com.google.common.base.Preconditions.checkArgument; 18 | 19 | import java.nio.ByteBuffer; 20 | import java.nio.ByteOrder; 21 | 22 | /** 23 | * Static array where each element is an integer of size {@code bitsPerElement} bits. 24 | * 25 | *

Supports up to 64 bits per element. This will be used internally by cuckoo filter. 26 | */ 27 | final class CuckooFilterArray { 28 | private final long length; 29 | private final int bitsPerElement; 30 | private final long[] bitArray; 31 | 32 | /** 33 | * Constructs a new cuckoo filter array with length {@code length}, with each element of length 34 | * {@code bitsPerElement} bits. 35 | * 36 | * @throws IllegalArgumentException if {@code length} <= 0 or {@code bitsPerElement} <= 0 or 37 | * {@code bitsPerElement} > 64. 38 | */ 39 | public CuckooFilterArray(long length, int bitsPerElement) { 40 | checkLengthIsValid(length); 41 | checkBitsPerElementIsValid(bitsPerElement); 42 | 43 | this.length = length; 44 | this.bitsPerElement = bitsPerElement; 45 | long totalBits = length * bitsPerElement; 46 | // ceil(totalBits / 64) number of elements. 47 | long longArrayLength = (totalBits + Long.SIZE - 1) / Long.SIZE; 48 | checkArgument( 49 | longArrayLength < Integer.MAX_VALUE, 50 | "Too large: could not create CuckooFilterArray with length %s and bitsPerElement %s.", 51 | length, 52 | bitsPerElement); 53 | bitArray = new long[(int) longArrayLength]; 54 | } 55 | 56 | /** 57 | * Constructs a cuckoo filter array with length {@code length}, with each element of length {@code 58 | * bitsPerElement}, from {@code byteArray}. 59 | */ 60 | public CuckooFilterArray(long length, int bitsPerElement, byte[] byteArray) { 61 | this(length, bitsPerElement); 62 | ByteBuffer buffer = ByteBuffer.wrap(byteArray).order(ByteOrder.LITTLE_ENDIAN); 63 | for (int i = 0; i < bitArray.length; i++) { 64 | bitArray[i] = buffer.getLong(); 65 | } 66 | } 67 | 68 | /** Returns the length of the array. */ 69 | public long length() { 70 | return length; 71 | } 72 | 73 | /** Returns the number of bits per element. */ 74 | public int bitsPerElement() { 75 | return bitsPerElement; 76 | } 77 | 78 | /** 79 | * Returns the element at the {@code index}th position as a long. 80 | * 81 | *

The lowest {@code bitsPerElement} bits will correspond to the value of the element. 82 | * 83 | * @throws IllegalArgumentException if {@code index} is out of bounds. 84 | */ 85 | public long getAsLong(long index) { 86 | checkIndexOutOfBounds(index); 87 | long bitStart = index * bitsPerElement; 88 | long bitEnd = bitStart + bitsPerElement; 89 | int arrayIndex1 = (int) (bitStart / Long.SIZE); 90 | int arrayIndex2 = (int) ((bitEnd - 1) / Long.SIZE); 91 | 92 | int a = (int) (bitStart % Long.SIZE); 93 | // The element intersects the two array indices. 94 | if (arrayIndex1 < arrayIndex2) { 95 | int b = a + bitsPerElement - Long.SIZE; 96 | long value1 = bitArray[arrayIndex1] >>> a; 97 | long value2 = bitArray[arrayIndex2] & mask(b); 98 | return (value1 | (value2 << (Long.SIZE - a))); 99 | } 100 | // Element is contained in one array index. 101 | return (bitArray[arrayIndex1] >>> a) & mask(bitsPerElement); 102 | } 103 | 104 | /** 105 | * Sets the element at {@code index}th position as {@code value}, using the lowest {@code 106 | * bitsPerElement} bits as the value of the element. 107 | * 108 | * @throws IllegalArgumentException if {@code index} is out of bounds. 109 | */ 110 | public void set(long index, long value) { 111 | checkIndexOutOfBounds(index); 112 | long bitStart = index * bitsPerElement; 113 | long bitEnd = bitStart + bitsPerElement; 114 | int arrayIndex1 = (int) (bitStart / Long.SIZE); 115 | int arrayIndex2 = (int) ((bitEnd - 1) / Long.SIZE); 116 | 117 | // Use the lowest bitsPerElement bits and clear all other bits. 118 | value &= mask(bitsPerElement); 119 | 120 | int a = (int) (bitStart % Long.SIZE); 121 | // The element intersects the two array indices. 
122 | if (arrayIndex1 < arrayIndex2) { 123 | int b = a + bitsPerElement - Long.SIZE; 124 | bitArray[arrayIndex1] &= clearMask(Long.SIZE, a, Long.SIZE); 125 | bitArray[arrayIndex1] |= (value << a); 126 | bitArray[arrayIndex2] &= clearMask(Long.SIZE, 0, b); 127 | bitArray[arrayIndex2] |= (value >>> (Long.SIZE - a)); 128 | } else { 129 | // Element is contained in one array index. 130 | int b = a + bitsPerElement; 131 | bitArray[arrayIndex1] &= clearMask(Long.SIZE, a, b); 132 | bitArray[arrayIndex1] |= (value << a); 133 | } 134 | } 135 | 136 | /** Returns byte array representation of the {@link CuckooFilterArray}. */ 137 | public byte[] toByteArray() { 138 | byte[] byteArray = new byte[bitArray.length * Long.BYTES]; 139 | for (int i = 0; i < bitArray.length; i++) { 140 | long value = bitArray[i]; 141 | for (int j = 0; j < Long.BYTES; j++) { 142 | // Explicit conversion from long to byte will truncate to lowest 8 bits. 143 | byteArray[i * Long.BYTES + j] = (byte) value; 144 | value >>>= Byte.SIZE; 145 | } 146 | } 147 | return byteArray; 148 | } 149 | 150 | // Theoretical max size of a long array is Integer.MAX_VALUE. Assuming each element is 1 bit, 151 | // we can support up to Integer.MAX_VALUE * 64 number of elements. 152 | private void checkLengthIsValid(long length) { 153 | checkArgument( 154 | 0 < length && length < (long) Integer.MAX_VALUE * Long.SIZE, 155 | "length must be in range (0, %s).", 156 | (long) Integer.MAX_VALUE * Long.SIZE); 157 | } 158 | 159 | private void checkBitsPerElementIsValid(int bitsPerElement) { 160 | checkArgument( 161 | 0 < bitsPerElement && bitsPerElement <= 64, "bitsPerElement must be in range [1, 64]."); 162 | } 163 | 164 | private void checkIndexOutOfBounds(long index) { 165 | checkArgument(0 <= index && index < length, "Index is out of bounds: %s.", index); 166 | } 167 | 168 | private static long mask(int length) { 169 | if (length == Long.SIZE) { 170 | // -1 in 2s complement is 0xFFFFFFFFFFFFFFFF. 
171 | return -1; 172 | } 173 | return (1L << length) - 1; 174 | } 175 | 176 | // Mask for clearing bits in range [a, b). 177 | private static long clearMask(int length, int a, int b) { 178 | long mask1 = mask(length); 179 | long mask2 = mask(b - a); 180 | return mask1 ^ (mask2 << a); 181 | } 182 | } 183 | -------------------------------------------------------------------------------- /setfilters/src/com/google/setfilters/cuckoofilter/CuckooFilterConfig.java: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // https://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package com.google.setfilters.cuckoofilter; 16 | 17 | import static com.google.common.base.Preconditions.checkArgument; 18 | 19 | import com.google.common.collect.ImmutableMap; 20 | import com.google.common.hash.Funnel; 21 | import com.google.common.hash.HashCode; 22 | import com.google.errorprone.annotations.CanIgnoreReturnValue; 23 | import java.util.Map; 24 | 25 | /** 26 | * Specification for the cuckoo filter. 27 | * 28 | *

This class is immutable. 29 | */ 30 | // TODO: Handle serialization. 31 | public final class CuckooFilterConfig { 32 | private final Size size; 33 | private final HashFunction hashFunction; 34 | private final Strategy strategy; 35 | private final boolean useSpaceOptimization; 36 | 37 | private CuckooFilterConfig( 38 | Size size, HashFunction hashFunction, Strategy strategy, boolean useSpaceOptimization) { 39 | this.size = size; 40 | this.hashFunction = hashFunction; 41 | this.strategy = strategy; 42 | this.useSpaceOptimization = useSpaceOptimization; 43 | } 44 | 45 | public Size size() { 46 | return size; 47 | } 48 | 49 | public HashFunction hashFunction() { 50 | return hashFunction; 51 | } 52 | 53 | public Strategy strategy() { 54 | return strategy; 55 | } 56 | 57 | public boolean useSpaceOptimization() { 58 | return useSpaceOptimization; 59 | } 60 | 61 | public static Builder newBuilder() { 62 | return new Builder(); 63 | } 64 | 65 | /** Builder for the {@link CuckooFilterConfig}. */ 66 | public static class Builder { 67 | private Size size; 68 | private HashFunction hashFunction; 69 | private Strategy strategy; 70 | private boolean useSpaceOptimization; 71 | 72 | private Builder() {} 73 | 74 | @CanIgnoreReturnValue 75 | public Builder setSize(Size size) { 76 | this.size = size; 77 | return this; 78 | } 79 | 80 | @CanIgnoreReturnValue 81 | public Builder setHashFunction(HashFunction hashFunction) { 82 | this.hashFunction = hashFunction; 83 | return this; 84 | } 85 | 86 | @CanIgnoreReturnValue 87 | public Builder setStrategy(Strategy strategy) { 88 | this.strategy = strategy; 89 | return this; 90 | } 91 | 92 | /** 93 | * Whether to use space optimized filter representation (if possible). 94 | * 95 | *

Setting this field to {@code true} does not guarantee the optimization algorithm to always 96 | * apply - it is best effort. 97 | * 98 | *

In general, using this may result in slower filter operations, and incurs an additional 99 | * fixed space overhead. Thus, it is possible for the "optimized" version of the filter to 100 | * actually take more space than the non optimized one. 101 | */ 102 | @CanIgnoreReturnValue 103 | public Builder setUseSpaceOptimization(boolean useSpaceOptimization) { 104 | this.useSpaceOptimization = useSpaceOptimization; 105 | return this; 106 | } 107 | 108 | /** 109 | * Builds {@link CuckooFilterConfig}. 110 | * 111 | * @throws IllegalArgumentException if the required parameters are not set. 112 | */ 113 | public CuckooFilterConfig build() { 114 | checkArgument(size != null, "Size must be set."); 115 | checkArgument(hashFunction != null, "Hash function must be set."); 116 | checkArgument(strategy != null, "Strategy must be set."); 117 | 118 | return new CuckooFilterConfig(size, hashFunction, strategy, useSpaceOptimization); 119 | } 120 | } 121 | 122 | /** 123 | * Specification of the cuckoo filter size. 124 | * 125 | *

A cuckoo filter's size can be defined as a tuple (bucketCount, bucketCapacity, 126 | * fingeprintLength); this means that there are bucketCount number of buckets, where each bucket 127 | * can store up to bucketCapacity fingerprints, and each fingerprint is of length 128 | * fingerprintLength bits. 129 | * 130 | *

All fields are required and must be set explicitly. 131 | * 132 | *

This class is immutable. 133 | */ 134 | public static class Size { 135 | private static final int MAX_BUCKET_CAPACITY = 128; 136 | private static final int MAX_FINGERPRINT_LENGTH = 64; 137 | /** Empirical load by the bucket capacity. */ 138 | private static final ImmutableMap APPROX_LOAD_BY_BUCKET_CAPACITY = 139 | ImmutableMap.builder() 140 | .put(2, 0.85) 141 | .put(3, 0.91) 142 | .put(4, 0.95) 143 | .put(5, 0.96) 144 | .put(6, 0.97) 145 | .put(7, 0.98) 146 | .put(8, 0.98) 147 | .buildOrThrow(); 148 | 149 | private final int bucketCount; 150 | private final int bucketCapacity; 151 | private final int fingerprintLength; 152 | 153 | private Size(int bucketCount, int bucketCapacity, int fingerprintLength) { 154 | this.bucketCount = bucketCount; 155 | this.bucketCapacity = bucketCapacity; 156 | this.fingerprintLength = fingerprintLength; 157 | } 158 | 159 | /** 160 | * Automatically computes a reasonably efficient cuckoo filter {@link Size} that ensures (with 161 | * high probability) storing up to {@code elementsCountUpperBound} elements (with high 162 | * probability) with the given {@code targetFalsePositiveRate}. 163 | * 164 | * @throws IllegalArgumentException if {@code targetFalsePositiveRate} is not in range [0, 1] or 165 | * {@code elementsCountUpperBound} is <= 0, or a suitable cuckoo filter size could not be 166 | * computed based on the given input. 
167 | */ 168 | public static Size computeEfficientSize( 169 | double targetFalsePositiveRate, long elementsCountUpperBound) { 170 | checkArgument( 171 | 0 < targetFalsePositiveRate && targetFalsePositiveRate < 1, 172 | "targetFalsePositiveRate must be in range (0, 1): %s given.", 173 | targetFalsePositiveRate); 174 | checkArgument( 175 | elementsCountUpperBound > 0, 176 | "elementsCountUpperBound must be > 0: %s given.", 177 | elementsCountUpperBound); 178 | 179 | long bestCuckooFilterSizeInBits = -1; 180 | int bestBucketCount = 0; 181 | int bestBucketCapacity = 0; 182 | int bestFingerprintLength = 0; 183 | for (Map.Entry entry : APPROX_LOAD_BY_BUCKET_CAPACITY.entrySet()) { 184 | int bucketCapacity = entry.getKey(); 185 | double load = entry.getValue(); 186 | 187 | int fingerprintLength = 188 | (int) Math.ceil(-log2(targetFalsePositiveRate) + log2(bucketCapacity) + 1); 189 | long bucketCount = (long) Math.ceil(elementsCountUpperBound / (bucketCapacity * load)); 190 | 191 | // The computed size is invalid if fingerprint length is larger than max length or the 192 | // bucket count that is larger than max integer. 193 | if (fingerprintLength > MAX_FINGERPRINT_LENGTH || bucketCount >= Integer.MAX_VALUE) { 194 | continue; 195 | } 196 | 197 | long totalBits = bucketCount * bucketCapacity * fingerprintLength; 198 | if (bestCuckooFilterSizeInBits == -1 || bestCuckooFilterSizeInBits > totalBits) { 199 | bestCuckooFilterSizeInBits = totalBits; 200 | bestBucketCount = (int) bucketCount; 201 | bestBucketCapacity = bucketCapacity; 202 | bestFingerprintLength = fingerprintLength; 203 | } 204 | } 205 | 206 | checkArgument( 207 | bestCuckooFilterSizeInBits != -1, 208 | "Could not compute suitable cuckoo filter size based on the given input. 
Either the" 209 | + " target false positive rate is too low, or the computed size is too big."); 210 | 211 | return Size.newBuilder() 212 | .setBucketCount(bestBucketCount) 213 | .setBucketCapacity(bestBucketCapacity) 214 | .setFingerprintLength(bestFingerprintLength) 215 | .build(); 216 | } 217 | 218 | public static Builder newBuilder() { 219 | return new Builder(); 220 | } 221 | 222 | /** Returns the total number of buckets in the cuckoo filter. */ 223 | public int bucketCount() { 224 | return bucketCount; 225 | } 226 | 227 | /** Returns the maximum number of fingerprints each bucket can hold. */ 228 | public int bucketCapacity() { 229 | return bucketCapacity; 230 | } 231 | 232 | /** Returns the length of the fingerprint in bits. */ 233 | public int fingerprintLength() { 234 | return fingerprintLength; 235 | } 236 | 237 | /** Builder for the {@link Size}. */ 238 | public static class Builder { 239 | private int bucketCount; 240 | private int bucketCapacity; 241 | private int fingerprintLength; 242 | 243 | private Builder() {} 244 | 245 | /** 246 | * Sets the number of buckets in the cuckoo filter. 247 | * 248 | *

{@code bucketCount} must be > 0. 249 | */ 250 | @CanIgnoreReturnValue 251 | public Builder setBucketCount(int bucketCount) { 252 | this.bucketCount = bucketCount; 253 | return this; 254 | } 255 | 256 | /** 257 | * Sets the maximum number of fingerprints each bucket can hold. 258 | * 259 | *

{@code bucketCapacity} must be in range (0, {@value #MAX_BUCKET_CAPACITY}]. 260 | */ 261 | @CanIgnoreReturnValue 262 | public Builder setBucketCapacity(int bucketCapacity) { 263 | this.bucketCapacity = bucketCapacity; 264 | return this; 265 | } 266 | 267 | /** 268 | * Sets the length of each fingerprint in bits. 269 | * 270 | *

{@code fingerprintLength} must be in range (0, {@value #MAX_FINGERPRINT_LENGTH}]. 271 | */ 272 | @CanIgnoreReturnValue 273 | public Builder setFingerprintLength(int fingerprintLength) { 274 | this.fingerprintLength = fingerprintLength; 275 | return this; 276 | } 277 | 278 | /** 279 | * Builds {@link Size}. 280 | * 281 | * @throws IllegalArgumentException if the configured parameters are invalid. 282 | */ 283 | public Size build() { 284 | checkArgument(bucketCount > 0, "bucketCount must be > 0: %s given instead.", bucketCount); 285 | checkArgument( 286 | 0 < bucketCapacity && bucketCapacity <= MAX_BUCKET_CAPACITY, 287 | "bucketCapacity must be in range (0, %s]: %s given instead.", 288 | MAX_BUCKET_CAPACITY, 289 | bucketCapacity); 290 | checkArgument( 291 | 0 < fingerprintLength && fingerprintLength <= MAX_FINGERPRINT_LENGTH, 292 | "fingerprintLength must be in range (0, %s]: %s given instead.", 293 | MAX_FINGERPRINT_LENGTH, 294 | fingerprintLength); 295 | 296 | return new Size(bucketCount, bucketCapacity, fingerprintLength); 297 | } 298 | } 299 | 300 | private static double log2(double x) { 301 | return Math.log(x) / Math.log(2); 302 | } 303 | } 304 | 305 | /** Hash function for transforming an arbitrary type element to a {@link HashCode}. */ 306 | public interface HashFunction { 307 | /** Hashes given {@code element} to a {@link HashCode}, using the given {@code funnel}. */ 308 | HashCode hash(T element, Funnel funnel); 309 | } 310 | 311 | /** 312 | * Strategy for computing fingerprints and where these fingerprints belong in the cuckoo filter 313 | * table. 314 | */ 315 | public interface Strategy { 316 | 317 | /** 318 | * Computes the fingerprint value given the element's {@code hash} output from {@link 319 | * HashFunction}. 320 | * 321 | *

The returned value should be in range (0, 2^{@code fingerprintLength}). Otherwise, the 322 | * behavior of the cuckoo filter is undefined. Note that the interval is an open interval, so 0 323 | * and 2^{@code fingerprintLength} are not included. 324 | */ 325 | long computeFingerprint(HashCode hash, int fingerprintLength); 326 | 327 | /** 328 | * Computes one of the bucket indices given the element's {@code hash} output from {@link 329 | * HashFunction} and {@code bucketCount} of the cuckoo filter. 330 | * 331 | *

The returned value should be in range [0, {@code bucketCount}). Otherwise, the behavior of 332 | * the cuckoo filter is undefined. 333 | */ 334 | int computeBucketIndex(HashCode hash, int bucketCount); 335 | 336 | /** 337 | * Computes the element's other bucket index given the element's {@code fingerprint} value and 338 | * its initial {@code bucketIndex}. 339 | * 340 | *

{@code hashFunction} corresponds to the {@link HashFunction} that was supplied when the 341 | * config was constructed. Depending on the implementation, {@code hashFunction} may or may not 342 | * be used. 343 | * 344 | *

The returned value should be in range [0, {@code bucketCount}), and the method needs to be 345 | * an involution with respect to {@code bucketIndex}. That is, with other parameters fixed, the 346 | * method needs to satisfy bucketIndex = 347 | * computeOtherBucketIndex(computeOtherBucketIndex(bucketIndex)) for all valid 348 | * bucketIndex. Note that other parameters are omitted for brevity. If these properties 349 | * don't hold, the behavior of the cuckoo filter is undefined. 350 | */ 351 | int computeOtherBucketIndex( 352 | long fingerprint, int bucketIndex, int bucketCount, HashFunction hashFunction); 353 | 354 | /** 355 | * Maximum number of replacements to be made during insertion, before declaring that the 356 | * insertion has failed. 357 | * 358 | *

If not overridden, set to 500 as a default. 359 | */ 360 | default int maxReplacementCount() { 361 | return 500; 362 | } 363 | } 364 | } 365 | -------------------------------------------------------------------------------- /setfilters/src/com/google/setfilters/cuckoofilter/CuckooFilterHashFunctions.java: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // https://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package com.google.setfilters.cuckoofilter; 16 | 17 | import com.google.common.hash.Funnel; 18 | import com.google.common.hash.HashCode; 19 | import com.google.common.hash.Hashing; 20 | 21 | /** A set of predefined {@link CuckooFilterConfig.HashFunction}s. */ 22 | public enum CuckooFilterHashFunctions implements CuckooFilterConfig.HashFunction { 23 | 24 | /** 25 | * MurmurHash3 that yields 128 bit hash value. 26 | * 27 | *

Behavior of MurmurHash3 is fixed and should not change in the future. 28 | */ 29 | MURMUR3_128() { 30 | @Override 31 | public HashCode hash(T element, Funnel funnel) { 32 | return Hashing.murmur3_128().hashObject(element, funnel); 33 | } 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /setfilters/src/com/google/setfilters/cuckoofilter/CuckooFilterStrategies.java: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // https://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package com.google.setfilters.cuckoofilter; 16 | 17 | import com.google.common.hash.Funnels; 18 | import com.google.common.hash.HashCode; 19 | 20 | /** A set of predefined {@link CuckooFilterConfig.Strategy}s. */ 21 | public enum CuckooFilterStrategies implements CuckooFilterConfig.Strategy { 22 | 23 | /** 24 | * A strategy that uses a mod operator to produce the desired outputs. 25 | * 26 | *

The {@link HashCode} generated with the hash function should be at least 64 bits. This will 27 | * achieve good false positive rate when fingerprintLength <= 32. 28 | */ 29 | SIMPLE_MOD() { 30 | @Override 31 | public long computeFingerprint(HashCode hash, int fingerprintLength) { 32 | // Use the most significant fingerprintLength bits. This is needed to get rid of the 33 | // correlation with the bucket index. 34 | long fingerprint = hash.asLong() >>> (Long.SIZE - fingerprintLength); 35 | // Value 0 is reserved, so instead map to 1. This means that the generated fingerprint value 36 | // is skewed (1 is twice as more likely to be generated than any other value). Note that, we 37 | // could have taken mod (2^fingerprintLength - 1) and added 1, which would produce a more 38 | // uniform distribution. However, for performance reason, we choose to take this approach 39 | // instead. 40 | if (fingerprint == 0) { 41 | return 1L; 42 | } 43 | return fingerprint; 44 | } 45 | 46 | @Override 47 | public int computeBucketIndex(HashCode hash, int bucketCount) { 48 | return Math.floorMod(hash.asLong(), bucketCount); 49 | } 50 | 51 | @Override 52 | public int computeOtherBucketIndex( 53 | long fingerprint, 54 | int bucketIndex, 55 | int bucketCount, 56 | CuckooFilterConfig.HashFunction hashFunction) { 57 | long fingerprintHash = hashFunction.hash(fingerprint, Funnels.longFunnel()).asLong(); 58 | // Use (hash(fingerprint) - bucketIndex) mod bucketCount as the involution. 59 | return Math.floorMod(fingerprintHash - bucketIndex, bucketCount); 60 | } 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /setfilters/src/com/google/setfilters/cuckoofilter/CuckooFilterTable.java: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // https://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package com.google.setfilters.cuckoofilter; 16 | 17 | import java.nio.ByteBuffer; 18 | import java.util.Optional; 19 | import java.util.Random; 20 | 21 | /** An array of buckets where each bucket can store a fixed number of fingerprints. */ 22 | interface CuckooFilterTable { 23 | /** Value of the empty "slot", which is reserved as 0. */ 24 | public static long EMPTY_SLOT = 0L; 25 | 26 | /** 27 | * Creates an implementation of an empty cuckoo filter based on whether space optimization should 28 | * be used. 29 | * 30 | *

Space optimization is best effort, and is not guaranteed. 31 | */ 32 | public static CuckooFilterTable create( 33 | CuckooFilterConfig.Size size, boolean useSpaceOptimization, Random random) { 34 | if (useSpaceOptimization && size.bucketCapacity() == 4 && size.fingerprintLength() >= 4) { 35 | return new SemiSortedCuckooFilterTable(size, random); 36 | } 37 | return new UncompressedCuckooFilterTable(size, random); 38 | } 39 | 40 | /** Creates an implementation of the cuckoo filter based on the serialization. */ 41 | public static CuckooFilterTable createFromSerialization( 42 | SerializedCuckooFilterTable serializedTable, Random random) { 43 | ByteBuffer buffer = ByteBuffer.wrap(serializedTable.asByteArray()); 44 | 45 | if (buffer.remaining() <= 16) { 46 | throw new IllegalArgumentException("Unable to parse the SerializedCuckooFilterTable."); 47 | } 48 | 49 | int tableType = buffer.getInt(); 50 | int bucketCount = buffer.getInt(); 51 | int bucketCapacity = buffer.getInt(); 52 | int fingerprintLength = buffer.getInt(); 53 | CuckooFilterConfig.Size size = 54 | CuckooFilterConfig.Size.newBuilder() 55 | .setBucketCount(bucketCount) 56 | .setBucketCapacity(bucketCapacity) 57 | .setFingerprintLength(fingerprintLength) 58 | .build(); 59 | 60 | byte[] bitArray = new byte[buffer.remaining()]; 61 | buffer.get(bitArray); 62 | 63 | if (tableType == UncompressedCuckooFilterTable.TABLE_TYPE) { 64 | return new UncompressedCuckooFilterTable(size, bitArray, random); 65 | } else if (tableType == SemiSortedCuckooFilterTable.TABLE_TYPE) { 66 | return new SemiSortedCuckooFilterTable(size, bitArray, random); 67 | } else { 68 | throw new IllegalArgumentException("Unable to parse the SerializedCuckooFilterTable."); 69 | } 70 | } 71 | 72 | /** 73 | * Inserts given {@code fingerprint} to the {@code bucketIndex}th bucket, replacing an arbitrary 74 | * fingerprint if the bucket is full. 75 | * 76 | *

How this arbitrary fingerprint is chosen depends on the implementation. 77 | * 78 | * @return the value of the replaced fingerprint if the bucket is full, and an empty {@link 79 | * Optional} otherwise. 80 | */ 81 | Optional insertWithReplacement(int bucketIndex, long fingerprint); 82 | 83 | /** Returns whether {@code bucketIndex}th bucket contains {@code fingerprint}. */ 84 | boolean contains(int bucketIndex, long fingerprint); 85 | 86 | /** 87 | * Deletes a {@code fingerprint} from {@code bucketIndex}th bucket. 88 | * 89 | *

If a bucket contains multiple {@code fingerprint} values, this method only deletes one. 90 | * 91 | * @return {@code true} if {@code fingerprint} is in {@code bucketIndex}th bucket and is deleted, 92 | * and {@code false} otherwise. 93 | */ 94 | boolean delete(int bucketIndex, long fingerprint); 95 | 96 | /** Returns whether {@code bucketIndex}th bucket is full. */ 97 | boolean isFull(int bucketIndex); 98 | 99 | /** Returns the size of {@link CuckooFilterTable}. */ 100 | CuckooFilterConfig.Size size(); 101 | 102 | /** Returns serialization of {@link CuckooFilterTable}. */ 103 | SerializedCuckooFilterTable serialize(); 104 | 105 | // TODO: Add more methods as needed. 106 | } 107 | -------------------------------------------------------------------------------- /setfilters/src/com/google/setfilters/cuckoofilter/SemiSortedCuckooFilterTable.java: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // https://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package com.google.setfilters.cuckoofilter; 16 | 17 | import static com.google.common.base.Preconditions.checkArgument; 18 | import static java.util.Comparator.comparingInt; 19 | 20 | import com.google.common.collect.ImmutableMap; 21 | import java.nio.ByteBuffer; 22 | import java.util.Arrays; 23 | import java.util.Optional; 24 | import java.util.Random; 25 | 26 | /** 27 | * Implementation of the {@link CuckooFilterTable} using the semi-sorting bucket compression scheme 28 | * in the original paper by Fan et al (https://www.cs.cmu.edu/~dga/papers/cuckoo-conext2014.pdf) - 29 | * see section 5.2. 30 | * 31 | *

The main idea behind the compression algorithm is that the order of the fingerprints in each 32 | * bucket is irrelevant - that is, the fingerprints in each bucket forms a multiset. For fingerprint 33 | * length f and bucket capacity b, the possible number of multisets of b fingerprints of f bits each 34 | * is given by C(2^f + b - 1, b), where C denotes binomial coefficient. In particular, we can encode 35 | * each bucket with ceil(log2(C(2^f + b - 1, b))) bits. On the other hand, naively encoding the 36 | * fingerprints will take b * f bits. Thus, it is theoretically possible to save b * f - 37 | * ceil(log2(C(2^f + b - 1, b))) bits per bucket (note that this is not information theoretically 38 | * tight because the distribution of the multisets is not uniform). 39 | * 40 | *

For performance reason, this only supports a table with bucket capacity of size 4 and 41 | * fingerprint length >= 4 - in many cases this is not a limitation because, for many practical 42 | * applications, bucket capacity of size 4 yields the optimal cuckoo filter size and fingerprint 43 | * length < 4 will never achieve good enough false positive rate. 44 | * 45 | *

Compared to the {@link UncompressedCuckooFilterTable}, this implementation can save 1 bit per 46 | * element, at the cost of slower filter operations by a constant factor (asymptotically, it is the 47 | * same as the uncompressed one). Note that for bucket capacity of size 4, saving 1 bit per element 48 | * is "optimal" up to rounding down, as the function 4 * f - ceil(log2(C(2^f + 3, 4))) < 5 for 49 | * reasonable values of f. However, this also incurs an additional fixed space overhead, so for 50 | * smaller filter the extra saving of 1 bit per element may not be worth it. 51 | */ 52 | final class SemiSortedCuckooFilterTable implements CuckooFilterTable { 53 | // Implementation type of the table, to be encoded in the serialization. 54 | public static final int TABLE_TYPE = 1; 55 | 56 | // Table containing all sorted 4 bit partial fingerprints of length 4 (16 bits) by its index. 57 | private static final short[] SORTED_PARTIAL_FINGERPRINTS = computeSortedPartialFingerprints(); 58 | // Inverse map of SORTED_PARTIAL_FINGERPRINTS. 59 | private static final ImmutableMap SORTED_PARTIAL_FINGERPRINTS_INDEX = 60 | computeSortedPartialFingerprintsIndex(SORTED_PARTIAL_FINGERPRINTS); 61 | 62 | private final CuckooFilterConfig.Size size; 63 | private final Random random; 64 | private final CuckooFilterArray cuckooFilterArray; 65 | 66 | /** 67 | * Creates a new uncompressed cuckoo filter table of the given size. 68 | * 69 | *

Uses the given source of {@code random} to choose the replaced fingerprint in {@code 70 | * insertWithReplacement} method. 71 | */ 72 | public SemiSortedCuckooFilterTable(CuckooFilterConfig.Size size, Random random) { 73 | this.size = size; 74 | checkArgument( 75 | size.bucketCapacity() == 4, 76 | "SemiSortedCuckooFilterTable only supports bucket capacity of 4."); 77 | checkArgument( 78 | size.fingerprintLength() >= 4, 79 | "SemiSortedCuckooFilterTable only supports fingerprint length >= 4."); 80 | this.random = random; 81 | // bucketCapacity == 4 and fingerprintLength <= 64, so we can assume that it will always fit 82 | // into a long. 83 | cuckooFilterArray = 84 | new CuckooFilterArray( 85 | (long) size.bucketCount() * size.bucketCapacity(), size.fingerprintLength() - 1); 86 | } 87 | 88 | /** Creates {@link SemiSortedCuckooFilterTable} from {@link SerializedCuckooFilterTable}. */ 89 | public SemiSortedCuckooFilterTable(CuckooFilterConfig.Size size, byte[] bitArray, Random random) { 90 | this.size = size; 91 | this.random = random; 92 | cuckooFilterArray = 93 | new CuckooFilterArray( 94 | (long) size.bucketCount() * size.bucketCapacity(), 95 | size.fingerprintLength() - 1, 96 | bitArray); 97 | } 98 | 99 | @Override 100 | public Optional insertWithReplacement(int bucketIndex, long fingerprint) { 101 | long[] fingerprints = decodeBucket(bucketIndex); 102 | for (int i = 0; i < size.bucketCapacity(); i++) { 103 | if (fingerprints[i] == EMPTY_SLOT) { 104 | fingerprints[i] = fingerprint; 105 | encodeAndPut(bucketIndex, fingerprints); 106 | return Optional.empty(); 107 | } 108 | } 109 | 110 | int replacedSlotIndex = random.nextInt(size.bucketCapacity()); 111 | long replacedFingerprint = fingerprints[replacedSlotIndex]; 112 | fingerprints[replacedSlotIndex] = fingerprint; 113 | encodeAndPut(bucketIndex, fingerprints); 114 | return Optional.of(replacedFingerprint); 115 | } 116 | 117 | @Override 118 | public boolean contains(int bucketIndex, long fingerprint) { 119 | long[] 
fingerprints = decodeBucket(bucketIndex); 120 | for (long fingerprintInBucket : fingerprints) { 121 | if (fingerprintInBucket == fingerprint) { 122 | return true; 123 | } 124 | } 125 | return false; 126 | } 127 | 128 | @Override 129 | public boolean delete(int bucketIndex, long fingerprint) { 130 | long[] fingerprints = decodeBucket(bucketIndex); 131 | for (int i = 0; i < fingerprints.length; i++) { 132 | if (fingerprints[i] == fingerprint) { 133 | fingerprints[i] = EMPTY_SLOT; 134 | encodeAndPut(bucketIndex, fingerprints); 135 | return true; 136 | } 137 | } 138 | return false; 139 | } 140 | 141 | @Override 142 | public boolean isFull(int bucketIndex) { 143 | return !contains(bucketIndex, CuckooFilterTable.EMPTY_SLOT); 144 | } 145 | 146 | @Override 147 | public CuckooFilterConfig.Size size() { 148 | return size; 149 | } 150 | 151 | @Override 152 | public SerializedCuckooFilterTable serialize() { 153 | byte[] serializedArray = cuckooFilterArray.toByteArray(); 154 | 155 | // The first 16 bytes specifies the implementation type and the size of the table (defined by 156 | // tuple (type, bucketCount, 157 | // bucketCapacity, fingerprintLength)). 158 | // Rest is the bit array. 159 | ByteBuffer encoded = ByteBuffer.allocate(16 + serializedArray.length); 160 | return SerializedCuckooFilterTable.createFromByteArray( 161 | encoded 162 | .putInt(TABLE_TYPE) 163 | .putInt(size.bucketCount()) 164 | .putInt(size.bucketCapacity()) 165 | .putInt(size.fingerprintLength()) 166 | .put(serializedArray) 167 | .array()); 168 | } 169 | 170 | private long toArrayIndex(int bucketIndex, int slotIndex) { 171 | return (long) bucketIndex * size.bucketCapacity() + slotIndex; 172 | } 173 | 174 | // TODO: Check if encoding/decoding needs to be optimized. 175 | 176 | // Decodes fingerprints at bucketIndex. 
177 | private long[] decodeBucket(int bucketIndex) { 178 | int encodedSortedPartialFingerintsIndex = 0; 179 | long[] fingerprintPrefixes = new long[size.bucketCapacity()]; 180 | for (int i = 0; i < size.bucketCapacity(); i++) { 181 | long arrayIndex = toArrayIndex(bucketIndex, i); 182 | long n = cuckooFilterArray.getAsLong(arrayIndex); 183 | encodedSortedPartialFingerintsIndex <<= 3; 184 | encodedSortedPartialFingerintsIndex |= (int) (n & 0x7); 185 | fingerprintPrefixes[i] = n >>> 3; 186 | } 187 | 188 | int encodedSortedPartialFingerprints = 189 | SORTED_PARTIAL_FINGERPRINTS[encodedSortedPartialFingerintsIndex]; 190 | long[] fingerprints = new long[size.bucketCapacity()]; 191 | for (int i = size.bucketCapacity() - 1; i >= 0; i--) { 192 | fingerprints[i] = (fingerprintPrefixes[i] << 4) | (encodedSortedPartialFingerprints & 0xF); 193 | encodedSortedPartialFingerprints >>>= 4; 194 | } 195 | return fingerprints; 196 | } 197 | 198 | /** 199 | * Encode fingerprints and put them to bucketIndex. 200 | * 201 | *

Encoding works as follows. 202 | * 203 | *

Suppose each fingerprint is logically f bits. First, sort the fingerprints by the least 204 | * significant 4 bits. Let's call the most significant f - 4 bits of the fingerprints as the 205 | * fingerprint prefixes. The least significant 4 bits of the fingerprints will be the partial 206 | * fingerprints, which will be encoded according to the SORTED_PARTIAL_FINGEPRRINTS_INDEX map as a 207 | * 12 bit value. Partition the encoded 12 bit value into four 3 bit chunks. Group each of the f - 208 | * 4 bit prefixes with each 3 bit chunk (f - 1 bits total) and insert it as a cuckoo filter array 209 | * element. 210 | */ 211 | private void encodeAndPut(int bucketIndex, long[] fingerprints) { 212 | long[] fingerprintPrefixes = new long[size.bucketCapacity()]; 213 | int[] partialFingerprints = new int[size.bucketCapacity()]; 214 | for (int i = 0; i < size.bucketCapacity(); i++) { 215 | fingerprintPrefixes[i] = fingerprints[i] >>> 4; 216 | partialFingerprints[i] = (int) (fingerprints[i] & 0xF); 217 | } 218 | Integer[] indices = {0, 1, 2, 3}; 219 | Arrays.sort(indices, comparingInt((Integer i) -> partialFingerprints[i])); 220 | short encodedSortedPartialFingerprints = 221 | (short) 222 | ((partialFingerprints[indices[0]] << 12) 223 | | (partialFingerprints[indices[1]] << 8) 224 | | (partialFingerprints[indices[2]] << 4) 225 | | partialFingerprints[indices[3]]); 226 | int encodedSortedPartialFingerprintsIndex = 227 | SORTED_PARTIAL_FINGERPRINTS_INDEX.get(encodedSortedPartialFingerprints); 228 | for (int i = size.bucketCapacity() - 1; i >= 0; i--) { 229 | long arrayIndex = toArrayIndex(bucketIndex, i); 230 | cuckooFilterArray.set( 231 | arrayIndex, 232 | (fingerprintPrefixes[indices[i]] << 3) | (encodedSortedPartialFingerprintsIndex & 0x7)); 233 | encodedSortedPartialFingerprintsIndex >>>= 3; 234 | } 235 | } 236 | 237 | private static short[] computeSortedPartialFingerprints() { 238 | // (2^4 + 3 choose 4) = 3876 counts the number of multisets of size 4, with each element in 239 
| // [0, 16). 240 | short[] sortedPartialFingerprints = new short[3876]; 241 | 242 | final short fingerprintUpperBound = 16; 243 | 244 | int i = 0; 245 | for (short a = 0; a < fingerprintUpperBound; a++) { 246 | for (short b = a; b < fingerprintUpperBound; b++) { 247 | for (short c = b; c < fingerprintUpperBound; c++) { 248 | for (short d = c; d < fingerprintUpperBound; d++) { 249 | sortedPartialFingerprints[i] = (short) ((a << 12) | (b << 8) | (c << 4) | d); 250 | i++; 251 | } 252 | } 253 | } 254 | } 255 | return sortedPartialFingerprints; 256 | } 257 | 258 | private static ImmutableMap computeSortedPartialFingerprintsIndex( 259 | short[] sortedPartialFingerprints) { 260 | ImmutableMap.Builder map = ImmutableMap.builder(); 261 | for (short i = 0; i < sortedPartialFingerprints.length; i++) { 262 | map.put(sortedPartialFingerprints[i], i); 263 | } 264 | return map.buildOrThrow(); 265 | } 266 | } 267 | -------------------------------------------------------------------------------- /setfilters/src/com/google/setfilters/cuckoofilter/SerializedCuckooFilterTable.java: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // https://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package com.google.setfilters.cuckoofilter; 16 | 17 | import java.util.Arrays; 18 | 19 | /** Serialization of {@link CuckooFilterTable}. 
/**
 * Serialization of {@link CuckooFilterTable}.
 *
 * <p>Instances are immutable: the bytes are copied when the serialization is created and again
 * whenever they are handed out, so neither the caller's input array nor a returned array can be
 * used to modify the stored serialization.
 */
public final class SerializedCuckooFilterTable {
  private final byte[] rawSerialization;

  /**
   * Creates serialization from raw byte array.
   *
   * @param byteArray the serialized bytes; copied, so later changes to it are not observed
   * @return a serialization holding a private copy of {@code byteArray}
   * @throws NullPointerException if {@code byteArray} is null
   */
  public static SerializedCuckooFilterTable createFromByteArray(byte[] byteArray) {
    // Fail with a descriptive message rather than from inside Arrays.copyOf.
    if (byteArray == null) {
      throw new NullPointerException("byteArray must not be null");
    }
    return new SerializedCuckooFilterTable(Arrays.copyOf(byteArray, byteArray.length));
  }

  /** Takes ownership of {@code rawSerialization}; callers must pass an array nobody else holds. */
  private SerializedCuckooFilterTable(byte[] rawSerialization) {
    this.rawSerialization = rawSerialization;
  }

  /** Returns the serialization as a byte array; each call returns a fresh copy. */
  public byte[] asByteArray() {
    return Arrays.copyOf(rawSerialization, rawSerialization.length);
  }

  // TODO: Add other methods like asJSON();
}
14 | 15 | package com.google.setfilters.cuckoofilter; 16 | 17 | import java.nio.ByteBuffer; 18 | import java.util.Optional; 19 | import java.util.Random; 20 | 21 | /** 22 | * Implementation of the {@link CuckooFilterTable} that doesn't use the semi-sorting bucket 23 | * compression scheme in the original paper by Fan et al 24 | * (https://www.cs.cmu.edu/~dga/papers/cuckoo-conext2014.pdf) - see section 5.2 for what 25 | * semi-sorting bucket compression scheme is. 26 | * 27 | *

Thus, if a bucket can hold up to bucketCapacity number of fingerprints and each fingerprint is 28 | * of length fingerprintLength bits, it takes bucketCapacity * fingerprintLength bits to represent 29 | * each bucket. 30 | */ 31 | final class UncompressedCuckooFilterTable implements CuckooFilterTable { 32 | // Implementation type of the table, to be encoded in the serialization. 33 | public static final int TABLE_TYPE = 0; 34 | 35 | private final CuckooFilterConfig.Size size; 36 | private final Random random; 37 | private final CuckooFilterArray cuckooFilterArray; 38 | 39 | /** 40 | * Creates a new uncompressed cuckoo filter table of the given size. 41 | * 42 | *

Uses the given source of {@code random} to choose the replaced fingerprint in {@code 43 | * insertWithReplacement} method. 44 | */ 45 | public UncompressedCuckooFilterTable(CuckooFilterConfig.Size size, Random random) { 46 | this.size = size; 47 | this.random = random; 48 | // bucketCapacity <= 128 and fingerprintLength <= 64, so we can assume that it will always fit 49 | // into a long. 50 | cuckooFilterArray = 51 | new CuckooFilterArray( 52 | (long) size.bucketCount() * size.bucketCapacity(), size.fingerprintLength()); 53 | } 54 | 55 | /** Creates {@link UncompressedCuckooFilterTable} from {@link SerializedCuckooFilterTable}. */ 56 | public UncompressedCuckooFilterTable( 57 | CuckooFilterConfig.Size size, byte[] bitArray, Random random) { 58 | this.size = size; 59 | this.random = random; 60 | cuckooFilterArray = 61 | new CuckooFilterArray( 62 | (long) size.bucketCount() * size.bucketCapacity(), size.fingerprintLength(), bitArray); 63 | } 64 | 65 | @Override 66 | public Optional insertWithReplacement(int bucketIndex, long fingerprint) { 67 | for (int slotIndex = 0; slotIndex < size.bucketCapacity(); slotIndex++) { 68 | long arrayIndex = toArrayIndex(bucketIndex, slotIndex); 69 | if (cuckooFilterArray.getAsLong(arrayIndex) == CuckooFilterTable.EMPTY_SLOT) { 70 | cuckooFilterArray.set(arrayIndex, fingerprint); 71 | return Optional.empty(); 72 | } 73 | } 74 | int replacedSlotIndex = random.nextInt(size.bucketCapacity()); 75 | long replacedArrayIndex = toArrayIndex(bucketIndex, replacedSlotIndex); 76 | long replacedFingerprint = cuckooFilterArray.getAsLong(replacedArrayIndex); 77 | cuckooFilterArray.set(replacedArrayIndex, fingerprint); 78 | return Optional.of(replacedFingerprint); 79 | } 80 | 81 | @Override 82 | public boolean contains(int bucketIndex, long fingerprint) { 83 | for (int slotIndex = 0; slotIndex < size.bucketCapacity(); slotIndex++) { 84 | long arrayIndex = toArrayIndex(bucketIndex, slotIndex); 85 | if (cuckooFilterArray.getAsLong(arrayIndex) == 
fingerprint) { 86 | return true; 87 | } 88 | } 89 | return false; 90 | } 91 | 92 | @Override 93 | public boolean delete(int bucketIndex, long fingerprint) { 94 | for (int slotIndex = 0; slotIndex < size.bucketCapacity(); slotIndex++) { 95 | long arrayIndex = toArrayIndex(bucketIndex, slotIndex); 96 | if (cuckooFilterArray.getAsLong(arrayIndex) == fingerprint) { 97 | cuckooFilterArray.set(arrayIndex, CuckooFilterTable.EMPTY_SLOT); 98 | return true; 99 | } 100 | } 101 | return false; 102 | } 103 | 104 | @Override 105 | public boolean isFull(int bucketIndex) { 106 | return !contains(bucketIndex, CuckooFilterTable.EMPTY_SLOT); 107 | } 108 | 109 | @Override 110 | public CuckooFilterConfig.Size size() { 111 | return size; 112 | } 113 | 114 | @Override 115 | public SerializedCuckooFilterTable serialize() { 116 | byte[] serializedArray = cuckooFilterArray.toByteArray(); 117 | 118 | // The first 16 bytes specifies the implementation type and the size of the table (defined by 119 | // tuple (type, bucketCount, 120 | // bucketCapacity, fingerprintLength)). 121 | // Rest is the bit array. 122 | ByteBuffer encoded = ByteBuffer.allocate(16 + serializedArray.length); 123 | return SerializedCuckooFilterTable.createFromByteArray( 124 | encoded 125 | .putInt(TABLE_TYPE) 126 | .putInt(size.bucketCount()) 127 | .putInt(size.bucketCapacity()) 128 | .putInt(size.fingerprintLength()) 129 | .put(serializedArray) 130 | .array()); 131 | } 132 | 133 | private long toArrayIndex(int bucketIndex, int slotIndex) { 134 | return (long) bucketIndex * size.bucketCapacity() + slotIndex; 135 | } 136 | } 137 | --------------------------------------------------------------------------------