├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── TODO.md
├── _config.yml
├── pom.xml
├── src
├── main
│ └── java
│ │ └── com
│ │ └── duprasville
│ │ └── guava
│ │ └── probably
│ │ ├── AbstractCuckooStrategy.java
│ │ ├── BloomFilter.java
│ │ ├── CuckooFilter.java
│ │ ├── CuckooStrategies.java
│ │ ├── CuckooStrategy.java
│ │ ├── CuckooStrategyMurmurBealDupras32.java
│ │ ├── CuckooTable.java
│ │ ├── ProbabilisticFilter.java
│ │ └── package-info.java
└── test
│ └── java
│ └── com
│ └── duprasville
│ └── guava
│ └── probably
│ ├── AbstractProbabilisticFilterTest.java
│ ├── BloomProbabilisticFilterTest.java
│ ├── CuckooFilterProbabilisticFilterTest.java
│ ├── CuckooFilterTest.java
│ └── CuckooStrategiesTest.java
├── updaterelease.sh
└── util
├── deploy_snapshot.sh
├── settings.xml
├── update_snapshot_docs.sh
└── util.sh
/.gitignore:
--------------------------------------------------------------------------------
1 | # Maven
2 | target/
3 | *.ser
4 | *.ec
5 |
6 | # IntelliJ Idea
7 | .idea/
8 | out/
9 | *.ipr
10 | *.iws
11 | *.iml
12 |
13 | # Eclipse
14 | .classpath
15 | .project
16 | .settings/
17 | .metadata/
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: java
2 | jdk:
3 | - oraclejdk8
4 | - openjdk8
5 | install: mvn install -U -DskipTests=true
6 | script:
7 | - mvn verify -U -Dmaven.javadoc.skip=true
8 | # avoid unnecessary cache updates
9 | - rm -rf $HOME/.m2/repository/com/duprasville/guava/guava-probably
10 | after_success:
11 | - util/update_snapshot_docs.sh
12 | - util/deploy_snapshot.sh
13 | cache:
14 | directories:
15 | - $HOME/.m2
16 | env:
17 | global:
18 | - secure: MJY016432HZC8lTgwtyWLzmjaKZNVxHWRZ1M3en4X4u0qsYd9mwpZKjbBry5M2GY1laeMwa2XX/hAicxaSqkijul6NGTkV1tSmAkRHccqyu80+5uQuK0SYRMzXPxZdHGwwYG1X9Myene7HDs2FyGg8UezQphfFQFk0y+0EFuFR4C2hrGUJl8OBjzx/sF9/MhyyUfdZPDHVxpZ/cmw+TspmBZ08IVjl5HJcYQGki/Y2VZ4d7rKfI24EeL5mjivFzWR3j6hWLKpRqbnCtMzcXDs/z8qBzWPIu8D+PZQpZwN6OQM/JDYpDD4Vn/9+FGStbjCGOtSTigvA7639NY7nKiGEWBJTEhREP+7YwovYcj9oPXodTYpNg/Ai1tgCHw2zGBiD2eandCCYhyN2gHcfdq2lJ1PA4nUPNdj1zsTW7IUNbAmJ+gVd4uDvgIML7NPqbgl9hUaEx/DPzVc272sOYJOdtRgcg5sOIA7Ehnky6CWnpKCggv2Lww3dO5bSeMccAoB96RQKNFqXTAynGoBaLZnVZzgCobZ66YAYWSosM0oBzWVwDEkw32MiRoizplF3NfnSARaGzENyCV4Gay611dbXYF9HVrxp3oYFnptEbvvOKmUGgp0HpQiBemzJ0tuabs4Zu47MkCnalrU9K8BkoreaVJk8WP0/KPj53t/fWVUT4=
19 | - secure: uLNJ12pDfeggkdcvV1G2KVgh04H7SkxRBPBsgmgMknu/6MnXZSlJS8uiT7bSId0E3F/ERXCjB/z00vI3+kYpvAYdvsNqGONx6rMJvFs+vn3BtG9q5VSyhULKrnZnkWthRYKQv5EZdQT8WqKVO77m266sX1eBGUtazazqwm8kEZ7yjagfiBNcfoUIdVEP1jCxd/0+Vdq/KRgtejunXmm6rcE9ppMh+j5KG6jy5FxwlfQzDXys+E+NvIsgFmMy7ZQKnivZuEhsQmng4nB8AaZpraOOrooNBJfVdn+VGiL8Pnw1INgOqv89LN5u6Hcc5ztL5Sf2rooSP21KQCLC1ZbtmqjXxng06AA4PIuTKdv75G/huK8M7q+TCWSpe/4aIjnRxOfqetQ3jntj6e1joolHVUWj/9cSOUwGIJg748SA6atN5iDi0GGfORutblD28/2BZzMOEMRKfszT0JGxQ5bsjbDzIbyvT+XrSpZ+exx/DAtMUzvn9OE+xDQW046pFy87UHPeW985vM9M1MVh1frT8RPq8pCIKVp+bLk8gJTvV+etfVkD6yT2YYwxtp/02s3G3IrQVF4WtuwXv0B0HUF94xwzVm+JtHEQ+v2Yq68z7IjApBlabocrUcWMNH1EiDgcscwQJ2xgdNEPyef3ruZYQ2TJaWeo/gyt0CmgfGyDAzc=
20 | - secure: Gi1cBOP9PiFiKDHHnId8hXGHjo4heNLN9XSsvKzSFG0fJzn889XWl3b6W4rGT4uqV3TvETe51rI/RdmD2Ru2W4oyoAk5ReE7rIyUFXmEHa1L/rfyNF8L0eAcKHS3ZpGIMk+k15kGJUxRKyloSTiqrXvIV96c7r9tfmZet5482uT+FiMNblrVwlWcguYzpUOpU2nHj57heueCIAWBVjS3/Fk1M8HwLFm07a7N8v9bvqN8Opc1a36FtIKxuHfCBWA0R+VwJLYEeW0RAUPnFQtVnq3ZKxOYqAiH+jdg4gDIX6i12/1VETQTjTkQt9uFVOtIooxWVMHt7Glho3/kBopUlnOOOsfmj40X/YP4u6tafcai0imXNtEVnjhUBX4f6+62QSl8GoJRvYwwKaJ1XL2Bz8W8kFvBj+rPRMF6495J4aAvn47ykxBM/gQvrMusjKyvku34jIpCeZ/XKUpUHQvErTQlp7dE1F94GaW4DDSBAFGBVrHPe0hGfRwRVglRZMmpDYj6yTl9xcsn+8iR2A8cN9JVumye10rKcAYIpy768uWarT4ZXv2eO1otxm2SUPlb6+ryxXKCyr0d6JfvG/o0Yq+Yg3mVBq4jgAvx4MzgP67tOKufBXcQP6+HKtNZrcHD0pWPassGb23io3/On68YObJpddgRBYes+lFLHOpzOmA=
21 | branches:
22 | except:
23 | - gh-pages
24 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "{}"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright {yyyy} {name of copyright owner}
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
203 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Guava-Probably: Probabilistic Filters
2 | =====================================
3 | The Guava-Probably project provides two probabilistic filters for Guava.
4 |
5 | [Build Status](https://travis-ci.org/bdupras/guava-probably)
6 | [License](./LICENSE)
7 | [Maven Central](https://maven-badges.herokuapp.com/maven-central/com.duprasville.guava/guava-probably)
8 |
9 | # What is it?
10 | A probabilistic filter is a space-efficient data structure for representing a set in order to support membership queries. [ref][BroderMitzenmacher]
11 |
12 | # How does it work?
13 | Check out this sweet, interactive demo: [Probabilistic Filters By Example](https://bdupras.github.io/filter-tutorial/)
14 |
15 | # What's it good for?
16 | Probabilistic filters are great for reducing unnecessary disk, database or network queries. Applications where the universe of possible members in a set is much larger than actual members may benefit from probabilistic filters, especially when most membership queries are expected to return false.
17 |
18 | # No really, what's it good for?
19 | - Google Chrome uses p-filters to make a preliminary decision whether a particular web site is malicious or safe. [ref][Yakunin]
20 | - Exim mail transfer agent uses p-filters in its rate-limiting logic. [ref][Finch]
21 | - Use a p-filter to reject malicious authentication attempts, protecting your cache and database from botnet queries.
22 |
23 | # Cool, how do I get it?
24 | Requires JDK 8 or higher and Google Guava 19.0 or higher (as of 1.0).
25 | - `1.0`: [API Docs][guava-probably-release-api-docs], 05 July 2019.
26 |
27 | To add a dependency on Guava-Probably using Maven, use the following:
28 | ```xml
29 | <dependency>
30 |   <groupId>com.duprasville.guava</groupId>
31 |   <artifactId>guava-probably</artifactId>
32 |   <version>1.0</version>
33 | </dependency>
34 | ```
35 |
36 | To add a dependency using Gradle:
37 | ```
38 | dependencies {
39 | compile 'com.duprasville.guava:guava-probably:1.0'
40 | }
41 | ```
42 |
43 | # How do I learn more?
44 | - [Probabilistic Filters By Example](https://bdupras.github.io/filter-tutorial/)
45 | - [Cuckoo Filter: Practically Better Than Bloom](https://www.cs.cmu.edu/~dga/papers/cuckoo-conext2014.pdf)
46 | - [Bloom Filters by Example](http://billmill.org/bloomfilter-tutorial/)
47 | - [Google Guava BloomFilter](https://github.com/google/guava/wiki/HashingExplained#bloomfilter)
48 | - [Network Applications of Bloom Filters: A Survey](http://projecteuclid.org/DPubS?service=UI&version=1.0&verb=Display&handle=euclid.im/1109191032)
49 | - [Nice Bloom filter application](http://blog.alexyakunin.com/2010/03/nice-bloom-filter-application.html)
50 | - [What use are Bloom filters, anyway?](http://fanf.livejournal.com/82764.html)
51 |
52 | # Links
53 | - [GitHub project](https://github.com/bdupras/guava-probably)
54 | - [Issue tracker: report a defect or feature request](https://github.com/bdupras/guava-probably/issues/new)
55 |
56 | [BroderMitzenmacher]: http://projecteuclid.org/DPubS?service=UI&version=1.0&verb=Display&handle=euclid.im/1109191032 "Network Applications of Bloom Filters: A Survey; Andrei Broder and Michael Mitzenmacher"
57 | [Yakunin]: http://blog.alexyakunin.com/2010/03/nice-bloom-filter-application.html "Nice Bloom filter application"
58 | [Finch]: http://fanf.livejournal.com/82764.html "What use are Bloom filters, anyway?"
59 | [guava-probably-release-api-docs]: http://bdupras.github.io/guava-probably/releases/1.0/api/docs/
60 | [guava-probably-snapshot-api-docs]: http://bdupras.github.io/guava-probably/releases/snapshot/api/docs/
61 |
--------------------------------------------------------------------------------
/TODO.md:
--------------------------------------------------------------------------------
1 | # Guava-Probably: TODO List
2 |
3 | ## CI
4 | * commit/push to release SNAPSHOT, major, minor, patch :: maven central && javadocs
5 | * simplify travis scripts
6 |
7 | ## Features
8 | * MultiSet interface operations (count, set counts)
9 | * CuckooFilter impl increase max capacity (separate even/odd tables? array of tables?)
10 | * Primitive interface API (to avoid object alloc)
11 | * Direct hash fn invocation (to avoid object alloc)
12 | * extract filter dimensions calculation
13 | * NOTE: knowing if an insertion modified a bloom filter is useful
14 |   * e.g. loop detection in routing algos
15 |   * question: what should the semantics be for returning inserted/not-inserted vs changed/not-changed?
16 | * make deletability optional? when off, colliding insertions do not mutate the filter
17 |
--------------------------------------------------------------------------------
/_config.yml:
--------------------------------------------------------------------------------
1 | # Main site settings
2 | title: Guava-Probably
3 | subtitle: Probabilistic data structures for Guava
4 | description: Probabilistic data structures for Guava
5 | baseurl: /guava-probably
6 | url: http://bdupras.github.io
7 | permalink: /news/:year/:month/:day/:title/
8 |
9 | exclude:
10 | - "Gemfile*"
11 | - "README.md"
12 | - "*.sh"
13 |
14 | # GitHub-flavored Markdown support
15 | markdown: kramdown
16 | kramdown:
17 | input: GFM
18 |
19 | # Set by default by GitHub pages (can't be changed)
20 | safe: true
21 | lsi: false
22 | # source:
23 |
24 | # Set by default by GitHub pages (can be changed)
25 | # highlighter: pygments
26 | # github: (https://help.github.com/articles/repository-metadata-on-github-pages/)
27 |
28 | # Collections
29 | collections:
30 | releases:
31 | output: true
32 | permalink: /:collection/:path/
33 |
34 | # Release data
35 | # Do not change! updaterelease.sh automatically updates these fields
36 | latest_release: 1.0
37 | latest_snapshot: 1.0-SNAPSHOT
38 |
39 | # Miscellaneous data
40 | email: brian@duprasville.com
41 | twitter_username: briandupras
42 | github_username: bdupras/guava-probably
43 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
2 | 4.0.0
3 | com.duprasville.guava
4 | guava-probably
5 | jar
6 | 1.1-SNAPSHOT
7 | Guava-Probably: Probabilistic Data Structures extension for Guava
8 |
9 | Guava-Probably is an extension library to Google Guava that adds probabilistic data structures
10 | and related interfaces.
11 |
12 | https://github.com/bdupras/guava-probably
13 |
14 |
15 | The Apache License, Version 2.0
16 | http://www.apache.org/licenses/LICENSE-2.0.txt
17 |
18 |
19 |
20 |
21 | Brian Dupras
22 | brian@duprasville.com
23 | bdupras
24 | https://github.com/bdupras
25 |
26 |
27 |
28 | scm:git:git@github.com:bdupras/guava-probably.git
29 | scm:git:git@github.com:bdupras/guava-probably.git
30 | git@github.com:bdupras/guava-probably.git
31 | HEAD
32 |
33 |
34 | UTF-8
35 |
36 |
37 |
38 |
39 | ossrh
40 | https://oss.sonatype.org/service/local/staging/deploy/maven2/
41 |
42 |
43 | ossrh
44 | https://oss.sonatype.org/content/repositories/snapshots
45 |
46 |
47 |
48 |
49 |
50 | org.apache.maven.plugins
51 | maven-compiler-plugin
52 | 3.8.1
53 |
54 | 1.8
55 | 1.8
56 |
57 |
58 |
59 | org.sonatype.plugins
60 | nexus-staging-maven-plugin
61 | 1.6.3
62 | true
63 |
64 | ossrh
65 | https://oss.sonatype.org/
66 | true
67 |
68 |
69 |
70 | maven-release-plugin
71 | 2.5.3
72 |
73 | v@{project.version}
74 | true
75 | release
76 |
77 |
78 |
79 | org.apache.maven.scm
80 | maven-scm-provider-gitexe
81 | 1.9.5
82 |
83 |
84 |
85 |
86 | org.apache.maven.plugins
87 | maven-source-plugin
88 | 2.2.1
89 |
90 |
91 | attach-sources
92 |
93 | jar-no-fork
94 |
95 |
96 |
97 |
98 |
99 | org.apache.maven.plugins
100 | maven-javadoc-plugin
101 | 2.9.1
102 |
103 |
104 | attach-javadocs
105 |
106 | jar
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 | com.google.guava
117 | guava
118 | 19.0
119 |
120 |
121 | com.google.guava
122 | guava-testlib
123 | 19.0
124 |
125 |
126 | com.google.guava
127 | guava-tests
128 | 19.0
129 |
130 |
131 | junit
132 | junit
133 | 4.13.1
134 | test
135 |
136 |
137 | com.google.code.findbugs
138 | jsr305
139 | 2.0.1
140 |
141 |
142 | com.google.truth
143 | truth
144 | 0.28
145 |
146 |
147 |
148 |
149 | release
150 |
151 |
152 |
153 | maven-source-plugin
154 | 3.0.1
155 |
156 |
157 | attach-sources
158 |
159 | jar
160 |
161 |
162 |
163 |
164 |
165 | org.sonatype.plugins
166 | nexus-staging-maven-plugin
167 | 1.6.3
168 | true
169 |
170 | ossrh
171 | https://oss.sonatype.org/
172 | true
173 |
174 |
175 |
176 | org.apache.maven.plugins
177 | maven-gpg-plugin
178 | 1.6
179 |
180 |
181 | sign-artifacts
182 | verify
183 |
184 | sign
185 |
186 |
187 |
188 |
189 |
190 |
191 |
192 |
193 | jdk8
194 |
195 | [1.8,)
196 |
197 |
198 |
199 |
200 |
201 | org.apache.maven.plugins
202 | maven-javadoc-plugin
203 |
204 | -Xdoclint:none
205 |
206 |
207 |
208 |
209 |
210 |
211 |
212 | org.apache.maven.plugins
213 | maven-javadoc-plugin
214 |
215 | -Xdoclint:none
216 |
217 |
218 |
219 |
220 |
221 |
222 |
223 |
--------------------------------------------------------------------------------
/src/main/java/com/duprasville/guava/probably/AbstractCuckooStrategy.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2015 Brian Dupras
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5 | * in compliance with the License. You may obtain a copy of the License at
6 | *
7 | * http://www.apache.org/licenses/LICENSE-2.0
8 | *
9 | * Unless required by applicable law or agreed to in writing, software distributed under the License
10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11 | * or implied. See the License for the specific language governing permissions and limitations under
12 | * the License.
13 | */
14 |
15 | package com.duprasville.guava.probably;
16 |
17 | abstract class AbstractCuckooStrategy implements CuckooStrategy {
18 | AbstractCuckooStrategy(int ordinal) {
19 | this.ordinal = ordinal;
20 | }
21 |
22 | public abstract long index(int hash, long m);
23 |
24 | public abstract long altIndex(long index, int fingerprint, long m);
25 |
26 | protected abstract int pickEntryToKick(int numEntriesPerBucket);
27 |
28 | protected abstract long maxRelocationAttempts();
29 |
30 | private final int ordinal;
31 |
32 | public int ordinal() {
33 | return ordinal;
34 | }
35 |
36 | public boolean addAll(CuckooTable thiz, CuckooTable that) {
37 | for (long index = 0; index < that.numBuckets; index++) {
38 | for (int entry = 0; entry < that.numEntriesPerBucket; entry++) {
39 | int fingerprint = that.readEntry(index, entry);
40 | if (CuckooTable.EMPTY_ENTRY != fingerprint && !(
41 | putEntry(fingerprint, thiz, index) ||
42 | putEntry(fingerprint, thiz,
43 | altIndex(index, fingerprint, thiz.numBuckets)))) {
44 | return false;
45 | }
46 | }
47 | }
48 | return true;
49 | }
50 |
51 | protected boolean putEntry(int fingerprint, CuckooTable table, long index) {
52 | return table.swapAnyEntry(fingerprint, CuckooTable.EMPTY_ENTRY, index)
53 | || putEntry(fingerprint, table, index, 0);
54 | }
55 |
56 |
57 | protected boolean putEntry(int fingerprint, final CuckooTable table, long index, int kick) {
58 | if (maxRelocationAttempts() == kick) {
59 | return false;
60 | }
61 |
62 | int entry = pickEntryToKick(table.numEntriesPerBucket);
63 | int kicked = table.swapEntry(fingerprint, index, entry);
64 |
65 | if ((CuckooTable.EMPTY_ENTRY == kicked)
66 | || putEntry(kicked, table, altIndex(index, kicked, table.numBuckets), kick + 1)) {
67 | return true;
68 | } else {
69 | int kickedBack = table.swapEntry(kicked, index, entry);
70 | assert kickedBack == fingerprint : "Uh oh - couldn't unroll failed attempts to putEntry()";
71 | return false;
72 | }
73 | }
74 |
75 | public boolean equivalent(CuckooTable thiz, CuckooTable that) {
76 | if (!thiz.isCompatible(that)) {
77 | return false;
78 | }
79 |
80 | for (long index = 0; index < that.numBuckets; index++) {
81 | for (int entry = 0; entry < that.numEntriesPerBucket; entry++) {
82 | int fingerprint = that.readEntry(index, entry);
83 | if (CuckooTable.EMPTY_ENTRY == fingerprint) {
84 | continue;
85 | }
86 |
87 | int thizCount = thiz.countEntry(fingerprint, index) +
88 | thiz.countEntry(fingerprint, altIndex(index, fingerprint, thiz.numBuckets));
89 | int thatCount = that.countEntry(fingerprint, index) +
90 | that.countEntry(fingerprint, altIndex(index, fingerprint, that.numBuckets));
91 | if (thizCount != thatCount) {
92 | return false;
93 | }
94 | }
95 | }
96 | return true;
97 | }
98 |
99 | public boolean containsAll(CuckooTable thiz, CuckooTable that) {
100 | if (!thiz.isCompatible(that)) {
101 | return false;
102 | }
103 |
104 | for (long index = 0; index < that.numBuckets; index++) {
105 | for (int entry = 0; entry < that.numEntriesPerBucket; entry++) {
106 | int fingerprint = that.readEntry(index, entry);
107 | if (CuckooTable.EMPTY_ENTRY == fingerprint) {
108 | continue;
109 | }
110 |
111 | int thizCount = thiz.countEntry(fingerprint, index) +
112 | thiz.countEntry(fingerprint, altIndex(index, fingerprint, thiz.numBuckets));
113 | int thatCount = that.countEntry(fingerprint, index) +
114 | that.countEntry(fingerprint, altIndex(index, fingerprint, that.numBuckets));
115 | if (thizCount < thatCount) {
116 | return false;
117 | }
118 | }
119 | }
120 | return true;
121 | }
122 |
123 | public boolean removeAll(CuckooTable thiz, CuckooTable that) {
124 | if (!thiz.isCompatible(that)) {
125 | return false;
126 | }
127 |
128 | for (long index = 0; index < that.numBuckets; index++) {
129 | for (int entry = 0; entry < that.numEntriesPerBucket; entry++) {
130 | int fingerprint = that.readEntry(index, entry);
131 | if (CuckooTable.EMPTY_ENTRY == fingerprint) {
132 | continue;
133 | }
134 |
135 | long altIndex = altIndex(index, fingerprint, thiz.numBuckets);
136 | int thatCount = that.countEntry(fingerprint, index) + that.countEntry(fingerprint, altIndex);
137 |
138 | for (int i = 0; i < thatCount; i++) {
139 | if (!(thiz.swapAnyEntry(CuckooTable.EMPTY_ENTRY, fingerprint, index)
140 | || thiz.swapAnyEntry(CuckooTable.EMPTY_ENTRY, fingerprint, altIndex))) {
141 | return false;
142 | }
143 | }
144 | }
145 | }
146 | return true;
147 | }
148 |
149 | @Override
150 | public boolean equals(Object obj) {
151 | if (obj instanceof CuckooStrategy) {
152 | return ((CuckooStrategy) obj).ordinal() == this.ordinal();
153 | } else {
154 | return super.equals(obj);
155 | }
156 | }
157 |
158 | @Override
159 | public int hashCode() {
160 | return this.ordinal();
161 | }
162 |
163 | @Override
164 | public String toString() {
165 | return this.getClass().getSimpleName() + '{' +
166 | "ordinal=" + this.ordinal() +
167 | '}';
168 | }
169 | }
170 |
--------------------------------------------------------------------------------
/src/main/java/com/duprasville/guava/probably/BloomFilter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2015 Brian Dupras
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5 | * in compliance with the License. You may obtain a copy of the License at
6 | *
7 | * http://www.apache.org/licenses/LICENSE-2.0
8 | *
9 | * Unless required by applicable law or agreed to in writing, software distributed under the License
10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11 | * or implied. See the License for the specific language governing permissions and limitations under
12 | * the License.
13 | */
14 |
15 | package com.duprasville.guava.probably;
16 |
17 | import com.google.common.hash.Funnel;
18 | import com.google.common.math.LongMath;
19 |
20 | import java.io.Serializable;
21 | import java.util.Collection;
22 |
23 | import javax.annotation.CheckReturnValue;
24 | import javax.annotation.Nullable;
25 |
26 | import static com.google.common.base.Preconditions.checkArgument;
27 | import static com.google.common.base.Preconditions.checkNotNull;
28 |
29 | /**
30 | * A Bloom filter for instances of {@code E} that implements the {@link ProbabilisticFilter}
31 | * interface.
32 | *
33 | * <p>A Bloom filter offers an approximate containment test with one-sided
38 | * error: if it claims that an element is contained in it, this might be in error, but if it claims
39 | * that an element is not contained in it, then this is definitely true.
40 | *
41 | * <p>If you are unfamiliar with Bloom filters, this nice <a href="http://billmill.org/bloomfilter-tutorial/">tutorial</a>
42 | * may help you understand how they work.
43 | *
44 | * <p>The false positive probability ({@code FPP}) of a bloom filter is defined as the probability
45 | * that {@link #contains(Object)} will erroneously return {@code true} for an object that has not
46 | * actually been put in the {@link BloomFilter}.
47 | *
48 | * @param <E> the type of instances that the {@link BloomFilter} accepts.
49 | * @author Brian Dupras
50 | * @author Guava Authors (underlying BloomFilter implementation)
51 | * @see com.google.common.hash.BloomFilter
52 | * @see ProbabilisticFilter
53 | */
54 | public final class BloomFilter implements ProbabilisticFilter, Serializable {
55 | private com.google.common.hash.BloomFilter delegate; // wrapped Guava filter; not final because clear() replaces it
56 | private final Funnel funnel; // kept so clear() can rebuild the delegate and copyOf() can pass it on
57 | private final long capacity; // expected number of insertions requested at creation
58 | private final double fpp; // false positive probability requested at creation
59 | private long size; // running element count, updated by add/addAll and reset by clear()
60 |
61 | private BloomFilter(com.google.common.hash.BloomFilter delegate, Funnel funnel, long capacity, double fpp, long size) {
62 |   // Validate every argument before assigning any field. Zero is accepted for capacity, fpp and
63 |   // size, so the messages say "non-negative" instead of the misleading "positive".
64 |   checkNotNull(delegate);
65 |   checkNotNull(funnel);
66 |   checkArgument(capacity >= 0, "capacity must be non-negative");
67 |   checkArgument(fpp >= 0.0 && fpp < 1.0, "fpp must satisfy 0.0 <= fpp < 1.0");
68 |   checkArgument(size >= 0, "size must be non-negative");
69 |   this.delegate = delegate;
70 |   this.funnel = funnel;
71 |   this.capacity = capacity;
72 |   this.fpp = fpp;
73 |   this.size = size;
74 | }
74 |
75 | /**
76 | * Creates a {@link BloomFilter} with the expected number of insertions and expected false
77 | * positive probability.
78 | *
79 | * <p>Note that overflowing a {@link BloomFilter} with significantly more elements than specified,
80 | * will result in its saturation, and a sharp deterioration of its false positive probability.
81 | *
82 | * <p>The constructed {@link BloomFilter} will be serializable if the provided {@link Funnel} is.
83 | *
84 | * <p>It is recommended that the funnel be implemented as a Java enum. This has the benefit of
85 | * ensuring proper serialization and deserialization, which is important since {@link
86 | * #equals(Object)} also relies on object identity of funnels.
87 | *
88 | * @param funnel the funnel of T's that the constructed {@link BloomFilter} will use
89 | * @param capacity the number of expected insertions to the constructed {@link BloomFilter}; must
90 | * be positive
91 | * @param fpp the desired false positive probability (must be positive and less than 1.0)
92 | * @return a {@link BloomFilter}
93 | * @see com.google.common.hash.BloomFilter#create(com.google.common.hash.Funnel, int,
95 | * double)
96 | */
97 | @CheckReturnValue
98 | public static BloomFilter create(Funnel funnel, long capacity, double fpp) {
99 |   final com.google.common.hash.BloomFilter guavaFilter =
100 |       com.google.common.hash.BloomFilter.create(funnel, capacity, fpp);
101 |   return new BloomFilter(guavaFilter, funnel, capacity, fpp, 0L); // starts out empty
102 | }
103 |
104 | /**
105 | * Creates a {@link BloomFilter BloomFilter} with the expected number of insertions and a
106 | * default expected false positive probability of 3%.
107 | *
108 | * <p>Note that overflowing a {@link BloomFilter} with significantly more objects than specified,
109 | * will result in its saturation, and a sharp deterioration of its false positive probability.
110 | *
111 | * <p>The constructed {@link BloomFilter} will be serializable if the provided {@code Funnel}
112 | * is.
113 | *
114 | * <p>It is recommended that the funnel be implemented as a Java enum. This has the benefit of
115 | * ensuring proper serialization and deserialization, which is important since {@link #equals}
116 | * also relies on object identity of funnels.
117 | *
118 | * @param funnel the funnel of T's that the constructed {@link BloomFilter} will use
119 | * @param capacity the number of expected insertions to the constructed {@link BloomFilter}; must
120 | * be positive
121 | * @return a {@link BloomFilter}
122 | * @see com.google.common.hash.BloomFilter#create(com.google.common.hash.Funnel, int)
124 | */
125 | @CheckReturnValue
126 | public static BloomFilter create(Funnel funnel, long capacity) {
127 |   // Same construction as the three-argument factory, fixed at the default 3% FPP.
128 |   return create(funnel, capacity, 0.03D);
129 | }
131 |
132 | /**
133 | * Adds the specified element to this filter. A return value of {@code true} ensures that {@link
134 | * #contains(Object)} given {@code e} will also return {@code true}.
135 | *
136 | * @param e element to be added to this filter
137 | * @return always {@code true} as {@code com.google.common.hash.BloomFilter} cannot fail to add an
138 | * object
139 | * @throws NullPointerException if the specified element is null
140 | * @see #contains(Object)
141 | * @see #addAll(Collection)
142 | * @see #addAll(ProbabilisticFilter)
143 | * @see com.google.common.hash.BloomFilter#put(T)
144 | */
145 | public boolean add(E e) {
146 |   // checkNotNull returns its argument, so the null check and the insert share one statement.
147 |   delegate.put(checkNotNull(e));
148 |   size = LongMath.checkedAdd(size, 1L);
149 |   return true;
150 | }
151 |
152 | /**
153 | * Combines {@code this} filter with another compatible filter. The mutations happen to {@code
154 | * this} instance. Callers must ensure {@code this} filter is appropriately sized to avoid
155 | * saturating it or running out of space.
156 | *
157 | * @param f filter to be combined into {@code this} filter - {@code f} is not mutated
158 | * @return {@code true} if the operation was successful, {@code false} otherwise
159 | * @throws NullPointerException if the specified filter is null
160 | * @throws IllegalArgumentException if {@link #isCompatible(ProbabilisticFilter)} {@code ==
161 | * false}
162 | * @see #add(Object)
163 | * @see #addAll(Collection)
164 | * @see #contains(Object)
165 | */
166 | public boolean addAll(ProbabilisticFilter f) {
167 |   checkNotNull(f);
168 |   final String self = this.getClass().getSimpleName();
169 |   checkArgument(this != f, "Cannot combine a " + self + " with itself.");
170 |   checkArgument(f instanceof BloomFilter,
171 |       "Cannot combine a " + self + " with a " + f.getClass().getSimpleName());
172 |   // The message names the criteria Bloom filters are compared on, not cuckoo table properties.
173 |   checkArgument(this.isCompatible(f), "Cannot combine incompatible filters. " + self +
174 |       " instances must have equal funnels; the same strategy; and the same number of hash " +
175 |       "functions and bit size.");
176 |   delegate.putAll(((BloomFilter) f).delegate);
177 |   size = LongMath.checkedAdd(size, f.sizeLong()); // throws ArithmeticException on overflow
178 |   return true;
179 | }
180 |
181 | /**
182 | * Adds all of the elements in the specified collection to this filter. The behavior of this
183 | * operation is undefined if the specified collection is modified while the operation is in
184 | * progress.
185 | *
186 | * @param c collection containing elements to be added to this filter
187 | * @return {@code true} if all elements of the collection were successfully added, {@code false}
188 | * otherwise
189 | * @throws NullPointerException if the specified collection contains a null element, or if the
190 | * specified collection is null
191 | * @see #add(Object)
192 | * @see #addAll(ProbabilisticFilter)
193 | * @see #contains(Object)
194 | */
195 | public boolean addAll(Collection<? extends E> c) {
196 |   checkNotNull(c);
197 |   for (E e : c) {
198 |     checkNotNull(e); // validate the element, not the already-checked collection
199 |     add(e);
200 |   }
201 |   return true; // add() always succeeds for a Bloom filter
202 | }
203 |
204 | /**
205 | * Returns {@code true} if this filter might contain the specified element, {@code false}
206 | * if this is definitely not the case.
207 | *
208 | * @param e element whose containment in this filter is to be tested
209 | * @return {@code true} if this filter might contain the specified element, {@code false}
210 | * if this is definitely not the case.
211 | * @throws ClassCastException if the type of the specified element is incompatible with this
212 | * filter (optional)
213 | * @throws NullPointerException if the specified element is {@code null} and this filter does not
214 | * permit {@code null} elements
215 | * @see #containsAll(Collection)
216 | * @see #containsAll(ProbabilisticFilter)
217 | * @see #add(Object)
218 | * @see #remove(Object)
219 | * @see com.google.common.hash.BloomFilter#mightContain(T)
220 | */
221 | public boolean contains(E e) {
222 |   return delegate.mightContain(checkNotNull(e)); // null is rejected up front, as add() does
223 | }
224 |
225 | /**
226 | * Returns the current false positive probability ({@code FPP}) of this filter.
227 | *
228 | * @return the probability that {@link #contains(Object)} will erroneously return {@code true}
229 | * given an element that has not actually been added to the filter.
230 | * @see #fpp()
231 | * @see com.google.common.hash.BloomFilter#expectedFpp()
232 | */
233 | public double currentFpp() {
234 | return delegate.expectedFpp(); // the wrapped Guava filter supplies the estimate
235 | }
236 |
237 | /**
238 | * Returns {@code true} if the specified filter is compatible with {@code this} filter. {@code f}
239 | * is considered compatible if {@code this} filter can use it in combinatoric operations (e.g.
240 | * {@link #addAll(ProbabilisticFilter)}, {@link #containsAll(ProbabilisticFilter)}).
241 | *
242 | * For two bloom filters to be compatible, they must:
243 | *
244 | * <ul><li>not be the same instance</li><li>have the same number of hash functions</li><li>have
245 | * the same bit size</li><li>have the same strategy</li><li>have equal funnels</li></ul>
246 | *
247 | * @param f filter to check for compatibility with {@code this} filter
248 | * @return {@code true} if the specified filter is compatible with {@code this} filter
249 | * @throws NullPointerException if the specified filter is {@code null}
250 | * @see #addAll(ProbabilisticFilter)
251 | * @see #containsAll(ProbabilisticFilter)
252 | * @see #removeAll(ProbabilisticFilter)
253 | * @see com.google.common.hash.BloomFilter#isCompatible(com.google.common.hash.BloomFilter)
254 | */
255 | public boolean isCompatible(ProbabilisticFilter f) {
256 |   checkNotNull(f);
257 |   if (!(f instanceof BloomFilter)) return false; // only another BloomFilter can be compatible
258 |   return this.delegate.isCompatible(((BloomFilter) f).delegate);
259 | }
260 |
261 | /**
262 | * Returns {@code true} if this filter might contain all of the elements of the specified
263 | * collection (optional operation). More formally, returns {@code true} if {@link
264 | * #contains(Object)} {@code == true} for all of the elements of the specified collection.
265 | *
266 | * @param c collection containing elements to be checked for containment in this filter
267 | * @return {@code true} if this filter might contain all elements of the specified
268 | * collection
269 | * @throws NullPointerException if the specified collection contains one or more {@code null}
270 | * elements, or if the specified collection is {@code null}
271 | * @see #contains(Object)
272 | * @see #containsAll(ProbabilisticFilter)
273 | */
274 | public boolean containsAll(Collection<? extends E> c) {
275 |   checkNotNull(c);
276 |   for (E e : c) {
277 |     checkNotNull(e);
278 |     if (!contains(e)) return false; // one definite miss settles the answer
279 |   }
280 |   return true;
281 | }
282 |
283 | /**
284 | * Not supported.
285 | *
286 | * @throws UnsupportedOperationException
287 | */
288 | public boolean containsAll(ProbabilisticFilter f) {
289 | throw new UnsupportedOperationException(); // filter-to-filter containment is not supported by this class
290 | }
291 |
292 | /**
293 | * Returns {@code true} if this filter contains no elements.
294 | *
295 | * @return {@code true} if this filter contains no elements
296 | * @see #sizeLong()
297 | */
298 | public boolean isEmpty() {
299 |   return sizeLong() == 0L;
300 | }
301 |
302 | /**
303 | * Returns the number of elements contained in this filter (its cardinality). If this filter
304 | * contains more than {@code Long.MAX_VALUE} elements, returns {@code Long.MAX_VALUE}.
305 | *
306 | * @return the number of elements contained in this filter (its cardinality)
307 | * @see #capacity()
308 | * @see #isEmpty()
309 | */
310 | public long sizeLong() {
311 |   return size < 0 ? Long.MAX_VALUE /* overflow */ : size;
312 | }
313 |
314 | /**
315 | * Returns the number of elements contained in this filter (its cardinality). If this filter
316 | * contains more than {@code Integer.MAX_VALUE} elements, returns {@code Integer.MAX_VALUE}.
317 | *
318 | * @return the number of elements contained in this filter (its cardinality)
319 | * @see #capacity()
320 | * @see #isEmpty()
321 | * @see #sizeLong()
322 | */
323 | public long size() {
324 |   return Math.min(size, Integer.MAX_VALUE); // capped at the int range, still returned as a long
325 | }
326 |
327 | /**
328 | * Returns the number of elements this filter can represent at its requested {@code FPP}. This is
329 | * not a hard limit of the filter implementation. It is permissible for a filter to contain
330 | * more elements than its requested capacity, though its {@code FPP} will suffer.
331 | *
332 | * @return the number of elements this filter can represent at its requested {@code FPP}.
333 | * @see #fpp()
334 | * @see #currentFpp()
335 | * @see #sizeLong()
336 | */
337 | public long capacity() {
338 | return capacity; // the value given at creation; never changes
339 | }
340 |
341 | /**
342 | * Returns the intended {@code FPP} limit of this filter. This is not a hard limit of the filter
343 | * implementation. It is permissible for a filter's {@code FPP} to degrade (e.g. via saturation)
344 | * beyond its intended limit.
345 | *
346 | * @return the intended {@code FPP} limit of this filter.
347 | * @see #currentFpp()
348 | */
349 | public double fpp() {
350 | return fpp; // the value given at creation; see currentFpp() for the delegate's estimate
351 | }
352 |
353 | /**
354 | * Creates a new {@link BloomFilter} that's a copy of this instance. The returned instance {@code
355 | * equals(f) == true} but shares no mutable state.
356 | */
357 | public static BloomFilter copyOf(BloomFilter f) {
358 |   return new BloomFilter(f.delegate.copy(), f.funnel, f.capacity, f.fpp, f.sizeLong());
359 | }
360 |
361 | /**
362 | * Removes all of the elements from this filter. The filter will be empty after this call
363 | * returns.
364 | *
365 | * @see #sizeLong()
366 | * @see #isEmpty()
367 | */
368 | public void clear() {
369 |   this.delegate = com.google.common.hash.BloomFilter.create(funnel, capacity, fpp); // capacity stays a long: no truncating cast
370 |   this.size = 0L;
371 | }
372 |
373 | /**
374 | * Not supported. Standard bloom filters do not support element removal.
375 | *
376 | * @throws UnsupportedOperationException
377 | */
378 | public boolean remove(E e) {
379 | throw new UnsupportedOperationException(); // element removal is not supported by this class
380 | }
381 |
382 | /**
383 | * Not supported. Standard bloom filters do not support element removal.
384 | *
385 | * @throws UnsupportedOperationException
386 | */
387 | public boolean removeAll(Collection<? extends E> c) {
388 |   throw new UnsupportedOperationException(); // element removal is not supported by this class
389 | }
390 |
391 | /**
392 | * Not supported. Standard bloom filters do not support element removal.
393 | *
394 | * @throws UnsupportedOperationException
395 | */
396 | public boolean removeAll(ProbabilisticFilter f) {
397 | throw new UnsupportedOperationException(); // element removal is not supported by this class
398 | }
399 |
400 | @Override
401 | public boolean equals(@Nullable Object object) {
402 |   if (object == this) return true;
403 |   // Test for this wrapper class, the type the cast below targets. Testing for the Guava class
404 |   // made the cast throw ClassCastException and left two wrappers never equal.
405 |   if (!(object instanceof BloomFilter)) return false;
406 |   // Equality is delegated to the wrapped Guava filters, the same state hashCode() is built on.
407 |   return delegate.equals(((BloomFilter) object).delegate);
408 | }
409 |
410 | @Override
411 | public int hashCode() {
412 | return delegate.hashCode(); // hash of the wrapped Guava filter only; capacity, fpp and size are not mixed in
413 | }
414 | }
--------------------------------------------------------------------------------
/src/main/java/com/duprasville/guava/probably/CuckooFilter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2015 Brian Dupras
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5 | * in compliance with the License. You may obtain a copy of the License at
6 | *
7 | * http://www.apache.org/licenses/LICENSE-2.0
8 | *
9 | * Unless required by applicable law or agreed to in writing, software distributed under the License
10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11 | * or implied. See the License for the specific language governing permissions and limitations under
12 | * the License.
13 | */
14 |
15 | package com.duprasville.guava.probably;
16 |
17 | import com.google.common.annotations.Beta;
18 | import com.google.common.annotations.VisibleForTesting;
19 | import com.google.common.base.Objects;
20 | import com.google.common.hash.Funnel;
21 | import com.google.common.primitives.SignedBytes;
22 |
23 | import java.io.DataInputStream;
24 | import java.io.DataOutputStream;
25 | import java.io.IOException;
26 | import java.io.InputStream;
27 | import java.io.OutputStream;
28 | import java.io.Serializable;
29 | import java.util.Collection;
30 | import java.util.Random;
31 |
32 | import javax.annotation.CheckReturnValue;
33 | import javax.annotation.Nullable;
34 |
35 | import static com.google.common.base.Preconditions.checkArgument;
36 | import static com.google.common.base.Preconditions.checkNotNull;
37 | import static com.google.common.math.DoubleMath.log2;
38 | import static com.google.common.math.LongMath.divide;
39 | import static java.lang.Math.ceil;
40 | import static java.lang.Math.pow;
41 | import static java.math.RoundingMode.CEILING;
42 | import static java.math.RoundingMode.HALF_DOWN;
43 |
44 | /**
45 | * A Cuckoo filter for instances of {@code E} that implements the {@link ProbabilisticFilter}
46 | * interface.
47 | *
48 | * <p>"Cuckoo filters can replace Bloom filters for approximate set membership tests.
49 | * Cuckoo filters support adding and removing items dynamically while achieving even higher
50 | * performance than Bloom filters. For applications that store many items and target moderately low
51 | * false positive rates, cuckoo filters have lower space overhead than space-optimized Bloom
52 | * filters. Cuckoo filters outperform previous data structures that extend Bloom filters to support
53 | * deletions substantially in both time and space." - Fan et al.
54 | *
55 | * <p>Cuckoo filters offer constant time performance for the basic operations {@link #add(Object)},
56 | * {@link #remove(Object)}, {@link #contains(Object)} and {@link #sizeLong()}.
57 | *
58 | * <p>This class does not permit {@code null} elements.
59 | *
60 | * <p>Cuckoo filters implement the {@link Serializable} interface. They also support a more compact
61 | * serial representation via the {@link #writeTo(OutputStream)} and {@link #readFrom(InputStream,
62 | * Funnel)} methods. Both serialized forms will continue to be supported by future versions of this
63 | * library. However, serial forms generated by newer versions of the code may not be readable by
64 | * older versions of the code (e.g., a serialized cuckoo filter generated today may not be
65 | * readable by a binary that was compiled 6 months ago).
70 | *
71 | * @param <E> the type of elements that this filter accepts
72 | * @author Brian Dupras
73 | * @author Alex Beal
74 | * @see ProbabilisticFilter
75 | */
76 | @Beta
77 | public final class CuckooFilter implements ProbabilisticFilter, Serializable {
78 | static final int MAX_ENTRIES_PER_BUCKET = 8; // upper bound on entries per bucket (b); the MIN_FPP derivation below uses b = 8
79 | static final int MIN_ENTRIES_PER_BUCKET = 2; // lower bound on entries per bucket (b)
80 |
81 | /**
82 | * Minimum false positive probability supported, 8.67E-19.
83 | *
84 | * CuckooFilter § 5.1 Eq. (6), "f ≥ log2(2b/e) = [log2(1/e) + log2(2b)]"
85 | * (b) entries per bucket: 8 at e <= 0.00001
86 | * (f) bits per entry: 64-bits max
87 | * (e) false positive probability
88 | *
89 | * 64 = log2(16/e) = [log2(1/e) + log2(16)]
90 | * 64 = log2(1/e) + 4
91 | * 60 = log2(1/e)
92 | * 2^60 = 1/e
93 | * e = 1/2^60
94 | * e = 8.673617379884035E-19
95 | */
96 | static double MIN_FPP = 1.0D / pow(2, 60);
97 |
98 | /**
99 | * Maximum false positive probability supported, 0.99.
100 | */
101 | static double MAX_FPP = 0.99D;
102 |
103 | private final CuckooTable table; // element storage: duplicated by copy(), emptied by clear()
104 | private final Funnel<? super E> funnel; // handed to the strategy on add/contains/remove
105 | private final CuckooStrategy cuckooStrategy; // carries out every operation on the table
106 | private final double fpp; // value supplied at construction and passed on by copy()
107 |
108 | /**
109 | * Creates a CuckooFilter.
110 | */
111 | private CuckooFilter(
112 |     CuckooTable table, Funnel<? super E> funnel, CuckooStrategy cuckooStrategy, double fpp) {
113 |   this.table = checkNotNull(table);
114 |   this.funnel = checkNotNull(funnel);
115 |   this.cuckooStrategy = checkNotNull(cuckooStrategy);
116 |   this.fpp = fpp; // stored as given; no range check happens in this constructor
117 | }
118 |
119 | /**
120 | * Returns a new {@link CuckooFilter} that's a copy of this instance. The new instance is equal to
121 | * this instance but shares no mutable state.
122 | */
123 | @CheckReturnValue
124 | public CuckooFilter copy() {
125 | return new CuckooFilter(table.copy(), funnel, cuckooStrategy, fpp); // only the table is duplicated; funnel and strategy are passed by reference
126 | }
127 |
128 | /**
129 | * Returns {@code true} if this filter might contain the specified element, {@code false}
130 | * if this is definitely not the case.
131 | *
132 | * @param e element whose containment in this filter is to be tested
133 | * @return {@code true} if this filter might contain the specified element, {@code false}
134 | * if this is definitely not the case.
135 | * @throws NullPointerException if the specified element is {@code null} and this filter does not
136 | * permit {@code null} elements
137 | * @see #containsAll(Collection)
138 | * @see #containsAll(ProbabilisticFilter)
139 | * @see #add(Object)
140 | * @see #remove(Object)
141 | */
142 | @CheckReturnValue
143 | public boolean contains(E e) {
144 |   final E element = checkNotNull(e);
145 |   return cuckooStrategy.contains(element, funnel, table);
146 | }
147 |
148 | /**
149 | * Returns {@code true} if this filter might contain all of the elements of the specified
150 | * collection. More formally, returns {@code true} if {@link #contains(Object)} {@code == true}
151 | * for all of the elements of the specified collection.
152 | *
153 | * @param c collection containing elements to be checked for containment in this filter
154 | * @return {@code true} if this filter might contain all elements of the specified
155 | * collection
156 | * @throws NullPointerException if the specified collection contains one or more {@code null}
157 | * elements, or if the specified collection is {@code null}
158 | * @see #contains(Object)
159 | * @see #containsAll(ProbabilisticFilter)
160 | */
161 | public boolean containsAll(Collection<? extends E> c) {
162 |   checkNotNull(c);
163 |   for (E e : c) {
164 |     checkNotNull(e);
165 |     if (!contains(e)) return false; // one definite miss settles the answer
166 |   }
167 |   return true;
168 | }
169 |
170 | /**
171 | * Returns {@code true} if this filter might contain all elements contained in the
172 | * specified filter. {@link #isCompatible(ProbabilisticFilter)} must return {@code true} for the
173 | * given filter.
174 | *
175 | * @param f cuckoo filter containing elements to be checked for probable containment in this
176 | * filter
177 | * @return {@code true} if this filter might contain all elements contained in the
178 | * specified filter, {@code false} if this is definitely not the case.
179 | * @throws NullPointerException if the specified filter is {@code null}
180 | * @throws IllegalArgumentException if {@link #isCompatible(ProbabilisticFilter)} {@code == false}
181 | * given {@code f}
182 | * @see #contains(Object)
183 | * @see #containsAll(Collection)
184 | */
185 | public boolean containsAll(ProbabilisticFilter f) {
186 |   checkNotNull(f);
187 |   if (this != f) {
188 |     checkCompatibility(f, "compare");
189 |     return this.cuckooStrategy.containsAll(this.table, ((CuckooFilter) f).table);
190 |   }
191 |   return true; // same instance: answered without consulting the table
192 | }
193 |
194 | /**
195 | * Adds the specified element to this filter. Returns {@code true} if {@code e} was successfully
196 | * added to the filter, {@code false} if this is definitely not the case, as would be the
197 | * case when the filter becomes saturated. Saturation may occur even if {@link #sizeLong()} {@code
198 | * < } {@link #capacity()}, e.g. if {@code e} has already been added {@code 2*b} times to the
199 | * cuckoo filter, it will have saturated the number of entries per bucket ({@code b}) allocated
200 | * within the filter and a subsequent invocation will return {@code false}. A return value of
201 | * {@code true} ensures that {@link #contains(Object)} given {@code e} will also return {@code
202 | * true}.
203 | *
204 | * @param e element to be added to this filter
205 | * @return {@code true} if {@code e} was successfully added to the filter, {@code false} if this
206 | * is definitely not the case
207 | * @throws NullPointerException if the specified element is {@code null}
208 | * @todo consider exposing {@code b} as maxEntriesPerElement()?
209 | * @see #contains(Object)
210 | * @see #addAll(Collection)
211 | * @see #addAll(ProbabilisticFilter)
212 | */
213 | @CheckReturnValue
214 | public boolean add(E e) {
215 |   final E element = checkNotNull(e);
216 |   return cuckooStrategy.add(element, funnel, table);
217 | }
218 |
219 | /**
220 | * Combines {@code this} filter with another compatible filter. The mutations happen to {@code
221 | * this} instance. Callers must ensure {@code this} filter is appropriately sized to avoid
222 | * saturating it or running out of space.
223 | *
224 | * @param f filter to be combined into {@code this} filter - {@code f} is not mutated
225 | * @return {@code true} if the operation was successful, {@code false} otherwise
226 | * @throws NullPointerException if the specified filter is {@code null}
227 | * @throws IllegalArgumentException if {@link #isCompatible(ProbabilisticFilter)} {@code ==
228 | * false}
229 | * @see #add(Object)
230 | * @see #addAll(Collection)
231 | * @see #contains(Object)
232 | */
233 | @CheckReturnValue
234 | public boolean addAll(ProbabilisticFilter f) {
235 |   checkNotNull(f);
236 |   final String selfName = this.getClass().getSimpleName();
237 |   checkArgument(this != f, "Cannot combine a " + selfName + " with itself.");
238 |   checkCompatibility(f, "combine");
239 |   return this.cuckooStrategy.addAll(this.table, ((CuckooFilter) f).table);
240 | }
241 |
242 | /**
243 | * Adds all of the elements in the specified collection to this filter. The behavior of this
244 | * operation is undefined if the specified collection is modified while the operation is in
245 | * progress. Some elements of {@code c} may have been added to the filter even when {@code false}
246 | * is returned. In this case, the caller may {@link #remove(Object)} the additions by comparing
247 | * the filter {@link #sizeLong()} before and after the invocation, knowing that additions from
248 | * {@code c} occurred in {@code c}'s iteration order.
249 | *
250 | * @param c collection containing elements to be added to this filter
251 | * @return {@code true} if all elements of the collection were successfully added, {@code false}
252 | * otherwise
253 | * @throws NullPointerException if the specified collection contains a {@code null} element, or if
254 | * the specified collection is {@code null}
255 | * @see #add(Object)
256 | * @see #addAll(ProbabilisticFilter)
257 | * @see #contains(Object)
258 | */
259 | public boolean addAll(Collection<? extends E> c) {
260 |   checkNotNull(c);
261 |   for (E e : c) {
262 |     checkNotNull(e);
263 |     if (!add(e)) {
264 |       return false; // stop at the first rejected element; earlier additions stay in the filter
265 |     }
266 |   }
267 |   return true;
268 | }
269 |
270 | /**
271 | * Removes all of the elements from this filter. The filter will be empty after this call
272 | * returns.
273 | *
274 | * @see #sizeLong()
275 | * @see #isEmpty()
276 | */
277 | public void clear() {
278 | table.clear(); // emptying the table is the whole operation
279 | }
280 |
281 |
282 | /**
283 | * Removes the specified element from this filter. The element must be contained in the filter
284 | * prior to invocation. If {@code false} is returned, this is definitely an indication that
285 | * the specified element wasn't contained in the filter prior to invocation. This condition is an
286 | * error, and this filter can no longer be relied upon to return correct {@code false} responses
287 | * from {@link #contains(Object)}, unless {@link #isEmpty()} is also {@code true}.
288 | *
289 | * @param e element to be removed from this filter
290 | * @return {@code true} if this filter probably contained the specified element, {@code false}
291 | * otherwise
292 | * @throws NullPointerException if the specified element is {@code null} and this filter does not
293 | * permit {@code null} elements
294 | * @see #contains(Object)
295 | * @see #removeAll(Collection)
296 | * @see #removeAll(ProbabilisticFilter)
297 | */
298 | @CheckReturnValue
299 | public boolean remove(E e) {
300 |   final E element = checkNotNull(e);
301 |   return cuckooStrategy.remove(element, funnel, table);
302 | }
303 |
304 |
305 | /**
306 | * Removes from this filter all of its elements that are contained in the specified collection.
307 | * All elements contained in the specified collection must be contained in the filter prior to
308 | * invocation.
309 | *
310 | * If {@code false} is returned, this is definitely an indication that the specified
311 | * collection contained elements that were not contained in this filter prior to invocation, and
312 | * this filter can no longer be relied upon to return correct {@code false} responses from {@link
313 | * #contains(Object)}, unless {@link #isEmpty()} is also {@code true}.
314 | *
315 | * Some elements of {@code c} may have been removed from the filter even when {@code false} is
316 | * returned. In this case, the caller may {@link #add(Object)} the removals back by comparing the
317 | * filter {@link #sizeLong()} before and after the invocation, knowing that removals from {@code
318 | * c} occurred in {@code c}'s iteration order.
319 | *
320 | * @param c collection containing elements to be removed from this filter
321 | * @return {@code true} if all of the elements of the specified collection were successfully
322 | * removed from the filter, {@code false} if any of the elements was not successfully removed
323 | * @throws NullPointerException if the specified collection contains one or more {@code null}
324 | * elements, or if the specified collection is {@code null}
325 | * @see #contains(Object)
326 | * @see #remove(Object)
327 | * @see #removeAll(ProbabilisticFilter)
328 | */
329 | @CheckReturnValue
330 | public boolean removeAll(Collection<? extends E> c) {
331 |   checkNotNull(c);
332 |   for (E e : c) {
333 |     checkNotNull(e);
334 |     if (!remove(e)) {
335 |       return false; // stop at the first failed removal; earlier removals are not undone
336 |     }
337 |   }
338 |   return true;
339 | }
340 |
341 | /**
342 | * Subtracts the specified filter from {@code this} filter. The mutations happen to {@code this}
343 | * instance. Callers must ensure that the specified filter represents elements that are currently
344 | * contained in {@code this} filter.
345 | *
346 | * If {@code false} is returned, this is definitely an indication that the specified filter
347 | * contained elements that were not contained in this filter prior to invocation and this filter
348 | * can no longer be relied upon to return correct {@code false} responses from {@link
349 | * #contains(Object)}, unless {@link #isEmpty()} is also {@code true}.
350 | *
351 | * @param f filter containing elements to remove from {@code this} filter - {@code f} is not
352 | * mutated
353 | * @return {@code true} if the operation was successful, {@code false} otherwise
354 | * @throws NullPointerException if the specified filter is null
355 | * @throws IllegalArgumentException if {@link #isCompatible(ProbabilisticFilter)} {@code == false}
356 | * given {@code f}
357 | * @see #contains(Object)
358 | * @see #remove(Object)
359 | * @see #removeAll(Collection)
360 | */
361 | @CheckReturnValue
362 | public boolean removeAll(ProbabilisticFilter f) {
363 |   checkNotNull(f);
364 |   if (this != f) {
365 |     checkCompatibility(f, "remove");
366 |     return this.cuckooStrategy.removeAll(this.table, ((CuckooFilter) f).table);
367 |   }
368 |   clear(); // removing a filter from itself empties it
369 |   return true;
370 | }
371 |
372 | /**
373 | * Returns the number of elements contained in this filter (its cardinality). If this filter
374 | * contains more than {@code Long.MAX_VALUE} elements, returns {@code Long.MAX_VALUE}.
375 | *
376 | * @return the number of elements contained in this filter (its cardinality)
377 | * @see #capacity()
378 | * @see #isEmpty()
379 | * @see #size()
380 | */
381 | public long sizeLong() {
382 | return table.size(); // the element count is reported by the table
383 | }
384 |
385 | /**
386 | * Returns the number of elements contained in this filter (its cardinality). If this filter
387 | * contains more than {@code Integer.MAX_VALUE} elements, returns {@code Integer.MAX_VALUE}.
388 | *
389 | * @return the number of elements contained in this filter (its cardinality)
390 | * @see #capacity()
391 | * @see #isEmpty()
392 | * @see #sizeLong()
393 | */
394 | public long size() {
395 | final long ret = sizeLong();
396 | return ret > Integer.MAX_VALUE ? Integer.MAX_VALUE : ret;
397 | }
398 |
399 | /**
400 | * Returns the number of elements this filter can represent at its requested {@code FPP}. It's
401 | * sometimes possible to add more elements to a cuckoo filter than its capacity since the load
402 | * factor used to calculate its optimal storage size is less than 100%.
403 | *
404 | * @return the number of elements this filter can represent at its requested {@code FPP}.
405 | * @see #fpp()
406 | * @see #currentFpp()
407 | * @see #sizeLong()
408 | * @see #optimalLoadFactor(int)
409 | */
410 | public long capacity() {
411 | return (long) Math.floor(table.capacity() * optimalLoadFactor(table.numEntriesPerBucket()));
412 | }
413 |
414 | /**
415 | * Returns the approximate {@code FPP} limit of this filter. This is not a hard limit, however a
416 | * cuckoo filter will not exceed its {@code FPP} by a significant amount as the filter becomes
417 | * saturated.
418 | *
419 | * @return the intended {@code FPP} limit of this filter.
420 | * @see #currentFpp()
421 | */
422 | public double fpp() {
423 | return table.fppAtGivenLoad(optimalLoadFactor(table.numEntriesPerBucket()));
424 | }
425 |
426 | /**
427 | * Returns the current false positive probability ({@code FPP}) of this filter.
428 | *
429 | * @return the probability that {@link #contains(Object)} will erroneously return {@code true}
430 | * given an element that has not actually been added to the filter. Unlike a bloom filter, a
431 | * cuckoo filter cannot become saturated to the point of significantly degrading its {@code FPP}.
432 | * @see CuckooFilter#fpp()
433 | */
434 | public double currentFpp() {
435 | return table.currentFpp();
436 | }
437 |
438 | /**
439 | * Returns {@code true} if this filter contains no elements.
440 | *
441 | * @return {@code true} if this filter contains no elements
442 | * @see #sizeLong()
443 | */
444 | public boolean isEmpty() {
445 | return 0 == sizeLong();
446 | }
447 |
448 | /**
449 | * Returns {@code true} if {@code f} is compatible with {@code this} filter. {@code f} is
450 | * considered compatible if {@code this} filter can use it in combinatoric operations (e.g. {@link
451 | * #addAll(ProbabilisticFilter)}).
452 | *
453 | * @param f The filter to check for compatibility.
454 | * @return {@code true} if {@code f} is compatible with {@code this} filter.
455 | */
456 | public boolean isCompatible(ProbabilisticFilter f) {
457 | checkNotNull(f);
458 |
459 | return (this != f)
460 | && (f instanceof CuckooFilter)
461 | && (this.table.isCompatible(((CuckooFilter) f).table))
462 | && (this.cuckooStrategy.equals(((CuckooFilter) f).cuckooStrategy))
463 | && (this.funnel.equals(((CuckooFilter) f).funnel));
464 | }
465 |
466 | @Override
467 | public boolean equals(@Nullable Object object) {
468 | if (object == this) {
469 | return true;
470 | }
471 | if (object instanceof CuckooFilter) {
472 | CuckooFilter> that = (CuckooFilter>) object;
473 | return this.funnel.equals(that.funnel)
474 | && this.cuckooStrategy.equals(that.cuckooStrategy)
475 | && this.table.equals(that.table)
476 | && this.cuckooStrategy.equivalent(this.table, that.table)
477 | ;
478 | }
479 | return false;
480 | }
481 |
482 | @Override
483 | public int hashCode() {
484 | return Objects.hashCode(funnel, cuckooStrategy, table);
485 | }
486 |
487 | /**
488 | * Creates a filter with the expected number of insertions and expected false positive
489 | * probability.
Note that overflowing a {@link CuckooFilter} with significantly more
490 | * objects than specified, will result in its saturation causing {@link #add(Object)} to reject
491 | * new additions.
The constructed {@link CuckooFilter} will be serializable if the
492 | * provided {@code Funnel} is.
It is recommended that the funnel be implemented as a
493 | * Java enum. This has the benefit of ensuring proper serialization and deserialization, which is
494 | * important since {@link #equals} also relies on object identity of funnels.
495 | *
496 | * @param funnel the funnel of T's that the constructed {@link CuckooFilter} will use
497 | * @param capacity the number of expected insertions to the constructed {@link CuckooFilter}; must
498 | * be positive
499 | * @param fpp the desired false positive probability (must be positive and less than 1.0).
500 | * @return a {@link CuckooFilter}
501 | */
502 | @CheckReturnValue
503 | public static CuckooFilter create(
504 | Funnel super T> funnel, long capacity, double fpp) {
505 | return create(funnel, capacity, fpp,
506 | CuckooStrategies.MURMUR128_BEALDUPRAS_32.strategy());
507 | }
508 |
509 | @VisibleForTesting
510 | static CuckooFilter create(Funnel super T> funnel, long capacity, double fpp,
511 | CuckooStrategy cuckooStrategy) {
512 | checkNotNull(funnel);
513 | checkArgument(capacity > 0, "Expected insertions (%s) must be > 0", capacity);
514 | checkArgument(fpp > 0.0D, "False positive probability (%s) must be > 0.0", fpp);
515 | checkArgument(fpp < 1.0D, "False positive probability (%s) must be < 1.0", fpp);
516 | checkNotNull(cuckooStrategy);
517 |
518 | int numEntriesPerBucket = optimalEntriesPerBucket(fpp);
519 | long numBuckets = optimalNumberOfBuckets(capacity, numEntriesPerBucket);
520 | int numBitsPerEntry = optimalBitsPerEntry(fpp, numEntriesPerBucket);
521 |
522 | try {
523 | return new CuckooFilter(new CuckooTable(numBuckets,
524 | numEntriesPerBucket, numBitsPerEntry), funnel, cuckooStrategy, fpp);
525 | } catch (IllegalArgumentException e) {
526 | throw new IllegalArgumentException("Could not create CuckooFilter of " + numBuckets +
527 | " buckets, " + numEntriesPerBucket + " entries per bucket, " + numBitsPerEntry +
528 | " bits per entry", e);
529 | }
530 | }
531 |
532 | /**
533 | * Creates a filter with the expected number of insertions and a default expected false positive
534 | * probability of 3.2%.
Note that overflowing a {@code CuckooFilter} with significantly
535 | * more objects than specified, will result in its saturation causing {@link #add(Object)} to
536 | * reject new additions.
The constructed {@link CuckooFilter} will be serializable if the
537 | * provided {@code Funnel} is.
It is recommended that the funnel be implemented as a
538 | * Java enum. This has the benefit of ensuring proper serialization and deserialization, which is
539 | * important since {@link #equals} also relies on object identity of funnels.
540 | *
541 | * @param funnel the funnel of T's that the constructed {@link CuckooFilter} will use
542 | * @param capacity the number of expected insertions to the constructed {@link CuckooFilter}; must
543 | * be positive
544 | * @return a {@link CuckooFilter}
545 | */
546 | @CheckReturnValue
547 | public static CuckooFilter create(Funnel super T> funnel, long capacity) {
548 | return create(funnel, capacity, 0.032D);
549 | }
550 |
551 | /*
552 | * Space optimization cheat sheet, per CuckooFilter § 5.1 :
553 | *
554 | * Given:
555 | * n: expected insertions
556 | * e: expected false positive probability (e.g. 0.03D for 3% fpp)
557 | *
558 | * Choose:
559 | * b: bucket size in entries (2, 4, 8)
560 | * a: load factor (proportional to b)
561 | *
562 | * Calculate:
563 | * f: fingerprint size in bits
564 | * m: table size in buckets
565 | *
566 | *
567 | * 1) Choose b = 8 when e ≤ 0.00001; b = 4 when 0.00001 < e ≤ 0.002;
568 | * b = 2 when e > 0.002
569 | * ref: CuckooFilter § 5.1 ¶ 5, "Optimal bucket size"
570 | *
571 | * 2) Choose a = 50% | 84% | 95.5% | 98%
572 | * when b = 1 | 2 | 4 | 8
573 | * ref: CuckooFilter § 5.1 ¶ 2, "(1) Larger buckets improve table occupancy"
574 | *
575 | * 3) Optimal f = ceil( log2(2b/e) )
576 | * ref: CuckooFilter § 5.1 Eq. (6), "f ≥ log2(2b/e) = [log2(1/e) + log2(2b)]"
577 | *
578 | * 4) Required m = evenCeil( ceiling( ceiling( n/a ) / b ) )
579 | * Minimum entries (B) = n/a rounded up
580 | * Minimum buckets (m) = B/b rounded up to an even number
581 | */
582 |
583 | /**
584 | * Returns the optimal number of entries per bucket, or bucket size, ({@code b}) given the
585 | * expected false positive probability ({@code e}).
586 | *
587 | * CuckooFilter § 5.1 ¶ 5, "Optimal bucket size"
588 | *
589 | * @param e the desired false positive probability (must be positive and less than 1.0)
590 | * @return optimal number of entries per bucket
591 | */
592 | @VisibleForTesting
593 | static int optimalEntriesPerBucket(double e) {
594 | checkArgument(e > 0.0D, "e must be > 0.0");
595 | if (e <= 0.00001) {
596 | return MAX_ENTRIES_PER_BUCKET;
597 | } else if (e <= 0.002) {
598 | return MAX_ENTRIES_PER_BUCKET / 2;
599 | } else {
600 | return MIN_ENTRIES_PER_BUCKET;
601 | }
602 | }
603 |
604 | /**
605 | * Returns the optimal load factor ({@code a}) given the number of entries per bucket ({@code
606 | * b}).
607 | *
608 | * CuckooFilter § 5.1 ¶ 2, "(1) Larger buckets improve table occupancy"
609 | *
610 | * @param b number of entries per bucket
611 | * @return load factor, positive and less than 1.0
612 | */
613 | @VisibleForTesting
614 | static double optimalLoadFactor(int b) {
615 | checkArgument(b == 2 || b == 4 || b == 8, "b must be 2, 4, or 8");
616 | if (b == 2) {
617 | return 0.84D;
618 | } else if (b == 4) {
619 | return 0.955D;
620 | } else {
621 | return 0.98D;
622 | }
623 | }
624 |
625 | /**
626 | * Returns the optimal number of bits per entry ({@code f}) given the false positive probability
627 | * ({@code e}) and the number of entries per bucket ({@code b}).
628 | *
629 | * CuckooFilter § 5.1 Eq. (6), "f ≥ log2(2b/e) = [log2(1/e) + log2(2b)]"
630 | *
631 | * @param e the desired false positive probability (must be positive and less than 1.0)
632 | * @param b number of entries per bucket
633 | * @return number of bits per entry
634 | */
635 | @VisibleForTesting
636 | static int optimalBitsPerEntry(double e, int b) {
637 | checkArgument(e >= MIN_FPP, "Cannot create CuckooFilter with FPP[" + e +
638 | "] < CuckooFilter.MIN_FPP[" + CuckooFilter.MIN_FPP + "]");
639 | return log2(2 * b / e, HALF_DOWN);
640 | }
641 |
642 | /**
643 | * Returns the minimal required number of buckets given the expected insertions {@code n}, and the
644 | * number of entries per bucket ({@code b}).
645 | *
646 | * @param n the number of expected insertions
647 | * @param b number of entries per bucket
648 | * @return number of buckets
649 | */
650 | @VisibleForTesting
651 | static long optimalNumberOfBuckets(long n, int b) {
652 | checkArgument(n > 0, "n must be > 0");
653 | return evenCeil(divide((long) ceil(n / optimalLoadFactor(b)), b, CEILING));
654 | }
655 |
656 | static long evenCeil(long n) {
657 | return (n + 1) / 2 * 2;
658 | }
659 |
660 | private Object writeReplace() {
661 | return new SerialForm(this);
662 | }
663 |
664 | /**
665 | * Returns the size in bits of the underlying cuckoo table data structure.
666 | */
667 | @VisibleForTesting
668 | long bitSize() {
669 | return table.bitSize();
670 | }
671 |
672 | private static class SerialForm implements Serializable {
673 | final long[] data;
674 | final long size;
675 | final long checksum;
676 | final long numBuckets;
677 | final int numEntriesPerBucket;
678 | final int numBitsPerEntry;
679 | final Funnel super T> funnel;
680 | final CuckooStrategy cuckooStrategy;
681 | final double fpp;
682 |
683 | SerialForm(CuckooFilter filter) {
684 | this.data = filter.table.data();
685 | this.numBuckets = filter.table.numBuckets();
686 | this.numEntriesPerBucket = filter.table.numEntriesPerBucket();
687 | this.numBitsPerEntry = filter.table.numBitsPerEntry();
688 | this.size = filter.table.size();
689 | this.checksum = filter.table.checksum();
690 | this.funnel = filter.funnel;
691 | this.cuckooStrategy = filter.cuckooStrategy;
692 | this.fpp = filter.fpp;
693 | }
694 |
695 | Object readResolve() {
696 | return new CuckooFilter(
697 | new CuckooTable(data, size, checksum, numBuckets, numEntriesPerBucket, numBitsPerEntry),
698 | funnel, cuckooStrategy, fpp);
699 | }
700 |
701 | private static final long serialVersionUID = 1;
702 | }
703 |
704 | /**
705 | * Writes this cuckoo filter to an output stream, with a custom format (not Java serialization).
706 | * This has been measured to save at least 400 bytes compared to regular serialization.
707 | *
708 | * Use {@link #readFrom(InputStream, Funnel)} to reconstruct the written CuckooFilter.
709 | */
710 | public void writeTo(OutputStream out) throws IOException {
711 | /*
712 | * Serial form:
713 | * 1 signed byte for the strategy
714 | * 1 IEEE 754 floating-point double, the expected FPP
715 | * 1 big endian long, the number of entries
716 | * 1 big endian long, the checksum of entries
717 | * 1 big endian long for the number of buckets
718 | * 1 big endian int for the number of entries per bucket
719 | * 1 big endian int for the fingerprint size in bits
720 | * 1 big endian int, the number of longs in the filter table's data
721 | * N big endian longs of the filter table's data
722 | */
723 | DataOutputStream dout = new DataOutputStream(out);
724 | dout.writeByte(SignedBytes.checkedCast(cuckooStrategy.ordinal()));
725 | dout.writeDouble(fpp);
726 | dout.writeLong(table.size());
727 | dout.writeLong(table.checksum());
728 | dout.writeLong(table.numBuckets());
729 | dout.writeInt(table.numEntriesPerBucket());
730 | dout.writeInt(table.numBitsPerEntry());
731 | dout.writeInt(table.data().length);
732 |
733 | for (long value : table.data()) {
734 | dout.writeLong(value);
735 | }
736 | }
737 |
738 | /**
739 | * Reads a byte stream, which was written by {@link #writeTo(OutputStream)}, into a {@link
740 | * CuckooFilter}. The {@code Funnel} to be used is not encoded in the stream, so it must be
741 | * provided here. Warning: the funnel provided must behave identically to the one
742 | * used to populate the original Cuckoo filter!
743 | *
744 | * @throws IOException if the InputStream throws an {@code IOException}, or if its data does not
745 | * appear to be a CuckooFilter serialized using the {@link
746 | * #writeTo(OutputStream)} method.
747 | */
748 | @CheckReturnValue
749 | public static CuckooFilter readFrom(InputStream in, Funnel funnel) throws IOException {
750 | checkNotNull(in, "InputStream");
751 | checkNotNull(funnel, "Funnel");
752 | int strategyOrdinal = -1;
753 | double fpp = -1.0D;
754 | long size = -1L;
755 | long checksum = -1L;
756 | long numBuckets = -1L;
757 | int numEntriesPerBucket = -1;
758 | int numBitsPerEntry = -1;
759 | int dataLength = -1;
760 | try {
761 | DataInputStream din = new DataInputStream(in);
762 | // currently this assumes there is no negative ordinal; will have to be updated if we
763 | // add non-stateless strategies (for which we've reserved negative ordinals; see
764 | // Strategy.ordinal()).
765 | strategyOrdinal = din.readByte();
766 | fpp = din.readDouble();
767 | size = din.readLong();
768 | checksum = din.readLong();
769 | numBuckets = din.readLong();
770 | numEntriesPerBucket = din.readInt();
771 | numBitsPerEntry = din.readInt();
772 | dataLength = din.readInt();
773 |
774 | CuckooStrategy cuckooStrategy = CuckooStrategies.values()[strategyOrdinal].strategy();
775 | long[] data = new long[dataLength];
776 | for (int i = 0; i < data.length; i++) {
777 | data[i] = din.readLong();
778 | }
779 | return new CuckooFilter(
780 | new CuckooTable(data, size, checksum, numBuckets, numEntriesPerBucket, numBitsPerEntry),
781 | funnel, cuckooStrategy, fpp);
782 | } catch (RuntimeException e) {
783 | IOException ioException = new IOException(
784 | "Unable to deserialize CuckooFilter from InputStream."
785 | + " strategyOrdinal: " + strategyOrdinal
786 | + " fpp: " + fpp
787 | + " size: " + size
788 | + " checksum: " + checksum
789 | + " numBuckets: " + numBuckets
790 | + " numEntriesPerBucket: " + numEntriesPerBucket
791 | + " numBitsPerEntry: " + numBitsPerEntry
792 | + " dataLength: " + dataLength);
793 | ioException.initCause(e);
794 | throw ioException;
795 | }
796 | }
797 |
798 | /**
799 | * Returns the number of longs required by a CuckooTable for storage given the dimensions chosen
800 | * by the CuckooFilter to support {@code capacity) @ {@code fpp}.
801 | *
802 | * CuckooTable current impl uses a single long[] for data storage, so the calculated value must be
803 | * <= Integer.MAX_VALUE at this time.
804 | */
805 | @VisibleForTesting
806 | static int calculateDataLength(long capacity, double fpp) {
807 | return CuckooTable.calculateDataLength(
808 | optimalNumberOfBuckets(capacity, optimalEntriesPerBucket(fpp)),
809 | optimalEntriesPerBucket(fpp),
810 | optimalBitsPerEntry(fpp, optimalEntriesPerBucket(fpp)));
811 | }
812 |
813 | @Override
814 | public String toString() {
815 | return "CuckooFilter{" +
816 | "table=" + table +
817 | ", funnel=" + funnel +
818 | ", strategy=" + cuckooStrategy +
819 | ", capacity=" + capacity() +
820 | ", fpp=" + fpp +
821 | ", currentFpp=" + currentFpp() +
822 | ", size=" + sizeLong() +
823 | '}';
824 | }
825 |
826 | private void checkCompatibility(ProbabilisticFilter f, String verb) {
827 | checkArgument(f instanceof CuckooFilter, "Cannot" + verb + " a " +
828 | this.getClass().getSimpleName() + " with a " + f.getClass().getSimpleName());
829 | checkArgument(this.isCompatible(f), "Cannot " + verb + " incompatible filters. " +
830 | this.getClass().getSimpleName() + " instances must have equivalent funnels; the same " +
831 | "strategy; and the same number of buckets, entries per bucket, and bits per entry.");
832 | }
833 |
834 | }
835 |
--------------------------------------------------------------------------------
/src/main/java/com/duprasville/guava/probably/CuckooStrategies.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2015 Brian Dupras
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5 | * in compliance with the License. You may obtain a copy of the License at
6 | *
7 | * http://www.apache.org/licenses/LICENSE-2.0
8 | *
9 | * Unless required by applicable law or agreed to in writing, software distributed under the License
10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11 | * or implied. See the License for the specific language governing permissions and limitations under
12 | * the License.
13 | */
14 |
15 | package com.duprasville.guava.probably;
16 |
17 | import com.google.common.hash.Hashing;
18 |
19 | /**
20 | * Collections of strategies of generating the f-bit fingerprint, index i1 and index i2 required for
21 | * an element to be mapped to a CuckooTable of m buckets with hash function h. These strategies are
22 | * part of the serialized form of the Cuckoo filters that use them, thus they must be preserved as
23 | * is (no updates allowed, only introduction of new versions). Important: the order of the
24 | * constants cannot change, and they cannot be deleted - we depend on their ordinal for CuckooFilter
25 | * serialization.
26 | *
27 | * @author Brian Dupras
28 | */
29 | public enum CuckooStrategies {
30 | /**
31 | * Adaptation of "Cuckoo Filter: Practically Better Than Bloom", Bin Fan, et al, that is
32 | * comparable to a Bloom Filter's memory efficiency, supports entry deletion, and can accept up to
33 | * 12.8 billion entries at 3% FPP.
34 | *
35 | *
This strategy uses 32 bits of {@link Hashing#murmur3_128} to find an entry's primary index.
36 | * The next non-zero f-bit segment of the hash is used as the entry's fingerprint. An entry's
37 | * alternate index is defined as {@code [hash(fingerprint) * parsign(index)] modulo bucket_count},
38 | * where {@code hash(fingerprint)} is always odd, and {@code parsign(index)} is defined as {@code
39 | * +1} when {@code index} is even and {@code -1} when {@code index} is odd. The filter's bucket
40 | * count is rounded up to an even number. By specifying an even number of buckets and an odd
41 | * fingerprint hash, the parity of the alternate index is guaranteed to be opposite the parity of
42 | * the primary index. The use of the index's parity to apply a sign to {@code hash(fingerprint)}
43 | * causes the operation to be reversible, i.e. {@code index(e) == altIndex(altIndex(e))}.
44 | *
45 | *
A notable difference of this strategy from "Cuckoo Filter" is the method of selecting an
46 | * entry's alternate index. In the paper, the alternate index is defined as {@code index xor
47 | * hash(fingerprint)}. The use of {@code xor} requires that the index space be defined as
48 | * [0..2^f]. The side-effect of this is that the Cuckoo Filter's bucket count must be a power of
49 | * 2, meaning the memory utilization of the filter must be "rounded up" to the next power of two.
50 | * This side-effect of the paper's algorithm is avoided by the algorithm as described above.
51 | */
52 | MURMUR128_BEALDUPRAS_32() {
53 | @Override
54 | public CuckooStrategy strategy() {
55 | return new CuckooStrategyMurmurBealDupras32(this.ordinal());
56 | }
57 | };
58 |
59 | public abstract CuckooStrategy strategy();
60 | }
61 |
--------------------------------------------------------------------------------
/src/main/java/com/duprasville/guava/probably/CuckooStrategy.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2015 Brian Dupras
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5 | * in compliance with the License. You may obtain a copy of the License at
6 | *
7 | * http://www.apache.org/licenses/LICENSE-2.0
8 | *
9 | * Unless required by applicable law or agreed to in writing, software distributed under the License
10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11 | * or implied. See the License for the specific language governing permissions and limitations under
12 | * the License.
13 | */
14 |
15 | package com.duprasville.guava.probably;
16 |
17 | import com.google.common.hash.Funnel;
18 |
19 | import java.io.Serializable;
20 |
21 | interface CuckooStrategy extends Serializable {
22 | int ordinal();
23 | boolean add(T object, Funnel super T> funnel, CuckooTable table);
24 | boolean remove(T object, Funnel super T> funnel, CuckooTable table);
25 | boolean contains(T object, Funnel super T> funnel, CuckooTable table);
26 | boolean addAll(CuckooTable thiz, CuckooTable that);
27 | boolean equivalent(CuckooTable thiz, CuckooTable that);
28 | boolean containsAll(CuckooTable thiz, CuckooTable that);
29 | boolean removeAll(CuckooTable thiz, CuckooTable that);
30 | }
31 |
--------------------------------------------------------------------------------
/src/main/java/com/duprasville/guava/probably/CuckooStrategyMurmurBealDupras32.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2015 Brian Dupras
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5 | * in compliance with the License. You may obtain a copy of the License at
6 | *
7 | * http://www.apache.org/licenses/LICENSE-2.0
8 | *
9 | * Unless required by applicable law or agreed to in writing, software distributed under the License
10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11 | * or implied. See the License for the specific language governing permissions and limitations under
12 | * the License.
13 | */
14 |
15 | package com.duprasville.guava.probably;
16 |
17 | import com.google.common.hash.Funnel;
18 | import com.google.common.hash.HashCode;
19 | import com.google.common.hash.HashFunction;
20 | import com.google.common.hash.Hashing;
21 |
22 | import java.util.Random;
23 |
24 | import static com.google.common.base.Preconditions.checkArgument;
25 | import static com.google.common.math.LongMath.mod;
26 |
27 | /**
28 | * Cuckoo Filter strategy employing Murmur3 32-bit hashes and parity-based altIndex calculation.
29 | *
30 | * @author Brian Dupras
31 | * @author Alex Beal
32 | */
33 | class CuckooStrategyMurmurBealDupras32 extends AbstractCuckooStrategy implements CuckooStrategy {
34 | private static final int MAX_RELOCATION_ATTEMPTS = 500;
35 | private static final HashFunction hashFunction = Hashing.murmur3_128();
36 |
37 | CuckooStrategyMurmurBealDupras32(int ordinal) {
38 | super(ordinal);
39 | }
40 |
41 | public boolean add(T object, Funnel super T> funnel, CuckooTable table) {
42 | final long hash64 = hash(object, funnel).asLong();
43 | final int hash1 = hash1(hash64);
44 | final int hash2 = hash2(hash64);
45 | final int fingerprint = fingerprint(hash2, table.numBitsPerEntry);
46 |
47 | final long index = index(hash1, table.numBuckets);
48 | return putEntry(fingerprint, table, index) ||
49 | putEntry(fingerprint, table, altIndex(index, fingerprint, table.numBuckets));
50 | }
51 |
52 | protected long maxRelocationAttempts() {
53 | return MAX_RELOCATION_ATTEMPTS;
54 | }
55 |
56 | private final Random kicker = new Random(1L);
57 |
58 | protected int pickEntryToKick(int numEntriesPerBucket) {
59 | return kicker.nextInt(numEntriesPerBucket);
60 | }
61 |
62 | public boolean remove(T object, Funnel super T> funnel, CuckooTable table) {
63 | final long hash64 = hash(object, funnel).asLong();
64 | final int hash1 = hash1(hash64);
65 | final int hash2 = hash2(hash64);
66 | final int fingerprint = fingerprint(hash2, table.numBitsPerEntry);
67 | final long index1 = index(hash1, table.numBuckets);
68 | final long index2 = altIndex(index1, fingerprint, table.numBuckets);
69 | return table.swapAnyEntry(CuckooTable.EMPTY_ENTRY, fingerprint, index1)
70 | || table.swapAnyEntry(CuckooTable.EMPTY_ENTRY, fingerprint, index2);
71 | }
72 |
73 | public boolean contains(T object, Funnel super T> funnel, CuckooTable table) {
74 | final long hash64 = hash(object, funnel).asLong();
75 | final int hash1 = hash1(hash64);
76 | final int hash2 = hash2(hash64);
77 | final int fingerprint = fingerprint(hash2, table.numBitsPerEntry);
78 | final long index1 = index(hash1, table.numBuckets);
79 | final long index2 = altIndex(index1, fingerprint, table.numBuckets);
80 | return table.hasEntry(fingerprint, index1) || table.hasEntry(fingerprint, index2);
81 | }
82 |
83 | HashCode hash(final T object, final Funnel super T> funnel) {
84 | return hashFunction.hashObject(object, funnel);
85 | }
86 |
87 | int hash1(long hash64) {
88 | return (int) hash64;
89 | }
90 |
91 | int hash2(long hash64) {
92 | return (int) (hash64 >>> 32);
93 | }
94 |
95 | /**
96 | * Returns an f-bit portion of the given hash. Iterating by f-bit segments from the least
97 | * significant side of the hash to the most significant, looks for a non-zero segment. If a
98 | * non-zero segment isn't found, 1 is returned to distinguish the fingerprint from a
99 | * non-entry.
100 | *
101 | * @param hash 32-bit hash value
102 | * @param f number of bits to consider from the hash
103 | * @return first non-zero f-bit value from hash as an int, or 1 if no non-zero value is found
104 | */
105 | public static int fingerprint(int hash, int f) {
106 | checkArgument(f > 0, "f must be greater than zero");
107 | checkArgument(f <= Integer.SIZE, "f must be less than " + Integer.SIZE);
108 | int mask = (0x80000000 >> (f - 1)) >>> (Integer.SIZE - f);
109 |
110 | for (int bit = 0; (bit + f) <= Integer.SIZE; bit += f) {
111 | int ret = (hash >> bit) & mask;
112 | if (0 != ret) {
113 | return ret;
114 | }
115 | }
116 | return 0x1;
117 | }
118 |
119 | /**
120 | * Calculates a primary index for an entry in the cuckoo table given the entry's 32-bit
121 | * hash and the table's size in buckets, m.
122 | *
123 | * tl;dr simply a wrap-around modulo bound by 0..m-1
124 | *
125 | * @param hash 32-bit hash value
126 | * @param m size of cuckoo table in buckets
127 | * @return index, bound by 0..m-1 inclusive
128 | */
129 | @Override
130 | public long index(int hash, long m) {
131 | return mod(hash, m);
132 | }
133 |
134 | /**
135 | * Calculates an alternate index for an entry in the cuckoo table.
136 | *
137 | * tl;dr
138 | * Calculates an offset as an odd hash of the fingerprint and adds to, or subtracts from,
139 | * the starting index, wrapping around the table (mod) as necessary.
140 | *
141 | * Detail:
142 | * Hash the fingerprint
143 | * make it odd (*)
144 | * flip the sign if starting index is odd
145 | * sum with starting index (**)
146 | * and modulo to 0..m-1
147 | *
148 | * (*) Constraining the CuckooTable to an even size in buckets, and applying odd offsets
149 | * guarantees opposite parities for index & altIndex. The parity of the starting index
150 | * determines whether the offset is subtracted from or added to the starting index.
151 | * This strategy guarantees altIndex() is reversible, i.e.
152 | *
153 | * index == altIndex(altIndex(index, fingerprint, m), fingerprint, m)
154 | *
155 | * (**) Summing the starting index and offset can possibly lead to numeric overflow. See
156 | * {@link #protectedSum(long, long, long)} protectedSum} for details on how this is
157 | * avoided.
158 | *
159 | * @param index starting index
160 | * @param fingerprint fingerprint
161 | * @param m size of table in buckets; must be even for this strategy
162 | * @return an alternate index for fingerprint bounded by 0..m-1
163 | */
164 | @Override
165 | public long altIndex(long index, int fingerprint, long m) {
166 | checkArgument(0L <= index, "index must be a positive!");
167 | checkArgument((0L <= m) && (0L == (m & 0x1L)), "m must be a positive even number!");
168 | return mod(protectedSum(index, parsign(index) * odd(hash(fingerprint)), m), m);
169 | }
170 |
171 | /**
172 | * Maps parity of i to a sign.
173 | *
174 | * @return 1 if i is even parity, -1 if i is odd parity
175 | */
176 | static long parsign(long i) {
177 | return ((i & 0x01L) * -2L) + 1L;
178 | }
179 |
180 | static int hash(int i) {
181 | return hashFunction.hashInt(i).asInt();
182 | }
183 |
184 | static long odd(long i) {
185 | return i | 0x01L;
186 | }
187 |
188 | /**
189 | * Returns the sum of index and offset, reduced by a mod-consistent amount if necessary to
190 | * protect from numeric overflow. This method is intended to support a subsequent mod operation
191 | * on the return value.
192 | *
193 | * @param index Assumed to be >= 0L.
194 | * @param offset Any value.
195 | * @param mod Value used to reduce the result,
196 | * @return sum of index and offset, reduced by a mod-consistent amount if necessary to protect
197 | * from numeric overflow.
198 | */
199 | static long protectedSum(long index, long offset, long mod) {
200 | return canSum(index, offset) ? index + offset : protectedSum(index - mod, offset, mod);
201 | }
202 |
203 | static boolean canSum(long a, long b) {
204 | return (a ^ b) < 0 | (a ^ (a + b)) >= 0;
205 | }
206 |
207 | }
208 |
--------------------------------------------------------------------------------
/src/main/java/com/duprasville/guava/probably/CuckooTable.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2015 Brian Dupras
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5 | * in compliance with the License. You may obtain a copy of the License at
6 | *
7 | * http://www.apache.org/licenses/LICENSE-2.0
8 | *
9 | * Unless required by applicable law or agreed to in writing, software distributed under the License
10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11 | * or implied. See the License for the specific language governing permissions and limitations under
12 | * the License.
13 | */
14 |
15 | package com.duprasville.guava.probably;
16 |
17 | import com.google.common.annotations.VisibleForTesting;
18 | import com.google.common.base.Objects;
19 | import com.google.common.math.LongMath;
20 | import com.google.common.primitives.Ints;
21 |
22 | import java.math.RoundingMode;
23 | import java.util.Arrays;
24 | import java.util.Random;
25 |
26 | import static com.google.common.base.Preconditions.checkArgument;
27 | import static java.lang.Math.pow;
28 |
29 | class CuckooTable {
30 | static final int EMPTY_ENTRY = 0x00;
31 |
32 | public long[] data() {
33 | return data;
34 | }
35 |
36 | public long numBuckets() {
37 | return numBuckets;
38 | }
39 |
40 | public int numEntriesPerBucket() {
41 | return numEntriesPerBucket;
42 | }
43 |
44 | public int numBitsPerEntry() {
45 | return numBitsPerEntry;
46 | }
47 |
48 | final long[] data;
49 | final long numBuckets;
50 | final int numEntriesPerBucket;
51 | final int numBitsPerEntry;
52 | private long size;
53 | private long checksum;
54 |
55 | public CuckooTable(long numBuckets, int numEntriesPerBucket, int numBitsPerEntry) {
56 | this(new long[calculateDataLength(numBuckets, numEntriesPerBucket, numBitsPerEntry)]
57 | , numBuckets
58 | , numEntriesPerBucket
59 | , numBitsPerEntry
60 | , 0L
61 | );
62 | }
63 |
64 | CuckooTable(final long[] data, long numBuckets, int numEntriesPerBucket,
65 | int numBitsPerEntry, long checksum) {
66 | this(data, 0L, checksum, numBuckets, numEntriesPerBucket, numBitsPerEntry);
67 | }
68 |
69 | public CuckooTable(final long[] data, long size, long checksum, long numBuckets,
70 | int numEntriesPerBucket, int numBitsPerEntry) {
71 | this.data = data;
72 | this.size = size;
73 | this.numBuckets = numBuckets;
74 | this.numEntriesPerBucket = numEntriesPerBucket;
75 | this.numBitsPerEntry = numBitsPerEntry;
76 | this.checksum = checksum;
77 | }
78 |
79 | public CuckooTable copy() {
80 | return new CuckooTable(
81 | data.clone(), size, checksum, numBuckets, numEntriesPerBucket, numBitsPerEntry);
82 | }
83 |
84 | public static int calculateDataLength(long numBuckets, int numEntriesPerBucket, int numBitsPerEntry) {
85 | checkArgument(numBuckets > 0, "numBuckets (%s) must be > 0", numBuckets);
86 | checkArgument(numEntriesPerBucket > 0, "numEntriesPerBucket (%s) must be > 0",
87 | numEntriesPerBucket);
88 | checkArgument(numBitsPerEntry > 0, "numBitsPerEntry (%s) must be > 0", numBitsPerEntry);
89 |
90 | return Ints.checkedCast(LongMath.divide(
91 | LongMath.checkedMultiply(numBuckets,
92 | LongMath.checkedMultiply(numEntriesPerBucket, numBitsPerEntry)),
93 | Long.SIZE, RoundingMode.CEILING));
94 | }
95 |
96 | public int findEntry(int value, long bucket) {
97 | for (int i = 0; i < numEntriesPerBucket; i++) {
98 | if (value == readEntry(bucket, i)) {
99 | return i;
100 | }
101 | }
102 | return -1;
103 | }
104 |
105 | public int countEntry(int value, long bucket) {
106 | int ret = 0;
107 | for (int i = 0; i < numEntriesPerBucket; i++) {
108 | if (value == readEntry(bucket, i)) {
109 | ret++;
110 | }
111 | }
112 | return ret;
113 | }
114 |
115 | public boolean hasEntry(int value, long bucket) {
116 | return findEntry(value, bucket) >= 0;
117 | }
118 |
119 | public int readEntry(long bucket, int entry) {
120 | return readBits(
121 | data, bitOffset(bucket, entry, numEntriesPerBucket, numBitsPerEntry), numBitsPerEntry);
122 | }
123 |
124 | public boolean swapAnyEntry(int valueIn, int valueOut, long bucket) {
125 | final int entry = findEntry(valueOut, bucket);
126 | if (entry >= 0) {
127 | final int kicked = swapEntry(valueIn, bucket, entry);
128 | assert valueOut == kicked : "expected valueOut [" + valueOut + "] != actual kicked [" +
129 | kicked + "]";
130 | return true;
131 | }
132 | return false;
133 | }
134 |
135 | int swapEntry(int value, long bucket, int entry) {
136 | final int kicked = writeBits(value, data,
137 | bitOffset(bucket, entry, numEntriesPerBucket, numBitsPerEntry), numBitsPerEntry);
138 | checksum += value - kicked;
139 |
140 | if ((EMPTY_ENTRY == value) && (EMPTY_ENTRY != kicked)) {
141 | size--;
142 | } else if ((EMPTY_ENTRY != value) && (EMPTY_ENTRY == kicked)) {
143 | size++;
144 | }
145 | assert size >= 0 : "Hmm - that's strange. CuckooTable size [" + size + "] shouldn't be < 0l";
146 |
147 | return kicked;
148 | }
149 |
150 | static long bitOffset(long bucket, int entry, int numEntriesPerBucket, int numBitsPerEntry) {
151 | return (bucket * numEntriesPerBucket + entry) * numBitsPerEntry;
152 | }
153 |
154 | static int dataIndex(long bit) {
155 | return (int) (bit >>> 6);
156 | }
157 |
158 | @VisibleForTesting
159 | static int readBits(final long[] data, long bit, int len) {
160 | final int startLower = (int) (bit % Long.SIZE);
161 | final int lenLower = Math.min(Long.SIZE - startLower, len);
162 | final int lenUpper = Math.max(len - lenLower, 0);
163 |
164 | final int indexUpper = dataIndex(bit + len);
165 |
166 | final long lower = (data[dataIndex(bit)] & mask(startLower, lenLower)) >>> startLower;
167 | final long upper = indexUpper < data.length ?
168 | (data[indexUpper] & mask(0, lenUpper)) << lenLower : 0x00L;
169 |
170 | return (int) (lower | upper);
171 | }
172 |
173 | @VisibleForTesting
174 | static int writeBits(int bits, final long[] data, long bit, int len) {
175 | final int ret = readBits(data, bit, len);
176 |
177 | final long bitsl = ((long) bits) & 0x00000000FFFFFFFFL; // upcast without carrying the sign
178 |
179 | final int startLower = (int) (bit % Long.SIZE);
180 | final int lenLower = Math.min(Long.SIZE - startLower, len);
181 | final int lenUpper = Math.max(len - lenLower, 0);
182 |
183 | final long maskLowerKeep = ~(mask(0, lenLower) << startLower);
184 | final long maskUpperKeep = mask(lenUpper, Long.SIZE - lenUpper);
185 |
186 | final long bitsLower = (bitsl << startLower) & ~maskLowerKeep;
187 | final long bitsUpper = (bitsl >>> (len - lenUpper)) & ~maskUpperKeep;
188 |
189 | final int indexLower = dataIndex(bit);
190 | final int indexUpper = dataIndex(bit + len - 1);
191 |
192 | final long dataLower = (data[indexLower] & maskLowerKeep) | bitsLower;
193 | data[indexLower] = dataLower;
194 |
195 | if (indexLower != indexUpper) {
196 | final long dataUpper = (data[indexUpper] & maskUpperKeep) | bitsUpper;
197 | data[indexUpper] = dataUpper;
198 | }
199 |
200 | return ret;
201 | }
202 |
203 | static long mask(int start, int len) {
204 | return (len <= 0) ? 0L : (0x8000000000000000L >> (len - 1)) >>> (Long.SIZE - (start + len));
205 | }
206 |
207 | @Override
208 | public boolean equals(Object o) {
209 | if (o instanceof CuckooTable) {
210 | CuckooTable that = (CuckooTable) o;
211 | return this.numBuckets == that.numBuckets
212 | && this.numEntriesPerBucket == that.numEntriesPerBucket
213 | && this.numBitsPerEntry == that.numBitsPerEntry
214 | && this.size == that.size
215 | && this.checksum == that.checksum
216 | ;
217 | }
218 | return false;
219 | }
220 |
221 | @Override
222 | public int hashCode() {
223 | return Objects.hashCode(numBuckets, numEntriesPerBucket, numBitsPerEntry, size,
224 | checksum);
225 | }
226 |
227 | public boolean isCompatible(CuckooTable that) {
228 | return this.numBuckets == that.numBuckets
229 | && this.numEntriesPerBucket == that.numEntriesPerBucket
230 | && this.numBitsPerEntry == that.numBitsPerEntry;
231 | }
232 |
233 | public long size() {
234 | return size < 0 ? /* indicates overflow */ Long.MAX_VALUE : size;
235 | }
236 |
237 | public long checksum() {
238 | return checksum;
239 | }
240 |
241 | public long bitSize() {
242 | return (long) data.length * Long.SIZE;
243 | }
244 |
245 | public long capacity() {
246 | return numBuckets * numEntriesPerBucket;
247 | }
248 |
249 | public double load() {
250 | return (double) size() / (double) capacity();
251 | }
252 |
253 | public double currentFpp() {
254 | return fppAtGivenLoad(load());
255 | }
256 |
257 | public double fppAtGivenLoad(double load) {
258 | return 1.0D - pow(
259 | ( pow(2, numBitsPerEntry) - 2 )
260 | /
261 | ( pow(2, numBitsPerEntry) - 1 )
262 | ,
263 | 2 * numEntriesPerBucket * load
264 | );
265 | }
266 |
267 | public double averageBitsPerEntry() {
268 | return (double) bitSize() / (double) size;
269 | }
270 |
271 | @Override
272 | public String toString() {
273 | return getClass().getSimpleName() + "{" +
274 | "size=" + size +
275 | ", checksum=" + checksum +
276 | ", byteSize=" + bitSize() / Byte.SIZE +
277 | ", load=" + load() +
278 | ", capacity=" + capacity() +
279 | ", averageBitsPerEntry=" + averageBitsPerEntry() +
280 | ", numBuckets=" + numBuckets +
281 | ", numEntriesPerBucket=" + numEntriesPerBucket +
282 | ", numBitsPerEntry=" + numBitsPerEntry +
283 | '}';
284 | }
285 |
286 | public void clear() {
287 | Arrays.fill(data, 0L);
288 | size = 0L;
289 | }
290 | }
291 |
--------------------------------------------------------------------------------
/src/main/java/com/duprasville/guava/probably/ProbabilisticFilter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2015 Brian Dupras
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5 | * in compliance with the License. You may obtain a copy of the License at
6 | *
7 | * http://www.apache.org/licenses/LICENSE-2.0
8 | *
9 | * Unless required by applicable law or agreed to in writing, software distributed under the License
10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11 | * or implied. See the License for the specific language governing permissions and limitations under
12 | * the License.
13 | */
14 |
15 | package com.duprasville.guava.probably;
16 |
17 | import java.util.Collection;
18 |
19 | import javax.annotation.CheckReturnValue;
20 |
21 | /**
22 | * A probabilistic filter offers an approximate containment test with one-sided error: if it claims
23 | * that an element is contained in it, this might be in error, but if it claims that an
24 | * element is not contained in it, then this is definitely true.
The false
25 | * positive probability ({@code FPP}) of a probabilistic filter is defined as the probability that
26 | * {@link #contains(Object)} will erroneously return {@code true} for an element that is not
27 | * actually contained in the filter.
28 | *
29 | * @param the type of elements that this filter accepts
30 | * @author Brian Dupras
31 | * @see CuckooFilter
32 | * @see BloomFilter
33 | */
34 | public interface ProbabilisticFilter {
35 | /**
36 | * Adds the specified element to this filter (optional operation). A return value of {@code true}
37 | * ensures that {@link #contains(Object)} given {@code e} will also return {@code true}.
38 | *
39 | * @param e element to be added to this filter
40 | * @return {@code true} if {@code e} was successfully added to the filter, {@code false} if this
41 | * is definitely not the case
42 | * @throws UnsupportedOperationException if the {@link #add(Object)} operation is not supported by
43 | * this filter
44 | * @throws ClassCastException if the class of the specified element prevents it from
45 | * being added to this filter
46 | * @throws NullPointerException if the specified element is {@code null} and this filter
47 | * does not permit {@code null} elements
48 | * @throws IllegalArgumentException if some property of the specified element prevents it
49 | * from being added to this filter
50 | * @see #contains(Object)
51 | * @see #addAll(Collection)
52 | * @see #addAll(ProbabilisticFilter)
53 | */
54 | @CheckReturnValue
55 | boolean add(E e);
56 |
57 | /**
58 | * Combines {@code this} filter with another compatible filter (optional operation). The mutations
59 | * happen to {@code this} instance. Callers must ensure {@code this} filter is appropriately sized
60 | * to avoid saturating it or running out of space.
61 | *
62 | * @param f filter to be combined into {@code this} filter - {@code f} is not mutated
63 | * @return {@code true} if the operation was successful, {@code false} otherwise
64 | * @throws UnsupportedOperationException if the {@link #addAll(ProbabilisticFilter)} operation is
65 | * not supported by this filter
66 | * @throws NullPointerException if the specified filter is {@code null}
67 | * @throws IllegalArgumentException if {@link #isCompatible(ProbabilisticFilter)} {@code ==
68 | * false}
69 | * @throws IllegalStateException if this filter cannot be combined with the specified
70 | * filter at this time due to insertion restrictions
71 | * @see #add(Object)
72 | * @see #addAll(Collection)
73 | * @see #contains(Object)
74 | */
75 | @CheckReturnValue
76 | boolean addAll(ProbabilisticFilter f);
77 |
78 | /**
79 | * Adds all of the elements in the specified collection to this filter (optional operation). The
80 | * behavior of this operation is undefined if the specified collection is modified while the
81 | * operation is in progress.
82 | *
83 | * @param c collection containing elements to be added to this filter
84 | * @return {@code true} if all elements of the collection were successfully added, {@code false}
85 | * otherwise
86 | * @throws UnsupportedOperationException if the {@link #addAll(Collection)} operation is not
87 | * supported by this filter
88 | * @throws ClassCastException if the class of an element of the specified collection
89 | * prevents it from being added to this filter
90 | * @throws NullPointerException if the specified collection contains a {@code null}
91 | * element and this filter does not permit {@code null}
92 | * elements, or if the specified collection is {@code null}
93 | * @throws IllegalArgumentException if some property of an element of the specified
94 | * collection prevents it from being added to this filter
95 | * @throws IllegalStateException if not all the elements can be added at this time due to
96 | * insertion restrictions
97 | * @see #add(Object)
98 | * @see #addAll(ProbabilisticFilter)
99 | * @see #contains(Object)
100 | */
101 | @CheckReturnValue
102 | boolean addAll(Collection extends E> c);
103 |
104 | /**
105 | * Removes all of the elements from this filter (optional operation). The filter will be empty
106 | * after this call returns.
107 | *
108 | * @throws UnsupportedOperationException if the {@link #clear()} method is not supported by this
109 | * filter
110 | * @see #sizeLong()
111 | * @see #isEmpty()
112 | */
113 | void clear();
114 |
115 | /**
116 | * Removes the specified element from this filter (optional operation). The element must be
117 | * contained in the filter prior to invocation. Removing an element that isn't contained in the
118 | * filter may put the filter in an inconsistent state causing it to return false negative
119 | * responses from {@link #contains(Object)}.
120 | *
121 | * If {@code false} is returned, this is definitely an indication that the specified
122 | * element wasn't contained in the filter prior to invocation. If the implementation treats this
123 | * condition as an error, then this filter can no longer be relied upon to return correct {@code
124 | * false} responses from {@link #contains(Object)}, unless {@link #isEmpty()} is also {@code
125 | * true}.
126 | *
127 | * @param e element to be removed from this filter
128 | * @return {@code true} if this filter probably contained the specified element, {@code false}
129 | * otherwise
130 | * @throws ClassCastException if the type of the specified element is incompatible with
131 | * this filter (optional)
132 | * @throws NullPointerException if the specified element is {@code null} and this filter
133 | * does not permit {@code null} elements
134 | * @throws UnsupportedOperationException if the {@link #remove(Object)} operation is not supported
135 | * by this filter
136 | * @see #contains(Object)
137 | * @see #removeAll(Collection)
138 | * @see #removeAll(ProbabilisticFilter)
139 | */
140 | @CheckReturnValue
141 | boolean remove(E e);
142 |
143 | /**
144 | * Removes from this filter all of its elements that are contained in the specified collection
145 | * (optional operation). All element contained in the specified collection must be contained in
146 | * the filter prior to invocation. Removing elements that aren't contained in the filter may put
147 | * the filter in an inconsistent state causing it to return false negative responses from {@link
148 | * #contains(Object)}.
149 | *
150 | * If {@code false} is returned, this is definitely an indication that the specified
151 | * collection contained elements that were not contained in this filter prior to invocation. If
152 | * the implementation treats this condition as an error, then this filter can no longer be relied
153 | * upon to return correct {@code false} responses from {@link #contains(Object)}, unless {@link
154 | * #isEmpty()} is also {@code true}.
155 | *
156 | * @param c collection containing elements to be removed from this filter
157 | * @return {@code true} if all of the elements of the specified collection were successfully
158 | * removed from the filter, {@code false} if any of the elements was not successfully removed
159 | * @throws ClassCastException if the types of one or more elements in the specified
160 | * collection are incompatible with this filter (optional)
161 | * @throws NullPointerException if the specified collection contains one or more null
162 | * elements and this filter does not permit {@code null}
163 | * elements (optional), or if the specified collection is
164 | * {@code null}
165 | * @throws UnsupportedOperationException if the {@link #removeAll(Collection)} operation is not
166 | * supported by this filter
167 | * @see #contains(Object)
168 | * @see #remove(Object)
169 | * @see #removeAll(ProbabilisticFilter)
170 | */
171 | @CheckReturnValue
172 | boolean removeAll(Collection extends E> c);
173 |
174 | /**
175 | * Subtracts the specified filter from {@code this} filter. The mutations happen to {@code this}
176 | * instance. Callers must ensure that the specified filter represents elements that are currently
177 | * contained in {@code this} filter.
178 | *
179 | * If {@code false} is returned, this is definitely an indication that the specified filter
180 | * contained elements that were not contained in this filter prior to invocation. If the
181 | * implementation treats this condition as an error, then this filter can no longer be relied upon
182 | * to return correct {@code false} responses from {@link #contains(Object)}, unless {@link
183 | * #isEmpty()} is also {@code true}.
184 | *
185 | * @param f filter containing elements to remove from {@code this} filter. {@code f} is not
186 | * mutated
187 | * @return {@code true} if the operation was successful, {@code false} otherwise
188 | * @throws UnsupportedOperationException if the {@link #removeAll(ProbabilisticFilter)} operation
189 | * is not supported by this filter
190 | * @throws NullPointerException if the specified filter is {@code null}
191 | * @throws IllegalArgumentException if {@link #isCompatible(ProbabilisticFilter)} {@code ==
192 | * false} given {@code f}
193 | * @see #contains(Object)
194 | * @see #remove(Object)
195 | * @see #removeAll(Collection)
196 | */
197 | @CheckReturnValue
198 | boolean removeAll(ProbabilisticFilter f);
199 |
200 | /**
201 | * Returns {@code true} if this filter might contain the specified element, {@code false}
202 | * if this is definitely not the case.
203 | *
204 | * @param e element whose containment in this filter is to be tested
205 | * @return {@code true} if this filter might contain the specified element, {@code false}
206 | * if this is definitely not the case.
207 | * @throws ClassCastException if the type of the specified element is incompatible with this
208 | * filter (optional)
209 | * @throws NullPointerException if the specified element is {@code null} and this filter does not
210 | * permit {@code null} elements
211 | * @see #containsAll(Collection)
212 | * @see #containsAll(ProbabilisticFilter)
213 | * @see #add(Object)
214 | * @see #remove(Object)
215 | */
216 | boolean contains(E e);
217 |
218 | /**
219 | * Returns {@code true} if this filter might contain all of the elements of the specified
220 | * collection (optional operation). More formally, returns {@code true} if {@link
221 | * #contains(Object)} {@code == true} for all of the elements of the specified collection.
222 | *
223 | * @param c collection containing elements to be checked for containment in this filter
224 | * @return {@code true} if this filter might contain all elements of the specified
225 | * collection
226 | * @throws ClassCastException if the types of one or more elements in the specified collection
227 | * are incompatible with this filter (optional)
228 | * @throws NullPointerException if the specified collection contains one or more {@code null}
229 | * elements and this filter does not permit {@code null} elements
230 | * (optional), or if the specified collection is {@code null}
231 | * @see #contains(Object)
232 | * @see #containsAll(ProbabilisticFilter)
233 | */
234 | boolean containsAll(Collection extends E> c);
235 |
236 | /**
237 | * Returns {@code true} if this filter might contain all elements contained in the
238 | * specified filter (optional operation).
239 | *
240 | * @param f filter containing elements to be checked for probable containment in this filter
241 | * @return {@code true} if this filter might contain all elements contained in the
242 | * specified filter, {@code false} if this is definitely not the case.
243 | * @throws UnsupportedOperationException if the {@link #containsAll(ProbabilisticFilter)}
244 | * operation is not supported by this filter
245 | * @throws NullPointerException if the specified filter is {@code null}
246 | * @throws IllegalArgumentException if {@link #isCompatible(ProbabilisticFilter)} {@code ==
247 | * false} given {@code f}
248 | * @see #contains(Object)
249 | * @see #containsAll(Collection)
250 | */
251 | boolean containsAll(ProbabilisticFilter f);
252 |
253 | /**
254 | * Returns {@code true} if this filter contains no elements.
255 | *
256 | * @return {@code true} if this filter contains no elements
257 | * @see #sizeLong()
258 | */
259 | boolean isEmpty();
260 |
261 | /**
262 | * Returns the number of elements contained in this filter (its cardinality). If this filter
263 | * contains more than {@code Long.MAX_VALUE} elements, returns {@code Long.MAX_VALUE}.
264 | *
265 | * @return the number of elements contained in this filter (its cardinality)
266 | * @see #capacity()
267 | * @see #isEmpty()
268 | * @see #size()
269 | */
270 | long sizeLong();
271 |
272 | /**
273 | * Returns the number of elements contained in this filter (its cardinality). If this filter
274 | * contains more than {@code Integer.MAX_VALUE} elements, returns {@code Integer.MAX_VALUE}. Use
275 | * {@link #sizeLong()} to obtain filter sizes lager than {@code Integer.MAX_VALUE};
276 | *
277 | *
This method is provided for consistency with the Collections API.
278 | *
279 | * @return the number of elements contained in this filter (its cardinality)
280 | * @see #capacity()
281 | * @see #isEmpty()
282 | * @see #sizeLong()
283 | */
284 | long size();
285 |
286 | /**
287 | * Returns {@code true} if the specified filter is compatible with {@code this} filter. {@code f}
288 | * is considered compatible if {@code this} filter can use it in combinatoric operations (e.g.
289 | * {@link #addAll(ProbabilisticFilter)}, {@link #containsAll(ProbabilisticFilter)}, {@link
290 | * #removeAll(ProbabilisticFilter)}).
291 | *
292 | * @param f filter to check for compatibility with {@code this} filter
293 | * @return {@code true} if the specified filter is compatible with {@code this} filter
294 | * @throws NullPointerException if the specified filter is {@code null}
295 | * @see #addAll(ProbabilisticFilter)
296 | * @see #containsAll(ProbabilisticFilter)
297 | * @see #removeAll(ProbabilisticFilter)
298 | */
299 | boolean isCompatible(ProbabilisticFilter f);
300 |
301 | /**
302 | * Returns the number of elements this filter can represent at its requested {@code FPP}. This may
303 | * not be a hard limit of the filter implementation. It is permissible for a filter to contain
304 | * more elements than its requested capacity, though its {@code FPP} may suffer.
305 | *
306 | * @return the number of elements this filter can represent at its requested {@code FPP}.
307 | * @see #fpp()
308 | * @see #currentFpp()
309 | * @see #sizeLong()
310 | */
311 | long capacity();
312 |
313 | /**
314 | * Returns the current false positive probability ({@code FPP}) of this filter.
315 | *
316 | * @return the probability that {@link #contains(Object)} will erroneously return {@code true}
317 | * given an element that has not actually been added to the filter.
318 | * @see #fpp()
319 | */
320 | double currentFpp();
321 |
322 | /**
323 | * Returns the intended {@code FPP} limit of this filter. This may not be a hard limit of the
324 | * filter implementation. It is permissible for a filter's {@code FPP} to degrade (e.g. via
325 | * saturation) beyond its intended limit.
326 | *
327 | * @return the intended {@code FPP} limit of this filter.
328 | * @see #currentFpp()
329 | */
330 | double fpp();
331 | }
332 |
--------------------------------------------------------------------------------
/src/main/java/com/duprasville/guava/probably/package-info.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2015 Brian Dupras
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5 | * in compliance with the License. You may obtain a copy of the License at
6 | *
7 | * http://www.apache.org/licenses/LICENSE-2.0
8 | *
9 | * Unless required by applicable law or agreed to in writing, software distributed under the License
10 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11 | * or implied. See the License for the specific language governing permissions and limitations under
12 | * the License.
13 | */
14 |
15 | /**
16 | * Probabilistic data structures for Guava.
17 | *
18 | *