├── .editorconfig ├── .github └── workflows │ └── ci.yml ├── .gitignore ├── CHANGELOG.md ├── Cargo.toml ├── LICENSE ├── README.md ├── benches └── bench_lib.rs ├── cabi ├── .gitignore ├── Cargo.toml ├── Makefile ├── build.rs ├── src │ └── lib.rs └── tests │ ├── Makefile │ └── basic_operations.c ├── src ├── bucket.rs ├── lib.rs └── util.rs └── tests ├── false_positive_rate.rs └── interop.rs /.editorconfig: -------------------------------------------------------------------------------- 1 | # EditorConfig helps developers define and maintain consistent 2 | # coding styles between different editors and IDEs 3 | # editorconfig.org 4 | 5 | root = true 6 | 7 | 8 | [*] 9 | end_of_line = lf 10 | charset = utf-8 11 | trim_trailing_whitespace = true 12 | insert_final_newline = true 13 | indent_style = space 14 | indent_size = 4 15 | 16 | [*.rs] 17 | indent_style = space 18 | indent_size = 4 19 | 20 | [*.toml] 21 | indent_style = space 22 | indent_size = 4 23 | 24 | [*.md] 25 | trim_trailing_whitespace = false 26 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | on: [push, pull_request] 2 | 3 | name: Continuous Integration 4 | 5 | jobs: 6 | check: 7 | name: Check 8 | runs-on: Ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@v2 11 | - uses: actions-rs/toolchain@v1 12 | with: 13 | profile: minimal 14 | toolchain: stable 15 | override: true 16 | - uses: actions-rs/cargo@v1 17 | with: 18 | command: check 19 | 20 | test: 21 | name: Test Suite ${{ matrix.os }} 22 | needs: check 23 | runs-on: ${{ matrix.os }}-latest 24 | strategy: 25 | matrix: 26 | os: [Ubuntu, macOS, Windows] 27 | steps: 28 | - uses: actions/checkout@v2 29 | - uses: actions-rs/toolchain@v1 30 | with: 31 | profile: minimal 32 | toolchain: stable 33 | override: true 34 | - uses: actions-rs/cargo@v1 35 | with: 36 | command: test 37 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | Cargo.lock 3 | 4 | 5 | #Added by cargo 6 | # 7 | #already existing elements were commented out 8 | 9 | /target 10 | #Cargo.lock 11 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | All notable changes to this project will be documented in this file. 3 | 4 | The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) 5 | and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). 6 | 7 | ## [Unreleased] 8 | ### Changed 9 | - Serde support is now behind the feature flag `serde_support` and is disabled by default. 10 | 11 | ## [v0.4.0] - 2018-04-1 12 | ### Added 13 | - `ExportedCuckooFilter` adds the ability to serialize the memory map of a `CuckooFilter` via Serde, for example to reduce communication overhead between nodes or to store the current state on disk for retrieval at a later time. 14 | - Added a C interface for embedding this crate into other languages. 15 | The interface is an additional crate, located in the cabi/ subfolder. 16 | ### Changed 17 | - add() now returns Result<(), CuckooError> instead of a bool, and returns a `NotEnoughSpace` error instead of panicking 18 | when insertion fails. 19 | - len() now returns usize instead of u64 to match std's data structures' len() functions. 20 | - with_capacity() now takes a usize instead of a u64 to match std's data structures' with_capacity() functions. 21 | 22 | ## [v0.3.2] 23 | ### Added 24 | - Filters now have a memory_usage() function that returns how many bytes a given filter occupies in memory. 25 | Let's show how little memory the filters need for their capacity!
26 | ### Fixed 27 | - Use std::collections::hash_map::DefaultHasher as replacement for std::hash::SipHasher as default hasher, as 28 | SipHasher is deprecated since Rust 1.13. 29 | - The same part of the item hash was used for generating the fingerprint as well as the index positions. This means that 30 | equal fingerprints always had the same index positions, resulting in increased rebucketing and fewer items fitting in 31 | the filter. 32 | 33 | [Unreleased]: https://github.com/seiflotfy/rust-cuckoofilter/compare/v0.4.0...HEAD 34 | [v0.4.0]: https://github.com/seiflotfy/rust-cuckoofilter/compare/v0.3.2...v0.4.0 35 | [v0.3.2]: https://github.com/seiflotfy/rust-cuckoofilter/compare/v0.3.1...v0.3.2 36 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "cuckoofilter" 3 | version = "0.5.0" 4 | authors = [ 5 | "Seif Lotfy ", 6 | "Seif Lotfy ", 7 | "Florian Jacob ", 8 | "The cuckoofilter contributors", 9 | ] 10 | 11 | # A short blurb about the package. This is not rendered in any format when 12 | # uploaded to crates.io (aka this is not markdown) 13 | description = "Cuckoo Filter: Practically Better Than Bloom" 14 | 15 | # These URLs point to more information about the repository 16 | homepage = "http://axiom.co" 17 | repository = "https://github.com/axiomhq/rust-cuckoofilter" 18 | 19 | # This points to a file in the repository (relative to this Cargo.toml). The 20 | # contents of this file are stored and indexed in the registry. 21 | readme = "./README.md" 22 | 23 | # This is a small list of keywords used to categorize and search for this 24 | # package. 25 | keywords = ["bloomfilter", "cuckoohashing", "cuckoofilter"] 26 | 27 | # This is a string description of the license for this package.
Currently 28 | # crates.io will validate the license provided against a whitelist of known 29 | # license identifiers from http://spdx.org/licenses/. Multiple licenses can 30 | # be separated with a `/` 31 | license = "MIT" 32 | 33 | edition = "2018" 34 | 35 | [features] 36 | default = [] 37 | serde_support = ["serde", "serde_derive", "serde_bytes"] 38 | 39 | [dependencies] 40 | byteorder = "1.3.4" 41 | rand = "0.7.3" 42 | serde = {version = "1.0.114", optional = true} 43 | serde_derive = {version = "1.0.114", optional = true} 44 | serde_bytes = {version = "0.11.5", optional = true} 45 | fnv = "1.0.7" 46 | farmhash = {version = "1.1.5", optional = true} 47 | 48 | [dev-dependencies] 49 | serde_json = "1.0" 50 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Seif Lotfy 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Cuckoo Filter 2 | 3 | [![Crates.io](https://img.shields.io/crates/v/cuckoofilter.svg?maxAge=2592000)](https://crates.io/crates/cuckoofilter) 4 | 5 | [Documentation](https://docs.rs/cuckoofilter) 6 | 7 | 8 | A cuckoo filter is a Bloom filter replacement for approximate set-membership queries. While Bloom filters are well-known space-efficient data structures that serve queries like "is item x in the set?", they do not support deletion. Their variants that enable deletion (like counting Bloom filters) usually require much more space. 9 | 10 | Cuckoo filters provide the flexibility to add and remove items dynamically. A cuckoo filter is based on cuckoo hashing (hence the name). It is essentially a cuckoo hash table storing each key's fingerprint. Cuckoo hash tables can be highly compact, thus a cuckoo filter could use less space than conventional Bloom filters, for applications that require low false positive rates (< 3%). 11 | 12 | For details about the algorithm and citations, please refer to this article for now: 13 | 14 | ["Cuckoo Filter: Better Than Bloom" by Bin Fan, Dave Andersen and Michael Kaminsky](https://www.cs.cmu.edu/~dga/papers/cuckoo-conext2014.pdf) 15 | 16 | 17 | ## Example usage 18 | 19 | ```rust 20 | extern crate cuckoofilter; 21 | 22 | ...
23 | 24 | let value: &str = "hello world"; 25 | 26 | // Create cuckoo filter with the default max capacity of roughly one million items ((1 << 20) - 1) 27 | let mut cf = cuckoofilter::CuckooFilter::new(); 28 | 29 | // Add data to the filter 30 | let success = cf.add(value).unwrap(); 31 | // success ==> () 32 | 33 | // Lookup if data is in the filter 34 | let success = cf.contains(value); 35 | // success ==> true 36 | 37 | // Test and add to the filter (if data does not exist then add) 38 | let success = cf.test_and_add(value).unwrap(); 39 | // success ==> false (the value was already present) 40 | 41 | // Remove data from the filter. 42 | let success = cf.delete(value); 43 | // success ==> true 44 | ``` 45 | 46 | ## C Interface 47 | This crate has a C interface for embedding it into languages other than Rust. 48 | See the [C Interface Documentation](https://docs.rs/cuckoofilter_cabi) for more details. 49 | 50 | 51 | ## Notes & TODOs 52 | * This implementation uses a static bucket size of 4 fingerprints and a fingerprint size of 1 byte based on my understanding of an optimal bucket/fingerprint/size ratio from the aforementioned paper. 53 | * When the filter returns `NotEnoughSpace`, the element given is actually added to the filter, but some random *other* 54 | element gets removed. This could be improved by implementing a single-item eviction cache for that removed item. 55 | * There are no high-level bindings for languages other than C. 56 | One could add them e.g. for python using [milksnake](https://github.com/getsentry/milksnake).
57 | -------------------------------------------------------------------------------- /benches/bench_lib.rs: -------------------------------------------------------------------------------- 1 | #![feature(test)] 2 | 3 | extern crate cuckoofilter; 4 | #[cfg(feature = "farmhash")] 5 | extern crate farmhash; 6 | #[cfg(feature = "fnv")] 7 | extern crate fnv; 8 | extern crate rand; 9 | extern crate test; 10 | 11 | use self::cuckoofilter::*; 12 | use std::error::Error; 13 | use std::fs::File; 14 | use std::io::prelude::*; 15 | use std::path::Path; 16 | 17 | fn get_words() -> String { 18 | let path = Path::new("/usr/share/dict/words"); 19 | let display = path.display(); 20 | 21 | // Open the path in read-only mode, returns `io::Result` 22 | let mut file = match File::open(&path) { 23 | // The `description` method of `io::Error` returns a string that 24 | // describes the error 25 | Err(why) => panic!("couldn't open {}: {}", display, Error::description(&why)), 26 | Ok(file) => file, 27 | }; 28 | 29 | let mut contents = String::new(); 30 | if let Err(why) = file.read_to_string(&mut contents) { 31 | panic!("couldn't read {}: {}", display, Error::description(&why)); 32 | } 33 | contents 34 | } 35 | 36 | fn perform_insertions(b: &mut test::Bencher) { 37 | let contents = get_words(); 38 | let split: Vec<&str> = contents.split("\n").take(1000).collect(); 39 | let mut cf = CuckooFilter::::with_capacity(split.len() * 2); 40 | 41 | b.iter(|| { 42 | for s in &split { 43 | test::black_box(cf.test_and_add(s).unwrap()); 44 | } 45 | }); 46 | } 47 | 48 | #[bench] 49 | fn bench_new(b: &mut test::Bencher) { 50 | b.iter(|| { 51 | test::black_box(CuckooFilter::new()); 52 | }); 53 | } 54 | 55 | #[bench] 56 | fn bench_clear(b: &mut test::Bencher) { 57 | let mut cf = test::black_box(CuckooFilter::new()); 58 | 59 | b.iter(|| { 60 | test::black_box(cf.clear()); 61 | }); 62 | } 63 | 64 | #[cfg(feature = "farmhash")] 65 | #[bench] 66 | fn bench_insertion_farmhash(b: &mut test::Bencher) { 67 | 
perform_insertions::(b); 68 | } 69 | 70 | #[cfg(feature = "fnv")] 71 | #[bench] 72 | fn bench_insertion_fnv(b: &mut test::Bencher) { 73 | perform_insertions::(b); 74 | } 75 | 76 | #[bench] 77 | fn bench_insertion_default(b: &mut test::Bencher) { 78 | perform_insertions::(b); 79 | } 80 | -------------------------------------------------------------------------------- /cabi/.gitignore: -------------------------------------------------------------------------------- 1 | tests/build 2 | -------------------------------------------------------------------------------- /cabi/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "cuckoofilter-cabi" 3 | version = "0.4.0" 4 | license = "MIT" 5 | authors = [ 6 | "Florian Jacob " 7 | ] 8 | homepage = "http://geekyogre.com" 9 | repository = "https://github.com/seiflotfy/rust-cuckoofilter" 10 | description = """ 11 | C interface wrapper for cuckoofilter, a library for 12 | the Bloom filter replacement for approximated set-membership queries. 13 | """ 14 | keywords = ["bloomfilter", "cuckoohashing", "cuckoofilter"] 15 | 16 | build = "build.rs" 17 | 18 | [profile.release] 19 | lto = true 20 | 21 | [dependencies] 22 | cuckoofilter = { version = "0.4", path = "../" } 23 | 24 | [build-dependencies] 25 | cbindgen = "0.5" 26 | 27 | [lib] 28 | # foreign programs can link against cdylib or staticlib to use rust-cuckoofilter's C API 29 | crate-type = ["cdylib", "staticlib"] 30 | -------------------------------------------------------------------------------- /cabi/Makefile: -------------------------------------------------------------------------------- 1 | PREFIX = /usr/local 2 | 3 | HEADER = rcf_cuckoofilter.h 4 | LIBS = libcuckoofilter_cabi.a libcuckoofilter_cabi.so 5 | 6 | error: 7 | @echo "Please use 'cargo build --release' for building, the Makefile is for installation only (via 'make install')." 
8 | 9 | .PHONY: install 10 | install: 11 | install -D -m 0755 $(addprefix target/include/,$(HEADER)) -t $(DESTDIR)$(PREFIX)/include 12 | install -D -m 0755 $(addprefix target/release/,$(LIBS)) -t $(DESTDIR)$(PREFIX)/lib 13 | 14 | .PHONY: uninstall 15 | uninstall: 16 | rm -f $(DESTDIR)$(PREFIX)/lib/$(LIBS) 17 | rm -f $(DESTDIR)$(PREFIX)/include/$(HEADER) 18 | -------------------------------------------------------------------------------- /cabi/build.rs: -------------------------------------------------------------------------------- 1 | extern crate cbindgen; 2 | 3 | use std::env; 4 | 5 | fn main() { 6 | let crate_dir = env::var("CARGO_MANIFEST_DIR").unwrap(); 7 | 8 | cbindgen::Builder::new() 9 | .with_crate(crate_dir) 10 | .with_language(cbindgen::Language::C) 11 | .with_parse_deps(true) 12 | .with_parse_include(&["cuckoofilter"]) 13 | .generate() 14 | .expect("Unable to generate bindings") 15 | .write_to_file("target/include/rcf_cuckoofilter.h"); 16 | } 17 | -------------------------------------------------------------------------------- /cabi/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! A C interface for the [cuckoofilter crate]. 2 | //! 3 | //! You can use this crate to use the `cuckoofilter` crate from C/C++, or almost any other 4 | //! language. There only needs to be a way to call foreign C functions, like in python or ruby. 5 | //! 6 | //! **Note**: For other languages, you'd probably want to add a language-specific layer on top of 7 | //! this, e.g. using [milksnake] for python. Contributions welcome! 8 | //! 9 | //! # Build Setup 10 | //! 11 | //! You need to integrate this crate in your build system somehow, how you do this depends on your 12 | //! specific build system. You can e.g. use a local checkout of the [cuckoofilter crate]: 13 | //! 14 | //! ```bash 15 | //! git clone https://github.com/seiflotfy/rust-cuckoofilter 16 | //! cd rust-cuckoofilter/cabi 17 | //! cargo build --release 18 | //! 
``` 19 | //! 20 | //! Then, put the generated file `cabi/target/include/rcf_cuckoofilter.h` on your compiler's 21 | //! include path and link against either `cabi/target/release/libcuckoofilter_cabi.a` or 22 | //! `cabi/target/release/libcuckoofilter_cabi.so`, depending on whether you want static or dynamic 23 | //! linking. Alternatively, use the provided Makefile via `sudo make install` to install the header 24 | //! and libraries system-wide. You can see the `tests` directory for basic examples using a 25 | //! Makefile for C and C++, including static and dynamic linking in each case. 26 | //! 27 | //! If you found a nice way to integrate this crate in a build system, 28 | //! please consider contributing the necessary build files! 29 | //! 30 | //! # Usage 31 | //! 32 | //! You can then use the interface like this: 33 | //! 34 | //! ```C 35 | //! #include "rcf_cuckoofilter.h" 36 | //! 37 | //! rcf_cuckoofilter *filter = rcf_cuckoofilter_with_capacity(1000); 38 | //! rcf_cuckoofilter_status result; 39 | //! result = rcf_cuckoofilter_add(filter, 42); 40 | //! assert(result == RCF_OK); 41 | //! result = rcf_cuckoofilter_contains(filter, 42); 42 | //! assert(result == RCF_OK); 43 | //! result = rcf_cuckoofilter_delete(filter, 42); 44 | //! assert(result == RCF_OK); 45 | //! result = rcf_cuckoofilter_contains(filter, 42); 46 | //! assert(result == RCF_NOT_FOUND); 47 | //! ``` 48 | //! 49 | //! # Hashing arbitrary data 50 | //! The interface only takes unsigned 64bit integers as `data`. 51 | //! If you want to insert structs or other types, search for something that hashes those to 52 | //! integers. Those hashes don't need to be well-distributed as they're 53 | //! hashed on the rust side again, so a very simple hash function is sufficient, 54 | //! like `std::hash` for C++ or the `__hash__` method in python. 55 | //! There's a good chance something like this is present in the 56 | //! respective standard library, for implementing hash tables and the like. 57 | //! 
58 | //! In the future, the interface could accept a pointer to arbitrary memory and a size parameter, 59 | //! and hash that as byte array on the Rust side. But this approach is problematic if not all given 60 | //! bytes are the same for two equal objects. 61 | //! 62 | //! # Naming 63 | //! The prefix `rcf` is short for *rust cuckoo filter*. 64 | //! It's used for C-style namespacing to avoid name conflicts with other libraries. 65 | //! 66 | //! [cuckoofilter crate]: https://crates.io/crates/cuckoofilter 67 | //! [milksnake]: https://github.com/getsentry/milksnake 68 | 69 | extern crate cuckoofilter; 70 | 71 | use cuckoofilter::CuckooError; 72 | use std::collections::hash_map::DefaultHasher; 73 | 74 | /// Opaque type for a cuckoo filter using Rust's `std::collections::hash_map::DefaultHasher` as 75 | /// Hasher. The C ABI only supports that specific Hasher, currently. 76 | #[allow(non_camel_case_types)] 77 | pub type rcf_cuckoofilter = cuckoofilter::CuckooFilter; 78 | 79 | #[allow(non_camel_case_types)] 80 | #[repr(C)] 81 | pub enum rcf_cuckoofilter_status { 82 | RCF_OK, 83 | RCF_NOT_FOUND, 84 | RCF_NOT_ENOUGH_SPACE, 85 | } 86 | 87 | /// Constructs a cuckoo filter with a given max capacity. 88 | /// The various wrapper methods of this crate operate on the returned reference. 89 | /// At the end of its life, use [`rcf_cuckoofilter_free`] to free the allocated memory. 90 | /// 91 | /// [`rcf_cuckoofilter_free`]: fn.rcf_cuckoofilter_free.html 92 | #[no_mangle] 93 | pub extern "C" fn rcf_cuckoofilter_with_capacity(capacity: usize) -> *mut rcf_cuckoofilter { 94 | let filter = cuckoofilter::CuckooFilter::with_capacity(capacity); 95 | let filter = Box::new(filter); 96 | Box::into_raw(filter) 97 | } 98 | 99 | /// Free the given `filter`, releasing its allocated memory. 
100 | #[no_mangle] 101 | pub extern "C" fn rcf_cuckoofilter_free(filter: *mut rcf_cuckoofilter) { 102 | let filter = unsafe { Box::from_raw(filter) }; 103 | drop(filter); 104 | } 105 | 106 | /// Checks if the given `data` is in the `filter`. 107 | /// 108 | /// Returns `rcf_cuckoofilter_status::RCF_OK` if the given `data` is in the `filter`, 109 | /// `rcf_cuckoofilter_status::RCF_NOT_FOUND` otherwise. 110 | /// Aborts if the given `filter` is a null pointer. 111 | #[no_mangle] 112 | pub extern "C" fn rcf_cuckoofilter_contains( 113 | filter: *const rcf_cuckoofilter, 114 | data: u64, 115 | ) -> rcf_cuckoofilter_status { 116 | let filter = unsafe { filter.as_ref() }; 117 | let found = filter 118 | .expect("Given rcf_cuckoofilter* is a null pointer") 119 | .contains(&data); 120 | if found { 121 | rcf_cuckoofilter_status::RCF_OK 122 | } else { 123 | rcf_cuckoofilter_status::RCF_NOT_FOUND 124 | } 125 | } 126 | 127 | /// Adds `data` to the `filter`. 128 | /// 129 | /// Returns `rcf_cuckoofilter_status::RCF_OK` if the given `data` was successfully added to the 130 | /// `filter`, `rcf_cuckoofilter_status::RCF_NOT_ENOUGH_SPACE` if the filter could not find a free 131 | /// space for it. 132 | /// Aborts if the given `filter` is a null pointer. 133 | #[no_mangle] 134 | pub extern "C" fn rcf_cuckoofilter_add( 135 | filter: *mut rcf_cuckoofilter, 136 | data: u64, 137 | ) -> rcf_cuckoofilter_status { 138 | let filter = unsafe { filter.as_mut() }; 139 | match filter 140 | .expect("Given rcf_cuckoofilter* is a null pointer") 141 | .add(&data) 142 | { 143 | Ok(_) => rcf_cuckoofilter_status::RCF_OK, 144 | Err(CuckooError::NotEnoughSpace) => rcf_cuckoofilter_status::RCF_NOT_ENOUGH_SPACE, 145 | } 146 | } 147 | 148 | /// Returns the number of items in the `filter`. 149 | /// Aborts if the given `filter` is a null pointer. 
150 | #[no_mangle] 151 | pub extern "C" fn rcf_cuckoofilter_len(filter: *const rcf_cuckoofilter) -> usize { 152 | let filter = unsafe { filter.as_ref() }; 153 | filter 154 | .expect("Given rcf_cuckoofilter* is a null pointer") 155 | .len() 156 | } 157 | 158 | /// Checks if `filter` is empty. 159 | /// This is equivalent to `rcf_cuckoofilter_len(filter) == 0` 160 | /// Aborts if the given `filter` is a null pointer. 161 | #[no_mangle] 162 | pub extern "C" fn rcf_cuckoofilter_is_empty(filter: *const rcf_cuckoofilter) -> bool { 163 | let filter = unsafe { filter.as_ref() }; 164 | filter 165 | .expect("Given rcf_cuckoofilter* is a null pointer") 166 | .is_empty() 167 | } 168 | 169 | /// Returns the number of bytes the `filter` occupies in memory. 170 | /// Aborts if the given `filter` is a null pointer. 171 | #[no_mangle] 172 | pub extern "C" fn rcf_cuckoofilter_memory_usage(filter: *const rcf_cuckoofilter) -> usize { 173 | let filter = unsafe { filter.as_ref() }; 174 | filter 175 | .expect("Given rcf_cuckoofilter* is a null pointer") 176 | .memory_usage() 177 | } 178 | 179 | /// Deletes `data` from the `filter`. 180 | /// Returns `rcf_cuckoofilter_status::RCF_OK` if `data` existed in the filter before, 181 | /// `rcf_cuckoofilter_status::RCF_NOT_FOUND` if `data` did not exist. 182 | /// Aborts if the given `filter` is a null pointer. 
183 | #[no_mangle] 184 | pub extern "C" fn rcf_cuckoofilter_delete( 185 | filter: *mut rcf_cuckoofilter, 186 | data: u64, 187 | ) -> rcf_cuckoofilter_status { 188 | let filter = unsafe { filter.as_mut() }; 189 | let found = filter 190 | .expect("Given rcf_cuckoofilter* is a null pointer") 191 | .delete(&data); 192 | if found { 193 | rcf_cuckoofilter_status::RCF_OK 194 | } else { 195 | rcf_cuckoofilter_status::RCF_NOT_FOUND 196 | } 197 | } 198 | -------------------------------------------------------------------------------- /cabi/tests/Makefile: -------------------------------------------------------------------------------- 1 | INCLUDE = ../target/include 2 | LIB = ../target/release 3 | CFLAGS = -I$(INCLUDE) 4 | BUILDDIR = build 5 | 6 | LDFLAGS_STATIC = -L$(LIB) -l:libcuckoofilter_cabi.a -ldl -lpthread 7 | LDFLAGS_DYNAMIC = -L$(LIB) -Wl,-rpath=$(LIB) -lcuckoofilter_cabi 8 | 9 | 10 | tests: basic_operations_static basic_operations_dynamic basic_operations_cpp_static basic_operations_cpp_dynamic 11 | 12 | 13 | basic_operations_static: basic_operations.c 14 | mkdir -p $(BUILDDIR) 15 | $(CC) $(CFLAGS) -o $(BUILDDIR)/$@ $^ $(LDFLAGS_STATIC) 16 | 17 | basic_operations_dynamic: basic_operations.c 18 | mkdir -p $(BUILDDIR) 19 | $(CC) $(CFLAGS) -o $(BUILDDIR)/$@ $^ $(LDFLAGS_DYNAMIC) 20 | 21 | basic_operations_cpp_static: basic_operations.c 22 | mkdir -p $(BUILDDIR) 23 | $(CXX) $(CFLAGS) -o $(BUILDDIR)/$@ $^ $(LDFLAGS_STATIC) 24 | 25 | basic_operations_cpp_dynamic: basic_operations.c 26 | mkdir -p $(BUILDDIR) 27 | $(CXX) $(CFLAGS) -o $(BUILDDIR)/$@ $^ $(LDFLAGS_DYNAMIC) 28 | 29 | clean: 30 | rm basic_operations_static basic_operations_dynamic basic_operations_cpp_static basic_operations_cpp_dynamic 31 | -------------------------------------------------------------------------------- /cabi/tests/basic_operations.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 
#include "rcf_cuckoofilter.h" 7 | #ifdef __cplusplus 8 | } 9 | #endif 10 | 11 | int main(int argc, char **argv) { 12 | rcf_cuckoofilter *filter = rcf_cuckoofilter_with_capacity(1024); 13 | rcf_cuckoofilter_status status; 14 | 15 | status = rcf_cuckoofilter_add(filter, 42); 16 | assert(status == RCF_OK); 17 | printf("added 42: %d\n", status == RCF_OK); 18 | 19 | status = rcf_cuckoofilter_contains(filter, 42); 20 | assert(status == RCF_OK); 21 | printf("contains 42: %d\n", status == RCF_OK); 22 | 23 | status = rcf_cuckoofilter_contains(filter, 4711); 24 | assert(status == RCF_NOT_FOUND); 25 | printf("contains 4711: %d\n", status == RCF_OK); 26 | 27 | status = rcf_cuckoofilter_delete(filter, 42); 28 | assert(status == RCF_OK); 29 | printf("deleted 42: %d\n", status == RCF_OK); 30 | 31 | status = rcf_cuckoofilter_contains(filter, 42); 32 | assert(status == RCF_NOT_FOUND); 33 | printf("contains 42: %d\n", status == RCF_OK); 34 | 35 | rcf_cuckoofilter_free(filter); 36 | 37 | return 0; 38 | } 39 | -------------------------------------------------------------------------------- /src/bucket.rs: -------------------------------------------------------------------------------- 1 | pub const FINGERPRINT_SIZE: usize = 1; 2 | pub const BUCKET_SIZE: usize = 4; 3 | const EMPTY_FINGERPRINT_DATA: [u8; FINGERPRINT_SIZE] = [100; FINGERPRINT_SIZE]; 4 | 5 | // Fingerprint Size is 1 byte so lets remove the Vec 6 | #[derive(PartialEq, Copy, Clone, Hash)] 7 | pub struct Fingerprint { 8 | pub data: [u8; FINGERPRINT_SIZE], 9 | } 10 | 11 | impl Fingerprint { 12 | /// Attempts to create a new Fingerprint based on the given 13 | /// number. If the created Fingerprint would be equal to the 14 | /// empty Fingerprint, None is returned. 15 | pub fn from_data(data: [u8; FINGERPRINT_SIZE]) -> Option { 16 | let result = Self { data }; 17 | if result.is_empty() { 18 | None 19 | } else { 20 | Some(result) 21 | } 22 | } 23 | 24 | /// Returns the empty Fingerprint. 
25 | pub fn empty() -> Self { 26 | Self { 27 | data: EMPTY_FINGERPRINT_DATA, 28 | } 29 | } 30 | 31 | /// Checks if this is the empty Fingerprint. 32 | pub fn is_empty(&self) -> bool { 33 | self.data == EMPTY_FINGERPRINT_DATA 34 | } 35 | 36 | /// Sets the fingerprint value to a previously exported one via an in-memory copy. 37 | fn slice_copy(&mut self, fingerprint: &[u8]) { 38 | self.data.copy_from_slice(fingerprint); 39 | } 40 | } 41 | 42 | /// Manages `BUCKET_SIZE` fingerprints at most. 43 | #[derive(Clone)] 44 | pub struct Bucket { 45 | pub buffer: [Fingerprint; BUCKET_SIZE], 46 | } 47 | 48 | impl Bucket { 49 | /// Creates a new bucket with a pre-allocated buffer. 50 | pub fn new() -> Self { 51 | Self { 52 | buffer: [Fingerprint::empty(); BUCKET_SIZE], 53 | } 54 | } 55 | 56 | /// Inserts the fingerprint into the buffer if the buffer is not full. 57 | /// This operation is O(1). 58 | pub fn insert(&mut self, fp: Fingerprint) -> bool { 59 | for entry in &mut self.buffer { 60 | if entry.is_empty() { 61 | *entry = fp; 62 | return true; 63 | } 64 | } 65 | false 66 | } 67 | 68 | /// Deletes the given fingerprint from the bucket. This operation is O(1). 69 | pub fn delete(&mut self, fp: Fingerprint) -> bool { 70 | match self.get_fingerprint_index(fp) { 71 | Some(index) => { 72 | self.buffer[index] = Fingerprint::empty(); 73 | true 74 | } 75 | None => false, 76 | } 77 | } 78 | 79 | /// Returns the index of the given fingerprint, if its found. O(1) 80 | pub fn get_fingerprint_index(&self, fp: Fingerprint) -> Option { 81 | self.buffer.iter().position(|e| *e == fp) 82 | } 83 | 84 | /// Returns all current fingerprint data of the current buffer for storage. 85 | pub fn get_fingerprint_data(&self) -> Vec { 86 | self.buffer 87 | .iter() 88 | .flat_map(|f| f.data.iter()) 89 | .cloned() 90 | .collect() 91 | } 92 | 93 | /// Empties the bucket by setting each used entry to Fingerprint::empty(). Returns the number of entries that were modified. 
94 | #[inline(always)] 95 | pub fn clear(&mut self) { 96 | *self = Self::new() 97 | } 98 | } 99 | 100 | impl From<&[u8]> for Bucket { 101 | /// Constructs a buffer of fingerprints from a set of previously exported fingerprints. 102 | fn from(fingerprints: &[u8]) -> Self { 103 | let mut buffer = [Fingerprint::empty(); BUCKET_SIZE]; 104 | for (idx, value) in fingerprints.chunks(FINGERPRINT_SIZE).enumerate() { 105 | buffer[idx].slice_copy(value); 106 | } 107 | Self { buffer } 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Cuckoo filter probabilistic data structure for membership testing and cardinality counting. 2 | //! 3 | //! # Usage 4 | //! 5 | //! This crate is [on crates.io](https://crates.io/crates/cuckoofilter) and can be 6 | //! used by adding `cuckoofilter` to the dependencies in your project's `Cargo.toml`. 7 | //! 8 | //! ```toml 9 | //! [dependencies] 10 | //! cuckoofilter = "0.3" 11 | //! ``` 12 | //! 13 | //! And this in your crate root: 14 | //! 15 | //! ```rust 16 | //! extern crate cuckoofilter; 17 | //! ``` 18 | 19 | mod bucket; 20 | mod util; 21 | 22 | use crate::bucket::{Bucket, Fingerprint, BUCKET_SIZE, FINGERPRINT_SIZE}; 23 | use crate::util::{get_alt_index, get_fai, FaI}; 24 | 25 | use std::cmp; 26 | use std::collections::hash_map::DefaultHasher; 27 | use std::error::Error as StdError; 28 | use std::fmt; 29 | use std::hash::{Hash, Hasher}; 30 | use std::iter::repeat; 31 | use std::marker::PhantomData; 32 | use std::mem; 33 | 34 | use rand::Rng; 35 | #[cfg(feature = "serde_support")] 36 | use serde_derive::{Deserialize, Serialize}; 37 | 38 | /// If insertion fails, we will retry this many times. 39 | pub const MAX_REBUCKET: u32 = 500; 40 | 41 | /// The default number of buckets. 
42 | pub const DEFAULT_CAPACITY: usize = (1 << 20) - 1; 43 | 44 | #[derive(Debug)] 45 | pub enum CuckooError { 46 | NotEnoughSpace, 47 | } 48 | 49 | impl fmt::Display for CuckooError { 50 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 51 | f.write_str("NotEnoughSpace") 52 | } 53 | } 54 | 55 | impl StdError for CuckooError { 56 | fn description(&self) -> &str { 57 | "Not enough space to store this item, rebucketing failed." 58 | } 59 | } 60 | 61 | /// A cuckoo filter class exposes a Bloomier filter interface, 62 | /// providing methods of add, delete, contains. 63 | /// 64 | /// # Examples 65 | /// 66 | /// ``` 67 | /// extern crate cuckoofilter; 68 | /// 69 | /// let words = vec!["foo", "bar", "xylophone", "milagro"]; 70 | /// let mut cf = cuckoofilter::CuckooFilter::new(); 71 | /// 72 | /// let mut insertions = 0; 73 | /// for s in &words { 74 | /// if cf.test_and_add(s).unwrap() { 75 | /// insertions += 1; 76 | /// } 77 | /// } 78 | /// 79 | /// assert_eq!(insertions, words.len()); 80 | /// assert_eq!(cf.len(), words.len()); 81 | /// 82 | /// // Re-add the first element. 
83 | /// cf.add(words[0]); 84 | /// 85 | /// assert_eq!(cf.len(), words.len() + 1); 86 | /// 87 | /// for s in &words { 88 | /// cf.delete(s); 89 | /// } 90 | /// 91 | /// assert_eq!(cf.len(), 1); 92 | /// assert!(!cf.is_empty()); 93 | /// 94 | /// cf.delete(words[0]); 95 | /// 96 | /// assert_eq!(cf.len(), 0); 97 | /// assert!(cf.is_empty()); 98 | /// 99 | /// for s in &words { 100 | /// if cf.test_and_add(s).unwrap() { 101 | /// insertions += 1; 102 | /// } 103 | /// } 104 | /// 105 | /// cf.clear(); 106 | /// 107 | /// assert!(cf.is_empty()); 108 | /// 109 | /// ``` 110 | pub struct CuckooFilter { 111 | buckets: Box<[Bucket]>, 112 | len: usize, 113 | _hasher: std::marker::PhantomData, 114 | } 115 | 116 | impl Default for CuckooFilter { 117 | fn default() -> Self { 118 | Self::new() 119 | } 120 | } 121 | 122 | impl CuckooFilter { 123 | /// Construct a CuckooFilter with default capacity and hasher. 124 | pub fn new() -> Self { 125 | Self::with_capacity(DEFAULT_CAPACITY) 126 | } 127 | } 128 | 129 | impl CuckooFilter 130 | where 131 | H: Hasher + Default, 132 | { 133 | /// Constructs a Cuckoo Filter with a given max capacity 134 | pub fn with_capacity(cap: usize) -> Self { 135 | let capacity = cmp::max(1, cap.next_power_of_two() / BUCKET_SIZE); 136 | 137 | Self { 138 | buckets: repeat(Bucket::new()) 139 | .take(capacity) 140 | .collect::>() 141 | .into_boxed_slice(), 142 | len: 0, 143 | _hasher: PhantomData, 144 | } 145 | } 146 | 147 | /// Checks if `data` is in the filter. 148 | pub fn contains(&self, data: &T) -> bool { 149 | let FaI { fp, i1, i2 } = get_fai::(data); 150 | let len = self.buckets.len(); 151 | self.buckets[i1 % len] 152 | .get_fingerprint_index(fp) 153 | .or_else(|| self.buckets[i2 % len].get_fingerprint_index(fp)) 154 | .is_some() 155 | } 156 | 157 | /// Adds `data` to the filter. Returns `Ok` if the insertion was successful, 158 | /// but could fail with a `NotEnoughSpace` error, especially when the filter 159 | /// is nearing its capacity. 
160 | /// Note that while you can put any hashable type in the same filter, beware 161 | /// of side effects: the same number can have different hashes 162 | /// depending on the type. 163 | /// So for the filter, 4711i64 isn't the same as 4711u64. 164 | /// 165 | /// **Note:** When this returns `NotEnoughSpace`, the element given was 166 | /// actually added to the filter, but some random *other* element was 167 | /// removed. This might improve in the future. 168 | pub fn add(&mut self, data: &T) -> Result<(), CuckooError> { 169 | let fai = get_fai::(data); 170 | if self.put(fai.fp, fai.i1) || self.put(fai.fp, fai.i2) { 171 | return Ok(()); 172 | } 173 | let len = self.buckets.len(); 174 | let mut rng = rand::thread_rng(); 175 | let mut i = fai.random_index(&mut rng); 176 | let mut fp = fai.fp; 177 | for _ in 0..MAX_REBUCKET { 178 | let other_fp; 179 | { 180 | let loc = &mut self.buckets[i % len].buffer[rng.gen_range(0, BUCKET_SIZE)]; 181 | other_fp = *loc; 182 | *loc = fp; 183 | i = get_alt_index::(other_fp, i); 184 | } 185 | if self.put(other_fp, i) { 186 | return Ok(()); 187 | } 188 | fp = other_fp; 189 | } 190 | // fp is dropped here, which means that the last item that was 191 | // rebucketed gets removed from the filter. 192 | // TODO: One could introduce a single-item cache for this element, 193 | // check this cache in all methods additionally to the actual filter, 194 | // and return NotEnoughSpace if that cache is already in use. 195 | // This would complicate the code, but stop random elements from 196 | // getting removed and result in nicer behaviour for the user. 197 | Err(CuckooError::NotEnoughSpace) 198 | } 199 | 200 | /// Adds `data` to the filter if it does not exist in the filter yet. 201 | /// Returns `Ok(true)` if `data` was not yet present in the filter and added 202 | /// successfully. 
203 | pub fn test_and_add(&mut self, data: &T) -> Result { 204 | if self.contains(data) { 205 | Ok(false) 206 | } else { 207 | self.add(data).map(|_| true) 208 | } 209 | } 210 | 211 | /// Number of items in the filter. 212 | pub fn len(&self) -> usize { 213 | self.len 214 | } 215 | 216 | /// Exports fingerprints in all buckets, along with the filter's length for storage. 217 | /// The filter can be recovered by passing the `ExportedCuckooFilter` struct to the 218 | /// `from` method of `CuckooFilter`. 219 | pub fn export(&self) -> ExportedCuckooFilter { 220 | self.into() 221 | } 222 | 223 | /// Number of bytes the filter occupies in memory 224 | pub fn memory_usage(&self) -> usize { 225 | mem::size_of_val(self) + self.buckets.len() * mem::size_of::() 226 | } 227 | 228 | /// Check if filter is empty 229 | pub fn is_empty(&self) -> bool { 230 | self.len == 0 231 | } 232 | 233 | /// Deletes `data` from the filter. Returns true if `data` existed in the 234 | /// filter before. 235 | pub fn delete(&mut self, data: &T) -> bool { 236 | let FaI { fp, i1, i2 } = get_fai::(data); 237 | self.remove(fp, i1) || self.remove(fp, i2) 238 | } 239 | 240 | /// Empty all the buckets in a filter and reset the number of items. 241 | pub fn clear(&mut self) { 242 | if self.is_empty() { 243 | return; 244 | } 245 | 246 | for bucket in self.buckets.iter_mut() { 247 | bucket.clear(); 248 | } 249 | self.len = 0; 250 | } 251 | 252 | /// Extracts fingerprint values from all buckets, used for exporting the filter's data. 253 | fn values(&self) -> Vec { 254 | self.buckets 255 | .iter() 256 | .flat_map(|b| b.get_fingerprint_data().into_iter()) 257 | .collect() 258 | } 259 | 260 | /// Removes the item with the given fingerprint from the bucket indexed by i. 
261 | fn remove(&mut self, fp: Fingerprint, i: usize) -> bool { 262 | let len = self.buckets.len(); 263 | if self.buckets[i % len].delete(fp) { 264 | self.len -= 1; 265 | true 266 | } else { 267 | false 268 | } 269 | } 270 | 271 | fn put(&mut self, fp: Fingerprint, i: usize) -> bool { 272 | let len = self.buckets.len(); 273 | if self.buckets[i % len].insert(fp) { 274 | self.len += 1; 275 | true 276 | } else { 277 | false 278 | } 279 | } 280 | } 281 | 282 | /// A minimal representation of the CuckooFilter which can be transferred or stored, then recovered at a later stage. 283 | #[derive(Debug)] 284 | #[cfg_attr(feature = "serde_support", derive(Deserialize, Serialize))] 285 | pub struct ExportedCuckooFilter { 286 | #[cfg_attr(feature = "serde_support", serde(with = "serde_bytes"))] 287 | pub values: Vec, 288 | pub length: usize, 289 | } 290 | 291 | impl From for CuckooFilter { 292 | /// Converts a simplified representation of a filter used for export to a 293 | /// fully functioning version. 294 | /// 295 | /// # Contents 296 | /// 297 | /// * `values` - A serialized version of the `CuckooFilter`'s memory, where the 298 | /// fingerprints in each bucket are chained one after another, then in turn all 299 | /// buckets are chained together. 300 | /// * `length` - The number of valid fingerprints inside the `CuckooFilter`. 301 | /// This value is used as a time saving method, otherwise all fingerprints 302 | /// would need to be checked for equivalence against the null pattern. 303 | fn from(exported: ExportedCuckooFilter) -> Self { 304 | // Assumes that the `BUCKET_SIZE` and `FINGERPRINT_SIZE` constants do not change. 
305 | Self { 306 | buckets: exported 307 | .values 308 | .chunks(BUCKET_SIZE * FINGERPRINT_SIZE) 309 | .map(Bucket::from) 310 | .collect::>() 311 | .into_boxed_slice(), 312 | len: exported.length, 313 | _hasher: PhantomData, 314 | } 315 | } 316 | } 317 | 318 | impl From<&CuckooFilter> for ExportedCuckooFilter 319 | where 320 | H: Hasher + Default, 321 | { 322 | /// Converts a `CuckooFilter` into a simplified version which can be serialized and stored 323 | /// for later use. 324 | fn from(cuckoo: &CuckooFilter) -> Self { 325 | Self { 326 | values: cuckoo.values(), 327 | length: cuckoo.len(), 328 | } 329 | } 330 | } 331 | -------------------------------------------------------------------------------- /src/util.rs: -------------------------------------------------------------------------------- 1 | use crate::bucket::{Fingerprint, FINGERPRINT_SIZE}; 2 | 3 | use std::hash::{Hash, Hasher}; 4 | 5 | use byteorder::{BigEndian, WriteBytesExt}; 6 | 7 | // A struct combining *F*ingerprint *a*nd *I*ndexes, 8 | // to have a return type with named fields 9 | // instead of a tuple with unnamed fields. 10 | pub struct FaI { 11 | pub fp: Fingerprint, 12 | pub i1: usize, 13 | pub i2: usize, 14 | } 15 | 16 | fn get_hash(data: &T) -> (u32, u32) { 17 | let mut hasher = ::default(); 18 | data.hash(&mut hasher); 19 | let result = hasher.finish(); 20 | 21 | // split 64bit hash value in the upper and the lower 32bit parts, 22 | // one used for the fingerprint, the other used for the indexes. 
23 | ((result >> 32) as u32, result as u32) 24 | } 25 | 26 | pub fn get_alt_index(fp: Fingerprint, i: usize) -> usize { 27 | let (_, index_hash) = get_hash::<_, H>(&fp.data); 28 | let alt_i = index_hash as usize; 29 | (i ^ alt_i) as usize 30 | } 31 | 32 | impl FaI { 33 | fn from_data(data: &T) -> Self { 34 | let (fp_hash, index_hash) = get_hash::<_, H>(data); 35 | 36 | let mut fp_hash_arr = [0; FINGERPRINT_SIZE]; 37 | let _ = (&mut fp_hash_arr[..]).write_u32::(fp_hash); 38 | let mut valid_fp_hash: [u8; FINGERPRINT_SIZE] = [0; FINGERPRINT_SIZE]; 39 | let mut n = 0; 40 | let fp; 41 | 42 | // increment every byte of the hash until we find one that is a valid fingerprint 43 | loop { 44 | for i in 0..FINGERPRINT_SIZE { 45 | valid_fp_hash[i] = fp_hash_arr[i] + n; 46 | } 47 | 48 | if let Some(val) = Fingerprint::from_data(valid_fp_hash) { 49 | fp = val; 50 | break; 51 | } 52 | n += 1; 53 | } 54 | 55 | let i1 = index_hash as usize; 56 | let i2 = get_alt_index::(fp, i1); 57 | Self { fp, i1, i2 } 58 | } 59 | 60 | pub fn random_index(&self, r: &mut R) -> usize { 61 | if r.gen() { 62 | self.i1 63 | } else { 64 | self.i2 65 | } 66 | } 67 | } 68 | 69 | pub fn get_fai(data: &T) -> FaI { 70 | FaI::from_data::<_, H>(data) 71 | } 72 | 73 | #[cfg(test)] 74 | mod tests { 75 | use super::*; 76 | 77 | #[test] 78 | fn test_fp_and_index() { 79 | use std::collections::hash_map::DefaultHasher; 80 | let data = "seif"; 81 | let fai = get_fai::<_, DefaultHasher>(data); 82 | let FaI { fp, i1, i2 } = fai; 83 | let i11 = get_alt_index::(fp, i2); 84 | assert_eq!(i11, i1); 85 | 86 | let i22 = get_alt_index::(fp, i11); 87 | assert_eq!(i22, i2); 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /tests/false_positive_rate.rs: -------------------------------------------------------------------------------- 1 | use cuckoofilter::CuckooFilter; 2 | 3 | use std::collections::hash_map::DefaultHasher; 4 | 5 | // Modelled after 6 | // 
https://github.com/efficient/cuckoofilter/blob/master/example/test.cc 7 | // to make test setup and results comparable. 8 | 9 | #[test] 10 | fn false_positive_rate() { 11 | let total_items = 1_000_000; 12 | 13 | let mut filter = CuckooFilter::::with_capacity(total_items); 14 | 15 | let mut num_inserted: u64 = 0; 16 | // We might not be able to get all items in, but still there should be enough 17 | // so we can just use what has fit in and continue with the test. 18 | for i in 0..total_items { 19 | match filter.add(&i) { 20 | Ok(_) => num_inserted += 1, 21 | Err(_) => break, 22 | } 23 | } 24 | 25 | // The range 0..num_inserted are all known to be in the filter. 26 | // The filter shouldn't return false negatives, and therefore they should all be contained. 27 | for i in 0..num_inserted { 28 | assert!(filter.contains(&i)); 29 | } 30 | 31 | // The range total_items..(2 * total_items) are all known *not* to be in the filter. 32 | // Every element for which the filter claims that it is contained is therefore a false positive. 
33 | let mut false_queries: u64 = 0; 34 | for i in total_items..(2 * total_items) { 35 | if filter.contains(&i) { 36 | false_queries += 1; 37 | } 38 | } 39 | let false_positive_rate = (false_queries as f64) / (total_items as f64); 40 | 41 | println!("elements inserted: {}", num_inserted); 42 | println!( 43 | "memory usage: {:.2}KiB", 44 | (filter.memory_usage() as f64) / 1024.0 45 | ); 46 | println!("false positive rate: {}%", 100.0 * false_positive_rate); 47 | // ratio should be around 0.024, round up to 0.03 to accommodate random fluctuation 48 | assert!(false_positive_rate < 0.03); 49 | } 50 | -------------------------------------------------------------------------------- /tests/interop.rs: -------------------------------------------------------------------------------- 1 | use cuckoofilter::{CuckooFilter, ExportedCuckooFilter}; 2 | 3 | use std::collections::hash_map::DefaultHasher; 4 | 5 | #[test] 6 | fn interoperability() { 7 | let total_items = 1_000_000; 8 | 9 | let mut filter = CuckooFilter::::with_capacity(total_items); 10 | 11 | let mut num_inserted: u64 = 0; 12 | // Fit as many values in as possible, count how many made it in. 13 | for i in 0..total_items { 14 | match filter.add(&i) { 15 | Ok(_) => num_inserted += 1, 16 | Err(_) => break, 17 | } 18 | } 19 | 20 | // Export the fingerprint data stored in the filter, 21 | // along with the filter's current length. 22 | let store: ExportedCuckooFilter = filter.export(); 23 | 24 | // Create a new filter using the `from` conversion and the values previously exported. 25 | let recovered_filter = CuckooFilter::::from(store); 26 | 27 | // The range 0..num_inserted are all known to be in the filter. 28 | // The filters shouldn't return false negatives, and therefore they should all be contained. 29 | // Both filters should also be identical. 
30 | for i in 0..num_inserted { 31 | assert!(filter.contains(&i)); 32 | assert!(recovered_filter.contains(&i)); 33 | } 34 | 35 | // The range total_items..(2 * total_items) are all known *not* to be in the filter. 36 | // Every element for which the filter claims that it is contained is therefore a false positive, and both the original filter and recovered filter should exhibit the same false positive behaviour. 37 | for i in total_items..(2 * total_items) { 38 | assert_eq!(filter.contains(&i), recovered_filter.contains(&i)); 39 | } 40 | } 41 | 42 | #[test] 43 | #[cfg(feature = "serde_support")] 44 | fn serialization() { 45 | // Just a small filter to test serialization. 46 | let mut filter = CuckooFilter::::with_capacity(100); 47 | 48 | // Fill a few values. 49 | for i in 0..50 { 50 | filter.add(&i).unwrap(); 51 | } 52 | // export data. 53 | let store: ExportedCuckooFilter = filter.export(); 54 | 55 | // serialize using json (for example, any serde format can be used). 56 | let saved_json = serde_json::to_string(&store).unwrap(); 57 | 58 | // create a new filter from the json string. 59 | let restore_json: ExportedCuckooFilter = serde_json::from_str(&saved_json).unwrap(); 60 | let recovered_filter = CuckooFilter::::from(restore_json); 61 | 62 | // Check our values exist within the reconstructed filter. 63 | for i in 0..50 { 64 | assert!(recovered_filter.contains(&i)); 65 | } 66 | } 67 | --------------------------------------------------------------------------------