├── .github └── workflows │ └── ci.yml ├── .gitignore ├── Cargo.toml ├── LICENSE ├── README.md ├── benches ├── basic.rs ├── compare.rs └── concurrent.rs ├── src └── lib.rs └── tests ├── basic.rs └── concurrent.rs /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: ["**"] 6 | pull_request: 7 | branches: ["**"] 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.ref }} 11 | cancel-in-progress: true 12 | 13 | jobs: 14 | rust: 15 | name: Rust checks 16 | runs-on: ubuntu-latest 17 | steps: 18 | - name: Checkout 19 | uses: actions/checkout@v4 20 | 21 | - name: Install Rust (stable) 22 | uses: dtolnay/rust-toolchain@stable 23 | with: 24 | components: rustfmt, clippy 25 | 26 | - name: Cache cargo 27 | uses: Swatinem/rust-cache@v2 28 | with: 29 | cache-on-failure: true 30 | 31 | - name: Format 32 | run: cargo fmt --all -- --check 33 | 34 | - name: Clippy 35 | run: cargo clippy --lib --tests -- -D warnings 36 | 37 | - name: Build 38 | run: cargo build --verbose 39 | 40 | - name: Test 41 | run: cargo test --verbose 42 | 43 | rust-32: 44 | name: Rust checks (i686) 45 | runs-on: ubuntu-latest 46 | steps: 47 | - name: Checkout 48 | uses: actions/checkout@v4 49 | 50 | - name: Install Rust (stable) 51 | uses: dtolnay/rust-toolchain@stable 52 | with: 53 | targets: i686-unknown-linux-gnu 54 | 55 | - name: Install dependencies 56 | run: sudo apt-get update && sudo apt-get install -y gcc-multilib 57 | 58 | - name: Cache cargo 59 | uses: Swatinem/rust-cache@v2 60 | with: 61 | cache-on-failure: true 62 | 63 | - name: Build 64 | run: cargo build --verbose --target i686-unknown-linux-gnu 65 | 66 | - name: Test 67 | run: cargo test --verbose --target i686-unknown-linux-gnu 68 | 69 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | debug 2 | target 3 | Cargo.lock 4 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "atomic-cuckoo-filter" 3 | version = "0.2.0" 4 | edition = "2024" 5 | description = "Highly concurrent Cuckoo Filter" 6 | license = "MIT" 7 | repository = "https://github.com/farhadi/atomic-cuckoo-filter" 8 | readme = "README.md" 9 | 10 | [dependencies] 11 | derive_builder = "0.20" 12 | parking_lot_core = "0.9" 13 | rand = "0.9" 14 | thiserror = "2.0" 15 | 16 | [dev-dependencies] 17 | ahash = "0.8" 18 | cuckoofilter = "0.5" -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Ali Farhadi 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Atomic Cuckoo Filter 2 | 3 | A high-performance, lock-free concurrent cuckoo filter implementation in Rust for efficient set membership testing. 4 | 5 | [![Crates.io](https://img.shields.io/crates/v/atomic-cuckoo-filter.svg)](https://crates.io/crates/atomic-cuckoo-filter) 6 | [![Documentation](https://docs.rs/atomic-cuckoo-filter/badge.svg)](https://docs.rs/atomic-cuckoo-filter) 7 | [![CI](https://img.shields.io/github/actions/workflow/status/farhadi/atomic-cuckoo-filter/ci.yml?branch=main&style=flat-square&logo=github)](https://github.com/farhadi/atomic-cuckoo-filter/actions) 8 | [![License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE) 9 | 10 | ## Overview 11 | 12 | This crate provides a sophisticated implementation of a cuckoo filter - a probabilistic data structure 13 | for fast set membership testing. Unlike traditional implementations, this version uses **lock-free** 14 | atomic operations and is designed for high-concurrency environments. 15 | 16 | ## Key Features 17 | 18 | ✨ **Lock-Free Concurrency**: All operations use atomic compare-exchange loops instead of traditional locks 19 | 🚀 **High Performance**: Optimized for multi-threaded environments with minimal blocking 20 | 🔍 **No False Negatives**: Items that were inserted are guaranteed to be found 21 | 🎯 **Controllable False Positives**: Configurable fingerprint size to tune accuracy 22 | 📦 **Space Efficient**: ~20-30% less memory usage than Bloom filters for the same false positive rate 23 | 🗑️ **Deletion Support**: Unlike Bloom filters, inserted items can be safely removed 24 | ⏱️ **Bounded Lookup Time**: Always at most 2 bucket checks maximum 25 | 🔧 **Highly Configurable**: Customizable capacity, fingerprint size, bucket size, and eviction limits 26 | 27 | ## Quick Start 28 | 29 | Add this to your `Cargo.toml`: 30 | 31 | ```toml 32 | [dependencies] 33 | atomic-cuckoo-filter = "0.2" 34 | ``` 35 | 36 | ### Basic Usage 37 | 38 | ```rust 39 | use atomic_cuckoo_filter::CuckooFilter; 40 | 41 | // Create a filter with default settings 42 | let filter = CuckooFilter::new(); 43 | 44 | // Insert items 45 | filter.insert(&"hello").unwrap(); 46 | filter.insert(&"world").unwrap(); 47 | filter.insert(&42).unwrap(); 48 | 49 | // Check membership 50 | assert!(filter.contains(&"hello")); 51 | assert!(filter.contains(&42)); 52 | assert!(!filter.contains(&"rust")); 53 | 54 | // Remove items 55 | assert!(filter.remove(&"hello")); 56 | assert!(!filter.contains(&"hello")); 57 | 58 | // Count occurrences (not meant to be used as a counting filter, but to detect duplicates or hash collisions) 59 | filter.insert(&"duplicate").unwrap(); 60 | filter.insert(&"duplicate").unwrap(); 61 | assert_eq!(filter.count(&"duplicate"), 2); 62 | 63 | println!("Filter contains {} items", filter.len()); 64 | 65 | // Unique Insertions (Atomically check and insert items) 66 | // 
Returns Ok(true) if inserted, Ok(false) if already present 67 | match filter.insert_unique(&"item") { 68 | Ok(true) => println!("Item was inserted"), 69 | Ok(false) => println!("Item already existed"), 70 | Err(e) => println!("Filter is full: {}", e), 71 | } 72 | ``` 73 | 74 | ### Custom Configuration 75 | 76 | ```rust 77 | use atomic_cuckoo_filter::CuckooFilter; 78 | 79 | let filter = CuckooFilter::builder() 80 | .capacity(1_000_000) // Target capacity 81 | .fingerprint_size(16) // Bits per fingerprint (4, 8, 16, or 32) 82 | .bucket_size(4) // Fingerprints per bucket 83 | .max_evictions(500) // Maximum eviction chain length 84 | .build() 85 | .unwrap(); 86 | ``` 87 | 88 | ### Custom Hash Functions 89 | 90 | ```rust 91 | use ahash::AHasher; 92 | 93 | let filter = CuckooFilterBuilder::::default() 94 | .capacity(1024) 95 | .build() 96 | .unwrap(); 97 | ``` 98 | 99 | ### Concurrent Usage 100 | 101 | The filter is designed for high-concurrency scenarios: 102 | 103 | ```rust 104 | use atomic_cuckoo_filter::CuckooFilter; 105 | use std::sync::Arc; 106 | use std::thread; 107 | 108 | let filter = Arc::new(CuckooFilter::with_capacity(100_000)); 109 | 110 | // Spawn multiple threads for concurrent operations 111 | let mut handles = vec![]; 112 | 113 | // Writer threads 114 | for i in 0..4 { 115 | let filter_clone = Arc::clone(&filter); 116 | handles.push(thread::spawn(move || { 117 | for j in 0..1000 { 118 | let item = format!("item_{}_{}", i, j); 119 | filter_clone.insert(&item).unwrap(); 120 | } 121 | })); 122 | } 123 | 124 | // Reader threads 125 | for i in 0..4 { 126 | let filter_clone = Arc::clone(&filter); 127 | handles.push(thread::spawn(move || { 128 | for j in 0..1000 { 129 | let item = format!("item_{}_{}", i, j); 130 | while !filter_clone.contains(&item) {}; 131 | } 132 | })); 133 | } 134 | 135 | // Wait for all threads to complete 136 | for handle in handles { 137 | handle.join().unwrap(); 138 | } 139 | 140 | println!("Final filter size: {}", filter.len()); 141 | ``` 142 | 143 | ## Configuration Options 144 | 145 | | Parameter | Description | Valid Values | Default | 146 | |-----------|-------------|--------------|---------| 147 | | `capacity` | Target number of items | Any positive integer | 1,048,576 | 148 | | `fingerprint_size` | Bits per fingerprint | 4, 8, 16, or 32 | 16 | 149 | | `bucket_size` | Fingerprints per bucket | Any positive integer | 4 | 150 | | `max_evictions` | Max eviction chain length | Any integer ≥ 0 | 500 | 151 | 152 | ### Choosing Parameters 153 | 154 | **Fingerprint Size**: Larger fingerprints = fewer false positives but more memory usage 155 | 156 | **Bucket Size**: Larger buckets = Faster inserts (fewer evictions), but slower lookups, and slightly higher FPR 157 | 158 | **Max Evictions**: 159 | - 0 = No evictions (faster but may fail to insert occasionally) 160 | - Higher values = Better space utilization but slower inserts when load factor is high 161 | 162 | ## Concurrency Model 163 | 164 | All operations use atomic compare-exchange loops instead of traditional locks, with optimistic 165 | concurrency control for read operations. The only exception is when inserting with evictions, 166 | where an atomic-based lock is used to ensure consistency. 
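In practice the read path is a version-validated retry loop: snapshot a version counter, perform the lock-free read, and retry only if an exclusive multi-step update happened in between. The sketch below illustrates that general pattern with plain standard-library atomics; it is a simplified stand-in, not the filter's actual internals.

```rust
use std::sync::atomic::{AtomicUsize, Ordering};

// Illustrative pattern only: a version-validated "optimistic" read.
// Writers are assumed to bump `version` after any multi-step update of `data`.
fn optimistic_read(version: &AtomicUsize, data: &AtomicUsize) -> usize {
    loop {
        let before = version.load(Ordering::Acquire); // snapshot the version
        let value = data.load(Ordering::Acquire);     // lock-free read of the data
        if version.load(Ordering::Acquire) == before {
            return value; // no conflicting update observed; the read is consistent
        }
        // the version changed mid-read; retry with a fresh snapshot
    }
}

let version = AtomicUsize::new(0);
let data = AtomicUsize::new(7);
assert_eq!(optimistic_read(&version, &data), 7);
```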
167 | 168 | ## Error Handling 169 | 170 | The main error type is `Error::NotEnoughSpace`, returned when the filter cannot accommodate more items: 171 | 172 | ```rust 173 | use atomic_cuckoo_filter::{CuckooFilter, Error}; 174 | 175 | let small_filter = CuckooFilter::builder() 176 | .capacity(10) 177 | .max_evictions(0) // Disable evictions 178 | .build() 179 | .unwrap(); 180 | 181 | // Fill the filter 182 | for i in 0..20 { 183 | match small_filter.insert(&i) { 184 | Ok(()) => println!("Inserted {}", i), 185 | Err(Error::NotEnoughSpace) => { 186 | println!("Filter is full at {} items", small_filter.len()); 187 | break; 188 | } 189 | } 190 | } 191 | ``` 192 | 193 | ## Testing 194 | 195 | Run the test suite: 196 | 197 | ```bash 198 | # Unit tests 199 | cargo test 200 | 201 | # Benchmarks 202 | cargo bench 203 | ``` 204 | 205 | ## Benchmarks 206 | 207 | - Environment: rustc 1.90.0-nightly (ace633090 2025-07-23), Apple M4 Pro 208 | - Command: `cargo +nightly bench -- --nocapture` 209 | 210 | Basic (single-threaded): 211 | 212 | ``` 213 | contains_false ~ 39.36 ns/iter 214 | contains_true ~ 24.60 ns/iter 215 | contains_with_max_evictions_0 ~ 20.37 ns/iter 216 | insert_and_remove ~ 111.66 ns/iter 217 | insert_and_remove_with_max_evictions_0 ~ 68.10 ns/iter 218 | insert_into_full_filter ~ 27.20 µs/iter 219 | insert_unique ~ 25.10 ns/iter 220 | ``` 221 | 222 | Concurrent (multi-threaded): 223 | 224 | ``` 225 | concurrent_contains ~ 36.93 ns/iter 226 | concurrent_contains_under_write_contention ~ 138.05 ns/iter 227 | ``` 228 | 229 | Comparison suite: 230 | 231 | This suite uses the reference [cuckoofilter](https://crates.io/crates/cuckoofilter) crate (dev-dependency `cuckoofilter = "0.5"`) as the baseline for comparison. 232 | 233 | ``` 234 | concurrent_contains ~ 3.34 µs/iter 235 | concurrent_contains_under_write_contention ~ 2.49 µs/iter 236 | contains_false ~ 14.31 ns/iter 237 | contains_true ~ 26.78 ns/iter 238 | insert_and_remove ~ 83.70 ns/iter 239 | insert_into_full_filter ~ 16.63 µs/iter 240 | insert_unique ~ 27.15 ns/iter 241 | ``` 242 | 243 | ## Safety and Guarantees 244 | 245 | - **Thread Safety**: All operations are thread-safe and can be called concurrently 246 | - **Memory Safety**: No unsafe code in the public API (uses `parking_lot_core` internally) 247 | 248 | ## License 249 | 250 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. 251 | -------------------------------------------------------------------------------- /benches/basic.rs: -------------------------------------------------------------------------------- 1 | #![feature(test)] 2 | 3 | extern crate test; 4 | 5 | use atomic_cuckoo_filter::CuckooFilter; 6 | use test::Bencher; 7 | 8 | /// Benchmarks basic single-threaded insert and remove performance of the atomic 9 | /// cuckoo filter. This provides baseline performance metrics for the lock-free 10 | /// implementation without any concurrent access. 11 | /// 12 | /// Setup: 131k capacity filter with 8-bit fingerprints 13 | /// Test: Continuous insert/remove cycle with a sliding window of 100k items 14 | #[bench] 15 | fn insert_and_remove(b: &mut Bencher) { 16 | let filter = CuckooFilter::builder() 17 | .capacity(131072) 18 | .fingerprint_size(8) 19 | .build() 20 | .unwrap(); 21 | let mut i = 0; 22 | b.iter(|| { 23 | i += 1; 24 | let _ = filter.insert(&i); 25 | filter.remove(&(i - 100000)); // Remove item from 100k iterations ago 26 | }); 27 | } 28 | 29 | /// Benchmarks single-threaded insert_unique performance. 
insert_unique provides 30 | /// atomic test-and-insert semantics, ensuring items are only inserted if they 31 | /// don't already exist in the filter. 32 | /// 33 | /// Setup: 131k capacity filter with 8-bit fingerprints, initially empty 34 | /// Test: Continuous insert_unique operations with incrementing u16 values 35 | #[bench] 36 | fn insert_unique(b: &mut Bencher) { 37 | let filter = CuckooFilter::builder() 38 | .capacity(131072) 39 | .fingerprint_size(8) 40 | .build() 41 | .unwrap(); 42 | let mut i: u16 = 0; 43 | b.iter(|| { 44 | i += 1; 45 | let _ = filter.insert_unique(&i); 46 | }); 47 | } 48 | 49 | /// Benchmarks insert/remove performance when the filter is configured with 50 | /// zero evictions allowed. In this case, the filter can do all operations 51 | /// atomically without any locks. 52 | /// 53 | /// Setup: 131k capacity filter with max_evictions=0 and 8-bit fingerprints 54 | /// Test: Insert/remove cycle with sliding window, no eviction attempts allowed 55 | #[bench] 56 | fn insert_and_remove_with_max_evictions_0(b: &mut Bencher) { 57 | let filter = CuckooFilter::builder() 58 | .capacity(131072) 59 | .max_evictions(0) // No evictions allowed - faster failure on collisions 60 | .fingerprint_size(8) 61 | .build() 62 | .unwrap(); 63 | let mut i = 0; 64 | b.iter(|| { 65 | i += 1; 66 | let _ = filter.insert(&i); 67 | filter.remove(&(i - 100000)) // Remove item from 100k iterations ago 68 | }); 69 | } 70 | 71 | /// Benchmarks insert performance as the filter becomes increasingly full. 72 | /// This tests how performance degrades as the load factor increases and 73 | /// hash collisions become more frequent, requiring more eviction attempts. 74 | /// 75 | /// Setup: 131k capacity filter with 8-bit fingerprints, initially empty 76 | /// Test: Continuous insertions without any removes until filter reaches capacity 77 | #[bench] 78 | fn insert_into_full_filter(b: &mut Bencher) { 79 | let filter = CuckooFilter::builder() 80 | .capacity(131072) 81 | .fingerprint_size(8) 82 | .build() 83 | .unwrap(); 84 | let mut i = 0; 85 | b.iter(|| { 86 | i += 1; 87 | let _ = filter.insert(&i); 88 | }); 89 | } 90 | 91 | /// Benchmarks contains() performance when querying for items that exist in the filter. 92 | /// This tests positive lookup performance with a fully-populated filter, measuring 93 | /// the cost of successful hash table lookups. 94 | /// 95 | /// Setup: 131k capacity filter pre-populated with all u16 values (0-65535) 96 | /// Test: Cycling through contains() calls for values that definitely exist 97 | #[bench] 98 | fn contains_true(b: &mut Bencher) { 99 | let filter = CuckooFilter::builder() 100 | .capacity(131072) 101 | .fingerprint_size(8) 102 | .build() 103 | .unwrap(); 104 | // Pre-populate with all possible u16 values 105 | for i in 0..=65535u16 { 106 | filter.insert(&i).unwrap(); 107 | } 108 | let mut i: u16 = 0; 109 | b.iter(|| { 110 | i += 1; 111 | filter.contains(&i); 112 | }); 113 | } 114 | 115 | /// Benchmarks contains() performance when querying for items that don't exist. 116 | /// This tests negative lookup performance with an empty filter, measuring 117 | /// the cost of failed hash table lookups. 
118 | /// 119 | /// Setup: 131k capacity filter with 8-bit fingerprints, completely empty 120 | /// Test: Continuous contains() calls for items that definitely don't exist 121 | #[bench] 122 | fn contains_false(b: &mut Bencher) { 123 | let filter = CuckooFilter::builder() 124 | .capacity(131072) 125 | .fingerprint_size(8) 126 | .build() 127 | .unwrap(); 128 | let mut i: u16 = 0; 129 | b.iter(|| { 130 | i += 1; 131 | filter.contains(&i); 132 | }); 133 | } 134 | 135 | /// Benchmarks contains() performance with a filter configured for zero evictions. 136 | /// In this case, the filter can do all operations atomically without any optimistic concurrency control. 137 | /// 138 | /// Setup: 131k capacity filter with max_evictions=0, attempt to insert all u16 values 139 | /// Test: Contains() calls for values that may or may not exist (depending on insertion success) 140 | #[bench] 141 | fn contains_with_max_evictions_0(b: &mut Bencher) { 142 | let filter = CuckooFilter::builder() 143 | .capacity(131072) 144 | .max_evictions(0) // No evictions - some insertions may fail due to collisions 145 | .build() 146 | .unwrap(); 147 | // Attempt to insert all u16 values (some may fail due to collisions) 148 | for i in 0..=65535u16 { 149 | let _ = filter.insert(&i); 150 | } 151 | let mut i: u16 = 0; 152 | b.iter(|| { 153 | i += 1; 154 | filter.contains(&i); 155 | }); 156 | } 157 | -------------------------------------------------------------------------------- /benches/compare.rs: -------------------------------------------------------------------------------- 1 | #![feature(test)] 2 | 3 | extern crate test; 4 | 5 | use cuckoofilter::CuckooFilter; 6 | use std::hash::DefaultHasher; 7 | use std::sync::RwLock; 8 | use std::sync::atomic::AtomicBool; 9 | use std::sync::{Arc, atomic::Ordering}; 10 | use std::thread; 11 | use test::Bencher; 12 | 13 | /// Benchmarks basic single-threaded insert and remove performance using the 14 | /// reference cuckoofilter implementation. This provides a baseline for 15 | /// comparing against the atomic implementation. 16 | /// 17 | /// Setup: 131k capacity filter using DefaultHasher 18 | /// Test: Continuous insert/remove cycle with a sliding window of 100k items 19 | #[bench] 20 | fn insert_and_remove(b: &mut Bencher) { 21 | let mut filter = CuckooFilter::::with_capacity(131072); 22 | let mut i = 0; 23 | b.iter(|| { 24 | i += 1; 25 | let _ = filter.add(&i); 26 | filter.delete(&(i - 100000)); 27 | }); 28 | } 29 | 30 | /// Benchmarks single-threaded test_and_add performance (insert_unique equivalent). 31 | /// test_and_add ensures an item is only inserted if it doesn't already exist. 32 | /// 33 | /// Setup: 131k capacity filter, initially empty 34 | /// Test: Continuous test_and_add operations with incrementing u16 values 35 | #[bench] 36 | fn insert_unique(b: &mut Bencher) { 37 | let mut filter = CuckooFilter::::with_capacity(131072); 38 | let mut i: u16 = 0; 39 | b.iter(|| { 40 | i += 1; 41 | let _ = filter.test_and_add(&i); 42 | }); 43 | } 44 | 45 | /// Benchmarks insert performance when the filter becomes increasingly full. 46 | /// This tests how performance degrades as the filter reaches capacity and 47 | /// hash collisions become more frequent. 
48 | /// 49 | /// Setup: 131k capacity filter, initially empty 50 | /// Test: Continuous insertions until filter is full (no removes) 51 | #[bench] 52 | fn insert_into_full_filter(b: &mut Bencher) { 53 | let mut filter = CuckooFilter::::with_capacity(131072); 54 | let mut i = 0; 55 | b.iter(|| { 56 | i += 1; 57 | let _ = filter.add(&i); 58 | }); 59 | } 60 | 61 | /// Benchmarks contains() performance when querying for items that exist in the filter. 62 | /// This tests positive lookup performance with a fully-populated filter. 63 | /// 64 | /// Setup: 131k capacity filter pre-populated with all u16 values (0-65535) 65 | /// Test: Cycling through contains() calls for values that definitely exist 66 | #[bench] 67 | fn contains_true(b: &mut Bencher) { 68 | let mut filter = CuckooFilter::::with_capacity(131072); 69 | // Pre-populate with all possible u16 values 70 | for i in 0..=65535u16 { 71 | filter.add(&i).unwrap(); 72 | } 73 | let mut i: u16 = 0; 74 | b.iter(|| { 75 | i += 1; 76 | filter.contains(&i); 77 | }); 78 | } 79 | 80 | /// Benchmarks contains() performance when querying for items that don't exist. 81 | /// This tests negative lookup performance with an empty filter. 82 | /// 83 | /// Setup: 131k capacity filter, completely empty 84 | /// Test: Continuous contains() calls for items that definitely don't exist 85 | #[bench] 86 | fn contains_false(b: &mut Bencher) { 87 | let filter = CuckooFilter::::with_capacity(131072); 88 | let mut i: u16 = 0; 89 | b.iter(|| { 90 | i += 1; 91 | filter.contains(&i); 92 | }); 93 | } 94 | 95 | /// Benchmarks concurrent read performance using RwLock-protected filter. 96 | /// This tests read scalability compared to the lock-free atomic implementation. 97 | /// 98 | /// Setup: 131k capacity filter with 100k pre-inserted items, protected by RwLock 99 | /// Scenario: 10 background threads doing continuous reads while main thread benchmarks reads 100 | /// Note: Uses read locks for all contains() operations 101 | #[bench] 102 | fn concurrent_contains(b: &mut Bencher) { 103 | let filter = Arc::new(RwLock::new(CuckooFilter::::with_capacity( 104 | 131072, 105 | ))); 106 | let stop_flag = Arc::new(AtomicBool::new(false)); 107 | let mut handles = vec![]; 108 | 109 | // Pre-populate with 100k items (even numbers) using write lock 110 | { 111 | let f = filter.clone(); 112 | let mut f = f.write().unwrap(); 113 | for i in 0..100000 { 114 | f.add(&(i * 2)).unwrap(); 115 | } 116 | } 117 | 118 | // Start 10 background threads doing continuous contains() with read locks 119 | for _ in 0..10 { 120 | let f = filter.clone(); 121 | let stop = stop_flag.clone(); 122 | handles.push(thread::spawn(move || { 123 | let mut i = 0; 124 | while !stop.load(Ordering::Relaxed) { 125 | if i == 200000 { 126 | i = 0; 127 | } else { 128 | i += 1; 129 | } 130 | f.read().unwrap().contains(&i); 131 | } 132 | })); 133 | } 134 | 135 | // Benchmark contains() performance using read locks 136 | let mut i = 0; 137 | b.iter(|| { 138 | if i == 200000 { 139 | i = 0; 140 | } else { 141 | i += 1; 142 | } 143 | filter.read().unwrap().contains(&i); 144 | }); 145 | 146 | // Clean up background threads 147 | stop_flag.store(true, Ordering::Relaxed); 148 | for h in handles { 149 | h.join().unwrap(); 150 | } 151 | } 152 | 153 | /// Benchmarks read performance while background threads are writing using RwLock. 154 | /// This tests read/write contention compared to the lock-free atomic implementation. 
155 | /// 156 | /// Setup: 131k capacity filter protected by RwLock 157 | /// Scenario: 10 background threads doing write operations (insert/remove) 158 | /// while main thread benchmarks read performance 159 | #[bench] 160 | fn concurrent_contains_under_write_contention(b: &mut Bencher) { 161 | let filter = Arc::new(RwLock::new(CuckooFilter::::with_capacity( 162 | 131072, 163 | ))); 164 | let stop_flag = Arc::new(AtomicBool::new(false)); 165 | let mut handles = vec![]; 166 | 167 | // Start 10 background threads doing write operations (insert/remove) 168 | for c in 0..10 { 169 | let f = filter.clone(); 170 | let stop = stop_flag.clone(); 171 | handles.push(thread::spawn(move || { 172 | let mut i: u16 = c; 173 | while !stop.load(Ordering::Relaxed) { 174 | i += 10; 175 | let _ = f.write().unwrap().add(&i); 176 | f.write().unwrap().delete(&(i - 10000)); 177 | } 178 | })); 179 | } 180 | 181 | // Benchmark read performance under write contention 182 | let mut i: u16 = 0; 183 | b.iter(|| { 184 | i += 1; 185 | filter.read().unwrap().contains(&i); 186 | }); 187 | 188 | // Clean up background threads 189 | stop_flag.store(true, Ordering::Relaxed); 190 | for h in handles { 191 | h.join().unwrap(); 192 | } 193 | } 194 | -------------------------------------------------------------------------------- /benches/concurrent.rs: -------------------------------------------------------------------------------- 1 | #![feature(test)] 2 | 3 | extern crate test; 4 | 5 | use atomic_cuckoo_filter::CuckooFilter; 6 | use std::sync::atomic::AtomicBool; 7 | use std::sync::{Arc, atomic::Ordering}; 8 | use std::thread; 9 | use test::Bencher; 10 | 11 | /// Benchmarks concurrent read performance (contains() calls) while multiple background 12 | /// threads are also performing reads. This tests the filter's ability to handle 13 | /// high-concurrency read workloads without contention. 14 | /// 15 | /// Setup: 131k capacity filter with 100k pre-inserted items 16 | /// Scenario: 10 background threads continuously calling contains() while main thread benchmarks contains() 17 | #[bench] 18 | fn concurrent_contains(b: &mut Bencher) { 19 | let filter = Arc::new( 20 | CuckooFilter::builder() 21 | .capacity(131072) 22 | .fingerprint_size(8) 23 | .build() 24 | .unwrap(), 25 | ); 26 | let stop_flag = Arc::new(AtomicBool::new(false)); 27 | let mut handles = vec![]; 28 | 29 | // Pre-populate with 100k items (even numbers) 30 | for i in 0..100000 { 31 | filter.insert(&(i * 2)).unwrap(); 32 | } 33 | 34 | // Start 10 background threads doing continuous contains() calls 35 | for _ in 0..10 { 36 | let f = filter.clone(); 37 | let stop = stop_flag.clone(); 38 | handles.push(thread::spawn(move || { 39 | let mut i = 0; 40 | while !stop.load(Ordering::Relaxed) { 41 | if i == 200000 { 42 | i = 0; 43 | } else { 44 | i += 1; 45 | } 46 | f.contains(&i); 47 | } 48 | })) 49 | } 50 | 51 | // Benchmark contains() calls in main thread 52 | let mut i = 0; 53 | b.iter(|| { 54 | if i == 200000 { 55 | i = 0; 56 | } else { 57 | i += 1; 58 | } 59 | filter.contains(&i); 60 | }); 61 | 62 | // Clean up background threads 63 | stop_flag.store(true, Ordering::Relaxed); 64 | for h in handles { 65 | h.join().unwrap(); 66 | } 67 | } 68 | 69 | /// Benchmarks contains() performance while background threads are actively 70 | /// inserting and removing items. This tests read performance under write contention. 
71 | /// 72 | /// Setup: 131k capacity filter, initially empty 73 | /// Scenario: 10 background threads inserting new items and removing old ones, 74 | /// while main thread benchmarks contains() performance 75 | #[bench] 76 | fn concurrent_contains_under_write_contention(b: &mut Bencher) { 77 | let filter = Arc::new( 78 | CuckooFilter::builder() 79 | .capacity(131072) 80 | .fingerprint_size(8) 81 | .build() 82 | .unwrap(), 83 | ); 84 | let stop_flag = Arc::new(AtomicBool::new(false)); 85 | let mut handles = vec![]; 86 | 87 | // Start 10 background threads doing insert/remove operations 88 | for c in 0..10 { 89 | let f = filter.clone(); 90 | let stop = stop_flag.clone(); 91 | handles.push(thread::spawn(move || { 92 | let mut i: u16 = c; 93 | while !stop.load(Ordering::Relaxed) { 94 | i += 10; 95 | let _ = f.insert(&i); 96 | f.remove(&(i - 10000)); 97 | } 98 | })) 99 | } 100 | 101 | // Benchmark contains() calls while background threads are modifying the filter 102 | let mut i: u16 = 0; 103 | b.iter(|| { 104 | i += 1; 105 | filter.contains(&i); 106 | }); 107 | 108 | // Clean up background threads 109 | stop_flag.store(true, Ordering::Relaxed); 110 | for h in handles { 111 | h.join().unwrap(); 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | // Lock-Free Concurrent Cuckoo Filter Implementation 2 | // A high-performance probabilistic data structure for efficient set membership testing 3 | // with better space efficiency than Bloom filters, support for deletions, and 4 | // fully concurrent operations using atomic operations and lock-free algorithms. 5 | 6 | use derive_builder::Builder; 7 | use rand::Rng; 8 | use std::collections::HashSet; 9 | use std::collections::hash_map::DefaultHasher; 10 | use std::hash::{Hash, Hasher}; 11 | use std::hint; 12 | use std::marker::PhantomData; 13 | use std::sync::atomic::{AtomicUsize, Ordering}; 14 | 15 | /// Maximum number of spin-loop iterations before parking a thread. 16 | /// This balances CPU usage vs. latency - spinning avoids kernel calls for 17 | /// short waits, but we park threads to avoid wasting CPU on long waits. 18 | const MAX_SPIN: usize = 100; 19 | 20 | /// Error type for Cuckoo Filter insert operation 21 | #[derive(Debug, thiserror::Error, PartialEq)] 22 | pub enum Error { 23 | /// Returned when the filter is full and cannot accommodate more elements 24 | #[error("Not enough space to store this item.")] 25 | NotEnoughSpace, 26 | } 27 | 28 | /// Types of locks that can be acquired on the filter 29 | #[derive(PartialEq)] 30 | pub enum LockKind { 31 | /// Optimistic version tracking - does not block other operations but captures 32 | /// a version number to detect if data changed during the operation 33 | Optimistic, 34 | /// Exclusive among writers only - prevents other writers but allows concurrent readers 35 | WriterExclusive, 36 | /// Fully exclusive access - blocks all other operations (used only during evictions) 37 | FullyExclusive, 38 | } 39 | 40 | /// A sophisticated lock implementation designed for concurrent cuckoo filter operations. 41 | /// 42 | /// This is NOT a traditional mutex but an atomic-based synchronization mechanism that 43 | /// enables three distinct concurrency modes: 44 | /// 45 | /// 1. **Optimistic locks**: Allow maximum concurrency - multiple readers and writers 46 | /// can proceed simultaneously. Used for optimistic reads that detect data races. 
47 | /// 48 | /// 2. **WriterExclusive locks**: Mutual exclusion among writers only - prevents 49 | /// concurrent modifications but allows concurrent reads. 50 | /// 51 | /// 3. **FullyExclusive locks**: Complete mutual exclusion - blocks all operations. 52 | /// Only used during complex eviction chains to ensure consistency. 53 | /// 54 | /// ## Version Encoding Scheme 55 | /// The atomic usize encodes both lock state and version information: 56 | /// - **Bits 0-1**: Lock kind (0=Optimistic, 1=WriterExclusive, 2=FullyExclusive) 57 | /// - **Bits 2-63**: Version counter (incremented on FullyExclusive release) 58 | /// 59 | /// This allows optimistic readers to detect when their read might be stale by 60 | /// comparing version numbers before and after the operation. 61 | pub struct Lock<'a> { 62 | /// Reference to the shared atomic value encoding lock state and version 63 | atomic: &'a AtomicUsize, 64 | /// Snapshot of the atomic value when this lock was acquired. 65 | /// Used for optimistic concurrency control and version tracking. 66 | /// The lower 2 bits indicate lock type, upper bits track version changes. 67 | version: usize, 68 | /// The type of lock held by this instance 69 | kind: LockKind, 70 | /// Counter for spin attempts before transitioning to thread parking. 71 | /// Implements adaptive spinning to balance latency vs CPU usage. 72 | retry: usize, 73 | } 74 | 75 | impl<'a> Lock<'a> { 76 | /// Create a new lock of the specified kind 77 | /// Blocks until the lock can be acquired 78 | fn new(atomic: &'a AtomicUsize, kind: LockKind) -> Self { 79 | let mut lock = Self { 80 | atomic, 81 | version: 0, 82 | kind, 83 | retry: 0, 84 | }; 85 | match lock.kind { 86 | LockKind::Optimistic => loop { 87 | // For optimistic locks, we can proceed as long as there's no FullyExclusive lock 88 | lock.version = atomic.load(Ordering::Relaxed); 89 | if Self::kind(lock.version) != LockKind::FullyExclusive { 90 | return lock; 91 | } 92 | lock.spin_or_park() 93 | }, 94 | _ => loop { 95 | // For writer exclusive and fully exclusive locks, we need to ensure no exclusive lock is acquired 96 | lock.version = atomic.load(Ordering::Relaxed); 97 | if Self::kind(lock.version) != LockKind::Optimistic { 98 | lock.spin_or_park(); 99 | continue; 100 | } 101 | // Update lower bits of the version: 1 for WriterExclusive, 2 for FullyExclusive 102 | let new_version = if lock.kind == LockKind::WriterExclusive { 103 | lock.version + 1 104 | } else { 105 | lock.version + 2 106 | }; 107 | if atomic 108 | .compare_exchange_weak( 109 | lock.version, 110 | new_version, 111 | Ordering::Release, 112 | Ordering::Relaxed, 113 | ) 114 | .is_ok() 115 | { 116 | return lock; 117 | } 118 | }, 119 | } 120 | } 121 | 122 | /// Upgrade a WriterExclusive lock to a FullyExclusive lock 123 | /// This assumes the current thread holds the writer exclusive lock. 
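    ///
    /// As an illustration (numbers are hypothetical): if the WriterExclusive lock was
    /// acquired when the atomic held `0b1000` (version counter 2, kind Optimistic),
    /// acquisition stored `0b1001` (kind WriterExclusive) while `self.version` kept the
    /// snapshot `0b1000`. `upgrade` then stores `self.version + 2 = 0b1010`
    /// (kind FullyExclusive), and the eventual drop stores `self.version + 4 = 0b1100`,
    /// bumping the version counter to 3 and invalidating any optimistic readers that
    /// started before the upgrade.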
124 | fn upgrade(&mut self) { 125 | self.atomic.store(self.version + 2, Ordering::Release); 126 | self.kind = LockKind::FullyExclusive; 127 | } 128 | 129 | /// Check if the lock is outdated (version changed) or a FullyExclusive lock is acquired 130 | /// Used for optimistic concurrency control 131 | fn is_outdated(&self) -> bool { 132 | let version = self.atomic.load(Ordering::Acquire); 133 | Self::kind(version) == LockKind::FullyExclusive || version >> 2 != self.version >> 2 134 | } 135 | 136 | /// Get the key for parking a thread 137 | /// Different keys are used for optimistic and exclusive locks 138 | fn park_key(&self) -> usize { 139 | let key = self.atomic.as_ptr() as usize; 140 | match self.kind { 141 | LockKind::Optimistic => key, 142 | _ => key + 1, 143 | } 144 | } 145 | 146 | /// Spin or park the thread when waiting for a lock 147 | fn spin_or_park(&mut self) { 148 | if self.retry > MAX_SPIN { 149 | // After MAX_SPIN attempts, park the thread 150 | self.retry = 0; 151 | unsafe { 152 | parking_lot_core::park( 153 | self.park_key(), 154 | || self.atomic.load(Ordering::Acquire) == self.version, 155 | || (), 156 | |_, _| (), 157 | parking_lot_core::DEFAULT_PARK_TOKEN, 158 | None, 159 | ); 160 | } 161 | } else { 162 | // Otherwise, spin 163 | self.retry += 1; 164 | hint::spin_loop(); 165 | } 166 | } 167 | 168 | /// Extract the lock kind from the lower 2 bits of a version value 169 | fn kind(version: usize) -> LockKind { 170 | match version & 0b11 { 171 | 0 => LockKind::Optimistic, 172 | 1 => LockKind::WriterExclusive, 173 | 2 => LockKind::FullyExclusive, 174 | _ => panic!("Invalid Lock"), 175 | } 176 | } 177 | } 178 | 179 | impl Drop for Lock<'_> { 180 | /// Release the lock when it goes out of scope 181 | fn drop(&mut self) { 182 | match self.kind { 183 | LockKind::Optimistic => return, // No need to do anything for Optimistic locks 184 | LockKind::WriterExclusive => { 185 | // For WriterExclusive locks, release the lock without incrementing the version 186 | self.atomic.store(self.version, Ordering::Release); 187 | } 188 | LockKind::FullyExclusive => { 189 | // For FullyExclusive locks, increment the version to invalidate Optimistic locks 190 | self.atomic.store(self.version + 4, Ordering::Release); 191 | } 192 | } 193 | 194 | // Unpark waiting threads 195 | let optimistic_key = self.atomic.as_ptr() as usize; 196 | let exclusive_key = optimistic_key + 1; 197 | unsafe { 198 | // Unpark all waiting optimistic locks 199 | parking_lot_core::unpark_all(optimistic_key, parking_lot_core::DEFAULT_UNPARK_TOKEN); 200 | // Unpark one waiting exclusive lock (either WriterExclusive or FullyExclusive) 201 | parking_lot_core::unpark_one(exclusive_key, |_| parking_lot_core::DEFAULT_UNPARK_TOKEN); 202 | } 203 | } 204 | } 205 | 206 | /// A highly concurrent lock-free probabilistic data structure for set membership testing. 207 | /// 208 | /// ## What Makes It "Cuckoo" 209 | /// 210 | /// Named after the cuckoo bird's behavior of displacing other birds' eggs, this filter 211 | /// uses **cuckoo hashing** where each item can be stored in one of two possible locations. 212 | /// When both locations are full, existing items are "evicted" (like cuckoo eggs) and 213 | /// relocated to their alternate position, creating eviction chains. 214 | /// 215 | /// ## Algorithm Overview 216 | /// 217 | /// 1. **Fingerprints**: Items are reduced to small fingerprints (4-32 bits) instead of 218 | /// storing full keys, providing excellent space efficiency. 219 | /// 220 | /// 2. 
**Dual Hashing**: Each item has two possible bucket locations computed from its hash. 221 | /// This provides better space efficiency and flexibility when inserting and removing items. 222 | /// 223 | /// 3. **Eviction Chains**: When both buckets are full, a random item is evicted from one 224 | /// bucket and moved to its alternate location, potentially triggering a chain of evictions. 225 | /// 226 | /// 4. **Lock-Free Concurrency**: All operations use atomic compare-exchange loops instead 227 | /// of traditional locks, with optimistic concurrency control for read operations. 228 | /// The only exception is when inserting with evictions, where a FullyExclusive lock is used 229 | /// to ensure consistency. 230 | /// 231 | /// ## Key Advantages Over Bloom Filters 232 | /// 233 | /// - **Deletions supported**: Items can be removed without false negatives 234 | /// - **Better space efficiency**: ~20-30% less memory for same false positive rate 235 | /// - **Bounded lookup time**: Always at most 2 bucket checks, never more 236 | /// - **High concurrency**: Lock-free design enables excellent parallel performance 237 | /// 238 | /// ## Concurrency Model 239 | /// 240 | /// - **Reads**: Optimistic, can proceed concurrently with most operations 241 | /// - **Simple writes**: Use atomic compare-exchange loops without blocking other operations 242 | /// - **WriterExclusive locks**: Used for removing items, and for unique insertions 243 | /// - **Complex evictions**: Use FullyExclusive locks to ensure consistency 244 | /// 245 | /// ## Time Complexity 246 | /// 247 | /// - **Lookup**: O(1) 248 | /// - **Deletion**: O(1) 249 | /// - **Insertion**: Amortized O(1) due to eviction chains, but the number of evictions is bounded 250 | #[derive(Debug, Builder)] 251 | #[builder( 252 | pattern = "owned", 253 | build_fn(private, name = "base_build", validate = "Self::validate") 254 | )] 255 | pub struct CuckooFilter 256 | where 257 | H: Hasher + Default, 258 | { 259 | // Configuration parameters 260 | /// Maximum number of elements the filter can store 261 | #[builder(default = "1048576")] 262 | capacity: usize, 263 | 264 | /// Size of fingerprints in bits (must be 4, 8, 16, or 32) 265 | #[builder(default = "16")] 266 | fingerprint_size: usize, 267 | 268 | /// Number of fingerprints per bucket 269 | #[builder(default = "4")] 270 | bucket_size: usize, 271 | 272 | /// Maximum number of evictions to try before giving up 273 | #[builder(default = "500")] 274 | max_evictions: usize, 275 | 276 | // Internal values - automatically derived from the configuration 277 | /// Number of fingerprints that can be stored in a single atomic value 278 | #[builder(setter(skip))] 279 | fingerprints_per_atomic: usize, 280 | 281 | /// Number of buckets in the filter (power of 2) 282 | #[builder(setter(skip))] 283 | num_buckets: usize, 284 | 285 | /// Bit mask for extracting fingerprints 286 | #[builder(setter(skip))] 287 | fingerprint_mask: usize, 288 | 289 | /// Storage for buckets, implemented as a vector of atomic values 290 | #[builder(setter(skip))] 291 | buckets: Vec, 292 | 293 | /// Atomic value used for locking 294 | #[builder(setter(skip))] 295 | lock: AtomicUsize, 296 | 297 | /// Counter for the number of elements in the filter 298 | #[builder(setter(skip))] 299 | counter: AtomicUsize, 300 | 301 | /// Phantom data for the hasher type 302 | #[builder(setter(skip))] 303 | _hasher: PhantomData, 304 | } 305 | 306 | impl CuckooFilter { 307 | /// Insert an item into the filter 308 | /// 309 | /// This operation first attempts 
a direct insertion without acquiring a lock. 310 | /// If that fails due to bucket collisions, it falls back to the eviction-based 311 | /// insertion algorithm which may require a write lock. 312 | /// 313 | /// Concurrent operations are safely handled through atomic operations. 314 | /// 315 | /// Returns Ok(()) if the item was inserted, or Error::NotEnoughSpace if the filter is full 316 | pub fn insert(&self, item: &T) -> Result<(), Error> { 317 | let (index, fingerprint) = self.index_and_fingerprint(item); 318 | self.try_insert(index, fingerprint).or_else(|error| { 319 | let lock = self.lock(LockKind::WriterExclusive).ok_or(error)?; 320 | self.insert_with_evictions(index, fingerprint, lock) 321 | }) 322 | } 323 | 324 | /// Check if an item is in the filter and insert it if is not present (atomically) 325 | /// 326 | /// This method combines lookup and insert into a single atomic operation, 327 | /// ensuring thread safety and consistency even with concurrent operations. 328 | /// 329 | /// Returns Ok(true) if the item was inserted, Ok(false) if it was already present, 330 | /// or Error::NotEnoughSpace if the filter is full 331 | pub fn insert_unique(&self, item: &T) -> Result { 332 | let (index, fingerprint) = self.index_and_fingerprint(item); 333 | if self.lookup_fingerprint(index, fingerprint).is_some() { 334 | return Ok(false); 335 | } 336 | let lock = Lock::new(&self.lock, LockKind::WriterExclusive); 337 | if self.lookup_fingerprint(index, fingerprint).is_some() { 338 | return Ok(false); 339 | } 340 | self.try_insert(index, fingerprint) 341 | .or_else(|error| { 342 | if self.max_evictions == 0 { 343 | return Err(error); 344 | } 345 | self.insert_with_evictions(index, fingerprint, lock) 346 | }) 347 | .map(|_| true) 348 | } 349 | 350 | /// Counts the number of occurrences of an item in the filter. 351 | /// 352 | /// # Notes 353 | /// - This is not a counting filter; it simply counts matching fingerprints in both candidate buckets. 354 | /// - Useful for detecting duplicates or hash collisions, not for precise multiset membership. 355 | /// - The count is limited by the filter's structure: at most `bucket_size * 2` per item. 356 | /// - This method may count false positives due to hash collisions. 357 | pub fn count(&self, item: &T) -> usize { 358 | let (index, fingerprint) = self.index_and_fingerprint(item); 359 | let alt_index = self.alt_index(index, fingerprint); 360 | self.atomic_read( 361 | || { 362 | self.read_bucket(index, Ordering::Acquire) 363 | .filter(|&f| f == fingerprint) 364 | .count() 365 | + self 366 | .read_bucket(alt_index, Ordering::Acquire) 367 | .filter(|&f| f == fingerprint) 368 | .count() 369 | }, 370 | None, 371 | ) 372 | } 373 | 374 | /// Attempts to remove an item from the filter. 375 | /// 376 | /// Returns `true` if the item was successfully removed, or `false` if it was not found. 377 | /// 378 | /// Note: 379 | /// - An item should only be removed if it was previously added. Removing a non-existent 380 | /// item may inadvertently remove a different item due to hash collisions inherent to 381 | /// cuckoo filters. 
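    ///
    /// # Example
    ///
    /// A minimal usage sketch:
    ///
    /// ```
    /// use atomic_cuckoo_filter::CuckooFilter;
    ///
    /// let filter = CuckooFilter::with_capacity(1024);
    /// assert!(!filter.remove(&"missing"));   // nothing to remove from an empty filter
    ///
    /// filter.insert(&"present").unwrap();
    /// assert!(filter.remove(&"present"));    // removes the previously inserted item
    /// assert!(!filter.contains(&"present")); // and it is gone
    /// ```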
382 | pub fn remove(&self, item: &T) -> bool { 383 | let (index, fingerprint) = self.index_and_fingerprint(item); 384 | while let Some((index, sub_index)) = self.lookup_fingerprint(index, fingerprint) { 385 | let _lock = self.lock(LockKind::WriterExclusive); 386 | if self.update_bucket(index, sub_index, fingerprint, 0, Ordering::Release) { 387 | return true; 388 | } 389 | } 390 | false 391 | } 392 | 393 | /// Check if an item is in the filter 394 | /// 395 | /// Returns `true` if the item is possibly in the filter (may have false positives), 396 | /// `false` if it is definitely not in the filter 397 | pub fn contains(&self, item: &T) -> bool { 398 | let (index, fingerprint) = self.index_and_fingerprint(item); 399 | self.atomic_read( 400 | || self.lookup_fingerprint(index, fingerprint).is_some(), 401 | Some(true), 402 | ) 403 | } 404 | 405 | /// Get the number of elements in the filter 406 | pub fn len(&self) -> usize { 407 | self.counter.load(Ordering::Acquire) 408 | } 409 | 410 | /// Check if the filter is empty 411 | pub fn is_empty(&self) -> bool { 412 | self.len() == 0 413 | } 414 | 415 | /// Get the capacity of the filter 416 | pub fn capacity(&self) -> usize { 417 | self.capacity 418 | } 419 | 420 | /// Clear the filter, removing all elements 421 | pub fn clear(&self) { 422 | let _lock = self.lock(LockKind::WriterExclusive); 423 | for atomic in &self.buckets { 424 | let old_value = atomic.swap(0, Ordering::Release); 425 | let removed = (0..self.fingerprints_per_atomic) 426 | .filter(|i| (old_value >> (i * self.fingerprint_size)) & self.fingerprint_mask != 0) 427 | .count(); 428 | if removed > 0 { 429 | self.counter.fetch_sub(removed, Ordering::Release); 430 | } 431 | } 432 | } 433 | 434 | /// Compute the hash of an item 435 | /// Uses the generic hasher H for flexibility and performance 436 | fn hash(&self, data: &T) -> u64 { 437 | let mut hasher = ::default(); 438 | data.hash(&mut hasher); 439 | hasher.finish() 440 | } 441 | 442 | /// Compute the bucket index and fingerprint for an item. 443 | /// 444 | /// 1. **Hash the item**: Use the configured hasher to get a 64-bit hash 445 | /// 2. **Extract fingerprint**: Use multiplication + shift for high-quality 446 | /// distribution across the fingerprint space, then add 1 to avoid zero. 447 | /// 3. **Extract index**: Use bitwise AND with (num_buckets-1) since num_buckets 448 | /// is always a power of 2, providing perfect hash distribution. 
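    ///
    /// As a worked example (illustrative, assuming 16-bit fingerprints and 1024 buckets):
    /// a hash of `2^63` yields `fingerprint = ((2^63 * 0xFFFF) >> 64) + 1 = 32768` and
    /// `index = 2^63 & (1024 - 1) = 0`.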
449 | /// 450 | /// ## Why This Design 451 | /// 452 | /// - **Non-zero fingerprints**: Adding 1 ensures fingerprints are never 0, 453 | /// so 0 can represent empty slots without ambiguity 454 | /// - **Independent bits**: Index uses lower hash bits, fingerprint uses 455 | /// different bits via multiplication, avoiding correlation 456 | /// - **Uniform distribution**: Both index and fingerprint are uniformly 457 | /// distributed across their respective ranges 458 | /// 459 | /// Returns (index, fingerprint) where: 460 | /// - index is the primary bucket index (0 to num_buckets-1) 461 | /// - fingerprint is a compact hash of the item (1 to fingerprint_mask) 462 | fn index_and_fingerprint(&self, item: &T) -> (usize, usize) { 463 | let hash = self.hash(item); 464 | // Compute fingerprint using multiplication and shift for better distribution 465 | let fingerprint = ((hash as u128 * self.fingerprint_mask as u128) >> 64) + 1; 466 | // Compute index using modulo num_buckets (optimized with bitwise AND since num_buckets is a power of 2) 467 | let index = hash as usize & (self.num_buckets - 1); 468 | (index, fingerprint as usize) 469 | } 470 | 471 | /// Computes the alternative bucket index for a given fingerprint using cuckoo hashing. 472 | /// 473 | /// In cuckoo hashing, each item can reside in one of two possible buckets. This function 474 | /// deterministically computes the alternate bucket index from the current index and fingerprint. 475 | /// 476 | /// Properties: 477 | /// 1. Symmetry: `alt_index(alt_index(i, f), f) == i` for any index `i` and fingerprint `f`. 478 | /// 2. Distinctness: For any fingerprint, the two indices are always different. 479 | /// 3. Uniformity: The mapping distributes fingerprints evenly across all buckets. 480 | fn alt_index(&self, index: usize, fingerprint: usize) -> usize { 481 | index ^ (self.hash(&fingerprint) as usize & (self.num_buckets - 1)) 482 | } 483 | 484 | /// Look up a fingerprint at its primary or alternative index 485 | /// Returns `Some((index, sub_index))` if found, None otherwise 486 | fn lookup_fingerprint(&self, index: usize, fingerprint: usize) -> Option<(usize, usize)> { 487 | // First check the primary bucket 488 | self.read_bucket(index, Ordering::Acquire) 489 | .position(|fp| fp == fingerprint) 490 | .map(|sub_index| (index, sub_index)) 491 | .or_else(|| { 492 | // Then check the alternative bucket 493 | let alt_index = self.alt_index(index, fingerprint); 494 | self.read_bucket(alt_index, Ordering::Acquire) 495 | .position(|fp| fp == fingerprint) 496 | .map(|sub_index| (alt_index, sub_index)) 497 | }) 498 | } 499 | 500 | /// Try to insert a fingerprint at its primary or alternative index 501 | /// Returns `Ok(())` if successful, `Error::NotEnoughSpace` if both buckets are full 502 | fn try_insert(&self, index: usize, fingerprint: usize) -> Result<(), Error> { 503 | self.insert_at_index(index, fingerprint).or_else(|_| { 504 | let alt_index = self.alt_index(index, fingerprint); 505 | self.insert_at_index(alt_index, fingerprint) 506 | }) 507 | } 508 | 509 | /// Try to insert a fingerprint at a specific index 510 | /// Returns Ok(()) if successful, Err(Error::NotEnoughSpace) if the bucket is full 511 | fn insert_at_index(&self, index: usize, fingerprint: usize) -> Result<(), Error> { 512 | loop { 513 | let sub_index = self 514 | .read_bucket(index, Ordering::Relaxed) 515 | .position(|i| i == 0) 516 | .ok_or(Error::NotEnoughSpace)?; 517 | 518 | if self.update_bucket(index, sub_index, 0, fingerprint, Ordering::Release) { 519 | return 
Ok(()); 520 | } 521 | } 522 | } 523 | 524 | /// Insert a fingerprint using cuckoo eviction chains when both buckets are full. 525 | /// 526 | /// This method is invoked only as a fallback when direct insertion fails, preserving 527 | /// the optimistic, lock-free fast path for the common case. 528 | /// 529 | /// # Cuckoo Eviction Algorithm 530 | /// 531 | /// When both possible locations for an item are full: 532 | /// 1. **Randomly select** an existing item from one of the full buckets 533 | /// 2. **Evict** that item and insert our new item in its place 534 | /// 3. **Relocate** the evicted item to its alternate location 535 | /// 4. **Repeat** if the alternate location is also full (eviction chain) 536 | /// 5. **Succeed** when we find an empty slot, or **fail** after max_evictions 537 | /// 538 | /// # Implementation Details 539 | /// 540 | /// - **Eviction tracking**: Collects a sequence of planned evictions, which are 541 | /// atomically applied only if the chain succeeds, ensuring atomicity and consistency. 542 | /// - **Lock upgrading**: Starts with a `WriterExclusive` lock, upgrading to 543 | /// `FullyExclusive` only when actually applying the eviction chain, maximizing 544 | /// read concurrency during planning. 545 | /// - **Loop prevention**: Uses a map to track which sub-indices have been tried 546 | /// in each bucket, to ensure early detection of loops in eviction chains. 547 | fn insert_with_evictions( 548 | &self, 549 | mut index: usize, 550 | mut fingerprint: usize, 551 | mut lock: Lock, 552 | ) -> Result<(), Error> { 553 | let mut rng = rand::rng(); 554 | let mut insertions = Vec::with_capacity(self.max_evictions.min(32)); 555 | let mut used_slots = HashSet::with_capacity(self.max_evictions.min(32)); 556 | while insertions.len() <= self.max_evictions { 557 | // Choose a sub-index in this bucket whose global slot has not been used yet in the plan 558 | let base_slot = index * self.bucket_size; 559 | let mut sub_index = rng.random_range(0..self.bucket_size); 560 | if used_slots.contains(&(base_slot + sub_index)) { 561 | sub_index = (0..self.bucket_size) 562 | .find(|&i| !used_slots.contains(&(base_slot + i))) 563 | .ok_or(Error::NotEnoughSpace)?; 564 | } 565 | used_slots.insert(base_slot + sub_index); 566 | insertions.push((index, sub_index, fingerprint)); 567 | 568 | // Evict the fingerprint at the chosen sub-index 569 | fingerprint = self 570 | .read_bucket(index, Ordering::Relaxed) 571 | .nth(sub_index) 572 | .unwrap(); 573 | // Find the alternative index for the evicted fingerprint 574 | index = self.alt_index(index, fingerprint); 575 | 576 | if self.insert_at_index(index, fingerprint).is_ok() { 577 | // Successfully inserted the fingerprint, now apply all evictions 578 | lock.upgrade(); 579 | let mut evicted = fingerprint; 580 | while let Some((index, sub_index, fingerprint)) = insertions.pop() { 581 | self.update_bucket(index, sub_index, evicted, fingerprint, Ordering::Relaxed); 582 | evicted = fingerprint; 583 | } 584 | return Ok(()); 585 | } 586 | } 587 | // Reached the maximum number of evictions, give up 588 | Err(Error::NotEnoughSpace) 589 | } 590 | 591 | /// Atomically read all fingerprints from a bucket using lock-free bit manipulation. 
592 | /// 593 | /// ## Memory Layout Complexity 594 | /// 595 | /// Fingerprints are tightly packed in memory across multiple atomic usize values: 596 | /// - Each bucket contains `bucket_size` fingerprints 597 | /// - Each fingerprint is `fingerprint_size` bits 598 | /// - Multiple fingerprints are packed into each atomic usize 599 | /// - Buckets may span across multiple atomic values 600 | /// 601 | /// ## Algorithm Steps 602 | /// 603 | /// 1. Calculate which atomic values contain this bucket's data 604 | /// 2. Atomically load each relevant atomic value (using Acquire ordering) 605 | /// 3. Extract fingerprints using bit manipulation and masking 606 | /// 4. Handle boundary cases where buckets span multiple atomics 607 | /// 5. Skip any padding bits and return exactly `bucket_size` fingerprints 608 | /// 609 | /// This is completely lock-free - multiple threads can read concurrently, 610 | /// and reads can proceed even during writes (though they might see 611 | /// intermediate states that get resolved by retry logic). 612 | /// 613 | /// Returns an Iterator over the fingerprints in the bucket, (0 = empty slot). 614 | fn read_bucket(&self, index: usize, ordering: Ordering) -> impl Iterator { 615 | let fingerprint_index = index * self.bucket_size; 616 | let bit_index = fingerprint_index * self.fingerprint_size; 617 | let start_index = bit_index / usize::BITS as usize; 618 | let skip_bits = bit_index % usize::BITS as usize; 619 | let skip_fingerprints = skip_bits >> self.fingerprint_size.trailing_zeros(); 620 | // No need to calculate end_index; just iterate from start_index to the end of the bucket 621 | self.buckets[start_index..] 622 | .iter() 623 | .flat_map(move |atomic| { 624 | let atomic_value = atomic.load(ordering); 625 | (0..self.fingerprints_per_atomic).map(move |i| { 626 | (atomic_value 627 | >> (self.fingerprint_size * (self.fingerprints_per_atomic - i - 1))) 628 | & self.fingerprint_mask 629 | }) 630 | }) 631 | .skip(skip_fingerprints) 632 | .take(self.bucket_size) 633 | } 634 | 635 | /// Atomically update a single fingerprint using lock-free compare-exchange. 636 | /// 637 | /// ## Lock-Free Update Algorithm 638 | /// 639 | /// 1. **Locate the target**: Calculate which atomic usize contains the fingerprint 640 | /// and the exact bit position within that atomic value 641 | /// 2. **Read current state**: Load the current atomic value 642 | /// 3. **Verify expectation**: Check that the target position contains `old_value` 643 | /// 4. **Atomic update**: Use compare_exchange_weak to atomically replace `old_value` 644 | /// with `new_value`, but only if the atomic hasn't changed since step 2 645 | /// 5. **Retry on conflict**: If another thread modified the atomic concurrently, 646 | /// restart from step 2 647 | /// 648 | /// ## Concurrency Safety 649 | /// 650 | /// - Uses `compare_exchange_weak` which can fail spuriously on some architectures 651 | /// but is more efficient than the strong version 652 | /// - Employs Release ordering on success to ensure other threads see the change 653 | /// - Updates the global counter atomically to maintain consistency 654 | /// - Returns false if the expected `old_value` is no longer present (indicating 655 | /// another thread already modified this slot) 656 | /// 657 | /// Returns `true` if update succeeded, `false` if the slot no longer contains 658 | /// the expected `old_value` due to concurrent modification. 
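    ///
    /// As an illustration (assuming the default 16-bit fingerprints, 4-slot buckets,
    /// and a 64-bit `usize`, i.e. 4 fingerprints per atomic word): updating
    /// `index = 3`, `sub_index = 2` gives `bit_index = (3 * 4 + 2) * 16 = 224`, so the
    /// target fingerprint lives in atomic word `224 / 64 = 3`, offset `224 % 64 = 32`
    /// bits from the most significant end, and is isolated with
    /// `shift = 64 - 16 - 32 = 16` and the mask `0xFFFF << 16`.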
659 | fn update_bucket( 660 | &self, 661 | index: usize, 662 | sub_index: usize, 663 | old_value: usize, 664 | new_value: usize, 665 | ordering: Ordering, 666 | ) -> bool { 667 | let bit_index = (index * self.bucket_size + sub_index) * self.fingerprint_size; 668 | let atomic_index = bit_index / usize::BITS as usize; 669 | let skip_bits = bit_index % usize::BITS as usize; 670 | let shift = usize::BITS as usize - self.fingerprint_size - skip_bits; 671 | let fingerprint_mask = self.fingerprint_mask << shift; 672 | let atomic = &self.buckets[atomic_index]; 673 | 674 | loop { 675 | let atomic_value = atomic.load(Ordering::Relaxed); 676 | if (atomic_value & fingerprint_mask) >> shift != old_value { 677 | // The expected fingerprint is not present in the atomic value 678 | return false; 679 | } 680 | let new_atomic_value = (atomic_value & !fingerprint_mask) | (new_value << shift); 681 | if atomic 682 | .compare_exchange_weak(atomic_value, new_atomic_value, ordering, Ordering::Relaxed) 683 | .is_ok() 684 | { 685 | // Update the counter based on the change 686 | match (old_value, new_value) { 687 | (0, _) => self.counter.fetch_add(1, Ordering::Release), 688 | (_, 0) => self.counter.fetch_sub(1, Ordering::Release), 689 | (_, _) => 0, 690 | }; 691 | return true; 692 | } 693 | } 694 | } 695 | 696 | /// Acquires a lock on the filter, if necessary. 697 | /// 698 | /// A lock is only required when evictions are enabled (i.e., `max_evictions > 0`). 699 | /// If `max_evictions` is set to 0, no lock is acquired. 700 | /// 701 | /// Returns `Some(Lock)` if a lock is needed, or `None` if no locking is required. 702 | pub fn lock(&self, kind: LockKind) -> Option> { 703 | if self.max_evictions == 0 { 704 | None 705 | } else { 706 | Some(Lock::new(&self.lock, kind)) 707 | } 708 | } 709 | 710 | /// Execute a read operation with optimistic concurrency control and automatic retry. 711 | /// 712 | /// This is the cornerstone of the lock-free design, implementing a sophisticated 713 | /// optimistic concurrency protocol that allows reads to proceed concurrently with 714 | /// most write operations. 715 | /// 716 | /// ## Optimistic Concurrency Protocol 717 | /// 718 | /// 1. **Snapshot version**: Acquire an Optimistic lock (capturing version number) 719 | /// 2. **Execute read**: Run the provided function without any blocking 720 | /// 3. **Validate consistency**: Check if version changed or FullyExclusive lock acquired 721 | /// 4. **Retry or return**: If data may be stale, retry; otherwise return result 722 | /// 723 | /// ## How It Works 724 | /// 725 | /// - **WriterExclusive operations**: Don't invalidate optimistic reads because they 726 | /// coordinate through atomic compare-exchange operations that are linearizable 727 | /// - **FullyExclusive operations**: Do invalidate optimistic reads because they 728 | /// perform complex multi-step updates that require consistency 729 | /// - **Early return optimization**: For operations that can short-circuit (like 730 | /// `contains()` returning true), we skip version validation as an optimization 731 | /// 732 | /// This pattern is essential for achieving lock-free performance while maintaining 733 | /// correctness in the presence of concurrent modifications. 
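    ///
    /// For example, `contains()` calls this as
    /// `self.atomic_read(|| self.lookup_fingerprint(index, fingerprint).is_some(), Some(true))`:
    /// a `true` result is returned immediately, while a `false` result is only trusted
    /// if the version did not change, since a concurrent eviction chain could briefly
    /// move a fingerprint between its two buckets while the read was in progress.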
734 |     fn atomic_read<F, T>(&self, fun: F, early_return: Option<T>) -> T
735 |     where
736 |         F: Fn() -> T,
737 |         T: PartialEq,
738 |     {
739 |         if self.max_evictions == 0 {
740 |             return fun();
741 |         }
742 |         loop {
743 |             let lock = Lock::new(&self.lock, LockKind::Optimistic);
744 |             let result = fun();
745 |             if Some(&result) == early_return.as_ref() || !lock.is_outdated() {
746 |                 return result;
747 |             }
748 |         }
749 |     }
750 | }
751 |
752 | impl CuckooFilter {
753 |     /// Create a new CuckooFilterBuilder with default settings
754 |     pub fn builder() -> CuckooFilterBuilder {
755 |         CuckooFilterBuilder::default()
756 |     }
757 |
758 |     /// Create a new CuckooFilter with default settings
759 |     pub fn new() -> CuckooFilter {
760 |         Self::builder().build().unwrap()
761 |     }
762 |
763 |     /// Create a new CuckooFilter with the specified capacity
764 |     pub fn with_capacity(capacity: usize) -> CuckooFilter {
765 |         Self::builder().capacity(capacity).build().unwrap()
766 |     }
767 | }
768 |
769 | impl Default for CuckooFilter {
770 |     /// Create a new CuckooFilter with default settings
771 |     fn default() -> Self {
772 |         Self::new()
773 |     }
774 | }
775 |
776 | impl CuckooFilterBuilder {
777 |     /// Validate the builder configuration
778 |     fn validate(&self) -> Result<(), String> {
779 |         if let Some(fingerprint_size) = self.fingerprint_size
780 |             && ![4, 8, 16, 32].contains(&fingerprint_size)
781 |         {
782 |             return Err("Invalid fingerprint_size".into());
783 |         }
784 |         if self.bucket_size == Some(0) {
785 |             return Err("bucket_size must be greater than zero".into());
786 |         }
787 |         if self.capacity == Some(0) {
788 |             return Err("capacity must be greater than zero".into());
789 |         }
790 |         Ok(())
791 |     }
792 |
793 |     /// Build a CuckooFilter with the specified configuration
794 |     pub fn build(self) -> Result, CuckooFilterBuilderError> {
795 |         let mut cuckoo_filter = self.base_build()?;
796 |         // Calculate the number of buckets (power of 2)
797 |         cuckoo_filter.num_buckets = cuckoo_filter
798 |             .capacity
799 |             .div_ceil(cuckoo_filter.bucket_size)
800 |             .next_power_of_two();
801 |         // Adjust the capacity to match the actual number of buckets
802 |         cuckoo_filter.capacity = cuckoo_filter.num_buckets * cuckoo_filter.bucket_size;
803 |         // Calculate the fingerprint mask
804 |         cuckoo_filter.fingerprint_mask = ((1u64 << cuckoo_filter.fingerprint_size) - 1) as usize;
805 |         // Calculate the number of fingerprints per atomic value
806 |         cuckoo_filter.fingerprints_per_atomic =
807 |             usize::BITS as usize / cuckoo_filter.fingerprint_size;
808 |         // Calculate the total number of atomic values needed
809 |         let bit_size = cuckoo_filter.capacity * cuckoo_filter.fingerprint_size;
810 |         let atomic_size = bit_size.div_ceil(usize::BITS as usize);
811 |         // Initialize the buckets
812 |         cuckoo_filter.buckets = (0..atomic_size).map(|_| AtomicUsize::new(0)).collect();
813 |         Ok(cuckoo_filter)
814 |     }
815 | }
816 |
--------------------------------------------------------------------------------
/tests/basic.rs:
--------------------------------------------------------------------------------
1 | use ahash::AHasher;
2 | use atomic_cuckoo_filter::{CuckooFilter, CuckooFilterBuilder};
3 | // Helper function to create test data
4 | fn test_items(count: usize) -> Vec<String> {
5 |     (0..count).map(|i| format!("test_item_{i}")).collect()
6 | }
7 |
8 | #[test]
9 | fn test_new_filter() {
10 |     let filter = CuckooFilter::new();
11 |     assert_eq!(filter.len(), 0);
12 |     assert!(filter.is_empty());
13 |     assert_eq!(filter.capacity(), 1048576); // Default capacity
14 | }
15 |
16 | #[test]
17 | fn 
test_with_capacity() { 18 | let filter = CuckooFilter::with_capacity(1000); 19 | assert_eq!(filter.len(), 0); 20 | assert!(filter.is_empty()); 21 | assert_eq!(filter.capacity(), 1024); // Rounded up to power of 2 22 | } 23 | 24 | #[test] 25 | fn test_builder_default() { 26 | let filter = CuckooFilter::builder().build().unwrap(); 27 | assert_eq!(filter.len(), 0); 28 | assert!(filter.is_empty()); 29 | } 30 | 31 | #[test] 32 | fn test_builder_custom_config() { 33 | let filter = CuckooFilter::builder() 34 | .capacity(2048) 35 | .fingerprint_size(8) 36 | .bucket_size(2) 37 | .max_evictions(100) 38 | .build() 39 | .unwrap(); 40 | 41 | assert_eq!(filter.len(), 0); 42 | assert_eq!(filter.capacity(), 2048); 43 | } 44 | 45 | #[test] 46 | fn test_builder_validation_invalid_fingerprint_size() { 47 | let result = CuckooFilter::builder() 48 | .fingerprint_size(7) // Invalid: must be 4, 8, 16, or 32 49 | .build(); 50 | 51 | assert!(result.is_err()); 52 | assert!( 53 | result 54 | .unwrap_err() 55 | .to_string() 56 | .contains("Invalid fingerprint_size") 57 | ); 58 | } 59 | 60 | #[test] 61 | fn test_builder_validation_zero_bucket_size() { 62 | let result = CuckooFilter::builder().bucket_size(0).build(); 63 | 64 | assert!(result.is_err()); 65 | assert!( 66 | result 67 | .unwrap_err() 68 | .to_string() 69 | .contains("bucket_size must be greater than zero") 70 | ); 71 | } 72 | 73 | #[test] 74 | fn test_builder_validation_zero_capacity() { 75 | let result = CuckooFilter::builder().capacity(0).build(); 76 | 77 | assert!(result.is_err()); 78 | assert!( 79 | result 80 | .unwrap_err() 81 | .to_string() 82 | .contains("capacity must be greater than zero") 83 | ); 84 | } 85 | 86 | #[test] 87 | fn test_empty_filter_operations() { 88 | let filter = CuckooFilter::with_capacity(1024); 89 | 90 | // Test operations on empty filter 91 | assert!(!filter.contains(&"nonexistent")); 92 | assert_eq!(filter.count(&"nonexistent"), 0); 93 | assert!(!filter.remove(&"nonexistent")); 94 | assert_eq!(filter.len(), 0); 95 | assert!(filter.is_empty()); 96 | } 97 | 98 | #[test] 99 | fn test_basic_insert_contains() { 100 | let filter = CuckooFilter::with_capacity(1024); 101 | let item = "test_item"; 102 | 103 | assert!(!filter.contains(&item)); 104 | assert!(filter.insert(&item).is_ok()); 105 | assert!(filter.contains(&item)); 106 | assert_eq!(filter.len(), 1); 107 | assert!(!filter.is_empty()); 108 | } 109 | 110 | #[test] 111 | fn test_insert_duplicate_items() { 112 | let filter = CuckooFilter::with_capacity(1024); 113 | let item = "duplicate_item"; 114 | 115 | // Insert same item multiple times 116 | assert!(filter.insert(&item).is_ok()); 117 | assert!(filter.insert(&item).is_ok()); 118 | assert!(filter.insert(&item).is_ok()); 119 | 120 | assert!(filter.contains(&item)); 121 | assert_eq!(filter.count(&item), 3); 122 | assert_eq!(filter.len(), 3); 123 | } 124 | 125 | #[test] 126 | fn test_insert_unique() { 127 | let filter = CuckooFilter::with_capacity(1024); 128 | let item = "unique_item"; 129 | 130 | // First insertion should succeed 131 | assert_eq!(filter.insert_unique(&item), Ok(true)); 132 | assert_eq!(filter.count(&item), 1); 133 | 134 | // Second insertion should return false (already exists) 135 | assert_eq!(filter.insert_unique(&item), Ok(false)); 136 | assert_eq!(filter.count(&item), 1); 137 | assert_eq!(filter.len(), 1); 138 | } 139 | 140 | #[test] 141 | fn test_remove_existing_item() { 142 | let filter = CuckooFilter::with_capacity(1024); 143 | let item = "removable_item"; 144 | 145 | // Insert and then remove 146 | 
assert!(filter.insert(&item).is_ok()); 147 | assert!(filter.contains(&item)); 148 | assert!(filter.remove(&item)); 149 | assert!(!filter.contains(&item)); 150 | assert_eq!(filter.len(), 0); 151 | 152 | // Trying to remove again should return false 153 | assert!(!filter.remove(&item)); 154 | } 155 | 156 | #[test] 157 | fn test_remove_duplicate_items() { 158 | let filter = CuckooFilter::with_capacity(1024); 159 | let item = "dup_removable"; 160 | 161 | // Insert multiple copies 162 | assert!(filter.insert(&item).is_ok()); 163 | assert!(filter.insert(&item).is_ok()); 164 | assert!(filter.insert(&item).is_ok()); 165 | assert_eq!(filter.count(&item), 3); 166 | 167 | // Remove one at a time 168 | assert!(filter.remove(&item)); 169 | assert_eq!(filter.count(&item), 2); 170 | assert!(filter.remove(&item)); 171 | assert_eq!(filter.count(&item), 1); 172 | assert!(filter.remove(&item)); 173 | assert_eq!(filter.count(&item), 0); 174 | assert!(!filter.contains(&item)); 175 | } 176 | 177 | #[test] 178 | fn test_clear() { 179 | let filter = CuckooFilter::with_capacity(1024); 180 | let items = test_items(100); 181 | 182 | // Insert many items 183 | for item in &items { 184 | assert!(filter.insert(item).is_ok()); 185 | } 186 | assert_eq!(filter.len(), 100); 187 | 188 | // Clear all items 189 | filter.clear(); 190 | assert_eq!(filter.len(), 0); 191 | assert!(filter.is_empty()); 192 | 193 | // Verify all items are gone 194 | for item in &items { 195 | assert!(!filter.contains(item)); 196 | } 197 | } 198 | 199 | #[test] 200 | fn test_count_functionality() { 201 | let filter = CuckooFilter::with_capacity(1024); 202 | let item = "countable_item"; 203 | 204 | assert_eq!(filter.count(&item), 0); 205 | 206 | // Add items and verify count increases 207 | for i in 1..=5 { 208 | assert!(filter.insert(&item).is_ok()); 209 | assert_eq!(filter.count(&item), i); 210 | } 211 | 212 | // Remove items and verify count decreases 213 | for i in (1..=5).rev() { 214 | assert!(filter.remove(&item)); 215 | assert_eq!(filter.count(&item), i - 1); 216 | } 217 | } 218 | 219 | #[test] 220 | fn test_different_item_types() { 221 | let filter = CuckooFilter::with_capacity(1024); 222 | 223 | // Test with different types that implement Hash 224 | assert!(filter.insert(&42i32).is_ok()); 225 | assert!(filter.insert(&"string").is_ok()); 226 | assert!(filter.insert(&vec![1, 2, 3]).is_ok()); 227 | assert!(filter.insert(&(1, 2, 3)).is_ok()); 228 | 229 | assert!(filter.contains(&42i32)); 230 | assert!(filter.contains(&"string")); 231 | assert!(filter.contains(&vec![1, 2, 3])); 232 | assert!(filter.contains(&(1, 2, 3))); 233 | 234 | assert_eq!(filter.len(), 4); 235 | } 236 | 237 | #[test] 238 | fn test_false_positives() { 239 | let filter = CuckooFilter::builder() 240 | .capacity(1024) 241 | .fingerprint_size(8) // Smaller fingerprint = higher false positive rate 242 | .build() 243 | .unwrap(); 244 | 245 | // Insert known items 246 | let known_items: Vec = (0..500).collect(); 247 | for item in &known_items { 248 | assert!(filter.insert(item).is_ok()); 249 | } 250 | 251 | // Test with unknown items 252 | let unknown_items: Vec = (1000..2000).collect(); 253 | let false_positives = unknown_items 254 | .iter() 255 | .filter(|item| filter.contains(item)) 256 | .count(); 257 | 258 | // Should have some false positives but not too many 259 | assert!(false_positives > 0); 260 | assert!(false_positives < 50); // Less than 10% false positive rate 261 | } 262 | 263 | #[test] 264 | fn test_no_false_negatives() { 265 | let filter = 
CuckooFilter::with_capacity(1024); 266 | let items = test_items(1024); 267 | 268 | // Insert items and filter out the ones that failed to insert 269 | let inserted_items = items 270 | .into_iter() 271 | .filter(|item| filter.insert(item).is_ok()) 272 | .collect::>(); 273 | 274 | // All inserted items should be found (no false negatives) 275 | for item in inserted_items { 276 | assert!(filter.contains(&item), "False negative for item: {item}"); 277 | } 278 | } 279 | 280 | #[test] 281 | fn test_full_filter_insertion() { 282 | let filter = CuckooFilter::builder() 283 | .capacity(16) // Very small capacity 284 | .max_evictions(0) // No evictions 285 | .build() 286 | .unwrap(); 287 | 288 | let mut successful_inserts = 0; 289 | 290 | // Try to insert many items 291 | for i in 0..100 { 292 | if filter.insert(&i).is_ok() { 293 | successful_inserts += 1; 294 | } else { 295 | break; // Filter is full 296 | } 297 | } 298 | 299 | // Should fill up and then start failing 300 | assert!(successful_inserts <= filter.capacity()); 301 | assert!(successful_inserts > 0); 302 | assert_eq!(filter.len(), successful_inserts); 303 | } 304 | 305 | #[test] 306 | fn test_eviction_behavior() { 307 | let filter_no_evict = CuckooFilter::builder() 308 | .capacity(1024) 309 | .max_evictions(0) 310 | .build() 311 | .unwrap(); 312 | 313 | let filter_10_evict = CuckooFilter::builder() 314 | .capacity(1024) 315 | .max_evictions(10) 316 | .build() 317 | .unwrap(); 318 | 319 | let filter_100_evict = CuckooFilter::builder() 320 | .capacity(1024) 321 | .max_evictions(100) 322 | .build() 323 | .unwrap(); 324 | 325 | let mut no_evict_count = 0; 326 | let mut evict_10_count = 0; 327 | let mut evict_100_count = 0; 328 | 329 | for i in 0..1024 { 330 | if filter_no_evict.insert(&i).is_ok() { 331 | no_evict_count += 1; 332 | } 333 | if filter_10_evict.insert(&i).is_ok() { 334 | evict_10_count += 1; 335 | } 336 | if filter_100_evict.insert(&i).is_ok() { 337 | evict_100_count += 1; 338 | } 339 | } 340 | 341 | // Filter with evictions should accommodate more items 342 | assert!(no_evict_count < evict_10_count); 343 | assert!(evict_10_count < evict_100_count); 344 | assert_eq!(filter_no_evict.len(), no_evict_count); 345 | assert_eq!(filter_10_evict.len(), evict_10_count); 346 | assert_eq!(filter_100_evict.len(), evict_100_count); 347 | } 348 | 349 | #[test] 350 | fn test_fingerprint_sizes() { 351 | let sizes = [4, 8, 16, 32]; 352 | 353 | for &size in &sizes { 354 | let filter = CuckooFilter::builder() 355 | .capacity(1024) 356 | .fingerprint_size(size) 357 | .build() 358 | .unwrap(); 359 | 360 | // insert items to ensure the filter is fully loaded 361 | let mut i = 0; 362 | while filter.len() < 1024 { 363 | let _ = filter.insert(&i); 364 | i += 1; 365 | } 366 | 367 | // test false positive rate 368 | let non_existing_items = 10000..110000; 369 | let false_positives = non_existing_items.filter(|i| filter.contains(i)).count(); 370 | // Calculate the expected false positive rate (FPR) based on fingerprint size 371 | let expected_fpr = 1.0 - (1.0 - 1.0 / (1u64 << size) as f64).powi(8); 372 | let fpr = false_positives as f64 / 100000.0; 373 | if size == 4 { 374 | assert_eq!(expected_fpr, 0.4032805261667818); 375 | } else if size == 8 { 376 | assert_eq!(expected_fpr, 0.030826075519044704); 377 | } else if size == 16 { 378 | assert_eq!(expected_fpr, 0.00012206379344092966); 379 | } else if size == 32 { 380 | assert_eq!(expected_fpr, 0.000000001862645149230957); 381 | } 382 | // Allow a small margin due to randomness 383 | let tolerance = 
expected_fpr * 0.1 + 0.0001;
384 |
385 |         assert!(
386 |             (fpr - expected_fpr).abs() < tolerance,
387 |             "Observed FPR ({fpr}) deviates too much from expected FPR ({expected_fpr}) for fingerprint size {size} ({false_positives} false positives)"
388 |         );
389 |     }
390 | }
391 |
392 | #[test]
393 | fn test_bucket_sizes() {
394 |     let sizes = [1, 2, 4, 8];
395 |
396 |     for &size in &sizes {
397 |         let filter = CuckooFilter::builder()
398 |             .capacity(1024)
399 |             .bucket_size(size)
400 |             .build()
401 |             .unwrap();
402 |
403 |         // Should be able to insert items regardless of bucket size
404 |         for i in 0..100 {
405 |             assert!(filter.insert(&i).is_ok());
406 |         }
407 |
408 |         // Should be able to find all items
409 |         for i in 0..100 {
410 |             assert!(filter.contains(&i));
411 |         }
412 |
413 |         assert_eq!(filter.len(), 100);
414 |     }
415 | }
416 |
417 | #[test]
418 | fn test_custom_hasher() {
419 |     // Test that we can use different hashers
420 |     let filter = CuckooFilterBuilder::<AHasher>::default()
421 |         .capacity(1024)
422 |         .build()
423 |         .unwrap();
424 |
425 |     let items = test_items(100);
426 |     for item in &items {
427 |         assert!(filter.insert(item).is_ok());
428 |     }
429 |
430 |     for item in &items {
431 |         assert!(filter.contains(item));
432 |     }
433 |
434 |     assert_eq!(filter.len(), 100);
435 | }
436 |
--------------------------------------------------------------------------------
/tests/concurrent.rs:
--------------------------------------------------------------------------------
1 | use atomic_cuckoo_filter::CuckooFilter;
2 | use std::sync::Arc;
3 | use std::thread;
4 |
5 | #[test]
6 | fn test_concurrent_reads() {
7 |     let filter = Arc::new(CuckooFilter::with_capacity(1024));
8 |
9 |     // Insert test data
10 |     for i in 0..100 {
11 |         assert!(filter.insert(&i).is_ok());
12 |     }
13 |
14 |     let mut handles = vec![];
15 |
16 |     // Spawn multiple reader threads
17 |     for _ in 0..5 {
18 |         let filter_clone = Arc::clone(&filter);
19 |         handles.push(thread::spawn(move || {
20 |             for i in 0..100 {
21 |                 assert!(filter_clone.contains(&i));
22 |             }
23 |         }));
24 |     }
25 |
26 |     // All reads should succeed
27 |     for handle in handles {
28 |         handle.join().unwrap();
29 |     }
30 | }
31 |
32 | #[test]
33 | fn test_concurrent_insert() {
34 |     let filter = Arc::new(CuckooFilter::with_capacity(10000));
35 |     let mut handles = vec![];
36 |
37 |     // Spawn writer threads
38 |     for thread_id in 0..5 {
39 |         let filter_clone = Arc::clone(&filter);
40 |         handles.push(thread::spawn(move || {
41 |             for i in 0..100 {
42 |                 let item = format!("thread_{thread_id}_item_{i}");
43 |                 filter_clone.insert(&item).unwrap();
44 |             }
45 |         }));
46 |     }
47 |
48 |     // Wait for all threads
49 |     for handle in handles {
50 |         handle.join().unwrap();
51 |     }
52 |
53 |     // check if all items are inserted
54 |     for thread_id in 0..5 {
55 |         for i in 0..100 {
56 |             let item = format!("thread_{thread_id}_item_{i}");
57 |             assert!(filter.contains(&item));
58 |         }
59 |     }
60 |
61 |     // Should have inserted 500 items total
62 |     assert_eq!(filter.len(), 500);
63 | }
64 |
65 | #[test]
66 | fn test_concurrent_insert_unique() {
67 |     let filter = Arc::new(CuckooFilter::with_capacity(131072));
68 |     let mut handles = vec![];
69 |
70 |     for _ in 0..5 {
71 |         let filter_clone = filter.clone();
72 |         handles.push(thread::spawn(move || {
73 |             (0..100000)
74 |                 .filter(|i| filter_clone.insert_unique(i).unwrap())
75 |                 .count()
76 |         }));
77 |     }
78 |
79 |     let inserted: usize = handles.into_iter().map(|h| h.join().unwrap()).sum();
80 |
81 |     for i in 0..100000 {
82 |         assert!(filter.contains(&i));
83 |     }
84 |
85 | 
assert_eq!(inserted, filter.len()); 86 | 87 | // inserted items might be less than 100000 due to false positives 88 | assert!(inserted <= 100000); 89 | } 90 | 91 | #[test] 92 | fn concurrent_remove() { 93 | let filter = Arc::new(CuckooFilter::with_capacity(131072)); 94 | let mut handles = vec![]; 95 | 96 | for i in 0..100000 { 97 | assert!(filter.insert(&i).is_ok()); 98 | } 99 | 100 | for _ in 0..5 { 101 | let f = filter.clone(); 102 | handles.push(thread::spawn(move || { 103 | (0..100000).filter(|i| f.remove(i)).count() 104 | })); 105 | } 106 | 107 | let removed: usize = handles.into_iter().map(|h| h.join().unwrap()).sum(); 108 | assert_eq!(removed, 100000) 109 | } 110 | 111 | #[test] 112 | fn test_concurrent_insert_and_remove() { 113 | let filter = Arc::new(CuckooFilter::with_capacity(10000)); 114 | let mut handles = vec![]; 115 | 116 | // Spawn writer threads 117 | for thread_id in 0..5 { 118 | let filter_clone = Arc::clone(&filter); 119 | handles.push(thread::spawn(move || { 120 | for i in 0..100 { 121 | let item = format!("thread_{thread_id}_item_{i}"); 122 | filter_clone.insert(&item).unwrap(); 123 | } 124 | })); 125 | } 126 | 127 | // Spawn remover threads 128 | for thread_id in 0..5 { 129 | let filter_clone = Arc::clone(&filter); 130 | handles.push(thread::spawn(move || { 131 | for i in 0..100 { 132 | let item = format!("thread_{thread_id}_item_{i}"); 133 | while !filter_clone.remove(&item) {} 134 | } 135 | })); 136 | } 137 | 138 | // Wait for all threads 139 | for handle in handles { 140 | handle.join().unwrap(); 141 | } 142 | 143 | // Should have removed all items 144 | assert_eq!(filter.len(), 0); 145 | } 146 | --------------------------------------------------------------------------------
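As a closing illustration, the sizing arithmetic performed by `CuckooFilterBuilder::build` in `src/lib.rs` above can be checked in isolation. The snippet below is a minimal sketch and not part of the crate: the `bucket_size` and `fingerprint_size` values are assumed for illustration, and a 64-bit target is assumed. Under these assumptions it reproduces the `filter.capacity() == 1024` expectation that `tests/basic.rs` has for `with_capacity(1000)`.

```rust
// A standalone sketch of the builder's sizing math (illustrative constants only).
fn main() {
    let requested_capacity = 1000usize;
    let bucket_size = 4usize; // fingerprints per bucket (assumed)
    let fingerprint_size = 16usize; // bits per fingerprint (assumed)

    // Buckets are rounded up to a power of two, so the usable capacity
    // can end up slightly larger than requested (1000 -> 1024 here).
    let num_buckets = requested_capacity
        .div_ceil(bucket_size)
        .next_power_of_two();
    let capacity = num_buckets * bucket_size;

    // Fingerprints are packed into 64-bit atomic words.
    let fingerprints_per_atomic = usize::BITS as usize / fingerprint_size;
    let atomic_size = (capacity * fingerprint_size).div_ceil(usize::BITS as usize);

    assert_eq!(num_buckets, 256);
    assert_eq!(capacity, 1024);
    assert_eq!(fingerprints_per_atomic, 4);
    assert_eq!(atomic_size, 256);
    println!("{num_buckets} buckets, {capacity} slots, {atomic_size} atomic words");
}
```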