├── .gitignore ├── LICENSE ├── README.md ├── apbf.go ├── apbf_test.go ├── go.mod ├── go.sum ├── snapshot.pb.go └── snapshot.proto /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 CrowdStrike Holdings, Inc. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Age-Partitioned Bloom Filters 2 | 3 | Age-Partitioned Bloom Filters (APBF) is a novel approach for duplicate detection in sliding windows over an unbounded stream of items, described in [Age-Partitioned Bloom Filters](https://arxiv.org/abs/2001.03147): Ariel Shtul, Carlos Baquero and Paulo Sérgio Almeida, 2020. 4 | 5 | The implementation employs the enhanced double hashing technique for fast index computation introduced in [Bloom Filters in Probabilistic Verification](https://link.springer.com/chapter/10.1007/978-3-540-30494-4_26): Peter C. Dillinger and Panagiotis Manolios, 2004. 6 | 7 | ## Example 8 | 9 | ```golang 10 | // create a filter with k=10, l=7, and g=1000 11 | filter := apbf.New(10, 7, 1000) 12 | 13 | item := []byte("test item") 14 | filter.Add(item) 15 | 16 | if filter.Query(item) { 17 | fmt.Println("item was found") 18 | } 19 | ``` 20 | 21 | ## Installation 22 | 23 | Use `go get` to add the project to your workspace: 24 | ```bash 25 | go get -u github.com/CrowdStrike/apbf 26 | ``` 27 | 28 | ## Benchmarks 29 | 30 | The following results show the performance of the main filter operations `Add` and `Query`, with and without refresh enabled, for a small and a large filter. Benchmarks were executed on a MacBook Pro 2017 dev laptop. 
31 | 32 | ``` 33 | BenchmarkSmallFilterAdd-8 20000000 103 ns/op 0 B/op 0 allocs/op 34 | BenchmarkSmallFilterAddWithRefresh-8 10000000 177 ns/op 0 B/op 0 allocs/op 35 | BenchmarkSmallFilterQuery-8 10000000 133 ns/op 0 B/op 0 allocs/op 36 | BenchmarkSmallFilterQueryWithRefresh-8 10000000 206 ns/op 0 B/op 0 allocs/op 37 | BenchmarkLargeFilterAdd-8 5000000 252 ns/op 0 B/op 0 allocs/op 38 | BenchmarkLargeFilterAddWithRefresh-8 5000000 325 ns/op 0 B/op 0 allocs/op 39 | BenchmarkLargeFilterQuery-8 3000000 431 ns/op 0 B/op 0 allocs/op 40 | BenchmarkLargeFilterQueryWithRefresh-8 2000000 543 ns/op 0 B/op 0 allocs/op 41 | ``` 42 | 43 | ## Contributors 44 | 45 | [Bogdan-Ciprian Rusu](https://github.com/bcrusu) - Author/Maintainer 46 | 47 | ## License 48 | 49 | The project is licensed under the [MIT License](LICENSE). 50 | -------------------------------------------------------------------------------- /apbf.go: -------------------------------------------------------------------------------- 1 | // Copyright 2020 CrowdStrike Holdings, Inc. 2 | // 3 | // Use of this source code is governed by the MIT License. 4 | 5 | package apbf 6 | 7 | import ( 8 | "math" 9 | "sync" 10 | "sync/atomic" 11 | "time" 12 | 13 | "github.com/spaolacci/murmur3" 14 | ) 15 | 16 | //go:generate protoc --gogo_out=. snapshot.proto 17 | 18 | // Filter represents the Age-Partitioned Bloom Filter (APBF). 19 | // The implementation is safe for concurrent use. 20 | type Filter struct { 21 | k uint 22 | l uint 23 | h uint // slice count 24 | g uint // generation size 25 | r int64 // refresh interval in nano 26 | m uint // slice bit count 27 | lock sync.RWMutex // guards all below 28 | base uint // current slice 1-based index 29 | buffer []byte // circular buffer 30 | count uint // current generation count 31 | lastRefresh int64 32 | hasherPool *sync.Pool 33 | } 34 | 35 | type hash [2]uint 36 | 37 | // New returns the APBF with k + l slices and g generation size. 
38 | func New(k, l, g uint) *Filter { 39 | return new(k, l, g, 0) 40 | } 41 | 42 | // NewWithRefresh returns the APBF with k + l slices, g generation size, and r refresh interval. 43 | func NewWithRefresh(k, l, g uint, r time.Duration) *Filter { 44 | return new(k, l, g, r) 45 | } 46 | 47 | func new(k, l, g uint, r time.Duration) *Filter { 48 | if k == 0 || l == 0 || g == 0 || r < 0 { 49 | panic("invalid parameters") 50 | } 51 | 52 | h, m, bufferLen := deriveParams(k, l, g) 53 | 54 | return &Filter{ 55 | k: k, 56 | l: l, 57 | h: h, 58 | g: g, 59 | r: int64(r), 60 | m: m, 61 | base: 1, 62 | buffer: make([]byte, bufferLen), 63 | count: 0, 64 | hasherPool: getHasherPool(), 65 | lastRefresh: time.Now().UnixNano(), 66 | } 67 | } 68 | 69 | // NewFromSnapshot recreates the matching filter from the provided snapshot. 70 | func NewFromSnapshot(s Snapshot) *Filter { 71 | if s.K == 0 || s.L == 0 || s.G == 0 || s.R < 0 { 72 | panic("invalid snapshot") 73 | } 74 | 75 | h, m, bufferLen := deriveParams(uint(s.K), uint(s.L), uint(s.G)) 76 | 77 | validBuffer := len(s.Buffer) == bufferLen 78 | validBase := uint(s.Base) >= 1 && uint(s.Base) <= h 79 | validCount := s.Count <= s.G 80 | 81 | if !validBuffer || !validBase || !validCount { 82 | panic("invalid snapshot") 83 | } 84 | 85 | return &Filter{ 86 | k: uint(s.K), 87 | l: uint(s.L), 88 | h: h, 89 | g: uint(s.G), 90 | r: int64(s.R), 91 | m: m, 92 | base: uint(s.Base), 93 | buffer: cloneSlice(s.Buffer), 94 | count: uint(s.Count), 95 | hasherPool: getHasherPool(), 96 | lastRefresh: time.Now().UnixNano(), 97 | } 98 | } 99 | 100 | func deriveParams(k, l, g uint) (uint, uint, int) { 101 | h := k + l // slice count 102 | n := k * g // slice capacity 103 | m := uint(math.Ceil(1.442695 * float64(n))) // slice bit count 104 | mt := h * m // total bit count 105 | bufferLen := int((mt + 7) / 8) 106 | 107 | return h, m, bufferLen 108 | } 109 | 110 | // Add item to the set. 
111 | func (f *Filter) Add(item []byte) { 112 | f.refresh() 113 | hash := f.getHash(item) 114 | 115 | f.lock.Lock() 116 | 117 | if f.count == f.g { 118 | f.shift() 119 | } 120 | 121 | slice := f.base 122 | for i := uint(0); i < f.k; i++ { 123 | bit := f.location(slice, hash) 124 | f.setBit(bit) 125 | 126 | slice = f.nextSlice(slice) 127 | } 128 | 129 | f.count++ 130 | f.lock.Unlock() 131 | } 132 | 133 | // Query returns true if the item is in the set and false otherwise. A true value might be a false positive whereas false is always correct. 134 | func (f *Filter) Query(item []byte) bool { 135 | f.refresh() 136 | hash := f.getHash(item) 137 | 138 | f.lock.RLock() 139 | slice := f.base 140 | matched := uint(0) 141 | 142 | for i := f.h; i >= f.k-matched; i-- { 143 | bit := f.location(slice, hash) 144 | 145 | if f.hasBit(bit) { 146 | matched++ 147 | if matched == f.k { 148 | break 149 | } 150 | } else { 151 | matched = 0 152 | } 153 | 154 | slice = f.nextSlice(slice) 155 | } 156 | 157 | f.lock.RUnlock() 158 | return matched == f.k 159 | } 160 | 161 | // NextGeneration transitions to next generation. 162 | func (f *Filter) NextGeneration() { 163 | f.lock.Lock() 164 | f.shift() 165 | f.lock.Unlock() 166 | } 167 | 168 | // Snapshot returns a consistent snapshot of filter state. 169 | func (f *Filter) Snapshot() Snapshot { 170 | f.lock.RLock() 171 | 172 | result := Snapshot{ 173 | K: uint64(f.k), 174 | L: uint64(f.l), 175 | G: uint64(f.g), 176 | R: uint64(f.r), 177 | Base: uint64(f.base), 178 | Count: uint64(f.count), 179 | Buffer: cloneSlice(f.buffer), 180 | } 181 | 182 | f.lock.RUnlock() 183 | return result 184 | } 185 | 186 | // MaxCapacity returns filter max capacity. 187 | func (f *Filter) MaxCapacity() int { 188 | return int(f.g) * f.MaxGenerations() 189 | } 190 | 191 | // MaxGenerations returns filter max generations count. 
192 | func (f *Filter) MaxGenerations() int { 193 | return int(f.l + 1) 194 | } 195 | 196 | func (f *Filter) refresh() { 197 | if f.r == 0 { 198 | return 199 | } 200 | 201 | now := time.Now().UnixNano() 202 | 203 | // fast path 204 | lastRefresh := atomic.LoadInt64(&f.lastRefresh) 205 | if lastRefresh+f.r > now { 206 | return 207 | } 208 | 209 | // slow path 210 | f.lock.Lock() 211 | for { 212 | next := f.lastRefresh + f.r 213 | if next > now { 214 | break 215 | } 216 | 217 | f.shift() 218 | f.lastRefresh = next 219 | } 220 | 221 | f.lock.Unlock() 222 | } 223 | 224 | func (f *Filter) shift() { 225 | f.count = 0 226 | f.base = f.prevSlice(f.base) 227 | 228 | bit := (f.base - 1) * f.m 229 | endBit := bit + f.m 230 | 231 | for bit < endBit && bit%8 != 0 { 232 | f.clearBit(bit) 233 | bit++ 234 | } 235 | 236 | for bit < endBit && bit+8 < endBit { 237 | f.buffer[bit/8] = 0 238 | bit += 8 239 | } 240 | 241 | for bit < endBit { 242 | f.clearBit(bit) 243 | bit++ 244 | } 245 | } 246 | 247 | func (f *Filter) getHash(item []byte) hash { 248 | hasher := f.hasherPool.Get().(murmur3.Hash128) 249 | hasher.Reset() 250 | hasher.Write(item) 251 | 252 | h1, h2 := hasher.Sum128() 253 | 254 | f.hasherPool.Put(hasher) 255 | return hash{uint(h1), uint(h2)} 256 | } 257 | 258 | func (f *Filter) location(i uint, h hash) uint { 259 | t := (i*i*i - i) / 6 260 | return (i-1)*f.m + (h[0]+i*h[1]+t)%f.m // enhanced double hashing 261 | } 262 | 263 | func (f *Filter) nextSlice(i uint) uint { 264 | if i == f.h { 265 | return 1 266 | } 267 | 268 | return i + 1 269 | } 270 | 271 | func (f *Filter) prevSlice(i uint) uint { 272 | if i == 1 { 273 | return f.h 274 | } 275 | 276 | return i - 1 277 | } 278 | 279 | func (f *Filter) setBit(index uint) { 280 | f.buffer[index/8] |= 1 << (index % 8) 281 | } 282 | 283 | func (f *Filter) clearBit(index uint) { 284 | f.buffer[index/8] &^= 1 << (index % 8) 285 | } 286 | 287 | func (f *Filter) hasBit(index uint) bool { 288 | return f.buffer[index/8]&(1<<(index%8)) != 
0 289 | } 290 | 291 | // CalculateFalsePositiveRate computes the false positive rate for given k and l parameters. 292 | func CalculateFalsePositiveRate(k, l uint) float64 { 293 | if k == 0 || l == 0 { 294 | panic("invalid parameters") 295 | } 296 | 297 | type key struct { 298 | a, i uint 299 | } 300 | 301 | cache := map[key]float64{} 302 | 303 | var calculate func(a, i uint) float64 304 | calculate = func(a, i uint) float64 { 305 | if a == k { 306 | return 1 307 | } else if i > l+a { 308 | return 0 309 | } 310 | 311 | ck := key{a, i} 312 | if val, ok := cache[ck]; ok { 313 | return val 314 | } 315 | 316 | ri := 0.5 317 | if i < k { 318 | ri = float64(i+1) / float64(2*k) 319 | } 320 | 321 | val := ri*calculate(a+1, i+1) + (1-ri)*calculate(0, i+1) 322 | cache[ck] = val 323 | return val 324 | } 325 | 326 | return calculate(0, 0) 327 | } 328 | 329 | func cloneSlice(src []byte) []byte { 330 | dest := make([]byte, len(src)) 331 | copy(dest, src) 332 | return dest 333 | } 334 | 335 | func getHasherPool() *sync.Pool { 336 | return &sync.Pool{ 337 | New: func() interface{} { return murmur3.New128() }, 338 | } 339 | } 340 | -------------------------------------------------------------------------------- /apbf_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2020 CrowdStrike Holdings, Inc. 2 | // 3 | // Use of this source code is governed by the MIT License. 
4 | 5 | package apbf_test 6 | 7 | import ( 8 | "fmt" 9 | "math" 10 | "math/rand" 11 | "sync" 12 | "testing" 13 | "time" 14 | 15 | "github.com/CrowdStrike/apbf" 16 | ) 17 | 18 | const ( 19 | testItemsCount = 10000000 20 | ) 21 | 22 | var ( 23 | testItems [][]byte 24 | ) 25 | 26 | type newParams struct { 27 | k, l, g uint 28 | } 29 | 30 | type newWithRefreshParams struct { 31 | k, l, g uint 32 | r time.Duration 33 | } 34 | 35 | type fpRate struct { 36 | k, l uint 37 | expected float64 38 | } 39 | 40 | func init() { 41 | rand.Seed(time.Now().UnixNano()) 42 | 43 | testItems = make([][]byte, testItemsCount) 44 | for i := range testItems { 45 | item := make([]byte, 8) 46 | rand.Read(item) 47 | testItems[i] = item 48 | } 49 | } 50 | 51 | func TestBasicAddQuery(t *testing.T) { 52 | params := []newParams{ 53 | {1, 1, 1}, 54 | {1, 1, 2}, 55 | {1, 1, 100}, 56 | {3, 1, 1}, 57 | {3, 1, 2}, 58 | {3, 1, 100}, 59 | {1, 3, 1}, 60 | {1, 3, 2}, 61 | {1, 3, 100}, 62 | } 63 | 64 | item1 := []byte("itemA") 65 | item2 := []byte("itemB") 66 | 67 | for _, p := range params { 68 | filter := p.New() 69 | 70 | if filter.Query(item1) { 71 | t.Errorf("Query returned true before item1 was added in filter %s", p) 72 | } 73 | 74 | filter.Add(item1) 75 | 76 | if !filter.Query(item1) { 77 | t.Errorf("Query returned false after item1 was added in filter %s", p) 78 | } 79 | 80 | if filter.Query(item2) { 81 | t.Errorf("Query returned true for item2 in filter %s", p) 82 | } 83 | } 84 | } 85 | 86 | func TestMaxCapacity(t *testing.T) { 87 | params := []newParams{ 88 | {1, 1, 10}, 89 | {1, 3, 10}, 90 | {3, 1, 10}, 91 | {3, 3, 10}, 92 | } 93 | 94 | item1 := []byte("itemA") 95 | item2 := []byte("itemB") 96 | 97 | for _, p := range params { 98 | filter := p.New() 99 | 100 | // item1 is part of first generation 101 | filter.Add(item1) 102 | 103 | // adding both item1 and item2 should not result in false positives 104 | if filter.Query(item2) { 105 | t.Errorf("Query returned false positive in filter %s", p) 106 
| continue 107 | } 108 | 109 | for i := 0; i < filter.MaxCapacity()-1; i++ { 110 | filter.Add(item2) 111 | } 112 | 113 | if !filter.Query(item1) { 114 | t.Errorf("Query returned false in filter %s", p) 115 | } 116 | 117 | filter.Add(item2) 118 | 119 | if filter.Query(item1) { 120 | t.Errorf("Query returned true in filter %s", p) 121 | } 122 | } 123 | } 124 | 125 | func TestRefresh(t *testing.T) { 126 | refresh := 50 * time.Millisecond 127 | params := []newWithRefreshParams{ 128 | {3, 6, 10, refresh}, 129 | {3, 8, 10, refresh}, 130 | } 131 | 132 | item := []byte("itemA") 133 | 134 | for _, p := range params { 135 | filter := p.New() 136 | 137 | filter.Add(item) 138 | 139 | // a. wait one refresh interval will not evict the item 140 | <-time.After(refresh) 141 | 142 | if !filter.Query(item) { 143 | t.Errorf("Query returned false in filter %s", p) 144 | } 145 | 146 | // b. wait 2x number of generations times the refresh interval to ensure the item was evicted 147 | <-time.After(2 * time.Duration(filter.MaxGenerations()) * refresh) 148 | 149 | if filter.Query(item) { 150 | t.Errorf("Query returned true in filter %s", p) 151 | } 152 | } 153 | } 154 | 155 | func TestNextGeneration(t *testing.T) { 156 | params := []newParams{ 157 | {1, 1, 10}, 158 | {1, 3, 10}, 159 | {3, 1, 10}, 160 | {3, 3, 10}, 161 | } 162 | 163 | item := []byte("itemA") 164 | 165 | for _, p := range params { 166 | filter := p.New() 167 | 168 | filter.Add(item) 169 | 170 | for i := 0; i < filter.MaxGenerations(); i++ { 171 | filter.NextGeneration() 172 | } 173 | 174 | if filter.Query(item) { 175 | t.Errorf("Query returned false in filter %s", p) 176 | } 177 | } 178 | } 179 | 180 | func TestConcurrentAccess(t *testing.T) { 181 | params := []newParams{ 182 | {4, 3, 10000}, 183 | {10, 7, 10000}, 184 | {18, 16, 10000}, 185 | } 186 | 187 | for _, p := range params { 188 | filter := p.New() 189 | 190 | itemChan := make(chan []byte, filter.MaxGenerations()) 191 | stopChan := make(chan struct{}) 192 | var 
stopOnce sync.Once 193 | var wg sync.WaitGroup 194 | 195 | addGeneration := func(seed int) { 196 | defer wg.Done() 197 | 198 | for i := 0; i < int(p.g); i++ { 199 | item := getItem(seed + i) 200 | filter.Add(item) 201 | 202 | select { 203 | case itemChan <- item: 204 | case <-stopChan: 205 | return 206 | } 207 | } 208 | } 209 | 210 | queryGeneration := func() { 211 | defer wg.Done() 212 | 213 | for i := 0; i < int(p.g); i++ { 214 | select { 215 | case item := <-itemChan: 216 | if !filter.Query(item) { 217 | t.Errorf("Query returned false for item %s in filter %s", string(item), p) 218 | stopOnce.Do(func() { close(stopChan) }) 219 | } 220 | case <-stopChan: 221 | return 222 | } 223 | } 224 | } 225 | 226 | for i := 0; i < filter.MaxGenerations(); i++ { 227 | wg.Add(2) 228 | 229 | go addGeneration(rand.Int()) 230 | go queryGeneration() 231 | } 232 | 233 | wg.Wait() 234 | } 235 | } 236 | 237 | func TestEmptyItem(t *testing.T) { 238 | items := [][]byte{ 239 | {}, 240 | nil, 241 | } 242 | 243 | for i, item := range items { 244 | filter := apbf.New(3, 1, 10) 245 | 246 | filter.Add(item) 247 | 248 | if !filter.Query(item) { 249 | t.Errorf("Query returned false for item %d", i) 250 | } 251 | } 252 | } 253 | 254 | func TestCalculateFalsePositiveRate(t *testing.T) { 255 | epsilon := 1e-6 256 | items := []fpRate{ 257 | {4, 3, 0.100586}, 258 | {5, 7, 0.101603}, 259 | {7, 5, 0.011232}, 260 | {8, 8, 0.010244}, 261 | {10, 7, 0.001211}, 262 | {11, 9, 0.000918}, 263 | {14, 11, 0.000099}, 264 | {15, 15, 0.000100}, 265 | {17, 13, 0.00001}, 266 | {18, 16, 0.000009}, 267 | } 268 | 269 | for _, f := range items { 270 | actual := apbf.CalculateFalsePositiveRate(f.k, f.l) 271 | 272 | if math.Abs(actual-f.expected) > epsilon { 273 | t.Errorf("Wrong FP rate for k=%d, k=%d: expected=%v vs. 
actual=%v", f.k, f.l, f.expected, actual) 274 | } 275 | } 276 | } 277 | 278 | func TestRealFalsePositiveRate(t *testing.T) { 279 | epochMax := 1000 // max number of epochs to converge 280 | epochThreshold := 5 // epoch required for FP confirmation 281 | capacity := uint(1000) // target filter capacity 282 | 283 | getParams := func(k, l uint) newParams { 284 | return newParams{k, l, capacity / (l + 1)} 285 | } 286 | 287 | params := []newParams{ 288 | getParams(4, 3), 289 | getParams(5, 7), 290 | getParams(6, 14), 291 | getParams(7, 5), 292 | getParams(8, 8), 293 | getParams(9, 14), 294 | getParams(10, 7), 295 | getParams(11, 9), 296 | getParams(12, 14), 297 | getParams(14, 11), 298 | getParams(15, 15), 299 | getParams(16, 22), 300 | getParams(17, 13), 301 | getParams(18, 16), 302 | getParams(19, 22), 303 | } 304 | 305 | buff := make([]byte, 8) 306 | randItem := func(prefix byte) []byte { 307 | rand.Read(buff) 308 | buff[0] = prefix 309 | return buff 310 | } 311 | 312 | for _, p := range params { 313 | filter := p.New() 314 | expected := apbf.CalculateFalsePositiveRate(p.k, p.l) 315 | 316 | // a. fill the filter 317 | for i := 0; i < filter.MaxCapacity(); i++ { 318 | filter.Add(randItem(17)) 319 | } 320 | 321 | // b. query for items that were not added 322 | epochSize := int(1 / expected) 323 | epochCount := 0 324 | count := 0 325 | fpCount := 0 326 | 327 | for epoch := 0; epoch < epochMax && epochCount < epochThreshold; epoch++ { 328 | for i := 0; i < epochSize; i++ { 329 | if filter.Query(randItem(19)) { 330 | fpCount++ 331 | } 332 | } 333 | 334 | count += epochSize 335 | 336 | // c. compute real FP rate and compare 337 | actual := float64(fpCount) / float64(count) 338 | 339 | if actual <= expected { 340 | epochCount++ 341 | } else { 342 | epochCount = 0 343 | } 344 | } 345 | 346 | if epochCount < epochThreshold { 347 | overall := float64(fpCount) / float64(count) 348 | t.Errorf("Filter %s did not converge to expected FP rate %v vs. 
%v", p, expected, overall) 349 | } 350 | } 351 | } 352 | 353 | func TestSnapshot(t *testing.T) { 354 | params := []newParams{ 355 | {1, 1, 1}, 356 | {1, 1, 2}, 357 | {1, 1, 100}, 358 | {3, 1, 1}, 359 | {3, 1, 2}, 360 | {3, 1, 100}, 361 | {1, 3, 1}, 362 | {1, 3, 2}, 363 | {1, 3, 100}, 364 | } 365 | 366 | item := []byte("itemA") 367 | 368 | for _, p := range params { 369 | filter1 := p.New() 370 | filter1.Add(item) 371 | 372 | snapshot := filter1.Snapshot() 373 | 374 | filter2 := apbf.NewFromSnapshot(snapshot) 375 | 376 | if !filter2.Query(item) { 377 | t.Errorf("Query returned false for filter %s", p) 378 | } 379 | } 380 | } 381 | 382 | func TestInvalidSnapshot(t *testing.T) { 383 | filter := apbf.NewWithRefresh(4, 3, 1000, time.Minute) 384 | valid := filter.Snapshot() 385 | 386 | invalid := []apbf.Snapshot{ 387 | {}, 388 | func(s apbf.Snapshot) apbf.Snapshot { s.K = 0; return s }(valid), 389 | func(s apbf.Snapshot) apbf.Snapshot { s.K = s.K - 1; return s }(valid), 390 | func(s apbf.Snapshot) apbf.Snapshot { s.K = s.K + 1; return s }(valid), 391 | func(s apbf.Snapshot) apbf.Snapshot { s.L = 0; return s }(valid), 392 | func(s apbf.Snapshot) apbf.Snapshot { s.L = s.L - 1; return s }(valid), 393 | func(s apbf.Snapshot) apbf.Snapshot { s.L = s.L + 1; return s }(valid), 394 | func(s apbf.Snapshot) apbf.Snapshot { s.G = 0; return s }(valid), 395 | func(s apbf.Snapshot) apbf.Snapshot { s.G = s.G - 1; return s }(valid), 396 | func(s apbf.Snapshot) apbf.Snapshot { s.G = s.G + 1; return s }(valid), 397 | func(s apbf.Snapshot) apbf.Snapshot { s.Base = 0; return s }(valid), 398 | func(s apbf.Snapshot) apbf.Snapshot { s.Base = s.K + s.L + 1; return s }(valid), 399 | func(s apbf.Snapshot) apbf.Snapshot { s.Count = s.G + 1; return s }(valid), 400 | func(s apbf.Snapshot) apbf.Snapshot { s.Buffer = nil; return s }(valid), 401 | func(s apbf.Snapshot) apbf.Snapshot { s.Buffer = []byte{}; return s }(valid), 402 | func(s apbf.Snapshot) apbf.Snapshot { s.Buffer = 
s.Buffer[:len(s.Buffer)-1]; return s }(valid), 403 | func(s apbf.Snapshot) apbf.Snapshot { s.Buffer = append(s.Buffer, 1); return s }(valid), 404 | } 405 | 406 | callNew := func(s apbf.Snapshot) (didPanic bool) { 407 | defer func() { 408 | didPanic = recover() != nil 409 | }() 410 | 411 | apbf.NewFromSnapshot(s) 412 | return 413 | } 414 | 415 | for i, s := range invalid { 416 | if !callNew(s) { 417 | t.Errorf("Unexpected result for snapshot at index %d", i) 418 | } 419 | } 420 | } 421 | 422 | func TestInvalidParams(t *testing.T) { 423 | invalid := []newWithRefreshParams{ 424 | {}, 425 | {0, 3, 100, 0}, 426 | {3, 0, 100, 0}, 427 | {3, 3, 0, 0}, 428 | {3, 3, 100, -1}, 429 | } 430 | 431 | callNew := func(p newWithRefreshParams) (didPanic bool) { 432 | defer func() { 433 | didPanic = recover() != nil 434 | }() 435 | 436 | p.New() 437 | return 438 | } 439 | 440 | for _, p := range invalid { 441 | if !callNew(p) { 442 | t.Errorf("Unexpected result for params %s", p) 443 | } 444 | } 445 | } 446 | 447 | func BenchmarkSmallFilterAdd(b *testing.B) { 448 | filter := apbf.New(3, 3, 1000) 449 | benchmarkAdd(b, filter) 450 | } 451 | 452 | func BenchmarkSmallFilterAddWithRefresh(b *testing.B) { 453 | filter := apbf.NewWithRefresh(3, 3, 1000, time.Minute) 454 | benchmarkAdd(b, filter) 455 | } 456 | 457 | func BenchmarkSmallFilterQuery(b *testing.B) { 458 | filter := apbf.New(3, 3, 1000) 459 | benchmarkQuery(b, filter) 460 | } 461 | 462 | func BenchmarkSmallFilterQueryWithRefresh(b *testing.B) { 463 | filter := apbf.NewWithRefresh(3, 3, 1000, time.Minute) 464 | benchmarkQuery(b, filter) 465 | } 466 | 467 | func BenchmarkLargeFilterAdd(b *testing.B) { 468 | filter := apbf.New(15, 15, 100000) 469 | benchmarkAdd(b, filter) 470 | } 471 | 472 | func BenchmarkLargeFilterAddWithRefresh(b *testing.B) { 473 | filter := apbf.NewWithRefresh(15, 15, 100000, time.Minute) 474 | benchmarkAdd(b, filter) 475 | } 476 | 477 | func BenchmarkLargeFilterQuery(b *testing.B) { 478 | filter := apbf.New(15, 
15, 100000) 479 | benchmarkQuery(b, filter) 480 | } 481 | 482 | func BenchmarkLargeFilterQueryWithRefresh(b *testing.B) { 483 | filter := apbf.NewWithRefresh(15, 15, 100000, time.Minute) 484 | benchmarkQuery(b, filter) 485 | } 486 | 487 | func benchmarkAdd(b *testing.B, filter *apbf.Filter) { 488 | b.ResetTimer() 489 | for i := 0; i < b.N; i++ { 490 | filter.Add(getItem(i)) 491 | } 492 | } 493 | 494 | func benchmarkQuery(b *testing.B, filter *apbf.Filter) { 495 | // fill the filter 496 | for i := 0; i < filter.MaxCapacity(); i++ { 497 | filter.Add(getItem(i)) 498 | } 499 | 500 | b.ResetTimer() 501 | for i := 0; i < b.N; i++ { 502 | filter.Query(getItem(i)) 503 | } 504 | } 505 | 506 | func (p newParams) New() *apbf.Filter { 507 | return apbf.New(p.k, p.l, p.g) 508 | } 509 | 510 | func (p newParams) String() string { 511 | return fmt.Sprintf("k=%d, l=%d, g=%d", p.k, p.l, p.g) 512 | } 513 | 514 | func (p newWithRefreshParams) New() *apbf.Filter { 515 | return apbf.NewWithRefresh(p.k, p.l, p.g, p.r) 516 | } 517 | 518 | func (p newWithRefreshParams) String() string { 519 | return fmt.Sprintf("k=%d, l=%d, g=%d, r=%s", p.k, p.l, p.g, p.r) 520 | } 521 | 522 | func getItem(i int) []byte { 523 | return testItems[i%testItemsCount] 524 | } 525 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/CrowdStrike/apbf 2 | 3 | go 1.12 4 | 5 | require ( 6 | github.com/gogo/protobuf v1.3.2 7 | github.com/spaolacci/murmur3 v1.1.0 8 | ) 9 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= 2 | github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= 3 | github.com/kisielk/errcheck v1.5.0/go.mod 
h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= 4 | github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= 5 | github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI= 6 | github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= 7 | github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= 8 | github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= 9 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 10 | golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= 11 | golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= 12 | golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= 13 | golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= 14 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 15 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 16 | golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 17 | golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= 18 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 19 | golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 20 | golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 21 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 22 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 23 | golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 24 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 25 | golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 26 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 27 | golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 28 | golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= 29 | golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= 30 | golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 31 | golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 32 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 33 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 34 | -------------------------------------------------------------------------------- /snapshot.pb.go: -------------------------------------------------------------------------------- 1 | // Code generated by protoc-gen-gogo. DO NOT EDIT. 2 | // source: snapshot.proto 3 | 4 | package apbf 5 | 6 | import ( 7 | fmt "fmt" 8 | proto "github.com/gogo/protobuf/proto" 9 | math "math" 10 | ) 11 | 12 | // Reference imports to suppress errors if they are not otherwise used. 13 | var _ = proto.Marshal 14 | var _ = fmt.Errorf 15 | var _ = math.Inf 16 | 17 | // This is a compile-time assertion to ensure that this generated file 18 | // is compatible with the proto package it is being compiled against. 
19 | // A compilation error at this line likely means your copy of the 20 | // proto package needs to be updated. 21 | const _ = proto.GoGoProtoPackageIsVersion2 // please upgrade the proto package 22 | 23 | type Snapshot struct { 24 | K uint64 `protobuf:"varint,1,opt,name=k,proto3" json:"k,omitempty"` 25 | L uint64 `protobuf:"varint,2,opt,name=l,proto3" json:"l,omitempty"` 26 | G uint64 `protobuf:"varint,3,opt,name=g,proto3" json:"g,omitempty"` 27 | R uint64 `protobuf:"varint,4,opt,name=r,proto3" json:"r,omitempty"` 28 | Base uint64 `protobuf:"varint,5,opt,name=base,proto3" json:"base,omitempty"` 29 | Count uint64 `protobuf:"varint,6,opt,name=count,proto3" json:"count,omitempty"` 30 | Buffer []byte `protobuf:"bytes,7,opt,name=buffer,proto3" json:"buffer,omitempty"` 31 | XXX_NoUnkeyedLiteral struct{} `json:"-"` 32 | XXX_unrecognized []byte `json:"-"` 33 | XXX_sizecache int32 `json:"-"` 34 | } 35 | 36 | func (m *Snapshot) Reset() { *m = Snapshot{} } 37 | func (m *Snapshot) String() string { return proto.CompactTextString(m) } 38 | func (*Snapshot) ProtoMessage() {} 39 | func (*Snapshot) Descriptor() ([]byte, []int) { 40 | return fileDescriptor_0c8aab8e59648e0b, []int{0} 41 | } 42 | func (m *Snapshot) XXX_Unmarshal(b []byte) error { 43 | return xxx_messageInfo_Snapshot.Unmarshal(m, b) 44 | } 45 | func (m *Snapshot) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { 46 | return xxx_messageInfo_Snapshot.Marshal(b, m, deterministic) 47 | } 48 | func (m *Snapshot) XXX_Merge(src proto.Message) { 49 | xxx_messageInfo_Snapshot.Merge(m, src) 50 | } 51 | func (m *Snapshot) XXX_Size() int { 52 | return xxx_messageInfo_Snapshot.Size(m) 53 | } 54 | func (m *Snapshot) XXX_DiscardUnknown() { 55 | xxx_messageInfo_Snapshot.DiscardUnknown(m) 56 | } 57 | 58 | var xxx_messageInfo_Snapshot proto.InternalMessageInfo 59 | 60 | func (m *Snapshot) GetK() uint64 { 61 | if m != nil { 62 | return m.K 63 | } 64 | return 0 65 | } 66 | 67 | func (m *Snapshot) GetL() uint64 { 68 | if m != 
nil { 69 | return m.L 70 | } 71 | return 0 72 | } 73 | 74 | func (m *Snapshot) GetG() uint64 { 75 | if m != nil { 76 | return m.G 77 | } 78 | return 0 79 | } 80 | 81 | func (m *Snapshot) GetR() uint64 { 82 | if m != nil { 83 | return m.R 84 | } 85 | return 0 86 | } 87 | 88 | func (m *Snapshot) GetBase() uint64 { 89 | if m != nil { 90 | return m.Base 91 | } 92 | return 0 93 | } 94 | 95 | func (m *Snapshot) GetCount() uint64 { 96 | if m != nil { 97 | return m.Count 98 | } 99 | return 0 100 | } 101 | 102 | func (m *Snapshot) GetBuffer() []byte { 103 | if m != nil { 104 | return m.Buffer 105 | } 106 | return nil 107 | } 108 | 109 | func init() { 110 | proto.RegisterType((*Snapshot)(nil), "apbf.Snapshot") 111 | } 112 | 113 | func init() { proto.RegisterFile("snapshot.proto", fileDescriptor_0c8aab8e59648e0b) } 114 | 115 | var fileDescriptor_0c8aab8e59648e0b = []byte{ 116 | // 143 bytes of a gzipped FileDescriptorProto 117 | 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xe2, 0xe2, 0x2b, 0xce, 0x4b, 0x2c, 118 | 0x28, 0xce, 0xc8, 0x2f, 0xd1, 0x2b, 0x28, 0xca, 0x2f, 0xc9, 0x17, 0x62, 0x49, 0x2c, 0x48, 0x4a, 119 | 0x53, 0x6a, 0x61, 0xe4, 0xe2, 0x08, 0x86, 0x4a, 0x08, 0xf1, 0x70, 0x31, 0x66, 0x4b, 0x30, 0x2a, 120 | 0x30, 0x6a, 0xb0, 0x04, 0x31, 0x66, 0x83, 0x78, 0x39, 0x12, 0x4c, 0x10, 0x5e, 0x0e, 0x88, 0x97, 121 | 0x2e, 0xc1, 0x0c, 0xe1, 0xa5, 0x83, 0x78, 0x45, 0x12, 0x2c, 0x10, 0x5e, 0x91, 0x90, 0x10, 0x17, 122 | 0x4b, 0x52, 0x62, 0x71, 0xaa, 0x04, 0x2b, 0x58, 0x00, 0xcc, 0x16, 0x12, 0xe1, 0x62, 0x4d, 0xce, 123 | 0x2f, 0xcd, 0x2b, 0x91, 0x60, 0x03, 0x0b, 0x42, 0x38, 0x42, 0x62, 0x5c, 0x6c, 0x49, 0xa5, 0x69, 124 | 0x69, 0xa9, 0x45, 0x12, 0xec, 0x0a, 0x8c, 0x1a, 0x3c, 0x41, 0x50, 0x5e, 0x12, 0x1b, 0xd8, 0x4d, 125 | 0xc6, 0x80, 0x00, 0x00, 0x00, 0xff, 0xff, 0x0b, 0x4f, 0x6d, 0xfc, 0xa5, 0x00, 0x00, 0x00, 126 | } 127 | -------------------------------------------------------------------------------- /snapshot.proto: 
-------------------------------------------------------------------------------- 1 | // Copyright 2020 CrowdStrike Holdings, Inc. 2 | // 3 | // Use of this source code is governed by the MIT License. 4 | 5 | syntax = "proto3"; 6 | 7 | package apbf; 8 | 9 | message Snapshot { 10 | uint64 k = 1; 11 | uint64 l = 2; 12 | uint64 g = 3; 13 | uint64 r = 4; 14 | uint64 base = 5; 15 | uint64 count = 6; 16 | bytes buffer = 7; 17 | } 18 | --------------------------------------------------------------------------------