├── .gitignore ├── LICENSE ├── README.md ├── bucket.go ├── cuckoofilter.go ├── cuckoofilter_test.go ├── doc.go ├── go.mod ├── scalable_cuckoofilter.go ├── scalable_cuckoofilter_test.go ├── util.go └── util_test.go /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files, Static and Dynamic libs (Shared Objects) 2 | *.o 3 | *.a 4 | *.so 5 | 6 | # Folders 7 | _obj 8 | _test 9 | 10 | # Architecture specific extensions/prefixes 11 | *.[568vq] 12 | [568vq].out 13 | 14 | *.cgo1.go 15 | *.cgo2.c 16 | _cgo_defun.c 17 | _cgo_gotypes.go 18 | _cgo_export.* 19 | 20 | _testmain.go 21 | 22 | *.exe 23 | *.test 24 | *.prof 25 | 26 | .idea 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Seif Lotfy 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Cuckoo Filter 2 | 3 | [![GoDoc](https://godoc.org/github.com/seiflotfy/cuckoofilter?status.svg)](https://godoc.org/github.com/seiflotfy/cuckoofilter) [![CodeHunt.io](https://img.shields.io/badge/vote-codehunt.io-02AFD1.svg)](http://codehunt.io/sub/cuckoo-filter/?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge) 4 | 5 | Cuckoo filter is a Bloom filter replacement for approximated set-membership queries. While Bloom filters are well-known space-efficient data structures to serve queries like "if item x is in a set?", they do not support deletion. Their variances to enable deletion (like counting Bloom filters) usually require much more space. 6 | 7 | Cuckoo filters provide the flexibility to add and remove items dynamically. A cuckoo filter is based on cuckoo hashing (and therefore named as cuckoo filter). It is essentially a cuckoo hash table storing each key's fingerprint. Cuckoo hash tables can be highly compact, thus a cuckoo filter could use less space than conventional Bloom filters, for applications that require low false positive rates (< 3%). 8 | 9 | For details about the algorithm and citations please use this article for now 10 | 11 | ["Cuckoo Filter: Better Than Bloom" by Bin Fan, Dave Andersen and Michael Kaminsky](https://www.cs.cmu.edu/~dga/papers/cuckoo-conext2014.pdf) 12 | 13 | ## Implementation details 14 | 15 | The paper cited above leaves several parameters to choose. In this implementation 16 | 17 | 1. Every element has 2 possible bucket indices 18 | 2. Buckets have a static size of 4 fingerprints 19 | 3. Fingerprints have a static size of 8 bits 20 | 21 | 1 and 2 are suggested to be the optimum by the authors. The choice of 3 comes down to the desired false positive rate. Given a target false positive rate of `r` and a bucket size `b`, they suggest choosing the fingerprint size `f` using 22 | 23 | f >= log2(2b/r) bits 24 | 25 | With the 8 bit fingerprint size in this repository, you can expect `r ~= 0.03`. 26 | [Other implementations](https://github.com/panmari/cuckoofilter) use 16 bit, which correspond to a false positive rate of `r ~= 0.0001`. 27 | 28 | ## Example usage: 29 | ```go 30 | package main 31 | 32 | import "fmt" 33 | import cuckoo "github.com/seiflotfy/cuckoofilter" 34 | 35 | func main() { 36 | cf := cuckoo.NewFilter(1000) 37 | cf.InsertUnique([]byte("geeky ogre")) 38 | 39 | // Lookup a string (and it a miss) if it exists in the cuckoofilter 40 | cf.Lookup([]byte("hello")) 41 | 42 | count := cf.Count() 43 | fmt.Println(count) // count == 1 44 | 45 | // Delete a string (and it a miss) 46 | cf.Delete([]byte("hello")) 47 | 48 | count = cf.Count() 49 | fmt.Println(count) // count == 1 50 | 51 | // Delete a string (a hit) 52 | cf.Delete([]byte("geeky ogre")) 53 | 54 | count = cf.Count() 55 | fmt.Println(count) // count == 0 56 | 57 | cf.Reset() // reset 58 | } 59 | ``` 60 | 61 | ## Documentation: 62 | ["Cuckoo Filter on GoDoc"](http://godoc.org/github.com/seiflotfy/cuckoofilter) 63 | -------------------------------------------------------------------------------- /bucket.go: -------------------------------------------------------------------------------- 1 | package cuckoo 2 | 3 | type fingerprint byte 4 | 5 | type bucket [bucketSize]fingerprint 6 | 7 | const ( 8 | nullFp = 0 9 | bucketSize = 4 10 | ) 11 | 12 | func (b *bucket) insert(fp fingerprint) bool { 13 | for i, tfp := range b { 14 | if tfp == nullFp { 15 | b[i] = fp 16 | return true 17 | } 18 | } 19 | return false 20 | } 21 | 22 | func (b *bucket) delete(fp fingerprint) bool { 23 | for i, tfp := range b { 24 | if tfp == fp { 25 | b[i] = nullFp 26 | return true 27 | } 28 | } 29 | return false 30 | } 31 | 32 | func (b *bucket) getFingerprintIndex(fp fingerprint) int { 33 | for i, tfp := range b { 34 | if tfp == fp { 35 | return i 36 | } 37 | } 38 | return -1 39 | } 40 | 41 | func (b *bucket) reset() { 42 | for i := range b { 43 | b[i] = nullFp 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /cuckoofilter.go: -------------------------------------------------------------------------------- 1 | package cuckoo 2 | 3 | import ( 4 | "fmt" 5 | "math/bits" 6 | "math/rand" 7 | ) 8 | 9 | const maxCuckooCount = 500 10 | 11 | // Filter is a probabilistic counter 12 | type Filter struct { 13 | buckets []bucket 14 | count uint 15 | bucketPow uint 16 | } 17 | 18 | // NewFilter returns a new cuckoofilter with a given capacity. 19 | // A capacity of 1000000 is a normal default, which allocates 20 | // about ~1MB on 64-bit machines. 21 | func NewFilter(capacity uint) *Filter { 22 | capacity = getNextPow2(uint64(capacity)) / bucketSize 23 | if capacity == 0 { 24 | capacity = 1 25 | } 26 | buckets := make([]bucket, capacity) 27 | return &Filter{ 28 | buckets: buckets, 29 | count: 0, 30 | bucketPow: uint(bits.TrailingZeros(capacity)), 31 | } 32 | } 33 | 34 | // Lookup returns true if data is in the counter 35 | func (cf *Filter) Lookup(data []byte) bool { 36 | i1, fp := getIndexAndFingerprint(data, cf.bucketPow) 37 | if cf.buckets[i1].getFingerprintIndex(fp) > -1 { 38 | return true 39 | } 40 | i2 := getAltIndex(fp, i1, cf.bucketPow) 41 | return cf.buckets[i2].getFingerprintIndex(fp) > -1 42 | } 43 | 44 | // Reset ... 45 | func (cf *Filter) Reset() { 46 | for i := range cf.buckets { 47 | cf.buckets[i].reset() 48 | } 49 | cf.count = 0 50 | } 51 | 52 | func randi(i1, i2 uint) uint { 53 | if rand.Intn(2) == 0 { 54 | return i1 55 | } 56 | return i2 57 | } 58 | 59 | // Insert inserts data into the counter and returns true upon success 60 | func (cf *Filter) Insert(data []byte) bool { 61 | i1, fp := getIndexAndFingerprint(data, cf.bucketPow) 62 | if cf.insert(fp, i1) { 63 | return true 64 | } 65 | i2 := getAltIndex(fp, i1, cf.bucketPow) 66 | if cf.insert(fp, i2) { 67 | return true 68 | } 69 | return cf.reinsert(fp, randi(i1, i2)) 70 | } 71 | 72 | // InsertUnique inserts data into the counter if not exists and returns true upon success 73 | func (cf *Filter) InsertUnique(data []byte) bool { 74 | if cf.Lookup(data) { 75 | return false 76 | } 77 | return cf.Insert(data) 78 | } 79 | 80 | func (cf *Filter) insert(fp fingerprint, i uint) bool { 81 | if cf.buckets[i].insert(fp) { 82 | cf.count++ 83 | return true 84 | } 85 | return false 86 | } 87 | 88 | func (cf *Filter) reinsert(fp fingerprint, i uint) bool { 89 | for k := 0; k < maxCuckooCount; k++ { 90 | j := rand.Intn(bucketSize) 91 | oldfp := fp 92 | fp = cf.buckets[i][j] 93 | cf.buckets[i][j] = oldfp 94 | 95 | // look in the alternate location for that random element 96 | i = getAltIndex(fp, i, cf.bucketPow) 97 | if cf.insert(fp, i) { 98 | return true 99 | } 100 | } 101 | return false 102 | } 103 | 104 | // Delete data from counter if exists and return if deleted or not 105 | func (cf *Filter) Delete(data []byte) bool { 106 | i1, fp := getIndexAndFingerprint(data, cf.bucketPow) 107 | if cf.delete(fp, i1) { 108 | return true 109 | } 110 | i2 := getAltIndex(fp, i1, cf.bucketPow) 111 | return cf.delete(fp, i2) 112 | } 113 | 114 | func (cf *Filter) delete(fp fingerprint, i uint) bool { 115 | if cf.buckets[i].delete(fp) { 116 | if cf.count > 0 { 117 | cf.count-- 118 | } 119 | return true 120 | } 121 | return false 122 | } 123 | 124 | // Count returns the number of items in the counter 125 | func (cf *Filter) Count() uint { 126 | return cf.count 127 | } 128 | 129 | // Encode returns a byte slice representing a Cuckoofilter 130 | func (cf *Filter) Encode() []byte { 131 | bytes := make([]byte, len(cf.buckets)*bucketSize) 132 | for i, b := range cf.buckets { 133 | for j, f := range b { 134 | index := (i * len(b)) + j 135 | bytes[index] = byte(f) 136 | } 137 | } 138 | return bytes 139 | } 140 | 141 | // Decode returns a Cuckoofilter from a byte slice 142 | func Decode(bytes []byte) (*Filter, error) { 143 | var count uint 144 | if len(bytes)%bucketSize != 0 { 145 | return nil, fmt.Errorf("expected bytes to be multiple of %d, got %d", bucketSize, len(bytes)) 146 | } 147 | if len(bytes) == 0 { 148 | return nil, fmt.Errorf("bytes can not be empty") 149 | } 150 | buckets := make([]bucket, len(bytes)/4) 151 | for i, b := range buckets { 152 | for j := range b { 153 | index := (i * len(b)) + j 154 | if bytes[index] != 0 { 155 | buckets[i][j] = fingerprint(bytes[index]) 156 | count++ 157 | } 158 | } 159 | } 160 | return &Filter{ 161 | buckets: buckets, 162 | count: count, 163 | bucketPow: uint(bits.TrailingZeros(uint(len(buckets)))), 164 | }, nil 165 | } 166 | -------------------------------------------------------------------------------- /cuckoofilter_test.go: -------------------------------------------------------------------------------- 1 | package cuckoo 2 | 3 | import ( 4 | "bufio" 5 | "crypto/rand" 6 | "io" 7 | "os" 8 | "reflect" 9 | "testing" 10 | ) 11 | 12 | func TestInsertion(t *testing.T) { 13 | cf := NewFilter(1000000) 14 | fd, err := os.Open("/usr/share/dict/words") 15 | if err != nil { 16 | panic(err) 17 | } 18 | scanner := bufio.NewScanner(fd) 19 | 20 | var values [][]byte 21 | var lineCount uint 22 | for scanner.Scan() { 23 | s := []byte(scanner.Text()) 24 | if cf.InsertUnique(s) { 25 | lineCount++ 26 | } 27 | values = append(values, s) 28 | } 29 | 30 | count := cf.Count() 31 | if count != lineCount { 32 | t.Errorf("Expected count = %d, instead count = %d", lineCount, count) 33 | } 34 | 35 | for _, v := range values { 36 | cf.Delete(v) 37 | } 38 | 39 | count = cf.Count() 40 | if count != 0 { 41 | t.Errorf("Expected count = 0, instead count == %d", count) 42 | } 43 | } 44 | 45 | func TestEncodeDecode(t *testing.T) { 46 | cf := NewFilter(8) 47 | cf.buckets = []bucket{ 48 | [4]fingerprint{1, 2, 3, 4}, 49 | [4]fingerprint{5, 6, 7, 8}, 50 | } 51 | cf.count = 8 52 | bytes := cf.Encode() 53 | ncf, err := Decode(bytes) 54 | if err != nil { 55 | t.Errorf("Expected no error, got %v", err) 56 | } 57 | if !reflect.DeepEqual(cf, ncf) { 58 | t.Errorf("Expected %v, got %v", cf, ncf) 59 | } 60 | } 61 | 62 | func TestDecode(t *testing.T) { 63 | ncf, err := Decode([]byte("")) 64 | if err == nil { 65 | t.Errorf("Expected err, got nil") 66 | } 67 | if ncf != nil { 68 | t.Errorf("Expected nil, got %v", ncf) 69 | } 70 | } 71 | 72 | func BenchmarkFilter_Reset(b *testing.B) { 73 | const cap = 10000 74 | filter := NewFilter(cap) 75 | 76 | b.ResetTimer() 77 | 78 | for i := 0; i < b.N; i++ { 79 | filter.Reset() 80 | } 81 | } 82 | 83 | func BenchmarkFilter_Insert(b *testing.B) { 84 | const cap = 10000 85 | filter := NewFilter(cap) 86 | 87 | b.ResetTimer() 88 | 89 | var hash [32]byte 90 | for i := 0; i < b.N; i++ { 91 | io.ReadFull(rand.Reader, hash[:]) 92 | filter.Insert(hash[:]) 93 | } 94 | } 95 | 96 | func BenchmarkFilter_Lookup(b *testing.B) { 97 | const cap = 10000 98 | filter := NewFilter(cap) 99 | 100 | var hash [32]byte 101 | for i := 0; i < 10000; i++ { 102 | io.ReadFull(rand.Reader, hash[:]) 103 | filter.Insert(hash[:]) 104 | } 105 | 106 | b.ResetTimer() 107 | for i := 0; i < b.N; i++ { 108 | io.ReadFull(rand.Reader, hash[:]) 109 | filter.Lookup(hash[:]) 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | Permission is hereby granted, free of charge, to any person obtaining a copy 3 | of this software and associated documentation files (the "Software"), to deal 4 | in the Software without restriction, including without limitation the rights 5 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 6 | copies of the Software, and to permit persons to whom the Software is 7 | furnished to do so, subject to the following conditions: 8 | 9 | The above copyright notice and this permission notice shall be included in all 10 | copies or substantial portions of the Software. 11 | 12 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 | SOFTWARE. 19 | */ 20 | 21 | /* 22 | Package cuckoo provides a Cuckoo Filter, a Bloom filter replacement for approximated set-membership queries. 23 | 24 | While Bloom filters are well-known space-efficient data structures to serve queries like "if item x is in a set?", they do not support deletion. Their variances to enable deletion (like counting Bloom filters) usually require much more space. 25 | 26 | Cuckoo filters provide the flexibility to add and remove items dynamically. A cuckoo filter is based on cuckoo hashing (and therefore named as cuckoo filter). It is essentially a cuckoo hash table storing each key's fingerprint. Cuckoo hash tables can be highly compact, thus a cuckoo filter could use less space than conventional Bloom filters, for applications that require low false positive rates (< 3%). 27 | 28 | For details about the algorithm and citations please use this article: 29 | 30 | "Cuckoo Filter: Better Than Bloom" by Bin Fan, Dave Andersen and Michael Kaminsky 31 | (https://www.cs.cmu.edu/~dga/papers/cuckoo-conext2014.pdf) 32 | 33 | Note: 34 | This implementation uses a a static bucket size of 4 fingerprints and a fingerprint size of 1 byte based on my understanding of an optimal bucket/fingerprint/size ratio from the aforementioned paper.*/ 35 | package cuckoo 36 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/seiflotfy/cuckoofilter 2 | 3 | go 1.15 4 | 5 | require ( 6 | github.com/davecgh/go-spew v1.1.1 // indirect 7 | github.com/dgryski/go-metro v0.0.0-20200812162917-85c65e2d0165 8 | github.com/stretchr/testify v1.6.1 9 | gopkg.in/yaml.v3 v3.0.0-20200605160147-a5ece683394c // indirect 10 | ) 11 | -------------------------------------------------------------------------------- /scalable_cuckoofilter.go: -------------------------------------------------------------------------------- 1 | package cuckoo 2 | 3 | import ( 4 | "bytes" 5 | "encoding/gob" 6 | ) 7 | 8 | const ( 9 | DefaultLoadFactor = 0.9 10 | DefaultCapacity = 10000 11 | ) 12 | 13 | type ScalableCuckooFilter struct { 14 | filters []*Filter 15 | loadFactor float32 16 | //when scale(last filter size * loadFactor >= capacity) get new filter capacity 17 | scaleFactor func(capacity uint) uint 18 | } 19 | 20 | type option func(*ScalableCuckooFilter) 21 | 22 | type Store struct { 23 | Bytes [][]byte 24 | LoadFactor float32 25 | } 26 | 27 | /* 28 | by default option the grow capacity is: 29 | capacity , total 30 | 4096 4096 31 | 8192 12288 32 | 16384 28672 33 | 32768 61440 34 | 65536 126,976 35 | */ 36 | func NewScalableCuckooFilter(opts ...option) *ScalableCuckooFilter { 37 | sfilter := new(ScalableCuckooFilter) 38 | for _, opt := range opts { 39 | opt(sfilter) 40 | } 41 | configure(sfilter) 42 | return sfilter 43 | } 44 | 45 | func (sf *ScalableCuckooFilter) Lookup(data []byte) bool { 46 | for _, filter := range sf.filters { 47 | if filter.Lookup(data) { 48 | return true 49 | } 50 | } 51 | return false 52 | } 53 | 54 | func (sf *ScalableCuckooFilter) Reset() { 55 | for _, filter := range sf.filters { 56 | filter.Reset() 57 | } 58 | } 59 | 60 | func (sf *ScalableCuckooFilter) Insert(data []byte) bool { 61 | needScale := false 62 | lastFilter := sf.filters[len(sf.filters)-1] 63 | if (float32(lastFilter.count) / float32(len(lastFilter.buckets))) > sf.loadFactor { 64 | needScale = true 65 | } else { 66 | b := lastFilter.Insert(data) 67 | needScale = !b 68 | } 69 | if !needScale { 70 | return true 71 | } 72 | newFilter := NewFilter(sf.scaleFactor(uint(len(lastFilter.buckets)))) 73 | sf.filters = append(sf.filters, newFilter) 74 | return newFilter.Insert(data) 75 | } 76 | 77 | func (sf *ScalableCuckooFilter) InsertUnique(data []byte) bool { 78 | if sf.Lookup(data) { 79 | return false 80 | } 81 | return sf.Insert(data) 82 | } 83 | 84 | func (sf *ScalableCuckooFilter) Delete(data []byte) bool { 85 | for _, filter := range sf.filters { 86 | if filter.Delete(data) { 87 | return true 88 | } 89 | } 90 | return false 91 | } 92 | 93 | func (sf *ScalableCuckooFilter) Count() uint { 94 | var sum uint 95 | for _, filter := range sf.filters { 96 | sum += filter.count 97 | } 98 | return sum 99 | 100 | } 101 | 102 | func (sf *ScalableCuckooFilter) Encode() []byte { 103 | slice := make([][]byte, len(sf.filters)) 104 | for i, filter := range sf.filters { 105 | encode := filter.Encode() 106 | slice[i] = encode 107 | } 108 | store := &Store{ 109 | Bytes: slice, 110 | LoadFactor: sf.loadFactor, 111 | } 112 | buf := bytes.NewBuffer(nil) 113 | enc := gob.NewEncoder(buf) 114 | err := enc.Encode(store) 115 | if err != nil { 116 | return nil 117 | } 118 | return buf.Bytes() 119 | } 120 | 121 | func (sf *ScalableCuckooFilter) DecodeWithParam(fBytes []byte, opts ...option) (*ScalableCuckooFilter, error) { 122 | instance, err := DecodeScalableFilter(fBytes) 123 | if err != nil { 124 | return nil, err 125 | } 126 | for _, opt := range opts { 127 | opt(instance) 128 | } 129 | return instance, nil 130 | } 131 | 132 | func DecodeScalableFilter(fBytes []byte) (*ScalableCuckooFilter, error) { 133 | buf := bytes.NewBuffer(fBytes) 134 | dec := gob.NewDecoder(buf) 135 | store := &Store{} 136 | err := dec.Decode(store) 137 | if err != nil { 138 | return nil, err 139 | } 140 | filterSize := len(store.Bytes) 141 | instance := NewScalableCuckooFilter(func(filter *ScalableCuckooFilter) { 142 | filter.filters = make([]*Filter, filterSize) 143 | }, func(filter *ScalableCuckooFilter) { 144 | filter.loadFactor = store.LoadFactor 145 | }) 146 | for i, oneBytes := range store.Bytes { 147 | filter, err := Decode(oneBytes) 148 | if err != nil { 149 | return nil, err 150 | } 151 | instance.filters[i] = filter 152 | } 153 | return instance, nil 154 | 155 | } 156 | 157 | func configure(sfilter *ScalableCuckooFilter) { 158 | if sfilter.loadFactor == 0 { 159 | sfilter.loadFactor = DefaultLoadFactor 160 | } 161 | if sfilter.scaleFactor == nil { 162 | sfilter.scaleFactor = func(currentSize uint) uint { 163 | return currentSize * bucketSize * 2 164 | } 165 | } 166 | if sfilter.filters == nil { 167 | initFilter := NewFilter(DefaultCapacity) 168 | sfilter.filters = []*Filter{initFilter} 169 | } 170 | } 171 | -------------------------------------------------------------------------------- /scalable_cuckoofilter_test.go: -------------------------------------------------------------------------------- 1 | package cuckoo 2 | 3 | import ( 4 | "strconv" 5 | "testing" 6 | ) 7 | import "github.com/stretchr/testify/assert" 8 | 9 | func TestNormalUse(t *testing.T) { 10 | filter := NewScalableCuckooFilter() 11 | for i := 0; i < 100000; i++ { 12 | filter.Insert([]byte("NewScalableCuckooFilter_" + strconv.Itoa(i))) 13 | } 14 | testStr := []byte("NewScalableCuckooFilter") 15 | b := filter.Insert(testStr) 16 | assert.True(t, b) 17 | b = filter.Lookup(testStr) 18 | assert.True(t, b) 19 | b = filter.Delete(testStr) 20 | assert.True(t, b) 21 | b = filter.Lookup(testStr) 22 | assert.False(t, b) 23 | b = filter.Lookup([]byte("NewScalableCuckooFilter_233")) 24 | assert.True(t, b) 25 | b = filter.InsertUnique([]byte("NewScalableCuckooFilter_599")) 26 | assert.False(t, b) 27 | } 28 | 29 | func TestScalableCuckooFilter_DecodeEncode(t *testing.T) { 30 | filter := NewScalableCuckooFilter(func(filter *ScalableCuckooFilter) { 31 | filter.loadFactor = 0.8 32 | }) 33 | for i := 0; i < 100000; i++ { 34 | filter.Insert([]byte("NewScalableCuckooFilter_" + strconv.Itoa(i))) 35 | } 36 | bytes := filter.Encode() 37 | decodeFilter, err := DecodeScalableFilter(bytes) 38 | assert.Nil(t, err) 39 | assert.Equal(t, decodeFilter.loadFactor, float32(0.8)) 40 | b := decodeFilter.Lookup([]byte("NewScalableCuckooFilter_233")) 41 | assert.True(t, b) 42 | for i, f := range decodeFilter.filters { 43 | assert.Equal(t, f.count, filter.filters[i].count) 44 | } 45 | 46 | } 47 | -------------------------------------------------------------------------------- /util.go: -------------------------------------------------------------------------------- 1 | package cuckoo 2 | 3 | import ( 4 | metro "github.com/dgryski/go-metro" 5 | ) 6 | 7 | var ( 8 | altHash = [256]uint{} 9 | masks = [65]uint{} 10 | ) 11 | 12 | func init() { 13 | for i := 0; i < 256; i++ { 14 | altHash[i] = (uint(metro.Hash64([]byte{byte(i)}, 1337))) 15 | } 16 | for i := uint(0); i <= 64; i++ { 17 | masks[i] = (1 << i) - 1 18 | } 19 | } 20 | 21 | func getAltIndex(fp fingerprint, i uint, bucketPow uint) uint { 22 | mask := masks[bucketPow] 23 | hash := altHash[fp] & mask 24 | return (i & mask) ^ hash 25 | } 26 | 27 | func getFingerprint(hash uint64) byte { 28 | // Use least significant bits for fingerprint. 29 | fp := byte(hash%255 + 1) 30 | return fp 31 | } 32 | 33 | // getIndicesAndFingerprint returns the 2 bucket indices and fingerprint to be used 34 | func getIndexAndFingerprint(data []byte, bucketPow uint) (uint, fingerprint) { 35 | hash := defaultHasher.Hash64(data) 36 | fp := getFingerprint(hash) 37 | // Use most significant bits for deriving index. 38 | i1 := uint(hash>>32) & masks[bucketPow] 39 | return i1, fingerprint(fp) 40 | } 41 | 42 | func getNextPow2(n uint64) uint { 43 | n-- 44 | n |= n >> 1 45 | n |= n >> 2 46 | n |= n >> 4 47 | n |= n >> 8 48 | n |= n >> 16 49 | n |= n >> 32 50 | n++ 51 | return uint(n) 52 | } 53 | 54 | var defaultHasher Hasher = new(metrotHasher) 55 | 56 | func SetDefaultHasher(hasher Hasher) { 57 | defaultHasher = hasher 58 | } 59 | 60 | type Hasher interface { 61 | Hash64([]byte) uint64 62 | } 63 | 64 | var _ Hasher = new(metrotHasher) 65 | 66 | type metrotHasher struct{} 67 | 68 | func (h *metrotHasher) Hash64(data []byte) uint64 { 69 | hash := metro.Hash64(data, 1337) 70 | return hash 71 | } 72 | -------------------------------------------------------------------------------- /util_test.go: -------------------------------------------------------------------------------- 1 | package cuckoo 2 | 3 | import ( 4 | "crypto/rand" 5 | "io" 6 | "math/bits" 7 | "testing" 8 | 9 | "github.com/stretchr/testify/assert" 10 | ) 11 | 12 | func TestIndexAndFP(t *testing.T) { 13 | data := []byte("seif") 14 | bucketPow := uint(bits.TrailingZeros(1024)) 15 | i1, fp := getIndexAndFingerprint(data, bucketPow) 16 | i2 := getAltIndex(fp, i1, bucketPow) 17 | i11 := getAltIndex(fp, i2, bucketPow) 18 | i22 := getAltIndex(fp, i11, bucketPow) 19 | assert.EqualValues(t, i11, i1) 20 | assert.EqualValues(t, i22, i2) 21 | } 22 | 23 | func TestCap(t *testing.T) { 24 | const capacity = 10000 25 | res := getNextPow2(uint64(capacity)) / bucketSize 26 | assert.EqualValues(t, res, 4096) 27 | } 28 | 29 | func TestInsert(t *testing.T) { 30 | const cap = 10000 31 | filter := NewFilter(cap) 32 | 33 | var hash [32]byte 34 | io.ReadFull(rand.Reader, hash[:]) 35 | 36 | for i := 0; i < 100; i++ { 37 | filter.Insert(hash[:]) 38 | } 39 | 40 | assert.EqualValues(t, filter.Count(), 8) 41 | } 42 | 43 | func TestFilter_Lookup(t *testing.T) { 44 | const cap = 10000 45 | 46 | var ( 47 | m = make(map[[32]byte]struct{}) 48 | filter = NewFilter(cap) 49 | hash [32]byte 50 | ) 51 | 52 | for i := 0; i < cap; i++ { 53 | io.ReadFull(rand.Reader, hash[:]) 54 | m[hash] = struct{}{} 55 | filter.Insert(hash[:]) 56 | } 57 | 58 | assert.EqualValues(t, len(m), 10000) 59 | 60 | var lookFail int 61 | for k := range m { 62 | if !filter.Lookup(k[:]) { 63 | lookFail++ 64 | } 65 | } 66 | 67 | assert.EqualValues(t, lookFail, 0) 68 | } 69 | 70 | func TestReset(t *testing.T) { 71 | const cap = 10000 72 | 73 | var ( 74 | filter = NewFilter(cap) 75 | hash [32]byte 76 | insertSuccess int 77 | insertFails int 78 | ) 79 | 80 | for i := 0; i < 10*cap; i++ { 81 | io.ReadFull(rand.Reader, hash[:]) 82 | 83 | if filter.Insert(hash[:]) { 84 | insertSuccess++ 85 | } else { 86 | insertFails++ 87 | filter.Reset() 88 | } 89 | } 90 | 91 | assert.EqualValues(t, insertSuccess, 99994) 92 | assert.EqualValues(t, insertFails, 6) 93 | } 94 | 95 | func TestBucket_Reset(t *testing.T) { 96 | var bkt bucket 97 | for i := byte(0); i < bucketSize; i++ { 98 | bkt[i] = fingerprint(i) 99 | } 100 | bkt.reset() 101 | for _, val := range bkt { 102 | assert.EqualValues(t, 0, val) 103 | } 104 | } 105 | --------------------------------------------------------------------------------