├── vendor ├── modules.txt └── github.com │ └── dgryski │ └── go-metro │ ├── README │ ├── metro_stub.go │ ├── LICENSE │ ├── metro64.go │ ├── metro128.go │ ├── metro.py │ └── metro_amd64.s ├── go.mod ├── go.sum ├── .gitignore ├── util.go ├── LICENSE ├── permencoding.go ├── README_ZH.md ├── README.md ├── cuckoofilter_test.go ├── singletable.go ├── cuckoofilter.go └── packedtable.go /vendor/modules.txt: -------------------------------------------------------------------------------- 1 | # github.com/dgryski/go-metro v0.0.0-20200812162917-85c65e2d0165 2 | ## explicit 3 | github.com/dgryski/go-metro 4 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/linvon/cuckoo-filter 2 | 3 | go 1.14 4 | 5 | require github.com/dgryski/go-metro v0.0.0-20200812162917-85c65e2d0165 6 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/dgryski/go-metro v0.0.0-20200812162917-85c65e2d0165 h1:BS21ZUJ/B5X2UVUbczfmdWH7GapPWAhxcMsDnjJTU1E= 2 | github.com/dgryski/go-metro v0.0.0-20200812162917-85c65e2d0165/go.mod h1:c9O8+fpSOX1DM8cPNSkX/qsBWdkD4yd2dpciOWQjpBw= 3 | -------------------------------------------------------------------------------- /vendor/github.com/dgryski/go-metro/README: -------------------------------------------------------------------------------- 1 | MetroHash 2 | 3 | This package is a mechanical translation of the reference C++ code for 4 | MetroHash, available at https://github.com/jandrewrogers/MetroHash 5 | 6 | I claim no additional copyright over the original implementation. 
// Sizes used when converting between bits, bytes and machine words.
const (
	bitsPerByte    = 8
	bytesPerUint64 = 8
	bytesPerUint32 = 4
)

// getNextPow2 rounds n up to the nearest power of two and returns it as a
// uint. A value that already is a power of two is returned unchanged.
// n == 0 yields 0, because the initial decrement wraps around and the final
// increment wraps back.
func getNextPow2(n uint64) uint {
	n--
	// Smear the highest set bit into every lower position by doubling the
	// shift distance each round (1, 2, 4, 8, 16, 32).
	for shift := uint(1); shift <= 32; shift <<= 1 {
		n |= n >> shift
	}
	return uint(n + 1)
}

// maxLoadFactor returns the load-factor ceiling used for a given bucket
// size: 0.85 for 2 tags per bucket, 0.96 for 4, and 0.99 for anything else.
func maxLoadFactor(tagsPerBucket uint) float64 {
	if tagsPerBucket == 2 {
		return 0.85
	}
	if tagsPerBucket == 4 {
		return 0.96
	}
	return 0.99
}

// getBucketsFromHint picks the byte slice that will back a table.
//
// An empty (or nil) hint produces a freshly allocated, zeroed slice of
// expectedLength bytes. A non-empty hint is returned as-is (not copied) when
// its length equals expectedLength; otherwise nil and an error describing the
// mismatch are returned.
func getBucketsFromHint(initialBucketsHint []byte, expectedLength uint) ([]byte, error) {
	if len(initialBucketsHint) == 0 {
		return make([]byte, expectedLength), nil
	}
	if got := len(initialBucketsHint); uint(got) != expectedLength {
		return nil, fmt.Errorf("buckets length should be %d but got %d", expectedLength, got)
	}
	return initialBucketsHint, nil
}
22 | -------------------------------------------------------------------------------- /vendor/github.com/dgryski/go-metro/LICENSE: -------------------------------------------------------------------------------- 1 | This package is a mechanical translation of the reference C++ code for 2 | MetroHash, available at https://github.com/jandrewrogers/MetroHash 3 | 4 | The MIT License (MIT) 5 | 6 | Copyright (c) 2016 Damian Gryski 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in all 16 | copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | SOFTWARE. 
25 | -------------------------------------------------------------------------------- /permencoding.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) linvon 3 | * Date 2021/2/18 10:29 4 | */ 5 | 6 | package cuckoo 7 | 8 | import ( 9 | "encoding/binary" 10 | ) 11 | 12 | //PermEncoding permutation table 13 | type PermEncoding struct { 14 | nEnts uint 15 | DecTable []uint16 16 | EncTable []uint16 17 | } 18 | 19 | //Init init permutation table 20 | func (p *PermEncoding) Init() { 21 | p.nEnts = 3876 22 | p.DecTable = make([]uint16, p.nEnts) 23 | p.EncTable = make([]uint16, 1<<16) 24 | 25 | dst := [tagsPerPTable]uint8{} 26 | var idx uint16 27 | p.genTables(0, 0, dst, &idx) 28 | } 29 | 30 | /* unpack one 2-byte number to four 4-bit numbers */ 31 | func (p *PermEncoding) unpack(in uint16, out *[tagsPerPTable]uint8) { 32 | out[0] = uint8(in & 0x000f) 33 | out[2] = uint8((in >> 4) & 0x000f) 34 | out[1] = uint8((in >> 8) & 0x000f) 35 | out[3] = uint8((in >> 12) & 0x000f) 36 | } 37 | 38 | /* pack four 4-bit numbers to one 2-byte number */ 39 | func (p *PermEncoding) pack(in [tagsPerPTable]uint8) uint16 { 40 | var in1, in2 uint16 41 | in1 = binary.LittleEndian.Uint16([]byte{in[0], in[1]}) & 0x0f0f 42 | in2 = binary.LittleEndian.Uint16([]byte{in[2], in[3]}) << 4 43 | 44 | return in1 | in2 45 | } 46 | 47 | //Decode decode codeword to lowBits 48 | func (p *PermEncoding) Decode(codeword uint16, lowBits *[tagsPerPTable]uint8) { 49 | p.unpack(p.DecTable[codeword], lowBits) 50 | } 51 | 52 | //Encode encode lowBits to codeword 53 | func (p *PermEncoding) Encode(lowBits [tagsPerPTable]uint8) uint16 { 54 | //fmt.Printf("Perm.encode\n") 55 | //for i := 0; i < tagsPerPTable; i++ { 56 | // fmt.Printf("encode lowBits[%d]=%x\n", i, lowBits[i]) 57 | //} 58 | //fmt.Printf("pack(lowBits) = %x\n", p.pack(lowBits)) 59 | //fmt.Printf("enc_table[%x]=%x\n", p.pack(lowBits), p.EncTable[p.pack(lowBits)]) 60 | return 
p.EncTable[p.pack(lowBits)] 61 | } 62 | 63 | func (p *PermEncoding) genTables(base, k int, dst [tagsPerPTable]uint8, idx *uint16) { 64 | for i := base; i < 16; i++ { 65 | /* for fast comparison in binary_search in little-endian machine */ 66 | dst[k] = uint8(i) 67 | if k+1 < tagsPerPTable { 68 | p.genTables(i, k+1, dst, idx) 69 | } else { 70 | p.DecTable[*idx] = p.pack(dst) 71 | p.EncTable[p.pack(dst)] = *idx 72 | //fmt.Printf("enc_table[%04x]=%04x\t%x %x %x %x\n", p.pack(dst), *idx, dst[0], 73 | // dst[1], dst[2], dst[3]) 74 | *idx++ 75 | } 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /vendor/github.com/dgryski/go-metro/metro64.go: -------------------------------------------------------------------------------- 1 | // +build noasm !amd64 gccgo 2 | 3 | package metro 4 | 5 | import ( 6 | "encoding/binary" 7 | "math/bits" 8 | ) 9 | 10 | func Hash64(buffer []byte, seed uint64) uint64 { 11 | 12 | const ( 13 | k0 = 0xD6D018F5 14 | k1 = 0xA2AA033B 15 | k2 = 0x62992FC1 16 | k3 = 0x30BC5B29 17 | ) 18 | 19 | ptr := buffer 20 | 21 | hash := (seed + k2) * k0 22 | 23 | if len(ptr) >= 32 { 24 | v0, v1, v2, v3 := hash, hash, hash, hash 25 | 26 | for len(ptr) >= 32 { 27 | v0 += binary.LittleEndian.Uint64(ptr[:8]) * k0 28 | v0 = bits.RotateLeft64(v0, -29) + v2 29 | v1 += binary.LittleEndian.Uint64(ptr[8:16]) * k1 30 | v1 = bits.RotateLeft64(v1, -29) + v3 31 | v2 += binary.LittleEndian.Uint64(ptr[16:24]) * k2 32 | v2 = bits.RotateLeft64(v2, -29) + v0 33 | v3 += binary.LittleEndian.Uint64(ptr[24:32]) * k3 34 | v3 = bits.RotateLeft64(v3, -29) + v1 35 | ptr = ptr[32:] 36 | } 37 | 38 | v2 ^= bits.RotateLeft64(((v0+v3)*k0)+v1, -37) * k1 39 | v3 ^= bits.RotateLeft64(((v1+v2)*k1)+v0, -37) * k0 40 | v0 ^= bits.RotateLeft64(((v0+v2)*k0)+v3, -37) * k1 41 | v1 ^= bits.RotateLeft64(((v1+v3)*k1)+v2, -37) * k0 42 | hash += v0 ^ v1 43 | } 44 | 45 | if len(ptr) >= 16 { 46 | v0 := hash + (binary.LittleEndian.Uint64(ptr[:8]) * k2) 47 | v0 = 
bits.RotateLeft64(v0, -29) * k3 48 | v1 := hash + (binary.LittleEndian.Uint64(ptr[8:16]) * k2) 49 | v1 = bits.RotateLeft64(v1, -29) * k3 50 | v0 ^= bits.RotateLeft64(v0*k0, -21) + v1 51 | v1 ^= bits.RotateLeft64(v1*k3, -21) + v0 52 | hash += v1 53 | ptr = ptr[16:] 54 | } 55 | 56 | if len(ptr) >= 8 { 57 | hash += binary.LittleEndian.Uint64(ptr[:8]) * k3 58 | ptr = ptr[8:] 59 | hash ^= bits.RotateLeft64(hash, -55) * k1 60 | } 61 | 62 | if len(ptr) >= 4 { 63 | hash += uint64(binary.LittleEndian.Uint32(ptr[:4])) * k3 64 | hash ^= bits.RotateLeft64(hash, -26) * k1 65 | ptr = ptr[4:] 66 | } 67 | 68 | if len(ptr) >= 2 { 69 | hash += uint64(binary.LittleEndian.Uint16(ptr[:2])) * k3 70 | ptr = ptr[2:] 71 | hash ^= bits.RotateLeft64(hash, -48) * k1 72 | } 73 | 74 | if len(ptr) >= 1 { 75 | hash += uint64(ptr[0]) * k3 76 | hash ^= bits.RotateLeft64(hash, -37) * k1 77 | } 78 | 79 | hash ^= bits.RotateLeft64(hash, -28) 80 | hash *= k0 81 | hash ^= bits.RotateLeft64(hash, -29) 82 | 83 | return hash 84 | } 85 | 86 | func Hash64Str(buffer string, seed uint64) uint64 { 87 | return Hash64([]byte(buffer), seed) 88 | } 89 | -------------------------------------------------------------------------------- /README_ZH.md: -------------------------------------------------------------------------------- 1 | # cuckoo-filter 2 | [![Mentioned in Awesome Go](https://awesome.re/mentioned-badge.svg)](https://github.com/avelino/awesome-go) 3 | 4 | cuckoo-filter 的 go 实现版本. 
可按你的配置来定制过滤器参数 5 | 6 | 移植于 [efficient/cuckoofilter](https://github.com/efficient/cuckoofilter) 7 | 8 | [English Version](./README.md) 9 | 10 | 概述 11 | -------- 12 | 布谷鸟过滤器是一种在近似集合隶属查询时替代布隆过滤器的数据结构。布隆过滤器是众所周知的一种用于查询类似于“x是否在集合中?”这类问题,且非常节省空间的数据结构,但不支持删除。其支持删除的相关变种(如计数布隆过滤器)通常需要更多的空间。 13 | 14 | 布谷鸟过滤器可以灵活地动态添加和删除项。布谷鸟过滤器是基于布谷鸟哈希的(这也是为什么称为布谷鸟过滤器)。 它本质上是一个存储每个键的指纹的布谷鸟哈希表。布谷鸟哈希表可以非常紧凑,因此对于需要更低假阳性率(<3%)的应用程序,布谷鸟过滤器可以比传统的布隆过滤器节省更多空间。 15 | 16 | 有关算法和引用的详细信息,请参阅: 17 | 18 | ["Cuckoo Filter: Practically Better Than Bloom"](http://www.cs.cmu.edu/~binfan/papers/conext14_cuckoofilter.pdf) in proceedings of ACM CoNEXT 2014 by Bin Fan, Dave Andersen and Michael Kaminsky 19 | 20 | [中文翻译版论文](http://www.linvon.cn/posts/cuckoo/) 21 | 22 | 23 | ## 实现细节 24 | 25 | 本库的具体实现细节以及使用方法可以参考 [布谷鸟过滤器实战指南](http://www.linvon.cn/posts/%E5%B8%83%E8%B0%B7%E9%B8%9F%E8%BF%87%E6%BB%A4%E5%99%A8%E5%AE%9E%E6%88%98%E6%8C%87%E5%8D%97/) 26 | 27 | 上述的论文提供了几个参数供选择 28 | 29 | 1. 桶大小(b):一个桶存储几个指纹 30 | 2. 指纹大小(f):每个指纹存储的键的哈希值的位数 31 | 32 | 在其他的实现中: 33 | 34 | - [seiflotfy/cuckoofilter](https://github.com/seiflotfy/cuckoofilter) 使用 b=4, f=8 bit,其假阳性率趋近于 `r ~= 0.03`。 35 | - [panmari/cuckoofilter](https://github.com/panmari/cuckoofilter) 使用 b=4, f=16 bit,其假阳性率趋近于 `r ~= 0.0001`。 36 | - [irfansharif/cfilter](https://github.com/irfansharif/cfilter) 可以调整 b 和 f,但只能调整 f 为 8 的倍数,即以字节为单位。 37 | 38 | 在这个实现中, 你可以调整 b 和 f 为任意你想要的值,并且论文中提到的半排序桶也是可以使用的, 该方法可以对每一项节省一个 bit。 39 | 40 | ##### 为什么定制很重要? 
41 | 42 | 根据论文 43 | 44 | - 不同的桶大小会产生不同的过滤器负载因子,这代表着过滤器的最大空间利用率 45 | - 不同的桶大小适用于不同的目标假阳性率 46 | - 为了保持假阳性率不变,桶大小越大,需要的指纹大小就越大 47 | 48 | 假定我们需要的假阳性率为 `r` 49 | 50 | > 当 r > 0.002 时,每桶有两个条目比每桶使用四个条目产生的结果略好;当 r 减小到 0.00001 < r ≤ 0.002 时,每桶四个条目可以最小化空间。 51 | 52 | 在桶大小为 `b` 的情况下,他们建议按如下公式选择指纹大小 `f` 53 | 54 | f >= log2(2b/r) bits 55 | 56 | 同时,注意当使用桶大小为b = 2, 4 or 8时,对应的负载因子为 84%, 95% or 98%。 57 | 58 | ##### 想了解更多关于参数选择的内容,请参考论文的第五章节 59 | 60 | 注意: 通常情况下 b = 8 就足够了,由于没有更多数据的支持,我们建议你从2、4、8中选择桶大小。而 f 最大为 32 bits。 61 | 62 | ## 参考用例: 63 | 64 | ``` go 65 | package main 66 | 67 | import ( 68 | "fmt" 69 | "github.com/linvon/cuckoo-filter" 70 | ) 71 | 72 | func main() { 73 | cf := cuckoo.NewFilter(4, 9, 3900, cuckoo.TableTypePacked) 74 | fmt.Println(cf.Info()) 75 | fmt.Println(cf.FalsePositiveRate()) 76 | 77 | a := []byte("A") 78 | cf.Add(a) 79 | fmt.Println(cf.Contain(a)) 80 | fmt.Println(cf.Size()) 81 | 82 | b, _ := cf.Encode() 83 | ncf, _ := cuckoo.Decode(b) 84 | fmt.Println(ncf.Contain(a)) 85 | 86 | cf.Delete(a) 87 | fmt.Println(cf.Size()) 88 | } 89 | ``` 90 | 91 | -------------------------------------------------------------------------------- /vendor/github.com/dgryski/go-metro/metro128.go: -------------------------------------------------------------------------------- 1 | package metro 2 | 3 | import "encoding/binary" 4 | 5 | func rotate_right(v uint64, k uint) uint64 { 6 | return (v >> k) | (v << (64 - k)) 7 | } 8 | 9 | func Hash128(buffer []byte, seed uint64) (uint64, uint64) { 10 | 11 | const ( 12 | k0 = 0xC83A91E1 13 | k1 = 0x8648DBDB 14 | k2 = 0x7BDEC03B 15 | k3 = 0x2F5870A5 16 | ) 17 | 18 | ptr := buffer 19 | 20 | var v [4]uint64 21 | 22 | v[0] = (seed - k0) * k3 23 | v[1] = (seed + k1) * k2 24 | 25 | if len(ptr) >= 32 { 26 | v[2] = (seed + k0) * k2 27 | v[3] = (seed - k1) * k3 28 | 29 | for len(ptr) >= 32 { 30 | v[0] += binary.LittleEndian.Uint64(ptr) * k0 31 | ptr = ptr[8:] 32 | v[0] = rotate_right(v[0], 29) + v[2] 33 | v[1] += binary.LittleEndian.Uint64(ptr) * k1 34 | ptr = ptr[8:] 35 | v[1] = rotate_right(v[1], 29) + v[3] 36 | v[2] +=
binary.LittleEndian.Uint64(ptr) * k2 37 | ptr = ptr[8:] 38 | v[2] = rotate_right(v[2], 29) + v[0] 39 | v[3] += binary.LittleEndian.Uint64(ptr) * k3 40 | ptr = ptr[8:] 41 | v[3] = rotate_right(v[3], 29) + v[1] 42 | } 43 | 44 | v[2] ^= rotate_right(((v[0]+v[3])*k0)+v[1], 21) * k1 45 | v[3] ^= rotate_right(((v[1]+v[2])*k1)+v[0], 21) * k0 46 | v[0] ^= rotate_right(((v[0]+v[2])*k0)+v[3], 21) * k1 47 | v[1] ^= rotate_right(((v[1]+v[3])*k1)+v[2], 21) * k0 48 | } 49 | 50 | if len(ptr) >= 16 { 51 | v[0] += binary.LittleEndian.Uint64(ptr) * k2 52 | ptr = ptr[8:] 53 | v[0] = rotate_right(v[0], 33) * k3 54 | v[1] += binary.LittleEndian.Uint64(ptr) * k2 55 | ptr = ptr[8:] 56 | v[1] = rotate_right(v[1], 33) * k3 57 | v[0] ^= rotate_right((v[0]*k2)+v[1], 45) * k1 58 | v[1] ^= rotate_right((v[1]*k3)+v[0], 45) * k0 59 | } 60 | 61 | if len(ptr) >= 8 { 62 | v[0] += binary.LittleEndian.Uint64(ptr) * k2 63 | ptr = ptr[8:] 64 | v[0] = rotate_right(v[0], 33) * k3 65 | v[0] ^= rotate_right((v[0]*k2)+v[1], 27) * k1 66 | } 67 | 68 | if len(ptr) >= 4 { 69 | v[1] += uint64(binary.LittleEndian.Uint32(ptr)) * k2 70 | ptr = ptr[4:] 71 | v[1] = rotate_right(v[1], 33) * k3 72 | v[1] ^= rotate_right((v[1]*k3)+v[0], 46) * k0 73 | } 74 | 75 | if len(ptr) >= 2 { 76 | v[0] += uint64(binary.LittleEndian.Uint16(ptr)) * k2 77 | ptr = ptr[2:] 78 | v[0] = rotate_right(v[0], 33) * k3 79 | v[0] ^= rotate_right((v[0]*k2)+v[1], 22) * k1 80 | } 81 | 82 | if len(ptr) >= 1 { 83 | v[1] += uint64(ptr[0]) * k2 84 | v[1] = rotate_right(v[1], 33) * k3 85 | v[1] ^= rotate_right((v[1]*k3)+v[0], 58) * k0 86 | } 87 | 88 | v[0] += rotate_right((v[0]*k0)+v[1], 13) 89 | v[1] += rotate_right((v[1]*k1)+v[0], 37) 90 | v[0] += rotate_right((v[0]*k2)+v[1], 13) 91 | v[1] += rotate_right((v[1]*k3)+v[0], 37) 92 | 93 | return v[0], v[1] 94 | } 95 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 
cuckoo-filter 2 | [![Mentioned in Awesome Go](https://awesome.re/mentioned-badge.svg)](https://github.com/avelino/awesome-go) 3 | 4 | A Go implementation of the cuckoo filter, with parameters you can configure yourself. 5 | 6 | Ported from [efficient/cuckoofilter](https://github.com/efficient/cuckoofilter) 7 | 8 | [中文文档](./README_ZH.md) 9 | 10 | Overview 11 | -------- 12 | Cuckoo filter is a Bloom filter replacement for approximated set-membership queries. While Bloom filters are well-known space-efficient data structures to serve queries like "is item x in a set?", they do not support deletion. Their variants that enable deletion (like counting Bloom filters) usually require much more space. 13 | 14 | Cuckoo filters provide the flexibility to add and remove items dynamically. A cuckoo filter is based on cuckoo hashing (and is therefore named cuckoo filter). It is essentially a cuckoo hash table storing each key's fingerprint. Cuckoo hash tables can be highly compact, thus a cuckoo filter could use less space than conventional Bloom filters, for applications that require low false positive rates (< 3%). 15 | 16 | For details about the algorithm and citations please use: 17 | 18 | ["Cuckoo Filter: Practically Better Than Bloom"](http://www.cs.cmu.edu/~binfan/papers/conext14_cuckoofilter.pdf) in proceedings of ACM CoNEXT 2014 by Bin Fan, Dave Andersen and Michael Kaminsky 19 | 20 | ## Implementation details 21 | 22 | The paper cited above leaves several parameters to choose. 23 | 24 | 1. Bucket size (b): number of fingerprints per bucket 25 | 2. Fingerprint size (f): number of bits of the key's hash value stored in each fingerprint 26 | 27 | In other implementations: 28 | 29 | - [seiflotfy/cuckoofilter](https://github.com/seiflotfy/cuckoofilter) uses b=4, f=8 bit, which corresponds to a false positive rate of `r ~= 0.03`. 30 | - [panmari/cuckoofilter](https://github.com/panmari/cuckoofilter) uses b=4, f=16 bit, which corresponds to a false positive rate of `r ~= 0.0001`.
31 | - [irfansharif/cfilter](https://github.com/irfansharif/cfilter) can adjust b and f, but f can only be a multiple of 8 bits, i.e. a whole number of bytes. 32 | 33 | In this implementation, you can adjust b and f to any value you want in the `TableTypeSingle` type implementation. 34 | 35 | In addition, the semi-sorting buckets mentioned in the paper, which can save 1 bit per item, are also available in the `TableTypePacked` type; 36 | note that there b is fixed at 4 and only f is adjustable. 37 | 38 | ##### Why custom is important? 39 | 40 | According to the paper: 41 | 42 | - Different bucket sizes result in different filter load factors, i.e. the maximum occupancy rate of the filter 43 | - Different bucket sizes are suitable for different target false positive rates 44 | - To keep the same false positive rate, a bigger bucket size requires a bigger fingerprint size 45 | 46 | Given a target false positive rate of `r` 47 | 48 | > when r > 0.002, having two entries per bucket yields slightly better results than using four entries per bucket; when r decreases to 0.00001 < r ≤ 0.002, four entries per bucket minimizes space. 49 | 50 | With a bucket size `b`, they suggest choosing the fingerprint size `f` using 51 | 52 | f >= log2(2b/r) bits 53 | 54 | At the same time, notice that the load factor is 84%, 95% or 98% when using bucket size b = 2, 4 or 8 55 | 56 | ##### To know more about parameter choosing, refer to paper's section 5 57 | 58 | Note: generally b = 8 is enough; without more supporting data, we suggest choosing b from 2, 4 or 8.
And f is at most 32 bits. 59 | 60 | ## Example usage: 61 | 62 | ``` go 63 | package main 64 | 65 | import ( 66 | "fmt" 67 | "github.com/linvon/cuckoo-filter" 68 | ) 69 | 70 | func main() { 71 | cf := cuckoo.NewFilter(4, 9, 3900, cuckoo.TableTypePacked) 72 | fmt.Println(cf.Info()) 73 | fmt.Println(cf.FalsePositiveRate()) 74 | 75 | a := []byte("A") 76 | cf.Add(a) 77 | fmt.Println(cf.Contain(a)) 78 | fmt.Println(cf.Size()) 79 | 80 | b, _ := cf.Encode() 81 | ncf, _ := cuckoo.Decode(b) 82 | fmt.Println(ncf.Contain(a)) 83 | 84 | cf.Delete(a) 85 | fmt.Println(cf.Size()) 86 | } 87 | ``` 88 | 89 | -------------------------------------------------------------------------------- /cuckoofilter_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) linvon 3 | * Date 2021/2/18 10:29 4 | */ 5 | 6 | package cuckoo 7 | 8 | import ( 9 | "bytes" 10 | "crypto/rand" 11 | "fmt" 12 | "io" 13 | "reflect" 14 | "testing" 15 | ) 16 | 17 | const size = 100000 18 | 19 | var ( 20 | testBucketSize = []uint{2, 4, 8} 21 | testFingerprintSize = []uint{2, 4, 5, 6, 7, 8, 9, 10, 12, 13, 16, 17, 23, 31, 32} 22 | testTableType = []uint{TableTypeSingle, TableTypePacked} 23 | ) 24 | 25 | func TestFilter(t *testing.T) { 26 | var insertNum uint = 50000 27 | var hash [32]byte 28 | 29 | for _, b := range testBucketSize { 30 | for _, f := range testFingerprintSize { 31 | for _, table := range testTableType { 32 | if f == 2 && table == TableTypePacked { 33 | continue 34 | } 35 | if table == TableTypePacked && b != 4 { 36 | continue 37 | } 38 | cf := NewFilter(b, f, 8190, table) 39 | // fmt.Println(cf.Info()) 40 | a := make([][]byte, 0) 41 | for i := uint(0); i < insertNum; i++ { 42 | _, _ = io.ReadFull(rand.Reader, hash[:]) 43 | if cf.AddUnique(hash[:]) { 44 | tmp := make([]byte, 32) 45 | copy(tmp, hash[:]) 46 | a = append(a, tmp) 47 | } 48 | } 49 | 50 | count := cf.Size() 51 | if count != uint(len(a)) { 52 | t.Errorf("Expected count = %d, instead count =
%d, b %v f %v", uint(len(a)), count, b, f) 53 | return 54 | } 55 | 56 | encodedBytes, err := cf.Encode() 57 | if err != nil { 58 | t.Fatalf("err %v", err) 59 | } 60 | if len(encodedBytes) != cap(encodedBytes) { 61 | t.Fatalf("len(%d) != cap(%d)", len(encodedBytes), cap(encodedBytes)) 62 | } 63 | ncf, err := Decode(encodedBytes) 64 | if err != nil || !reflect.DeepEqual(cf, ncf) { 65 | t.Errorf("Expected epual, err %v", err) 66 | return 67 | } 68 | 69 | encodedBytes, err = cf.Encode() 70 | if err != nil { 71 | t.Fatalf("err %v", err) 72 | } 73 | ncf, err = DecodeFrom(encodedBytes) 74 | if err != nil || !reflect.DeepEqual(cf, ncf) { 75 | t.Errorf("Expected epual, err %v", err) 76 | return 77 | } 78 | 79 | filterReader, _ := cf.EncodeReader() 80 | bytesFromReader, err := io.ReadAll(filterReader) 81 | if err != nil { 82 | t.Fatalf("Error reading from reader") 83 | } 84 | if !bytes.Equal(bytesFromReader, encodedBytes) { 85 | t.Fatalf("Expected to be equal") 86 | } 87 | 88 | fmt.Println(cf.Info()) 89 | cf.BitsPerItem() 90 | cf.SizeInBytes() 91 | cf.LoadFactor() 92 | 93 | for _, v := range a { 94 | if !cf.Contain(v) { 95 | t.Errorf("Expected contain, instead not contain, b %v f %v table type %v", b, f, table) 96 | return 97 | } 98 | cf.Delete(v) 99 | } 100 | 101 | count = cf.Size() 102 | if count != 0 { 103 | t.Errorf("Expected count = 0, instead count == %d, b %v f %v table type %v", count, b, f, table) 104 | return 105 | } 106 | 107 | fmt.Printf("Filter bucketSize %v fingerprintSize %v tableType %v falsePositive Rate %v \n", b, f, table, cf.FalsePositiveRate()) 108 | } 109 | } 110 | } 111 | } 112 | 113 | func BenchmarkFilterSingle_Reset(b *testing.B) { 114 | filter := NewFilter(4, 8, size, TableTypeSingle) 115 | 116 | b.ResetTimer() 117 | 118 | for i := 0; i < b.N; i++ { 119 | filter.Reset() 120 | } 121 | } 122 | 123 | func BenchmarkFilterSingle_Insert(b *testing.B) { 124 | filter := NewFilter(4, 8, size, TableTypeSingle) 125 | 126 | b.ResetTimer() 127 | 128 | var hash 
[32]byte 129 | for i := 0; i < b.N; i++ { 130 | _, _ = io.ReadFull(rand.Reader, hash[:]) 131 | filter.Add(hash[:]) 132 | } 133 | } 134 | 135 | func BenchmarkFilterSingle_Lookup(b *testing.B) { 136 | filter := NewFilter(4, 8, size, TableTypeSingle) 137 | 138 | var hash [32]byte 139 | for i := 0; i < size; i++ { 140 | _, _ = io.ReadFull(rand.Reader, hash[:]) 141 | filter.Add(hash[:]) 142 | } 143 | 144 | b.ResetTimer() 145 | for i := 0; i < b.N; i++ { 146 | _, _ = io.ReadFull(rand.Reader, hash[:]) 147 | filter.Contain(hash[:]) 148 | } 149 | } 150 | 151 | func BenchmarkFilterPacked_Reset(b *testing.B) { 152 | filter := NewFilter(4, 9, size, TableTypePacked) 153 | 154 | b.ResetTimer() 155 | 156 | for i := 0; i < b.N; i++ { 157 | filter.Reset() 158 | } 159 | } 160 | 161 | func BenchmarkFilterPacked_Insert(b *testing.B) { 162 | filter := NewFilter(4, 9, size, TableTypePacked) 163 | 164 | b.ResetTimer() 165 | 166 | var hash [32]byte 167 | for i := 0; i < b.N; i++ { 168 | _, _ = io.ReadFull(rand.Reader, hash[:]) 169 | filter.Add(hash[:]) 170 | } 171 | } 172 | 173 | func BenchmarkFilterPacked_Lookup(b *testing.B) { 174 | filter := NewFilter(4, 9, size, TableTypePacked) 175 | 176 | var hash [32]byte 177 | for i := 0; i < size; i++ { 178 | _, _ = io.ReadFull(rand.Reader, hash[:]) 179 | filter.Add(hash[:]) 180 | } 181 | 182 | b.ResetTimer() 183 | for i := 0; i < b.N; i++ { 184 | _, _ = io.ReadFull(rand.Reader, hash[:]) 185 | filter.Contain(hash[:]) 186 | } 187 | } 188 | -------------------------------------------------------------------------------- /vendor/github.com/dgryski/go-metro/metro.py: -------------------------------------------------------------------------------- 1 | import peachpy.x86_64 2 | 3 | k0 = 0xD6D018F5 4 | k1 = 0xA2AA033B 5 | k2 = 0x62992FC1 6 | k3 = 0x30BC5B29 7 | 8 | def advance(p,l,c): 9 | ADD(p,c) 10 | SUB(l,c) 11 | 12 | def imul(r,k): 13 | t = GeneralPurposeRegister64() 14 | MOV(t, k) 15 | IMUL(r, t) 16 | 17 | def update32(v, p,idx, k, vadd): 18 | r = 
GeneralPurposeRegister64() 19 | MOV(r, [p + idx]) 20 | imul(r, k) 21 | ADD(v, r) 22 | ROR(v, 29) 23 | ADD(v, vadd) 24 | 25 | def final32(v, regs, keys): 26 | r = GeneralPurposeRegister64() 27 | MOV(r, v[regs[1]]) 28 | ADD(r, v[regs[2]]) 29 | imul(r, keys[0]) 30 | ADD(r, v[regs[3]]) 31 | ROR(r, 37) 32 | imul(r, keys[1]) 33 | XOR(v[regs[0]], r) 34 | 35 | seed = Argument(uint64_t) 36 | buffer_base = Argument(ptr()) 37 | buffer_len = Argument(int64_t) 38 | buffer_cap = Argument(int64_t) 39 | 40 | def makeHash(name, args): 41 | with Function(name, args, uint64_t) as function: 42 | 43 | reg_ptr = GeneralPurposeRegister64() 44 | reg_ptr_len = GeneralPurposeRegister64() 45 | reg_hash = GeneralPurposeRegister64() 46 | 47 | LOAD.ARGUMENT(reg_hash, seed) 48 | LOAD.ARGUMENT(reg_ptr, buffer_base) 49 | LOAD.ARGUMENT(reg_ptr_len, buffer_len) 50 | 51 | imul(reg_hash, k0) 52 | r = GeneralPurposeRegister64() 53 | MOV(r, k2*k0) 54 | ADD(reg_hash, r) 55 | 56 | after32 = Label("after32") 57 | 58 | CMP(reg_ptr_len, 32) 59 | JL(after32) 60 | v = [GeneralPurposeRegister64() for _ in range(4)] 61 | for i in range(4): 62 | MOV(v[i], reg_hash) 63 | 64 | with Loop() as loop: 65 | update32(v[0], reg_ptr, 0, k0, v[2]) 66 | update32(v[1], reg_ptr, 8, k1, v[3]) 67 | update32(v[2], reg_ptr, 16, k2, v[0]) 68 | update32(v[3], reg_ptr, 24, k3, v[1]) 69 | 70 | ADD(reg_ptr, 32) 71 | SUB(reg_ptr_len, 32) 72 | CMP(reg_ptr_len, 32) 73 | JGE(loop.begin) 74 | 75 | final32(v, [2,0,3,1], [k0, k1]) 76 | final32(v, [3,1,2,0], [k1, k0]) 77 | final32(v, [0,0,2,3], [k0, k1]) 78 | final32(v, [1,1,3,2], [k1, k0]) 79 | 80 | XOR(v[0], v[1]) 81 | ADD(reg_hash, v[0]) 82 | 83 | LABEL(after32) 84 | 85 | after16 = Label("after16") 86 | CMP(reg_ptr_len, 16) 87 | JL(after16) 88 | 89 | for i in range(2): 90 | MOV(v[i], [reg_ptr]) 91 | imul(v[i], k2) 92 | ADD(v[i], reg_hash) 93 | 94 | advance(reg_ptr, reg_ptr_len, 8) 95 | 96 | ROR(v[i], 29) 97 | imul(v[i], k3) 98 | 99 | r = GeneralPurposeRegister64() 100 | MOV(r, v[0]) 101 | 
imul(r, k0) 102 | ROR(r, 21) 103 | ADD(r, v[1]) 104 | XOR(v[0], r) 105 | 106 | MOV(r, v[1]) 107 | imul(r, k3) 108 | ROR(r, 21) 109 | ADD(r, v[0]) 110 | XOR(v[1], r) 111 | 112 | ADD(reg_hash, v[1]) 113 | 114 | LABEL(after16) 115 | 116 | after8 = Label("after8") 117 | CMP(reg_ptr_len, 8) 118 | JL(after8) 119 | 120 | r = GeneralPurposeRegister64() 121 | MOV(r, [reg_ptr]) 122 | imul(r, k3) 123 | ADD(reg_hash, r) 124 | advance(reg_ptr, reg_ptr_len, 8) 125 | 126 | MOV(r, reg_hash) 127 | ROR(r, 55) 128 | imul(r, k1) 129 | XOR(reg_hash, r) 130 | 131 | LABEL(after8) 132 | 133 | after4 = Label("after4") 134 | CMP(reg_ptr_len, 4) 135 | JL(after4) 136 | 137 | r = GeneralPurposeRegister64() 138 | XOR(r, r) 139 | MOV(r.as_dword, dword[reg_ptr]) 140 | imul(r, k3) 141 | ADD(reg_hash, r) 142 | advance(reg_ptr, reg_ptr_len, 4) 143 | 144 | MOV(r, reg_hash) 145 | ROR(r, 26) 146 | imul(r, k1) 147 | XOR(reg_hash, r) 148 | 149 | LABEL(after4) 150 | 151 | after2 = Label("after2") 152 | CMP(reg_ptr_len, 2) 153 | JL(after2) 154 | 155 | r = GeneralPurposeRegister64() 156 | XOR(r,r) 157 | MOV(r.as_word, word[reg_ptr]) 158 | imul(r, k3) 159 | ADD(reg_hash, r) 160 | advance(reg_ptr, reg_ptr_len, 2) 161 | 162 | MOV(r, reg_hash) 163 | ROR(r, 48) 164 | imul(r, k1) 165 | XOR(reg_hash, r) 166 | 167 | LABEL(after2) 168 | 169 | after1 = Label("after1") 170 | CMP(reg_ptr_len, 1) 171 | JL(after1) 172 | 173 | r = GeneralPurposeRegister64() 174 | MOVZX(r, byte[reg_ptr]) 175 | imul(r, k3) 176 | ADD(reg_hash, r) 177 | 178 | MOV(r, reg_hash) 179 | ROR(r, 37) 180 | imul(r, k1) 181 | XOR(reg_hash, r) 182 | 183 | LABEL(after1) 184 | 185 | r = GeneralPurposeRegister64() 186 | MOV(r, reg_hash) 187 | ROR(r, 28) 188 | XOR(reg_hash, r) 189 | 190 | imul(reg_hash, k0) 191 | 192 | MOV(r, reg_hash) 193 | ROR(r, 29) 194 | XOR(reg_hash, r) 195 | 196 | RETURN(reg_hash) 197 | 198 | makeHash("Hash64", (buffer_base, buffer_len, buffer_cap, seed)) 199 | makeHash("Hash64Str", (buffer_base, buffer_len, seed)) 
// SingleTable is the most naive table implementation: one huge bit array.
//
// Tags are stored back to back, bitsPerTag bits each, kTagsPerBucket tags per
// bucket, so tag j of bucket i starts at bit
// i*bitsPerTag*kTagsPerBucket + bitsPerTag*j of the bucket slice.
type SingleTable struct {
	kTagsPerBucket uint   // number of tag slots in each bucket
	numBuckets     uint   // number of buckets in the table
	bitsPerTag     uint   // width of one tag in bits
	tagMask        uint32 // (1 << bitsPerTag) - 1, applied to every tag read
	bucket         []byte // backing storage for all tags
	len            uint   // size of bucket in bytes
}

// NewSingleTable returns an empty SingleTable; call Init before using it.
func NewSingleTable() *SingleTable {
	return &SingleTable{}
}

// Init configures the table geometry and sets up its storage.
//
// The storage size is the total bit count rounded up to whole bytes. When
// initialBucketsHint is empty a zeroed slice of that size is allocated;
// otherwise the hint itself becomes the storage and must have exactly that
// length, or an error is returned.
func (t *SingleTable) Init(tagsPerBucket, bitsPerTag, num uint, initialBucketsHint []byte) error {
	t.bitsPerTag = bitsPerTag
	t.numBuckets = num
	t.kTagsPerBucket = tagsPerBucket

	// For bitsPerTag == 32 the uint32 shift yields 0 and the subtraction
	// wraps to 0xFFFFFFFF, i.e. a full-width mask.
	t.tagMask = (1 << bitsPerTag) - 1
	// Round the bit count up to the next whole byte.
	t.len = (t.bitsPerTag*t.kTagsPerBucket*t.numBuckets + 7) >> 3
	buckets, err := getBucketsFromHint(initialBucketsHint, t.len)
	if err != nil {
		return err
	}
	t.bucket = buckets
	return nil
}

// NumBuckets returns the number of buckets in the table.
func (t *SingleTable) NumBuckets() uint {
	return t.numBuckets
}

// SizeInBytes returns the number of bytes occupied by the table storage.
func (t *SingleTable) SizeInBytes() uint {
	return t.len
}

// SizeInTags returns the number of tags the table can store.
func (t *SingleTable) SizeInTags() uint {
	return t.kTagsPerBucket * t.numBuckets
}

// BitsPerItem returns the number of bits each stored item occupies, which
// for this table is the tag width.
func (t *SingleTable) BitsPerItem() uint {
	return t.bitsPerTag
}

// ReadTag returns the tag stored in slot j of bucket i, masked to bitsPerTag
// bits.
//
// Common tag widths (2, 4, 8, 12, 16, 32) use dedicated fast paths; every
// other width goes through readOutBytes.
func (t *SingleTable) ReadTag(i, j uint) uint32 {
	// Index of the byte in which the tag's first bit lives.
	pos := (i*t.bitsPerTag*t.kTagsPerBucket + t.bitsPerTag*j) / bitsPerByte
	var tag uint32
	/* multi-byte tags are assembled least-significant byte first */
	switch t.bitsPerTag {
	case 2:
		// NOTE(review): the in-byte offset is derived from j alone. That
		// equals the true bit offset only when each bucket starts on a byte
		// boundary (2*kTagsPerBucket a multiple of 8). With
		// kTagsPerBucket == 2 an odd bucket starts at bit 4 of its byte, so
		// this path appears to address the preceding bucket's slots.
		// WriteTag uses the same shift, so both must change together -
		// confirm before touching.
		shift := j & (4 - 1)
		tag = uint32(t.bucket[pos]) >> (2 * shift)
	case 4:
		// Odd slots sit in the high nibble of the byte.
		tag = uint32(t.bucket[pos]) >> ((j & 1) << 2)
	case 8:
		tag = uint32(t.bucket[pos])
	case 12:
		// A 12-bit tag spans two bytes; odd slots start at bit 4.
		tag = (uint32(t.bucket[pos]) | uint32(t.bucket[pos+1])<<8) >> ((j & 1) << 2)
	case 16:
		tag = uint32(t.bucket[pos]) | uint32(t.bucket[pos+1])<<8
	case 32:
		tag = uint32(t.bucket[pos]) | uint32(t.bucket[pos+1])<<8 | uint32(t.bucket[pos+2])<<16 | uint32(t.bucket[pos+3])<<24
	default:
		tag = t.readOutBytes(i, j, pos)
	}
	return tag & t.tagMask
}

// readOutBytes is the generic read path for tag widths without a fast path.
//
// It gathers every byte the tag touches, starting at pos, into a 64-bit
// value and shifts away the bits that precede the tag inside the first byte.
// The result is not masked; ReadTag applies tagMask.
func (t *SingleTable) readOutBytes(i, j, pos uint) uint32 {
	// Bit offset of the tag inside its first byte.
	rShift := (i*t.bitsPerTag*t.kTagsPerBucket + t.bitsPerTag*j) & (bitsPerByte - 1)
	// tag is max 32bit, so max occupies 5 bytes
	kBytes := (rShift + t.bitsPerTag + 7) / bitsPerByte
	var tmp uint64
	for k := uint(0); k < kBytes; k++ {
		tmp |= uint64(t.bucket[pos+k]) << (bitsPerByte * k)
	}
	tmp >>= rShift
	return uint32(tmp)
}
tmp |= uint16(tag) << 4 133 | } 134 | t.bucket[pos] = byte(tmp) 135 | t.bucket[pos+1] = byte(tmp >> 8) 136 | case 16: 137 | t.bucket[pos] = byte(tag) 138 | t.bucket[pos+1] = byte(tag >> 8) 139 | case 32: 140 | t.bucket[pos] = byte(tag) 141 | t.bucket[pos+1] = byte(tag >> 8) 142 | t.bucket[pos+2] = byte(tag >> 16) 143 | t.bucket[pos+3] = byte(tag >> 24) 144 | default: 145 | t.writeInBytes(i, j, pos, tag) 146 | } 147 | } 148 | 149 | func (t *SingleTable) writeInBytes(i, j, pos uint, tag uint32) { 150 | rShift := (i*t.bitsPerTag*t.kTagsPerBucket + t.bitsPerTag*j) & (bitsPerByte - 1) 151 | lShift := (rShift + t.bitsPerTag) & (bitsPerByte - 1) 152 | // tag is max 32bit, so max occupies 5 bytes 153 | kBytes := (rShift + t.bitsPerTag + 7) / bitsPerByte 154 | 155 | rMask := uint8(0xff) >> (bitsPerByte - rShift) 156 | lMask := uint8(0xff) << lShift 157 | if lShift == 0 { 158 | lMask = uint8(0) 159 | } 160 | var tmp uint64 161 | tmp |= uint64(t.bucket[pos] & rMask) 162 | end := kBytes - 1 163 | tmp |= uint64(t.bucket[pos+end]&lMask) << (end * bitsPerByte) 164 | tmp |= uint64(tag) << rShift 165 | 166 | for k := uint(0); k < kBytes; k++ { 167 | t.bucket[pos+k] = byte(tmp >> (k * bitsPerByte)) 168 | } 169 | } 170 | 171 | // FindTagInBuckets find if tag in bucket i1 i2 172 | func (t *SingleTable) FindTagInBuckets(i1, i2 uint, tag uint32) bool { 173 | var j uint 174 | for j = 0; j < t.kTagsPerBucket; j++ { 175 | if t.ReadTag(i1, j) == tag || t.ReadTag(i2, j) == tag { 176 | return true 177 | } 178 | } 179 | return false 180 | } 181 | 182 | // DeleteTagFromBucket delete tag from bucket i 183 | func (t *SingleTable) DeleteTagFromBucket(i uint, tag uint32) bool { 184 | var j uint 185 | for j = 0; j < t.kTagsPerBucket; j++ { 186 | if t.ReadTag(i, j) == tag { 187 | t.WriteTag(i, j, 0) 188 | return true 189 | } 190 | } 191 | return false 192 | } 193 | 194 | // InsertTagToBucket insert tag into bucket i 195 | func (t *SingleTable) InsertTagToBucket(i uint, tag uint32, kickOut bool, 
oldTag *uint32) bool { 196 | var j uint 197 | for j = 0; j < t.kTagsPerBucket; j++ { 198 | if t.ReadTag(i, j) == 0 { 199 | t.WriteTag(i, j, tag) 200 | return true 201 | } 202 | } 203 | if kickOut { 204 | r := uint(rand.Int31()) % t.kTagsPerBucket 205 | *oldTag = t.ReadTag(i, r) 206 | t.WriteTag(i, r, tag) 207 | } 208 | return false 209 | } 210 | 211 | // Reset reset table 212 | func (t *SingleTable) Reset() { 213 | for i := range t.bucket { 214 | t.bucket[i] = 0 215 | } 216 | } 217 | 218 | // Info return table's info 219 | func (t *SingleTable) Info() string { 220 | return fmt.Sprintf("SingleHashtable with tag size: %v bits \n"+ 221 | "\t\tAssociativity: %v \n"+ 222 | "\t\tTotal # of rows: %v\n"+ 223 | "\t\tTotal # slots: %v\n", 224 | t.bitsPerTag, t.kTagsPerBucket, t.numBuckets, t.SizeInTags()) 225 | } 226 | 227 | const singleTableMetadataSize = 3 + bytesPerUint32 228 | 229 | // Encode returns a byte slice representing a TableBucket 230 | func (t *SingleTable) Reader() (io.Reader, uint) { 231 | var metadata [singleTableMetadataSize]byte 232 | metadata[0] = uint8(TableTypeSingle) 233 | metadata[1] = uint8(t.kTagsPerBucket) 234 | metadata[2] = uint8(t.bitsPerTag) 235 | binary.LittleEndian.PutUint32(metadata[3:], uint32(t.numBuckets)) 236 | return io.MultiReader(bytes.NewReader(metadata[:]), bytes.NewReader(t.bucket)), uint(len(metadata) + len(t.bucket)) 237 | } 238 | 239 | // Decode parse a byte slice into a TableBucket 240 | func (t *SingleTable) Decode(b []byte) error { 241 | tagsPerBucket := uint(b[1]) 242 | bitsPerTag := uint(b[2]) 243 | numBuckets := uint(binary.LittleEndian.Uint32(b[3:])) 244 | return t.Init(tagsPerBucket, bitsPerTag, numBuckets, b[7:]) 245 | } 246 | -------------------------------------------------------------------------------- /vendor/github.com/dgryski/go-metro/metro_amd64.s: -------------------------------------------------------------------------------- 1 | // +build !noasm 2 | // +build !gccgo 3 | 4 | // Generated by PeachPy 0.2.0 from 
metro.py 5 | 6 | // func Hash64(buffer_base uintptr, buffer_len int64, buffer_cap int64, seed uint64) uint64 7 | TEXT ·Hash64(SB),4,$0-40 8 | MOVQ seed+24(FP), AX 9 | MOVQ buffer_base+0(FP), BX 10 | MOVQ buffer_len+8(FP), CX 11 | MOVQ $3603962101, DX 12 | IMULQ DX, AX 13 | MOVQ $5961697176435608501, DX 14 | ADDQ DX, AX 15 | CMPQ CX, $32 16 | JLT after32 17 | MOVQ AX, DX 18 | MOVQ AX, DI 19 | MOVQ AX, SI 20 | MOVQ AX, BP 21 | loop_begin: 22 | MOVQ 0(BX), R8 23 | MOVQ $3603962101, R9 24 | IMULQ R9, R8 25 | ADDQ R8, DX 26 | RORQ $29, DX 27 | ADDQ SI, DX 28 | MOVQ 8(BX), R8 29 | MOVQ $2729050939, R9 30 | IMULQ R9, R8 31 | ADDQ R8, DI 32 | RORQ $29, DI 33 | ADDQ BP, DI 34 | MOVQ 16(BX), R8 35 | MOVQ $1654206401, R9 36 | IMULQ R9, R8 37 | ADDQ R8, SI 38 | RORQ $29, SI 39 | ADDQ DX, SI 40 | MOVQ 24(BX), R8 41 | MOVQ $817650473, R9 42 | IMULQ R9, R8 43 | ADDQ R8, BP 44 | RORQ $29, BP 45 | ADDQ DI, BP 46 | ADDQ $32, BX 47 | SUBQ $32, CX 48 | CMPQ CX, $32 49 | JGE loop_begin 50 | MOVQ DX, R8 51 | ADDQ BP, R8 52 | MOVQ $3603962101, R9 53 | IMULQ R9, R8 54 | ADDQ DI, R8 55 | RORQ $37, R8 56 | MOVQ $2729050939, R9 57 | IMULQ R9, R8 58 | XORQ R8, SI 59 | MOVQ DI, R8 60 | ADDQ SI, R8 61 | MOVQ $2729050939, R9 62 | IMULQ R9, R8 63 | ADDQ DX, R8 64 | RORQ $37, R8 65 | MOVQ $3603962101, R9 66 | IMULQ R9, R8 67 | XORQ R8, BP 68 | MOVQ DX, R8 69 | ADDQ SI, R8 70 | MOVQ $3603962101, R9 71 | IMULQ R9, R8 72 | ADDQ BP, R8 73 | RORQ $37, R8 74 | MOVQ $2729050939, R9 75 | IMULQ R9, R8 76 | XORQ R8, DX 77 | MOVQ DI, R8 78 | ADDQ BP, R8 79 | MOVQ $2729050939, BP 80 | IMULQ BP, R8 81 | ADDQ SI, R8 82 | RORQ $37, R8 83 | MOVQ $3603962101, SI 84 | IMULQ SI, R8 85 | XORQ R8, DI 86 | XORQ DI, DX 87 | ADDQ DX, AX 88 | after32: 89 | CMPQ CX, $16 90 | JLT after16 91 | MOVQ 0(BX), DX 92 | MOVQ $1654206401, DI 93 | IMULQ DI, DX 94 | ADDQ AX, DX 95 | ADDQ $8, BX 96 | SUBQ $8, CX 97 | RORQ $29, DX 98 | MOVQ $817650473, DI 99 | IMULQ DI, DX 100 | MOVQ 0(BX), DI 101 | MOVQ $1654206401, SI 102 | IMULQ SI, 
DI 103 | ADDQ AX, DI 104 | ADDQ $8, BX 105 | SUBQ $8, CX 106 | RORQ $29, DI 107 | MOVQ $817650473, SI 108 | IMULQ SI, DI 109 | MOVQ DX, SI 110 | MOVQ $3603962101, BP 111 | IMULQ BP, SI 112 | RORQ $21, SI 113 | ADDQ DI, SI 114 | XORQ SI, DX 115 | MOVQ DI, SI 116 | MOVQ $817650473, BP 117 | IMULQ BP, SI 118 | RORQ $21, SI 119 | ADDQ DX, SI 120 | XORQ SI, DI 121 | ADDQ DI, AX 122 | after16: 123 | CMPQ CX, $8 124 | JLT after8 125 | MOVQ 0(BX), DX 126 | MOVQ $817650473, DI 127 | IMULQ DI, DX 128 | ADDQ DX, AX 129 | ADDQ $8, BX 130 | SUBQ $8, CX 131 | MOVQ AX, DX 132 | RORQ $55, DX 133 | MOVQ $2729050939, DI 134 | IMULQ DI, DX 135 | XORQ DX, AX 136 | after8: 137 | CMPQ CX, $4 138 | JLT after4 139 | XORQ DX, DX 140 | MOVL 0(BX), DX 141 | MOVQ $817650473, DI 142 | IMULQ DI, DX 143 | ADDQ DX, AX 144 | ADDQ $4, BX 145 | SUBQ $4, CX 146 | MOVQ AX, DX 147 | RORQ $26, DX 148 | MOVQ $2729050939, DI 149 | IMULQ DI, DX 150 | XORQ DX, AX 151 | after4: 152 | CMPQ CX, $2 153 | JLT after2 154 | XORQ DX, DX 155 | MOVW 0(BX), DX 156 | MOVQ $817650473, DI 157 | IMULQ DI, DX 158 | ADDQ DX, AX 159 | ADDQ $2, BX 160 | SUBQ $2, CX 161 | MOVQ AX, DX 162 | RORQ $48, DX 163 | MOVQ $2729050939, DI 164 | IMULQ DI, DX 165 | XORQ DX, AX 166 | after2: 167 | CMPQ CX, $1 168 | JLT after1 169 | MOVBQZX 0(BX), BX 170 | MOVQ $817650473, CX 171 | IMULQ CX, BX 172 | ADDQ BX, AX 173 | MOVQ AX, BX 174 | RORQ $37, BX 175 | MOVQ $2729050939, CX 176 | IMULQ CX, BX 177 | XORQ BX, AX 178 | after1: 179 | MOVQ AX, BX 180 | RORQ $28, BX 181 | XORQ BX, AX 182 | MOVQ $3603962101, BX 183 | IMULQ BX, AX 184 | MOVQ AX, BX 185 | RORQ $29, BX 186 | XORQ BX, AX 187 | MOVQ AX, ret+32(FP) 188 | RET 189 | 190 | // func Hash64Str(buffer_base uintptr, buffer_len int64, seed uint64) uint64 191 | TEXT ·Hash64Str(SB),4,$0-32 192 | MOVQ seed+16(FP), AX 193 | MOVQ buffer_base+0(FP), BX 194 | MOVQ buffer_len+8(FP), CX 195 | MOVQ $3603962101, DX 196 | IMULQ DX, AX 197 | MOVQ $5961697176435608501, DX 198 | ADDQ DX, AX 199 | CMPQ CX, $32 
200 | JLT after32 201 | MOVQ AX, DX 202 | MOVQ AX, DI 203 | MOVQ AX, SI 204 | MOVQ AX, BP 205 | loop_begin: 206 | MOVQ 0(BX), R8 207 | MOVQ $3603962101, R9 208 | IMULQ R9, R8 209 | ADDQ R8, DX 210 | RORQ $29, DX 211 | ADDQ SI, DX 212 | MOVQ 8(BX), R8 213 | MOVQ $2729050939, R9 214 | IMULQ R9, R8 215 | ADDQ R8, DI 216 | RORQ $29, DI 217 | ADDQ BP, DI 218 | MOVQ 16(BX), R8 219 | MOVQ $1654206401, R9 220 | IMULQ R9, R8 221 | ADDQ R8, SI 222 | RORQ $29, SI 223 | ADDQ DX, SI 224 | MOVQ 24(BX), R8 225 | MOVQ $817650473, R9 226 | IMULQ R9, R8 227 | ADDQ R8, BP 228 | RORQ $29, BP 229 | ADDQ DI, BP 230 | ADDQ $32, BX 231 | SUBQ $32, CX 232 | CMPQ CX, $32 233 | JGE loop_begin 234 | MOVQ DX, R8 235 | ADDQ BP, R8 236 | MOVQ $3603962101, R9 237 | IMULQ R9, R8 238 | ADDQ DI, R8 239 | RORQ $37, R8 240 | MOVQ $2729050939, R9 241 | IMULQ R9, R8 242 | XORQ R8, SI 243 | MOVQ DI, R8 244 | ADDQ SI, R8 245 | MOVQ $2729050939, R9 246 | IMULQ R9, R8 247 | ADDQ DX, R8 248 | RORQ $37, R8 249 | MOVQ $3603962101, R9 250 | IMULQ R9, R8 251 | XORQ R8, BP 252 | MOVQ DX, R8 253 | ADDQ SI, R8 254 | MOVQ $3603962101, R9 255 | IMULQ R9, R8 256 | ADDQ BP, R8 257 | RORQ $37, R8 258 | MOVQ $2729050939, R9 259 | IMULQ R9, R8 260 | XORQ R8, DX 261 | MOVQ DI, R8 262 | ADDQ BP, R8 263 | MOVQ $2729050939, BP 264 | IMULQ BP, R8 265 | ADDQ SI, R8 266 | RORQ $37, R8 267 | MOVQ $3603962101, SI 268 | IMULQ SI, R8 269 | XORQ R8, DI 270 | XORQ DI, DX 271 | ADDQ DX, AX 272 | after32: 273 | CMPQ CX, $16 274 | JLT after16 275 | MOVQ 0(BX), DX 276 | MOVQ $1654206401, DI 277 | IMULQ DI, DX 278 | ADDQ AX, DX 279 | ADDQ $8, BX 280 | SUBQ $8, CX 281 | RORQ $29, DX 282 | MOVQ $817650473, DI 283 | IMULQ DI, DX 284 | MOVQ 0(BX), DI 285 | MOVQ $1654206401, SI 286 | IMULQ SI, DI 287 | ADDQ AX, DI 288 | ADDQ $8, BX 289 | SUBQ $8, CX 290 | RORQ $29, DI 291 | MOVQ $817650473, SI 292 | IMULQ SI, DI 293 | MOVQ DX, SI 294 | MOVQ $3603962101, BP 295 | IMULQ BP, SI 296 | RORQ $21, SI 297 | ADDQ DI, SI 298 | XORQ SI, DX 299 | MOVQ DI, 
SI 300 | MOVQ $817650473, BP 301 | IMULQ BP, SI 302 | RORQ $21, SI 303 | ADDQ DX, SI 304 | XORQ SI, DI 305 | ADDQ DI, AX 306 | after16: 307 | CMPQ CX, $8 308 | JLT after8 309 | MOVQ 0(BX), DX 310 | MOVQ $817650473, DI 311 | IMULQ DI, DX 312 | ADDQ DX, AX 313 | ADDQ $8, BX 314 | SUBQ $8, CX 315 | MOVQ AX, DX 316 | RORQ $55, DX 317 | MOVQ $2729050939, DI 318 | IMULQ DI, DX 319 | XORQ DX, AX 320 | after8: 321 | CMPQ CX, $4 322 | JLT after4 323 | XORQ DX, DX 324 | MOVL 0(BX), DX 325 | MOVQ $817650473, DI 326 | IMULQ DI, DX 327 | ADDQ DX, AX 328 | ADDQ $4, BX 329 | SUBQ $4, CX 330 | MOVQ AX, DX 331 | RORQ $26, DX 332 | MOVQ $2729050939, DI 333 | IMULQ DI, DX 334 | XORQ DX, AX 335 | after4: 336 | CMPQ CX, $2 337 | JLT after2 338 | XORQ DX, DX 339 | MOVW 0(BX), DX 340 | MOVQ $817650473, DI 341 | IMULQ DI, DX 342 | ADDQ DX, AX 343 | ADDQ $2, BX 344 | SUBQ $2, CX 345 | MOVQ AX, DX 346 | RORQ $48, DX 347 | MOVQ $2729050939, DI 348 | IMULQ DI, DX 349 | XORQ DX, AX 350 | after2: 351 | CMPQ CX, $1 352 | JLT after1 353 | MOVBQZX 0(BX), BX 354 | MOVQ $817650473, CX 355 | IMULQ CX, BX 356 | ADDQ BX, AX 357 | MOVQ AX, BX 358 | RORQ $37, BX 359 | MOVQ $2729050939, CX 360 | IMULQ CX, BX 361 | XORQ BX, AX 362 | after1: 363 | MOVQ AX, BX 364 | RORQ $28, BX 365 | XORQ BX, AX 366 | MOVQ $3603962101, BX 367 | IMULQ BX, AX 368 | MOVQ AX, BX 369 | RORQ $29, BX 370 | XORQ BX, AX 371 | MOVQ AX, ret+24(FP) 372 | RET 373 | -------------------------------------------------------------------------------- /cuckoofilter.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) linvon 3 | * Date 2021/2/18 10:29 4 | */ 5 | 6 | package cuckoo 7 | 8 | import ( 9 | "bytes" 10 | "encoding/binary" 11 | "errors" 12 | "fmt" 13 | "io" 14 | 15 | "github.com/dgryski/go-metro" 16 | ) 17 | 18 | // maximum number of cuckoo kicks before claiming failure 19 | const kMaxCuckooCount uint = 500 20 | 21 | const ( 22 | // TableTypeSingle normal single table 23 | 
TableTypeSingle = 0 24 | // TableTypePacked packed table, use semi-sort to save 1 bit per item 25 | TableTypePacked = 1 26 | ) 27 | 28 | type table interface { 29 | Init(tagsPerBucket, bitsPerTag, num uint, initialBucketsHint []byte) error 30 | NumBuckets() uint 31 | FindTagInBuckets(i1, i2 uint, tag uint32) bool 32 | DeleteTagFromBucket(i uint, tag uint32) bool 33 | InsertTagToBucket(i uint, tag uint32, kickOut bool, oldTag *uint32) bool 34 | SizeInTags() uint 35 | SizeInBytes() uint 36 | Info() string 37 | BitsPerItem() uint 38 | Reader() (io.Reader, uint) 39 | Decode([]byte) error 40 | Reset() 41 | } 42 | 43 | func getTable(tableType uint) interface{} { 44 | switch tableType { 45 | case TableTypePacked: 46 | return NewPackedTable() 47 | default: 48 | return NewSingleTable() 49 | } 50 | } 51 | 52 | type victimCache struct { 53 | index uint 54 | tag uint32 55 | used bool 56 | } 57 | 58 | const filterMetadataSize = 3*bytesPerUint32 + 1 59 | 60 | // Filter cuckoo filter type struct 61 | type Filter struct { 62 | victim victimCache 63 | numItems uint 64 | table table 65 | } 66 | 67 | //NewFilter return a new initialized filter 68 | /* 69 | tagsPerBucket: num of tags for each bucket, which is b in paper. tag is fingerprint, which is f in paper. 70 | bitPerItem: num of bits for each item, which is length of tag(fingerprint) 71 | maxNumKeys: num of keys that filter will store. this value should close to and lower 72 | nextPow2(maxNumKeys/tagsPerBucket) * maxLoadFactor. 
cause table.NumBuckets is always a power of two 73 | */ 74 | func NewFilter(tagsPerBucket, bitsPerItem, maxNumKeys, tableType uint) *Filter { 75 | numBuckets := getNextPow2(uint64(maxNumKeys / tagsPerBucket)) 76 | if float64(maxNumKeys)/float64(numBuckets*tagsPerBucket) > maxLoadFactor(tagsPerBucket) { 77 | numBuckets <<= 1 78 | } 79 | if numBuckets == 0 { 80 | numBuckets = 1 81 | } 82 | table := getTable(tableType).(table) 83 | _ = table.Init(tagsPerBucket, bitsPerItem, numBuckets, nil) 84 | return &Filter{ 85 | table: table, 86 | } 87 | } 88 | 89 | func (f *Filter) indexHash(hv uint32) uint { 90 | // table.NumBuckets is always a power of two, so modulo can be replaced with bitwise-and: 91 | return uint(hv) & (f.table.NumBuckets() - 1) 92 | } 93 | 94 | func (f *Filter) tagHash(hv uint32) uint32 { 95 | return hv%((1<> 32)) 101 | tag = f.tagHash(uint32(hash)) 102 | return 103 | } 104 | 105 | func (f *Filter) altIndex(index uint, tag uint32) uint { 106 | // 0x5bd1e995 is the hash constant from MurmurHash2 107 | return f.indexHash(uint32(index) ^ (tag * 0x5bd1e995)) 108 | } 109 | 110 | // Size return num of items that filter store 111 | func (f *Filter) Size() uint { 112 | var c uint 113 | if f.victim.used { 114 | c = 1 115 | } 116 | return f.numItems + c 117 | } 118 | 119 | // LoadFactor return current filter's loadFactor 120 | func (f *Filter) LoadFactor() float64 { 121 | return 1.0 * float64(f.Size()) / float64(f.table.SizeInTags()) 122 | } 123 | 124 | // SizeInBytes return bytes occupancy of filter's table 125 | func (f *Filter) SizeInBytes() uint { 126 | return f.table.SizeInBytes() 127 | } 128 | 129 | // BitsPerItem return bits occupancy per item of filter's table 130 | func (f *Filter) BitsPerItem() float64 { 131 | return 8.0 * float64(f.table.SizeInBytes()) / float64(f.Size()) 132 | } 133 | 134 | // Add add an item into filter, return false when filter is full 135 | func (f *Filter) Add(item []byte) bool { 136 | if f.victim.used { 137 | return false 138 | } 
139 | i, tag := f.generateIndexTagHash(item) 140 | return f.addImpl(i, tag) 141 | } 142 | 143 | // AddUnique add an item into filter, return false when filter already contains it or filter is full 144 | func (f *Filter) AddUnique(item []byte) bool { 145 | if f.Contain(item) { 146 | return false 147 | } 148 | return f.Add(item) 149 | } 150 | 151 | func (f *Filter) addImpl(i uint, tag uint32) bool { 152 | curIndex := i 153 | curTag := tag 154 | var oldTag uint32 155 | 156 | var count uint 157 | var kickOut bool 158 | for count = 0; count < kMaxCuckooCount; count++ { 159 | kickOut = count > 0 160 | oldTag = 0 161 | if f.table.InsertTagToBucket(curIndex, curTag, kickOut, &oldTag) { 162 | f.numItems++ 163 | return true 164 | } 165 | if kickOut { 166 | curTag = oldTag 167 | } 168 | curIndex = f.altIndex(curIndex, curTag) 169 | } 170 | 171 | f.victim.index = curIndex 172 | f.victim.tag = curTag 173 | f.victim.used = true 174 | return true 175 | } 176 | 177 | // Contain return if filter contains an item 178 | func (f *Filter) Contain(key []byte) bool { 179 | i1, tag := f.generateIndexTagHash(key) 180 | i2 := f.altIndex(i1, tag) 181 | 182 | hit := f.victim.used && tag == f.victim.tag && (i1 == f.victim.index || i2 == f.victim.index) 183 | 184 | if hit || f.table.FindTagInBuckets(i1, i2, tag) { 185 | return true 186 | } 187 | return false 188 | } 189 | 190 | // Delete delete item from filter, return false when item not exist 191 | func (f *Filter) Delete(key []byte) bool { 192 | i1, tag := f.generateIndexTagHash(key) 193 | i2 := f.altIndex(i1, tag) 194 | 195 | if f.table.DeleteTagFromBucket(i1, tag) || f.table.DeleteTagFromBucket(i2, tag) { 196 | f.numItems-- 197 | goto TryEliminateVictim 198 | } else if f.victim.used && tag == f.victim.tag && (i1 == f.victim.index || i2 == f.victim.index) { 199 | f.victim.used = false 200 | return true 201 | } else { 202 | return false 203 | } 204 | 205 | TryEliminateVictim: 206 | if f.victim.used { 207 | f.victim.used = false 208 | i := 
f.victim.index 209 | tag = f.victim.tag 210 | f.addImpl(i, tag) 211 | } 212 | return true 213 | } 214 | 215 | // Reset reset the filter 216 | func (f *Filter) Reset() { 217 | f.table.Reset() 218 | f.numItems = 0 219 | f.victim.index = 0 220 | f.victim.tag = 0 221 | f.victim.used = false 222 | } 223 | 224 | // FalsePositiveRate return the False Positive Rate of filter 225 | // Notice that this will reset filter 226 | func (f *Filter) FalsePositiveRate() float64 { 227 | n1 := make([]byte, 4) 228 | f.Reset() 229 | n := f.table.SizeInTags() 230 | for i := uint32(0); i < uint32(n); i++ { 231 | binary.BigEndian.PutUint32(n1, i) 232 | f.Add(n1) 233 | } 234 | var rounds uint32 = 100000 235 | fp := 0 236 | for i := uint32(0); i < rounds; i++ { 237 | binary.BigEndian.PutUint32(n1, i+uint32(n)+1) 238 | if f.Contain(n1) { 239 | fp++ 240 | } 241 | } 242 | f.Reset() 243 | return float64(fp) / float64(rounds) 244 | } 245 | 246 | // Info return filter's detail info 247 | func (f *Filter) Info() string { 248 | return fmt.Sprintf("CuckooFilter Status:\n"+ 249 | "\t\t%v\n"+ 250 | "\t\tKeys stored: %v\n"+ 251 | "\t\tLoad factor: %v\n"+ 252 | "\t\tHashtable size: %v KB\n"+ 253 | "\t\tbit/key: %v\n", 254 | f.table.Info(), f.Size(), f.LoadFactor(), f.table.SizeInBytes()>>10, f.BitsPerItem()) 255 | } 256 | 257 | // Encode returns a byte slice representing a Cuckoo filter 258 | func (f *Filter) Encode() ([]byte, error) { 259 | filterReader, filterSize := f.EncodeReader() 260 | buf := make([]byte, filterSize) 261 | if _, err := io.ReadFull(filterReader, buf); err != nil { 262 | return nil, err 263 | } 264 | return buf, nil 265 | } 266 | 267 | // EncodeReader returns a reader representing a Cuckoo filter 268 | func (f *Filter) EncodeReader() (io.Reader, uint) { 269 | var metadata [filterMetadataSize]byte 270 | 271 | for i, n := range []uint32{uint32(f.numItems), uint32(f.victim.index), f.victim.tag} { 272 | binary.LittleEndian.PutUint32(metadata[i*bytesPerUint32:], n) 273 | } 274 | 275 | 
victimUsed := byte(0) 276 | if f.victim.used { 277 | victimUsed = byte(1) 278 | } 279 | metadata[bytesPerUint32*3] = victimUsed 280 | tableReader, tableEncodedSize := f.table.Reader() 281 | return io.MultiReader(bytes.NewReader(metadata[:]), tableReader), uint(len(metadata)) + tableEncodedSize 282 | } 283 | 284 | // Decode returns a Cuckoo Filter using a copy of the provided byte slice. 285 | func Decode(b []byte) (*Filter, error) { 286 | copiedBytes := make([]byte, len(b)) 287 | copy(copiedBytes, b) 288 | return DecodeFrom(copiedBytes) 289 | } 290 | 291 | // DecodeFrom returns a Cuckoo Filter using the exact provided byte slice (no copy). 292 | func DecodeFrom(b []byte) (*Filter, error) { 293 | if len(b) < 20 { 294 | return nil, errors.New("unexpected bytes length") 295 | } 296 | numItems := uint(binary.LittleEndian.Uint32(b[0*bytesPerUint32:])) 297 | curIndex := uint(binary.LittleEndian.Uint32(b[1*bytesPerUint32:])) 298 | curTag := binary.LittleEndian.Uint32(b[2*1*bytesPerUint32:]) 299 | used := b[12] == byte(1) 300 | tableType := uint(b[13]) 301 | table := getTable(tableType).(table) 302 | if err := table.Decode(b[13:]); err != nil { 303 | return nil, err 304 | } 305 | return &Filter{ 306 | table: table, 307 | numItems: numItems, 308 | victim: victimCache{ 309 | index: curIndex, 310 | tag: curTag, 311 | used: used, 312 | }, 313 | }, nil 314 | } 315 | -------------------------------------------------------------------------------- /packedtable.go: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) linvon 3 | * Date 2021/2/18 10:29 4 | */ 5 | 6 | package cuckoo 7 | 8 | import ( 9 | "bytes" 10 | "encoding/binary" 11 | "fmt" 12 | "io" 13 | "math/rand" 14 | ) 15 | 16 | // PackedTable using Permutation encoding to save 1 bit per tag 17 | type PackedTable struct { 18 | kDirBitsPerTag uint 19 | kBitsPerBucket uint 20 | kBytesPerBucket uint 21 | kDirBitsMask uint32 22 | bitsPerTag uint 23 | 24 | len uint 25 | 
numBuckets uint 26 | buckets []byte 27 | perm PermEncoding 28 | } 29 | 30 | // NewPackedTable return a packedTable 31 | func NewPackedTable() *PackedTable { 32 | return &PackedTable{} 33 | } 34 | 35 | const ( 36 | cFpSize = 4 37 | tagsPerPTable = 4 38 | codeSize = 12 39 | ) 40 | 41 | // Init init table 42 | func (p *PackedTable) Init(_, bitsPerTag, num uint, initialBucketsHint []byte) error { 43 | p.bitsPerTag = bitsPerTag 44 | p.numBuckets = num 45 | 46 | p.kDirBitsPerTag = p.bitsPerTag - cFpSize 47 | p.kBitsPerBucket = (p.bitsPerTag - 1) * tagsPerPTable 48 | p.kBytesPerBucket = (p.kBitsPerBucket + 7) >> 3 49 | p.kDirBitsMask = ((1 << p.kDirBitsPerTag) - 1) << cFpSize 50 | // NOTE: use 7 extra bytes to avoid overrun as we always read a uint64 51 | p.len = (p.kBitsPerBucket*p.numBuckets+7)>>3 + 7 52 | buckets, err := getBucketsFromHint(initialBucketsHint, p.len) 53 | if err != nil { 54 | return err 55 | } 56 | p.buckets = buckets 57 | p.perm.Init() 58 | return nil 59 | } 60 | 61 | // NumBuckets return num of table buckets 62 | func (p *PackedTable) NumBuckets() uint { 63 | return p.numBuckets 64 | } 65 | 66 | // SizeInTags return num of tags that table can store 67 | func (p *PackedTable) SizeInTags() uint { 68 | return tagsPerPTable * p.numBuckets 69 | } 70 | 71 | // SizeInBytes return bytes occupancy of table 72 | func (p *PackedTable) SizeInBytes() uint { 73 | return p.len 74 | } 75 | 76 | // BitsPerItem return bits occupancy per item of table 77 | func (p *PackedTable) BitsPerItem() uint { 78 | return p.bitsPerTag 79 | } 80 | 81 | // PrintBucket print a bucket 82 | func (p *PackedTable) PrintBucket(i uint) { 83 | pos := p.kBitsPerBucket * i / bitsPerByte 84 | fmt.Printf("\tbucketBits =%x\n", p.buckets[pos:pos+p.kBytesPerBucket]) 85 | var tags [tagsPerPTable]uint32 86 | p.ReadBucket(i, &tags) 87 | p.PrintTags(tags) 88 | } 89 | 90 | // PrintTags print tags 91 | func (p *PackedTable) PrintTags(tags [tagsPerPTable]uint32) { 92 | var lowBits [tagsPerPTable]uint8 93 
| var dirBits [tagsPerPTable]uint32 94 | for j := 0; j < tagsPerPTable; j++ { 95 | lowBits[j] = uint8(tags[j] & 0x0f) 96 | dirBits[j] = (tags[j] & p.kDirBitsMask) >> cFpSize 97 | } 98 | codeword := p.perm.Encode(lowBits) 99 | fmt.Printf("\tcodeword =%x\n", codeword) 100 | for j := 0; j < tagsPerPTable; j++ { 101 | fmt.Printf("\ttag[%v]: %x lowBits=%x dirBits=%x\n", j, tags[j], lowBits[j], dirBits[j]) 102 | } 103 | } 104 | 105 | func (p *PackedTable) sortPair(a, b *uint32) { 106 | if (*a & 0x0f) > (*b & 0x0f) { 107 | *a, *b = *b, *a 108 | } 109 | } 110 | 111 | func (p *PackedTable) sortTags(tags *[tagsPerPTable]uint32) { 112 | p.sortPair(&tags[0], &tags[2]) 113 | p.sortPair(&tags[1], &tags[3]) 114 | p.sortPair(&tags[0], &tags[1]) 115 | p.sortPair(&tags[2], &tags[3]) 116 | p.sortPair(&tags[1], &tags[2]) 117 | } 118 | 119 | // ReadBucket read and decode the bucket i, pass the 4 decoded tags to the 2nd arg 120 | // bucket bits = 12 codeword bits + dir bits of tag1 + dir bits of tag2 ... 121 | func (p *PackedTable) ReadBucket(i uint, tags *[tagsPerPTable]uint32) { 122 | var codeword uint16 123 | var lowBits [tagsPerPTable]uint8 124 | pos := i * p.kBitsPerBucket >> 3 125 | switch p.bitsPerTag { 126 | case 5: 127 | // 1 dirBits per tag, 16 bits per bucket 128 | bucketBits := uint16(p.buckets[pos]) | uint16(p.buckets[pos+1])<<8 129 | codeword = bucketBits & 0x0fff 130 | tags[0] = uint32(bucketBits>>8) & p.kDirBitsMask 131 | tags[1] = uint32(bucketBits>>9) & p.kDirBitsMask 132 | tags[2] = uint32(bucketBits>>10) & p.kDirBitsMask 133 | tags[3] = uint32(bucketBits>>11) & p.kDirBitsMask 134 | case 6: 135 | // 2 dirBits per tag, 20 bits per bucket 136 | bucketBits := uint32(p.buckets[pos]) | uint32(p.buckets[pos+1])<<8 | uint32(p.buckets[pos+2])<<16 | uint32(p.buckets[pos+3])<<24 137 | codeword = uint16(bucketBits) >> ((i & 1) << 2) & 0x0fff 138 | tags[0] = (bucketBits >> (8 + ((i & 1) << 2))) & p.kDirBitsMask 139 | tags[1] = (bucketBits >> (10 + ((i & 1) << 2))) & 
p.kDirBitsMask 140 | tags[2] = (bucketBits >> (12 + ((i & 1) << 2))) & p.kDirBitsMask 141 | tags[3] = (bucketBits >> (14 + ((i & 1) << 2))) & p.kDirBitsMask 142 | case 7: 143 | // 3 dirBits per tag, 24 bits per bucket 144 | bucketBits := uint32(p.buckets[pos]) | uint32(p.buckets[pos+1])<<8 | uint32(p.buckets[pos+2])<<16 | uint32(p.buckets[pos+3])<<24 145 | codeword = uint16(bucketBits) & 0x0fff 146 | tags[0] = (bucketBits >> 8) & p.kDirBitsMask 147 | tags[1] = (bucketBits >> 11) & p.kDirBitsMask 148 | tags[2] = (bucketBits >> 14) & p.kDirBitsMask 149 | tags[3] = (bucketBits >> 17) & p.kDirBitsMask 150 | case 8: 151 | // 4 dirBits per tag, 28 bits per bucket 152 | bucketBits := uint32(p.buckets[pos]) | uint32(p.buckets[pos+1])<<8 | uint32(p.buckets[pos+2])<<16 | uint32(p.buckets[pos+3])<<24 153 | codeword = uint16(bucketBits) >> ((i & 1) << 2) & 0x0fff 154 | tags[0] = (bucketBits >> (8 + ((i & 1) << 2))) & p.kDirBitsMask 155 | tags[1] = (bucketBits >> (12 + ((i & 1) << 2))) & p.kDirBitsMask 156 | tags[2] = (bucketBits >> (16 + ((i & 1) << 2))) & p.kDirBitsMask 157 | tags[3] = (bucketBits >> (20 + ((i & 1) << 2))) & p.kDirBitsMask 158 | case 9: 159 | // 5 dirBits per tag, 32 bits per bucket 160 | bucketBits := uint32(p.buckets[pos]) | uint32(p.buckets[pos+1])<<8 | uint32(p.buckets[pos+2])<<16 | uint32(p.buckets[pos+3])<<24 161 | codeword = uint16(bucketBits) & 0x0fff 162 | tags[0] = (bucketBits >> 8) & p.kDirBitsMask 163 | tags[1] = (bucketBits >> 13) & p.kDirBitsMask 164 | tags[2] = (bucketBits >> 18) & p.kDirBitsMask 165 | tags[3] = (bucketBits >> 23) & p.kDirBitsMask 166 | case 13: 167 | // 9 dirBits per tag, 48 bits per bucket 168 | bucketBits := uint64(p.buckets[pos]) | uint64(p.buckets[pos+1])<<8 | uint64(p.buckets[pos+2])<<16 | uint64(p.buckets[pos+3])<<24 | 169 | uint64(p.buckets[pos+4])<<32 | uint64(p.buckets[pos+5])<<40 | uint64(p.buckets[pos+6])<<48 | uint64(p.buckets[pos+7])<<56 170 | codeword = uint16(bucketBits) & 0x0fff 171 | tags[0] = 
uint32((bucketBits)>>8) & p.kDirBitsMask 172 | tags[1] = uint32((bucketBits)>>17) & p.kDirBitsMask 173 | tags[2] = uint32((bucketBits)>>26) & p.kDirBitsMask 174 | tags[3] = uint32((bucketBits)>>35) & p.kDirBitsMask 175 | case 17: 176 | // 13 dirBits per tag, 64 bits per bucket 177 | bucketBits := uint64(p.buckets[pos]) | uint64(p.buckets[pos+1])<<8 | uint64(p.buckets[pos+2])<<16 | uint64(p.buckets[pos+3])<<24 | 178 | uint64(p.buckets[pos+4])<<32 | uint64(p.buckets[pos+5])<<40 | uint64(p.buckets[pos+6])<<48 | uint64(p.buckets[pos+7])<<56 179 | codeword = uint16(bucketBits) & 0x0fff 180 | tags[0] = uint32((bucketBits)>>8) & p.kDirBitsMask 181 | tags[1] = uint32((bucketBits)>>21) & p.kDirBitsMask 182 | tags[2] = uint32((bucketBits)>>34) & p.kDirBitsMask 183 | tags[3] = uint32((bucketBits)>>47) & p.kDirBitsMask 184 | default: 185 | u1, u2, rShift := p.readOutBytes(i, pos) 186 | codeword = uint16(u1>>rShift) & 0x0fff 187 | for k := 0; k < tagsPerPTable; k++ { 188 | tags[k] = uint32(u1 >> rShift >> (codeSize - cFpSize + k*int(p.kDirBitsPerTag))) 189 | shift := codeSize - cFpSize + k*int(p.kDirBitsPerTag) - 64 + int(rShift) 190 | if shift < 0 { 191 | tags[k] |= uint32(u2 << -shift) 192 | } else { 193 | tags[k] |= uint32(u2 >> shift) 194 | } 195 | tags[k] &= p.kDirBitsMask 196 | } 197 | } 198 | 199 | /* codeword is the lowest 12 bits in the bucket */ 200 | p.perm.Decode(codeword, &lowBits) 201 | 202 | tags[0] |= uint32(lowBits[0]) 203 | tags[1] |= uint32(lowBits[1]) 204 | tags[2] |= uint32(lowBits[2]) 205 | tags[3] |= uint32(lowBits[3]) 206 | } 207 | 208 | func (p *PackedTable) readOutBytes(i, pos uint) (uint64, uint64, uint) { 209 | rShift := (p.kBitsPerBucket * i) & (bitsPerByte - 1) 210 | // tag is max 32bit, store 31bit per tag, so max occupies 16 bytes 211 | kBytes := (rShift + p.kBitsPerBucket + 7) / bitsPerByte 212 | 213 | var u1, u2 uint64 214 | for k := uint(0); k < kBytes; k++ { 215 | if k < bytesPerUint64 { 216 | u1 |= uint64(p.buckets[pos+k]) << (k * 
bitsPerByte) 217 | } else { 218 | u2 |= uint64(p.buckets[pos+k]) << ((k - bytesPerUint64) * bitsPerByte) 219 | } 220 | } 221 | 222 | return u1, u2, rShift 223 | } 224 | 225 | // WriteBucket write tags into bucket i 226 | func (p *PackedTable) WriteBucket(i uint, tags [tagsPerPTable]uint32) { 227 | p.sortTags(&tags) 228 | 229 | /* put in direct bits for each tag*/ 230 | var lowBits [tagsPerPTable]uint8 231 | var highBits [tagsPerPTable]uint32 232 | 233 | lowBits[0] = uint8(tags[0] & 0x0f) 234 | lowBits[1] = uint8(tags[1] & 0x0f) 235 | lowBits[2] = uint8(tags[2] & 0x0f) 236 | lowBits[3] = uint8(tags[3] & 0x0f) 237 | 238 | highBits[0] = tags[0] & 0xfffffff0 239 | highBits[1] = tags[1] & 0xfffffff0 240 | highBits[2] = tags[2] & 0xfffffff0 241 | highBits[3] = tags[3] & 0xfffffff0 242 | // note that : tags[j] = lowBits[j] | highBits[j] 243 | 244 | codeword := p.perm.Encode(lowBits) 245 | pos := i * p.kBitsPerBucket >> 3 246 | switch p.kBitsPerBucket { 247 | case 16: 248 | // 1 dirBits per tag 249 | v := codeword | uint16(highBits[0]<<8) | uint16(highBits[1]<<9) | 250 | uint16(highBits[2]<<10) | uint16(highBits[3]<<11) 251 | p.buckets[pos] = byte(v) 252 | p.buckets[pos+1] = byte(v >> 8) 253 | case 20: 254 | // 2 dirBits per tag 255 | var v uint32 256 | v = uint32(p.buckets[pos]) | uint32(p.buckets[pos+1])<<8 | uint32(p.buckets[pos+2])<<16 | uint32(p.buckets[pos+3])<<24 257 | if (i & 0x0001) == 0 { 258 | v &= 0xfff00000 259 | v |= uint32(codeword) | (highBits[0] << 8) | 260 | (highBits[1] << 10) | (highBits[2] << 12) | 261 | (highBits[3] << 14) 262 | } else { 263 | v &= 0xff00000f 264 | v |= uint32(codeword)<<4 | (highBits[0] << 12) | 265 | (highBits[1] << 14) | (highBits[2] << 16) | 266 | (highBits[3] << 18) 267 | } 268 | p.buckets[pos] = byte(v) 269 | p.buckets[pos+1] = byte(v >> 8) 270 | p.buckets[pos+2] = byte(v >> 16) 271 | p.buckets[pos+3] = byte(v >> 24) 272 | case 24: 273 | // 3 dirBits per tag 274 | var v uint32 275 | v = uint32(p.buckets[pos]) | 
uint32(p.buckets[pos+1])<<8 | uint32(p.buckets[pos+2])<<16 | uint32(p.buckets[pos+3])<<24 276 | v &= 0xff000000 277 | v |= uint32(codeword) | (highBits[0] << 8) | (highBits[1] << 11) | 278 | (highBits[2] << 14) | (highBits[3] << 17) 279 | p.buckets[pos] = byte(v) 280 | p.buckets[pos+1] = byte(v >> 8) 281 | p.buckets[pos+2] = byte(v >> 16) 282 | p.buckets[pos+3] = byte(v >> 24) 283 | case 28: 284 | // 4 dirBits per tag 285 | var v uint32 286 | v = uint32(p.buckets[pos]) | uint32(p.buckets[pos+1])<<8 | uint32(p.buckets[pos+2])<<16 | uint32(p.buckets[pos+3])<<24 287 | if (i & 0x0001) == 0 { 288 | v &= 0xf0000000 289 | v |= uint32(codeword) | (highBits[0] << 8) | 290 | (highBits[1] << 12) | (highBits[2] << 16) | 291 | (highBits[3] << 20) 292 | } else { 293 | v &= 0x0000000f 294 | v |= uint32(codeword)<<4 | (highBits[0] << 12) | 295 | (highBits[1] << 16) | (highBits[2] << 20) | 296 | (highBits[3] << 24) 297 | } 298 | p.buckets[pos] = byte(v) 299 | p.buckets[pos+1] = byte(v >> 8) 300 | p.buckets[pos+2] = byte(v >> 16) 301 | p.buckets[pos+3] = byte(v >> 24) 302 | case 32: 303 | // 5 dirBits per tag 304 | v := uint32(codeword) | (highBits[0] << 8) | (highBits[1] << 13) | 305 | (highBits[2] << 18) | (highBits[3] << 23) 306 | p.buckets[pos] = byte(v) 307 | p.buckets[pos+1] = byte(v >> 8) 308 | p.buckets[pos+2] = byte(v >> 16) 309 | p.buckets[pos+3] = byte(v >> 24) 310 | case 48: 311 | // 9 dirBits per tag 312 | var v uint64 313 | v = uint64(p.buckets[pos]) | uint64(p.buckets[pos+1])<<8 | uint64(p.buckets[pos+2])<<16 | uint64(p.buckets[pos+3])<<24 | 314 | uint64(p.buckets[pos+4])<<32 | uint64(p.buckets[pos+5])<<40 | uint64(p.buckets[pos+6])<<48 | uint64(p.buckets[pos+7])<<56 315 | v &= 0xffff000000000000 316 | v |= uint64(codeword) | uint64(highBits[0])<<8 | 317 | uint64(highBits[1])<<17 | uint64(highBits[2])<<26 | 318 | uint64(highBits[3])<<35 319 | p.buckets[pos] = byte(v) 320 | p.buckets[pos+1] = byte(v >> 8) 321 | p.buckets[pos+2] = byte(v >> 16) 322 | p.buckets[pos+3] = 
byte(v >> 24) 323 | p.buckets[pos+4] = byte(v >> 32) 324 | p.buckets[pos+5] = byte(v >> 40) 325 | p.buckets[pos+6] = byte(v >> 48) 326 | p.buckets[pos+7] = byte(v >> 56) 327 | case 64: 328 | // 13 dirBits per tag 329 | v := uint64(codeword) | uint64(highBits[0])<<8 | 330 | uint64(highBits[1])<<21 | uint64(highBits[2])<<34 | 331 | uint64(highBits[3])<<47 332 | p.buckets[pos] = byte(v) 333 | p.buckets[pos+1] = byte(v >> 8) 334 | p.buckets[pos+2] = byte(v >> 16) 335 | p.buckets[pos+3] = byte(v >> 24) 336 | p.buckets[pos+4] = byte(v >> 32) 337 | p.buckets[pos+5] = byte(v >> 40) 338 | p.buckets[pos+6] = byte(v >> 48) 339 | p.buckets[pos+7] = byte(v >> 56) 340 | default: 341 | p.writeInBytes(i, pos, codeword, highBits) 342 | } 343 | } 344 | 345 | func (p *PackedTable) writeInBytes(i, pos uint, codeword uint16, highBits [tagsPerPTable]uint32) { 346 | rShift := (p.kBitsPerBucket * i) & (bitsPerByte - 1) 347 | lShift := (rShift + p.kBitsPerBucket) & (bitsPerByte - 1) 348 | // tag is max 32bit, store 31bit per tag, so max occupies 16 bytes 349 | kBytes := (rShift + p.kBitsPerBucket + 7) / bitsPerByte 350 | 351 | rMask := uint8(0xff) >> (bitsPerByte - rShift) 352 | lMask := uint8(0xff) << lShift 353 | if lShift == 0 { 354 | lMask = uint8(0) 355 | } 356 | 357 | var u1, u2 uint64 358 | u1 |= uint64(p.buckets[pos] & rMask) 359 | end := kBytes - 1 360 | if kBytes > bytesPerUint64 { 361 | u2 |= uint64(p.buckets[pos+end]&lMask) << ((end - bytesPerUint64) * bitsPerByte) 362 | } else { 363 | u1 |= uint64(p.buckets[pos+end]&lMask) << (end * bitsPerByte) 364 | } 365 | 366 | u1 |= uint64(codeword) << rShift 367 | for k := 0; k < tagsPerPTable; k++ { 368 | u1 |= uint64(highBits[k]) << (codeSize - cFpSize + k*int(p.kDirBitsPerTag)) << rShift 369 | shift := codeSize - cFpSize + k*int(p.kDirBitsPerTag) - 64 + int(rShift) 370 | if shift < 0 { 371 | u2 |= uint64(highBits[k]) >> -shift 372 | } else { 373 | u2 |= uint64(highBits[k]) << shift 374 | } 375 | } 376 | 377 | for k := uint(0); k < 
kBytes; k++ { 378 | if k < bytesPerUint64 { 379 | p.buckets[pos+k] = byte(u1 >> (k * bitsPerByte)) 380 | } else { 381 | p.buckets[pos+k] = byte(u2 >> ((k - bytesPerUint64) * bitsPerByte)) 382 | } 383 | } 384 | 385 | return 386 | } 387 | 388 | // FindTagInBuckets find if tag in bucket i1 i2 389 | func (p *PackedTable) FindTagInBuckets(i1, i2 uint, tag uint32) bool { 390 | var tags1, tags2 [tagsPerPTable]uint32 391 | p.ReadBucket(i1, &tags1) 392 | p.ReadBucket(i2, &tags2) 393 | 394 | return (tags1[0] == tag) || (tags1[1] == tag) || (tags1[2] == tag) || 395 | (tags1[3] == tag) || (tags2[0] == tag) || (tags2[1] == tag) || 396 | (tags2[2] == tag) || (tags2[3] == tag) 397 | } 398 | 399 | // DeleteTagFromBucket delete tag from bucket i 400 | func (p *PackedTable) DeleteTagFromBucket(i uint, tag uint32) bool { 401 | var tags [tagsPerPTable]uint32 402 | p.ReadBucket(i, &tags) 403 | for j := 0; j < tagsPerPTable; j++ { 404 | if tags[j] == tag { 405 | tags[j] = 0 406 | p.WriteBucket(i, tags) 407 | return true 408 | } 409 | } 410 | return false 411 | } 412 | 413 | // InsertTagToBucket insert tag into bucket i 414 | func (p *PackedTable) InsertTagToBucket(i uint, tag uint32, kickOut bool, oldTag *uint32) bool { 415 | var tags [tagsPerPTable]uint32 416 | p.ReadBucket(i, &tags) 417 | for j := 0; j < tagsPerPTable; j++ { 418 | if tags[j] == 0 { 419 | tags[j] = tag 420 | p.WriteBucket(i, tags) 421 | return true 422 | } 423 | } 424 | if kickOut { 425 | r := uint(rand.Int31()) & 3 426 | *oldTag = tags[r] 427 | tags[r] = tag 428 | p.WriteBucket(i, tags) 429 | } 430 | return false 431 | } 432 | 433 | // Reset reset table 434 | func (p *PackedTable) Reset() { 435 | for i := range p.buckets { 436 | p.buckets[i] = 0 437 | } 438 | } 439 | 440 | // Info return table's info 441 | func (p *PackedTable) Info() string { 442 | return fmt.Sprintf("PackedHashtable with tag size: %v bits \n"+ 443 | "\t\t4 packed bits(3 bits after compression) and %v direct bits\n"+ 444 | "\t\tAssociativity: 4 \n"+ 
445 | "\t\tTotal # of rows: %v\n"+ 446 | "\t\tTotal # slots: %v\n", 447 | p.bitsPerTag, p.kDirBitsPerTag, p.numBuckets, p.SizeInTags()) 448 | } 449 | 450 | const packedTableMetadataSize = 2+bytesPerUint32 451 | 452 | // Encode returns a byte slice representing a TableBucket 453 | func (p *PackedTable) Reader() (io.Reader, uint) { 454 | var metadata [packedTableMetadataSize]byte 455 | metadata[0] = uint8(TableTypePacked) 456 | metadata[1] = uint8(p.bitsPerTag) 457 | binary.LittleEndian.PutUint32(metadata[2:], uint32(p.numBuckets)) 458 | return io.MultiReader(bytes.NewReader(metadata[:]), bytes.NewReader(p.buckets)), uint(len(metadata) + len(p.buckets)) 459 | } 460 | 461 | // Decode parse a byte slice into a TableBucket 462 | func (p *PackedTable) Decode(b []byte) error { 463 | bitsPerTag := uint(b[1]) 464 | numBuckets := uint(binary.LittleEndian.Uint32(b[2:])) 465 | return p.Init(0, bitsPerTag, numBuckets, b[6:]) 466 | } 467 | --------------------------------------------------------------------------------