├── .gitignore ├── LICENSE ├── README.md └── src ├── bloomfilter.go └── bloomfilter_test.go /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | 8 | # Test binary, build with `go test -c` 9 | *.test 10 | 11 | # Output of the go coverage tool, specifically when used with LiteIDE 12 | *.out 13 | 14 | .idea/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Wells Jia 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
// bloomfilter_go: a Bloom filter implementation built on the FNV hash.

// ---------------------------------------------------------------------------
// src/bloomfilter.go
// package bloomfilter; imports: hash/fnv, math, math/bits
// ---------------------------------------------------------------------------

//type Bloom interface {
//	Add(v string)
//	Locations(v string) []uint
//	Test(v string) bool
//	Size() float64
//}

// wordBits is the number of filter bits stored in each element of
// BloomFilter.bucket.
const wordBits = 64

// BloomFilter is a fixed-size Bloom filter over strings. It must be created
// with NewBloom; the zero value has m == 0 and its methods would divide by
// zero.
type BloomFilter struct {
	bucket []uint64 // bit set, wordBits bits per element
	m      uint64   // number of bits in the filter
	k      uint     // number of bits probed per item
}

// NewBloom returns an empty filter with size bits that probes k bits per item.
// A size or k of zero is raised to 1 so that the modulo in the probe sequence
// and the division in Size are always defined.
func NewBloom(size uint64, k uint) *BloomFilter {
	if size == 0 {
		size = 1
	}
	if k == 0 {
		k = 1
	}
	// Round up without computing size+wordBits-1, which could overflow.
	words := size / wordBits
	if size%wordBits != 0 {
		words++
	}
	return &BloomFilter{
		bucket: make([]uint64, words),
		m:      size,
		k:      k,
	}
}

// fingerprint returns the 64-bit FNV-1a hash of b.
func fingerprint(b []byte) uint64 {
	h := fnv.New64a()
	_, _ = h.Write(b) // hash.Hash documents that Write never returns an error
	return h.Sum64()
}

// fingerprintAlt returns the 64-bit FNV-1 hash of b. It is used as the second
// hash of the probe sequence so that the stride is not derived from the same
// value as the starting position.
func fingerprintAlt(b []byte) uint64 {
	h := fnv.New64()
	_, _ = h.Write(b) // hash.Hash documents that Write never returns an error
	return h.Sum64()
}

// probe returns the first bit position and the stride of the probe sequence
// for v; both are below bf.m. The i-th probed bit is (start + i*step) mod m.
func (bf *BloomFilter) probe(v string) (start, step uint64) {
	data := []byte(v)
	start = fingerprint(data) % bf.m
	step = fingerprintAlt(data) % bf.m
	if step == 0 {
		// A zero stride would make all k probes hit the same bit.
		step = 1
	}
	return start, step
}

// Locations returns the k bit positions, each in [0, m), that v maps to.
// The returned slice is freshly allocated and owned by the caller.
func (bf *BloomFilter) Locations(v string) []uint {
	locs := make([]uint, bf.k)
	x, step := bf.probe(v)
	for i := range locs {
		locs[i] = uint(x)
		// x and step are both below m, so the sum only wraps if m > 2^63.
		x = (x + step) % bf.m
	}
	return locs
}

// Add sets the k bits that v maps to.
func (bf *BloomFilter) Add(v string) {
	x, step := bf.probe(v)
	for i := uint(0); i < bf.k; i++ {
		bf.bucket[x/wordBits] |= 1 << (x % wordBits)
		x = (x + step) % bf.m
	}
}

// Test reports whether all k bits that v maps to are set. A false result means
// v was never added; a true result may be a false positive.
func (bf *BloomFilter) Test(v string) bool {
	x, step := bf.probe(v)
	for i := uint(0); i < bf.k; i++ {
		if bf.bucket[x/wordBits]&(1<<(x%wordBits)) == 0 {
			return false
		}
		x = (x + step) % bf.m
	}
	return true
}

// Size estimates how many distinct items have been added, using
// -m * ln(1 - X/m) / k where X is the number of set bits. The result is 0 for
// an empty filter and +Inf when every bit is set.
func (bf *BloomFilter) Size() float64 {
	set := 0
	for _, w := range bf.bucket {
		set += bits.OnesCount64(w)
	}
	m := float64(bf.m)
	return -m * math.Log(1-float64(set)/m) / float64(bf.k)
}

// PopCount returns the number of one bits in v.
func PopCount(v uint) uint {
	return uint(bits.OnesCount(v))
}

// ---------------------------------------------------------------------------
// src/bloomfilter_test.go
// package bloomfilter; imports: math/rand, sort, testing
// ---------------------------------------------------------------------------

var (
	// MemberSize is the filter size in bits used by the tests (100 million).
	MemberSize uint64 = 100000000

	// SampleSize is the number of items inserted by the tests (100 thousand).
	SampleSize int = 100000

	// DefaultHashFunctions is the number of bits probed per item.
	DefaultHashFunctions uint = 3
)

// RandomBytes returns size pseudo-random bytes.
func RandomBytes(size int) []byte {
	b := make([]byte, size)
	rand.Read(b) // math/rand's Read always returns a nil error
	return b
}

// RandomString returns a string built from 10 pseudo-random bytes.
func RandomString() string {
	return string(RandomBytes(10))
}

// TestExistance checks that every added item is reported present and that
// fresh random items are reported present only rarely.
func TestExistance(t *testing.T) {
	bf := NewBloom(MemberSize, DefaultHashFunctions)

	falsePositives := 0
	for i := 0; i < SampleSize; i++ {
		item := RandomString()
		bf.Add(item)

		if !bf.Test(item) {
			t.Errorf("'%q' not found", item)
		}

		// A fresh random item was almost certainly never added, but a Bloom
		// filter is allowed to report it, so count hits instead of failing
		// on the first one.
		if bf.Test(RandomString()) {
			falsePositives++
		}
	}

	// Tolerate a false-positive rate of up to 0.1%.
	if limit := SampleSize / 1000; falsePositives > limit {
		t.Errorf("%d false positives in %d lookups, want at most %d",
			falsePositives, SampleSize, limit)
	}
}

// BenchmarkAdd measures Add on random strings.
func BenchmarkAdd(b *testing.B) {
	bf := NewBloom(MemberSize, DefaultHashFunctions)
	for i := 0; i < b.N; i++ {
		bf.Add(RandomString())
	}
}

// BenchmarkTest measures Test on random strings against a filter that was
// pre-populated with b.N random strings.
func BenchmarkTest(b *testing.B) {
	bf := NewBloom(MemberSize, DefaultHashFunctions)
	for i := 0; i < b.N; i++ {
		bf.Add(RandomString())
	}
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		bf.Test(RandomString())
	}
}

// use consumes a value so the benchmarked call has its result used.
func use(interface{}) {}

// BenchmarkBinarySearch measures sort.SearchStrings over SampleSize sorted
// random strings, as a baseline for the filter benchmarks.
func BenchmarkBinarySearch(b *testing.B) {
	items := make([]string, 0, SampleSize)
	for i := 0; i < SampleSize; i++ {
		items = append(items, RandomString())
	}

	// Sort by byte order
	sort.Strings(items)

	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		item := RandomString()
		use(sort.SearchStrings(items, item))
	}
}

// BenchmarkHashFunctions is a placeholder.
func BenchmarkHashFunctions(b *testing.B) {
	// Todo: finish BenchmarkHashFunctions
}