├── go.sum
├── go.mod
├── v2
    ├── go.mod
    ├── conformance_test.go
    ├── iscompatible.go
    ├── MIT-LICENSE.txt
    ├── optimal_test.go
    ├── statistics.go
    ├── fileio.go
    ├── binarymarshaler.go
    ├── new.go
    ├── binaryunmarshaler.go
    ├── bloomfilter.go
    ├── fileio_test.go
    └── bloomfilter_test.go
├── codecov.yml
├── .deepsource.toml
├── .circleci
    └── config.yml
├── MIT-LICENSE.txt
└── README.md


/go.sum:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/holiman/bloomfilter
2 | 
3 | go 1.15
4 | 


--------------------------------------------------------------------------------
/v2/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/holiman/bloomfilter/v2
2 | 
3 | go 1.15
4 | 


--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
 1 | 
 2 | codecov:
 3 |   require_ci_to_pass: no
 4 | 
 5 | coverage:
 6 |   status:
 7 |     project: no
 8 |     patch: no
 9 | 
10 | comment:
11 |   layout: "diff"
12 | 


--------------------------------------------------------------------------------
/.deepsource.toml:
--------------------------------------------------------------------------------
 1 | version = 1
 2 | 
 3 | test_patterns = ["*_test.go"]
 4 | 
 5 | [[analyzers]]
 6 | name = "go"
 7 | enabled = true
 8 | 
 9 |   [analyzers.meta]
10 |   import_paths = ["github.com/holiman/bloomfilter"]


--------------------------------------------------------------------------------
/.circleci/config.yml:
--------------------------------------------------------------------------------
 1 | # Golang CircleCI 2.0 configuration file
 2 | #
 3 | # Check https://circleci.com/docs/2.0/language-go/ for more details
 4 | version: 2
 5 | jobs:
 6 |   build:
 7 |     docker:
 8 |       # specify the version
 9 |       - image: cimg/go:1.19
10 | 
11 |     steps:
12 |       - checkout
13 | 
14 |       # specify any bash command here prefixed with `run: `
15 |       - run: (cd v2 && go test -v ./... -coverprofile=coverage.txt -covermode=count )
16 |       - run:
17 |           name: "Codecov upload"
18 |           command: bash <(curl -s https://codecov.io/bash)
19 |       - run:
20 |           name: "Install tools"
21 |           command: curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin v1.54.2
22 |       - run:
23 |           name: "Lint"
24 |           command: (cd v2 && golangci-lint run)
25 | 
26 | 


--------------------------------------------------------------------------------
/v2/conformance_test.go:
--------------------------------------------------------------------------------
 1 | // Package bloomfilter is face-meltingly fast, thread-safe,
 2 | // marshalable, unionable, probability- and
 3 | // optimal-size-calculating Bloom filter in go
 4 | //
 5 | // https://github.com/holiman/bloomfilter
 6 | //
 7 | // Original source:
 8 | // https://github.com/steakknife/bloomfilter
 9 | //
10 | // Copyright © 2014, 2015, 2018 Barry Allard
11 | // Copyright © 2020 Martin Holst Swende
12 | //
13 | // MIT license
14 | //
15 | 
16 | package v2
17 | 
18 | import (
19 | 	"encoding"
20 | 	"encoding/json"
21 | 	"io"
22 | )
23 | 
24 | // compile-time conformance tests
25 | var (
26 | 	_ encoding.BinaryMarshaler   = (*Filter)(nil)
27 | 	_ encoding.BinaryUnmarshaler = (*Filter)(nil)
28 | 	_ io.ReaderFrom              = (*Filter)(nil)
29 | 	_ io.WriterTo                = (*Filter)(nil)
30 | 	_ json.Marshaler             = (*Filter)(nil)
31 | 	_ json.Unmarshaler           = (*Filter)(nil)
32 | )
33 | 


--------------------------------------------------------------------------------
/v2/iscompatible.go:
--------------------------------------------------------------------------------
 1 | // Package bloomfilter is face-meltingly fast, thread-safe,
 2 | // marshalable, unionable, probability- and
 3 | // optimal-size-calculating Bloom filter in go
 4 | //
 5 | // https://github.com/holiman/bloomfilter
 6 | //
 7 | // Original source:
 8 | // https://github.com/steakknife/bloomfilter
 9 | //
10 | // Copyright © 2014, 2015, 2018 Barry Allard
11 | // Copyright © 2020 Martin Holst Swende
12 | //
13 | // MIT license
14 | //
15 | 
16 | package v2
17 | 
// noBranchCompareUint64s returns 0 if b0 and b1 have the same length and the
// same contents, and a non-zero value otherwise.
//
// The element comparison accumulates XOR differences instead of returning on
// the first mismatch. A length mismatch is folded into the result as well, so
// the function never indexes past the end of the shorter slice.
func noBranchCompareUint64s(b0, b1 []uint64) uint64 {
	// Non-zero whenever the lengths differ.
	r := uint64(len(b0) ^ len(b1))

	// Compare only the common prefix; the length term above already marks
	// slices of different length as unequal.
	if len(b1) < len(b0) {
		b0 = b0[:len(b1)]
	}
	b1 = b1[:len(b0)]

	for i, b0i := range b0 {
		r |= b0i ^ b1[i]
	}
	return r
}
26 | 
27 | // IsCompatible is true if f and f2 can be Union()ed together
28 | func (f *Filter) IsCompatible(f2 *Filter) bool {
29 | 	// 0 is true, non-0 is false
30 | 	compat := f.M() ^ f2.M()
31 | 	compat |= f.K() ^ f2.K()
32 | 	compat |= noBranchCompareUint64s(f.keys, f2.keys)
33 | 	return compat == 0
34 | }
35 | 


--------------------------------------------------------------------------------
/MIT-LICENSE.txt:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 | Copyright © 2014, 2015 Barry Allard
3 | 
4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
5 | 
6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
7 | 
8 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
9 | 


--------------------------------------------------------------------------------
/v2/MIT-LICENSE.txt:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 | Copyright © 2014, 2015 Barry Allard
3 | 
4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
5 | 
6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
7 | 
8 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
9 | 


--------------------------------------------------------------------------------
/v2/optimal_test.go:
--------------------------------------------------------------------------------
 1 | // Package bloomfilter is face-meltingly fast, thread-safe,
 2 | // marshalable, unionable, probability- and
 3 | // optimal-size-calculating Bloom filter in go
 4 | //
 5 | // https://github.com/holiman/bloomfilter
 6 | //
 7 | // Original source:
 8 | // https://github.com/steakknife/bloomfilter
 9 | //
10 | // Copyright © 2014, 2015, 2018 Barry Allard
11 | // Copyright © 2020 Martin Holst Swende
12 | //
13 | // MIT license
14 | //
15 | 
16 | package v2
17 | 
18 | import (
19 | 	"testing"
20 | )
21 | 
22 | func TestOptimal(t *testing.T) {
23 | 	tests := []struct {
24 | 		n    uint64
25 | 		p    float64
26 | 		k, m uint64
27 | 	}{
28 | 		{
29 | 			n: 1000,
30 | 			p: 0.01 / 100,
31 | 			k: 14,
32 | 			m: 19171,
33 | 		},
34 | 		{
35 | 			n: 10000,
36 | 			p: 0.01 / 100,
37 | 			k: 14,
38 | 			m: 191702,
39 | 		},
40 | 		{
41 | 			n: 10000,
42 | 			p: 0.01 / 100,
43 | 			k: 14,
44 | 			m: 191702,
45 | 		},
46 | 		{
47 | 			n: 1000,
48 | 			p: 0.001 / 100,
49 | 			k: 17,
50 | 			m: 23963,
51 | 		},
52 | 	}
53 | 
54 | 	for _, test := range tests {
55 | 		m := OptimalM(test.n, test.p)
56 | 		k := OptimalK(m, test.n)
57 | 
58 | 		if k != test.k || m != test.m {
59 | 			t.Errorf(
60 | 				"n=%d p=%f: expected (m=%d, k=%d), got (m=%d, k=%d)",
61 | 				test.n,
62 | 				test.p,
63 | 				test.m,
64 | 				test.k,
65 | 				m,
66 | 				k,
67 | 			)
68 | 		}
69 | 	}
70 | }
71 | 


--------------------------------------------------------------------------------
/v2/statistics.go:
--------------------------------------------------------------------------------
 1 | // Package bloomfilter is face-meltingly fast, thread-safe,
 2 | // marshalable, unionable, probability- and
 3 | // optimal-size-calculating Bloom filter in go
 4 | //
 5 | // https://github.com/holiman/bloomfilter
 6 | //
 7 | // Original source:
 8 | // https://github.com/steakknife/bloomfilter
 9 | //
10 | // Copyright © 2014, 2015, 2018 Barry Allard
11 | // Copyright © 2020 Martin Holst Swende
12 | //
13 | // MIT license
14 | //
15 | 
16 | package v2
17 | 
18 | import (
19 | 	"math"
20 | 	"math/bits"
21 | )
22 | 
// CountBitsUint64s returns the total number of 1 bits across all words of b.
func CountBitsUint64s(b []uint64) int {
	total := 0
	for i := range b {
		total += bits.OnesCount64(b[i])
	}
	return total
}
31 | 
32 | // PreciseFilledRatio is an exhaustive count # of 1's
33 | func (f *Filter) PreciseFilledRatio() float64 {
34 | 	f.lock.RLock()
35 | 	defer f.lock.RUnlock()
36 | 	return float64(CountBitsUint64s(f.bits)) / float64(f.M())
37 | }
38 | 
39 | // N is how many elements have been inserted
40 | // (actually, how many Add()s have been performed?)
41 | func (f *Filter) N() uint64 {
42 | 	f.lock.RLock()
43 | 	defer f.lock.RUnlock()
44 | 
45 | 	return f.n
46 | }
47 | 
48 | // FalsePosititveProbability is the upper-bound probability of false positives
49 | //  (1 - exp(-k*(n+0.5)/(m-1))) ** k
50 | func (f *Filter) FalsePosititveProbability() float64 {
51 | 	k := float64(f.K())
52 | 	n := float64(f.N())
53 | 	m := float64(f.M())
54 | 	return math.Pow(1.0-math.Exp((-k)*(n+0.5)/(m-1)), k)
55 | }
56 | 


--------------------------------------------------------------------------------
/v2/fileio.go:
--------------------------------------------------------------------------------
  1 | // Package bloomfilter is face-meltingly fast, thread-safe,
  2 | // marshalable, unionable, probability- and
  3 | // optimal-size-calculating Bloom filter in go
  4 | //
  5 | // https://github.com/holiman/bloomfilter
  6 | //
  7 | // Original source:
  8 | // https://github.com/steakknife/bloomfilter
  9 | //
 10 | // Copyright © 2014, 2015, 2018 Barry Allard
 11 | // Copyright © 2020 Martin Holst Swende
 12 | //
 13 | // MIT license
 14 | //
 15 | 
 16 | package v2
 17 | 
 18 | import (
 19 | 	"compress/gzip"
 20 | 	"encoding/json"
 21 | 	"errors"
 22 | 	"io"
 23 | 	"os"
 24 | )
 25 | 
 26 | // ReadFrom r and overwrite f with new Bloom filter data
 27 | func (f *Filter) ReadFrom(r io.Reader) (n int64, err error) {
 28 | 	f2, n, err := ReadFrom(r)
 29 | 	if err != nil {
 30 | 		return -1, err
 31 | 	}
 32 | 	f.lock.Lock()
 33 | 	defer f.lock.Unlock()
 34 | 	f.m = f2.m
 35 | 	f.n = f2.n
 36 | 	f.bits = f2.bits
 37 | 	f.keys = f2.keys
 38 | 	return n, nil
 39 | }
 40 | 
 41 | // ReadFrom Reader r into a lossless-compressed Bloom filter f
 42 | func ReadFrom(r io.Reader) (f *Filter, n int64, err error) {
 43 | 	f = new(Filter)
 44 | 	rawR, err := gzip.NewReader(r)
 45 | 	if err != nil {
 46 | 		return nil, -1, err
 47 | 	}
 48 | 	defer rawR.Close()
 49 | 	n, err = f.UnmarshalFromReader(rawR)
 50 | 	if err != nil {
 51 | 		return nil, -1, err
 52 | 	}
 53 | 	return f, n, nil
 54 | }
 55 | 
 56 | // ReadFile from filename into a lossless-compressed Bloom Filter f
 57 | // Suggested file extension: .bf.gz
 58 | func ReadFile(filename string) (f *Filter, n int64, err error) {
 59 | 	r, err := os.Open(filename)
 60 | 	if err != nil {
 61 | 		return nil, -1, err
 62 | 	}
 63 | 	defer r.Close()
 64 | 
 65 | 	return ReadFrom(r)
 66 | }
 67 | 
 68 | // WriteTo a Writer w from lossless-compressed Bloom Filter f
 69 | func (f *Filter) WriteTo(w io.Writer) (n int64, err error) {
 70 | 	rawW := gzip.NewWriter(w)
 71 | 	defer rawW.Close()
 72 | 
 73 | 	intN, _, err := f.MarshallToWriter(rawW)
 74 | 	n = int64(intN)
 75 | 	return n, err
 76 | }
 77 | 
 78 | // WriteFile filename from a a lossless-compressed Bloom Filter f
 79 | // Suggested file extension: .bf.gz
 80 | func (f *Filter) WriteFile(filename string) (n int64, err error) {
 81 | 	w, err := os.Create(filename)
 82 | 	if err != nil {
 83 | 		return -1, err
 84 | 	}
 85 | 	defer w.Close()
 86 | 
 87 | 	return f.WriteTo(w)
 88 | }
 89 | 
// jsonType is the JSON representation of a Filter, produced by MarshalJSON
// and consumed by UnmarshalJSON.
type jsonType struct {
	// Version is checked against the package version string when decoding.
	Version string   `json:"version"`
	// Bits, Keys, M and N mirror the Filter fields bits, keys, m and n.
	Bits    []uint64 `json:"bits"`
	Keys    []uint64 `json:"keys"`
	M       uint64   `json:"m"`
	N       uint64   `json:"n"`
}
 97 | 
 98 | func (f *Filter) MarshalJSON() ([]byte, error) {
 99 | 	return json.Marshal(&jsonType{
100 | 		string(version),
101 | 		f.bits,
102 | 		f.keys,
103 | 		f.m,
104 | 		f.n,
105 | 	})
106 | }
107 | 
108 | func (f *Filter) UnmarshalJSON(data []byte) error {
109 | 	var j jsonType
110 | 	if err := json.Unmarshal(data, &j); err != nil {
111 | 		return err
112 | 	}
113 | 	if j.Version != string(version) {
114 | 		return errors.New("incompatible version")
115 | 	}
116 | 	f.bits = j.Bits
117 | 	f.keys = j.Keys
118 | 	f.n = j.N
119 | 	f.m = j.M
120 | 	return nil
121 | }
122 | 


--------------------------------------------------------------------------------
/v2/binarymarshaler.go:
--------------------------------------------------------------------------------
  1 | // Package bloomfilter is face-meltingly fast, thread-safe,
  2 | // marshalable, unionable, probability- and
  3 | // optimal-size-calculating Bloom filter in go
  4 | //
  5 | // https://github.com/holiman/bloomfilter
  6 | //
  7 | // Original source:
  8 | // https://github.com/steakknife/bloomfilter
  9 | //
 10 | // Copyright © 2014, 2015, 2018 Barry Allard
 11 | // Copyright © 2020 Martin Holst Swende
 12 | //
 13 | // MIT license
 14 | //
 15 | 
 16 | package v2
 17 | 
 18 | import (
 19 | 	"bytes"
 20 | 	"crypto/sha512"
 21 | 	"encoding/binary"
 22 | 	"io"
 23 | )
 24 | 
 25 | // headerMagic is used to disambiguate between this package and the original
 26 | // steakknife implementation.
 27 | // Since the key hashing algorithm has changed, the format is no longer
 28 | // binary compatible
 29 | var version = []byte("v02\n")
 30 | var headerMagic = append([]byte{0, 0, 0, 0, 0, 0, 0, 0}, version...)
 31 | 
 32 | // counter is a utility to count bytes written
 33 | type counter struct {
 34 | 	bytes int
 35 | }
 36 | 
 37 | func (c *counter) Write(p []byte) (n int, err error) {
 38 | 	count := len(p)
 39 | 	c.bytes += count
 40 | 	return count, nil
 41 | }
 42 | 
 43 | // conforms to encoding.BinaryMarshaler
 44 | 
// MarshallToWriter marshalls the filter into the given io.Writer.
//
// Binary layout (Little Endian):
//
//	 magic	headerMagic: 8 zero bytes followed by the version string
//	 k	1 uint64
//	 n	1 uint64
//	 m	1 uint64
//	 keys	[k]uint64
//	 bits	[len(bits)]uint64 ((m+63)/64 words for filters built by newBits)
//	 hash	sha384 (384 bits == 48 bytes) of everything written before it
//
//	 size = len(headerMagic) + (3 + k + len(bits)) * 8 + 48 bytes
//
// It returns the number of bytes written (the trailing hash is included on
// the path that reaches it), the SHA-384 hash of the payload, and the first
// error encountered. On an early error the returned hash is all zeroes.
// The filter's read lock is held for the whole call.
func (f *Filter) MarshallToWriter(out io.Writer) (int, [sha512.Size384]byte, error) {
	var (
		c      = &counter{0}
		hasher = sha512.New384()
		// Everything written through mw goes to the output, the hasher and
		// the byte counter at once.
		mw     = io.MultiWriter(out, hasher, c)
		hash   [sha512.Size384]byte
	)
	f.lock.RLock()
	defer f.lock.RUnlock()

	if _, err := mw.Write(headerMagic); err != nil {
		return c.bytes, hash, err
	}
	if err := binary.Write(mw, binary.LittleEndian, []uint64{f.K(), f.n, f.m}); err != nil {
		return c.bytes, hash, err
	}
	if err := binary.Write(mw, binary.LittleEndian, f.keys); err != nil {
		return c.bytes, hash, err
	}
	// Write it in chunks of 5% (but at least 4K). Otherwise, the binary.Write will allocate a
	// same-size slice of bytes, doubling the memory usage
	var chunkSize = len(f.bits) / 20
	if chunkSize < 512 {
		chunkSize = 512 // Min 4K bytes (512 uint64s)
	}
	buf := make([]byte, chunkSize*8)
	for start := 0; start < len(f.bits); {
		end := start + chunkSize
		if end > len(f.bits) {
			end = len(f.bits)
		}
		// Encode this chunk of words into the reusable byte buffer.
		for i, x := range f.bits[start:end] {
			binary.LittleEndian.PutUint64(buf[8*i:], x)
		}
		// Only the part of buf filled for this chunk is written.
		if _, err := mw.Write(buf[0 : (end-start)*8]); err != nil {
			return c.bytes, hash, err
		}
		start = end
	}
	// Now we stop using the multiwriter, pick out the hash of what we've
	// written so far, and then write the hash to the output
	hashbytes := hasher.Sum(nil)
	copy(hash[:], hashbytes[:sha512.Size384])
	// The hash goes to out directly, so it is neither hashed nor counted by c;
	// its length is added to the returned total by hand.
	err := binary.Write(out, binary.LittleEndian, hashbytes)
	return c.bytes + len(hashbytes), hash, err
}
103 | 
104 | // MarshalBinary converts a Filter into []bytes
105 | func (f *Filter) MarshalBinary() (data []byte, err error) {
106 | 	buf := new(bytes.Buffer)
107 | 	_, _, err = f.MarshallToWriter(buf)
108 | 	if err != nil {
109 | 		return nil, err
110 | 	}
111 | 	data = buf.Bytes()
112 | 	return data, nil
113 | }
114 | 


--------------------------------------------------------------------------------
/v2/new.go:
--------------------------------------------------------------------------------
  1 | // Package bloomfilter is face-meltingly fast, thread-safe,
  2 | // marshalable, unionable, probability- and
  3 | // optimal-size-calculating Bloom filter in go
  4 | //
  5 | // https://github.com/holiman/bloomfilter
  6 | //
  7 | // Original source:
  8 | // https://github.com/steakknife/bloomfilter
  9 | //
 10 | // Copyright © 2014, 2015, 2018 Barry Allard
 11 | // Copyright © 2020 Martin Holst Swende
 12 | //
 13 | // MIT license
 14 | //
 15 | 
 16 | package v2
 17 | 
 18 | import (
 19 | 	crand "crypto/rand"
 20 | 	"encoding/binary"
 21 | 	"fmt"
 22 | 	"math"
 23 | )
 24 | 
 25 | const (
 26 | 	MMin        = 2 // MMin is the minimum Bloom filter bits count
 27 | 	KMin        = 1 // KMin is the minimum number of keys
 28 | 	Uint64Bytes = 8 // Uint64Bytes is the number of bytes in type uint64
 29 | )
 30 | 
 31 | // OptimalK calculates the optimal k value for creating a new Bloom filter
 32 | // maxn is the maximum anticipated number of elements
 33 | func OptimalK(m, maxN uint64) uint64 {
 34 | 	return uint64(math.Ceil(float64(m) * math.Ln2 / float64(maxN)))
 35 | }
 36 | 
 37 | // OptimalM calculates the optimal m value for creating a new Bloom filter
 38 | // p is the desired false positive probability
 39 | // optimal m = ceiling( - n * ln(p) / ln(2)**2 )
 40 | func OptimalM(maxN uint64, p float64) uint64 {
 41 | 	return uint64(math.Ceil(-float64(maxN) * math.Log(p) / (math.Ln2 * math.Ln2)))
 42 | }
 43 | 
 44 | // New Filter with CSPRNG keys
 45 | //
 46 | // m is the size of the Bloom filter, in bits, >= 2
 47 | //
 48 | // k is the number of random keys, >= 1
 49 | func New(m, k uint64) (*Filter, error) {
 50 | 	return NewWithKeys(m, newRandKeys(m, k))
 51 | }
 52 | 
 53 | func newRandKeys(m uint64, k uint64) []uint64 {
 54 | 	keys := make([]uint64, k)
 55 | 	if err := binary.Read(crand.Reader, binary.LittleEndian, keys); err != nil {
 56 | 		panic(fmt.Sprintf("Cannot read %d bytes from CSRPNG crypto/rand.Read (err=%v)",
 57 | 			Uint64Bytes, err))
 58 | 	}
 59 | 	return keys
 60 | }
 61 | 
 62 | // NewCompatible Filter compatible with f
 63 | func (f *Filter) NewCompatible() (*Filter, error) {
 64 | 	return NewWithKeys(f.m, f.keys)
 65 | }
 66 | 
 67 | // NewOptimal Bloom filter with random CSPRNG keys
 68 | func NewOptimal(maxN uint64, p float64) (*Filter, error) {
 69 | 	m := OptimalM(maxN, p)
 70 | 	k := OptimalK(m, maxN)
 71 | 	return New(m, k)
 72 | }
 73 | 
 74 | // uniqueKeys is true if all keys are unique
 75 | func uniqueKeys(keys []uint64) bool {
 76 | 	for j := 0; j < len(keys)-1; j++ {
 77 | 		for i := j + 1; i < len(keys); i++ {
 78 | 			if keys[i] == keys[j] {
 79 | 				return false
 80 | 			}
 81 | 		}
 82 | 	}
 83 | 	return true
 84 | }
 85 | 
 86 | // NewWithKeys creates a new Filter from user-supplied origKeys
 87 | func NewWithKeys(m uint64, origKeys []uint64) (f *Filter, err error) {
 88 | 	var (
 89 | 		bits []uint64
 90 | 		keys []uint64
 91 | 	)
 92 | 	if bits, err = newBits(m); err != nil {
 93 | 		return nil, err
 94 | 	}
 95 | 	if keys, err = newKeysCopy(origKeys); err != nil {
 96 | 		return nil, err
 97 | 	}
 98 | 	return &Filter{
 99 | 		m:    m,
100 | 		n:    0,
101 | 		bits: bits,
102 | 		keys: keys,
103 | 	}, nil
104 | }
105 | 
106 | func newBits(m uint64) ([]uint64, error) {
107 | 	if m < MMin {
108 | 		return nil, fmt.Errorf("number of bits in the filter must be >= %d (was %d)", MMin, m)
109 | 	}
110 | 	return make([]uint64, (m+63)/64), nil
111 | }
112 | 
113 | func newKeysCopy(origKeys []uint64) (keys []uint64, err error) {
114 | 	if len(origKeys) < KMin {
115 | 		return nil, fmt.Errorf("keys must have length %d or greater (was %d)", KMin, len(origKeys))
116 | 	}
117 | 	if !uniqueKeys(origKeys) {
118 | 		return nil, fmt.Errorf("Bloom filter keys must be unique")
119 | 	}
120 | 	keys = append(keys, origKeys...)
121 | 	return keys, err
122 | }
123 | 


--------------------------------------------------------------------------------
/v2/binaryunmarshaler.go:
--------------------------------------------------------------------------------
  1 | // Package bloomfilter is face-meltingly fast, thread-safe,
  2 | // marshalable, unionable, probability- and
  3 | // optimal-size-calculating Bloom filter in go
  4 | //
  5 | // https://github.com/holiman/bloomfilter
  6 | //
  7 | // Original source:
  8 | // https://github.com/steakknife/bloomfilter
  9 | //
 10 | // Copyright © 2014, 2015, 2018 Barry Allard
 11 | // Copyright © 2020 Martin Holst Swende
 12 | //
 13 | // MIT license
 14 | //
 15 | 
 16 | package v2
 17 | 
 18 | import (
 19 | 	"bytes"
 20 | 	"crypto/sha512"
 21 | 	"encoding/binary"
 22 | 	"fmt"
 23 | 	"hash"
 24 | 	"io"
 25 | )
 26 | 
 27 | func unmarshalBinaryHeader(r io.Reader) (k, n, m uint64, err error) {
 28 | 	magic := make([]byte, len(headerMagic))
 29 | 	if _, err := io.ReadFull(r, magic); err != nil {
 30 | 		return 0, 0, 0, err
 31 | 	}
 32 | 	if !bytes.Equal(magic, headerMagic) {
 33 | 		return 0, 0, 0, fmt.Errorf("incompatible version (wrong magic), got %x", magic)
 34 | 	}
 35 | 	var knm = make([]uint64, 3)
 36 | 	err = binary.Read(r, binary.LittleEndian, knm)
 37 | 	if err != nil {
 38 | 		return 0, 0, 0, err
 39 | 	}
 40 | 	k = knm[0]
 41 | 	n = knm[1]
 42 | 	m = knm[2]
 43 | 	if k < KMin {
 44 | 		return 0, 0, 0, fmt.Errorf("keys must have length %d or greater (was %d)", KMin, k)
 45 | 	}
 46 | 	if m < MMin {
 47 | 		return 0, 0, 0, fmt.Errorf("number of bits in the filter must be >= %d (was %d)", MMin, m)
 48 | 	}
 49 | 	return k, n, m, err
 50 | }
 51 | 
 52 | func unmarshalBinaryBits(r io.Reader, m uint64) (bits []uint64, err error) {
 53 | 	bits, err = newBits(m)
 54 | 	if err != nil {
 55 | 		return bits, err
 56 | 	}
 57 | 	bs := make([]byte, 8)
 58 | 	for i := 0; i < len(bits) && err == nil; i++ {
 59 | 		_, err = io.ReadFull(r, bs)
 60 | 		bits[i] = binary.LittleEndian.Uint64(bs)
 61 | 	}
 62 | 	if err != nil {
 63 | 		return nil, err
 64 | 	}
 65 | 	return bits, nil
 66 | }
 67 | 
 68 | func unmarshalBinaryKeys(r io.Reader, k uint64) (keys []uint64, err error) {
 69 | 	keys = make([]uint64, k)
 70 | 	err = binary.Read(r, binary.LittleEndian, keys)
 71 | 	return keys, err
 72 | }
 73 | 
 74 | // hashingReader can be used to read from a reader, and simultaneously
 75 | // do a hash on the bytes that were read
 76 | type hashingReader struct {
 77 | 	reader io.Reader
 78 | 	hasher hash.Hash
 79 | 	tot    int64
 80 | }
 81 | 
 82 | func (h *hashingReader) Read(p []byte) (n int, err error) {
 83 | 	n, err = h.reader.Read(p)
 84 | 	h.tot += int64(n)
 85 | 	if err != nil {
 86 | 		return n, err
 87 | 	}
 88 | 	_, _ = h.hasher.Write(p[:n])
 89 | 	return n, err
 90 | }
 91 | 
 92 | // UnmarshalBinary converts []bytes into a Filter
 93 | // conforms to encoding.BinaryUnmarshaler
 94 | func (f *Filter) UnmarshalBinary(data []byte) (err error) {
 95 | 	buf := bytes.NewBuffer(data)
 96 | 	_, err = f.UnmarshalFromReader(buf)
 97 | 	return err
 98 | }
 99 | 
100 | func (f *Filter) UnmarshalFromReader(input io.Reader) (n int64, err error) {
101 | 	f.lock.Lock()
102 | 	defer f.lock.Unlock()
103 | 
104 | 	buf := &hashingReader{
105 | 		reader: input,
106 | 		hasher: sha512.New384(),
107 | 	}
108 | 	var k uint64
109 | 	k, f.n, f.m, err = unmarshalBinaryHeader(buf)
110 | 	if err != nil {
111 | 		return buf.tot, err
112 | 	}
113 | 
114 | 	f.keys, err = unmarshalBinaryKeys(buf, k)
115 | 	if err != nil {
116 | 		return buf.tot, err
117 | 	}
118 | 	f.bits, err = unmarshalBinaryBits(buf, f.m)
119 | 	if err != nil {
120 | 		return buf.tot, err
121 | 	}
122 | 
123 | 	// Only the hash remains to be read now
124 | 	// so abort the hasher at this point
125 | 	gotHash := buf.hasher.Sum(nil)
126 | 	expHash := make([]byte, sha512.Size384)
127 | 	err = binary.Read(buf, binary.LittleEndian, expHash)
128 | 	if err != nil {
129 | 		return buf.tot, err
130 | 	}
131 | 	if !bytes.Equal(gotHash, expHash) {
132 | 		return buf.tot, errHashMismatch
133 | 	}
134 | 	return buf.tot, nil
135 | }
136 | 


--------------------------------------------------------------------------------
/v2/bloomfilter.go:
--------------------------------------------------------------------------------
  1 | // Package bloomfilter is face-meltingly fast, thread-safe,
  2 | // marshalable, unionable, probability- and
  3 | // optimal-size-calculating Bloom filter in go
  4 | //
  5 | // https://github.com/holiman/bloomfilter
  6 | //
  7 | // Original source:
  8 | // https://github.com/steakknife/bloomfilter
  9 | //
 10 | // Copyright © 2014, 2015, 2018 Barry Allard
 11 | // Copyright © 2020 Martin Holst Swende
 12 | //
 13 | // MIT license
 14 | //
 15 | 
 16 | package v2
 17 | 
 18 | import (
 19 | 	"errors"
 20 | 	"hash"
 21 | 	"sync"
 22 | )
 23 | 
 24 | var (
 25 | 	errHashMismatch = errors.New("hash mismatch, bloom filter corruption or wrong version")
 26 | )
 27 | 
// Filter is an opaque Bloom filter type
type Filter struct {
	// keys holds one value per hashing round; AddHash and ContainsHash XOR
	// each key into the rotated hash to derive a bit position.
	keys []uint64
	m    uint64 // number of bits the "bits" field should recognize

	lock sync.RWMutex // lock guards accesses to the fields below
	// bits is the bit array, packed 64 bits per word.
	bits []uint64
	n    uint64 // number of inserted elements
}
 37 | 
// M is the size of Bloom filter, in bits.
// It reads the field directly and does not take the filter's lock.
func (f *Filter) M() uint64 {
	return f.m
}
 42 | 
// K is the count of keys, i.e. the number of bit positions set or tested
// per element. It does not take the filter's lock.
func (f *Filter) K() uint64 {
	return uint64(len(f.keys))
}
 47 | 
// Add a hashable item, v, to the filter.
// It is shorthand for AddHash(v.Sum64()).
func (f *Filter) Add(v hash.Hash64) {
	f.AddHash(v.Sum64())
}
 52 | 
 53 | // rotation sets how much to rotate the hash on each filter iteration. This
 54 | // is somewhat randomly set to a prime on the lower segment of 64. At 17, the cycle
 55 | // does not repeat for quite a while, but even for low number of filters the
 56 | // changes are quite rapid
 57 | const rotation = 17
 58 | 
 59 | // Adds an already hashes item to the filter.
 60 | // Identical to Add (but slightly faster)
 61 | func (f *Filter) AddHash(hash uint64) {
 62 | 	f.lock.Lock()
 63 | 	defer f.lock.Unlock()
 64 | 	var (
 65 | 		i uint64
 66 | 	)
 67 | 	for n := 0; n < len(f.keys); n++ {
 68 | 		hash = ((hash << rotation) | (hash >> (64 - rotation))) ^ f.keys[n]
 69 | 		i = hash % f.m
 70 | 		f.bits[i>>6] |= 1 << uint(i&0x3f)
 71 | 	}
 72 | 	f.n++
 73 | }
 74 | 
 75 | // ContainsHash tests if f contains the (already hashed) key
 76 | // Identical to Contains but slightly faster
 77 | func (f *Filter) ContainsHash(hash uint64) bool {
 78 | 	f.lock.RLock()
 79 | 	defer f.lock.RUnlock()
 80 | 	var (
 81 | 		i uint64
 82 | 		r = uint64(1)
 83 | 	)
 84 | 	for n := 0; n < len(f.keys) && r != 0; n++ {
 85 | 		hash = ((hash << rotation) | (hash >> (64 - rotation))) ^ f.keys[n]
 86 | 		i = hash % f.m
 87 | 		r &= (f.bits[i>>6] >> uint(i&0x3f)) & 1
 88 | 	}
 89 | 	return r != 0
 90 | }
 91 | 
// Contains tests if f contains v
// false: f definitely does not contain value v
// true:  f maybe contains value v
//
// It is shorthand for ContainsHash(v.Sum64()).
func (f *Filter) Contains(v hash.Hash64) bool {
	return f.ContainsHash(v.Sum64())
}
 98 | 
 99 | // Copy f to a new Bloom filter
100 | func (f *Filter) Copy() (*Filter, error) {
101 | 	f.lock.RLock()
102 | 	defer f.lock.RUnlock()
103 | 
104 | 	out, err := f.NewCompatible()
105 | 	if err != nil {
106 | 		return nil, err
107 | 	}
108 | 	copy(out.bits, f.bits)
109 | 	out.n = f.n
110 | 	return out, nil
111 | }
112 | 
113 | // UnionInPlace merges Bloom filter f2 into f
114 | func (f *Filter) UnionInPlace(f2 *Filter) error {
115 | 	if f == f2 {
116 | 		return nil
117 | 	}
118 | 	if !f.IsCompatible(f2) {
119 | 		return errors.New("incompatible bloom filters")
120 | 	}
121 | 	f.lock.Lock()
122 | 	defer f.lock.Unlock()
123 | 	f2.lock.RLock()
124 | 	defer f2.lock.RUnlock()
125 | 	for i, bitword := range f2.bits {
126 | 		f.bits[i] |= bitword
127 | 	}
128 | 	// Also update the counters
129 | 	f.n += f2.n
130 | 	return nil
131 | }
132 | 
133 | // Union merges f2 and f2 into a new Filter out
134 | func (f *Filter) Union(f2 *Filter) (out *Filter, err error) {
135 | 	if f == f2 {
136 | 		return f.Copy()
137 | 	}
138 | 	if !f.IsCompatible(f2) {
139 | 		return nil, errors.New("incompatible bloom filters")
140 | 	}
141 | 	f.lock.RLock()
142 | 	defer f.lock.RUnlock()
143 | 
144 | 	out, err = f.NewCompatible()
145 | 	if err != nil {
146 | 		return nil, err
147 | 	}
148 | 	f2.lock.RLock()
149 | 	defer f2.lock.RUnlock()
150 | 
151 | 	for i, bitword := range f2.bits {
152 | 		out.bits[i] = f.bits[i] | bitword
153 | 	}
154 | 	// Also update the counters
155 | 	out.n = f.n + f2.n
156 | 	return out, nil
157 | }
158 | 
159 | // Clear clears the bloom filter.
160 | func (f *Filter) Clear() {
161 | 	f.lock.Lock()
162 | 	defer f.lock.Unlock()
163 | 
164 | 	for i := range f.bits {
165 | 		f.bits[i] = 0
166 | 	}
167 | 	f.n = 0 // Also update the counters
168 | }
169 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | 
  2 | [![GoDoc](https://godoc.org/github.com/holiman/bloomfilter?status.png)](https://godoc.org/github.com/holiman/bloomfilter)
  3 | [![CircleCI](https://circleci.com/gh/holiman/bloomfilter.svg?style=svg)](https://app.circleci.com/pipelines/github/holiman/bloomfilter)
  4 | [![codecov](https://codecov.io/gh/holiman/bloomfilter/branch/master/graph/badge.svg?token=O48l6LbHkL)](https://codecov.io/gh/holiman/bloomfilter)
  5 | [![DeepSource](https://deepsource.io/gh/holiman/bloomfilter.svg/?label=active+issues&show_trend=true)](https://deepsource.io/gh/holiman/bloomfilter/?ref=repository-badge)
  6 | 
  7 | # History
  8 | 
  9 | This bloom filter implementation is a fork from [steakknife/bloomfilter](https://github.com/steakknife/bloomfilter) by Barry Allard. 
 10 | The upstream project is now archived, so this fork exists to fix some bugs and also
 11 | make a few improvements. Below is the original description. 
 12 | 
 13 | The original implementation is Copyright © 2014-2016,2018 Barry Allard
 14 | [MIT license](MIT-LICENSE.txt)
 15 | 
 16 | All recent changes are copyright © 2019-2020 Martin Holst Swende. 
 17 | 
 18 | ## Installation 
 19 | 
 20 | ```
 21 | $ go get github.com/holiman/bloomfilter
 22 | ```
 23 | 
 24 | ## Face-meltingly fast, thread-safe, marshalable, unionable, probability- and optimal-size-calculating Bloom filter in go
 25 | 
 26 | ### WTF is a bloom filter
 27 | 
 28 | **TL;DR:** Probabilistic, extra lookup table to track a set of elements kept elsewhere to reduce expensive, unnecessary set element retrieval and/or iterator operations **when an element is not present in the set.** It's a classic time-storage tradeoff algorithm.
 29 | 
 30 | ### Properties
 31 | 
 32 | #### [See wikipedia](https://en.wikipedia.org/wiki/Bloom_filter) for algorithm details
 33 | 
 34 | |Impact|What|Description|
 35 | |---|---|---|
 36 | |Good|No false negatives|know for certain if a given element is definitely NOT in the set|
 37 | |Bad|False positives|uncertain if a given element is in the set|
 38 | |Bad|Theoretical potential for hash collisions|in very large systems and/or badly hash.Hash64-conforming implementations|
 39 | |Bad|Add only|Cannot remove an element, it would destroy information about other elements|
 40 | |Good|Constant storage|uses only a fixed amount of memory|
 41 | 
 42 | ## Naming conventions
 43 | 
 44 | (Similar to algorithm)
 45 | 
 46 | |Variable/function|Description|Range|
 47 | |---|---|---|
 48 | |m/M()|number of bits in the bloom filter (memory representation is about m/8 bytes in size)|>=2|
 49 | |n/N()|number of elements present|>=0|
 50 | |k/K()|number of keys to use (keys are kept private to user code but are de/serialized to Marshal and file I/O)|>=0|
 51 | |maxN|maximum capacity of intended structure|>0|
 52 | |p|maximum allowed probability of collision (for computing m and k for optimal sizing)|>0..<1|
 53 | 
 54 | - Memory representation should be exactly `24 + 8*(k + (m+63)/64) + unsafe.Sizeof(RWMutex)` bytes.
 55 | - Serialized (`BinaryMarshaler`) representation should be exactly `84 + 8*(k + (m+63)/64)` bytes: the 36-byte header and the 48-byte SHA384 from the table below, plus the keys and the bit words. (Disk format is less due to compression.)
 56 | 
 57 | ## Binary serialization format
 58 | 
 59 | All values in Little-endian format
 60 | 
 61 | |Offset|Offset (Hex)|Length (bytes)|Name|Type|
 62 | |---|---|---|---|---|
 63 | |0|00|12|magic + version number|`\0\0\0\0\0\0\0\0v02\n`|
 64 | |12|0c|8|k|`uint64`|
 65 | |20|14|8|n|`uint64`|
 66 | |28|1c|8|m|`uint64`|
 67 | |36|24|8\*k|(keys)|`[k]uint64`|
 68 | |36+8\*k|...|8\*((m+63)/64)|(bloom filter)|`[(m+63)/64]uint64`|
 69 | |36+8\*k+8\*((m+63)/64)|...|48|(SHA384 of all previous fields, hashed in order)|`[48]byte`|
 70 | 
 71 | - `bloomfilter.Filter` conforms to `encoding.BinaryMarshaler` and `encoding.BinaryUnmarshaler`
 72 | 
 73 | ## Usage
 74 | 
 75 | ```go
 76 | 
 77 | import "github.com/holiman/bloomfilter"
 78 | 
 79 | const (
 80 |   maxElements = 100000
 81 |   probCollide = 0.0000001
 82 | )
 83 | 
 84 | bf, err := bloomfilter.NewOptimal(maxElements, probCollide)
 85 | if err != nil {
 86 |   panic(err)
 87 | }
 88 | 
 89 | someValue := ... // must conform to hash.Hash64
 90 | 
 91 | bf.Add(someValue)
 92 | if bf.Contains(someValue) { // always true here: an added element is never reported as absent (no false negatives)
 93 |   // whatever
 94 | }
 95 | 
 96 | anotherValue := ... // must also conform to hash.Hash64
 97 | 
 98 | if bf.Contains(anotherValue) {
 99 |   panic("This should never happen")
100 | }
101 | 
102 | err := bf.WriteFile("1.bf.gz")  // saves this BF to a file
103 | if err != nil {
104 |   panic(err)
105 | }
106 | 
107 | bf2, err := bloomfilter.ReadFile("1.bf.gz") // read the BF to another var
108 | if err != nil {
109 |   panic(err)
110 | }
111 | ```
112 | 
113 | 
114 | ## Design
115 | 
116 | Where possible, branch-free operations are used to avoid deep pipeline / execution unit stalls on branch-misses.
117 | 
118 | ## Contact
119 | 
120 | - [Issues](https://github.com/holiman/bloomfilter/issues)
121 | 
122 | ## License
123 | 
124 | [MIT license](MIT-LICENSE.txt)
125 | 
126 | Copyright © 2014-2016 Barry Allard
127 | Copyright © 2019-2020 Martin Holst Swende
128 | 
129 | 


--------------------------------------------------------------------------------
/v2/fileio_test.go:
--------------------------------------------------------------------------------
  1 | // Package bloomfilter is face-meltingly fast, thread-safe,
  2 | // marshalable, unionable, probability- and
  3 | // optimal-size-calculating Bloom filter in go
  4 | //
  5 | // https://github.com/holiman/bloomfilter
  6 | //
  7 | // Original source:
  8 | // https://github.com/steakknife/bloomfilter
  9 | //
 10 | // Copyright © 2014, 2015, 2018 Barry Allard
 11 | // Copyright © 2020 Martin Holst Swende
 12 | //
 13 | // MIT license
 14 | //
 15 | 
 16 | package v2
 17 | 
 18 | import (
 19 | 	"bytes"
 20 | 	"crypto/sha512"
 21 | 	"encoding/gob"
 22 | 	"encoding/json"
 23 | 	"fmt"
 24 | 	"math/rand"
 25 | 	"os"
 26 | 	"path/filepath"
 27 | 	"runtime"
 28 | 	"testing"
 29 | )
 30 | 
 31 | type devnull struct{} // devnull is a writer that throws away everything written to it
 32 | 
 33 | func (devnull) Write(p []byte) (n int, err error) {
 34 | 	return len(p), nil // report the whole slice as written
 35 | }
 36 | 
 37 | func TestWriteRead(t *testing.T) {
 38 | 	// One filter is shared by all four subtests.
 39 | 	f, _ := New(8*1024*100, 5)
 40 | 	// Insert 20 random items and remember them for the checks below.
 41 | 	var tests = make([]hashableUint64, 20)
 42 | 	for i := range tests {
 43 | 		tests[i] = hashableUint64(rand.Uint64())
 44 | 		f.Add(tests[i])
 45 | 	}
 46 | 	verify := func(t *testing.T, got *Filter) {
 47 | 		for i := range tests {
 48 | 			if !got.Contains(tests[i]) {
 49 | 				t.Errorf("missing item %d", i)
 50 | 			}
 51 | 		}
 52 | 	}
 53 | 
 54 | 	t.Run("binary", func(t *testing.T) {
 55 | 		var buf bytes.Buffer
 56 | 		if _, err := f.WriteTo(&buf); err != nil {
 57 | 			t.Fatal(err)
 58 | 		}
 59 | 		// Keep a private copy: reading from the buffer below consumes it.
 60 | 		raw := append([]byte{}, buf.Bytes()...)
 61 | 		fresh, _, err := ReadFrom(&buf)
 62 | 		if err != nil {
 63 | 			t.Fatal(err)
 64 | 		}
 65 | 		verify(t, fresh)
 66 | 		// Reading into an existing, differently sized filter must overwrite it.
 67 | 		f3, _ := New(8*5, 3)
 68 | 		if _, err = f3.ReadFrom(bytes.NewReader(raw)); err != nil {
 69 | 			t.Fatal(err)
 70 | 		}
 71 | 		verify(t, f3)
 72 | 	})
 73 | 	t.Run("gob", func(t *testing.T) {
 74 | 		// Round-trip through encoding/gob.
 75 | 		var wire bytes.Buffer
 76 | 		if err := gob.NewEncoder(&wire).Encode(f); err != nil {
 77 | 			t.Fatal(err)
 78 | 		}
 79 | 		// Decode into a zero-value Filter.
 80 | 		var decoded Filter
 81 | 		if err := gob.NewDecoder(&wire).Decode(&decoded); err != nil {
 82 | 			t.Fatal(err)
 83 | 		}
 84 | 		verify(t, &decoded)
 85 | 	})
 86 | 
 87 | 	t.Run("json", func(t *testing.T) {
 88 | 		encoded, err := json.Marshal(f)
 89 | 		if err != nil {
 90 | 			t.Fatal(err)
 91 | 		}
 92 | 		decoded := new(Filter)
 93 | 		if err = json.Unmarshal(encoded, decoded); err != nil {
 94 | 			t.Fatal(err)
 95 | 		}
 96 | 		verify(t, decoded)
 97 | 	})
 98 | 	t.Run("file", func(t *testing.T) {
 99 | 		name := filepath.Join(os.TempDir(), "temp.deleteme.gz")
100 | 		if _, err := f.WriteFile(name); err != nil {
101 | 			t.Fatal(err)
102 | 		}
103 | 		defer os.Remove(name) // clean up once the subtest returns
104 | 		loaded, _, err := ReadFile(name)
105 | 		if err != nil {
106 | 			t.Fatal(err)
107 | 		}
108 | 		verify(t, loaded)
109 | 	})
110 | }
111 | 
112 | func TestCorruption(t *testing.T) {
113 | 	// Build a small filter holding 20 random items; flipping one bit of its
114 | 	// serialized form is expected to make decoding fail.
115 | 	f, _ := New(8*32, 5)
116 | 	for i := 0; i < 20; i++ {
117 | 		f.Add(hashableUint64(rand.Uint64()))
118 | 	}
119 | 
120 | 	// flipMiddle toggles the lowest bit of the byte in the middle of p.
121 | 	flipMiddle := func(p []byte) { p[len(p)/2] ^= 1 }
122 | 	t.Run("binary", func(t *testing.T) {
123 | 		var stream bytes.Buffer
124 | 		if _, err := f.WriteTo(&stream); err != nil {
125 | 			t.Fatal(err)
126 | 		}
127 | 		// Bytes() aliases the buffer's storage, so the reader sees the flip.
128 | 		flipMiddle(stream.Bytes())
129 | 		_, _, err := ReadFrom(&stream)
130 | 		if err == nil {
131 | 			t.Errorf("expected error")
132 | 		}
133 | 	})
134 | 
135 | 	t.Run("gob", func(t *testing.T) {
136 | 		var stream bytes.Buffer
137 | 		if err := gob.NewEncoder(&stream).Encode(f); err != nil {
138 | 			t.Fatal(err)
139 | 		}
140 | 		// Corrupt the encoded form in place before decoding it.
141 | 		flipMiddle(stream.Bytes())
142 | 
143 | 		var decoded Filter
144 | 		err := gob.NewDecoder(&stream).Decode(&decoded)
145 | 		if err == nil {
146 | 			t.Errorf("expected error")
147 | 		}
148 | 	})
149 | 
150 | }
151 | 
152 | func bToMb(b uint64) uint64 {
153 | 	return b >> 20 // bytes to MiB: same as b / 1024 / 1024, rounded down
154 | }
155 | func PrintMemUsage() {
156 | 	var m runtime.MemStats
157 | 	runtime.ReadMemStats(&m)
158 | 	// For info on each, see: https://golang.org/pkg/runtime/#MemStats
159 | 	fmt.Printf("Alloc = %v MiB", bToMb(m.Alloc))
160 | 	fmt.Printf("\tTotalAlloc = %v MiB", bToMb(m.TotalAlloc))
161 | 	fmt.Printf("\tSys = %v MiB", bToMb(m.Sys))
162 | 	fmt.Printf("\tNumGC = %v\n", m.NumGC)
163 | }
164 | 
165 | func TestWrite(t *testing.T) {
166 | 	// NOTE(review): the message below says 1mb, but 4*8*1024*1024 is four times 8*1024*1024 — confirm the intended size.
167 | 	filter, _ := New(4*8*1024*1024, 1)
168 | 	fmt.Printf("Allocated 1mb filter\n")
169 | 	PrintMemUsage() // memory figures right after allocation
170 | 	_, _ = filter.WriteTo(devnull{})
171 | 	fmt.Printf("Wrote filter to devnull\n")
172 | 	PrintMemUsage() // memory figures after serializing
173 | }
174 | 
175 | // fillRandom adds N random hashes to the filter, where N is four times the
176 | // number of uint64 words backing the filter.
177 | func fillRandom(f *Filter) {
178 | 	// rand.Int63 is non-negative, so the top bit of every added hash is zero.
179 | 	for left := len(f.bits) * 4; left > 0; left-- {
180 | 		f.AddHash(uint64(rand.Int63()))
181 | 	}
182 | }
183 | 
184 | // TestMarshaller checks that streaming the filter through MarshallToWriter
185 | // yields the same bytes as MarshalBinary, by comparing SHA-384 digests.
186 | func TestMarshaller(t *testing.T) {
187 | 	f, _ := New(1*8*1024*1024, 1)
188 | 	fillRandom(f)
189 | 
190 | 	// Digest of the streamed form.
191 | 	streamed := sha512.New384()
192 | 	_, _, _ = f.MarshallToWriter(streamed)
193 | 
194 | 	// Digest of the in-memory blob.
195 | 	blob, _ := f.MarshalBinary()
196 | 	buffered := sha512.New384()
197 | 	_, _ = buffered.Write(blob)
198 | 	if have, want := streamed.Sum(nil), buffered.Sum(nil); !bytes.Equal(have, want) {
199 | 		t.Errorf("Marshalling error, have %x want %x", have, want)
200 | 	}
201 | }
202 | 
203 | // BenchmarkWrite1Mb measures WriteTo on a filter holding seven fixed values.
204 | func BenchmarkWrite1Mb(b *testing.B) {
205 | 	f, _ := New(1*8*1024*1024, 1)
206 | 	// Seed the filter with a handful of fixed values.
207 | 	seeds := []hashableUint64{
208 | 		0, 1, 1 << 3, 1 << 40, 1 << 23, 1 << 16, 1 << 28,
209 | 	}
210 | 	for _, seed := range seeds {
211 | 		f.Add(seed)
212 | 	}
213 | 
214 | 	b.ReportAllocs()
215 | 	sink := devnull{}
216 | 	for i := 0; i < b.N; i++ {
217 | 		_, _ = f.WriteTo(sink)
218 | 	}
219 | }
220 | 


--------------------------------------------------------------------------------
/v2/bloomfilter_test.go:
--------------------------------------------------------------------------------
  1 | // Package bloomfilter is face-meltingly fast, thread-safe,
  2 | // marshalable, unionable, probability- and
  3 | // optimal-size-calculating Bloom filter in go
  4 | //
  5 | // https://github.com/holiman/bloomfilter
  6 | //
  7 | // Original source:
  8 | // https://github.com/steakknife/bloomfilter
  9 | //
 10 | // Copyright © 2014, 2015, 2018 Barry Allard
 11 | // Copyright © 2020 Martin Holst Swende
 12 | //
 13 | // MIT license
 14 | //
 15 | 
 16 | package v2
 17 | 
 18 | import (
 19 | 	"fmt"
 20 | 	"math/rand"
 21 | 	"testing"
 22 | )
 23 | 
 24 | // hashableUint64 carries the method set of hash.Hash64 for use in tests, but
 25 | // only Sum64 works: it returns the underlying value. Every other method panics.
 26 | type hashableUint64 uint64
 27 | 
 28 | func (hashableUint64) Write([]byte) (int, error) {
 29 | 	panic("Unimplemented")
 30 | }
 31 | 
 32 | func (hashableUint64) Sum([]byte) []byte {
 33 | 	panic("Unimplemented")
 34 | }
 35 | 
 36 | func (hashableUint64) Reset() {
 37 | 	panic("Unimplemented")
 38 | }
 39 | 
 40 | func (hashableUint64) BlockSize() int {
 41 | 	panic("Unimplemented")
 42 | }
 43 | 
 44 | func (hashableUint64) Size() int {
 45 | 	panic("Unimplemented")
 46 | }
 47 | 
 48 | func (v hashableUint64) Sum64() uint64 {
 49 | 	return uint64(v) // the value is its own hash
 50 | }
 51 | 
 52 | // hashableUint64Values returns the five values that Test0 adds to its filter
 53 | // and then looks up again.
 54 | func hashableUint64Values() []hashableUint64 {
 55 | 	return []hashableUint64{
 56 | 		0, 7,
 57 | 		0x0c0ffee0,
 58 | 		0xdeadbeef, 0xffffffff,
 59 | 	}
 60 | }
 61 | 
 62 | // hashableUint64NotValues returns five values that Test0 never adds to its
 63 | // filter; none of them appears in hashableUint64Values.
 64 | func hashableUint64NotValues() []hashableUint64 {
 65 | 	return []hashableUint64{
 66 | 		1, 5, 42,
 67 | 		0xa5a5a5a5,
 68 | 		0xfffffffe,
 69 | 	}
 70 | }
 71 | 
 72 | func Test0(t *testing.T) {
 73 | 	bf, _ := New(10000, 5)
 74 | 
 75 | 	t.Log("Filled ratio before adds :", bf.PreciseFilledRatio())
 76 | 	for _, x := range hashableUint64Values() {
 77 | 		bf.Add(x)
 78 | 	}
 79 | 	t.Log("Filled ratio after adds :", bf.PreciseFilledRatio())
 80 | 
 81 | 	// Every added value must be reported as present: a miss here is a false negative and fails the test.
 82 | 	for _, y := range hashableUint64Values() {
 83 | 		if bf.Contains(y) {
 84 | 			t.Log("value in set queries: may contain ", y)
 85 | 		} else {
 86 | 			t.Fatal("value in set queries: definitely does not contain ", y,
 87 | 				", but it should")
 88 | 		}
 89 | 	}
 90 | 
 91 | 	// These values were never added; a hit is tolerated as a false positive, so both outcomes are only logged.
 92 | 	for _, z := range hashableUint64NotValues() {
 93 | 		if bf.Contains(z) {
 94 | 			t.Log("value not in set queries: may or may not contain ", z)
 95 | 		} else {
 96 | 			t.Log("value not in set queries: definitely does not contain ", z,
 97 | 				" which is correct")
 98 | 		}
 99 | 	}
100 | }
101 | 
102 | func TestUnion(t *testing.T) {
103 | 	f1, _ := New(8*500, 4)
104 | 	tmp, _ := New(8*500, 4)
105 | 	if _, err := tmp.Union(f1); err == nil {
106 | 		t.Errorf("Incompatible, should error")
107 | 	}
108 | 	f2, err := f1.NewCompatible()
109 | 	if err != nil {
110 | 		t.Fatal(err)
111 | 	}
112 | 	rand.Seed(1337)
113 | 	// Add some content
114 | 	var tests = make([]hashableUint64, 200)
115 | 	for i := 0; i < len(tests); i++ {
116 | 		tests[i] = hashableUint64(rand.Uint64())
117 | 		if i&1 == 0 {
118 | 			f1.Add(tests[i])
119 | 		} else {
120 | 			f2.Add(tests[i])
121 | 		}
122 | 	}
123 | 	unionF, err := f2.Union(f1)
124 | 	if err != nil {
125 | 		t.Fatal(err)
126 | 	}
127 | 	copyF, err := unionF.Copy()
128 | 	if err != nil {
129 | 		t.Fatal(err)
130 | 	}
131 | 
132 | 	for i, v := range tests {
133 | 		if !unionF.Contains(v) {
134 | 			t.Errorf("missing item %d", i)
135 | 		}
136 | 		if !copyF.Contains(v) {
137 | 			t.Errorf("missing item %d", i)
138 | 		}
139 | 		if i&1 == 0 {
140 | 			if !f1.Contains(v) {
141 | 				t.Errorf("missing item %d", i)
142 | 			}
143 | 			if f2.Contains(v) {
144 | 				t.Errorf("f2 has item it shouldn't have")
145 | 			}
146 | 		} else {
147 | 			if !f2.Contains(v) {
148 | 				t.Errorf("missing item %d", i)
149 | 			}
150 | 			if f1.Contains(v) {
151 | 				t.Errorf("f1 has item it shouldn't have")
152 | 			}
153 | 		}
154 | 	}
155 | 	// And test merging f1 into f2
156 | 	if err := f2.UnionInPlace(f1); err != nil {
157 | 		t.Fatal(err)
158 | 	}
159 | 
160 | 	for i, v := range tests {
161 | 		if !f2.Contains(v) {
162 | 			t.Errorf("missing item %d", i)
163 | 		}
164 | 		if i&1 == 0 {
165 | 			if !f1.Contains(v) {
166 | 				t.Errorf("missing item %d", i)
167 | 			}
168 | 		} else {
169 | 			if f1.Contains(v) {
170 | 				t.Errorf("f1 has item it shouldn't have")
171 | 			}
172 | 		}
173 | 	}
174 | }
175 | 
176 | func TestFPRate(t *testing.T) {
177 | 	f, _ := New(8*32, 4)
178 | 	f.n = 101 // "insert" 101 items by setting the counter directly
179 | 	want := 0.402507
180 | 	// Only the first three decimals are compared (truncated, not rounded).
181 | 	if have := f.FalsePosititveProbability(); int(1000*have) != int(1000*want) {
182 | 		t.Errorf("have %08f, want %f", have, want)
183 | 	}
184 | }
185 | 
186 | func BenchmarkAddX10kX5(b *testing.B) {
187 | 	bf, _ := New(10000, 5) // both sub-benchmarks add to this one filter
188 | 	b.Run("add-10kx5", func(sub *testing.B) {
189 | 		sub.ReportAllocs()
190 | 		for n := 0; n < sub.N; n++ {
191 | 			bf.Add(hashableUint64(rand.Uint32()))
192 | 		}
193 | 	})
194 | 	b.Run("add-10kx5-hash", func(sub *testing.B) {
195 | 		sub.ReportAllocs()
196 | 		for n := 0; n < sub.N; n++ {
197 | 			bf.AddHash(uint64(rand.Uint32()))
198 | 		}
199 | 	})
200 | }
201 | 
202 | // TestAddX10kX5 checks that Add and AddHash leave two filters with equal bit words.
203 | func TestAddX10kX5(t *testing.T) {
204 | 	b1, _ := New(10000, 5)
205 | 	b2, _ := b1.NewCompatible()
206 | 	verify := func() {
207 | 		for idx, word := range b1.bits {
208 | 			if word != b2.bits[idx] {
209 | 				t.Fatalf("error at bit %d!", idx)
210 | 			}
211 | 		}
212 | 	}
213 | 	for round := 0; round < 1000000; round++ {
214 | 		v := hashableUint64(rand.Uint32())
215 | 		b1.Add(v)             // via the hash.Hash64-style value
216 | 		b2.AddHash(v.Sum64()) // via the raw hash
217 | 		verify()
218 | 		if !b2.Contains(v) {
219 | 			t.Fatal("contain error")
220 | 		}
221 | 	}
222 | }
223 | func BenchmarkContains1kX10kX5(b *testing.B) {
224 | 	bf, _ := New(10000, 5)
225 | 	for added := 0; added < 1000; added++ { // pre-fill with 1000 random values
226 | 		bf.Add(hashableUint64(rand.Uint32()))
227 | 	}
228 | 	b.Run("contains", func(sub *testing.B) {
229 | 		for n := 0; n < sub.N; n++ {
230 | 			bf.Contains(hashableUint64(rand.Uint32()))
231 | 		}
232 | 	})
233 | 	b.Run("containsHash", func(sub *testing.B) {
234 | 		for n := 0; n < sub.N; n++ {
235 | 			bf.ContainsHash(uint64(rand.Uint32()))
236 | 		}
237 | 	})
238 | }
239 | 
240 | func BenchmarkContains100kX10BX20(b *testing.B) {
241 | 	rand.Seed(1337)
242 | 	b.StopTimer() // keep filter construction and pre-fill out of the parent's timing
243 | 	bf, _ := New(10*1000*1000*1000, 20)
244 | 	for added := 0; added < 100*1000; added++ {
245 | 		bf.Add(hashableUint64(rand.Uint32()))
246 | 	}
247 | 	b.Run("contains", func(sub *testing.B) {
248 | 		for n := 0; n < sub.N; n++ {
249 | 			bf.Contains(hashableUint64(rand.Uint32()))
250 | 		}
251 | 	})
252 | 	b.Run("containshash", func(sub *testing.B) {
253 | 		for n := 0; n < sub.N; n++ {
254 | 			bf.ContainsHash(uint64(rand.Uint32()))
255 | 		}
256 | 	})
257 | }
258 | 
259 | func TestContains(t *testing.T) {
260 | 	rand.Seed(1337)
261 | 	bf, _ := New(10*1000*1000, 20)
262 | 	// Every value must be reported as present immediately after it is added.
263 | 	for added := 0; added < 100*10000; added++ {
264 | 		x := hashableUint64(rand.Uint32())
265 | 		if bf.Add(x); !bf.Contains(x) {
266 | 			t.Fatalf("Did not contain newly added elem: %d", x.Sum64())
267 | 		}
268 | 	}
269 | }
270 | 
271 | //BenchmarkUnionInPlace/union-8-6         	   15270	     77848 ns/op
272 | func BenchmarkUnionInPlace(b *testing.B) {
273 | 	var filters []*Filter
274 | 	b1, _ := New(813129, 6)
275 | 	for i := 0; i < 2000; i++ {
276 | 		b1.Add(hashableUint64(rand.Uint32()))
277 | 	}
278 | 	filters = append(filters, b1)
279 | 	for i := 0; i < 7; i++ {
280 | 		b, _ := b1.NewCompatible()
281 | 		filters = append(filters, b)
282 | 	}
283 | 	b.ResetTimer()
284 | 	b.Run("union-8", func(b *testing.B) {
285 | 		for i := 0; i < b.N; i++ {
286 | 			for _, bx := range filters {
287 | 				_ = b1.UnionInPlace(bx)
288 | 			}
289 | 		}
290 | 	})
291 | }
292 | 
293 | func BenchmarkContains94percentMisses(b *testing.B) {
294 | 	// Expected outcome per the original note:
295 | 	// about 5.4K hits and 94k misses
296 | 	rand.Seed(1337)
297 | 	b.StopTimer()
298 | 	bf, _ := New(10*1000*1000, 20)
299 | 	for added := 0; added < 100*1000; added++ {
300 | 		bf.Add(hashableUint64(rand.Uint32()))
301 | 	}
302 | 	b.Run("contains", func(sub *testing.B) {
303 | 		for n := 0; n < sub.N; n++ {
304 | 			bf.Contains(hashableUint64(rand.Uint32()))
305 | 		}
306 | 	})
307 | 	b.Run("containsHash", func(sub *testing.B) {
308 | 		for n := 0; n < sub.N; n++ {
309 | 			bf.ContainsHash(uint64(rand.Uint32()))
310 | 		}
311 | 	})
312 | }
313 | 
314 | // This test is quite long-running, thus disabled
315 | func TestHitrate(t *testing.T) {
316 | 	t.Skip("Long-running test, use only for sanity-checking")
317 | 	/**
318 | 	After changes:
319 | 
320 | 	Fill ratio: 9.303936 %
321 | 	Theoretical hitrate : 0.007493 %
322 | 	Hit rate (100K random tests): 0.009000 % (9 out of 100000)
323 | 	Hit rate (100K random tests): 0.009000 % (9 out of 100000)
324 | 	Zero-filter Hit rate (100K random tests): 9.373000 % (9373 out of 100000)
325 | 	1-filter Hit rate: 9.474021 % (888 out of 9373)
326 | 
327 | 	Original changes:
328 | 
329 | 	Fill ratio: 9.303647 %
330 | 	Theoretical hitrate : 0.007492 %
331 | 	Hit rate (100K random tests): 2.658000 % (2658 out of 100000)
332 | 	Zero-filter Hit rate (100K random tests): 9.456000 % (9456 out of 100000)
333 | 	1-filter Hit rate: 53.489848 % (5058 out of 9456)
334 | 
335 | 	*/
336 | 	// 512 MB bloom filter
337 | 	f, _ := New(512*1024*1024*8, 4)
338 | 
339 | 	// Fill it with 100M items
340 | 	for i := 0; i < 100*1024*1024; i++ {
341 | 		val := rand.Uint64()
342 | 		f.AddHash(val)
343 | 		if !f.ContainsHash(val) {
344 | 			t.Fatalf("Missing value (just inserted) %d", val)
345 | 		}
346 | 	}
347 | 	// Test individual matches
348 | 	numTests := 100000
349 | 	hits := 0
350 | 
351 | 	for i := 0; i < numTests; i++ {
352 | 		h := rand.Uint64()
353 | 		if f.ContainsHash(h) {
354 | 			hits++
355 | 		}
356 | 	}
357 | 	fmt.Printf("Error rate: %f %%\n", 100*f.FalsePosititveProbability())
358 | 	// With four keys, we should obtain fillrate^4 chance of false positive
359 | 	fp := f.PreciseFilledRatio()
360 | 	fmt.Printf("Fill ratio: %02f %%\n", 100*fp)
361 | 	fmt.Printf("Theoretical hitrate : %02f %%\n", 100*fp*fp*fp*fp)
362 | 	fmt.Printf("Hit rate (100K random tests): %02f %% (%d out of %d) \n", 100*float64(hits)/float64(numTests), hits, numTests)
363 | }
364 | 


--------------------------------------------------------------------------------