├── LICENSE ├── README.md ├── hyperloglog.go ├── hyperloglog_test.go ├── murmur.go └── murmur_test.go /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2013 Eric Lesh 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | hyperloglog 2 | =========== 3 | 4 | Package hyperloglog implements the HyperLogLog algorithm for 5 | cardinality estimation. In English: it counts things. It counts things 6 | using very small amounts of memory compared to the number of objects 7 | it is counting. 8 | 9 | For a full description of the algorithm, see the paper HyperLogLog: 10 | the analysis of a near-optimal cardinality estimation algorithm by 11 | Flajolet, et. al. 
at http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf 12 | 13 | For documentation see http://godoc.org/github.com/DataDog/hyperloglog 14 | 15 | Included are a set of fast implementations for murmurhash suitable for use 16 | on 32 and 64 bit integers on little endian machines. 17 | 18 | Quick start 19 | =========== 20 | 21 | $ go get github.com/DataDog/hyperloglog 22 | $ cd $GOPATH/src/github.com/DataDog/hyperloglog 23 | $ go test -test.v 24 | $ go test -bench=. 25 | 26 | License 27 | ======= 28 | 29 | hyperloglog is licensed under the MIT license. 30 | -------------------------------------------------------------------------------- /hyperloglog.go: -------------------------------------------------------------------------------- 1 | // Package hyperloglog implements the HyperLogLog algorithm for 2 | // cardinality estimation. In English: it counts things. It counts 3 | // things using very small amounts of memory compared to the number of 4 | // objects it is counting. 5 | // 6 | // For a full description of the algorithm, see the paper HyperLogLog: 7 | // the analysis of a near-optimal cardinality estimation algorithm by 8 | // Flajolet, et. al. 9 | package hyperloglog 10 | 11 | import ( 12 | "fmt" 13 | "math" 14 | "math/bits" 15 | ) 16 | 17 | const ( 18 | exp32 = 1 << 32 // 2^32 19 | ) 20 | 21 | // A HyperLogLog is a deterministic cardinality estimator. This version 22 | // exports its fields so that it is suitable for saving eg. to a database. 23 | type HyperLogLog struct { 24 | M uint // Number of registers 25 | B uint32 // Number of bits used to determine register index 26 | Alpha float64 // Bias correction constant 27 | Registers []uint8 28 | } 29 | 30 | // Compute bias correction alpha_m. 
31 | func getAlpha(m uint) (result float64) { 32 | switch m { 33 | case 16: 34 | result = 0.673 35 | case 32: 36 | result = 0.697 37 | case 64: 38 | result = 0.709 39 | default: 40 | result = 0.7213 / (1.0 + 1.079/float64(m)) 41 | } 42 | return result 43 | } 44 | 45 | // New creates a HyperLogLog with the given number of registers. More 46 | // registers leads to lower error in your estimated count, at the 47 | // expense of memory. 48 | // 49 | // Choose a power of two number of registers, depending on the amount 50 | // of memory you're willing to use and the error you're willing to 51 | // tolerate. Each register uses one byte of memory. 52 | // 53 | // Standard error will be: σ ≈ 1.04 / sqrt(registers) 54 | // The estimates provided by hyperloglog are expected to be within σ, 2σ, 3σ 55 | // of the exact count in respectively 65%, 95%, 99% of all the cases. 56 | func New(registers uint) (*HyperLogLog, error) { 57 | if registers == 0 { 58 | panic("cannot have zero registers") 59 | } 60 | if (registers & (registers - 1)) != 0 { 61 | return nil, fmt.Errorf("number of registers %d not a power of two", registers) 62 | } 63 | h := &HyperLogLog{} 64 | h.M = registers 65 | h.B = uint32(math.Log2(float64(registers))) 66 | h.Alpha = getAlpha(registers) 67 | h.Registers = make([]uint8, h.M) 68 | return h, nil 69 | } 70 | 71 | // Reset all internal variables and set the count to zero. 72 | func (h *HyperLogLog) Reset() { 73 | for i := range h.Registers { 74 | h.Registers[i] = 0 75 | } 76 | } 77 | 78 | // Add to the count. val should be a 32 bit unsigned integer from a 79 | // good hash function. 80 | func (h *HyperLogLog) Add(val uint32) { 81 | k := 32 - h.B 82 | slice := (val << h.B) | (1 << (h.B - 1)) 83 | r := uint8(bits.LeadingZeros32(slice) + 1) 84 | j := val >> uint(k) 85 | if r > h.Registers[j] { 86 | h.Registers[j] = r 87 | } 88 | } 89 | 90 | // Count returns the estimated cardinality. 
91 | func (h *HyperLogLog) Count() uint64 { 92 | return h.count(true) 93 | } 94 | 95 | // CountWithoutLargeRangeCorrection returns the estimated cardinality, without applying 96 | // the large range correction proposed by Flajolet et al. as it can lead to significant 97 | // overcounting. 98 | // 99 | // See https://github.com/DataDog/hyperloglog/pull/15 100 | func (h *HyperLogLog) CountWithoutLargeRangeCorrection() uint64 { 101 | return h.count(false) 102 | } 103 | 104 | func (h *HyperLogLog) count(withLargeRangeCorrection bool) uint64 { 105 | sum := 0.0 106 | m := float64(h.M) 107 | for _, val := range h.Registers { 108 | sum += 1.0 / float64(int(1)< 0 { 120 | estimate = m * math.Log(m/float64(v)) 121 | } 122 | } else if estimate > 1.0/30.0*exp32 && withLargeRangeCorrection { 123 | // Large range correction 124 | estimate = -exp32 * math.Log(1-estimate/exp32) 125 | } 126 | return uint64(estimate) 127 | } 128 | 129 | // Merge another HyperLogLog into this one. The number of registers in 130 | // each must be the same. 131 | func (h *HyperLogLog) Merge(other *HyperLogLog) error { 132 | if h.M != other.M { 133 | return fmt.Errorf("number of registers doesn't match: %d != %d", 134 | h.M, other.M) 135 | } 136 | 137 | // Trigger boundary check once for h.Registers 138 | registers := h.Registers 139 | _ = registers[len(other.Registers)-1] 140 | 141 | for j, r := range other.Registers { 142 | if r > registers[j] { 143 | registers[j] = r 144 | } 145 | } 146 | return nil 147 | } 148 | -------------------------------------------------------------------------------- /hyperloglog_test.go: -------------------------------------------------------------------------------- 1 | package hyperloglog 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | "hash/fnv" 7 | "io" 8 | "math" 9 | "math/rand" 10 | "os" 11 | "testing" 12 | ) 13 | 14 | // Return a dictionary up to n words. If n is zero, return the entire 15 | // dictionary. 
16 | func dictionary(n int) []string { 17 | var words []string 18 | dict := "/usr/share/dict/words" 19 | f, err := os.Open(dict) 20 | if err != nil { 21 | fmt.Printf("can't open dictionary file '%s': %v\n", dict, err) 22 | os.Exit(1) 23 | } 24 | count := 0 25 | buf := bufio.NewReader(f) 26 | for { 27 | if n != 0 && count >= n { 28 | break 29 | } 30 | word, err := buf.ReadString('\n') 31 | if err != nil { 32 | if err == io.EOF { 33 | break 34 | } 35 | continue 36 | } 37 | words = append(words, word) 38 | count++ 39 | } 40 | f.Close() 41 | return words 42 | } 43 | 44 | func geterror(actual uint64, estimate uint64) (result float64) { 45 | return (float64(estimate) - float64(actual)) / float64(actual) 46 | } 47 | 48 | func testHyperLogLog(t *testing.T, n, lowB, highB int) { 49 | words := dictionary(n) 50 | bad := 0 51 | nWords := uint64(len(words)) 52 | for i := lowB; i < highB; i++ { 53 | m := uint(math.Pow(2, float64(i))) 54 | 55 | h, err := New(m) 56 | if err != nil { 57 | t.Fatalf("can't make New(%d): %v", m, err) 58 | } 59 | 60 | hash := fnv.New32() 61 | for _, word := range words { 62 | hash.Write([]byte(word)) 63 | h.Add(hash.Sum32()) 64 | hash.Reset() 65 | } 66 | 67 | expectedError := 1.04 / math.Sqrt(float64(m)) 68 | actualError := math.Abs(geterror(nWords, h.Count())) 69 | 70 | if actualError > expectedError { 71 | bad++ 72 | t.Logf("m=%d: error=%.5f, expected <%.5f; actual=%d, estimated=%d\n", 73 | m, actualError, expectedError, nWords, h.Count()) 74 | } 75 | 76 | } 77 | t.Logf("%d of %d tests exceeded estimated error", bad, highB-lowB) 78 | } 79 | 80 | func TestHyperLogLogSmall(t *testing.T) { 81 | testHyperLogLog(t, 5, 4, 17) 82 | } 83 | 84 | func TestHyperLogLogBig(t *testing.T) { 85 | testHyperLogLog(t, 0, 4, 17) 86 | } 87 | 88 | func testReset(t *testing.T, m uint, numObjects, runs int) { 89 | rand.Seed(101) 90 | 91 | h, err := New(m) 92 | if err != nil { 93 | t.Fatalf("can't make New(%d): %v", m, err) 94 | } 95 | 96 | for i := 0; i < runs; i++ { 97 | 
for j := 0; j < numObjects; j++ { 98 | h.Add(rand.Uint32()) 99 | } 100 | 101 | oldRegisters := &h.Registers 102 | h.Reset() 103 | if oldRegisters != &h.Registers { 104 | t.Error("registers were reallocated") 105 | } 106 | for _, r := range h.Registers { 107 | if r != 0 { 108 | t.Error("register is not zeroed out after reset") 109 | } 110 | } 111 | } 112 | } 113 | 114 | func TestReset(t *testing.T) { 115 | testReset(t, 512, 1_000_000, 10) 116 | } 117 | 118 | func TestMerge(t *testing.T) { 119 | trueDisinctPerHll := uint64(100000) 120 | m := uint(math.Pow(2, float64(11))) 121 | 122 | h, err := New(m) 123 | h2, err := New(m) 124 | if err != nil { 125 | return 126 | } 127 | 128 | for i := uint64(0); i < trueDisinctPerHll; i++ { 129 | h.Add(Murmur64(i)) 130 | } 131 | 132 | h2.Merge(h) 133 | 134 | if h.Count() != h2.Count() { 135 | t.Errorf("Estimate mismatch after merge, %d != %d", h.Count(), h2.Count()) 136 | } 137 | } 138 | 139 | func BenchmarkReset(b *testing.B) { 140 | m := uint(256) 141 | numObjects := 1000 142 | 143 | h, err := New(m) 144 | if err != nil { 145 | b.Fatalf("can't make New(%d): %v", m, err) 146 | } 147 | 148 | b.ResetTimer() 149 | 150 | for n := 0; n < b.N; n++ { 151 | for i := 0; i < numObjects; i++ { 152 | h.Add(uint32(i)) 153 | } 154 | h.Reset() 155 | } 156 | } 157 | 158 | func benchmarkCount(b *testing.B, registers int) { 159 | words := dictionary(0) 160 | m := uint(math.Pow(2, float64(registers))) 161 | 162 | h, err := New(m) 163 | if err != nil { 164 | return 165 | } 166 | 167 | hash := fnv.New32() 168 | for _, word := range words { 169 | hash.Write([]byte(word)) 170 | h.Add(hash.Sum32()) 171 | hash.Reset() 172 | } 173 | 174 | b.ResetTimer() 175 | for n := 0; n < b.N; n++ { 176 | h.Count() 177 | } 178 | } 179 | 180 | func BenchmarkCount4(b *testing.B) { 181 | benchmarkCount(b, 4) 182 | } 183 | 184 | func BenchmarkCount5(b *testing.B) { 185 | benchmarkCount(b, 5) 186 | } 187 | 188 | func BenchmarkCount6(b *testing.B) { 189 | benchmarkCount(b, 6) 
190 | } 191 | 192 | func BenchmarkCount7(b *testing.B) { 193 | benchmarkCount(b, 7) 194 | } 195 | 196 | func BenchmarkCount8(b *testing.B) { 197 | benchmarkCount(b, 8) 198 | } 199 | 200 | func BenchmarkCount9(b *testing.B) { 201 | benchmarkCount(b, 9) 202 | } 203 | 204 | func BenchmarkCount10(b *testing.B) { 205 | benchmarkCount(b, 10) 206 | } 207 | 208 | func BenchmarkMerge(b *testing.B) { 209 | words := dictionary(0) 210 | m := uint(math.Pow(2, float64(11))) 211 | 212 | h, err := New(m) 213 | h2, err := New(m) 214 | if err != nil { 215 | return 216 | } 217 | 218 | hash := fnv.New32() 219 | for _, word := range words { 220 | hash.Write([]byte(word)) 221 | h.Add(hash.Sum32()) 222 | hash.Reset() 223 | } 224 | 225 | b.ResetTimer() 226 | for n := 0; n < b.N; n++ { 227 | h2.Merge(h) 228 | } 229 | } 230 | -------------------------------------------------------------------------------- /murmur.go: -------------------------------------------------------------------------------- 1 | package hyperloglog 2 | 3 | import ( 4 | "math" 5 | "math/bits" 6 | "reflect" 7 | "unsafe" 8 | ) 9 | 10 | // This file implements the murmur3 32-bit hash on 32bit and 64bit integers 11 | // for little endian machines only with no heap allocation. If you are using 12 | // HLL to count integer IDs on intel machines, this is your huckleberry. 13 | 14 | // MurmurString implements a fast version of the murmur hash function for strings 15 | // for little endian machines. Suitable for adding strings to HLL counter. 16 | func MurmurString(key string) uint32 { 17 | if len(key) == 0 { 18 | return MurmurBytes(nil) 19 | } 20 | // Reinterpret the string as bytes. This is safe because we don't write into the byte array. 
21 | sh := (*reflect.StringHeader)(unsafe.Pointer(&key)) 22 | byteSlice := (*[math.MaxInt32 - 1]byte)(unsafe.Pointer(sh.Data))[:sh.Len:sh.Len] 23 | return MurmurBytes(byteSlice) 24 | } 25 | 26 | // MurmurBytes implements a fast version of the murmur hash function for bytes 27 | // for little endian machines. Suitable for adding strings to HLL counter. 28 | func MurmurBytes(bkey []byte) uint32 { 29 | var c1, c2 uint32 = 0xcc9e2d51, 0x1b873593 30 | var h uint32 31 | 32 | blen := len(bkey) 33 | chunks := blen / 4 // chunk length 34 | 35 | values := (*(*[]uint32)(unsafe.Pointer(&bkey)))[:chunks:chunks] 36 | 37 | for _, k := range values { 38 | k *= c1 39 | k = bits.RotateLeft32(k, 15) 40 | k *= c2 41 | 42 | h ^= k 43 | h = bits.RotateLeft32(h, 13) 44 | h = (h * 5) + 0xe6546b64 45 | } 46 | 47 | var k uint32 48 | tailLength := blen % 4 49 | tailStart := blen - tailLength 50 | // remainder 51 | switch tailLength { 52 | case 3: 53 | k ^= uint32(bkey[tailStart+2]) << 16 54 | fallthrough 55 | case 2: 56 | k ^= uint32(bkey[tailStart+1]) << 8 57 | fallthrough 58 | case 1: 59 | k ^= uint32(bkey[tailStart]) 60 | k *= c1 61 | k = bits.RotateLeft32(k, 15) 62 | k *= c2 63 | h ^= k 64 | } 65 | 66 | h ^= uint32(blen) 67 | h ^= h >> 16 68 | h *= 0x85ebca6b 69 | h ^= h >> 13 70 | h *= 0xc2b2ae35 71 | h ^= h >> 16 72 | 73 | return h 74 | } 75 | 76 | // Murmur32 implements a fast version of the murmur hash function for uint32 for 77 | // little endian machines. Suitable for adding 32bit integers to a HLL counter. 
func Murmur32(i uint32) uint32 {
	// One 4-byte chunk, seed 0, input length 4 bytes.
	return murmurFinalize(murmurMix(0, i), 4)
}

// Murmur64 implements a fast version of the murmur hash function for uint64 for
// little endian machines. Suitable for adding 64bit integers to a HLL counter.
//
// The value is hashed as two 4-byte chunks, low 32 bits first, with seed 0
// and an input length of 8 bytes.
func Murmur64(i uint64) uint32 {
	h := murmurMix(0, uint32(i))
	h = murmurMix(h, uint32(i>>32))
	return murmurFinalize(h, 8)
}

// murmurMix folds one 4-byte chunk k into the running hash h and returns
// the updated hash.
func murmurMix(h, k uint32) uint32 {
	const c1, c2 uint32 = 0xcc9e2d51, 0x1b873593
	k *= c1
	k = bits.RotateLeft32(k, 15)
	k *= c2
	h ^= k
	h = bits.RotateLeft32(h, 13)
	return h*5 + 0xe6546b64
}

// murmurFinalize mixes the input length in bytes into h and applies the
// final xor-shift/multiply rounds.
func murmurFinalize(h, length uint32) uint32 {
	h ^= length
	h ^= h >> 16
	h *= 0x85ebca6b
	h ^= h >> 13
	h *= 0xc2b2ae35
	h ^= h >> 16
	return h
}

// Murmur128 implements a fast version of the murmur hash function for two uint64s
// for little endian machines. Suitable for adding a 128bit value to an HLL counter.
131 | func Murmur128(i, j uint64) uint32 { 132 | var c1, c2 uint32 = 0xcc9e2d51, 0x1b873593 133 | var h, k uint32 134 | //first 4-byte chunk 135 | k = uint32(i) 136 | k *= c1 137 | k = (k << 15) | (k >> (32 - 15)) 138 | k *= c2 139 | h ^= k 140 | h = (h << 13) | (h >> (32 - 13)) 141 | h = (h * 5) + 0xe6546b64 142 | // second 4-byte chunk 143 | k = uint32(i >> 32) 144 | k *= c1 145 | k = (k << 15) | (k >> (32 - 15)) 146 | k *= c2 147 | h ^= k 148 | h = (h << 13) | (h >> (32 - 13)) 149 | h = (h * 5) + 0xe6546b64 150 | // third 4-byte chunk 151 | k = uint32(j) 152 | k *= c1 153 | k = (k << 15) | (k >> (32 - 15)) 154 | k *= c2 155 | h ^= k 156 | h = (h << 13) | (h >> (32 - 13)) 157 | h = (h * 5) + 0xe6546b64 158 | // fourth 4-byte chunk 159 | k = uint32(j >> 32) 160 | k *= c1 161 | k = (k << 15) | (k >> (32 - 15)) 162 | k *= c2 163 | h ^= k 164 | h = (h << 13) | (h >> (32 - 13)) 165 | h = (h * 5) + 0xe6546b64 166 | // second part 167 | h ^= 16 168 | h ^= h >> 16 169 | h *= 0x85ebca6b 170 | h ^= h >> 13 171 | h *= 0xc2b2ae35 172 | h ^= h >> 16 173 | return h 174 | 175 | } 176 | -------------------------------------------------------------------------------- /murmur_test.go: -------------------------------------------------------------------------------- 1 | package hyperloglog 2 | 3 | import ( 4 | "encoding/binary" 5 | "math/rand" 6 | "testing" 7 | "unsafe" 8 | 9 | "github.com/DataDog/mmh3" 10 | "github.com/dustin/randbo" 11 | ) 12 | 13 | var buf32 = make([]byte, 4) 14 | var buf64 = make([]byte, 8) 15 | var buf128 = make([]byte, 16) 16 | 17 | // Test that our abbreviated murmur hash works the same as upstream 18 | func TestMurmur(t *testing.T) { 19 | for i := 0; i < 100; i++ { 20 | x := rand.Int31() 21 | binary.LittleEndian.PutUint32(buf32, uint32(x)) 22 | hash := mmh3.Hash32(buf32) 23 | m := Murmur32(uint32(x)) 24 | if hash != m { 25 | t.Errorf("Hash mismatch on 32 bit %d: expected 0x%X, got 0x%X\n", x, hash, m) 26 | } 27 | } 28 | 29 | for i := 0; i < 100; i++ { 30 | x 
:= rand.Int63() 31 | binary.LittleEndian.PutUint64(buf64, uint64(x)) 32 | hash := mmh3.Hash32(buf64) 33 | m := Murmur64(uint64(x)) 34 | if hash != m { 35 | t.Errorf("Hash mismatch on 64 bit %d: expected 0x%X, got 0x%X\n", x, hash, m) 36 | } 37 | } 38 | 39 | for i := 0; i < 100; i++ { 40 | x := rand.Int63() 41 | y := rand.Int63() 42 | binary.LittleEndian.PutUint64(buf128, uint64(x)) 43 | binary.LittleEndian.PutUint64(buf128[8:], uint64(y)) 44 | hash := mmh3.Hash32(buf128) 45 | m := Murmur128(uint64(x), uint64(y)) 46 | if hash != m { 47 | t.Errorf("Hash mismatch on 128 bit %d,%d: expected 0x%X, got 0x%X\n", x, y, hash, m) 48 | } 49 | } 50 | 51 | for i := 0; i < 100; i++ { 52 | key := randString((i % 15) + 5) 53 | hash := mmh3.Hash32([]byte(key)) 54 | m := MurmurString(key) 55 | if hash != m { 56 | t.Errorf("Hash mismatch on key %s: expected 0x%X, got 0x%X\n", key, hash, m) 57 | } 58 | } 59 | } 60 | 61 | func TestMurmurBytes(t *testing.T) { 62 | b := []byte("hello") 63 | v := MurmurBytes(b) 64 | if v != 613153351 { 65 | t.Fatalf("MurmurBytes failed for %s: %v != %v", b, v, 613153351) 66 | } 67 | } 68 | 69 | func TestMurmurString(t *testing.T) { 70 | s := "hello" 71 | v := MurmurString(s) 72 | if v != 613153351 { 73 | t.Fatalf("MurmurString failed for %s: %v != %v", s, v, 613153351) 74 | } 75 | } 76 | 77 | func TestMurmurStringZero(t *testing.T) { 78 | s := "" 79 | v := MurmurString(s) 80 | if v != 0 { 81 | t.Fatalf("MurmurString failed for %s: %v != %v", s, v, 0) 82 | } 83 | } 84 | 85 | func randString(n int) string { 86 | rand.Seed(10) 87 | letterRunes := []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") 88 | b := make([]rune, n) 89 | for i := range b { 90 | b[i] = letterRunes[rand.Intn(len(letterRunes))] 91 | } 92 | return string(b) 93 | } 94 | 95 | // Benchmarks 96 | func benchmarkMurmurBytes(b *testing.B, input [][]byte) { 97 | b.ResetTimer() 98 | for n := 0; n < b.N; n++ { 99 | for _, x := range input { 100 | MurmurBytes(x) 101 | } 102 | } 103 | } 
104 | 105 | func benchmarkMurmur64(b *testing.B, input []uint64) { 106 | b.ResetTimer() 107 | for n := 0; n < b.N; n++ { 108 | for _, x := range input { 109 | Murmur64(x) 110 | } 111 | } 112 | } 113 | 114 | func benchmarkMurmurString(b *testing.B, input []string) { 115 | b.ResetTimer() 116 | for n := 0; n < b.N; n++ { 117 | for _, x := range input { 118 | MurmurString(x) 119 | } 120 | } 121 | } 122 | 123 | func benchmarkHash32(b *testing.B, input []string) { 124 | b.ResetTimer() 125 | for n := 0; n < b.N; n++ { 126 | for _, x := range input { 127 | b := *(*[]byte)(unsafe.Pointer(&x)) 128 | mmh3.Hash32(b) 129 | } 130 | } 131 | } 132 | 133 | func Benchmark100MurmurBytes(b *testing.B) { 134 | rand.Seed(10) 135 | input := make([][]byte, 100) 136 | for i := 0; i < 100; i++ { 137 | x := make([]byte, 1000) 138 | rand.Read(x) 139 | input[i] = x 140 | } 141 | benchmarkMurmurBytes(b, input) 142 | } 143 | 144 | func Benchmark100Murmur64(b *testing.B) { 145 | rand.Seed(10) 146 | input := make([]uint64, 100) 147 | for i := 0; i < 100; i++ { 148 | input[i] = uint64(rand.Int63()) 149 | } 150 | benchmarkMurmur64(b, input) 151 | } 152 | 153 | func Benchmark100MurmurString(b *testing.B) { 154 | rand.Seed(10) 155 | input := make([]string, 100) 156 | for i := 0; i < 100; i++ { 157 | input[i] = randString((i % 15) + 5) 158 | } 159 | benchmarkMurmurString(b, input) 160 | } 161 | 162 | func Benchmark100Hash32(b *testing.B) { 163 | rand.Seed(10) 164 | input := make([]string, 100) 165 | for i := 0; i < 100; i++ { 166 | input[i] = randString((i % 15) + 5) 167 | } 168 | benchmarkHash32(b, input) 169 | } 170 | 171 | func BenchmarkMurmurStringBig(b *testing.B) { 172 | // Make a 100Mb string and use that as a benchmark 173 | r := randbo.New() 174 | slice := make([]byte, 100*1024*1024) 175 | _, err := r.Read(slice) 176 | if err != nil { 177 | b.Fatalf("Failed to create benchmark data: %s", err) 178 | } 179 | s := string(slice) 180 | b.ResetTimer() 181 | for i := 0; i < b.N; i++ { 182 | 
MurmurString(s) 183 | } 184 | } 185 | --------------------------------------------------------------------------------