├── LICENSE ├── README.md ├── hash.go ├── hash32.go ├── hash32_test.go ├── hash_test.go └── mmh3.go /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 声zz 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | mmh3 2 | ==== 3 | 4 | MurmurHash 3 implementation of hash.Hash interface 5 | -------------------------------------------------------------------------------- /hash.go: -------------------------------------------------------------------------------- 1 | package mmh3 2 | 3 | import ( 4 | "bytes" 5 | "encoding/binary" 6 | "hash" 7 | "reflect" 8 | "unsafe" 9 | ) 10 | 11 | const ( 12 | c1_64 uint64 = 0x87c37b91114253d5 13 | c2_64 uint64 = 0x4cf5ad432745937f 14 | ) 15 | 16 | type hash128 struct { 17 | h1 uint64 18 | h2 uint64 19 | tail []byte 20 | size uint64 21 | } 22 | 23 | func New128() hash.Hash { 24 | return new(hash128) 25 | } 26 | 27 | func (h *hash128) BlockSize() int { 28 | return 16 29 | } 30 | 31 | func (h *hash128) Reset() { 32 | h.h1 = 0 33 | h.h2 = 0 34 | h.tail = nil 35 | h.size = 0 36 | } 37 | 38 | func (h *hash128) Size() int { 39 | return 16 40 | } 41 | 42 | func (h *hash128) Sum(in []byte) []byte { 43 | var k1, k2 uint64 44 | h1 := h.h1 45 | h2 := h.h2 46 | tail := h.tail 47 | 48 | if tail != nil { 49 | switch len(h.tail) { 50 | case 15: 51 | k2 ^= uint64(tail[14]) << 48 52 | fallthrough 53 | case 14: 54 | k2 ^= uint64(tail[13]) << 40 55 | fallthrough 56 | case 13: 57 | k2 ^= uint64(tail[12]) << 32 58 | fallthrough 59 | case 12: 60 | k2 ^= uint64(tail[11]) << 24 61 | fallthrough 62 | case 11: 63 | k2 ^= uint64(tail[10]) << 16 64 | fallthrough 65 | case 10: 66 | k2 ^= uint64(tail[9]) << 8 67 | fallthrough 68 | case 9: 69 | k2 ^= uint64(tail[8]) 70 | k2 *= c2_64 71 | k2 = (k2 << 33) | (k2 >> (64 - 33)) 72 | k2 *= c1_64 73 | h2 ^= k2 74 | fallthrough 75 | case 8: 76 | k1 ^= uint64(tail[7]) << 56 77 | fallthrough 78 | case 7: 79 | k1 ^= uint64(tail[6]) << 48 80 | fallthrough 81 | case 6: 82 | k1 ^= uint64(tail[5]) << 40 83 | fallthrough 84 | case 5: 85 | k1 ^= uint64(tail[4]) << 32 86 | fallthrough 87 | case 4: 88 | k1 ^= uint64(tail[3]) << 24 89 | fallthrough 90 | case 3: 91 | k1 ^= uint64(tail[2]) << 16 92 | fallthrough 93 | case 2: 94 | k1 ^= uint64(tail[1]) << 8 95 | fallthrough 96 | case 1: 97 | k1 ^= uint64(tail[0]) 98 | k1 *= c1_64 99 | k1 = (k1 << 31) | (k1 >> (64 - 31)) 100 | k1 *= c2_64 101 | h1 ^= k1 102 | } 103 | } 104 | 105 | h1 ^= uint64(h.size) 106 | h2 ^= uint64(h.size) 107 | h1 += h2 108 | h2 += h1 109 | h1 ^= h1 >> 33 110 | h1 *= 0xff51afd7ed558ccd 111 | h1 ^= h1 >> 33 112 | h1 *= 0xc4ceb9fe1a85ec53 113 | h1 ^= h1 >> 33 114 | h2 ^= h2 >> 33 115 | h2 *= 0xff51afd7ed558ccd 116 | h2 ^= h2 >> 33 117 | h2 *= 0xc4ceb9fe1a85ec53 118 | h2 ^= h2 >> 33 119 | h1 += h2 120 | h2 += h1 121 | 122 | h.h1 = h1 123 | h.h2 = h2 124 | 125 | ret := make([]byte, 16) 126 | retHeader := (*reflect.SliceHeader)(unsafe.Pointer(&ret)) 127 | var tuple []uint64 128 | tupleHeader := (*reflect.SliceHeader)(unsafe.Pointer(&tuple)) 129 | tupleHeader.Data = retHeader.Data 130 | tupleHeader.Len = 2 131 | tupleHeader.Cap = 2 132 | tuple[0] = h1 133 | tuple[1] = h2 134 | 135 | if in == nil { 136 | return ret 137 | } 138 | return append(in, ret...) 139 | 140 | } 141 | 142 | func (h *hash128) Write(key []byte) (n int, err error) { 143 | n = len(key) 144 | h.size += uint64(n) 145 | h1 := h.h1 146 | h2 := h.h2 147 | 148 | if h.tail != nil { 149 | n := 16 - len(h.tail) 150 | if n > len(key) { 151 | n = len(key) 152 | } 153 | h.tail = append(h.tail, key[:n]...) 154 | key = key[n:] 155 | if len(h.tail) == 16 { // a full block 156 | var k1, k2 uint64 157 | r := bytes.NewReader(h.tail) 158 | binary.Read(r, binary.LittleEndian, &k1) 159 | binary.Read(r, binary.LittleEndian, &k2) 160 | k1 *= c1_64 161 | k1 = (k1 << 31) | (k1 >> (64 - 31)) 162 | k1 *= c2_64 163 | h1 ^= k1 164 | h1 = (h1 << 27) | (h1 >> (64 - 27)) 165 | h1 += h2 166 | h1 = h1*5 + 0x52dce729 167 | k2 *= c2_64 168 | k2 = (k2 << 33) | (k2 >> (64 - 33)) 169 | k2 *= c1_64 170 | h2 ^= k2 171 | h2 = (h2 << 31) | (h2 >> (64 - 31)) 172 | h2 += h1 173 | h2 = h2*5 + 0x38495ab5 174 | h.tail = nil 175 | } 176 | } 177 | 178 | length := len(key) 179 | nblocks := length / 16 180 | if nblocks > 0 { 181 | var k1, k2 uint64 182 | var blocks [][2]uint64 183 | keyHeader := (*reflect.SliceHeader)(unsafe.Pointer(&key)) 184 | blocksHeader := (*reflect.SliceHeader)(unsafe.Pointer(&blocks)) 185 | blocksHeader.Data = keyHeader.Data 186 | blocksHeader.Len = nblocks 187 | blocksHeader.Cap = nblocks 188 | for _, b := range blocks { 189 | k1, k2 = b[0], b[1] 190 | k1 *= c1_64 191 | k1 = (k1 << 31) | (k1 >> (64 - 31)) 192 | k1 *= c2_64 193 | h1 ^= k1 194 | h1 = (h1 << 27) | (h1 >> (64 - 27)) 195 | h1 += h2 196 | h1 = h1*5 + 0x52dce729 197 | k2 *= c2_64 198 | k2 = (k2 << 33) | (k2 >> (64 - 33)) 199 | k2 *= c1_64 200 | h2 ^= k2 201 | h2 = (h2 << 31) | (h2 >> (64 - 31)) 202 | h2 += h1 203 | h2 = h2*5 + 0x38495ab5 204 | } 205 | } 206 | 207 | if length%16 != 0 { 208 | h.tail = key[nblocks*16 : length] 209 | } 210 | 211 | h.h1 = h1 212 | h.h2 = h2 213 | return 214 | } 215 | -------------------------------------------------------------------------------- /hash32.go: -------------------------------------------------------------------------------- 1 | package mmh3 2 | 3 | import ( 4 | "hash" 5 | "reflect" 6 | "unsafe" 7 | ) 8 | 9 | const ( 10 | c1_32 uint32 = 0xcc9e2d51 11 | c2_32 uint32 = 0x1b873593 12 | ) 13 | 14 | type hash32 struct { 15 | hash uint32 16 | tail []byte 17 | size uint32 18 | } 19 | 20 | func New32() hash.Hash32 { 21 | return new(hash32) 22 | } 23 | 24 | func (h *hash32) BlockSize() int { 25 | return 4 26 | } 27 | 28 | func (h *hash32) Reset() { 29 | h.hash = 0 30 | h.tail = nil 31 | h.size = 0 32 | } 33 | 34 | func (h *hash32) Size() int { 35 | return 4 36 | } 37 | 38 | func (h *hash32) Sum(in []byte) []byte { 39 | var k uint32 40 | hh := h.hash 41 | if h.tail != nil { 42 | switch len(h.tail) { 43 | case 3: 44 | k ^= uint32(h.tail[2]) << 16 45 | fallthrough 46 | case 2: 47 | k ^= uint32(h.tail[1]) << 8 48 | fallthrough 49 | case 1: 50 | k ^= uint32(h.tail[0]) 51 | k *= c1_32 52 | k = (k << 15) | (k >> (32 - 15)) 53 | k *= c2_32 54 | hh ^= k 55 | } 56 | } 57 | hh ^= h.size 58 | hh ^= hh >> 16 59 | hh *= 0x85ebca6b 60 | hh ^= hh >> 13 61 | hh *= 0xc2b2ae35 62 | hh ^= hh >> 16 63 | h.hash = hh 64 | if in == nil { 65 | return []byte{byte(hh), byte(hh >> 8), byte(hh >> 16), byte(hh >> 24)} 66 | } 67 | return append(in, byte(hh), byte(hh>>8), byte(hh>>16), byte(hh>>24)) 68 | } 69 | 70 | func (h *hash32) Sum32() uint32 { 71 | res := h.Sum(nil) 72 | return uint32(res[0]) + uint32(res[1])<<8 + uint32(res[2])<<16 + uint32(res[3])<<24 73 | } 74 | 75 | func (h *hash32) Write(key []byte) (n int, err error) { 76 | n = len(key) 77 | h.size += uint32(n) 78 | hh := h.hash 79 | 80 | if h.tail != nil { 81 | for len(key) > 0 && len(h.tail) < 4 { 82 | h.tail = append(h.tail, key[0]) 83 | key = key[1:] 84 | } 85 | if len(h.tail) == 4 { // a full block 86 | k := uint32(h.tail[0]) + uint32(h.tail[1])<<8 + uint32(h.tail[2])<<16 + uint32(h.tail[3])<<24 87 | k *= c1_32 88 | k = (k << 15) | (k >> (32 - 15)) 89 | k *= c2_32 90 | hh ^= k 91 | hh = (hh << 13) | (hh >> (32 - 13)) 92 | hh = (hh * 5) + 0xe6546b64 93 | h.tail = nil 94 | } 95 | } 96 | 97 | length := len(key) 98 | nblocks := length / 4 99 | if nblocks > 0 { 100 | var blocks []uint32 101 | keyHeader := (*reflect.SliceHeader)(unsafe.Pointer(&key)) 102 | blocksHeader := (*reflect.SliceHeader)(unsafe.Pointer(&blocks)) 103 | blocksHeader.Data = keyHeader.Data 104 | blocksHeader.Len = nblocks 105 | blocksHeader.Cap = nblocks 106 | for _, k := range blocks { 107 | k *= c1_32 108 | k = (k << 15) | (k >> (32 - 15)) 109 | k *= c2_32 110 | hh ^= k 111 | hh = (hh << 13) | (hh >> (32 - 13)) 112 | hh = (hh * 5) + 0xe6546b64 113 | } 114 | } 115 | 116 | if length%4 != 0 { 117 | h.tail = key[nblocks*4 : length] 118 | } 119 | 120 | h.hash = hh 121 | return 122 | } 123 | -------------------------------------------------------------------------------- /hash32_test.go: -------------------------------------------------------------------------------- 1 | package mmh3 2 | 3 | import ( 4 | "crypto/rand" 5 | "fmt" 6 | "io" 7 | "testing" 8 | ) 9 | 10 | func TestHash32(t *testing.T) { 11 | h := New32() 12 | 13 | cases := map[string]string{ 14 | "": "00000000", 15 | "hello": "47fa8b24", 16 | "foobar": "bdd4c4a4", 17 | "ooooooo": "cc77ff34", 18 | "我能吞下玻璃而不伤身体": "841a69c4", 19 | } 20 | for key, hex := range cases { 21 | h.Write([]byte(key)) 22 | if fmt.Sprintf("%x", h.Sum(nil)) != hex { 23 | t.Fatal() 24 | } 25 | h.Reset() 26 | 27 | for _, c := range key { 28 | h.Write([]byte(string(c))) 29 | } 30 | if fmt.Sprintf("%x", h.Sum(nil)) != hex { 31 | t.Fatal() 32 | } 33 | h.Reset() 34 | } 35 | 36 | cases2 := map[string]uint32{ 37 | "": 0, 38 | "hello": 613153351, 39 | "foobar": 2764362941, 40 | "ooooooo": 889157580, 41 | "我能吞下玻璃而不伤身体": 3295222404, 42 | } 43 | for key, hash := range cases2 { 44 | h.Write([]byte(key)) 45 | if h.Sum32() != hash { 46 | t.Fatal() 47 | } 48 | h.Reset() 49 | 50 | if Sum32([]byte(key)) != hash { 51 | t.Fatal() 52 | } 53 | } 54 | 55 | // for coverage 56 | if h.BlockSize() != 4 { 57 | t.Fatal() 58 | } 59 | if h.Size() != 4 { 60 | t.Fatal() 61 | } 62 | h.Sum([]byte{'o'}) 63 | } 64 | 65 | func bench32(b *testing.B, bytes int) { 66 | bs := make([]byte, bytes) 67 | io.ReadFull(rand.Reader, bs) 68 | b.SetBytes(int64(bytes)) 69 | b.ResetTimer() 70 | for i := 0; i < b.N; i++ { 71 | Sum32(bs) 72 | } 73 | } 74 | 75 | func BenchmarkHash32_1(b *testing.B) { bench32(b, 1) } 76 | func BenchmarkHash32_2(b *testing.B) { bench32(b, 2) } 77 | func BenchmarkHash32_4(b *testing.B) { bench32(b, 4) } 78 | func BenchmarkHash32_8(b *testing.B) { bench32(b, 8) } 79 | func BenchmarkHash32_16(b *testing.B) { bench32(b, 16) } 80 | func BenchmarkHash32_32(b *testing.B) { bench32(b, 32) } 81 | func BenchmarkHash32_64(b *testing.B) { bench32(b, 64) } 82 | func BenchmarkHash32_128(b *testing.B) { bench32(b, 128) } 83 | func BenchmarkHash32_256(b *testing.B) { bench32(b, 256) } 84 | func BenchmarkHash32_512(b *testing.B) { bench32(b, 512) } 85 | func BenchmarkHash32_1024(b *testing.B) { bench32(b, 1024) } 86 | func BenchmarkHash32_2048(b *testing.B) { bench32(b, 2048) } 87 | func BenchmarkHash32_4096(b *testing.B) { bench32(b, 4096) } 88 | func BenchmarkHash32_8192(b *testing.B) { bench32(b, 8192) } 89 | -------------------------------------------------------------------------------- /hash_test.go: -------------------------------------------------------------------------------- 1 | package mmh3 2 | 3 | import ( 4 | "crypto/rand" 5 | "fmt" 6 | "io" 7 | "testing" 8 | ) 9 | 10 | func TestHash128(t *testing.T) { 11 | h := New128() 12 | 13 | cases := map[string]string{ 14 | "": "00000000000000000000000000000000", 15 | "hello": "029bbd41b3a7d8cb191dae486a901e5b", 16 | "foobar": "455ac81671aed2bdafd6f8bae055a274", 17 | "ooooooooooooooo": "a9bd51f7e15176d22148141c49ea8fa5", 18 | "我能吞下玻璃而不伤身体": "2ea7aa45a1a1e43d44afaa81c30d1a37", 19 | } 20 | for key, hex := range cases { 21 | h.Write([]byte(key)) 22 | if fmt.Sprintf("%x", h.Sum(nil)) != hex { 23 | t.Fatal() 24 | } 25 | h.Reset() 26 | 27 | for _, c := range key { 28 | h.Write([]byte(string(c))) 29 | } 30 | if fmt.Sprintf("%x", h.Sum(nil)) != hex { 31 | t.Fatal() 32 | } 33 | h.Reset() 34 | 35 | if fmt.Sprintf("%x", Sum128([]byte(key))) != hex { 36 | t.Fatal() 37 | } 38 | } 39 | 40 | // for coverage 41 | if h.BlockSize() != 16 { 42 | t.Fatal() 43 | } 44 | if h.Size() != 16 { 45 | t.Fatal() 46 | } 47 | h.Sum([]byte{'o'}) 48 | } 49 | 50 | func bench128(b *testing.B, bytes int) { 51 | bs := make([]byte, bytes) 52 | io.ReadFull(rand.Reader, bs) 53 | b.SetBytes(int64(bytes)) 54 | b.ResetTimer() 55 | for i := 0; i < b.N; i++ { 56 | Sum128(bs) 57 | } 58 | } 59 | 60 | func BenchmarkHash128_1(b *testing.B) { bench128(b, 1) } 61 | func BenchmarkHash128_2(b *testing.B) { bench128(b, 2) } 62 | func BenchmarkHash128_4(b *testing.B) { bench128(b, 4) } 63 | func BenchmarkHash128_8(b *testing.B) { bench128(b, 8) } 64 | func BenchmarkHash128_16(b *testing.B) { bench128(b, 16) } 65 | func BenchmarkHash128_32(b *testing.B) { bench128(b, 32) } 66 | func BenchmarkHash128_64(b *testing.B) { bench128(b, 64) } 67 | func BenchmarkHash128_128(b *testing.B) { bench128(b, 128) } 68 | func BenchmarkHash128_256(b *testing.B) { bench128(b, 256) } 69 | func BenchmarkHash128_512(b *testing.B) { bench128(b, 512) } 70 | func BenchmarkHash128_1024(b *testing.B) { bench128(b, 1024) } 71 | func BenchmarkHash128_2048(b *testing.B) { bench128(b, 2048) } 72 | func BenchmarkHash128_4096(b *testing.B) { bench128(b, 4096) } 73 | func BenchmarkHash128_8192(b *testing.B) { bench128(b, 8192) } 74 | -------------------------------------------------------------------------------- /mmh3.go: -------------------------------------------------------------------------------- 1 | package mmh3 2 | 3 | import "sync" 4 | 5 | var ( 6 | Hash32x86 = Sum32 7 | Hash128x64 = Sum128 8 | 9 | //for backward compatible 10 | Hash32 = Sum32 11 | Hash128 = Sum128 12 | ) 13 | 14 | var ( 15 | pool128 = sync.Pool{ 16 | New: func() interface{} { 17 | return New128() 18 | }, 19 | } 20 | 21 | pool32 = sync.Pool{ 22 | New: func() interface{} { 23 | return New32() 24 | }, 25 | } 26 | ) 27 | 28 | func Sum32(key []byte) (ret uint32) { 29 | h := pool32.Get().(*hash32) 30 | h.Write(key) 31 | ret = h.Sum32() 32 | h.Reset() 33 | pool32.Put(h) 34 | return 35 | } 36 | 37 | func Sum128(key []byte) (ret []byte) { 38 | h := pool128.Get().(*hash128) 39 | h.Write(key) 40 | ret = h.Sum(nil) 41 | h.Reset() 42 | pool128.Put(h) 43 | return 44 | } 45 | --------------------------------------------------------------------------------