├── LICENSE ├── README.md ├── xxhash.go └── xxhash_test.go /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (C) 2013 vova616 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # xxhash 2 | 3 | xxhash is a pure go (golang) implementation of [xxhash](http://code.google.com/p/xxhash/). 
4 | 5 | ## Benchmark 6 | 7 | ```go test github.com/vova616/xxhash -bench=".*"``` 8 | 9 | Core i7-3770K CPU @ 3.50GHz 10 | go version devel +16e0e01c2e9b Sat Mar 09 18:14:00 2013 -0800 windows/386 11 | 12 | ``` 13 | Benchmark_xxhash32 50000000 61.1 ns/op 14 | Benchmark_CRC32IEEE 10000000 145 ns/op 15 | Benchmark_Adler32 10000000 181 ns/op 16 | Benchmark_Fnv32 10000000 162 ns/op 17 | Benchmark_MurmurHash3Hash32 1000000 1927 ns/op 18 | ``` 19 | 20 | # Note: 21 | 22 | The package uses unsafe to get higher performance its safe as far as I know but if you don't want it you can use switch to early commits. -------------------------------------------------------------------------------- /xxhash.go: -------------------------------------------------------------------------------- 1 | package xxhash 2 | 3 | import ( 4 | "errors" 5 | "hash" 6 | "unsafe" 7 | ) 8 | 9 | const ( 10 | PRIME32_1 = 2654435761 11 | PRIME32_2 = 2246822519 12 | PRIME32_3 = 3266489917 13 | PRIME32_4 = 668265263 14 | PRIME32_5 = 374761393 15 | ) 16 | 17 | type XXHash struct { 18 | seed, v1, v2, v3, v4 uint32 19 | total_len uint64 20 | memory [16]byte 21 | memsize int 22 | } 23 | 24 | func New(seed uint32) hash.Hash32 { 25 | return &XXHash{ 26 | seed: seed, 27 | v1: seed + PRIME32_1 + PRIME32_2, 28 | v2: seed + PRIME32_2, 29 | v3: seed, 30 | v4: seed - PRIME32_1, 31 | } 32 | } 33 | 34 | func (self *XXHash) BlockSize() int { 35 | return 1 36 | } 37 | 38 | // Size returns the number of bytes Sum will return. 
39 | func (self *XXHash) Size() int { 40 | return 4 41 | } 42 | 43 | func (self *XXHash) feed(in []byte) uint32 { 44 | p := uintptr(unsafe.Pointer(&in[0])) 45 | pTemp := p 46 | l := len(in) 47 | bEnd := p + uintptr(l) 48 | 49 | self.total_len += uint64(l) 50 | 51 | // fill in tmp buffer 52 | if self.memsize+l < 16 { 53 | copy(self.memory[self.memsize:], in) 54 | self.memsize += l 55 | return 0 56 | } 57 | 58 | if self.memsize > 0 { 59 | copy(self.memory[self.memsize:], in[:16-self.memsize]) 60 | p2 := uintptr(unsafe.Pointer(&self.memory[0])) 61 | self.v1 += (*(*uint32)(unsafe.Pointer(p2))) * PRIME32_2 62 | self.v1 = ((self.v1 << 13) | (self.v1 >> (32 - 13))) * PRIME32_1 63 | 64 | self.v2 += (*(*uint32)(unsafe.Pointer(p2 + 4))) * PRIME32_2 65 | self.v2 = ((self.v2 << 13) | (self.v2 >> (32 - 13))) * PRIME32_1 66 | 67 | self.v3 += (*(*uint32)(unsafe.Pointer(p2 + 8))) * PRIME32_2 68 | self.v3 = ((self.v3 << 13) | (self.v3 >> (32 - 13))) * PRIME32_1 69 | 70 | self.v4 += (*(*uint32)(unsafe.Pointer(p2 + 12))) * PRIME32_2 71 | self.v4 = ((self.v4 << 13) | (self.v4 >> (32 - 13))) * PRIME32_1 72 | 73 | p += 16 - uintptr(self.memsize) 74 | self.memsize = 0 75 | } 76 | 77 | limit := bEnd - 16 78 | v1, v2, v3, v4 := self.v1, self.v2, self.v3, self.v4 79 | 80 | for ; p <= limit; p += 16 { 81 | v1 += (*(*uint32)(unsafe.Pointer(p))) * PRIME32_2 82 | v1 = ((v1 << 13) | (v1 >> (32 - 13))) * PRIME32_1 83 | 84 | v2 += (*(*uint32)(unsafe.Pointer(p + 4))) * PRIME32_2 85 | v2 = ((v2 << 13) | (v2 >> (32 - 13))) * PRIME32_1 86 | 87 | v3 += (*(*uint32)(unsafe.Pointer(p + 8))) * PRIME32_2 88 | v3 = ((v3 << 13) | (v3 >> (32 - 13))) * PRIME32_1 89 | 90 | v4 += (*(*uint32)(unsafe.Pointer(p + 12))) * PRIME32_2 91 | v4 = ((v4 << 13) | (v4 >> (32 - 13))) * PRIME32_1 92 | } 93 | 94 | self.v1 = v1 95 | self.v2 = v2 96 | self.v3 = v3 97 | self.v4 = v4 98 | 99 | limit = bEnd - p 100 | 101 | if limit > 0 { 102 | copy(self.memory[:], in[p-pTemp:bEnd-pTemp]) 103 | self.memsize = int(limit) 104 | } 105 | 
106 | return 0 107 | } 108 | 109 | func (self *XXHash) Sum32() uint32 { 110 | p := uintptr(unsafe.Pointer(&self.memory[0])) 111 | bEnd := p + uintptr(self.memsize) 112 | h32 := uint32(0) 113 | 114 | if self.total_len >= 16 { 115 | h32 = ((self.v1 << 1) | (self.v1 >> (32 - 1))) + 116 | ((self.v2 << 7) | (self.v2 >> (32 - 7))) + 117 | ((self.v3 << 12) | (self.v3 >> (32 - 12))) + 118 | ((self.v4 << 18) | (self.v4 >> (32 - 18))) 119 | } else { 120 | h32 = self.seed + PRIME32_5 121 | } 122 | 123 | h32 += uint32(self.total_len) 124 | 125 | for p <= bEnd-4 { 126 | h32 += (*(*uint32)(unsafe.Pointer(p))) * PRIME32_3 127 | h32 = ((h32 << 17) | (h32 >> (32 - 17))) * PRIME32_4 128 | p += 4 129 | } 130 | 131 | for p < bEnd { 132 | h32 += uint32(*(*byte)(unsafe.Pointer(p))) * PRIME32_5 133 | h32 = ((h32 << 11) | (h32 >> (32 - 11))) * PRIME32_1 134 | p++ 135 | } 136 | 137 | h32 ^= h32 >> 15 138 | h32 *= PRIME32_2 139 | h32 ^= h32 >> 13 140 | h32 *= PRIME32_3 141 | h32 ^= h32 >> 16 142 | 143 | return h32 144 | } 145 | 146 | func (self *XXHash) Sum(in []byte) []byte { 147 | h := self.Sum32() 148 | in = append(in, byte(h>>24)) 149 | in = append(in, byte(h>>16)) 150 | in = append(in, byte(h>>8)) 151 | in = append(in, byte(h)) 152 | return in 153 | } 154 | 155 | func (self *XXHash) Reset() { 156 | seed := self.seed 157 | self.v1 = seed + PRIME32_1 + PRIME32_2 158 | self.v2 = seed + PRIME32_2 159 | self.v3 = seed 160 | self.v4 = seed - PRIME32_1 161 | self.total_len = 0 162 | self.memsize = 0 163 | } 164 | 165 | // Write adds more data to the running hash. 166 | // Length of data MUST BE less than 1 Gigabytes. 
167 | func (self *XXHash) Write(data []byte) (nn int, err error) { 168 | if len(data) == 0 { 169 | return 0, errors.New("Data cannot be nil or empty.") 170 | } 171 | l := len(data) 172 | if l > 1<<30 { 173 | return 0, errors.New("Cannot add more than 1 Gigabytes at once.") 174 | } 175 | self.feed(data) 176 | return len(data), nil 177 | } 178 | 179 | // Checksum32Seed returns the xxhash32 checksum of data using a seed. Length of data MUST BE less than 2 Gigabytes. 180 | func Checksum32(data []byte) uint32 { 181 | return Checksum32Seed(data, 0) 182 | } 183 | 184 | // Checksum32 returns the xxhash32 checksum of data. Length of data MUST BE less than 2 Gigabytes. 185 | func Checksum32Seed(data []byte, seed uint32) uint32 { 186 | if len(data) == 0 { 187 | panic("Data cannot be nil or empty.") 188 | } 189 | p := uintptr(unsafe.Pointer(&data[0])) 190 | l := len(data) 191 | bEnd := p + uintptr(l) 192 | h32 := uint32(0) 193 | 194 | if l >= 16 { 195 | limit := bEnd - 16 196 | 197 | v1 := seed + PRIME32_1 + PRIME32_2 198 | v2 := seed + PRIME32_2 199 | v3 := seed + 0 200 | v4 := seed - PRIME32_1 201 | for { 202 | v1 += (*(*uint32)(unsafe.Pointer(p))) * PRIME32_2 203 | v1 = ((v1 << 13) | (v1 >> (32 - 13))) * PRIME32_1 204 | 205 | v2 += (*(*uint32)(unsafe.Pointer(p + 4))) * PRIME32_2 206 | v2 = ((v2 << 13) | (v2 >> (32 - 13))) * PRIME32_1 207 | 208 | v3 += (*(*uint32)(unsafe.Pointer(p + 8))) * PRIME32_2 209 | v3 = ((v3 << 13) | (v3 >> (32 - 13))) * PRIME32_1 210 | 211 | v4 += (*(*uint32)(unsafe.Pointer(p + 12))) * PRIME32_2 212 | v4 = ((v4 << 13) | (v4 >> (32 - 13))) * PRIME32_1 213 | 214 | p += 16 215 | if p > limit { 216 | break 217 | } 218 | } 219 | h32 = ((v1 << 1) | (v1 >> (32 - 1))) + 220 | ((v2 << 7) | (v2 >> (32 - 7))) + 221 | ((v3 << 12) | (v3 >> (32 - 12))) + 222 | ((v4 << 18) | (v4 >> (32 - 18))) 223 | } else { 224 | h32 = seed + PRIME32_5 225 | } 226 | 227 | h32 += uint32(l) 228 | 229 | for p <= bEnd-4 { 230 | h32 += (*(*uint32)(unsafe.Pointer(p))) * PRIME32_3 231 | 
h32 = ((h32 << 17) | (h32 >> (32 - 17))) * PRIME32_4 232 | p += 4 233 | } 234 | 235 | for p < bEnd { 236 | h32 += uint32(*(*byte)(unsafe.Pointer(p))) * PRIME32_5 237 | h32 = ((h32 << 11) | (h32 >> (32 - 11))) * PRIME32_1 238 | p++ 239 | } 240 | 241 | h32 ^= h32 >> 15 242 | h32 *= PRIME32_2 243 | h32 ^= h32 >> 13 244 | h32 *= PRIME32_3 245 | h32 ^= h32 >> 16 246 | 247 | return h32 248 | } 249 | -------------------------------------------------------------------------------- /xxhash_test.go: -------------------------------------------------------------------------------- 1 | //stollen from bitbucket.org/StephaneBunel/xxhash-go 2 | package xxhash 3 | 4 | import ( 5 | "encoding/binary" 6 | "hash/adler32" 7 | "hash/crc32" 8 | "hash/fnv" 9 | "testing" 10 | ) 11 | 12 | var ( 13 | blob1 = []byte("Lorem ipsum dolor sit amet, consectetuer adipiscing elit, ") 14 | blob2 = []byte("sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat.") 15 | blob3 = []byte("Cookies") 16 | blob4 = []byte("1234567890123456") 17 | VeryBigFile = "a-very-big-file" 18 | ) 19 | 20 | func Test_Checksum32(t *testing.T) { 21 | h32 := Checksum32(blob1) 22 | if h32 != 0x1130e7d4 { 23 | t.Errorf("Checksum32(\"%v\") = 0x%08x need 0x1130e7d4\n", string(blob1), h32) 24 | } 25 | 26 | h32 = Checksum32(blob2) 27 | if h32 != 0x24ca2992 { 28 | t.Errorf("Checksum32(\"%v\") = 0x%08x need 0x24ca2992\n", string(blob2), h32) 29 | } 30 | 31 | h32 = Checksum32(blob3) 32 | if h32 != 0x99dd2ca5 { 33 | t.Errorf("Checksum32(\"%v\") = 0x%08x need 0x99dd2ca5\n", string(blob3), h32) 34 | } 35 | 36 | h32 = Checksum32(blob4) 37 | if h32 != 0x03bf5152 { 38 | t.Errorf("Checksum32(\"%v\") = 0x%08x need 0x03bf5152\n", string(blob4), h32) 39 | } 40 | } 41 | 42 | func Test_Checksum32Seed(t *testing.T) { 43 | h32 := Checksum32Seed(blob1, 1471) 44 | if h32 != 0xba59a258 { 45 | t.Errorf("Checksum32Seed(\"%v\", 1471) = 0x%08x\n need 0xba59a258", string(blob1), h32) 46 | } 47 | 48 | h32 = 
Checksum32Seed(blob2, 1596234) 49 | if h32 != 0xf15f3e02 { 50 | t.Errorf("Checksum32Seed(\"%v\", 1596234) = 0x%08x need 0xf15f3e02\n", string(blob2), h32) 51 | } 52 | 53 | h32 = Checksum32Seed(blob3, 9999666) 54 | if h32 != 0xcd3ae44c { 55 | t.Errorf("Checksum32Seed(\"%v\", 9999666) = 0x%08x need 0xcd3ae44c\n", string(blob3), h32) 56 | } 57 | 58 | h32 = Checksum32Seed(blob4, 1) 59 | if h32 != 0x606913c4 { 60 | t.Errorf("Checksum32Seed(\"%v\", 1) = 0x%08x need 0x606913c4\n", string(blob4), h32) 61 | } 62 | } 63 | 64 | func Test_New32(t *testing.T) { 65 | var digest = New(0) 66 | digest.Write(blob1) 67 | digest.Write(blob2) 68 | h32 := digest.Sum32() 69 | if h32 != 0x0d44373a { 70 | t.Errorf("Sum32 = 0x%08x need 0x0d44373a\n", h32) 71 | } 72 | 73 | digest = New(0) 74 | digest.Write(blob3) 75 | h32 = digest.Sum32() 76 | if h32 != 0x99dd2ca5 { 77 | t.Errorf("Sum32 = 0x%08x need 0x99dd2ca5\n", h32) 78 | } 79 | 80 | digest = New(0) 81 | digest.Write(blob4) 82 | h32 = digest.Sum32() 83 | if h32 != 0x3bf5152 { 84 | t.Errorf("Sum32 = 0x%08x need 0x3bf5152\n", h32) 85 | } 86 | } 87 | 88 | func Test_New32Seed(t *testing.T) { 89 | var digest = New(1471) 90 | digest.Write(blob1) 91 | digest.Write(blob2) 92 | h32 := digest.Sum32() 93 | if h32 != 0x3265e220 { 94 | t.Errorf("Sum32 = 0x%08x need 0x3265e220\n", h32) 95 | } 96 | 97 | digest = New(615324687) 98 | digest.Write(blob3) 99 | h32 = digest.Sum32() 100 | if h32 != 0xb90e95cb { 101 | t.Errorf("Sum32 = 0x%08x need 0x89f56371\n", h32) 102 | } 103 | 104 | digest = New(1) 105 | digest.Write(blob4) 106 | h32 = digest.Sum32() 107 | if h32 != 0x606913c4 { 108 | t.Errorf("Sum32 = 0x%08x need 0x606913c4\n", h32) 109 | } 110 | 111 | } 112 | 113 | func Test_Reset(t *testing.T) { 114 | var digest = New(0) 115 | digest.Write(blob2) 116 | digest.Reset() 117 | digest.Write(blob1) 118 | h32 := digest.Sum32() 119 | if h32 != 0x1130e7d4 { 120 | t.Errorf("Sum32 = 0x%08x need 0x1130e7d4\n", h32) 121 | } 122 | } 123 | 124 | func 
// mmh3Hash32 returns the 32-bit MurmurHash3 (x86_32 variant) of key using a
// seed of zero. It serves as a comparison baseline for the benchmarks.
//
// Adapted from mmh3.Hash32 in https://github.com/reusee/mmh3.
func mmh3Hash32(key []byte) uint32 {
	const (
		c1 uint32 = 0xcc9e2d51
		c2 uint32 = 0x1b873593
	)

	var h uint32

	// Body: mix every complete little-endian 32-bit block.
	rest := key
	for ; len(rest) >= 4; rest = rest[4:] {
		k := binary.LittleEndian.Uint32(rest)
		k *= c1
		k = (k << 15) | (k >> (32 - 15))
		k *= c2
		h ^= k
		h = (h << 13) | (h >> (32 - 13))
		h = h*5 + 0xe6546b64
	}

	// Tail: the last 1-3 bytes are assembled into one partial block.
	var k uint32
	switch len(rest) {
	case 3:
		k ^= uint32(rest[2]) << 16
		fallthrough
	case 2:
		k ^= uint32(rest[1]) << 8
		fallthrough
	case 1:
		k ^= uint32(rest[0])
		k *= c1
		// Rotate left by 15, the same rotation the body applies to k. Both
		// shift amounts must add up to 32 for this to be a rotation.
		k = (k << 15) | (k >> (32 - 15))
		k *= c2
		h ^= k
	}

	// Finalization: fold in the length, then force the bits to avalanche.
	h ^= uint32(len(key))
	h ^= h >> 16
	h *= 0x85ebca6b
	h ^= h >> 13
	h *= 0xc2b2ae35
	h ^= h >> 16
	return h
}