├── LICENSE
├── README.md
├── xxhash.go
└── xxhash_test.go


/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (C) 2013 vova616 <vova616@gmail.com>
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | of this software and associated documentation files (the "Software"), to deal
 5 | in the Software without restriction, including without limitation the rights
 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | copies of the Software, and to permit persons to whom the Software is
 8 | furnished to do so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # xxhash
 2 | 	
 3 | xxhash is a pure Go (golang) implementation of [xxhash](http://code.google.com/p/xxhash/).
 4 | 
 5 | ## Benchmark
 6 | 
 7 | ```go test github.com/vova616/xxhash -bench=".*"```
 8 | 
 9 | Core i7-3770K CPU @ 3.50GHz
10 | go version devel +16e0e01c2e9b Sat Mar 09 18:14:00 2013 -0800 windows/386
11 | 	
12 | ```
13 | Benchmark_xxhash32     			50000000     61.1 ns/op
14 | Benchmark_CRC32IEEE    			10000000      145 ns/op
15 | Benchmark_Adler32      	 		10000000      181 ns/op
16 | Benchmark_Fnv32 				10000000      162 ns/op
17 | Benchmark_MurmurHash3Hash32     1000000      1927 ns/op
18 | ```
19 | 
20 | # Note:
21 | 
22 | The package uses `unsafe` to get higher performance. It is safe as far as I know, but if you don't want it you can switch to an earlier commit.


--------------------------------------------------------------------------------
/xxhash.go:
--------------------------------------------------------------------------------
  1 | package xxhash
  2 | 
  3 | import (
  4 | 	"errors"
  5 | 	"hash"
  6 | 	"unsafe"
  7 | )
  8 | 
  9 | const (
 10 | 	PRIME32_1 = 2654435761
 11 | 	PRIME32_2 = 2246822519
 12 | 	PRIME32_3 = 3266489917
 13 | 	PRIME32_4 = 668265263
 14 | 	PRIME32_5 = 374761393
 15 | )
 16 | 
 17 | type XXHash struct {
 18 | 	seed, v1, v2, v3, v4 uint32
 19 | 	total_len            uint64
 20 | 	memory               [16]byte
 21 | 	memsize              int
 22 | }
 23 | 
 24 | func New(seed uint32) hash.Hash32 {
 25 | 	return &XXHash{
 26 | 		seed: seed,
 27 | 		v1:   seed + PRIME32_1 + PRIME32_2,
 28 | 		v2:   seed + PRIME32_2,
 29 | 		v3:   seed,
 30 | 		v4:   seed - PRIME32_1,
 31 | 	}
 32 | }
 33 | 
 34 | func (self *XXHash) BlockSize() int {
 35 | 	return 1
 36 | }
 37 | 
 38 | // Size returns the number of bytes Sum will return.
 39 | func (self *XXHash) Size() int {
 40 | 	return 4
 41 | }
 42 | 
 43 | func (self *XXHash) feed(in []byte) uint32 {
 44 | 	p := uintptr(unsafe.Pointer(&in[0]))
 45 | 	pTemp := p
 46 | 	l := len(in)
 47 | 	bEnd := p + uintptr(l)
 48 | 
 49 | 	self.total_len += uint64(l)
 50 | 
 51 | 	// fill in tmp buffer
 52 | 	if self.memsize+l < 16 {
 53 | 		copy(self.memory[self.memsize:], in)
 54 | 		self.memsize += l
 55 | 		return 0
 56 | 	}
 57 | 
 58 | 	if self.memsize > 0 {
 59 | 		copy(self.memory[self.memsize:], in[:16-self.memsize])
 60 | 		p2 := uintptr(unsafe.Pointer(&self.memory[0]))
 61 | 		self.v1 += (*(*uint32)(unsafe.Pointer(p2))) * PRIME32_2
 62 | 		self.v1 = ((self.v1 << 13) | (self.v1 >> (32 - 13))) * PRIME32_1
 63 | 
 64 | 		self.v2 += (*(*uint32)(unsafe.Pointer(p2 + 4))) * PRIME32_2
 65 | 		self.v2 = ((self.v2 << 13) | (self.v2 >> (32 - 13))) * PRIME32_1
 66 | 
 67 | 		self.v3 += (*(*uint32)(unsafe.Pointer(p2 + 8))) * PRIME32_2
 68 | 		self.v3 = ((self.v3 << 13) | (self.v3 >> (32 - 13))) * PRIME32_1
 69 | 
 70 | 		self.v4 += (*(*uint32)(unsafe.Pointer(p2 + 12))) * PRIME32_2
 71 | 		self.v4 = ((self.v4 << 13) | (self.v4 >> (32 - 13))) * PRIME32_1
 72 | 
 73 | 		p += 16 - uintptr(self.memsize)
 74 | 		self.memsize = 0
 75 | 	}
 76 | 
 77 | 	limit := bEnd - 16
 78 | 	v1, v2, v3, v4 := self.v1, self.v2, self.v3, self.v4
 79 | 
 80 | 	for ; p <= limit; p += 16 {
 81 | 		v1 += (*(*uint32)(unsafe.Pointer(p))) * PRIME32_2
 82 | 		v1 = ((v1 << 13) | (v1 >> (32 - 13))) * PRIME32_1
 83 | 
 84 | 		v2 += (*(*uint32)(unsafe.Pointer(p + 4))) * PRIME32_2
 85 | 		v2 = ((v2 << 13) | (v2 >> (32 - 13))) * PRIME32_1
 86 | 
 87 | 		v3 += (*(*uint32)(unsafe.Pointer(p + 8))) * PRIME32_2
 88 | 		v3 = ((v3 << 13) | (v3 >> (32 - 13))) * PRIME32_1
 89 | 
 90 | 		v4 += (*(*uint32)(unsafe.Pointer(p + 12))) * PRIME32_2
 91 | 		v4 = ((v4 << 13) | (v4 >> (32 - 13))) * PRIME32_1
 92 | 	}
 93 | 
 94 | 	self.v1 = v1
 95 | 	self.v2 = v2
 96 | 	self.v3 = v3
 97 | 	self.v4 = v4
 98 | 
 99 | 	limit = bEnd - p
100 | 
101 | 	if limit > 0 {
102 | 		copy(self.memory[:], in[p-pTemp:bEnd-pTemp])
103 | 		self.memsize = int(limit)
104 | 	}
105 | 
106 | 	return 0
107 | }
108 | 
109 | func (self *XXHash) Sum32() uint32 {
110 | 	p := uintptr(unsafe.Pointer(&self.memory[0]))
111 | 	bEnd := p + uintptr(self.memsize)
112 | 	h32 := uint32(0)
113 | 
114 | 	if self.total_len >= 16 {
115 | 		h32 = ((self.v1 << 1) | (self.v1 >> (32 - 1))) +
116 | 			((self.v2 << 7) | (self.v2 >> (32 - 7))) +
117 | 			((self.v3 << 12) | (self.v3 >> (32 - 12))) +
118 | 			((self.v4 << 18) | (self.v4 >> (32 - 18)))
119 | 	} else {
120 | 		h32 = self.seed + PRIME32_5
121 | 	}
122 | 
123 | 	h32 += uint32(self.total_len)
124 | 
125 | 	for p <= bEnd-4 {
126 | 		h32 += (*(*uint32)(unsafe.Pointer(p))) * PRIME32_3
127 | 		h32 = ((h32 << 17) | (h32 >> (32 - 17))) * PRIME32_4
128 | 		p += 4
129 | 	}
130 | 
131 | 	for p < bEnd {
132 | 		h32 += uint32(*(*byte)(unsafe.Pointer(p))) * PRIME32_5
133 | 		h32 = ((h32 << 11) | (h32 >> (32 - 11))) * PRIME32_1
134 | 		p++
135 | 	}
136 | 
137 | 	h32 ^= h32 >> 15
138 | 	h32 *= PRIME32_2
139 | 	h32 ^= h32 >> 13
140 | 	h32 *= PRIME32_3
141 | 	h32 ^= h32 >> 16
142 | 
143 | 	return h32
144 | }
145 | 
146 | func (self *XXHash) Sum(in []byte) []byte {
147 | 	h := self.Sum32()
148 | 	in = append(in, byte(h>>24))
149 | 	in = append(in, byte(h>>16))
150 | 	in = append(in, byte(h>>8))
151 | 	in = append(in, byte(h))
152 | 	return in
153 | }
154 | 
155 | func (self *XXHash) Reset() {
156 | 	seed := self.seed
157 | 	self.v1 = seed + PRIME32_1 + PRIME32_2
158 | 	self.v2 = seed + PRIME32_2
159 | 	self.v3 = seed
160 | 	self.v4 = seed - PRIME32_1
161 | 	self.total_len = 0
162 | 	self.memsize = 0
163 | }
164 | 
165 | // Write adds more data to the running hash.
166 | // Length of data MUST BE less than 1 Gigabytes.
167 | func (self *XXHash) Write(data []byte) (nn int, err error) {
168 | 	if len(data) == 0 {
169 | 		return 0, errors.New("Data cannot be nil or empty.")
170 | 	}
171 | 	l := len(data)
172 | 	if l > 1<<30 {
173 | 		return 0, errors.New("Cannot add more than 1 Gigabytes at once.")
174 | 	}
175 | 	self.feed(data)
176 | 	return len(data), nil
177 | }
178 | 
179 | // Checksum32Seed returns the xxhash32 checksum of data using a seed. Length of data MUST BE less than 2 Gigabytes.
180 | func Checksum32(data []byte) uint32 {
181 | 	return Checksum32Seed(data, 0)
182 | }
183 | 
184 | // Checksum32 returns the xxhash32 checksum of data. Length of data MUST BE less than 2 Gigabytes.
185 | func Checksum32Seed(data []byte, seed uint32) uint32 {
186 | 	if len(data) == 0 {
187 | 		panic("Data cannot be nil or empty.")
188 | 	}
189 | 	p := uintptr(unsafe.Pointer(&data[0]))
190 | 	l := len(data)
191 | 	bEnd := p + uintptr(l)
192 | 	h32 := uint32(0)
193 | 
194 | 	if l >= 16 {
195 | 		limit := bEnd - 16
196 | 
197 | 		v1 := seed + PRIME32_1 + PRIME32_2
198 | 		v2 := seed + PRIME32_2
199 | 		v3 := seed + 0
200 | 		v4 := seed - PRIME32_1
201 | 		for {
202 | 			v1 += (*(*uint32)(unsafe.Pointer(p))) * PRIME32_2
203 | 			v1 = ((v1 << 13) | (v1 >> (32 - 13))) * PRIME32_1
204 | 
205 | 			v2 += (*(*uint32)(unsafe.Pointer(p + 4))) * PRIME32_2
206 | 			v2 = ((v2 << 13) | (v2 >> (32 - 13))) * PRIME32_1
207 | 
208 | 			v3 += (*(*uint32)(unsafe.Pointer(p + 8))) * PRIME32_2
209 | 			v3 = ((v3 << 13) | (v3 >> (32 - 13))) * PRIME32_1
210 | 
211 | 			v4 += (*(*uint32)(unsafe.Pointer(p + 12))) * PRIME32_2
212 | 			v4 = ((v4 << 13) | (v4 >> (32 - 13))) * PRIME32_1
213 | 
214 | 			p += 16
215 | 			if p > limit {
216 | 				break
217 | 			}
218 | 		}
219 | 		h32 = ((v1 << 1) | (v1 >> (32 - 1))) +
220 | 			((v2 << 7) | (v2 >> (32 - 7))) +
221 | 			((v3 << 12) | (v3 >> (32 - 12))) +
222 | 			((v4 << 18) | (v4 >> (32 - 18)))
223 | 	} else {
224 | 		h32 = seed + PRIME32_5
225 | 	}
226 | 
227 | 	h32 += uint32(l)
228 | 
229 | 	for p <= bEnd-4 {
230 | 		h32 += (*(*uint32)(unsafe.Pointer(p))) * PRIME32_3
231 | 		h32 = ((h32 << 17) | (h32 >> (32 - 17))) * PRIME32_4
232 | 		p += 4
233 | 	}
234 | 
235 | 	for p < bEnd {
236 | 		h32 += uint32(*(*byte)(unsafe.Pointer(p))) * PRIME32_5
237 | 		h32 = ((h32 << 11) | (h32 >> (32 - 11))) * PRIME32_1
238 | 		p++
239 | 	}
240 | 
241 | 	h32 ^= h32 >> 15
242 | 	h32 *= PRIME32_2
243 | 	h32 ^= h32 >> 13
244 | 	h32 *= PRIME32_3
245 | 	h32 ^= h32 >> 16
246 | 
247 | 	return h32
248 | }
249 | 


--------------------------------------------------------------------------------
/xxhash_test.go:
--------------------------------------------------------------------------------
  1 | // Stolen from bitbucket.org/StephaneBunel/xxhash-go
  2 | package xxhash
  3 | 
  4 | import (
  5 | 	"encoding/binary"
  6 | 	"hash/adler32"
  7 | 	"hash/crc32"
  8 | 	"hash/fnv"
  9 | 	"testing"
 10 | )
 11 | 
 12 | var (
 13 | 	blob1       = []byte("Lorem ipsum dolor sit amet, consectetuer adipiscing elit, ")
 14 | 	blob2       = []byte("sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat.")
 15 | 	blob3       = []byte("Cookies")
 16 | 	blob4       = []byte("1234567890123456")
 17 | 	VeryBigFile = "a-very-big-file"
 18 | )
 19 | 
 20 | func Test_Checksum32(t *testing.T) {
 21 | 	h32 := Checksum32(blob1)
 22 | 	if h32 != 0x1130e7d4 {
 23 | 		t.Errorf("Checksum32(\"%v\") = 0x%08x need 0x1130e7d4\n", string(blob1), h32)
 24 | 	}
 25 | 
 26 | 	h32 = Checksum32(blob2)
 27 | 	if h32 != 0x24ca2992 {
 28 | 		t.Errorf("Checksum32(\"%v\") = 0x%08x need 0x24ca2992\n", string(blob2), h32)
 29 | 	}
 30 | 
 31 | 	h32 = Checksum32(blob3)
 32 | 	if h32 != 0x99dd2ca5 {
 33 | 		t.Errorf("Checksum32(\"%v\") = 0x%08x need 0x99dd2ca5\n", string(blob3), h32)
 34 | 	}
 35 | 
 36 | 	h32 = Checksum32(blob4)
 37 | 	if h32 != 0x03bf5152 {
 38 | 		t.Errorf("Checksum32(\"%v\") = 0x%08x need 0x03bf5152\n", string(blob4), h32)
 39 | 	}
 40 | }
 41 | 
 42 | func Test_Checksum32Seed(t *testing.T) {
 43 | 	h32 := Checksum32Seed(blob1, 1471)
 44 | 	if h32 != 0xba59a258 {
 45 | 		t.Errorf("Checksum32Seed(\"%v\", 1471) = 0x%08x\n need 0xba59a258", string(blob1), h32)
 46 | 	}
 47 | 
 48 | 	h32 = Checksum32Seed(blob2, 1596234)
 49 | 	if h32 != 0xf15f3e02 {
 50 | 		t.Errorf("Checksum32Seed(\"%v\", 1596234) = 0x%08x need 0xf15f3e02\n", string(blob2), h32)
 51 | 	}
 52 | 
 53 | 	h32 = Checksum32Seed(blob3, 9999666)
 54 | 	if h32 != 0xcd3ae44c {
 55 | 		t.Errorf("Checksum32Seed(\"%v\", 9999666) = 0x%08x need 0xcd3ae44c\n", string(blob3), h32)
 56 | 	}
 57 | 
 58 | 	h32 = Checksum32Seed(blob4, 1)
 59 | 	if h32 != 0x606913c4 {
 60 | 		t.Errorf("Checksum32Seed(\"%v\", 1) = 0x%08x need 0x606913c4\n", string(blob4), h32)
 61 | 	}
 62 | }
 63 | 
 64 | func Test_New32(t *testing.T) {
 65 | 	var digest = New(0)
 66 | 	digest.Write(blob1)
 67 | 	digest.Write(blob2)
 68 | 	h32 := digest.Sum32()
 69 | 	if h32 != 0x0d44373a {
 70 | 		t.Errorf("Sum32 = 0x%08x need 0x0d44373a\n", h32)
 71 | 	}
 72 | 
 73 | 	digest = New(0)
 74 | 	digest.Write(blob3)
 75 | 	h32 = digest.Sum32()
 76 | 	if h32 != 0x99dd2ca5 {
 77 | 		t.Errorf("Sum32 = 0x%08x need 0x99dd2ca5\n", h32)
 78 | 	}
 79 | 
 80 | 	digest = New(0)
 81 | 	digest.Write(blob4)
 82 | 	h32 = digest.Sum32()
 83 | 	if h32 != 0x3bf5152 {
 84 | 		t.Errorf("Sum32 = 0x%08x need 0x3bf5152\n", h32)
 85 | 	}
 86 | }
 87 | 
 88 | func Test_New32Seed(t *testing.T) {
 89 | 	var digest = New(1471)
 90 | 	digest.Write(blob1)
 91 | 	digest.Write(blob2)
 92 | 	h32 := digest.Sum32()
 93 | 	if h32 != 0x3265e220 {
 94 | 		t.Errorf("Sum32 = 0x%08x need 0x3265e220\n", h32)
 95 | 	}
 96 | 
 97 | 	digest = New(615324687)
 98 | 	digest.Write(blob3)
 99 | 	h32 = digest.Sum32()
100 | 	if h32 != 0xb90e95cb {
101 | 		t.Errorf("Sum32 = 0x%08x need 0x89f56371\n", h32)
102 | 	}
103 | 
104 | 	digest = New(1)
105 | 	digest.Write(blob4)
106 | 	h32 = digest.Sum32()
107 | 	if h32 != 0x606913c4 {
108 | 		t.Errorf("Sum32 = 0x%08x need 0x606913c4\n", h32)
109 | 	}
110 | 
111 | }
112 | 
113 | func Test_Reset(t *testing.T) {
114 | 	var digest = New(0)
115 | 	digest.Write(blob2)
116 | 	digest.Reset()
117 | 	digest.Write(blob1)
118 | 	h32 := digest.Sum32()
119 | 	if h32 != 0x1130e7d4 {
120 | 		t.Errorf("Sum32 = 0x%08x need 0x1130e7d4\n", h32)
121 | 	}
122 | }
123 | 
124 | func Benchmark_xxhash32(b *testing.B) {
125 | 	for i := 0; i < b.N; i++ {
126 | 		Checksum32(blob1)
127 | 	}
128 | }
129 | 
130 | func Benchmark_CRC32IEEE(b *testing.B) {
131 | 	for i := 0; i < b.N; i++ {
132 | 		crc32.ChecksumIEEE(blob1)
133 | 	}
134 | }
135 | 
136 | func Benchmark_Adler32(b *testing.B) {
137 | 	for i := 0; i < b.N; i++ {
138 | 		adler32.Checksum(blob1)
139 | 	}
140 | }
141 | 
142 | func Benchmark_Fnv32(b *testing.B) {
143 | 	h := fnv.New32()
144 | 	for i := 0; i < b.N; i++ {
145 | 		h.Sum(blob1)
146 | 	}
147 | }
148 | 
149 | func Benchmark_MurmurHash3Hash32(b *testing.B) {
150 | 	for i := 0; i < b.N; i++ {
151 | 		mmh3Hash32(blob1)
152 | 	}
153 | }
154 | 
// mmh3Hash32 returns the 32-bit MurmurHash3 of key with a seed of 0. Empty
// input hashes to 0.
//
// Adapted from mmh3.Hash32 in https://github.com/reusee/mmh3.
func mmh3Hash32(key []byte) uint32 {
	const (
		c1 uint32 = 0xcc9e2d51
		c2 uint32 = 0x1b873593
	)

	length := len(key)
	if length == 0 {
		return 0
	}

	var h, k uint32

	// Body: one little-endian 32-bit word at a time.
	nblocks := length / 4
	buf := key
	for i := 0; i < nblocks; i++ {
		k = binary.LittleEndian.Uint32(buf)
		buf = buf[4:]
		k *= c1
		k = (k << 15) | (k >> (32 - 15))
		k *= c2
		h ^= k
		h = (h << 13) | (h >> (32 - 13))
		h = (h * 5) + 0xe6546b64
	}

	// Tail: the last 1-3 bytes, assembled least significant byte first.
	k = 0
	tailIndex := nblocks * 4
	switch length & 3 {
	case 3:
		k ^= uint32(key[tailIndex+2]) << 16
		fallthrough
	case 2:
		k ^= uint32(key[tailIndex+1]) << 8
		fallthrough
	case 1:
		k ^= uint32(key[tailIndex])
		k *= c1
		// Rotate left by 15, the same rotation the body applies to k. Both
		// shift amounts must add up to 32 for this to be a rotation.
		k = (k << 15) | (k >> (32 - 15))
		k *= c2
		h ^= k
	}

	// Finalization: mix in the length, then force the bits to avalanche.
	h ^= uint32(length)
	h ^= h >> 16
	h *= 0x85ebca6b
	h ^= h >> 13
	h *= 0xc2b2ae35
	h ^= h >> 16
	return h
}
200 | 


--------------------------------------------------------------------------------