├── project ├── build.properties └── plugins.sbt ├── version.sbt ├── bench └── src │ └── main │ ├── resource │ └── results │ │ ├── XxHash32.png │ │ ├── XxHash64.png │ │ ├── MurmurHash3_32.png │ │ ├── Murmur3Hash_32BenchResult.txt │ │ ├── XxHash32BenchResults.txt │ │ └── XxHash64BenchResults.txt │ └── scala │ └── com │ └── desmondyeung │ └── bench │ ├── MurmurHash3_32Bench.scala │ ├── XxHash32Bench.scala │ └── XxHash64Bench.scala ├── .gitignore ├── .travis.yml ├── .scalafmt.conf ├── src ├── test │ └── scala │ │ └── com │ │ └── desmondyeung │ │ └── hashing │ │ ├── MurmurHash3_32Spec.scala │ │ ├── XxHash32Spec.scala │ │ ├── XxHash64Spec.scala │ │ ├── StreamingMurmurHash3_32Spec.scala │ │ ├── StreamingXxHash32Spec.scala │ │ ├── StreamingXxHash64Spec.scala │ │ ├── HashSpecUtils.scala │ │ ├── Hash32Behaviors.scala │ │ ├── Hash64Behaviors.scala │ │ ├── StreamingHash32Behaviors.scala │ │ └── StreamingHash64Behaviors.scala └── main │ └── scala │ └── com │ └── desmondyeung │ └── hashing │ ├── UnsafeUtil.scala │ ├── MurmurHash3_32.scala │ ├── StreamingMurmurHash3_32.scala │ ├── XxHash32.scala │ ├── StreamingXxHash32.scala │ ├── StreamingXxHash64.scala │ ├── XxHash64.scala │ └── Hash.scala ├── README.md └── LICENSE /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.2.8 2 | -------------------------------------------------------------------------------- /version.sbt: -------------------------------------------------------------------------------- 1 | version in ThisBuild := "0.2.0-SNAPSHOT" 2 | -------------------------------------------------------------------------------- /bench/src/main/resource/results/XxHash32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/desmondyeung/scala-hashing/HEAD/bench/src/main/resource/results/XxHash32.png -------------------------------------------------------------------------------- 
/bench/src/main/resource/results/XxHash64.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/desmondyeung/scala-hashing/HEAD/bench/src/main/resource/results/XxHash64.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.idea 2 | .DS_Store 3 | target 4 | *.orig 5 | *# 6 | *~ 7 | .#* 8 | .*.swp 9 | *.vim 10 | .ensime* 11 | *.class 12 | *.log 13 | */*.iml 14 | -------------------------------------------------------------------------------- /bench/src/main/resource/results/MurmurHash3_32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/desmondyeung/scala-hashing/HEAD/bench/src/main/resource/results/MurmurHash3_32.png -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.0.1") 2 | addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.6.0") 3 | addSbtPlugin("pl.project13.scala" % "sbt-jmh" % "0.3.7") 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: scala 2 | 3 | scala: 4 | - 2.13.0 5 | 6 | jdk: 7 | - openjdk8 8 | 9 | script: 10 | - sbt clean coverage test coverageReport 11 | 12 | after_success: 13 | - bash <(curl -s https://codecov.io/bash) 14 | -------------------------------------------------------------------------------- /.scalafmt.conf: -------------------------------------------------------------------------------- 1 | version = 2.0.1 2 | style = defaultWithAlign 3 | maxColumn = 120 4 | rewrite.rules = [ RedundantBraces, RedundantParens, SortImports, PreferCurlyFors ] 5 | 
spaces.inImportCurlyBraces = false 6 | danglingParentheses = true 7 | align.openParenCallSite = false 8 | -------------------------------------------------------------------------------- /src/test/scala/com/desmondyeung/hashing/MurmurHash3_32Spec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 Desmond Yeung 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.desmondyeung.hashing 18 | 19 | import org.scalatest.FunSpec 20 | /** Spec for MurmurHash3_32: hands the implementation and a reference function to the shared hash32 behaviours mixed in from Hash32Behaviors (defined outside this file). */ 21 | class MurmurHash3_32Spec extends FunSpec with Hash32Behaviors { 22 | /** Reference value: scala.util.hashing.MurmurHash3.bytesHash over the whole array with the given seed. */ 23 | def referenceImpl(input: Array[Byte], seed: Int): Int = 24 | scala.util.hashing.MurmurHash3.bytesHash(input, seed) 25 | 26 | describe("MurmurHash3_32") { 27 | it should behave like hash32(MurmurHash3_32, referenceImpl) 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/test/scala/com/desmondyeung/hashing/XxHash32Spec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 Desmond Yeung 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.desmondyeung.hashing 18 | 19 | import org.scalatest.FunSpec 20 | /** Spec for XxHash32: hands the implementation and a reference function to the shared hash32 behaviours mixed in from Hash32Behaviors (defined outside this file). */ 21 | class XxHash32Spec extends FunSpec with Hash32Behaviors { 22 | /** Reference value: the 32-bit hasher of net.jpountz.xxhash.XXHashFactory.fastestInstance applied to input[0, input.length) with the given seed. */ 23 | def referenceImpl(input: Array[Byte], seed: Int): Int = 24 | net.jpountz.xxhash.XXHashFactory.fastestInstance.hash32.hash(input, 0, input.length, seed) 25 | 26 | describe("XxHash32") { 27 | it should behave like hash32(XxHash32, referenceImpl) 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/test/scala/com/desmondyeung/hashing/XxHash64Spec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 Desmond Yeung 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | 17 | package com.desmondyeung.hashing 18 | 19 | import org.scalatest.FunSpec 20 | /** Spec for XxHash64: hands the implementation and a reference function to the shared hash64 behaviours mixed in from Hash64Behaviors (defined outside this file). */ 21 | class XxHash64Spec extends FunSpec with Hash64Behaviors { 22 | /** Reference value: the 64-bit hasher of net.jpountz.xxhash.XXHashFactory.fastestInstance applied to input[0, input.length) with the given seed. */ 23 | def referenceImpl(input: Array[Byte], seed: Long): Long = 24 | net.jpountz.xxhash.XXHashFactory.fastestInstance.hash64.hash(input, 0, input.length, seed) 25 | 26 | describe("XxHash64") { 27 | it should behave like hash64(XxHash64, referenceImpl) 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/test/scala/com/desmondyeung/hashing/StreamingMurmurHash3_32Spec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 Desmond Yeung 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | 17 | package com.desmondyeung.hashing 18 | 19 | import org.scalatest.FunSpec 20 | /** Spec for StreamingMurmurHash3_32: hands the companion's apply (seed => hasher) and a reference function to the shared streamingHash32 behaviours mixed in from StreamingHash32Behaviors (defined outside this file). */ 21 | class StreamingMurmurHash3_32Spec extends FunSpec with StreamingHash32Behaviors { 22 | /** Reference value: scala.util.hashing.MurmurHash3.bytesHash over the whole array with the given seed. */ 23 | def referenceImpl(input: Array[Byte], seed: Int): Int = 24 | scala.util.hashing.MurmurHash3.bytesHash(input, seed) 25 | 26 | describe("StreamingMurmurHash3_32") { 27 | it should behave like streamingHash32(StreamingMurmurHash3_32.apply, referenceImpl) 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/test/scala/com/desmondyeung/hashing/StreamingXxHash32Spec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 Desmond Yeung 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | 17 | package com.desmondyeung.hashing 18 | 19 | import org.scalatest.FunSpec 20 | /** Spec for StreamingXxHash32: hands the companion's apply (seed => hasher) and a reference function to the shared streamingHash32 behaviours mixed in from StreamingHash32Behaviors (defined outside this file). */ 21 | class StreamingXxHash32Spec extends FunSpec with StreamingHash32Behaviors { 22 | /** Reference value: the 32-bit hasher of net.jpountz.xxhash.XXHashFactory.fastestInstance applied to input[0, input.length) with the given seed. */ 23 | def referenceImpl(input: Array[Byte], seed: Int): Int = 24 | net.jpountz.xxhash.XXHashFactory.fastestInstance.hash32.hash(input, 0, input.length, seed) 25 | 26 | describe("StreamingXxHash32") { 27 | it should behave like streamingHash32(StreamingXxHash32.apply, referenceImpl) 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/test/scala/com/desmondyeung/hashing/StreamingXxHash64Spec.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 Desmond Yeung 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | 17 | package com.desmondyeung.hashing 18 | 19 | import org.scalatest.FunSpec 20 | /** Spec for StreamingXxHash64: hands the companion's apply (seed => hasher) and a reference function to the shared streamingHash64 behaviours mixed in from StreamingHash64Behaviors (defined outside this file). */ 21 | class StreamingXxHash64Spec extends FunSpec with StreamingHash64Behaviors { 22 | /** Reference value: the 64-bit hasher of net.jpountz.xxhash.XXHashFactory.fastestInstance applied to input[0, input.length) with the given seed. */ 23 | def referenceImpl(input: Array[Byte], seed: Long): Long = 24 | net.jpountz.xxhash.XXHashFactory.fastestInstance.hash64.hash(input, 0, input.length, seed) 25 | 26 | describe("StreamingXxHash64") { 27 | it should behave like streamingHash64(StreamingXxHash64.apply, referenceImpl) 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/test/scala/com/desmondyeung/hashing/HashSpecUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 Desmond Yeung 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | 17 | package com.desmondyeung.hashing 18 | 19 | import java.nio.{ByteBuffer, ByteOrder} 20 | import scala.util.Random 21 | /** Test helper mixin. byteBufferOfSize returns a ByteBuffer of `size` random bytes in native byte order, rewound to position 0; `direct` selects an off-heap (allocateDirect) buffer instead of a heap buffer. */ 22 | trait HashSpecUtils { 23 | def byteBufferOfSize(size: Int, direct: Boolean = false): ByteBuffer = { 24 | val array = new Array[Byte](size) 25 | Random.nextBytes(array) 26 | /* Choose direct or heap backing storage of the same capacity. */ 27 | val bb = if (direct) { 28 | ByteBuffer.allocateDirect(size) 29 | } else { 30 | ByteBuffer.allocate(size) 31 | } 32 | bb.order(ByteOrder.nativeOrder) 33 | bb.put(array) 34 | bb.rewind /* put() advanced the position to the end; reset it to 0 before returning */ 35 | bb 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /bench/src/main/scala/com/desmondyeung/bench/MurmurHash3_32Bench.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 Desmond Yeung 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | 17 | package com.desmondyeung.bench 18 | 19 | import org.openjdk.jmh.annotations._ 20 | import com.desmondyeung.hashing.MurmurHash3_32 21 | import com.google.common.hash.Hashing 22 | import scala.util.hashing.MurmurHash3 23 | 24 | import java.util.concurrent.TimeUnit 25 | /** JMH throughput benchmark comparing three 32-bit MurmurHash3 implementations on the same random byte array: this project's MurmurHash3_32, Guava's Hashing.murmur3_32 and scala.util.hashing.MurmurHash3. Configured for 1 fork, 3 warmup and 5 measurement iterations of 5 seconds each, with Scope.Thread state. All hashers use seed 0. */ 26 | @BenchmarkMode(Array(Mode.Throughput)) 27 | @Fork(1) 28 | @Warmup(iterations = 3, time = 5, timeUnit = TimeUnit.SECONDS) 29 | @Measurement(iterations = 5, time = 5, timeUnit = TimeUnit.SECONDS) 30 | @State(Scope.Thread) 31 | class MurmurHash3_32Bench { 32 | /** Payload hashed by every benchmark method; assigned in prepare. */ 33 | var input: Array[Byte] = _ 34 | /** Payload length in bytes; one of the listed values, injected through @Param. */ 35 | @Param(Array("8", "128", "512", "1024", "1536", "2048")) 36 | var inputSize: Int = _ 37 | /** Setup hook: allocates an inputSize-byte array and fills it with random bytes. */ 38 | @Setup 39 | def prepare: Unit = { 40 | input = new Array[Byte](inputSize) 41 | scala.util.Random.nextBytes(input) 42 | } 43 | /** Guava murmur3_32 hash function with seed 0, built once per state instance rather than per invocation. */ 44 | val guava = Hashing.murmur3_32(0) 45 | /* Each benchmark below hashes the whole `input` array with seed 0 and returns the 32-bit result. */ 46 | @Benchmark 47 | def com_desmondyeung_hashing: Int = MurmurHash3_32.hashByteArray(input, 0) 48 | 49 | @Benchmark 50 | def com_google_common_hash: Int = guava.hashBytes(input).asInt 51 | 52 | @Benchmark 53 | def scala_util_hashing(): Int = MurmurHash3.bytesHash(input, 0) 54 | } 55 | -------------------------------------------------------------------------------- /bench/src/main/scala/com/desmondyeung/bench/XxHash32Bench.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 Desmond Yeung 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.desmondyeung.bench 18 | 19 | import org.openjdk.jmh.annotations._ 20 | import com.desmondyeung.hashing.XxHash32 21 | import net.openhft.hashing.LongHashFunction 22 | import net.jpountz.xxhash.XXHashFactory 23 | 24 | import java.util.concurrent.TimeUnit 25 | /** JMH throughput benchmark comparing this project's XxHash32 with the native, safe and unsafe 32-bit hashers obtained from net.jpountz.xxhash.XXHashFactory, all on the same random byte array with seed 0. Configured for 1 fork, 3 warmup and 5 measurement iterations of 5 seconds each, with Scope.Thread state. */ 26 | @BenchmarkMode(Array(Mode.Throughput)) 27 | @Fork(1) 28 | @Warmup(iterations = 3, time = 5, timeUnit = TimeUnit.SECONDS) 29 | @Measurement(iterations = 5, time = 5, timeUnit = TimeUnit.SECONDS) 30 | @State(Scope.Thread) 31 | class XxHash32Bench { 32 | /** Payload hashed by every benchmark method; assigned in prepare. */ 33 | var input: Array[Byte] = _ 34 | /** Payload length in bytes; one of the listed values, injected through @Param. */ 35 | @Param(Array("8", "128", "512", "1024", "1536", "2048")) 36 | var inputSize: Int = _ 37 | /** Setup hook: allocates an inputSize-byte array and fills it with random bytes. */ 38 | @Setup 39 | def prepare: Unit = { 40 | input = new Array[Byte](inputSize) 41 | scala.util.Random.nextBytes(input) 42 | } 43 | /* Hashers from the three XXHashFactory variants, each built once per state instance rather than per invocation. */ 44 | val jpountzJni = XXHashFactory.nativeInstance.hash32() 45 | val jpountzUnsafe = XXHashFactory.unsafeInstance.hash32() 46 | val jpountzPure = XXHashFactory.safeInstance.hash32() 47 | /* Each benchmark below hashes input[0, inputSize) with seed 0 and returns the 32-bit result. */ 48 | @Benchmark 49 | def com_desmondyeung_hashing: Int = XxHash32.hashByteArray(input, 0) 50 | 51 | @Benchmark 52 | def net_jpountz_xxhash_jni: Int = jpountzJni.hash(input, 0, inputSize, 0) 53 | 54 | @Benchmark 55 | def net_jpountz_xxhash_pure: Int = jpountzPure.hash(input, 0, inputSize, 0) 56 | 57 | @Benchmark 58 | def net_jpountz_xxhash_unsafe: Int = jpountzUnsafe.hash(input, 0, inputSize, 0) 59 | } 60 | -------------------------------------------------------------------------------- /bench/src/main/scala/com/desmondyeung/bench/XxHash64Bench.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 Desmond Yeung 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.desmondyeung.bench 18 | 19 | import org.openjdk.jmh.annotations._ 20 | import com.desmondyeung.hashing.XxHash64 21 | import net.openhft.hashing.LongHashFunction 22 | import net.jpountz.xxhash.XXHashFactory 23 | 24 | import java.util.concurrent.TimeUnit 25 | /** JMH throughput benchmark comparing this project's XxHash64 with the native, safe and unsafe 64-bit hashers from net.jpountz.xxhash.XXHashFactory and with net.openhft's LongHashFunction.xx, all on the same random byte array with seed 0. Configured for 1 fork, 3 warmup and 5 measurement iterations of 5 seconds each, with Scope.Thread state. */ 26 | @BenchmarkMode(Array(Mode.Throughput)) 27 | @Fork(1) 28 | @Warmup(iterations = 3, time = 5, timeUnit = TimeUnit.SECONDS) 29 | @Measurement(iterations = 5, time = 5, timeUnit = TimeUnit.SECONDS) 30 | @State(Scope.Thread) 31 | class XxHash64Bench { 32 | /** Payload hashed by every benchmark method; assigned in prepare. */ 33 | var input: Array[Byte] = _ 34 | /** Payload length in bytes; one of the listed values, injected through @Param. */ 35 | @Param(Array("8", "128", "512", "1024", "1536", "2048")) 36 | var inputSize: Int = _ 37 | /** Setup hook: allocates an inputSize-byte array and fills it with random bytes. */ 38 | @Setup 39 | def prepare: Unit = { 40 | input = new Array[Byte](inputSize) 41 | scala.util.Random.nextBytes(input) 42 | } 43 | /* Third-party hashers, each built once per state instance so the benchmark methods time hashing only, not construction. */ 44 | val jpountzJni = XXHashFactory.nativeInstance.hash64() 45 | val jpountzPure = XXHashFactory.safeInstance.hash64() 46 | val jpountzUnsafe = XXHashFactory.unsafeInstance.hash64() 47 | val openhft = LongHashFunction.xx(0) 48 | 49 | @Benchmark 50 | def com_desmondyeung_hashing: Long = XxHash64.hashByteArray(input, 0) 51 | 52 | @Benchmark 53 | def net_jpountz_xxhash_jni: Long = jpountzJni.hash(input, 0, inputSize, 0) 54 | 55 | @Benchmark 56 | def net_jpountz_xxhash_pure: Long = jpountzPure.hash(input, 0, inputSize, 0) 57 | 58 | @Benchmark 59 | def net_jpountz_xxhash_unsafe: Long = jpountzUnsafe.hash(input, 0, inputSize, 0) 60 | /* Uses the prebuilt `openhft` instance, like the other third-party benchmarks, instead of calling LongHashFunction.xx(0) on every invocation. */ 61 | @Benchmark 62 | def net_openhft_hashing: Long = openhft.hashBytes(input) 63 | } 64 | 
-------------------------------------------------------------------------------- /src/main/scala/com/desmondyeung/hashing/UnsafeUtil.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 Desmond Yeung 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.desmondyeung.hashing 18 | 19 | import java.lang.Long.{reverseBytes => swap64} 20 | import java.lang.Integer.{reverseBytes => swap32} 21 | import java.lang.reflect.Field 22 | import java.nio.ByteOrder 23 | import sun.misc.Unsafe 24 | /** Package-private helpers around sun.misc.Unsafe for reading bytes, ints and longs out of a byte array at a raw offset, plus a raw array-to-array copy. Every offset is handed to Unsafe unchanged and nothing here bounds-checks; callers in this package add ByteArrayBase to reach the array's data. Multi-byte reads are byte-swapped on big-endian hosts, so results are always the little-endian interpretation of the bytes. */ 25 | private[hashing] object UnsafeUtil { /* theUnsafe: the Unsafe singleton, obtained reflectively from the private static field "theUnsafe". */ 26 | private[this] final val theUnsafe = { 27 | val field: Field = classOf[Unsafe].getDeclaredField("theUnsafe") 28 | field.setAccessible(true) 29 | field.get(null).asInstanceOf[Unsafe] 30 | } 31 | /* True when the platform's native byte order is little-endian; decides whether multi-byte reads need swapping. */ 32 | private[this] final val isLittleEndian = ByteOrder.nativeOrder == ByteOrder.LITTLE_ENDIAN 33 | /** Base offset of a byte array as reported by Unsafe.arrayBaseOffset, widened to Long. */ 34 | final val ByteArrayBase: Long = theUnsafe.arrayBaseOffset(Array[Byte]().getClass) 35 | /** Reads the single (signed) byte at the given raw offset. */ 36 | final def getByte(input: Array[Byte], offset: Long): Byte = 37 | theUnsafe.getByte(input, offset) 38 | /** Reads 4 bytes at the given raw offset as a little-endian Int. */ 39 | final def getInt(input: Array[Byte], offset: Long): Int = 40 | if (isLittleEndian) { 41 | theUnsafe.getInt(input, offset) 42 | } else { 43 | swap32(theUnsafe.getInt(input, offset)) 44 | } 45 | /** Reads 8 bytes at the given raw offset as a little-endian Long. */ 46 | final def getLong(input: Array[Byte], offset: Long): Long = 47 | if (isLittleEndian) { 48 | theUnsafe.getLong(input, offset) 49 | }
else { 50 | swap64(theUnsafe.getLong(input, offset)) 51 | } 52 | /** Reads one byte at the given raw offset and zero-extends it to an Int in 0..255. */ 53 | final def getUnsignedByte(input: Array[Byte], offset: Long): Int = 54 | theUnsafe.getByte(input, offset) & 0xFF 55 | /** Reads 4 bytes at the given raw offset as a little-endian value and zero-extends it to a Long in 0..0xFFFFFFFF. */ 56 | final def getUnsignedInt(input: Array[Byte], offset: Long): Long = 57 | if (isLittleEndian) { 58 | theUnsafe.getInt(input, offset) & 0xFFFFFFFFL 59 | } else { 60 | swap32(theUnsafe.getInt(input, offset)) & 0xFFFFFFFFL 61 | } 62 | /** Copies `length` bytes from `src` at raw offset `srcOffset` to `dest` at raw offset `destOffset` through Unsafe.copyMemory. */ 63 | final def copyMemory(src: Array[Byte], srcOffset: Long, dest: Array[Byte], destOffset: Long, length: Int): Unit = 64 | theUnsafe.copyMemory(src, srcOffset, dest, destOffset, length) 65 | } 66 | -------------------------------------------------------------------------------- /src/main/scala/com/desmondyeung/hashing/MurmurHash3_32.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 Desmond Yeung 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.desmondyeung.hashing 18 | 19 | import java.lang.Integer.{rotateLeft => rotl32} 20 | 21 | /* 22 | * Scala implementation of Austin Appleby's MurmurHash3.
23 | * See https://github.com/aappleby/smhasher 24 | */ 25 | object MurmurHash3_32 extends Hash32 { /* C1 and C2 are the multiplicative constants applied to each input word in fmix. */ 26 | val C1 = 0xcc9e2d51 27 | val C2 = 0x1b873593 28 | /** Hashes a single byte, treated as unsigned, with the given seed. The `^ 1` plays the role of the `hash ^ length` step in hashBytes for a 1-byte input. */ 29 | final def hashByte(input: Byte, seed: Int): Int = 30 | avalance(fmix(seed, input & 0xFF) ^ 1) 31 | /** Hashes one Int with the given seed; `^ 4` is the input length in bytes. */ 32 | final def hashInt(input: Int, seed: Int): Int = 33 | avalance(mix(seed, input) ^ 4) 34 | /** Hashes one Long as two Ints, low 32 bits first and high 32 bits second; `^ 8` is the input length in bytes. */ 35 | final def hashLong(input: Long, seed: Int): Int = 36 | avalance(mix(mix(seed, input.asInstanceOf[Int]), (input >> 32).asInstanceOf[Int]) ^ 8) 37 | /** Scrambles k (multiply by C1, rotate left 15, multiply by C2) and XORs the result into hash. `*` binds tighter than `^`, so the whole product is XORed. */ 38 | private[hashing] final def fmix(hash: Int, k: Int): Int = 39 | hash ^ rotl32(k * C1, 15) * C2 40 | /** Full-word mixing step: fmix, then rotate left 13, multiply by 5 and add 0xe6546b64. */ 41 | private[hashing] final def mix(hash: Int, k: Int): Int = 42 | rotl32(fmix(hash, k), 13) * 5 + 0xe6546b64 43 | /** Final avalanche: two xor-shift-right/multiply rounds followed by a last xor-shift-right by 16. The identifier is spelled "avalance" and is used under that name inside this object. */ 44 | private[hashing] final def avalance(hash: Int): Int = { 45 | val k1 = (hash ^ (hash >>> 16)) * 0x85ebca6b 46 | val k2 = (k1 ^ (k1 >>> 13)) * 0xc2b2ae35 47 | k2 ^ (k2 >>> 16) 48 | } 49 | /** Hashes `length` bytes of `input` starting at `offset`. The offset is passed unchanged to UnsafeUtil, so it is a raw Unsafe offset rather than an array index - presumably callers add UnsafeUtil.ByteArrayBase; confirm against the callers, which are outside this file. */ 50 | private[hashing] final def hashBytes(input: Array[Byte], offset: Long, length: Int, seed: Int): Int = { 51 | var hash = seed 52 | var off = offset 53 | var unprocessed = length 54 | /* Body: consume the input four bytes at a time. */ 55 | while (unprocessed >= 4) { 56 | hash = mix(hash, UnsafeUtil.getInt(input, off)) 57 | off += 4 58 | unprocessed -= 4 59 | } 60 | /* Tail: pack the 1-3 remaining bytes into k1 (byte at off is the lowest 8 bits), then apply fmix once - no rotate/multiply-by-5 step for the tail. */ 61 | if (unprocessed > 0) { 62 | var k1 = 0 63 | if (unprocessed == 3) { 64 | k1 ^= UnsafeUtil.getUnsignedByte(input, off + 2) << 16 65 | } 66 | if (unprocessed >= 2) { 67 | k1 ^= UnsafeUtil.getUnsignedByte(input, off + 1) << 8 68 | } 69 | if (unprocessed >= 1) { 70 | k1 ^= UnsafeUtil.getUnsignedByte(input, off) 71 | hash = fmix(hash, k1) 72 | } 73 | } 74 | /* Fold in the total length, then run the final avalanche. */ 75 | avalance(hash ^ length) 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /bench/src/main/resource/results/Murmur3Hash_32BenchResult.txt: -------------------------------------------------------------------------------- 1 | [info] # Run complete. Total time: 00:09:42 2 | [info] REMEMBER: The numbers below are just data.
To gain reusable insights, you need to follow up on 3 | [info] why the numbers are the way they are. Use profilers (see -prof, -lprof), design factorial 4 | [info] experiments, perform baseline and negative tests that provide experimental control, make sure 5 | [info] the benchmarking environment is safe on JVM/OS/HW level, ask for reviews from the domain experts. 6 | [info] Do not assume the numbers tell you what you want them to tell. 7 | [info] Benchmark (inputSize) Mode Cnt Score Error Units 8 | [info] MurmurHash3_32Bench.com_desmondyeung_hashing 8 thrpt 5 143053204.310 ± 1793248.431 ops/s 9 | [info] MurmurHash3_32Bench.com_desmondyeung_hashing 128 thrpt 5 19167534.091 ± 254169.664 ops/s 10 | [info] MurmurHash3_32Bench.com_desmondyeung_hashing 512 thrpt 5 5561943.424 ± 85516.455 ops/s 11 | [info] MurmurHash3_32Bench.com_desmondyeung_hashing 1024 thrpt 5 2929010.066 ± 80201.084 ops/s 12 | [info] MurmurHash3_32Bench.com_desmondyeung_hashing 1536 thrpt 5 1943152.471 ± 82023.180 ops/s 13 | [info] MurmurHash3_32Bench.com_desmondyeung_hashing 2048 thrpt 5 1403510.923 ± 44546.385 ops/s 14 | [info] MurmurHash3_32Bench.com_google_common_hash 8 thrpt 5 116084764.014 ± 3715825.165 ops/s 15 | [info] MurmurHash3_32Bench.com_google_common_hash 128 thrpt 5 11915395.823 ± 1434301.027 ops/s 16 | [info] MurmurHash3_32Bench.com_google_common_hash 512 thrpt 5 3158079.416 ± 134154.390 ops/s 17 | [info] MurmurHash3_32Bench.com_google_common_hash 1024 thrpt 5 1657552.706 ± 83520.818 ops/s 18 | [info] MurmurHash3_32Bench.com_google_common_hash 1536 thrpt 5 1095388.998 ± 35546.813 ops/s 19 | [info] MurmurHash3_32Bench.com_google_common_hash 2048 thrpt 5 830621.543 ± 6247.160 ops/s 20 | [info] MurmurHash3_32Bench.scala_util_hashing 8 thrpt 5 92785579.984 ± 2564661.931 ops/s 21 | [info] MurmurHash3_32Bench.scala_util_hashing 128 thrpt 5 16740708.874 ± 446764.432 ops/s 22 | [info] MurmurHash3_32Bench.scala_util_hashing 512 thrpt 5 4769417.979 ± 55659.336 ops/s 23 | [info] 
MurmurHash3_32Bench.scala_util_hashing 1024 thrpt 5 2362250.706 ± 112984.131 ops/s 24 | [info] MurmurHash3_32Bench.scala_util_hashing 1536 thrpt 5 1625387.712 ± 76083.874 ops/s 25 | [info] MurmurHash3_32Bench.scala_util_hashing 2048 thrpt 5 1219616.089 ± 65617.926 ops/s 26 | [success] Total time: 585 s, completed Aug 20, 2019, 10:24:00 PM 27 | sbt:Hashing> -------------------------------------------------------------------------------- /src/main/scala/com/desmondyeung/hashing/StreamingMurmurHash3_32.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 Desmond Yeung 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.desmondyeung.hashing 18 | 19 | import java.lang.Integer.{rotateLeft => rotl32} 20 | 21 | /* 22 | * Streaming Scala implementation of Austin Appleby's MurmurHash3_x86_32 algorithm.
23 | * See https://github.com/aappleby/smhasher 24 | */ 25 | object StreamingMurmurHash3_32 { /* Factory: builds a new streaming hasher for the given seed. */ 26 | def apply(seed: Int) = new StreamingMurmurHash3_32(seed) 27 | } 28 | /** Incremental 32-bit MurmurHash3. Bytes arrive through update; complete 4-byte words are mixed into `hash` immediately and up to 3 leftover bytes wait in `buffer` until more data arrives or value is read. Mixing is delegated to MurmurHash3_32.mix / fmix / avalance. */ 29 | class StreamingMurmurHash3_32(seed: Int) extends StreamingHash32 { 30 | /* buffer: pending tail bytes (only the first bufferSize are valid); hash: running state; totalLength: bytes seen since construction or the last reset. */ 31 | private[this] final val buffer = new Array[Byte](4) 32 | private[this] final var hash = seed 33 | private[this] final var totalLength = 0 34 | private[this] final var bufferSize = 0 35 | /** Returns the hasher to its initial state for the same seed. */ 36 | final def reset(): Unit = { 37 | totalLength = 0 38 | bufferSize = 0 39 | hash = seed 40 | } 41 | /** Returns the hash of everything fed so far without modifying the state, so more data may be appended afterwards. The buffered tail is read at UnsafeUtil.ByteArrayBase + i, the same base update uses when writing the buffer, rather than at a hard-coded 16/17/18, which is only correct when the byte-array base offset happens to be 16. */ 42 | final def value: Int = { 43 | var h = hash 44 | if (bufferSize > 0) { 45 | var k1 = 0 46 | if (bufferSize == 3) { 47 | k1 ^= UnsafeUtil.getUnsignedByte(buffer, UnsafeUtil.ByteArrayBase + 2) << 16 48 | } 49 | if (bufferSize >= 2) { 50 | k1 ^= UnsafeUtil.getUnsignedByte(buffer, UnsafeUtil.ByteArrayBase + 1) << 8 51 | } 52 | if (bufferSize >= 1) { 53 | k1 ^= UnsafeUtil.getUnsignedByte(buffer, UnsafeUtil.ByteArrayBase) 54 | h = MurmurHash3_32.fmix(h, k1) 55 | } 56 | } 57 | /* Fold in the total length, then run the final avalanche. */ 58 | MurmurHash3_32.avalance(h ^ totalLength) 59 | } 60 | /** Feeds `length` bytes of `input` starting at raw Unsafe offset `offset` (passed unchanged to UnsafeUtil, so it is not an array index). If the new bytes plus the buffered ones do not make a full word they are only appended to the buffer. Otherwise the buffer is topped up to 4 bytes and mixed, then whole words are mixed straight from `input`, and the 0-3 leftover bytes are stored at the start of the buffer. */ 61 | private[hashing] final def update(input: Array[Byte], offset: Long, length: Int): Unit = { 62 | totalLength += length 63 | val newBuffSize = bufferSize + length 64 | if (newBuffSize < 4) { 65 | UnsafeUtil.copyMemory(input, offset, buffer, bufferSize + UnsafeUtil.ByteArrayBase, length) 66 | bufferSize = newBuffSize 67 | } else { 68 | var off = offset 69 | var unprocessed = length 70 | if (bufferSize > 0) { 71 | val remaining = 4 - bufferSize 72 | UnsafeUtil.copyMemory(input, offset, buffer, bufferSize + UnsafeUtil.ByteArrayBase, remaining) 73 | hash = MurmurHash3_32.mix(hash, UnsafeUtil.getInt(buffer, UnsafeUtil.ByteArrayBase)) 74 | off += remaining 75 | unprocessed -= remaining 76 | bufferSize = 0 77 | } 78 | /* Mix every complete 4-byte word directly from the caller's array. */ 79 | if (unprocessed >= 4) { 80 | do { 81 | hash = MurmurHash3_32.mix(hash, UnsafeUtil.getInt(input, off)) 82 | off += 4 83 | unprocessed -= 4 84 | } while (unprocessed >= 4) 85 | } 86 | /* Keep the trailing 1-3 bytes for the next update or for value. */ 87 | if (unprocessed > 0) { 88 | UnsafeUtil.copyMemory(input, off, buffer,
UnsafeUtil.ByteArrayBase, unprocessed) 89 | bufferSize = unprocessed 90 | } 91 | } 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /bench/src/main/resource/results/XxHash32BenchResults.txt: -------------------------------------------------------------------------------- 1 | [info] # Run complete. Total time: 00:12:58 2 | [info] REMEMBER: The numbers below are just data. To gain reusable insights, you need to follow up on 3 | [info] why the numbers are the way they are. Use profilers (see -prof, -lprof), design factorial 4 | [info] experiments, perform baseline and negative tests that provide experimental control, make sure 5 | [info] the benchmarking environment is safe on JVM/OS/HW level, ask for reviews from the domain experts. 6 | [info] Do not assume the numbers tell you what you want them to tell. 7 | [info] Benchmark (inputSize) Mode Cnt Score Error Units 8 | [info] XxHash32Bench.com_desmondyeung_hashing 8 thrpt 5 181898630.061 ± 2086886.490 ops/s 9 | [info] XxHash32Bench.com_desmondyeung_hashing 128 thrpt 5 44161794.692 ± 499905.676 ops/s 10 | [info] XxHash32Bench.com_desmondyeung_hashing 512 thrpt 5 14488496.855 ± 107840.819 ops/s 11 | [info] XxHash32Bench.com_desmondyeung_hashing 1024 thrpt 5 7523643.321 ± 104558.725 ops/s 12 | [info] XxHash32Bench.com_desmondyeung_hashing 1536 thrpt 5 5005641.604 ± 52267.655 ops/s 13 | [info] XxHash32Bench.com_desmondyeung_hashing 2048 thrpt 5 3789585.515 ± 31996.067 ops/s 14 | [info] XxHash32Bench.net_jpountz_xxhash_jni 8 thrpt 5 7328440.319 ± 275575.767 ops/s 15 | [info] XxHash32Bench.net_jpountz_xxhash_jni 128 thrpt 5 5930950.315 ± 303366.575 ops/s 16 | [info] XxHash32Bench.net_jpountz_xxhash_jni 512 thrpt 5 3968051.273 ± 149722.290 ops/s 17 | [info] XxHash32Bench.net_jpountz_xxhash_jni 1024 thrpt 5 2771389.170 ± 33486.356 ops/s 18 | [info] XxHash32Bench.net_jpountz_xxhash_jni 1536 thrpt 5 2148733.148 ± 145690.835 ops/s 19 | [info] XxHash32Bench.net_jpountz_xxhash_jni
2048 thrpt 5 1720267.164 ± 77320.929 ops/s 20 | [info] XxHash32Bench.net_jpountz_xxhash_pure 8 thrpt 5 103689821.011 ± 2704414.707 ops/s 21 | [info] XxHash32Bench.net_jpountz_xxhash_pure 128 thrpt 5 19236302.722 ± 730586.182 ops/s 22 | [info] XxHash32Bench.net_jpountz_xxhash_pure 512 thrpt 5 5823303.478 ± 224930.690 ops/s 23 | [info] XxHash32Bench.net_jpountz_xxhash_pure 1024 thrpt 5 3066582.769 ± 150944.281 ops/s 24 | [info] XxHash32Bench.net_jpountz_xxhash_pure 1536 thrpt 5 2076760.547 ± 65112.334 ops/s 25 | [info] XxHash32Bench.net_jpountz_xxhash_pure 2048 thrpt 5 1582100.654 ± 74129.324 ops/s 26 | [info] XxHash32Bench.net_jpountz_xxhash_unsafe 8 thrpt 5 134161752.760 ± 5035419.628 ops/s 27 | [info] XxHash32Bench.net_jpountz_xxhash_unsafe 128 thrpt 5 40852921.273 ± 2042634.150 ops/s 28 | [info] XxHash32Bench.net_jpountz_xxhash_unsafe 512 thrpt 5 12788488.138 ± 472049.488 ops/s 29 | [info] XxHash32Bench.net_jpountz_xxhash_unsafe 1024 thrpt 5 7085539.188 ± 935685.614 ops/s 30 | [info] XxHash32Bench.net_jpountz_xxhash_unsafe 1536 thrpt 5 4837437.179 ± 248957.476 ops/s 31 | [info] XxHash32Bench.net_jpountz_xxhash_unsafe 2048 thrpt 5 3613914.693 ± 80983.807 ops/s 32 | [success] Total time: 782 s, completed Aug 20, 2019, 10:01:32 PM 33 | sbt:Hashing> -------------------------------------------------------------------------------- /src/main/scala/com/desmondyeung/hashing/XxHash32.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 Desmond Yeung 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.desmondyeung.hashing

import java.lang.Integer.{rotateLeft => rotl32}

/*
 * Scala implementation of Yann Collet's XxHash32 algorithm.
 * See https://github.com/Cyan4973/xxHash
 *
 * All arithmetic is plain 32-bit Int arithmetic and is expected to wrap.
 */
object XxHash32 extends Hash32 {
  val Prime1 = -1640531535
  val Prime2 = -2048144777
  val Prime3 = -1028477379
  val Prime4 = 668265263
  val Prime5 = 374761393

  /*
   * Hashes one byte: the accumulator starts at seed + Prime5 + 1 (the input
   * length), absorbs the byte as an unsigned value and is then avalanched.
   */
  final def hashByte(input: Byte, seed: Int): Int = {
    val start = seed + Prime5 + 1
    avalanche(processByte(start, input & 0xFF))
  }

  /*
   * Hashes one Int: the accumulator starts at seed + Prime5 + 4 (the input
   * length), absorbs the value in a single 4-byte step and is then avalanched.
   */
  final def hashInt(input: Int, seed: Int): Int = {
    val start = seed + Prime5 + 4
    avalanche(processInt(start, input))
  }

  /*
   * Hashes one Long as two 4-byte steps: the low 32 bits are absorbed first,
   * then the high 32 bits. The accumulator starts at seed + Prime5 + 8.
   */
  final def hashLong(input: Long, seed: Int): Int = {
    val low   = input.toInt
    val high  = (input >> 32).toInt
    val start = seed + Prime5 + 8
    avalanche(processInt(processInt(start, low), high))
  }

  /* One accumulator round of the 16-byte stripe loop. */
  private[hashing] final def round(acc: Int, input: Int): Int = {
    val mixed = acc + input * Prime2
    rotl32(mixed, 13) * Prime1
  }

  /*
   * Absorbs the trailing `length` bytes starting at `offset` into `hash`
   * (4 bytes at a time while possible, then byte by byte) and returns the
   * avalanched result. A non-positive `length` only applies the avalanche.
   */
  private[hashing] final def finalize(hash: Int, input: Array[Byte], offset: Long, length: Int): Int = {
    val end = offset + length
    var pos = offset
    var acc = hash

    // whole 4-byte words first
    while (end - pos >= 4) {
      acc = processInt(acc, UnsafeUtil.getInt(input, pos))
      pos += 4
    }

    // then whatever single bytes are left
    while (pos < end) {
      acc = processByte(acc, UnsafeUtil.getUnsignedByte(input, pos))
      pos += 1
    }

    avalanche(acc)
  }

  /*
   * Hashes `length` bytes starting at `offset`. Inputs of 16 bytes or more run
   * the four-lane stripe loop and merge the lanes; shorter inputs start from
   * seed + Prime5. In both cases the length is added and the remaining bytes
   * go through `finalize`.
   */
  private[hashing] final def hashBytes(input: Array[Byte], offset: Long, length: Int, seed: Int): Int =
    if (length < 16) {
      finalize(seed + Prime5 + length, input, offset, length)
    } else {
      var lane1 = seed + Prime1 + Prime2
      var lane2 = seed + Prime2
      var lane3 = seed
      var lane4 = seed - Prime1

      var pos  = offset
      var left = length

      // length >= 16 here, so this loop always runs at least once
      while (left >= 16) {
        lane1 = round(lane1, UnsafeUtil.getInt(input, pos))
        lane2 = round(lane2, UnsafeUtil.getInt(input, pos + 4L))
        lane3 = round(lane3, UnsafeUtil.getInt(input, pos + 8L))
        lane4 = round(lane4, UnsafeUtil.getInt(input, pos + 12L))

        pos += 16
        left -= 16
      }

      val merged = rotl32(lane1, 1) + rotl32(lane2, 7) + rotl32(lane3, 12) + rotl32(lane4, 18)

      finalize(merged + length, input, pos, left)
    }

  /* Absorbs one byte value (passed as an Int) into the accumulator. */
  private final def processByte(hash: Int, input: Int): Int = {
    val mixed = hash + input * Prime5
    rotl32(mixed, 11) * Prime1
  }

  /* Absorbs one 4-byte word into the accumulator. */
  private final def processInt(hash: Int, input: Int): Int = {
    val mixed = hash + input * Prime3
    rotl32(mixed, 17) * Prime4
  }

  /* Final bit mixing: xor-shift / multiply / xor-shift / multiply / xor-shift. */
  private final def avalanche(hash: Int): Int = {
    var h = hash
    h ^= h >>> 15
    h *= Prime2
    h ^= h >>> 13
    h *= Prime3
    h ^= h >>> 16
    h
  }
}
--------------------------------------------------------------------------------
/src/main/scala/com/desmondyeung/hashing/StreamingXxHash32.scala:
--------------------------------------------------------------------------------
/*
* Copyright 2019 Desmond Yeung
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.desmondyeung.hashing

import java.lang.Integer.{rotateLeft => rotl32}

/*
 * Streaming Scala implementation of Yann Collet's XxHash32 algorithm.
 * See https://github.com/Cyan4973/xxHash
 */
object StreamingXxHash32 {
  /* Creates a fresh streaming hasher initialised with `seed`. */
  def apply(seed: Int) = new StreamingXxHash32(seed)
}

/*
 * Incremental XxHash32. Bytes are fed through `update`; complete 16-byte
 * stripes are folded into the four lanes v1..v4 immediately and any trailing
 * bytes (fewer than 16) are kept in `buffer` until more input arrives or
 * `value` is read. Instances hold mutable state.
 */
final class StreamingXxHash32(seed: Int) extends StreamingHash32 {

  // Bytes that have not yet filled a complete 16-byte stripe.
  private[this] final val buffer      = new Array[Byte](16)
  private[this] final var v1          = seed + XxHash32.Prime1 + XxHash32.Prime2
  private[this] final var v2          = seed + XxHash32.Prime2
  private[this] final var v3          = seed
  private[this] final var v4          = seed - XxHash32.Prime1
  // Kept as a Long: an Int counter turns negative after 2^31 bytes, which made
  // `totalLength >= 16` false and sent `value` down the short-input branch.
  private[this] final var totalLength = 0L
  // Number of valid bytes currently held in `buffer` (always < 16 between calls).
  private[this] final var bufferSize  = 0

  /* Restores the state the instance had right after construction. */
  final def reset(): Unit = {
    v1 = seed + XxHash32.Prime1 + XxHash32.Prime2
    v2 = seed + XxHash32.Prime2
    v3 = seed
    v4 = seed - XxHash32.Prime1
    totalLength = 0L
    bufferSize = 0
  }

  /*
   * Returns the hash of everything fed in since construction or the last
   * `reset()`. Does not modify the state, so more input may follow.
   */
  final def value: Int = {
    val base =
      if (totalLength >= 16L) {
        rotl32(v1, 1) + rotl32(v2, 7) + rotl32(v3, 12) + rotl32(v4, 18)
      } else {
        // the lanes were never used: fewer than 16 bytes seen in total
        seed + XxHash32.Prime5
      }

    // Only the low 32 bits of the byte count take part in the 32-bit hash.
    val hash = base + totalLength.toInt

    XxHash32.finalize(hash, buffer, UnsafeUtil.ByteArrayBase, bufferSize)
  }

  /*
   * Feeds `length` bytes starting at `offset` into the hash. `offset` is
   * passed straight to the UnsafeUtil accessors together with `input`.
   */
  private[hashing] final def update(input: Array[Byte], offset: Long, length: Int): Unit = {
    totalLength += length
    val newBuffSize = bufferSize + length
    if (newBuffSize < 16) {
      // Still no complete stripe: just append to the pending bytes.
      UnsafeUtil.copyMemory(input, offset, buffer, bufferSize + UnsafeUtil.ByteArrayBase, length)
      bufferSize = newBuffSize
    } else {
      var off         = offset
      var unprocessed = length

      if (bufferSize > 0) {
        // Top the pending bytes up to a full stripe and consume it.
        val remaining = 16 - bufferSize
        UnsafeUtil.copyMemory(input, offset, buffer, bufferSize + UnsafeUtil.ByteArrayBase, remaining)

        v1 = XxHash32.round(v1, UnsafeUtil.getInt(buffer, UnsafeUtil.ByteArrayBase))
        v2 = XxHash32.round(v2, UnsafeUtil.getInt(buffer, UnsafeUtil.ByteArrayBase + 4L))
        v3 = XxHash32.round(v3, UnsafeUtil.getInt(buffer, UnsafeUtil.ByteArrayBase + 8L))
        v4 = XxHash32.round(v4, UnsafeUtil.getInt(buffer, UnsafeUtil.ByteArrayBase + 12L))

        off += remaining
        unprocessed -= remaining
        bufferSize = 0
      }

      // Consume complete stripes directly from the caller's memory.
      while (unprocessed >= 16) {
        v1 = XxHash32.round(v1, UnsafeUtil.getInt(input, off))
        v2 = XxHash32.round(v2, UnsafeUtil.getInt(input, off + 4L))
        v3 = XxHash32.round(v3, UnsafeUtil.getInt(input, off + 8L))
        v4 = XxHash32.round(v4, UnsafeUtil.getInt(input, off + 12L))

        off += 16
        unprocessed -= 16
      }

      if (unprocessed > 0) {
        // Keep the tail for the next update() or for value.
        UnsafeUtil.copyMemory(input, off, buffer, UnsafeUtil.ByteArrayBase, unprocessed)
        bufferSize = unprocessed
      }
    }
  }
}
--------------------------------------------------------------------------------
/bench/src/main/resource/results/XxHash64BenchResults.txt:
--------------------------------------------------------------------------------
[info] # Run complete. Total time: 00:16:12
[info] REMEMBER: The numbers below are just data. To gain reusable insights, you need to follow up on
[info] why the numbers are the way they are. Use profilers (see -prof, -lprof), design factorial
[info] experiments, perform baseline and negative tests that provide experimental control, make sure
[info] the benchmarking environment is safe on JVM/OS/HW level, ask for reviews from the domain experts.
[info] Do not assume the numbers tell you what you want them to tell.
7 | [info] Benchmark (inputSize) Mode Cnt Score Error Units 8 | [info] XxHash64Bench.com_desmondyeung_hashing 8 thrpt 5 190852324.077 ± 5658859.753 ops/s 9 | [info] XxHash64Bench.com_desmondyeung_hashing 128 thrpt 5 58888583.803 ± 634119.370 ops/s 10 | [info] XxHash64Bench.com_desmondyeung_hashing 512 thrpt 5 24604486.760 ± 275765.670 ops/s 11 | [info] XxHash64Bench.com_desmondyeung_hashing 1024 thrpt 5 13836850.373 ± 184524.530 ops/s 12 | [info] XxHash64Bench.com_desmondyeung_hashing 1536 thrpt 5 9591339.692 ± 82457.246 ops/s 13 | [info] XxHash64Bench.com_desmondyeung_hashing 2048 thrpt 5 7352860.162 ± 117217.475 ops/s 14 | [info] XxHash64Bench.net_jpountz_xxhash_jni 8 thrpt 5 7152589.007 ± 447581.357 ops/s 15 | [info] XxHash64Bench.net_jpountz_xxhash_jni 128 thrpt 5 6490503.456 ± 230647.108 ops/s 16 | [info] XxHash64Bench.net_jpountz_xxhash_jni 512 thrpt 5 5585357.802 ± 77475.894 ops/s 17 | [info] XxHash64Bench.net_jpountz_xxhash_jni 1024 thrpt 5 4853306.754 ± 276307.907 ops/s 18 | [info] XxHash64Bench.net_jpountz_xxhash_jni 1536 thrpt 5 4089062.237 ± 127466.632 ops/s 19 | [info] XxHash64Bench.net_jpountz_xxhash_jni 2048 thrpt 5 3665968.804 ± 72010.797 ops/s 20 | [info] XxHash64Bench.net_jpountz_xxhash_pure 8 thrpt 5 93522990.590 ± 2323318.279 ops/s 21 | [info] XxHash64Bench.net_jpountz_xxhash_pure 128 thrpt 5 19458130.129 ± 1155318.184 ops/s 22 | [info] XxHash64Bench.net_jpountz_xxhash_pure 512 thrpt 5 6057375.874 ± 204330.214 ops/s 23 | [info] XxHash64Bench.net_jpountz_xxhash_pure 1024 thrpt 5 3094880.189 ± 48461.958 ops/s 24 | [info] XxHash64Bench.net_jpountz_xxhash_pure 1536 thrpt 5 2150755.068 ± 231286.136 ops/s 25 | [info] XxHash64Bench.net_jpountz_xxhash_pure 2048 thrpt 5 1642711.800 ± 57553.147 ops/s 26 | [info] XxHash64Bench.net_jpountz_xxhash_unsafe 8 thrpt 5 138411866.674 ± 8485795.143 ops/s 27 | [info] XxHash64Bench.net_jpountz_xxhash_unsafe 128 thrpt 5 46821796.895 ± 459016.261 ops/s 28 | [info] XxHash64Bench.net_jpountz_xxhash_unsafe 512 thrpt 5 
20018540.106 ± 132129.200 ops/s 29 | [info] XxHash64Bench.net_jpountz_xxhash_unsafe 1024 thrpt 5 11040708.417 ± 170272.302 ops/s 30 | [info] XxHash64Bench.net_jpountz_xxhash_unsafe 1536 thrpt 5 8353461.187 ± 934291.061 ops/s 31 | [info] XxHash64Bench.net_jpountz_xxhash_unsafe 2048 thrpt 5 5886326.566 ± 73892.885 ops/s 32 | [info] XxHash64Bench.net_openhft_hashing 8 thrpt 5 154836584.889 ± 1832416.876 ops/s 33 | [info] XxHash64Bench.net_openhft_hashing 128 thrpt 5 52375849.975 ± 2606768.814 ops/s 34 | [info] XxHash64Bench.net_openhft_hashing 512 thrpt 5 23084417.332 ± 526239.888 ops/s 35 | [info] XxHash64Bench.net_openhft_hashing 1024 thrpt 5 12772614.056 ± 565939.397 ops/s 36 | [info] XxHash64Bench.net_openhft_hashing 1536 thrpt 5 9319278.528 ± 286309.114 ops/s 37 | [info] XxHash64Bench.net_openhft_hashing 2048 thrpt 5 6963195.027 ± 77335.953 ops/s 38 | [success] Total time: 976 s, completed Aug 20, 2019, 9:32:29 PM 39 | sbt:Hashing> -------------------------------------------------------------------------------- /src/main/scala/com/desmondyeung/hashing/StreamingXxHash64.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 Desmond Yeung 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
*/

package com.desmondyeung.hashing

import java.lang.Long.{rotateLeft => rotl64}

/*
 * Streaming Scala implementation of Yann Collet's XxHash64 algorithm.
 * See https://github.com/Cyan4973/xxHash
 */
object StreamingXxHash64 {
  /* Creates a fresh streaming hasher initialised with `seed`. */
  def apply(seed: Long) = new StreamingXxHash64(seed)
}

/*
 * Incremental XxHash64. Bytes are fed through `update`; complete 32-byte
 * stripes are folded into the four lanes v1..v4 immediately and any trailing
 * bytes (fewer than 32) are kept in `buffer` until more input arrives or
 * `value` is read. Instances hold mutable state.
 */
final class StreamingXxHash64(seed: Long) extends StreamingHash64 {

  // Bytes that have not yet filled a complete 32-byte stripe.
  private[this] final val buffer      = new Array[Byte](32)
  private[this] final var v1          = seed + XxHash64.Prime1 + XxHash64.Prime2
  private[this] final var v2          = seed + XxHash64.Prime2
  private[this] final var v3          = seed
  private[this] final var v4          = seed - XxHash64.Prime1
  // Kept as a Long: an Int counter wraps negative after 2^31 bytes, which both
  // selected the short-input branch in `value` and added a wrong length.
  private[this] final var totalLength = 0L
  // Number of valid bytes currently held in `buffer` (always < 32 between calls).
  private[this] final var bufferSize  = 0

  /* Restores the state the instance had right after construction. */
  final def reset(): Unit = {
    v1 = seed + XxHash64.Prime1 + XxHash64.Prime2
    v2 = seed + XxHash64.Prime2
    v3 = seed
    v4 = seed - XxHash64.Prime1
    totalLength = 0L
    bufferSize = 0
  }

  /*
   * Returns the hash of everything fed in since construction or the last
   * `reset()`. Does not modify the state, so more input may follow.
   */
  final def value: Long = {
    var hash = 0L
    if (totalLength >= 32L) {
      hash = rotl64(v1, 1) + rotl64(v2, 7) + rotl64(v3, 12) + rotl64(v4, 18)
      hash = XxHash64.mergeRound(hash, v1)
      hash = XxHash64.mergeRound(hash, v2)
      hash = XxHash64.mergeRound(hash, v3)
      hash = XxHash64.mergeRound(hash, v4)
    } else {
      // the lanes were never used: fewer than 32 bytes seen in total
      hash = seed + XxHash64.Prime5
    }

    hash += totalLength

    XxHash64.finalize(hash, buffer, UnsafeUtil.ByteArrayBase, bufferSize)
  }

  /*
   * Feeds `length` bytes starting at `offset` into the hash. `offset` is
   * passed straight to the UnsafeUtil accessors together with `input`.
   */
  private[hashing] final def update(input: Array[Byte], offset: Long, length: Int): Unit = {
    totalLength += length
    val newBuffSize = bufferSize + length
    if (newBuffSize < 32) {
      // Still no complete stripe: just append to the pending bytes.
      UnsafeUtil.copyMemory(input, offset, buffer, bufferSize + UnsafeUtil.ByteArrayBase, length)
      bufferSize = newBuffSize
    } else {
      var off         = offset
      var unprocessed = length

      if (bufferSize > 0) {
        // Top the pending bytes up to a full stripe and consume it.
        val remaining = 32 - bufferSize
        UnsafeUtil.copyMemory(input, offset, buffer, bufferSize + UnsafeUtil.ByteArrayBase, remaining)

        v1 = XxHash64.round(v1, UnsafeUtil.getLong(buffer, UnsafeUtil.ByteArrayBase))
        v2 = XxHash64.round(v2, UnsafeUtil.getLong(buffer, UnsafeUtil.ByteArrayBase + 8L))
        v3 = XxHash64.round(v3, UnsafeUtil.getLong(buffer, UnsafeUtil.ByteArrayBase + 16L))
        v4 = XxHash64.round(v4, UnsafeUtil.getLong(buffer, UnsafeUtil.ByteArrayBase + 24L))

        off += remaining
        unprocessed -= remaining
        bufferSize = 0
      }

      // Consume complete stripes directly from the caller's memory.
      while (unprocessed >= 32) {
        v1 = XxHash64.round(v1, UnsafeUtil.getLong(input, off))
        v2 = XxHash64.round(v2, UnsafeUtil.getLong(input, off + 8L))
        v3 = XxHash64.round(v3, UnsafeUtil.getLong(input, off + 16L))
        v4 = XxHash64.round(v4, UnsafeUtil.getLong(input, off + 24L))

        off += 32
        unprocessed -= 32
      }

      if (unprocessed > 0) {
        // Keep the tail for the next update() or for value.
        UnsafeUtil.copyMemory(input, off, buffer, UnsafeUtil.ByteArrayBase, unprocessed)
        bufferSize = unprocessed
      }
    }
  }
}
--------------------------------------------------------------------------------
/src/main/scala/com/desmondyeung/hashing/XxHash64.scala:
--------------------------------------------------------------------------------
/*
* Copyright 2019 Desmond Yeung
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.desmondyeung.hashing

import java.lang.Long.{rotateLeft => rotl64}

/*
 * Scala implementation of Yann Collet's XxHash64 algorithm.
 * See https://github.com/Cyan4973/xxHash
 *
 * All arithmetic is plain 64-bit Long arithmetic and is expected to wrap.
 */
object XxHash64 extends Hash64 {
  val Prime1 = -7046029288634856825L
  val Prime2 = -4417276706812531889L
  val Prime3 = 1609587929392839161L
  val Prime4 = -8796714831421723037L
  val Prime5 = 2870177450012600261L

  /*
   * Hashes one byte: the accumulator starts at seed + Prime5 + 1 (the input
   * length), absorbs the byte as an unsigned value and is then avalanched.
   */
  final def hashByte(input: Byte, seed: Long): Long = {
    val start = seed + Prime5 + 1L
    avalanche(processByte(start, input & 0xFF))
  }

  /*
   * Hashes one Int: the value is widened without sign extension and absorbed
   * in a single 4-byte step. The accumulator starts at seed + Prime5 + 4.
   */
  final def hashInt(input: Int, seed: Long): Long = {
    val unsigned = input & 0xFFFFFFFFL
    val start    = seed + Prime5 + 4L
    avalanche(processInt(start, unsigned))
  }

  /*
   * Hashes one Long in a single 8-byte step. The accumulator starts at
   * seed + Prime5 + 8.
   */
  final def hashLong(input: Long, seed: Long): Long = {
    val start = seed + Prime5 + 8L
    avalanche(processLong(start, input))
  }

  /* One accumulator round of the 32-byte stripe loop. */
  private[hashing] final def round(acc: Long, input: Long): Long = {
    val mixed = acc + input * Prime2
    rotl64(mixed, 31) * Prime1
  }

  /* Folds one lane value `v` into the merged accumulator `acc`. */
  private[hashing] final def mergeRound(acc: Long, v: Long): Long = {
    val folded = acc ^ round(0L, v)
    folded * Prime1 + Prime4
  }

  /*
   * Absorbs the trailing `length` bytes starting at `offset` into `hash`
   * (8 bytes at a time while possible, then at most one 4-byte word, then
   * byte by byte) and returns the avalanched result. A non-positive `length`
   * only applies the avalanche.
   */
  private[hashing] final def finalize(hash: Long, input: Array[Byte], offset: Long, length: Int): Long = {
    val end = offset + length
    var pos = offset
    var acc = hash

    // whole 8-byte words first
    while (end - pos >= 8) {
      acc = processLong(acc, UnsafeUtil.getLong(input, pos))
      pos += 8
    }

    // at most one 4-byte word can remain after the loop above
    if (end - pos >= 4) {
      acc = processInt(acc, UnsafeUtil.getUnsignedInt(input, pos))
      pos += 4
    }

    // then whatever single bytes are left
    while (pos < end) {
      acc = processByte(acc, UnsafeUtil.getUnsignedByte(input, pos))
      pos += 1
    }

    avalanche(acc)
  }

  /*
   * Hashes `length` bytes starting at `offset`. Inputs of 32 bytes or more run
   * the four-lane stripe loop and merge the lanes; shorter inputs start from
   * seed + Prime5. In both cases the length is added and the remaining bytes
   * go through `finalize`.
   */
  private[hashing] final def hashBytes(input: Array[Byte], offset: Long, length: Int, seed: Long): Long =
    if (length < 32) {
      finalize(seed + Prime5 + length, input, offset, length)
    } else {
      var lane1 = seed + Prime1 + Prime2
      var lane2 = seed + Prime2
      var lane3 = seed
      var lane4 = seed - Prime1

      var pos  = offset
      var left = length

      // length >= 32 here, so this loop always runs at least once
      while (left >= 32) {
        lane1 = round(lane1, UnsafeUtil.getLong(input, pos))
        lane2 = round(lane2, UnsafeUtil.getLong(input, pos + 8L))
        lane3 = round(lane3, UnsafeUtil.getLong(input, pos + 16L))
        lane4 = round(lane4, UnsafeUtil.getLong(input, pos + 24L))

        pos += 32
        left -= 32
      }

      var merged = rotl64(lane1, 1) + rotl64(lane2, 7) + rotl64(lane3, 12) + rotl64(lane4, 18)
      merged = mergeRound(merged, lane1)
      merged = mergeRound(merged, lane2)
      merged = mergeRound(merged, lane3)
      merged = mergeRound(merged, lane4)

      finalize(merged + length, input, pos, left)
    }

  /* Absorbs one byte value (passed as an Int) into the accumulator. */
  private final def processByte(hash: Long, input: Int): Long = {
    val mixed = hash ^ (input * Prime5)
    rotl64(mixed, 11) * Prime1
  }

  /* Absorbs one 4-byte word (already widened to a Long) into the accumulator. */
  private final def processInt(hash: Long, input: Long): Long = {
    val mixed = hash ^ (input * Prime1)
    rotl64(mixed, 23) * Prime2 + Prime3
  }

  /* Absorbs one 8-byte word into the accumulator. */
  private final def processLong(hash: Long, input: Long): Long = {
    val mixed = hash ^ round(0L, input)
    rotl64(mixed, 27) * Prime1 + Prime4
  }

  /* Final bit mixing: xor-shift / multiply / xor-shift / multiply / xor-shift. */
  private final def avalanche(hash: Long): Long = {
    var h = hash
    h ^= h >>> 33
    h *= Prime2
    h ^= h >>> 29
    h *= Prime3
    h ^= h >>> 32
    h
  }
}
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Scala-Hashing

[![Build Status](https://api.travis-ci.com/desmondyeung/scala-hashing.svg)](https://travis-ci.com/desmondyeung/scala-hashing)
[![codecov.io](http://codecov.io/github/desmondyeung/scala-hashing/coverage.svg?branch=master)](http://codecov.io/github/desmondyeung/scala-hashing?branch=master)
[![Maven
Central](https://maven-badges.herokuapp.com/maven-central/com.desmondyeung.hashing/scala-hashing_2.13/badge.svg)](https://maven-badges.herokuapp.com/maven-central/com.desmondyeung.hashing/scala-hashing_2.13) 6 | 7 | ## Overview 8 | Fast non-cryptographic hash functions for Scala. This library provides APIs for computing 32-bit and 64-bit hashes. 9 | 10 | Currently implemented hash functions 11 | * [MurmurHash3](https://github.com/aappleby/smhasher) (32-bit) 12 | * [XxHash](https://github.com/Cyan4973/xxHash) (32-bit and 64-bit) 13 | 14 | Hash functions in this library can be accessed via either a standard API for hashing primitives, byte arrays, or Java ByteBuffers (direct and non-direct), or a streaming API for hashing stream-like objects such as InputStreams, Java NIO Channels, or Akka Streams. Hash functions should produce consistent output regardless of platform or endianness. 15 | 16 | This library uses the `sun.misc.Unsafe` API internally. I might explore using the `VarHandle` API introduced in Java 9 in the future, but am currently still supporting Java 8. 
17 | 18 | ## Performance 19 | 20 | Benchmarked against various other open-source implementations 21 | * [Guava](https://github.com/google/guava) (MurmurHash3) 22 | * [LZ4 Java](https://github.com/lz4/lz4-java) (XxHash32 and XxHash64 - Includes JNI binding, pure Java, and Java+Unsafe implementations) 23 | * [Scala](https://github.com/scala/scala) (Scala's built-in `scala.util.hashing.MurmurHash3`) 24 | * [Zero-Allocation-Hashing](https://github.com/OpenHFT/Zero-Allocation-Hashing) (XxHash64) 25 | 26 | ### MurmurHash3_32 27 | ![MurmurHash3_32](https://github.com/desmondyeung/scala-hashing/blob/master/bench/src/main/resource/results/MurmurHash3_32.png) 28 | 29 | ### XxHash32 30 | ![XxHash32](https://github.com/desmondyeung/scala-hashing/blob/master/bench/src/main/resource/results/XxHash32.png) 31 | 32 | ### XxHash64 33 | ![XxHash64](https://github.com/desmondyeung/scala-hashing/blob/master/bench/src/main/resource/results/XxHash64.png) 34 | 35 | 36 | ### Running Locally 37 | 38 | Benchmarks are located in the `bench` subproject and can be run using the [sbt-jmh](https://github.com/ktoso/sbt-jmh) plugin. 39 | 40 | To run all benchmarks with default settings 41 | ```sbt 42 | bench/jmh:run 43 | ``` 44 | To run a specific benchmark with custom settings 45 | ```sbt 46 | bench/jmh:run -f 2 -wi 5 -i 5 XxHash64Bench 47 | ``` 48 | 49 | ## Getting Started 50 | 51 | ```scala 52 | libraryDependencies += "com.desmondyeung.hashing" %% "scala-hashing" % "0.1.0" 53 | ``` 54 | 55 | ### Examples 56 | 57 | This library defines the interfaces `Hash32` and `StreamingHash32` for computing 32-bit hashes and `Hash64` and `StreamingHash64` for computing 64-bit hashes. Classes extending `StreamingHash32` or `StreamingHash64` are not thread-safe. 
58 | 59 | The public API for `Hash64` and `StreamingHash64` can be seen below: 60 | ```scala 61 | trait Hash64 { 62 | def hashByte(input: Byte, seed: Long): Long 63 | def hashInt(input: Int, seed: Long): Long 64 | def hashLong(input: Long, seed: Long): Long 65 | def hashByteArray(input: Array[Byte], seed: Long): Long 66 | def hashByteArray(input: Array[Byte], offset: Int, length: Int, seed: Long): Long 67 | def hashByteBuffer(input: ByteBuffer, seed: Long): Long 68 | def hashByteBuffer(input: ByteBuffer, offset: Int, length: Int, seed: Long): Long 69 | } 70 | 71 | trait StreamingHash64 { 72 | def reset(): Unit 73 | def value: Long 74 | def updateByteArray(input: Array[Byte], offset: Int, length: Int): Unit 75 | def updateByteBuffer(input: ByteBuffer, offset: Int, length: Int): Unit 76 | } 77 | ``` 78 | 79 | Using the standard API 80 | ```scala 81 | import com.desmondyeung.hashing.XxHash64 82 | import java.nio.ByteBuffer 83 | 84 | // hash a long 85 | val hash = XxHash64.hashLong(123, seed = 0) 86 | 87 | // hash an Array[Byte] 88 | val hash = XxHash64.hashByteArray(Array[Byte](123), seed = 0) 89 | 90 | // hash a ByteBuffer 91 | val hash = XxHash64.hashByteBuffer(ByteBuffer.wrap(Array[Byte](123)), seed = 0) 92 | ``` 93 | 94 | Using the streaming API 95 | ```scala 96 | import com.desmondyeung.hashing.StreamingXxHash64 97 | import java.nio.ByteBuffer 98 | import java.io.FileInputStream 99 | 100 | val checksum = StreamingXxHash64(seed = 0) 101 | val channel = new FileInputStream("/path/to/file.txt").getChannel 102 | val chunk = ByteBuffer.allocate(1024) 103 | 104 | var bytesRead = channel.read(chunk) 105 | while (bytesRead > 0) { 106 | checksum.updateByteBuffer(chunk, 0, bytesRead) 107 | chunk.rewind 108 | bytesRead = channel.read(chunk) 109 | } 110 | 111 | val hash = checksum.value 112 | ``` 113 | 114 | ## License 115 | 116 | Licensed under the Apache License, Version 2.0 (the "License").
--------------------------------------------------------------------------------
/src/main/scala/com/desmondyeung/hashing/Hash.scala:
--------------------------------------------------------------------------------
/*
 * Copyright 2019 Desmond Yeung
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.desmondyeung.hashing

import java.nio.ByteBuffer
import sun.nio.ch.DirectBuffer

object HashUtil {

  /*
   * Throws IndexOutOfBoundsException unless `offset` and `length` are both
   * non-negative and the range [offset, offset + length) fits inside an input
   * of `inputLength` elements.
   */
  final def checkBounds(inputLength: Int, offset: Int, length: Int): Unit =
    if (offset < 0 || length < 0 || inputLength - offset < length) {
      throw new IndexOutOfBoundsException()
    }

  /*
   * Copies `length` bytes starting at absolute index `offset` out of `input`
   * into a new array. Works on a duplicate, so the caller's position and limit
   * are left untouched. Used for buffers that expose neither a backing array
   * nor a native address (for example read-only heap buffers).
   */
  private[hashing] final def copyToArray(input: ByteBuffer, offset: Int, length: Int): Array[Byte] = {
    val bytes = new Array[Byte](length)
    val view  = input.duplicate()
    view.clear() // position = 0, limit = capacity; the content is not erased
    view.position(offset)
    view.get(bytes, 0, length)
    bytes
  }
}

/*
 * One-shot 32-bit hash functions. ByteBuffer inputs are addressed by absolute
 * index over the buffer's capacity; position and limit are not consulted.
 */
trait Hash32 {
  def hashByte(input: Byte, seed: Int): Int
  def hashInt(input: Int, seed: Int): Int
  def hashLong(input: Long, seed: Int): Int

  /* Hashes the whole array. */
  final def hashByteArray(input: Array[Byte], seed: Int): Int =
    hashBytes(input, UnsafeUtil.ByteArrayBase, input.length, seed)

  /* Hashes `length` bytes of `input` starting at `offset`; bounds are checked first. */
  final def hashByteArray(input: Array[Byte], offset: Int, length: Int, seed: Int): Int = {
    HashUtil.checkBounds(input.length, offset, length)
    hashBytes(input, UnsafeUtil.ByteArrayBase + offset, length, seed)
  }

  /* Hashes all `capacity` bytes of the buffer. */
  final def hashByteBuffer(input: ByteBuffer, seed: Int): Int =
    hashBuffer(input, 0, input.capacity, seed)

  /* Hashes `length` bytes of the buffer starting at absolute index `offset`; bounds are checked first. */
  final def hashByteBuffer(input: ByteBuffer, offset: Int, length: Int, seed: Int): Int = {
    HashUtil.checkBounds(input.capacity, offset, length)
    hashBuffer(input, offset, length, seed)
  }

  /*
   * Picks the access path for a buffer: its backing array when exposed, its
   * native address when direct, and otherwise a copy of the requested range
   * (previously such buffers failed with a ClassCastException).
   */
  private def hashBuffer(input: ByteBuffer, offset: Int, length: Int, seed: Int): Int =
    if (input.hasArray) {
      hashBytes(input.array, UnsafeUtil.ByteArrayBase + input.arrayOffset + offset, length, seed)
    } else if (input.isDirect) {
      hashBytes(null, input.asInstanceOf[DirectBuffer].address + offset, length, seed)
    } else {
      hashBytes(HashUtil.copyToArray(input, offset, length), UnsafeUtil.ByteArrayBase, length, seed)
    }

  private[hashing] def hashBytes(input: Array[Byte], offset: Long, length: Int, seed: Int): Int
}

/*
 * One-shot 64-bit hash functions. ByteBuffer inputs are addressed by absolute
 * index over the buffer's capacity; position and limit are not consulted.
 */
trait Hash64 {
  def hashByte(input: Byte, seed: Long): Long
  def hashInt(input: Int, seed: Long): Long
  def hashLong(input: Long, seed: Long): Long

  /* Hashes the whole array. */
  final def hashByteArray(input: Array[Byte], seed: Long): Long =
    hashBytes(input, UnsafeUtil.ByteArrayBase, input.length, seed)

  /* Hashes `length` bytes of `input` starting at `offset`; bounds are checked first. */
  final def hashByteArray(input: Array[Byte], offset: Int, length: Int, seed: Long): Long = {
    HashUtil.checkBounds(input.length, offset, length)
    hashBytes(input, UnsafeUtil.ByteArrayBase + offset, length, seed)
  }

  /* Hashes all `capacity` bytes of the buffer. */
  final def hashByteBuffer(input: ByteBuffer, seed: Long): Long =
    hashBuffer(input, 0, input.capacity, seed)

  /* Hashes `length` bytes of the buffer starting at absolute index `offset`; bounds are checked first. */
  final def hashByteBuffer(input: ByteBuffer, offset: Int, length: Int, seed: Long): Long = {
    HashUtil.checkBounds(input.capacity, offset, length)
    hashBuffer(input, offset, length, seed)
  }

  /*
   * Picks the access path for a buffer: its backing array when exposed, its
   * native address when direct, and otherwise a copy of the requested range
   * (previously such buffers failed with a ClassCastException).
   */
  private def hashBuffer(input: ByteBuffer, offset: Int, length: Int, seed: Long): Long =
    if (input.hasArray) {
      hashBytes(input.array, UnsafeUtil.ByteArrayBase + input.arrayOffset + offset, length, seed)
    } else if (input.isDirect) {
      hashBytes(null, input.asInstanceOf[DirectBuffer].address + offset, length, seed)
    } else {
      hashBytes(HashUtil.copyToArray(input, offset, length), UnsafeUtil.ByteArrayBase, length, seed)
    }

  private[hashing] def hashBytes(input: Array[Byte], offset: Long, length: Int, seed: Long): Long
}

/*
 * Incremental 32-bit hash: feed input through the update methods, then read
 * `value`; `reset()` starts over.
 */
trait StreamingHash32 {
  def reset(): Unit
  def value: Int

  /* Feeds `length` bytes of `input` starting at `offset`; bounds are checked first. */
  final def updateByteArray(input: Array[Byte], offset: Int, length: Int): Unit = {
    HashUtil.checkBounds(input.length, offset, length)
    update(input, UnsafeUtil.ByteArrayBase + offset, length)
  }

  /*
   * Feeds `length` bytes of the buffer starting at absolute index `offset`;
   * bounds are checked against the buffer's capacity first. Buffers without a
   * backing array or native address are copied before hashing.
   */
  final def updateByteBuffer(input: ByteBuffer, offset: Int, length: Int): Unit = {
    HashUtil.checkBounds(input.capacity, offset, length)
    if (input.hasArray) {
      update(input.array, UnsafeUtil.ByteArrayBase + input.arrayOffset + offset, length)
    } else if (input.isDirect) {
      update(null, input.asInstanceOf[DirectBuffer].address + offset, length)
    } else {
      update(HashUtil.copyToArray(input, offset, length), UnsafeUtil.ByteArrayBase, length)
    }
  }

  private[hashing] def update(input: Array[Byte], offset: Long, length: Int): Unit
}

/*
 * Incremental 64-bit hash: feed input through the update methods, then read
 * `value`; `reset()` starts over.
 */
trait StreamingHash64 {
  def reset(): Unit
  def value: Long

  /* Feeds `length` bytes of `input` starting at `offset`; bounds are checked first. */
  final def updateByteArray(input: Array[Byte], offset: Int, length: Int): Unit = {
    HashUtil.checkBounds(input.length, offset, length)
    update(input, UnsafeUtil.ByteArrayBase + offset, length)
  }

  /*
   * Feeds `length` bytes of the buffer starting at absolute index `offset`;
   * bounds are checked against the buffer's capacity first. Buffers without a
   * backing array or native address are copied before hashing.
   */
  final def updateByteBuffer(input: ByteBuffer, offset: Int, length: Int): Unit = {
    HashUtil.checkBounds(input.capacity, offset, length)
    if (input.hasArray) {
      update(input.array, UnsafeUtil.ByteArrayBase + input.arrayOffset + offset, length)
    } else if (input.isDirect) {
      update(null, input.asInstanceOf[DirectBuffer].address + offset, length)
    } else {
      update(HashUtil.copyToArray(input, offset, length), UnsafeUtil.ByteArrayBase, length)
    }
  }

  private[hashing] def update(input: Array[Byte], offset: Long, length: Int): Unit
}
--------------------------------------------------------------------------------
/src/test/scala/com/desmondyeung/hashing/Hash32Behaviors.scala:
--------------------------------------------------------------------------------
/*
* Copyright 2019 Desmond Yeung
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.desmondyeung.hashing 18 | 19 | import scala.util.Random 20 | import java.nio.{ByteBuffer, ByteOrder} 21 | import org.scalatest.FunSpec 22 | 23 | trait Hash32Behaviors extends HashSpecUtils { this: FunSpec => 24 | 25 | val seed = Random.nextInt 26 | 27 | def hash32(underTest: Hash32, referenceImpl: (Array[Byte], Int) => Int) = { 28 | describe("when hashing primitives") { 29 | it("should correctly hash a Byte") { 30 | val input = byteBufferOfSize(1) 31 | 32 | val expected = referenceImpl(input.array, seed) 33 | val computed = underTest.hashByte(input.get(0), seed) 34 | 35 | assert(expected === computed) 36 | } 37 | 38 | it("should correctly hash an Int") { 39 | val input = byteBufferOfSize(4) 40 | 41 | val expected = referenceImpl(input.array, seed) 42 | val computed = underTest.hashInt(input.getInt(0), seed) 43 | 44 | assert(expected === computed) 45 | } 46 | 47 | it("should correctly hash a Long") { 48 | val input = byteBufferOfSize(8) 49 | 50 | val expected = referenceImpl(input.array, seed) 51 | val computed = underTest.hashLong(input.getLong(0), seed) 52 | 53 | assert(expected === computed) 54 | } 55 | } 56 | 57 | describe("when hashing a byte array") { 58 | it("should correctly hash an empty byte array") { 59 | val input = Array[Byte]() 60 | 61 | val expected = referenceImpl(input, seed) 62 | val computed = underTest.hashByteArray(input, seed) 63 | 64 | assert(expected === computed) 65 | } 66 | 67 | it("should correctly hash a byte array") { 68 | val input = 
byteBufferOfSize(Random.nextInt(1024)).array 69 | 70 | val expected = referenceImpl(input, seed) 71 | val computed = underTest.hashByteArray(input, seed) 72 | 73 | assert(expected === computed) 74 | } 75 | 76 | it("should correctly hash a byte array slice") { 77 | val input = byteBufferOfSize(Random.nextInt(1024)).array 78 | 79 | val expected = referenceImpl(input, seed) 80 | val computed = underTest.hashByteArray(input, 0, input.length, seed) 81 | 82 | assert(expected === computed) 83 | } 84 | 85 | it("should throw IndexOutOfBoundsException if offset is < 0") { 86 | val input = byteBufferOfSize(1).array 87 | intercept[IndexOutOfBoundsException] { 88 | underTest.hashByteArray(input, -1, input.length, seed) 89 | } 90 | } 91 | 92 | it("should throw IndexOutOfBoundsException if length is < 0") { 93 | val input = byteBufferOfSize(1).array 94 | intercept[IndexOutOfBoundsException] { 95 | underTest.hashByteArray(input, 0, -1, seed) 96 | } 97 | } 98 | 99 | it("should throw IndexOutOfBoundsException if offset > input length") { 100 | val input = byteBufferOfSize(1).array 101 | intercept[IndexOutOfBoundsException] { 102 | underTest.hashByteArray(input, input.length + 1, input.length, seed) 103 | } 104 | } 105 | 106 | it("should throw IndexOutOfBoundsException if length > input length") { 107 | val input = byteBufferOfSize(1).array 108 | intercept[IndexOutOfBoundsException] { 109 | underTest.hashByteArray(input, 0, input.length + 1, seed) 110 | } 111 | } 112 | } 113 | 114 | describe("when hashing a ByteBuffer") { 115 | it("should correctly hash a non-direct byte buffer") { 116 | val input = byteBufferOfSize(Random.nextInt(1024)) 117 | 118 | val expected = referenceImpl(input.array, seed) 119 | val computed = underTest.hashByteBuffer(input, seed) 120 | 121 | assert(expected === computed) 122 | } 123 | 124 | it("should correctly hash a non-direct ByteBuffer slice") { 125 | val input = byteBufferOfSize(Random.nextInt(1024)) 126 | 127 | val expected = referenceImpl(input.array, 
seed) 128 | val computed = underTest.hashByteBuffer(input, 0, input.capacity, seed) 129 | 130 | assert(expected === computed) 131 | } 132 | 133 | it("should correctly hash a direct ByteBuffer") { 134 | val input = byteBufferOfSize(Random.nextInt(1024), direct = true) 135 | val array = new Array[Byte](input.capacity) 136 | input.get(array) 137 | 138 | val expected = referenceImpl(array, seed) 139 | val computed = underTest.hashByteBuffer(input, seed) 140 | 141 | assert(expected === computed) 142 | } 143 | 144 | it("should correctly hash a direct ByteBuffer slice") { 145 | val input = byteBufferOfSize(Random.nextInt(1024), direct = true) 146 | val array = new Array[Byte](input.capacity) 147 | input.get(array) 148 | 149 | val expected = referenceImpl(array, seed) 150 | val computed = underTest.hashByteBuffer(input, 0, input.capacity, seed) 151 | 152 | assert(expected === computed) 153 | } 154 | 155 | it("should throw IndexOutOfBoundsException if offset is < 0") { 156 | intercept[IndexOutOfBoundsException] { 157 | val input = byteBufferOfSize(1) 158 | underTest.hashByteBuffer(input, -1, input.capacity, seed) 159 | } 160 | } 161 | 162 | it("should throw IndexOutOfBoundsException if length is < 0") { 163 | intercept[IndexOutOfBoundsException] { 164 | val input = byteBufferOfSize(1) 165 | underTest.hashByteBuffer(input, 0, -1, seed) 166 | } 167 | } 168 | 169 | it("should throw IndexOutOfBoundsException if offset > input length") { 170 | intercept[IndexOutOfBoundsException] { 171 | val input = byteBufferOfSize(1) 172 | underTest.hashByteBuffer(input, input.capacity + 1, input.capacity, seed) 173 | } 174 | } 175 | 176 | it("should throw IndexOutOfBoundsException if length > input length") { 177 | intercept[IndexOutOfBoundsException] { 178 | val input = byteBufferOfSize(1) 179 | underTest.hashByteBuffer(input, 0, input.capacity + 1, seed) 180 | } 181 | } 182 | } 183 | } 184 | } 185 | -------------------------------------------------------------------------------- 
/src/test/scala/com/desmondyeung/hashing/Hash64Behaviors.scala:
--------------------------------------------------------------------------------
/*
 * Copyright 2019 Desmond Yeung
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.desmondyeung.hashing

import scala.util.Random
import java.nio.{ByteBuffer, ByteOrder}
import org.scalatest.FunSpec

/**
 * Shared examples for one-shot 64-bit hash implementations. A spec mixes this trait in and calls
 * `hash64` with the implementation under test and a reference implementation to compare against.
 */
trait Hash64Behaviors extends HashSpecUtils { this: FunSpec =>

  // Drawn once when the spec is instantiated and reused by every example below.
  val seed = Random.nextLong

  /**
   * Registers the shared examples.
   *
   * @param underTest     implementation being verified
   * @param referenceImpl reference hash of (bytes, seed) that results are compared against
   */
  def hash64(underTest: Hash64, referenceImpl: (Array[Byte], Long) => Long) = {

    // Example title -> the invalid (offset, length) pair to pass for an input of the given size.
    val invalidRanges: Seq[(String, Int => (Int, Int))] = Seq(
      ("should throw IndexOutOfBoundsException if offset is < 0", (size: Int) => (-1, size)),
      ("should throw IndexOutOfBoundsException if length is < 0", (_: Int) => (0, -1)),
      ("should throw IndexOutOfBoundsException if offset > input length", (size: Int) => (size + 1, size)),
      ("should throw IndexOutOfBoundsException if length > input length", (size: Int) => (0, size + 1))
    )

    // Reads the buffer's contents into a fresh array of capacity size (relative read: moves the position).
    def drain(buffer: ByteBuffer): Array[Byte] = {
      val bytes = new Array[Byte](buffer.capacity)
      buffer.get(bytes)
      bytes
    }

    describe("when hashing primitives") {
      it("should correctly hash a Byte") {
        val buffer = byteBufferOfSize(1)
        val want   = referenceImpl(buffer.array, seed)
        val got    = underTest.hashByte(buffer.get(0), seed)
        assert(want === got)
      }

      it("should correctly hash an Int") {
        val buffer = byteBufferOfSize(4)
        val want   = referenceImpl(buffer.array, seed)
        val got    = underTest.hashInt(buffer.getInt(0), seed)
        assert(want === got)
      }

      it("should correctly hash a Long") {
        val buffer = byteBufferOfSize(8)
        val want   = referenceImpl(buffer.array, seed)
        val got    = underTest.hashLong(buffer.getLong(0), seed)
        assert(want === got)
      }
    }

    describe("when hashing a byte array") {
      it("should correctly hash an empty byte array") {
        val bytes = Array[Byte]()
        val want  = referenceImpl(bytes, seed)
        val got   = underTest.hashByteArray(bytes, seed)
        assert(want === got)
      }

      it("should correctly hash a byte array") {
        val bytes = byteBufferOfSize(Random.nextInt(1024)).array
        val want  = referenceImpl(bytes, seed)
        val got   = underTest.hashByteArray(bytes, seed)
        assert(want === got)
      }

      it("should correctly hash a byte array slice") {
        val bytes = byteBufferOfSize(Random.nextInt(1024)).array
        val want  = referenceImpl(bytes, seed)
        val got   = underTest.hashByteArray(bytes, 0, bytes.length, seed)
        assert(want === got)
      }

      invalidRanges.foreach {
        case (title, rangeFor) =>
          it(title) {
            val bytes = byteBufferOfSize(1).array
            intercept[IndexOutOfBoundsException] {
              val (offset, length) = rangeFor(bytes.length)
              underTest.hashByteArray(bytes, offset, length, seed)
            }
          }
      }
    }

    describe("when hashing a ByteBuffer") {
      it("should correctly hash a non-direct byte buffer") {
        val buffer = byteBufferOfSize(Random.nextInt(1024))
        val want   = referenceImpl(buffer.array, seed)
        val got    = underTest.hashByteBuffer(buffer, seed)
        assert(want === got)
      }

      it("should correctly hash a non-direct ByteBuffer slice") {
        val buffer = byteBufferOfSize(Random.nextInt(1024))
        val want   = referenceImpl(buffer.array, seed)
        val got    = underTest.hashByteBuffer(buffer, 0, buffer.capacity, seed)
        assert(want === got)
      }

      it("should correctly hash a direct ByteBuffer") {
        val buffer = byteBufferOfSize(Random.nextInt(1024), direct = true)
        val want   = referenceImpl(drain(buffer), seed)
        val got    = underTest.hashByteBuffer(buffer, seed)
        assert(want === got)
      }

      it("should correctly hash a direct ByteBuffer slice") {
        val buffer = byteBufferOfSize(Random.nextInt(1024), direct = true)
        val want   = referenceImpl(drain(buffer), seed)
        val got    = underTest.hashByteBuffer(buffer, 0, buffer.capacity, seed)
        assert(want === got)
      }

      invalidRanges.foreach {
        case (title, rangeFor) =>
          it(title) {
            intercept[IndexOutOfBoundsException] {
              val buffer           = byteBufferOfSize(1)
              val (offset, length) = rangeFor(buffer.capacity)
              underTest.hashByteBuffer(buffer, offset, length, seed)
            }
          }
      }
    }
  }
}
--------------------------------------------------------------------------------
/src/test/scala/com/desmondyeung/hashing/StreamingHash32Behaviors.scala:
--------------------------------------------------------------------------------
/*
 * Copyright 2019 Desmond Yeung
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
15 | */ 16 | 17 | package com.desmondyeung.hashing 18 | 19 | import scala.util.Random 20 | import java.nio.{ByteBuffer, ByteOrder} 21 | import org.scalatest.FunSpec 22 | 23 | trait StreamingHash32Behaviors extends HashSpecUtils { this: FunSpec => 24 | 25 | val seed = Random.nextInt 26 | 27 | def streamingHash32(underTest: (Int) => StreamingHash32, referenceImpl: (Array[Byte], Int) => Int) = { 28 | describe("when hashing a byte array") { 29 | it("should correctly hash a empty byte array") { 30 | val checksum = underTest(seed) 31 | val array = byteBufferOfSize(0).array 32 | 33 | val expected = referenceImpl(array, seed) 34 | 35 | assert(expected === checksum.value) 36 | } 37 | 38 | it("should correctly hash a byte array in chunks") { 39 | val checksum = underTest(seed) 40 | val array = byteBufferOfSize(Random.nextInt(1024)).array 41 | 42 | val expected = referenceImpl(array, seed) 43 | val computed = { 44 | var offset = 0 45 | while (offset < array.length) { 46 | val chunkLength = Random.nextInt((array.length - offset) + 1) 47 | checksum.updateByteArray(array, offset, chunkLength) 48 | offset += chunkLength 49 | } 50 | checksum.value 51 | } 52 | 53 | assert(expected === computed) 54 | } 55 | 56 | it("should throw IndexOutOfBoundsException if offset is < 0") { 57 | val checksum = underTest(seed) 58 | val input = byteBufferOfSize(1).array 59 | intercept[IndexOutOfBoundsException] { 60 | checksum.updateByteArray(input, -1, input.length) 61 | } 62 | } 63 | 64 | it("should throw IndexOutOfBoundsException if length is < 0") { 65 | val checksum = underTest(seed) 66 | val input = byteBufferOfSize(1).array 67 | intercept[IndexOutOfBoundsException] { 68 | checksum.updateByteArray(input, 0, -1) 69 | } 70 | } 71 | 72 | it("should throw IndexOutOfBoundsException if offset > input length") { 73 | val checksum = underTest(seed) 74 | val input = byteBufferOfSize(1).array 75 | intercept[IndexOutOfBoundsException] { 76 | checksum.updateByteArray(input, input.length + 1, input.length) 
77 | } 78 | } 79 | 80 | it("should throw IndexOutOfBoundsException if length > input length") { 81 | val checksum = underTest(seed) 82 | val input = byteBufferOfSize(1).array 83 | intercept[IndexOutOfBoundsException] { 84 | checksum.updateByteArray(input, 0, input.length + 1) 85 | } 86 | } 87 | } 88 | 89 | describe("when hashing a ByteBuffer") { 90 | it("should correctly hash a non-direct ByteBuffer in chunks") { 91 | val checksum = underTest(seed) 92 | val input = byteBufferOfSize(Random.nextInt(1024)) 93 | 94 | val expected = referenceImpl(input.array, seed) 95 | val computed = { 96 | var offset = 0 97 | while (offset < input.capacity) { 98 | val chunkLength = Random.nextInt((input.capacity - offset) + 1) 99 | checksum.updateByteBuffer(input, offset, chunkLength) 100 | offset += chunkLength 101 | } 102 | checksum.value 103 | } 104 | 105 | assert(expected === computed) 106 | } 107 | 108 | it("should correctly hash a direct ByteBuffer in chunks") { 109 | val checksum = underTest(seed) 110 | val input = byteBufferOfSize(Random.nextInt(1024), direct = true) 111 | val array = new Array[Byte](input.capacity) 112 | input.get(array) 113 | 114 | val expected = referenceImpl(array, seed) 115 | val computed = { 116 | var offset = 0 117 | while (offset < input.capacity) { 118 | val chunkLength = Random.nextInt((input.capacity - offset) + 1) 119 | checksum.updateByteBuffer(input, offset, chunkLength) 120 | offset += chunkLength 121 | } 122 | checksum.value 123 | } 124 | 125 | assert(expected === computed) 126 | } 127 | 128 | it("should throw IndexOutOfBoundsException if offset is < 0") { 129 | val checksum = underTest(seed) 130 | val input = byteBufferOfSize(1) 131 | intercept[IndexOutOfBoundsException] { 132 | checksum.updateByteBuffer(input, -1, input.capacity) 133 | } 134 | } 135 | 136 | it("should throw IndexOutOfBoundsException if length is < 0") { 137 | val checksum = underTest(seed) 138 | val input = byteBufferOfSize(1) 139 | intercept[IndexOutOfBoundsException] { 140 
| checksum.updateByteBuffer(input, 0, -1) 141 | } 142 | } 143 | 144 | it("should throw IndexOutOfBoundsException if offset > input length") { 145 | val checksum = underTest(seed) 146 | val input = byteBufferOfSize(1) 147 | intercept[IndexOutOfBoundsException] { 148 | checksum.updateByteBuffer(input, input.capacity + 1, input.capacity) 149 | } 150 | } 151 | 152 | it("should throw IndexOutOfBoundsException if length > input length") { 153 | val checksum = underTest(seed) 154 | val input = byteBufferOfSize(1) 155 | intercept[IndexOutOfBoundsException] { 156 | checksum.updateByteBuffer(input, 0, input.capacity + 1) 157 | } 158 | } 159 | } 160 | 161 | it("should be idempotent when calculating the final value") { 162 | val checksum = underTest(seed) 163 | val array = byteBufferOfSize(Random.nextInt(1024)).array 164 | 165 | var offset = 0 166 | while (offset < array.length) { 167 | val chunkLength = Random.nextInt((array.length - offset) + 1) 168 | checksum.updateByteArray(array, offset, chunkLength) 169 | offset += chunkLength 170 | } 171 | 172 | assert(checksum.value === checksum.value) 173 | } 174 | 175 | it("should allow state to be reset") { 176 | val checksum = underTest(seed) 177 | val array = byteBufferOfSize(Random.nextInt(1024)).array 178 | 179 | var expected = referenceImpl(array, seed) 180 | var computed = { 181 | var offset = 0 182 | while (offset < array.length) { 183 | val chunkLength = Random.nextInt((array.length - offset) + 1) 184 | checksum.updateByteArray(array, offset, chunkLength) 185 | offset += chunkLength 186 | } 187 | checksum.value 188 | } 189 | 190 | assert(expected === computed) 191 | 192 | checksum.reset() 193 | 194 | Random.nextBytes(array) 195 | 196 | expected = referenceImpl(array, seed) 197 | computed = { 198 | var offset = 0 199 | while (offset < array.length) { 200 | val chunkLength = Random.nextInt((array.length - offset) + 1) 201 | checksum.updateByteArray(array, offset, chunkLength) 202 | offset += chunkLength 203 | } 204 | 
checksum.value 205 | } 206 | 207 | assert(expected === computed) 208 | } 209 | } 210 | } 211 | -------------------------------------------------------------------------------- /src/test/scala/com/desmondyeung/hashing/StreamingHash64Behaviors.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 Desmond Yeung 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package com.desmondyeung.hashing 18 | 19 | import scala.util.Random 20 | import java.nio.{ByteBuffer, ByteOrder} 21 | import org.scalatest.FunSpec 22 | 23 | trait StreamingHash64Behaviors extends HashSpecUtils { this: FunSpec => 24 | 25 | val seed = Random.nextLong 26 | 27 | def streamingHash64(underTest: (Long) => StreamingHash64, referenceImpl: (Array[Byte], Long) => Long) = { 28 | describe("when hashing a byte array") { 29 | it("should correctly hash a empty byte array") { 30 | val checksum = underTest(seed) 31 | val array = byteBufferOfSize(0).array 32 | 33 | val expected = referenceImpl(array, seed) 34 | 35 | assert(expected === checksum.value) 36 | } 37 | 38 | it("should correctly hash a byte array in chunks") { 39 | val checksum = underTest(seed) 40 | val array = byteBufferOfSize(Random.nextInt(1024)).array 41 | 42 | val expected = referenceImpl(array, seed) 43 | val computed = { 44 | var offset = 0 45 | while (offset < array.length) { 46 | val chunkLength = Random.nextInt((array.length - 
offset) + 1) 47 | checksum.updateByteArray(array, offset, chunkLength) 48 | offset += chunkLength 49 | } 50 | checksum.value 51 | } 52 | 53 | assert(expected === computed) 54 | } 55 | 56 | it("should throw IndexOutOfBoundsException if offset is < 0") { 57 | val checksum = underTest(seed) 58 | val input = byteBufferOfSize(1).array 59 | intercept[IndexOutOfBoundsException] { 60 | checksum.updateByteArray(input, -1, input.length) 61 | } 62 | } 63 | 64 | it("should throw IndexOutOfBoundsException if length is < 0") { 65 | val checksum = underTest(seed) 66 | val input = byteBufferOfSize(1).array 67 | intercept[IndexOutOfBoundsException] { 68 | checksum.updateByteArray(input, 0, -1) 69 | } 70 | } 71 | 72 | it("should throw IndexOutOfBoundsException if offset > input length") { 73 | val checksum = underTest(seed) 74 | val input = byteBufferOfSize(1).array 75 | intercept[IndexOutOfBoundsException] { 76 | checksum.updateByteArray(input, input.length + 1, input.length) 77 | } 78 | } 79 | 80 | it("should throw IndexOutOfBoundsException if length > input length") { 81 | val checksum = underTest(seed) 82 | val input = byteBufferOfSize(1).array 83 | intercept[IndexOutOfBoundsException] { 84 | checksum.updateByteArray(input, 0, input.length + 1) 85 | } 86 | } 87 | } 88 | 89 | describe("when hashing a ByteBuffer") { 90 | it("should correctly hash a non-direct ByteBuffer in chunks") { 91 | val checksum = underTest(seed) 92 | val input = byteBufferOfSize(Random.nextInt(1024)) 93 | 94 | val expected = referenceImpl(input.array, seed) 95 | val computed = { 96 | var offset = 0 97 | while (offset < input.capacity) { 98 | val chunkLength = Random.nextInt((input.capacity - offset) + 1) 99 | checksum.updateByteBuffer(input, offset, chunkLength) 100 | offset += chunkLength 101 | } 102 | checksum.value 103 | } 104 | 105 | assert(expected === computed) 106 | } 107 | 108 | it("should correctly hash a direct ByteBuffer in chunks") { 109 | val checksum = underTest(seed) 110 | val input = 
byteBufferOfSize(Random.nextInt(1024), direct = true) 111 | val array = new Array[Byte](input.capacity) 112 | input.get(array) 113 | 114 | val expected = referenceImpl(array, seed) 115 | val computed = { 116 | var offset = 0 117 | while (offset < input.capacity) { 118 | val chunkLength = Random.nextInt((input.capacity - offset) + 1) 119 | checksum.updateByteBuffer(input, offset, chunkLength) 120 | offset += chunkLength 121 | } 122 | checksum.value 123 | } 124 | 125 | assert(expected === computed) 126 | } 127 | 128 | it("should throw IndexOutOfBoundsException if offset is < 0") { 129 | val checksum = underTest(seed) 130 | val input = byteBufferOfSize(1) 131 | intercept[IndexOutOfBoundsException] { 132 | checksum.updateByteBuffer(input, -1, input.capacity) 133 | } 134 | } 135 | 136 | it("should throw IndexOutOfBoundsException if length is < 0") { 137 | val checksum = underTest(seed) 138 | val input = byteBufferOfSize(1) 139 | intercept[IndexOutOfBoundsException] { 140 | checksum.updateByteBuffer(input, 0, -1) 141 | } 142 | } 143 | 144 | it("should throw IndexOutOfBoundsException if offset > input length") { 145 | val checksum = underTest(seed) 146 | val input = byteBufferOfSize(1) 147 | intercept[IndexOutOfBoundsException] { 148 | checksum.updateByteBuffer(input, input.capacity + 1, input.capacity) 149 | } 150 | } 151 | 152 | it("should throw IndexOutOfBoundsException if length > input length") { 153 | val checksum = underTest(seed) 154 | val input = byteBufferOfSize(1) 155 | intercept[IndexOutOfBoundsException] { 156 | checksum.updateByteBuffer(input, 0, input.capacity + 1) 157 | } 158 | } 159 | } 160 | 161 | it("should be idempotent when calculating the final value") { 162 | val checksum = underTest(seed) 163 | val array = byteBufferOfSize(Random.nextInt(1024)).array 164 | 165 | var offset = 0 166 | while (offset < array.length) { 167 | val chunkLength = Random.nextInt((array.length - offset) + 1) 168 | checksum.updateByteArray(array, offset, chunkLength) 169 | 
offset += chunkLength 170 | } 171 | 172 | assert(checksum.value === checksum.value) 173 | } 174 | 175 | it("should allow state to be reset") { 176 | val checksum = underTest(seed) 177 | val array = byteBufferOfSize(Random.nextInt(1024)).array 178 | 179 | var expected = referenceImpl(array, seed) 180 | var computed = { 181 | var offset = 0 182 | while (offset < array.length) { 183 | val chunkLength = Random.nextInt((array.length - offset) + 1) 184 | checksum.updateByteArray(array, offset, chunkLength) 185 | offset += chunkLength 186 | } 187 | checksum.value 188 | } 189 | 190 | assert(expected === computed) 191 | 192 | checksum.reset() 193 | 194 | Random.nextBytes(array) 195 | 196 | expected = referenceImpl(array, seed) 197 | computed = { 198 | var offset = 0 199 | while (offset < array.length) { 200 | val chunkLength = Random.nextInt((array.length - offset) + 1) 201 | checksum.updateByteArray(array, offset, chunkLength) 202 | offset += chunkLength 203 | } 204 | checksum.value 205 | } 206 | 207 | assert(expected === computed) 208 | } 209 | } 210 | } 211 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | --------------------------------------------------------------------------------