├── .gitignore ├── pom.xml ├── readme.md └── src ├── main └── java │ └── ie │ └── ucd │ └── murmur │ └── MurmurHash.java └── test └── java └── ie └── ucd └── murmur └── MurmurHashTest.java /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | ie.ucd 8 | murmur 9 | 0.1.0 10 | murmurhash-java 11 | 12 | 13 | 14 | Viliam Holub 15 | +1 16 | 17 | 18 | 19 | 20 | scm:git:git://github.com/tnm/murmurhash-java.git 21 | scm:git:git@github.com:tnm/murmurhash-java.git 22 | http://github.com/tnm/murmurhash-java/ 23 | 24 | 25 | 26 | github 27 | http://github.com/tnm/murmurhash-java/issues#issue/ 28 | 29 | 30 | 31 | 32 | repo 33 | https://github.com/tnm/tnm-mvn-repo/raw/master/releases 34 | 35 | 36 | snapshot-repo 37 | https://github.com/tnm/tnm-mvn-repo/raw/master/snapshots 38 | 39 | 40 | 41 | 42 | 43 | junit 44 | junit 45 | 4.8.2 46 | 47 | 48 | 49 | 50 | 51 | sign 52 | 53 | 54 | 55 | org.apache.maven.plugins 56 | maven-gpg-plugin 57 | 1.2 58 | 59 | 60 | sign-artifacts 61 | verify 62 | 63 | sign 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | org.apache.maven.plugins 77 | maven-compiler-plugin 78 | 3.8.1 79 | 80 | 81 | org.apache.maven.plugins 82 | maven-source-plugin 83 | 2.1.2 84 | 85 | 86 | attach-sources 87 | 88 | jar 89 | 90 | 91 | 92 | 93 | 94 | org.apache.maven.plugins 95 | maven-resources-plugin 96 | 2.5 97 | 98 | UTF-8 99 | 100 | 101 | 102 | 103 | 104 | 105 | org.apache.maven.wagon 106 | wagon-ssh 107 | 1.0-beta-7 108 | 109 | 110 | 111 | 112 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | murmurhash-java 2 | ================= 3 | 4 | This is an implementation by Viliam Holub of the fast non-cryptographic **murmurhash2** algorithm. 5 | 6 | It is written in Java and is implemented in both 32 and **64-bit** versions. 7 | 8 | If you want the latest in the world of murmur, check out Guava's [Hashing](http://docs.guava-libraries.googlecode.com/git-history/master/javadoc/com/google/common/hash/Hashing.html) class, 9 | which has 32 and 128 bit implementations of **murmur3**. 10 | 11 | building 12 | ---------- 13 | 14 | Build a package with `maven`: 15 | 16 | ``` 17 | mvn package 18 | ``` 19 | 20 | Run the tests: 21 | 22 | ``` 23 | mvn test 24 | ``` 25 | 26 | public api 27 | -------------- 28 | 29 | ```java 30 | 31 | public final class MurmurHash { 32 | public static int hash32(final byte[] data, int length); 33 | public static long hash64(final byte[] data, int length); 34 | public static int hash32(final byte[] data, int length, int seed); 35 | public static long hash64(final byte[] data, int length, int seed); 36 | public static int hash32(final String text); 37 | public static long hash64(final String text); 38 | public static int hash32(final String text, int from, int length); 39 | public static long hash64(final String text, int from, int length); 40 | } 41 | 42 | ``` 43 | 44 | author 45 | --------- 46 | 47 | This code was released by Viliam into the public domain. [More information is here](http://d3s.mff.cuni.cz/~holub/sw/javamurmurhash/). 48 | -------------------------------------------------------------------------------- /src/main/java/ie/ucd/murmur/MurmurHash.java: -------------------------------------------------------------------------------- 1 | package ie.ucd.murmur; 2 | 3 | /** 4 | * murmur hash 2.0. 5 | * 6 | * The murmur hash is a relatively fast hash function from 7 | * http://murmurhash.googlepages.com/ for platforms with efficient 8 | * multiplication. 9 | * 10 | * This is a re-implementation of the original C code plus some 11 | * additional features. 12 | * 13 | * Public domain. 14 | * 15 | * @author Viliam Holub 16 | * @version 1.0.2 17 | * 18 | */ 19 | public final class MurmurHash { 20 | 21 | // all methods static; private constructor. 22 | private MurmurHash() {} 23 | 24 | /** 25 | * Generates 32 bit hash from byte array of the given length and 26 | * seed. 27 | * 28 | * @param data byte array to hash 29 | * @param length length of the array to hash 30 | * @param seed initial seed value 31 | * @return 32 bit hash of the given array 32 | */ 33 | public static int hash32(final byte[] data, int length, int seed) { 34 | // 'm' and 'r' are mixing constants generated offline. 35 | // They're not really 'magic', they just happen to work well. 36 | final int m = 0x5bd1e995; 37 | final int r = 24; 38 | 39 | // Initialize the hash to a random value 40 | int h = seed^length; 41 | int length4 = length/4; 42 | 43 | for (int i=0; i>> r; 49 | k *= m; 50 | h *= m; 51 | h ^= k; 52 | } 53 | 54 | // Handle the last few bytes of the input array 55 | switch (length%4) { 56 | case 3: h ^= (data[(length&~3) +2]&0xff) << 16; 57 | case 2: h ^= (data[(length&~3) +1]&0xff) << 8; 58 | case 1: h ^= (data[length&~3]&0xff); 59 | h *= m; 60 | } 61 | 62 | h ^= h >>> 13; 63 | h *= m; 64 | h ^= h >>> 15; 65 | 66 | return h; 67 | } 68 | 69 | /** 70 | * Generates 32 bit hash from byte array with default seed value. 71 | * 72 | * @param data byte array to hash 73 | * @param length length of the array to hash 74 | * @return 32 bit hash of the given array 75 | */ 76 | public static int hash32(final byte[] data, int length) { 77 | return hash32(data, length, 0x9747b28c); 78 | } 79 | 80 | /** 81 | * Generates 32 bit hash from a string. 82 | * 83 | * @param text string to hash 84 | * @return 32 bit hash of the given string 85 | */ 86 | public static int hash32(final String text) { 87 | final byte[] bytes = text.getBytes(); 88 | return hash32(bytes, bytes.length); 89 | } 90 | 91 | /** 92 | * Generates 32 bit hash from a substring. 93 | * 94 | * @param text string to hash 95 | * @param from starting index 96 | * @param length length of the substring to hash 97 | * @return 32 bit hash of the given string 98 | */ 99 | public static int hash32(final String text, int from, int length) { 100 | return hash32(text.substring( from, from+length)); 101 | } 102 | 103 | /** 104 | * Generates 64 bit hash from byte array of the given length and seed. 105 | * 106 | * @param data byte array to hash 107 | * @param length length of the array to hash 108 | * @param seed initial seed value 109 | * @return 64 bit hash of the given array 110 | */ 111 | public static long hash64(final byte[] data, int length, int seed) { 112 | final long m = 0xc6a4a7935bd1e995L; 113 | final int r = 47; 114 | 115 | long h = (seed&0xffffffffl)^(length*m); 116 | 117 | int length8 = length/8; 118 | 119 | for (int i=0; i>> r; 128 | k *= m; 129 | 130 | h ^= k; 131 | h *= m; 132 | } 133 | 134 | switch (length%8) { 135 | case 7: h ^= (long)(data[(length&~7)+6]&0xff) << 48; 136 | case 6: h ^= (long)(data[(length&~7)+5]&0xff) << 40; 137 | case 5: h ^= (long)(data[(length&~7)+4]&0xff) << 32; 138 | case 4: h ^= (long)(data[(length&~7)+3]&0xff) << 24; 139 | case 3: h ^= (long)(data[(length&~7)+2]&0xff) << 16; 140 | case 2: h ^= (long)(data[(length&~7)+1]&0xff) << 8; 141 | case 1: h ^= (long)(data[length&~7]&0xff); 142 | h *= m; 143 | }; 144 | 145 | h ^= h >>> r; 146 | h *= m; 147 | h ^= h >>> r; 148 | 149 | return h; 150 | } 151 | 152 | /** 153 | * Generates 64 bit hash from byte array with default seed value. 154 | * 155 | * @param data byte array to hash 156 | * @param length length of the array to hash 157 | * @return 64 bit hash of the given string 158 | */ 159 | public static long hash64(final byte[] data, int length) { 160 | return hash64(data, length, 0xe17a1465); 161 | } 162 | 163 | /** 164 | * Generates 64 bit hash from a string. 165 | * 166 | * @param text string to hash 167 | * @return 64 bit hash of the given string 168 | */ 169 | public static long hash64(final String text) { 170 | final byte[] bytes = text.getBytes(); 171 | return hash64(bytes, bytes.length); 172 | } 173 | 174 | /** 175 | * Generates 64 bit hash from a substring. 176 | * 177 | * @param text string to hash 178 | * @param from starting index 179 | * @param length length of the substring to hash 180 | * @return 64 bit hash of the given array 181 | */ 182 | public static long hash64(final String text, int from, int length) { 183 | return hash64(text.substring( from, from+length)); 184 | } 185 | } 186 | -------------------------------------------------------------------------------- /src/test/java/ie/ucd/murmur/MurmurHashTest.java: -------------------------------------------------------------------------------- 1 | package ie.ucd.murmur; 2 | 3 | import static org.junit.Assert.*; 4 | 5 | import org.junit.Test; 6 | 7 | /** Test class for the Java Murmur hash implementation. 8 | * 9 | * @author Viliam Holub 10 | * 11 | */ 12 | public class MurmurHashTest { 13 | 14 | /** Random input data with various length. */ 15 | static final byte[][] input = { 16 | { 17 | (byte)0xed, (byte)0x53, (byte)0xc4, (byte)0xa5, 18 | (byte)0x3b, (byte)0x1b, (byte)0xbd, (byte)0xc2, 19 | (byte)0x52, (byte)0x7d, (byte)0xc3, (byte)0xef, 20 | (byte)0x53, (byte)0x5f, (byte)0xae, (byte)0x3b 21 | }, 22 | { 23 | (byte)0x21, (byte)0x65, (byte)0x59, (byte)0x4e, 24 | (byte)0xd8, (byte)0x12, (byte)0xf9, (byte)0x05, 25 | (byte)0x80, (byte)0xe9, (byte)0x1e, (byte)0xed, 26 | (byte)0xe4, (byte)0x56, (byte)0xbb 27 | }, 28 | { 29 | (byte)0x2b, (byte)0x02, (byte)0xb1, (byte)0xd0, 30 | (byte)0x3d, (byte)0xce, (byte)0x31, (byte)0x3d, 31 | (byte)0x97, (byte)0xc4, (byte)0x91, (byte)0x0d, 32 | (byte)0xf7, (byte)0x17 33 | }, 34 | { 35 | (byte)0x8e, (byte)0xa7, (byte)0x9a, (byte)0x02, 36 | (byte)0xe8, (byte)0xb9, (byte)0x6a, (byte)0xda, 37 | (byte)0x92, (byte)0xad, (byte)0xe9, (byte)0x2d, 38 | (byte)0x21 39 | }, 40 | { 41 | (byte)0xa9, (byte)0x6d, (byte)0xea, (byte)0x77, 42 | (byte)0x06, (byte)0xce, (byte)0x1b, (byte)0x85, 43 | (byte)0x48, (byte)0x27, (byte)0x4c, (byte)0xfe 44 | }, 45 | { 46 | (byte)0xec, (byte)0x93, (byte)0xa0, (byte)0x12, 47 | (byte)0x60, (byte)0xee, (byte)0xc8, (byte)0x0a, 48 | (byte)0xc5, (byte)0x90, (byte)0x62 49 | }, 50 | { 51 | (byte)0x55, (byte)0x6d, (byte)0x93, (byte)0x66, 52 | (byte)0x14, (byte)0x6d, (byte)0xdf, (byte)0x00, 53 | (byte)0x58, (byte)0x99 54 | }, 55 | { 56 | (byte)0x3c, (byte)0x72, (byte)0x20, (byte)0x1f, 57 | (byte)0xd2, (byte)0x59, (byte)0x19, (byte)0xdb, 58 | (byte)0xa1 59 | }, 60 | { 61 | (byte)0x23, (byte)0xa8, (byte)0xb1, (byte)0x87, 62 | (byte)0x55, (byte)0xf7, (byte)0x8a, (byte)0x4b, 63 | 64 | }, 65 | { 66 | (byte)0xe2, (byte)0x42, (byte)0x1c, (byte)0x2d, 67 | (byte)0xc1, (byte)0xe4, (byte)0x3e 68 | }, 69 | { 70 | (byte)0x66, (byte)0xa6, (byte)0xb5, (byte)0x5a, 71 | (byte)0x74, (byte)0xd9 72 | }, 73 | { 74 | (byte)0xe8, (byte)0x76, (byte)0xa8, (byte)0x90, 75 | (byte)0x76 76 | }, 77 | { 78 | (byte)0xeb, (byte)0x25, (byte)0x3f, (byte)0x87 79 | }, 80 | { 81 | (byte)0x37, (byte)0xa0, (byte)0xa9 82 | }, 83 | { 84 | (byte)0x5b, (byte)0x5d 85 | }, 86 | { 87 | (byte)0x7e 88 | }, 89 | {} 90 | }; 91 | 92 | /* 93 | * Expected results - from the original C implementation. 94 | */ 95 | 96 | /** Murmur 32bit hash results, default library seed. */ 97 | static final int[] results32_standard = { 98 | 0x96814fb3, 0x485dcaba, 0x331dc4ae, 0xc6a7bf2f, 99 | 0xcdf35de0, 0xd9dec7cc, 0x63a7318a, 0xd0d3c2de, 100 | 0x90923aef, 0xaf35c1e2, 0x735377b2, 0x366c98f3, 101 | 0x9c48ee29, 0x0b615790, 0xb4308ac1, 0xec98125a, 102 | 0x106e08d9}; 103 | 104 | /** Murmur 32bit hash results, special test seed. */ 105 | static final int[] results32_seed = { 106 | 0xd92e493e, 0x8b50903b, 0xc3372a7b, 0x48f07e9e, 107 | 0x8a5e4a6e, 0x57916df4, 0xa346171f, 0x1e319c86, 108 | 0x9e1a03cd, 0x9f973e6c, 0x2d8c77f5, 0xabed8751, 109 | 0x296708b6, 0x24f8078b, 0x111b1553, 0xa7da1996, 110 | 0xfe776c70}; 111 | 112 | /** Murmur 64bit hash results, default library seed. */ 113 | static final long[] results64_standard = { 114 | 0x4987cb15118a83d9l, 0x28e2a79e3f0394d9l, 0x8f4600d786fc5c05l, 115 | 0xa09b27fea4b54af3l, 0x25f34447525bfd1el, 0x32fad4c21379c7bfl, 116 | 0x4b30b99a9d931921l, 0x4e5dab004f936cdbl, 0x06825c27bc96cf40l, 117 | 0xff4bf2f8a4823905l, 0x7f7e950c064e6367l, 0x821ade90caaa5889l, 118 | 0x6d28c915d791686al, 0x9c32649372163ba2l, 0xd66ae956c14d5212l, 119 | 0x38ed30ee5161200fl, 0x9bfae0a4e613fc3cl, 120 | }; 121 | 122 | /** Murmur 64bit hash results, special test seed. */ 123 | static final long[] results64_seed = { 124 | 0x0822b1481a92e97bl, 0xf8a9223fef0822ddl, 0x4b49e56affae3a89l, 125 | 0xc970296e32e1d1c1l, 0xe2f9f88789f1b08fl, 0x2b0459d9b4c10c61l, 126 | 0x377e97ea9197ee89l, 0xd2ccad460751e0e7l, 0xff162ca8d6da8c47l, 127 | 0xf12e051405769857l, 0xdabba41293d5b035l, 0xacf326b0bb690d0el, 128 | 0x0617f431bc1a8e04l, 0x15b81f28d576e1b2l, 0x28c1fe59e4f8e5bal, 129 | 0x694dd315c9354ca9l, 0xa97052a8f088ae6cl 130 | }; 131 | 132 | 133 | /** Dummy test text. */ 134 | static final String text = "Lorem ipsum dolor sit amet, consectetur adipisicing elit"; 135 | 136 | @Test 137 | public void testHash32ByteArrayIntInt() { 138 | for (int i=0; i