├── .gitignore ├── LICENSE ├── MurmurHash.pas ├── MurmurHashTests.pas └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | # Uncomment these types if you want even more clean repository. But be careful. 2 | # It can make harm to an existing project source. Read explanations below. 3 | # 4 | # Resource files are binaries containing manifest, project icon and version info. 5 | # They can not be viewed as text or compared by diff-tools. Consider replacing them with .rc files. 6 | #*.res 7 | # 8 | # Type library file (binary). In old Delphi versions it should be stored. 9 | # Since Delphi 2009 it is produced from .ridl file and can safely be ignored. 10 | #*.tlb 11 | # 12 | # Diagram Portfolio file. Used by the diagram editor up to Delphi 7. 13 | # Uncomment this if you are not using diagrams or use newer Delphi version. 14 | #*.ddp 15 | # 16 | # Visual LiveBindings file. Added in Delphi XE2. 17 | # Uncomment this if you are not using LiveBindings Designer. 18 | #*.vlb 19 | # 20 | # Deployment Manager configuration file for your project. Added in Delphi XE2. 21 | # Uncomment this if it is not mobile development and you do not use remote debug feature. 
22 | #*.deployproj 23 | # 24 | 25 | # Delphi compiler-generated binaries (safe to delete) 26 | *.exe 27 | *.dll 28 | *.bpl 29 | *.bpi 30 | *.dcp 31 | *.so 32 | *.apk 33 | *.drc 34 | *.map 35 | *.dres 36 | *.rsm 37 | *.tds 38 | *.dcu 39 | *.lib 40 | 41 | # Delphi autogenerated files (duplicated info) 42 | *.cfg 43 | *Resource.rc 44 | 45 | # Delphi local files (user-specific info) 46 | *.local 47 | *.identcache 48 | *.projdata 49 | *.tvsconfig 50 | *.dsk 51 | 52 | # Delphi history and backups 53 | __history/ 54 | *.~* 55 | 56 | # Castalia statistics file 57 | *.stat 58 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 
23 | 24 | For more information, please refer to <https://unlicense.org> 25 | 26 | -------------------------------------------------------------------------------- /MurmurHash.pas: --------------------------------------------------------------------------------
unit MurmurHash;

{
  MurmurHash3 for Delphi.

  TMurmur3.HashData32 / TMurmur3.HashString32 implement the 32-bit variant.
  The 128-bit entry points are declared (protected) but currently raise
  'Not implemented'.
}

interface

{$IFNDEF Unicode}
type
  // Pre-Unicode compilers: map the newer type names onto the types available.
  UnicodeString = WideString;
  UInt64 = Int64;
{$ENDIF}

type
  TMurmur3 = class(TObject)
  protected
    // 128-bit variants: placeholders only, both raise 'Not implemented'.
    class function HashData128_x86(const Key; KeyLen: LongWord; const Seed: LongWord): UInt64;
    class function HashData128_x64(const Key; KeyLen: LongWord; const Seed: LongWord): UInt64;

    class function HashData128(const Key; KeyLen: LongWord; const Seed: LongWord): UInt64;
    class function HashString128(const Key: UnicodeString; const Seed: LongWord): UInt64;
  public
    // Hashes KeyLen bytes starting at Key, mixed with Seed.
    class function HashData32(const Key; KeyLen: LongWord; const Seed: LongWord): LongWord;
    // Hashes the UTF-8 encoding of Key (no terminator included), mixed with Seed.
    class function HashString32(const Key: UnicodeString; const Seed: LongWord): LongWord;
  end;

implementation

uses
  SysUtils, Windows
  {$IFDEF UnitTests}, MurmurHashTests{$ENDIF};

// Rotates the 32-bit value X left by c bits.
function LRot32(X: LongWord; c: Byte): LongWord;
begin
  Result := (X shl c) or (X shr (32-c));
end;

// Converts nChars UTF-16 code units starting at Source into a UTF-8 byte string.
// Returns '' when nChars is 0; raises the last OS error if the conversion fails.
function WideCharToUtf8(const Source: PWideChar; nChars: Integer): AnsiString;
var
  strLen: Integer;
begin
  if nChars = 0 then
  begin
    Result := '';
    Exit;
  end;

  // Determine real size of destination string, in bytes
  strLen := WideCharToMultiByte(CP_UTF8, 0, Source, nChars, nil, 0, nil, nil);
  if strLen = 0 then
    RaiseLastOSError;

  // Allocate memory for destination string
  SetLength(Result, strLen);

  // Convert source UTF-16 string (UnicodeString) to the destination using the code-page
  strLen := WideCharToMultiByte(CP_UTF8, 0, Source, nChars, PAnsiChar(Result), strLen, nil, nil);
  if strLen = 0 then
    RaiseLastOSError;
end;

{ TMurmur3 }

// Selects the 128-bit implementation for the target CPU at compile time.
// Both targets currently raise 'Not implemented'.
class function TMurmur3.HashData128(const Key; KeyLen: LongWord; const Seed: LongWord): UInt64;
begin
{$IFDEF CPUX64}
  Result := TMurmur3.HashData128_x64(Key, KeyLen, Seed);
{$ELSE}
  Result := TMurmur3.HashData128_x86(Key, KeyLen, Seed);
{$ENDIF}
end;

// The mixing arithmetic below relies on 32-bit wrap-around, so overflow checks
// must be off while it is compiled. Remember the caller's setting and put it
// back afterwards instead of forcing the checks on for the rest of the unit.
{$IFOPT Q+}
  {$DEFINE MURMUR_RESTORE_OVERFLOWCHECKS}
  {$OVERFLOWCHECKS OFF}
{$ENDIF}
class function TMurmur3.HashData32(const Key; KeyLen: LongWord; const Seed: LongWord): LongWord;
var
  hash: LongWord;
  len: LongWord;      // bytes still to be consumed
  k: LongWord;
  data: PByte;        // read cursor into the key
  tail: PByteArray;   // view of the final 1..3 bytes
const
  c1 = $cc9e2d51;
  c2 = $1b873593;
  r1 = 15;
  r2 = 13;
  m = 5;
  n = $e6546b64;
begin
{
  Murmur3 32-bit
    https://github.com/rurban/smhasher/blob/master/MurmurHash3.cpp
    http://code.google.com/p/smhasher/source/browse/
}
  // Walk the key with a pointer rather than indexing a PByteArray: array
  // indexing is bounds-checked against the array type's declared range when
  // range checking is enabled, which rejects large keys.
  data := PByte(@Key);

  // Initialize the hash
  hash := Seed;
  len := KeyLen;

  // Mix 4 bytes at a time into the hash
  while len >= 4 do
  begin
    k := PLongWord(data)^;

    k := k*c1;
    k := LRot32(k, r1);
    k := k*c2;

    hash := hash xor k;
    hash := LRot32(hash, r2);
    hash := hash*m + n;

    Inc(data, 4);
    Dec(len, 4);
  end;

  { Handle the last few bytes of the input array
      Key: ... $69 $18 $2f
  }
  if len > 0 then
  begin
    Assert(len <= 3);
    tail := PByteArray(data);
    k := 0;

    //Pack last few bytes into k (first remaining byte ends up in the low 8 bits)
    if len = 3 then
      k := k or (LongWord(tail^[2]) shl 16);
    if len >= 2 then
      k := k or (LongWord(tail^[1]) shl 8);
    k := k or tail^[0];

    k := k*c1;
    k := LRot32(k, r1);
    k := k*c2;

    hash := hash xor k;
  end;

  // Finalization
  hash := hash xor KeyLen;

  hash := hash xor (hash shr 16);
  hash := hash * $85ebca6b;
  hash := hash xor (hash shr 13);
  hash := hash * $c2b2ae35;
  hash := hash xor (hash shr 16);

  Result := hash;
end;
{$IFDEF MURMUR_RESTORE_OVERFLOWCHECKS}
  {$OVERFLOWCHECKS ON}
  {$UNDEF MURMUR_RESTORE_OVERFLOWCHECKS}
{$ENDIF}

// Converts Key to UTF-8 and passes those bytes to HashData128.
class function TMurmur3.HashString128(const Key: UnicodeString; const Seed: LongWord): UInt64;
var
  s: AnsiString; //UTF-8 version of Key
begin
  s := WideCharToUtf8(PWideChar(Key), Length(Key));

  Result := TMurmur3.HashData128(Pointer(s)^, Length(s)*SizeOf(AnsiChar), Seed);
end;

// Converts Key to UTF-8 and passes those bytes to HashData32.
// An empty Key yields a zero-length key, so HashData32 never reads through the pointer.
class function TMurmur3.HashString32(const Key: UnicodeString; const Seed: LongWord): LongWord;
var
  s: AnsiString; //UTF-8 version of Key
begin
  s := WideCharToUtf8(PWideChar(Key), Length(Key));

  Result := TMurmur3.HashData32(Pointer(s)^, Length(s)*SizeOf(AnsiChar), Seed);
end;

// Placeholder: always raises Exception('Not implemented').
class function TMurmur3.HashData128_x64(const Key; KeyLen: LongWord; const Seed: LongWord): UInt64;
begin
  raise Exception.Create('Not implemented');
end;

// Placeholder: always raises Exception('Not implemented').
class function TMurmur3.HashData128_x86(const Key; KeyLen: LongWord; const Seed: LongWord): UInt64;
begin
  raise Exception.Create('Not implemented');
end;

end.
181 | -------------------------------------------------------------------------------- /MurmurHashTests.pas: --------------------------------------------------------------------------------
unit MurmurHashTests;

{
  DUnit test case for TMurmur3 (32-bit variant).
  Registers itself with the test framework in the initialization section.
}

interface

uses
  TestFramework, SysUtils, MurmurHash; //TMurmur3 is declared in unit MurmurHash

type
  TMurmur3Tests = class(TTestCase)
  protected
    FFreq: Int64; //performance-counter frequency, or -1 if it could not be queried
    procedure SetUp; override;
    procedure TearDown; override;
  published
    procedure SelfTest_32_CanonicalSMHasher; //Canonical SMHasher 32-bit
    procedure SelfTest_32_TestVectors; //useful test vectors of 32-bit
  end;


implementation

uses
  Types, Windows;

{$IFNDEF Unicode}
type
  TBytes = TByteDynArray; //Added sometime in Delphi. If you have Unicode then you probably already have TBytes
{$ENDIF}

// Converts a string of hex digit pairs (spaces are ignored) into bytes.
// A trailing unpaired digit is ignored.
function HexStringToBytes(s: string): TBytes;
var
  i, j: Integer;
  n: Integer;
begin
  //Strip spaces, scanning backwards so deletions do not disturb the indexes still to be visited
  for i := Length(s) downto 1 do
  begin
    if s[i] = ' ' then
      Delete(s, i, 1);
  end;

  SetLength(Result, Length(s) div 2);

  i := 1;
  j := 0;
  while (i < Length(s)) do
  begin
    n := StrToInt('0x'+s[i]+s[i+1]);
    Result[j] := n;
    Inc(i, 2);
    Inc(j, 1);
  end;
end;


{ TMurmur3Tests }

procedure TMurmur3Tests.SelfTest_32_CanonicalSMHasher;
const
  Expected: LongWord = $B0F57EE3;
var
  key: array[0..255] of Byte; //key material: byte i holds the value i
  hashes: array[0..255] of Longword; //result of each of the 256 hashes
  i: Integer;
  actual: LongWord;
  t1, t2: Int64;
begin
  {
    The canonical Murmur3 test is to perform multiple hashes, then hash the result of the hashes.

    MurmurHash3.cpp
      https://github.com/rurban/smhasher/blob/f0b9ef8b08a5c27cc5791e888358119875a22ba0/MurmurHash3.cpp
    KeySetTest.cpp - VerificationTest(...)
      https://github.com/rurban/smhasher/blob/9c9619c3beef4241e8e96305fbbee3ec069d3081/KeysetTest.cpp

    Expected Result: 0xB0F57EE3
    main.cpp
      https://github.com/rurban/smhasher/blob/9c9619c3beef4241e8e96305fbbee3ec069d3081/main.cpp
  }
  (*
    Hash the first N bytes of {0, 1, 2, ..., 255} for N = 0..255,
    using 256-N as the seed

    Key                 Seed        Hash
    ==================  ==========  ==========
    (empty)             0x00000100  0x........
    00                  0x000000FF  0x........
    00 01               0x000000FE  0x........
    00 01 02            0x000000FD  0x........
    ...
    00 01 02 ... FD     0x00000002  0x........
    00 01 02 ... FD FE  0x00000001  0x........

    And then hash the concatenation of the 256 computed hashes
  *)
  if not QueryPerformanceCounter({out}t1) then
    t1 := 0;
  for i := 0 to 255 do
  begin
    key[i] := Byte(i);
    hashes[i] := TMurmur3.HashData32(key[0], i, 256-i);
  end;

  actual := TMurmur3.HashData32(hashes[0], 256*SizeOf(Longword), 0);

  if not QueryPerformanceCounter({out}t2) then
    t2 := 0;

  Status('Test completed in '+FloatToStrF((t2-t1)/FFreq*1000000, ffFixed, 15, 3)+' µs');

  CheckEquals(Expected, Actual, 'Murmur3_32 SMHasher test');
end;

procedure TMurmur3Tests.SelfTest_32_TestVectors;
var
  ws: UnicodeString;
  t1, t2: Int64;

  // Hashes the bytes described by KeyHexString and checks the result against Expected.
  procedure t(const KeyHexString: string; Seed: LongWord; Expected: LongWord);
  var
    actual: LongWord;
    key: TBytes; //same type HexStringToBytes returns
  begin
    key := HexStringToBytes(KeyHexString);

    if not QueryPerformanceCounter(t1) then t1 := 0;

    actual := TMurmur3.HashData32(Pointer(key)^, Length(Key), Seed);

    if not QueryPerformanceCounter(t2) then t2 := 0;

    Status('Hashed '+KeyHexString+' in '+FloatToStrF((t2-t1)/FFreq*1000000, ffFixed, 15, 3)+' µs');

    CheckEquals(Expected, Actual, Format('Key: %s. Seed: 0x%.8x', [KeyHexString, Seed]));
  end;

  // Hashes Value through HashString32 and checks the result against Expected.
  procedure TestString(const Value: UnicodeString; Seed: LongWord; Expected: LongWord);
  var
    actual: LongWord;
    i: Integer;
    safeValue: string;
  begin
    if not QueryPerformanceCounter(t1) then t1 := 0;

    actual := TMurmur3.HashString32(Value, Seed);

    if not QueryPerformanceCounter(t2) then t2 := 0;

    //Replace each #0 character with the text '#0' so the key can be shown in messages.
    //Done by hand, one character at a time, rather than with StringReplace.
    safeValue := '';
    for i := 1 to Length(Value) do
    begin
      if Value[i] = #0 then
        safeValue := safeValue + '#0'
      else
        safeValue := safeValue + Value[i];
    end;
    Status('Hashed "'+safeValue+'" in '+FloatToStrF((t2-t1)/FFreq*1000000, ffFixed, 15, 3)+' µs');

    CheckEquals(Expected, Actual, Format('Key: %s. Seed: 0x%.8x', [safeValue, Seed]));
  end;
const
  n: UnicodeString=''; //n=nothing.
      //Work around bug in older versions of Delphi compiler when building WideStrings
      //http://stackoverflow.com/a/7031942/12597

begin
  t('',                    0,         0); //with zero data and zero seed; everything becomes zero
  t('',                    1, $514E28B7); //ignores nearly all the math

  t('',            $FFFFFFFF, $81F16F39); //Make sure your seed is using unsigned math
  t('FF FF FF FF',         0, $76293B50); //Make sure your 4-byte chunks are using unsigned math
  t('21 43 65 87',         0, $F55B516B); //Endian order. UInt32 should end up as 0x87654321
  t('21 43 65 87', $5082EDEE, $2362F9DE); //Seed value eliminates initial key with xor

  t(   '21 43 65',         0, $7E4A8634); //Only three bytes. Should end up as 0x654321
  t(      '21 43',         0, $A0F7B07A); //Only two bytes. Should end up as 0x4321
  t(         '21',         0, $72661CF4); //Only one byte. Should end up as 0x21

  t('00 00 00 00',         0, $2362F9DE); //Zero dword eliminates almost all math. Make sure you don't mess up the pointers and it ends up as null
  t(   '00 00 00',         0, $85F0B427); //Only three bytes. Should end up as 0.
  t(      '00 00',         0, $30F4C306); //Only two bytes. Should end up as 0.
  t(         '00',         0, $514E28B7); //Only one byte. Should end up as 0.


  //Easier to test strings. All strings are assumed to be UTF-8 encoded and do not include any null terminator
  TestString('', 0, 0); //empty string with zero seed should give zero
  TestString('', 1, $514E28B7);
  TestString('', $ffffffff, $81F16F39); //make sure seed value handled unsigned
  TestString(#0#0#0#0, 0, $2362F9DE); //we handle embedded nulls

  TestString('aaaa', $9747b28c, $5A97808A); //one full chunk
  TestString('a',    $9747b28c, $7FA09EA6); //one character
  TestString('aa',   $9747b28c, $5D211726); //two characters
  TestString('aaa',  $9747b28c, $283E0130); //three characters

  //Endian order within the chunks
  TestString('abcd', $9747b28c, $F0478627); //one full chunk
  TestString('a',    $9747b28c, $7FA09EA6);
  TestString('ab',   $9747b28c, $74875592);
  TestString('abc',  $9747b28c, $C84A62DD);

  TestString('Hello, world!', $9747b28c, $24884CBA);

  //we build it up this way to workaround a bug in older versions of Delphi that were unable to build WideStrings correctly
  ws := n+#$03C0+#$03C0+#$03C0+#$03C0+#$03C0+#$03C0+#$03C0+#$03C0; //U+03C0: Greek Small Letter Pi
  TestString(ws, $9747b28c, $D58063C1); //Unicode handling and conversion to UTF-8

  {
    String of 256 characters.
    Make sure you don't store string lengths in a char, and overflow at 255.
    OpenBSD's canonical implementation of BCrypt made this mistake
  }
  ws := StringOfChar('a', 256);
  TestString(ws, $9747b28c, $37405BDC);


  //The test vector that you'll see out there for Murmur
  TestString('The quick brown fox jumps over the lazy dog', $9747b28c, $2FA826CD);


  //The SHA2 test vectors
  TestString('abc', 0, $B3DD93FA);
  TestString('abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq', 0, $EE925B90);

  //#1) 1 byte 0xbd
  t('bd', 0, $5FF4C2DA);

  //#2) 4 bytes 0xc98c8e55
  t('55 8e 8c c9', 0, $A7B55574);

  //Vectors #3 to #7 hash the character '0' repeated, not zero-valued bytes

  //#3) 55 characters '0'
  TestString(StringOfChar('0', 55), 0, 2095704162);

  //#4) 56 characters '0'
  TestString(StringOfChar('0', 56), 0, 2438208104);

  //#5) 57 characters '0'
  TestString(StringOfChar('0', 57), 0, 1843415968);

  //#6) 64 characters '0'
  TestString(StringOfChar('0', 64), 0, 2811227051);

  //#7) 1000 characters '0'
  TestString(StringOfChar('0', 1000), 0, 4049757186);

  //#8) 1000 bytes of 0x41 ‘A’
  TestString(StringOfChar('A', 1000), 0, 296104456);

  //#9) 1005 bytes of 0x55 ‘U’
  TestString(StringOfChar('U', 1005), 0, 3970215021);
end;

// Caches the performance-counter frequency used to convert tick deltas into microseconds.
procedure TMurmur3Tests.SetUp;
begin
  inherited;

  if not QueryPerformanceFrequency(FFreq) then
    FFreq := -1;
end;

procedure TMurmur3Tests.TearDown;
begin
  inherited;

end;

initialization
  TestFramework.RegisterTest('MurmurHash3', TMurmur3Tests.Suite);

end.
271 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # murmur-delphi 2 | Murmur hash for Delphi 3 | 4 | MurmurHash is a fast, non-cryptographic, hash, suitable for hash tables.
5 | 6 | It comes in three variants: 7 | 8 | - hash result of 32 bits 9 | - hash result of 128 bits, optimized for x86 architecture (declared, but not yet implemented) 10 | - hash result of 128 bits, optimized for x64 architecture (declared, but not yet implemented) 11 | 12 | 13 | Sample Usage 14 | ---------------- 15 | 16 | In its simplest form, it can be used to generate the hash for a string: 17 | 18 | var 19 | hash: Cardinal; 20 | 21 | hash := TMurmur3.HashString32('Customer_12793', $5caff01d); 22 | 23 | But it can also support any other kind of data: 24 | 25 | var 26 | hash: Cardinal; 27 | find: WIN32_FIND_DATA; 28 | 29 | hash := TMurmur3.HashData32(find, SizeOf(WIN32_FIND_DATA), $ba5eba11); 30 | 31 | 32 | --------------------------------------------------------------------------------