├── Makefile ├── .gitignore ├── README.md ├── LICENSE ├── src ├── murmerl3.app.src └── murmerl3.erl └── test ├── murmerl3_SUITE_data └── Murmur3.java └── murmerl3_SUITE.erl /Makefile: -------------------------------------------------------------------------------- 1 | PROJECT = murmerl3 2 | 3 | TEST_DEPS=proper 4 | 5 | BUILD_DEPS = elvis_mk 6 | dep_elvis_mk = git https://github.com/inaka/elvis.mk.git master 7 | 8 | DEP_PLUGINS = elvis_mk 9 | 10 | DIALYZER_OPTS += --src -r test 11 | 12 | include erlang.mk 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | erl_crash.dump 2 | .sw? 3 | .*.sw? 4 | *.beam 5 | /.erlang.mk/ 6 | /cover/ 7 | /deps/ 8 | /ebin/ 9 | /logs/ 10 | /plugins/ 11 | /xrefr 12 | elvis 13 | callgrind* 14 | ct.coverdata 15 | test/ct.cover.spec 16 | _build 17 | 18 | osiris.d 19 | *.plt 20 | *.d 21 | 22 | *.jar 23 | 24 | /user.bazelrc 25 | /bazel-* 26 | 27 | /.vscode/ 28 | 29 | /test/murmerl3_SUITE_data/*.class 30 | *.DS_Store 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | murmerl3 2 | ======== 3 | 4 | Pure Erlang implementation of the MurmurHash3 algorithm. 5 | 6 | MurmurHash3 is a hash function that's suitable for non-cryptographic 7 | situations, such as hash-based lookups. 8 | 9 | Currently only the 32-bit version is implemented. 10 | 11 | Usage 12 | ----- 13 | 14 | `murmerl3:hash_32(Data)` hashes Data with an initial seed of 0. 15 | 16 | `murmerl3:hash_32(Data, Seed)` hashes Data with the given seed. 17 | 18 | ``` erlang 19 | 20 | 1> murmerl3:hash_32("The quick brown fox jumps over the lazy dog"). 21 | 776992547 22 | 23 | 2> murmerl3:hash_32("The quick brown fox jumps over the lazy dog", 666). 
24 | 3231564089 25 | 26 | ``` 27 | 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013, Bip Thelin 2 | 3 | Permission to use, copy, modify, and/or distribute this software for any 4 | purpose with or without fee is hereby granted, provided that the above 5 | copyright notice and this permission notice appear in all copies. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 8 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 9 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 10 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 11 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 12 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 13 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 14 | -------------------------------------------------------------------------------- /src/murmerl3.app.src: -------------------------------------------------------------------------------- 1 | %% Copyright (c) 2013, Bip Thelin 2 | %% 3 | %% Permission to use, copy, modify, and/or distribute this software for any 4 | %% purpose with or without fee is hereby granted, provided that the above 5 | %% copyright notice and this permission notice appear in all copies. 6 | %% 7 | %% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 8 | %% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 9 | %% MERCHANTABILITY AND FITNESS. 
%% IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

{application, murmerl3,
 [ {description, "MurmurHash3 library"}
 , {vsn, "0.1.0"}
 , {modules, []}
 , {registered, []}
 , {applications, [ kernel
                  , stdlib
                  ]}
 ]}.
--------------------------------------------------------------------------------
/test/murmerl3_SUITE_data/Murmur3.java:
--------------------------------------------------------------------------------
import java.nio.charset.StandardCharsets;
import java.util.function.ToIntFunction;

/**
 * Command-line MurmurHash3 (32-bit) reference program.
 *
 * Takes exactly one string argument, hashes its UTF-8 bytes with a fixed seed
 * and prints the result as an unsigned decimal number on standard output.
 */
public class Murmur3 {

    public static void main(String[] args) {
        if (args.length != 1) {
            System.err.println("Please provide a string argument to hash");
            System.exit(1);
        }
        System.out.println(Integer.toUnsignedString(new Murmur3Hash().applyAsInt(args[0])));
    }

    /**
     * 32-bit MurmurHash3 over the UTF-8 encoding of a string, using the
     * hard-coded seed 104729.
     */
    static class Murmur3Hash implements ToIntFunction<String> {

        private static final int C1_32 = 0xcc9e2d51;
        private static final int C2_32 = 0x1b873593;
        private static final int R1_32 = 15;
        private static final int R2_32 = 13;
        private static final int M_32 = 5;
        private static final int N_32 = 0xe6546b64;

        /** Reads four bytes starting at {@code index} as a little-endian int. */
        private static int getLittleEndianInt(final byte[] data, final int index) {
            return ((data[index] & 0xff))
                    | ((data[index + 1] & 0xff) << 8)
                    | ((data[index + 2] & 0xff) << 16)
                    | ((data[index + 3] & 0xff) << 24);
        }

        /** Mixes one 4-byte block {@code k} into the running {@code hash}. */
        private static int mix32(int k, int hash) {
            k *= C1_32;
            k = Integer.rotateLeft(k, R1_32);
            k *= C2_32;
            hash ^= k;
            return Integer.rotateLeft(hash, R2_32) * M_32 + N_32;
        }

        /** Final avalanche step applied to the accumulated hash. */
        private static int fmix32(int hash) {
            hash ^= (hash >>> 16);
            hash *= 0x85ebca6b;
            hash ^= (hash >>> 13);
            hash *= 0xc2b2ae35;
            hash ^= (hash >>> 16);
            return hash;
        }

        /**
         * Hashes {@code value}.
         *
         * @param value the string to hash; encoded as UTF-8 before hashing
         * @return the 32-bit hash as a (possibly negative) Java int
         */
        @Override
        public int applyAsInt(String value) {
            byte[] data = value.getBytes(StandardCharsets.UTF_8);
            final int offset = 0;
            final int length = data.length;
            final int seed = 104729;
            int hash = seed;
            final int nblocks = length >> 2;

            // body: consume the input four bytes at a time
            for (int i = 0; i < nblocks; i++) {
                final int index = offset + (i << 2);
                final int k = getLittleEndianInt(data, index);
                hash = mix32(k, hash);
            }

            // tail: the remaining 1-3 bytes, if any
            final int index = offset + (nblocks << 2);
            int k1 = 0;
            switch (offset + length - index) {
                case 3:
                    k1 ^= (data[index + 2] & 0xff) << 16;
                    // fall through
                case 2:
                    k1 ^= (data[index + 1] & 0xff) << 8;
                    // fall through
                case 1:
                    k1 ^= (data[index] & 0xff);

                    // mix functions
                    k1 *= C1_32;
                    k1 = Integer.rotateLeft(k1, R1_32);
                    k1 *= C2_32;
                    hash ^= k1;
            }

            hash ^= length;
            return fmix32(hash);
        }
    }
}
--------------------------------------------------------------------------------
/src/murmerl3.erl:
--------------------------------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Copyright (c) 2013, Bip Thelin
%%%
%%% Permission to use, copy, modify, and/or distribute this software for any
%%% purpose with or without fee is hereby granted, provided that the above
%%% copyright notice and this permission notice appear in all copies.
%%%
%%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
%%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
%%% MERCHANTABILITY AND FITNESS.
%%% IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
%%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
%%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
%%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
%%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
%%%
%%% @doc This is a pure Erlang implementation of the MurmurHash3
%%% (https://code.google.com/p/smhasher/wiki/MurmurHash3)
%%% MurmurHash3 is suitable for generating well-distributed
%%% non-cryptographic hashes.
%%%
%%% This version is based on the supposedly final rev 136.
%%% @end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%_* Module declaration ===============================================
-module(murmerl3).

%%%_* Exports ==========================================================
-export([hash_32/1]).
-export([hash_32/2]).

%%%_* Macros ===========================================================
-define(c1_32, 16#cc9e2d51).
-define(c2_32, 16#1b873593).
-define(n_32, 16#e6546b64).
-define(mask_32(X), ((X) band 16#FFFFFFFF)).

%%%_* Code =============================================================
%%%_ * API -------------------------------------------------------------

%% @doc Hash `Data' with a seed of 0. See {@link hash_32/2}.
-spec hash_32(iodata() | integer()) -> 0..16#FFFFFFFF.
hash_32(Data) ->
    hash_32(Data, 0).

%% @doc Compute the 32-bit MurmurHash3 of `Data' using `Seed'.
%%
%% Binaries are hashed as-is. Lists are first converted with
%% `list_to_binary/1' and integers with `integer_to_binary/1', so an integer
%% is hashed as its decimal text representation.
%%
%% `Seed' is reduced to its low 32 bits before use, so a negative or
%% oversized seed behaves like the same value stored in an unsigned 32-bit
%% integer. Seeds already in the range 0..16#FFFFFFFF are unaffected.
-spec hash_32(iodata() | integer(), integer()) -> 0..16#FFFFFFFF.
hash_32(Data, Seed) when is_binary(Data) ->
    Hash = case hash_32_aux(?mask_32(Seed), Data) of
               {H, []} ->
                   %% input length was a multiple of four: no tail to mix
                   H;
               {H, T} ->
                   %% mix the trailing 1-3 bytes into the hash
                   K1 = ?mask_32(
                           rotl32(
                             ?mask_32(swap_uint32(T) * ?c1_32), 15) * ?c2_32),
                   H bxor K1
           end,
    fmix32(Hash bxor byte_size(Data));
hash_32(Data, Seed) when is_integer(Data) ->
    hash_32(integer_to_binary(Data), Seed);
hash_32(Data, Seed) when is_list(Data) ->
    hash_32(list_to_binary(Data), Seed).
%%%_* Private functions ================================================

%% Fold every complete 4-byte little-endian block of the binary into the
%% running hash H0. Returns {Hash, Tail}, where Tail is the remaining
%% 1-3 bytes, or [] when nothing is left over.
hash_32_aux(H0, <<K:32/little, T/binary>>) ->
    K1 = ?mask_32(rotl32(?mask_32(K * ?c1_32), 15) * ?c2_32),
    hash_32_aux(?mask_32(rotl32((H0 bxor K1), 13) * 5 + ?n_32), T);
hash_32_aux(H, T) when byte_size(T) > 0 ->
    {H, T};
hash_32_aux(H, _) ->
    {H, []}.

%% Final avalanche: xor-shift and multiply, masking each product to 32 bits.
fmix32(H0) ->
    H1 = ?mask_32(xorbsr(H0, 16) * 16#85ebca6b),
    H2 = ?mask_32(xorbsr(H1, 13) * 16#c2b2ae35),
    xorbsr(H2, 16).

%% Assemble a 1-3 byte tail into an integer, first byte least significant.
swap_uint32(<<V1:8, V2:8, V3:8>>) ->
    ((V3 bsl 16) bxor (V2 bsl 8)) bxor V1;
swap_uint32(<<V1:8, V2:8>>) ->
    (V2 bsl 8) bxor V1;
swap_uint32(<<V1:8>>) ->
    0 bxor V1.

%% H xor (H shifted right by V bits).
xorbsr(H, V) ->
    H bxor (H bsr V).

%% Rotate X left by R bits; the result is masked to 32 bits.
rotl32(X, R) ->
    ?mask_32((X bsl R) bor (X bsr (32 - R))).

%%%_* Emacs ============================================================
%%% Local Variables:
%%% allout-layout: t
%%% erlang-indent-level: 4
%%% End:
--------------------------------------------------------------------------------
/test/murmerl3_SUITE.erl:
--------------------------------------------------------------------------------
%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2007-2022 VMware, Inc. or its affiliates. All rights reserved.
%%

-module(murmerl3_SUITE).

-compile(nowarn_export_all).
-compile(export_all).

-export([]).

-include_lib("proper/include/proper.hrl").
-include_lib("common_test/include/ct.hrl").
-include_lib("eunit/include/eunit.hrl").

%% Largest value representable in 32 bits.
-define(INT32_MAX, 16#FFFFFFFF).

-type int32() :: 0..?INT32_MAX.
%%%===================================================================
%%% Common Test callbacks
%%%===================================================================

all() ->
    [{group, tests}].

all_tests() -> [hash_32_basics,
                java_basics,
                java_comparison_prop,
                is_right_size_prop].

groups() ->
    [{tests, [], all_tests()}].

init_per_suite(Config) ->
    Config.

end_per_suite(_Config) ->
    ok.

%% Make sure the Java reference program is compiled before the group runs.
%% The compile step is skipped only when a class file is already present;
%% the source file always exists, so testing for it would never compile.
init_per_group(_Group, Config) ->
    Dir = ?config(data_dir, Config),
    Source = filename:join(Dir, "Murmur3.java"),
    Class = filename:join(Dir, "Murmur3.class"),
    case filelib:is_regular(Class) of
        true ->
            ok;
        false ->
            %% -d selects the output directory for class files, which is
            %% the directory run_java/2 puts on the classpath
            Cmd = io_lib:format("javac -d ~s ~s", [Dir, Source]),
            Result = os:cmd(Cmd),
            ct:pal("javac result ~p", [Result])
    end,
    Config.

end_per_group(_Group, _Config) ->
    ok.

init_per_testcase(_TestCase, Config) ->
    Config.

end_per_testcase(_TestCase, _Config) ->
    ok.

%%%===================================================================
%%% Test cases
%%%===================================================================

%% Fixed vectors; ?assertEqual takes the expected value first.
hash_32_basics(_Config) ->
    ?assertEqual(0, murmerl3:hash_32("")),
    ?assertEqual(1364076727, murmerl3:hash_32("", 1)),
    ?assertEqual(murmerl3:hash_32("Some Data", 0),
                 murmerl3:hash_32("Some Data")),
    ?assertEqual(3530670207, murmerl3:hash_32("0")),
    ?assertEqual(1642882560, murmerl3:hash_32("01")),
    ?assertEqual(3966566284, murmerl3:hash_32("012")),
    ?assertEqual(3558446240, murmerl3:hash_32("0123")),
    ?assertEqual(433070448, murmerl3:hash_32("01234")),
    ok.

%% A single input compared against the Java program's output.
java_basics(Config) ->
    %% seed is hardcoded in java program
    Seed = 104729,
    Data = <<"0">>,
    JavaHash = run_java(Data, Config),
    Hash = murmerl3:hash_32(Data, Seed),
    ?assertEqual(JavaHash, Hash),
    ok.
%% Code points allowed in generated strings: ASCII digits and letters.
-type c() :: 48..57 | 65..90 | 97..122 .

%% Property: for random non-empty alphanumeric strings the Erlang hash
%% equals the number printed by the Java program (100 runs).
java_comparison_prop(Config) ->
    Seed = 104729,
    Property =
        fun () ->
                ?FORALL(Chars, nonempty_list(c()),
                        begin
                            Bin = unicode:characters_to_binary(Chars),
                            murmerl3:hash_32(Bin, Seed) =:= run_java(Bin, Config)
                        end)
        end,
    run_proper(Property, [], 100),
    ok.

%% Property: the hash never exceeds ?INT32_MAX for any binary and seed
%% drawn from int32() (1000 runs).
is_right_size_prop(_Config) ->
    Property =
        fun () ->
                ?FORALL({Bin, Seed}, {binary(), int32()},
                        murmerl3:hash_32(Bin, Seed) =< ?INT32_MAX)
        end,
    run_proper(Property, [], 1000),
    ok.

%% utilities

%% Run the Java program on Data and parse the number it prints.
run_java(Data, Config) ->
    DataDir = ?config(data_dir, Config),
    Output = os:cmd(io_lib:format("java -cp ~s Murmur3 \"~s\"", [DataDir, Data])),
    list_to_integer(string:chomp(Output)).

%% Build the property by applying Fun to Args, run it NumTests times and
%% assert that PropEr reports no counterexample. Progress dots are dropped;
%% all other PropEr output goes to the CT log at low importance.
run_proper(Fun, Args, NumTests) ->
    ?assertEqual(
       true,
       proper:counterexample(
         erlang:apply(Fun, Args),
         [{numtests, NumTests},
          {on_output, fun(".", _) -> ok;
                         (F, A) -> ct:pal(?LOW_IMPORTANCE, F, A) end}])).
--------------------------------------------------------------------------------