├── .gitattributes ├── .gitignore ├── Build.bat ├── CMakeLists.txt ├── LICENSE ├── README.md ├── Test ├── Build.bat ├── CMakeLists.txt ├── IdealHash.sln ├── IdealHash.vcxproj ├── IdealHash.vcxproj.filters ├── config.h.in ├── dlmalloc │ └── malloc.c └── main.cpp ├── appveyor.yml ├── cmake └── BuildSettings.cmake ├── include └── HashTrie.h └── src ├── CMakeLists.txt └── HashTrie.cpp /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | *.sln merge=union 7 | *.csproj merge=union 8 | *.vbproj merge=union 9 | *.fsproj merge=union 10 | *.dbproj merge=union 11 | 12 | # Standard to msysgit 13 | *.doc diff=astextplain 14 | *.DOC diff=astextplain 15 | *.docx diff=astextplain 16 | *.DOCX diff=astextplain 17 | *.dot diff=astextplain 18 | *.DOT diff=astextplain 19 | *.pdf diff=astextplain 20 | *.PDF diff=astextplain 21 | *.rtf diff=astextplain 22 | *.RTF diff=astextplain 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ################# 2 | ## Eclipse 3 | ################# 4 | 5 | *.pydevproject 6 | .project 7 | .metadata 8 | bin/ 9 | tmp/ 10 | *.tmp 11 | *.bak 12 | *.swp 13 | *~.nib 14 | local.properties 15 | .classpath 16 | .settings/ 17 | .loadpath 18 | 19 | # External tool builders 20 | .externalToolBuilders/ 21 | 22 | # Locally stored "Eclipse launch configurations" 23 | *.launch 24 | 25 | # CDT-specific 26 | .cproject 27 | 28 | # PDT-specific 29 | .buildpath 30 | 31 | 32 | ################# 33 | ## Visual Studio 34 | ################# 35 | 36 | ## Ignore Visual Studio temporary files, build results, and 37 | ## files generated by popular Visual Studio add-ons. 
38 | 39 | # User-specific files 40 | *.suo 41 | *.user 42 | *.sln.docstates 43 | 44 | # Build results 45 | [Dd]ebug/ 46 | [Rr]elease/ 47 | *_i.c 48 | *_p.c 49 | *.cod 50 | *.ilk 51 | *.meta 52 | *.obj 53 | *.pch 54 | *.pdb 55 | *.pgc 56 | *.pgd 57 | *.rsp 58 | *.sbr 59 | *.tlb 60 | *.tli 61 | *.tlh 62 | *.tmp 63 | *.vspscc 64 | .builds 65 | *.dotCover 66 | _build 67 | 68 | ## TODO: If you have NuGet Package Restore enabled, uncomment this 69 | #packages/ 70 | 71 | # Visual C++ cache files 72 | ipch/ 73 | *.aps 74 | *.ncb 75 | *.opensdf 76 | *.sdf 77 | 78 | # Visual Studio profiler 79 | *.psess 80 | *.vsp 81 | 82 | # ReSharper is a .NET coding add-in 83 | _ReSharper* 84 | 85 | # Installshield output folder 86 | [Ee]xpress 87 | 88 | # DocProject is a documentation generator add-in 89 | DocProject/buildhelp/ 90 | DocProject/Help/*.HxT 91 | DocProject/Help/*.HxC 92 | DocProject/Help/*.hhc 93 | DocProject/Help/*.hhk 94 | DocProject/Help/*.hhp 95 | DocProject/Help/Html2 96 | DocProject/Help/html 97 | 98 | # Click-Once directory 99 | publish 100 | 101 | # Others 102 | [Bb]in 103 | [Oo]bj 104 | sql 105 | TestResults 106 | *.Cache 107 | ClientBin 108 | stylecop.* 109 | ~$* 110 | *.dbmdl 111 | Generated_Code #added for RIA/Silverlight projects 112 | 113 | # Backup & report files from converting an old project file to a newer 114 | # Visual Studio version. 
Backup files are not needed, because we have git ;-) 115 | _UpgradeReport_Files/ 116 | Backup*/ 117 | UpgradeLog*.XML 118 | 119 | 120 | 121 | ############ 122 | ## Windows 123 | ############ 124 | 125 | # Windows image file caches 126 | Thumbs.db 127 | 128 | # Folder config file 129 | Desktop.ini 130 | 131 | 132 | ############# 133 | ## Python 134 | ############# 135 | 136 | *.py[co] 137 | 138 | # Packages 139 | *.egg 140 | *.egg-info 141 | dist 142 | build 143 | eggs 144 | parts 145 | bin 146 | var 147 | sdist 148 | develop-eggs 149 | .installed.cfg 150 | 151 | # Installer logs 152 | pip-log.txt 153 | 154 | # Unit test / coverage reports 155 | .coverage 156 | .tox 157 | 158 | #Translations 159 | *.mo 160 | 161 | #Mr Developer 162 | .mr.developer.cfg 163 | 164 | # Mac crap 165 | .DS_Store 166 | -------------------------------------------------------------------------------- /Build.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | ::============================================================================ 3 | :: Make sure we can use command shell extensions 4 | ::============================================================================ 5 | VERIFY OTHER 2>nul 6 | SETLOCAL ENABLEEXTENSIONS ENABLEDELAYEDEXPANSION 7 | IF NOT ERRORLEVEL 1 goto ExtOk 8 | echo * 9 | echo * Error: Unable to enable DOS extensions 10 | echo * 11 | EXIT /B 1 12 | :ExtOk 13 | 14 | set ScriptPath=%~d0%~p0 15 | 16 | ::============================================================================ 17 | ::Detect VS2017 18 | ::============================================================================ 19 | set WHERE_EXE="%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" 20 | 21 | if exist %WHERE_EXE% ( 22 | for /f "usebackq tokens=1* delims=: " %%i in (`%WHERE_EXE% -latest -requires Microsoft.Component.MSBuild`) do ( 23 | if /i "%%i"=="installationPath" set InstallDir=%%j 24 | ) 25 | 26 | if exist 
"%InstallDir%\VC\Auxiliary\Build" ( 27 | set VCDIR="%InstallDir%\VC\Auxiliary\Build\" 28 | ) 29 | 30 | if defined VCDIR ( 31 | echo "Visual Studio 2017 (version 15) detected" 32 | set VC_VER_STR=15 2017 33 | goto :VCOK 34 | ) 35 | ) 36 | 37 | ::============================================================================ 38 | ::Detect VS14 39 | ::============================================================================ 40 | set VC_KEY_NAME="HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Microsoft\VisualStudio\14.0\Setup\VC" 41 | set VC_KEY_VALUE="ProductDir" 42 | set VCDIR="" 43 | 44 | for /f "usebackq tokens=1-2,*" %%a in (`REG QUERY %VC_KEY_NAME% /v %VC_KEY_VALUE%`) do ( 45 | set VCDIR="%%c" 46 | ) 47 | 48 | if defined VCDIR ( 49 | echo "Visual Studio 2015 (version 14) detected" 50 | set VC_VER_STR=14 2015 51 | goto :VCOK 52 | ) 53 | 54 | echo ERROR: Visual Studio 2015 or 2017 is required. 55 | 56 | goto :EndOfScript 57 | 58 | :VCOK 59 | ::============================================================================ 60 | ::Run CMake 61 | ::============================================================================ 62 | if not exist %ScriptPath%_build mkdir %ScriptPath%_build 63 | pushd %ScriptPath%_build 64 | cmake .. -G "Visual Studio %VC_VER_STR% Win64" 65 | cmake --build . --config Release 66 | ctest . -C Release -V 67 | popd 68 | 69 | :EndOfScript 70 | endlocal 71 | goto :EOF 72 | 73 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.2) 2 | 3 | # Maps to a solution file (Tutorial.sln). 
The solution will 4 | # have all targets (exe, lib, dll) as projects (.vcproj) 5 | project(HAMT) 6 | 7 | # Turn on the ability to create folders to organize projects (.vcproj) 8 | # It creates "CMakePredefinedTargets" folder by default and adds CMake 9 | # defined projects like INSTALL.vcproj and ZERO_CHECK.vcproj 10 | set_property(GLOBAL PROPERTY USE_FOLDERS ON) 11 | 12 | # Turn on CMake testing capabilities 13 | enable_testing() 14 | 15 | # Sub-directories where more CMakeLists.txt exist 16 | add_subdirectory(Test) 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012 Chae Seong Lim 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build status](https://ci.appveyor.com/api/projects/status/qtyham872bkcbdwm/branch/dev?svg=true)](https://ci.appveyor.com/project/chaelim/hamt) 2 | 3 | C++ Template class implementation of Hash Array Mapped Trie 4 | ================================ 5 | 6 | Do you want a space-efficient and fast hash table? HAMT is here for you. It is based on the paper [Ideal Hash Trees by Phil Bagwell](http://lampwww.epfl.ch/papers/idealhashtrees.pdf) and, as the title states, it has ideal features for a hash table, as listed below. 7 | 8 | Features 9 | ------------------------- 10 | * No initial root hash table required. 11 | _(Empty hash table just takes 8 bytes in 32 bit build or 12 bytes in 64 bit build.)_ 12 | * No stop-the-world rehashing. 13 | * Faster and smaller. 14 | * Constant add/delete O(1) operations 15 | * C++ Template implementation can easily be used with any data type. 16 | * 32 bit hash key and 32 bit bitmap to index subhash array. 17 | * 32 bit integer and string (ANSI and Unicode) hash key templates are included. 18 | * Expected tree depth: ![equation](http://latex.codecogs.com/gif.latex?O%28%5Clog_%7B2%5EW%7D%28n%29%29). 19 | W = 5 20 | n : number of elements stored in the trie 21 | * Hamming weight of bitmap can be calculated using the POPCNT (population count) CPU instruction (introduced in Nehalem-based and Barcelona microarchitecture CPUs). POPCNT can speed up overall performance by about 10%. 22 | 23 | Test program build notes 24 | ------------------------- 25 | * Open and compile Test\IdealHash.sln 26 | * To enable POPCNT CPU instruction, change 0 to 1 in "#define SSE42_POPCNT 0". POPCNT is an SSE4 CPU instruction supported since Intel Nehalem and AMD Barcelona. 
27 | * References on POPCNT: 28 | - http://en.wikipedia.org/wiki/SSE4#POPCNT_and_LZCNT 29 | - http://developer.amd.com/community/blog/barcelona-processor-feature-advanced-bit-manipulation-abm/ 30 | 31 | More information 32 | ------------------------- 33 | * [Ideal Hash Trees by Phil Bagwell](http://lampwww.epfl.ch/papers/idealhashtrees.pdf). 34 | * [Wikipedia on Hash array mapped trie](http://en.wikipedia.org/wiki/Hash_array_mapped_trie). 35 | * [Ideal Hash Tries: an implementation in C++](http://www.altdevblogaday.com/2011/03/22/ideal-hash-tries-an-implementation-in-c/). 36 | * [POPCNT Instruction](http://en.wikipedia.org/wiki/SSE4#POPCNT_and_LZCNT) 37 | 38 | -------------------------------------------------------------------------------- /Test/Build.bat: -------------------------------------------------------------------------------- 1 | msbuild.exe IdealHash.vcxproj /t:Rebuild /p:Configuration=Release;platform=x64 -------------------------------------------------------------------------------- /Test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.2) 2 | 3 | # Default build-time configuration options 4 | # Can be modified via CMake GUI or via CMake command line 5 | option(SSE42_POPCNT "Use POPCNT CPU in SSE4.2 instruction" ON) 6 | option(HAMT_TEST_USE_DLMALLOC "Use DLMalloc instead of the default C runtime platform malloc" ON) 7 | option(WIN64 "Default generate x64" ON) 8 | 9 | # Write build-time configuration options to a header file 10 | configure_file(config.h.in config.h) 11 | include_directories(${CMAKE_CURRENT_BINARY_DIR}) 12 | include_directories(../include) 13 | include(../cmake/BuildSettings.cmake) 14 | 15 | # Maps to a solution file (Tutorial.sln). 
The solution will 16 | # have all targets (exe, lib, dll) as projects (.vcproj) 17 | project(SimplePerfTest) 18 | 19 | # Turn on the ability to create folders to organize projects (.vcproj) 20 | # It creates "CMakePredefinedTargets" folder by default and adds CMake 21 | # defined projects like INSTALL.vcproj and ZERO_CHECK.vcproj 22 | set_property(GLOBAL PROPERTY USE_FOLDERS ON) 23 | 24 | # Create project and .exe 25 | # Debug;Release;MinSizeRel;RelWithDebInfo 26 | if (CMAKE_CONFIGURATION_TYPES) 27 | set(CMAKE_CONFIGURATION_TYPES "Debug;Release") 28 | set(CMAKE_CONFIGURATION_TYPES "${CMAKE_CONFIGURATION_TYPES}" CACHE STRING 29 | "Reset the configurations to what we need" 30 | FORCE) 31 | endif() 32 | 33 | # Command to output information to the console 34 | # Useful for displaying errors, warnings, and debugging 35 | message("cxx Flags: " ${CMAKE_CXX_FLAGS}) 36 | 37 | file(GLOB SRCFILES *.cpp) 38 | file(GLOB INCFILES *.h) 39 | 40 | if (HAMT_TEST_USE_DLMALLOC) 41 | list(APPEND SRCFILES dlmalloc/malloc.c) 42 | endif() 43 | 44 | add_executable(${PROJECT_NAME} ${SRCFILES} ${INCFILES} config.h.in ${CMAKE_CURRENT_BINARY_DIR}/config.h) 45 | 46 | set_target_properties(${PROJECT_NAME} PROPERTIES 47 | CXX_STANDARD 11 48 | CXX_STANDARD_REQUIRED ON 49 | COMPILE_DEFINITIONS "$<$:_SCL_SECURE_NO_WARNINGS>" 50 | COMPILE_OPTIONS "$<$:/EHsc>" 51 | ) 52 | 53 | # Add subdirectories with dependencies before dependents so that each subdirectory 54 | # inherits the include paths gathered from the dependencies before it. 55 | add_subdirectory(../src HAMT) 56 | 57 | # Add libraries with dependencies after dependents to satisfy ld linker. 
58 | target_link_libraries(${PROJECT_NAME} HAMT) 59 | 60 | # Adds logic to INSTALL.vcproj to copy HAMTTest.exe to destination directory 61 | install(TARGETS ${PROJECT_NAME} 62 | RUNTIME DESTINATION ${PROJECT_BINARY_DIR}/bin) 63 | 64 | # Turn on CMake testing capabilities 65 | enable_testing() 66 | 67 | # Add test cases 68 | add_test(NAME ${PROJECT_NAME} 69 | CONFIGURATIONS Release 70 | COMMAND ${PROJECT_NAME} 71 | WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}) -------------------------------------------------------------------------------- /Test/IdealHash.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 2013 4 | VisualStudioVersion = 12.0.31101.0 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "IdealHash", "IdealHash.vcxproj", "{DD13B91A-93C9-49DD-96D9-988290541758}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|Win32 = Debug|Win32 11 | Debug|x64 = Debug|x64 12 | Release|Win32 = Release|Win32 13 | Release|x64 = Release|x64 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {DD13B91A-93C9-49DD-96D9-988290541758}.Debug|Win32.ActiveCfg = Debug|Win32 17 | {DD13B91A-93C9-49DD-96D9-988290541758}.Debug|Win32.Build.0 = Debug|Win32 18 | {DD13B91A-93C9-49DD-96D9-988290541758}.Debug|x64.ActiveCfg = Debug|x64 19 | {DD13B91A-93C9-49DD-96D9-988290541758}.Debug|x64.Build.0 = Debug|x64 20 | {DD13B91A-93C9-49DD-96D9-988290541758}.Release|Win32.ActiveCfg = Release|x64 21 | {DD13B91A-93C9-49DD-96D9-988290541758}.Release|Win32.Build.0 = Release|x64 22 | {DD13B91A-93C9-49DD-96D9-988290541758}.Release|x64.ActiveCfg = Release|Win32 23 | {DD13B91A-93C9-49DD-96D9-988290541758}.Release|x64.Build.0 = Release|Win32 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | 
EndGlobalSection 28 | EndGlobal 29 | -------------------------------------------------------------------------------- /Test/IdealHash.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Debug 10 | x64 11 | 12 | 13 | Release 14 | Win32 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | {DD13B91A-93C9-49DD-96D9-988290541758} 23 | IdealHash 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | Application 36 | true 37 | MultiByte 38 | v120 39 | 40 | 41 | Application 42 | true 43 | MultiByte 44 | v120 45 | 46 | 47 | Application 48 | false 49 | true 50 | MultiByte 51 | v120 52 | 53 | 54 | Application 55 | false 56 | true 57 | MultiByte 58 | v120 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | Level3 80 | Disabled 81 | WIN32;_MBCS;%(PreprocessorDefinitions) 82 | 83 | 84 | true 85 | 86 | 87 | 88 | 89 | Level3 90 | Disabled 91 | WIN32;_MBCS;%(PreprocessorDefinitions) 92 | 93 | 94 | true 95 | 96 | 97 | 98 | 99 | Level3 100 | MaxSpeed 101 | true 102 | true 103 | WIN32;_MBCS;%(PreprocessorDefinitions) 104 | StreamingSIMDExtensions 105 | Speed 106 | 107 | 108 | true 109 | true 110 | true 111 | PromptImmediately 112 | 113 | 114 | 115 | 116 | Level3 117 | MaxSpeed 118 | true 119 | true 120 | WIN32;_MBCS;%(PreprocessorDefinitions) 121 | Speed 122 | 123 | 124 | true 125 | true 126 | true 127 | PromptImmediately 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | -------------------------------------------------------------------------------- /Test/IdealHash.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | 
rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | Source Files 23 | 24 | 25 | 26 | 27 | Header Files 28 | 29 | 30 | -------------------------------------------------------------------------------- /Test/config.h.in: -------------------------------------------------------------------------------- 1 | //--------------------------------------------------- 2 | // Build-time configuration 3 | //--------------------------------------------------- 4 | 5 | #cmakedefine01 SSE42_POPCNT 6 | #cmakedefine01 HAMT_TEST_USE_DLMALLOC 7 | -------------------------------------------------------------------------------- /Test/main.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * File: main.cpp 3 | * Author: CS Lim 4 | * Purpose: Simple HAMT test program 5 | * History: 6 | * 2012/5/3: File Created 7 | * 8 | */ 9 | 10 | #include // autogenerated by CMake 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | // Use dlmalloc so that benchmark testing is less affected 17 | // by platform specific heap manager implementation. 18 | // e.g. 
Windows LFH 19 | #if HAMT_TEST_USE_DLMALLOC 20 | 21 | extern "C" 22 | { 23 | void* dlmalloc(size_t); 24 | void* dlcalloc(size_t, size_t); 25 | void dlfree(void*); 26 | void* dlrealloc(void*, size_t); 27 | } 28 | 29 | #define malloc dlmalloc 30 | #define calloc dlcalloc 31 | #define free dlfree 32 | #define realloc dlrealloc 33 | 34 | inline void* operator new(size_t cb) 35 | { 36 | return dlmalloc(cb); 37 | } 38 | 39 | inline void* operator new[](size_t cb) 40 | { 41 | return dlmalloc(cb); 42 | } 43 | 44 | inline void operator delete(void* p) 45 | { 46 | dlfree(p); 47 | } 48 | 49 | inline void operator delete[](void* p) 50 | { 51 | dlfree(p); 52 | } 53 | 54 | #endif 55 | 56 | #include 57 | 58 | typedef unsigned char u8; 59 | typedef uint16_t u16; 60 | typedef uint32_t u32; 61 | typedef uint64_t u64; 62 | typedef signed char s8; 63 | typedef int16_t s16; 64 | typedef int32_t s32; 65 | typedef int64_t s64; 66 | 67 | constexpr uint32 MAX_TEST_ENTRIES = 1000000; 68 | 69 | #ifdef WIN32 70 | 71 | #include 72 | #define WIN32_LEAN_AND_MEAN 73 | #include 74 | 75 | //=========================================================================== 76 | // Timing functions 77 | //=========================================================================== 78 | u64 GetMicroTime() 79 | { 80 | u64 hz; 81 | QueryPerformanceFrequency((LARGE_INTEGER*)&hz); 82 | 83 | u64 t; 84 | QueryPerformanceCounter((LARGE_INTEGER*)&t); 85 | return (t * 1000000) / hz; 86 | } 87 | 88 | #else 89 | 90 | #include 91 | u64 GetMicroTime() 92 | { 93 | timeval t; 94 | gettimeofday(&t,NULL); 95 | return t.tv_sec * 1000000ull + t.tv_usec; 96 | } 97 | 98 | #endif 99 | 100 | void TestHashTrie() 101 | { 102 | // uint32 to uint32 key/vaue pair example 103 | struct Test : THashKey32 104 | { 105 | Test(uint32 key) : THashKey32(key) { } 106 | uint32 value{ 0 }; 107 | }; 108 | 109 | // string to uint32 key/vaue pair example 110 | struct TestStr : CHashKeyStrAnsiChar 111 | { 112 | TestStr(const char key[]) : 
CHashKeyStrAnsiChar(key) { } 113 | uint32 value{ 0 }; 114 | }; 115 | 116 | // 117 | // integer test 118 | // 119 | THashTrie> test_uint32; 120 | 121 | printf("32 bit integer test...\n"); 122 | printf("1) Add %d entries: ", MAX_TEST_ENTRIES); 123 | u64 t0 = GetMicroTime(); 124 | for (int i = 0; i < MAX_TEST_ENTRIES; i++) 125 | { 126 | auto test = new Test(i); 127 | test_uint32.Add(test); 128 | } 129 | printf(" %10u usec\n", int(GetMicroTime() - t0)); 130 | 131 | printf("2) Find %d entries: ", MAX_TEST_ENTRIES); 132 | t0 = GetMicroTime(); 133 | for (uint32 i = 0; i < MAX_TEST_ENTRIES; i++) 134 | { 135 | auto find = test_uint32.Find(THashKey32(i)); 136 | volatile uint32 value = find->Get(); 137 | assert(value == i); 138 | } 139 | printf(" %10u usec\n", int(GetMicroTime() - t0)); 140 | 141 | printf("3) Remove %d entries: ", MAX_TEST_ENTRIES); 142 | t0 = GetMicroTime(); 143 | for (uint32 i = 0; i < MAX_TEST_ENTRIES; i++) 144 | { 145 | Test* removed = test_uint32.Remove(THashKey32(i)); 146 | assert(removed != 0); 147 | assert(removed->Get() == i); 148 | delete removed; 149 | } 150 | printf(" %10u usec\n\n", int(GetMicroTime() - t0)); 151 | 152 | // THashTrieInt test 153 | THashTrieInt test_hashTrieInt; 154 | 155 | printf("32 bit integer key/value pairs test using THashTrieInt...\n"); 156 | printf("1) Add %d entries: ", MAX_TEST_ENTRIES); 157 | t0 = GetMicroTime(); 158 | for (int32 i = 0; i < MAX_TEST_ENTRIES; i++) 159 | { 160 | auto added = test_hashTrieInt.Add(i); 161 | added->value = i; 162 | } 163 | printf(" %10u usec\n", int(GetMicroTime() - t0)); 164 | 165 | printf("2) Find %d entries: ", MAX_TEST_ENTRIES); 166 | t0 = GetMicroTime(); 167 | for (int32 i = 0; i < MAX_TEST_ENTRIES; i++) 168 | { 169 | volatile auto* find = test_hashTrieInt.Find(i); 170 | assert(find->value == i); 171 | } 172 | printf(" %10u usec\n", int(GetMicroTime() - t0)); 173 | 174 | printf("3) Remove %d entries: ", MAX_TEST_ENTRIES); 175 | t0 = GetMicroTime(); 176 | for (int32 i = 0; i < 
MAX_TEST_ENTRIES; i++) 177 | { 178 | bool removed = test_hashTrieInt.Remove(i); 179 | assert(removed); 180 | } 181 | printf(" %10u usec\n\n", int(GetMicroTime() - t0)); 182 | 183 | // 184 | // String hash test 185 | // 186 | THashTrie test_str; 187 | 188 | printf("ANSI string test...\n"); 189 | printf("1) Add %d entries: ", MAX_TEST_ENTRIES); 190 | t0 = GetMicroTime(); 191 | for (uint32 i = 0; i < MAX_TEST_ENTRIES; i++) 192 | { 193 | char buffer[16]; 194 | sprintf_s(buffer, "%d", i); 195 | TestStr *test = new TestStr(buffer); 196 | test_str.Add(test); 197 | } 198 | printf(" %10u usec\n", int(GetMicroTime() - t0)); 199 | 200 | printf("2) Find %d entries: ", MAX_TEST_ENTRIES); 201 | t0 = GetMicroTime(); 202 | for (uint32 i = 0; i < MAX_TEST_ENTRIES; i++) 203 | { 204 | char buffer[16]; 205 | sprintf_s(buffer, "%d", i); 206 | TestStr* find = test_str.Find(CHashKeyStrAnsiChar(buffer)); 207 | assert(strcmp(find->GetString(), buffer) == 0); 208 | } 209 | printf(" %10u usec\n", int(GetMicroTime() - t0)); 210 | 211 | printf("3) Remove %d entries: ", MAX_TEST_ENTRIES); 212 | t0 = GetMicroTime(); 213 | for (uint32 i = 0; i < MAX_TEST_ENTRIES; i++) 214 | { 215 | char buffer[16]; 216 | sprintf_s(buffer, "%d", i); 217 | TestStr *removed2 = test_str.Remove(CHashKeyStrAnsiChar(buffer)); 218 | assert(removed2 != 0); 219 | assert(strcmp(removed2->GetString(), buffer) == 0); 220 | delete removed2; 221 | } 222 | printf(" %10u usec\n\n", int(GetMicroTime() - t0)); 223 | } 224 | 225 | int main() 226 | { 227 | TestHashTrie(); 228 | return 0; 229 | } 230 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | #---------------------------------# 2 | # general configuration # 3 | #---------------------------------# 4 | 5 | # version format 6 | version: 1.0.{build} 7 | 8 | # you can use {branch} name in version format too 9 | # version: 1.0.{build}-{branch} 10 | 11 | 
# branches to build 12 | branches: 13 | # blacklist 14 | except: 15 | - gh-pages 16 | 17 | #---------------------------------# 18 | # environment configuration # 19 | #---------------------------------# 20 | 21 | # Build worker image (VM template) 22 | image: Visual Studio 2017 23 | 24 | init: [] 25 | 26 | install: [] 27 | 28 | #---------------------------------# 29 | # build configuration # 30 | #---------------------------------# 31 | 32 | build_script: 33 | - call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvars64.bat" 34 | - cmake . -G "Visual Studio 15 2017" 35 | - cmake --build . --config Release 36 | 37 | #---------------------------------# 38 | # tests configuration # 39 | #---------------------------------# 40 | 41 | test_script: 42 | - ctest -C Release -V -------------------------------------------------------------------------------- /cmake/BuildSettings.cmake: -------------------------------------------------------------------------------- 1 | # 2 | # Global flags 3 | # 4 | 5 | # Enable C++11 6 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") 7 | # C++14: set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++1y") 8 | 9 | 10 | # 11 | # Platform specific flags 12 | # 13 | if(${WIN32}) 14 | add_definitions(-DWIN32) 15 | endif() 16 | 17 | if(${MSVC}) 18 | 19 | # Use good Visual C++ compiler options for benchmarking 20 | add_definitions(-D_SECURE_SCL=0 -D_HAS_EXCEPTIONS=0) 21 | set(CMAKE_C_FLAGS "/GS- /Zi" CACHE STRING "Common C compiler settings" FORCE) 22 | set(CMAKE_CXX_FLAGS "/GS- /Zi" CACHE STRING "Common C++ compiler settings" FORCE) 23 | set(CMAKE_C_FLAGS_DEBUG "/MTd /Od /D_DEBUG" CACHE STRING "Additional C compiler settings in Debug" FORCE) 24 | set(CMAKE_CXX_FLAGS_DEBUG "/MTd /Od /D_DEBUG" CACHE STRING "Additional C++ compiler settings in Debug" FORCE) 25 | set(CMAKE_C_FLAGS_RELEASE "/MT /O2 /Ob2 /GL /DNDEBUG" CACHE STRING "Additional C compiler settings in Release" FORCE) 26 | set(CMAKE_CXX_FLAGS_RELEASE "/MT 
/O2 /Ob2 /GL /DNDEBUG" CACHE STRING "Additional C++ compiler settings in Release" FORCE) 27 | set(CMAKE_EXE_LINKER_FLAGS "/DYNAMICBASE:NO /DEBUG /INCREMENTAL:NO" CACHE STRING "Common linker settings" FORCE) 28 | set(CMAKE_EXE_LINKER_FLAGS_DEBUG "" CACHE STRING "Additional linker settings in Debug" FORCE) 29 | set(CMAKE_EXE_LINKER_FLAGS_RELEASE "" CACHE STRING "Additional linker settings in Release" FORCE) 30 | 31 | # Enable debug info in Release. 32 | set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /debug") 33 | 34 | # Set compiler flags and options. 35 | # Here it is setting the Visual Studio warning level to 4 36 | set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4") 37 | 38 | # /EHsc : Catches C++ exceptions only and tells the compiler to assume that 39 | # functions declared as extern "C" never throw a C++ exception. 40 | set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc") 41 | 42 | elseif(${UNIX}) 43 | 44 | # These are all required on Xcode 4.5.1 + iOS, because the defaults are no good. 
45 | set(CMAKE_C_FLAGS "-pthread -g") 46 | set(CMAKE_CXX_FLAGS "-pthread -g") 47 | set(CMAKE_C_FLAGS_DEBUG "-O0") 48 | set(CMAKE_CXX_FLAGS_DEBUG "-O0") 49 | set(CMAKE_C_FLAGS_RELEASE "-Os") 50 | set(CMAKE_CXX_FLAGS_RELEASE "-Os") 51 | 52 | endif() 53 | 54 | if(${IOS}) 55 | set(CMAKE_XCODE_EFFECTIVE_PLATFORMS "-iphoneos;-iphonesimulator") 56 | set_target_properties(${PROJECT_NAME} PROPERTIES XCODE_ATTRIBUTE_CODE_SIGN_IDENTITY "iPhone Developer") 57 | endif() 58 | 59 | -------------------------------------------------------------------------------- /include/HashTrie.h: -------------------------------------------------------------------------------- 1 | /** 2 | * File: HashTrie.h 3 | * Author: CS Lim 4 | * Purpose: Templates for CPU Cache-aware compact and high performance hash table 5 | * History: 6 | * 2012/5/3: File Created 7 | * 8 | * References: 9 | * - Ideal Hash Trees by Phil Bagwell (This is main paper the orignal ideas came from) 10 | * - Ideal Hash Tries: an implementation in C++ 11 | * http://www.altdevblogaday.com/2011/03/22/ideal-hash-tries-an-implementation-in-c/ 12 | */ 13 | 14 | #ifndef __HASH_TRIE_H__ 15 | #define __HASH_TRIE_H__ 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | #if _MSC_VER 24 | #include 25 | #endif 26 | 27 | #ifdef _MSC_VER 28 | #define COMPILER_CHECK(expr, msg) typedef char COMPILE_ERROR_##msg[1][(expr)] 29 | #else 30 | #define COMPILER_CHECK(expr, msg) typedef char COMPILE_ERROR_##msg[1][(expr)?1:-1] 31 | #endif 32 | 33 | //=========================================================================== 34 | // Typedefs 35 | //=========================================================================== 36 | typedef unsigned char uint8; 37 | typedef uint16_t uint16; 38 | typedef uint32_t uint32; 39 | typedef uint64_t uint64; 40 | typedef uintptr_t uint_ptr; 41 | typedef signed char int8; 42 | typedef int16_t int16; 43 | typedef int32_t int32; 44 | typedef int64_t int64; 45 | typedef intptr_t int_ptr; 46 
| 47 | 48 | /**************************************************************************** 49 | * 50 | * Some bit twiddling helpers 51 | * 52 | * GetBitCount function 53 | * from http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel 54 | * 55 | **/ 56 | 57 | #if defined(_MSC_VER) && SSE42_POPCNT 58 | 59 | inline uint32 GetBitCount(uint32 v) noexcept 60 | { 61 | return __popcnt(v); 62 | } 63 | 64 | #else 65 | 66 | inline uint32 GetBitCount(uint32 v) noexcept 67 | { 68 | v = v - ((v >> 1) & 0x55555555); // reuse input as temporary 69 | v = (v & 0x33333333) + ((v >> 2) & 0x33333333); // temp 70 | return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24; // count 71 | } 72 | 73 | #endif 74 | 75 | #if defined(_MSC_VER) && defined(_WIN64) && SSE42_POPCNT 76 | 77 | inline uint32 GetBitCount(uint64 v) noexcept 78 | { 79 | return (uint32)__popcnt64(v); 80 | } 81 | 82 | #else 83 | 84 | inline uint32 GetBitCount(uint64 v) noexcept 85 | { 86 | uint64 c; 87 | c = v - ((v >> 1) & 0x5555555555555555ull); 88 | c = ((c >> 2) & 0x3333333333333333ull) + (c & 0x3333333333333333ull); 89 | c = ((c >> 4) + c) & 0x0F0F0F0F0F0F0F0Full; 90 | c = ((c >> 8) + c) & 0x00FF00FF00FF00FFull; 91 | c = ((c >> 16) + c) & 0x0000FFFF0000FFFFull; 92 | return uint32((c >> 32) + c) /* & 0x00000000FFFFFFFFull */; 93 | } 94 | #endif 95 | 96 | template 97 | inline T ClearNthSetBit(T v, int idx) noexcept 98 | { 99 | for (T b = v; b;) 100 | { 101 | T lsb = b & ~(b - 1); 102 | if (--idx < 0) 103 | return v ^ lsb; 104 | b ^= lsb; 105 | } 106 | return v; 107 | } 108 | 109 | //=========================================================================== 110 | // Hash function foward declarations 111 | //=========================================================================== 112 | // MurmurHash3 113 | uint32 MurmurHash3_x86_32(const void* key, int len, uint32_t seed) noexcept; 114 | 115 | //=========================================================================== 116 | // THashKey32 117 | // 
(Helper template class to get 32bit integer hash key value used for POD types) 118 | //=========================================================================== 119 | template 120 | class THashKey32 121 | { 122 | static const uint32 MURMUR_HASH3_SEED = sizeof(T); 123 | public: 124 | THashKey32() noexcept : m_key(0) { } 125 | THashKey32(const T & key) noexcept : m_key(key) { } 126 | inline bool operator==(const THashKey32 & rhs) const noexcept { return m_key == rhs.m_key; } 127 | inline operator T () const noexcept { return m_key; } 128 | inline uint32 GetHash() const noexcept; 129 | inline const T & Get() const noexcept { return m_key; } 130 | inline void Set(const T & key) noexcept { m_key = key; } 131 | 132 | protected: 133 | T m_key; 134 | }; 135 | 136 | /** 137 | * Generic Hash function for POD types 138 | */ 139 | template 140 | inline uint32 THashKey32::GetHash() const noexcept 141 | { 142 | return MurmurHash3_x86_32((const void *)&m_key, sizeof(m_key), MURMUR_HASH3_SEED); 143 | } 144 | 145 | // Integer hash functions based on Thomas Wang's Mix Functions: 146 | // http://www.cris.com/~Ttwang/tech/inthash.htm (unavailable) 147 | // https://gist.github.com/badboy/6267743 148 | 149 | /** 150 | * Specialization for 32 bit interger key 151 | */ 152 | template <> 153 | inline uint32 THashKey32::GetHash() const noexcept 154 | { 155 | uint32 key = (uint32)m_key; 156 | key = ~key + (key << 15); // key = (key << 15) - key - 1; 157 | key ^= (key >> 12); 158 | key += (key << 2); 159 | key ^= (key >> 4); 160 | key *= 2057; // key = (key + (key << 3)) + (key << 11); 161 | key ^= (key >> 16); 162 | return key; 163 | } 164 | 165 | /** 166 | * Specialization for 32 bit unsigned int key 167 | */ 168 | template <> 169 | inline uint32 THashKey32::GetHash() const noexcept 170 | { 171 | uint32 key = m_key; 172 | key = ~key + (key << 15); // key = (key << 15) - key - 1; 173 | key ^= (key >> 12); 174 | key += (key << 2); 175 | key ^= (key >> 4); 176 | key *= 2057; // key = (key + 
(key << 3)) + (key << 11); 177 | key ^= (key >> 16); 178 | return key; 179 | } 180 | 181 | /** 182 | * Specialization for 64 bit interger key (64 bit to 32 bit hash) 183 | */ 184 | template <> 185 | inline uint32 THashKey32::GetHash() const noexcept 186 | { 187 | uint64 key = (uint64)m_key; 188 | key = (~key) + (key << 18); // key = (key << 18) - key - 1; 189 | key ^= (key >> 31); 190 | key *= 21; // key = (key + (key << 2)) + (key << 4); 191 | key ^= (key >> 11); 192 | key += (key << 6); 193 | key ^= (key >> 22); 194 | return (uint32)key; 195 | } 196 | 197 | /** 198 | * Specialization for 64 bit uint key (64 bit to 32 bit hash) 199 | */ 200 | template <> 201 | inline uint32 THashKey32::GetHash() const noexcept 202 | { 203 | uint64 key = m_key; 204 | key = (~key) + (key << 18); // key = (key << 18) - key - 1; 205 | key ^= (key >> 31); 206 | key *= 21; // key = (key + (key << 2)) + (key << 4); 207 | key ^= (key >> 11); 208 | key += (key << 6); 209 | key ^= (key >> 22); 210 | return (uint32)key; 211 | } 212 | 213 | //=========================================================================== 214 | // String Helper functions 215 | //=========================================================================== 216 | inline size_t StrLen(const char str[]) 217 | { 218 | return strlen(str); 219 | } 220 | 221 | inline size_t StrLen(const wchar_t str[]) 222 | { 223 | return wcslen(str); 224 | } 225 | 226 | inline int StrCmp(const char str1[], const char str2[]) 227 | { 228 | return strcmp(str1, str2); 229 | } 230 | 231 | inline int StrCmp(const wchar_t str1[], const wchar_t str2[]) 232 | { 233 | return wcscmp(str1, str2); 234 | } 235 | 236 | inline int StrCmpI(const char str1[], const char str2[]) 237 | { 238 | return _stricmp(str1, str2); 239 | } 240 | 241 | inline int StrCmpI(const wchar_t str1[], const wchar_t str2[]) 242 | { 243 | return _wcsicmp (str1, str2); 244 | } 245 | 246 | inline char* StrDup(const char str[]) 247 | { 248 | if (str == nullptr) 249 | return nullptr; 
250 | 251 | size_t const size = strlen(str) + 1; 252 | char * const memory = static_cast(malloc(size)); 253 | 254 | if (memory == nullptr) 255 | return nullptr; 256 | 257 | strcpy(memory, str); 258 | return memory; 259 | } 260 | 261 | inline wchar_t * StrDup(const wchar_t str[]) 262 | { 263 | if (str == nullptr) 264 | return nullptr; 265 | 266 | size_t const size = wcslen(str) + 1; 267 | wchar_t * const memory = static_cast(malloc(size)); 268 | 269 | if (memory == nullptr) 270 | return nullptr; 271 | 272 | wcscpy(memory, str); 273 | return memory; 274 | } 275 | 276 | 277 | //=========================================================================== 278 | // TStrCmp and TStrCmpI 279 | //=========================================================================== 280 | template 281 | class TStrCmp 282 | { 283 | public: 284 | static int StrCmp(const CharType str1[], const CharType str2[]) 285 | { 286 | return ::StrCmp(str1, str2); 287 | } 288 | }; 289 | 290 | template 291 | class TStrCmpI 292 | { 293 | public: 294 | static int StrCmp(const CharType str1[], const CharType str2[]) 295 | { 296 | return ::StrCmpI(str1, str2); 297 | } 298 | }; 299 | 300 | 301 | //=========================================================================== 302 | // THashKeyStr 303 | //=========================================================================== 304 | template 305 | class THashKeyStrPtr; 306 | 307 | template > 308 | class THashKeyStr 309 | { 310 | public: 311 | bool operator==(const THashKeyStr& rhs) const 312 | { 313 | return (Cmp::StrCmp(m_str, rhs.m_str) == 0); 314 | } 315 | 316 | uint32 GetHash() const 317 | { 318 | if (m_str != nullptr) 319 | { 320 | auto strLen = StrLen(m_str); 321 | return MurmurHash3_x86_32( 322 | (const void *)m_str, 323 | (int)(sizeof(CharType) * strLen), 324 | (int)strLen); // use string length as seed value 325 | } 326 | else 327 | { 328 | return 0; 329 | } 330 | } 331 | const CharType* GetString () const { return m_str; } 332 | 333 | protected: 334 
| THashKeyStr() = default; 335 | virtual ~THashKeyStr() { } 336 | 337 | const CharType* m_str{ nullptr }; 338 | 339 | friend class THashKeyStrPtr; 340 | }; 341 | 342 | template > 343 | class THashKeyStrCopy : public THashKeyStr 344 | { 345 | public: 346 | THashKeyStrCopy() noexcept { } 347 | THashKeyStrCopy(const CharType str[]) noexcept { SetString(str); } 348 | ~THashKeyStrCopy() noexcept 349 | { 350 | free(const_cast(THashKeyStr::m_str)); 351 | } 352 | 353 | void SetString(const CharType str[]) 354 | { 355 | free(const_cast(THashKeyStr::m_str)); 356 | THashKeyStr::m_str = str ? StrDup(str) : nullptr; 357 | } 358 | }; 359 | 360 | template > 361 | class THashKeyStrPtr : public THashKeyStr 362 | { 363 | public: 364 | THashKeyStrPtr() noexcept { } 365 | THashKeyStrPtr(const CharType str[]) noexcept { SetString(str); } 366 | THashKeyStrPtr(const THashKeyStr& rhs) noexcept 367 | { 368 | THashKeyStr::m_str = rhs.m_str; 369 | } 370 | 371 | THashKeyStrPtr& operator=(const THashKeyStr& rhs) noexcept 372 | { 373 | THashKeyStr::m_str = rhs.m_str; 374 | return *this; 375 | } 376 | THashKeyStrPtr& operator=(const THashKeyStrPtr& rhs) noexcept 377 | { 378 | THashKeyStr::m_str = rhs.m_str; 379 | return *this; 380 | } 381 | 382 | void SetString(const CharType str[]) noexcept 383 | { 384 | THashKeyStr::m_str = str; 385 | } 386 | }; 387 | 388 | // Typedefs for convenience 389 | typedef THashKeyStrCopy CHashKeyStr; 390 | typedef THashKeyStrPtr CHashKeyStrPtr; 391 | typedef THashKeyStrCopy CHashKeyStrAnsiChar; 392 | typedef THashKeyStrPtr CHashKeyStrPtrAnsiChar; 393 | 394 | typedef THashKeyStrCopy> CHashKeyStrI; 395 | typedef THashKeyStrPtr > CHashKeyStrPtrI; 396 | typedef THashKeyStrCopy> CHashKeyStrAnsiCharI; 397 | typedef THashKeyStrPtr > CHashKeyStrPtrAnsiCharI; 398 | 399 | 400 | /**************************************************************************** 401 | * 402 | * THashTrie 403 | * 404 | * Template class for HAMT (Hash Array Mapped Trie) 405 | * 406 | **/ 407 | 408 | 
template 409 | class THashTrie final 410 | { 411 | private: 412 | // Use the least significant bit as reference marker 413 | static constexpr uint_ptr AMT_MARK_BIT = 1; // Using LSB for marking AMT (sub-trie) data structure 414 | static constexpr uint32 HASH_INDEX_BITS = 5; 415 | static constexpr uint32 HASH_INDEX_MASK = (1 << HASH_INDEX_BITS) - 1; 416 | // Ceiling to 8 bits boundary to use all 32 bits. 417 | static constexpr uint32 MAX_HASH_BITS = ((sizeof(uint32) * 8 + 7) / HASH_INDEX_BITS) * HASH_INDEX_BITS; // 35 418 | static constexpr uint32 MAX_HAMT_DEPTH = MAX_HASH_BITS / HASH_INDEX_BITS; // = 7 419 | 420 | private: 421 | // Each Node entry in the hash table is either terminal (leaf) node 422 | // (a T pointer) or AMT data structure. 423 | // 424 | // If a node pointer's LSB == 1 then AMT 425 | // else T object pointer 426 | // 427 | // A one bit in the bit map represents a valid arc, while a zero an empty arc. 428 | // The pointers in the table are kept in sorted order and correspond to 429 | // the order of each one bit in the bit map. 
430 | 431 | struct ArrayMappedTrie 432 | { 433 | uint32 m_bitmap; 434 | T* m_subHash[1]; 435 | // Do not add more data below 436 | // New data should be added before m_subHash 437 | 438 | inline T** Lookup(uint32 hashIndex); 439 | inline T** LookupLinear(const K& key); 440 | 441 | static T** Alloc1(uint32 bitIndex, T** slotToReplace); 442 | static T** Alloc2(uint32 hashIndex, T* node, uint32 oldHashIndex, T* oldNode, T** slotToReplace); 443 | static T** Alloc2Linear(T* node, T* oldNode, T** slotToReplace); 444 | 445 | static ArrayMappedTrie* Insert(ArrayMappedTrie* amt, uint32 hashIndex, T* node, T** slotToReplace) noexcept; 446 | static ArrayMappedTrie* AppendLinear(ArrayMappedTrie* amt, T* node, T** slotToReplace) noexcept; 447 | static ArrayMappedTrie* Resize(ArrayMappedTrie* amt, int oldSize, int deltasize, int idx) noexcept; 448 | 449 | static void ClearAll(ArrayMappedTrie* amt, uint32 depth=0) noexcept; 450 | static void DestroyAll(ArrayMappedTrie* amt, uint32 depth=0) noexcept; 451 | }; 452 | 453 | // Root Hash Table 454 | T* m_root{ nullptr }; 455 | uint32 m_count{ 0 }; 456 | 457 | public: 458 | THashTrie() = default; 459 | ~THashTrie() noexcept { Clear(); } 460 | THashTrie(THashTrie&&) = delete; 461 | THashTrie(THashTrie const&) = delete; 462 | THashTrie& operator=(THashTrie const&) = delete; 463 | 464 | public: 465 | void Add(T* node); 466 | T* Find(const K& key) noexcept; 467 | T* Remove(const K& key) noexcept; 468 | bool Empty() noexcept; 469 | uint32 GetCount() noexcept { return m_count; } 470 | void Clear() noexcept; // Destruct HAMT data structures only 471 | void Destroy(); // Destruct HAMT data structures as well as containing objects 472 | }; 473 | 474 | 475 | /**************************************************************************** 476 | * 477 | * THashTrie 478 | * 479 | * Specialized HashTrie template for int type key/value pair. 480 | * Add/Find methods returns Cell data type. 
481 | * 482 | **/ 483 | 484 | template 485 | class THashTrieInt final 486 | { 487 | public: 488 | // Cell contains both key and value (int) 489 | struct Cell : THashKey32 490 | { 491 | Cell(T key) noexcept : THashKey32(key) { } 492 | T value{ 0 }; 493 | }; 494 | 495 | private: 496 | THashTrie> m_hashtable; 497 | 498 | public: 499 | THashTrieInt() noexcept = default; 500 | ~THashTrieInt() noexcept = default; 501 | 502 | public: 503 | Cell* Add(T key); 504 | Cell* Find(T key) noexcept { return m_hashtable.Find(key); } 505 | bool Remove(T key) noexcept; 506 | uint32 GetCount() noexcept { return m_hashtable.GetCount(); } 507 | void Clear() noexcept { m_hashtable.Clear(); } 508 | void Destroy() { m_hashtable.Destroy(); } 509 | }; 510 | 511 | template 512 | typename THashTrieInt::Cell* THashTrieInt::Add(T key) 513 | { 514 | static_assert(std::is_integral::value, "Integer required."); 515 | 516 | auto cell = new Cell(key); 517 | m_hashtable.Add(cell); 518 | return cell; 519 | } 520 | 521 | template 522 | bool THashTrieInt::Remove(T key) noexcept 523 | { 524 | auto removed = m_hashtable.Remove(THashKey32(key)); 525 | delete removed; 526 | return removed != nullptr; 527 | } 528 | 529 | 530 | //=========================================================================== 531 | // THashTrie::ArrayMappedTrie Implementation 532 | //=========================================================================== 533 | 534 | // helpers to search for a given entry 535 | // this function counts bits in order to return the correct slot for a given hash 536 | template 537 | T** THashTrie::ArrayMappedTrie::Lookup(uint32 hashIndex) 538 | { 539 | assert(hashIndex < (1 << HASH_INDEX_BITS)); 540 | const uint32 bitPos = (uint32)1 << hashIndex; 541 | if ((m_bitmap & bitPos) == 0) 542 | return nullptr; 543 | else 544 | return &m_subHash[GetBitCount(m_bitmap & (bitPos - 1))]; 545 | } 546 | 547 | template 548 | T** THashTrie::ArrayMappedTrie::LookupLinear(const K & key) 549 | { 550 | // Linear search 
551 | T** cur = m_subHash; 552 | T** end = m_subHash + m_bitmap; 553 | for (; cur < end; cur++) 554 | { 555 | if (**cur == key) 556 | return cur; 557 | } 558 | // Not found 559 | return nullptr; 560 | } 561 | 562 | template 563 | T** THashTrie::ArrayMappedTrie::Alloc1(uint32 bitIndex, T** slotToReplace) 564 | { 565 | // Assert (0 <= bitIndex && bitIndex < 31); 566 | ArrayMappedTrie * amt = (ArrayMappedTrie *)malloc(sizeof(ArrayMappedTrie)); 567 | if (!amt) 568 | throw std::bad_alloc(); 569 | 570 | amt->m_bitmap = 1 << bitIndex; 571 | *slotToReplace = (T *)((uint_ptr)amt | AMT_MARK_BIT); 572 | return amt->m_subHash; 573 | } 574 | 575 | template 576 | T** THashTrie::ArrayMappedTrie::Alloc2( 577 | uint32 hashIndex, 578 | T* node, 579 | uint32 oldHashIndex, 580 | T* oldNode, 581 | T** slotToReplace) 582 | { 583 | // Allocates a node with room for 2 elements 584 | ArrayMappedTrie* amt = (ArrayMappedTrie *)malloc(sizeof(ArrayMappedTrie) + sizeof(T*)); 585 | if (!amt) 586 | throw std::bad_alloc(); 587 | 588 | amt->m_bitmap = ((uint32)1 << hashIndex) | ((uint32)1 << oldHashIndex); 589 | 590 | // Sort them in order and return new node 591 | if (hashIndex < oldHashIndex) 592 | { 593 | amt->m_subHash[0] = node; 594 | amt->m_subHash[1] = oldNode; 595 | } 596 | else 597 | { 598 | amt->m_subHash[0] = oldNode; 599 | amt->m_subHash[1] = node; 600 | } 601 | 602 | *slotToReplace = (T *)((uint_ptr)amt | AMT_MARK_BIT);; 603 | return amt->m_subHash; 604 | } 605 | 606 | template 607 | T** THashTrie::ArrayMappedTrie::Alloc2Linear(T* node, T* oldNode, T** slotToReplace) 608 | { 609 | // Allocates a node with room for 2 elements 610 | ArrayMappedTrie* amt = (ArrayMappedTrie *)malloc(sizeof(ArrayMappedTrie) + sizeof(T *)); 611 | if (amt == nullptr) 612 | throw std::bad_alloc(); 613 | 614 | amt->m_bitmap = 2; // Number of entry in the linear search array 615 | amt->m_subHash[0] = node; 616 | amt->m_subHash[1] = oldNode; 617 | *slotToReplace = (T *)((uint_ptr)amt | AMT_MARK_BIT); 618 | return 
amt->m_subHash; 619 | } 620 | 621 | template 622 | typename THashTrie::ArrayMappedTrie* 623 | THashTrie::ArrayMappedTrie::Insert(ArrayMappedTrie* amt, uint32 hashIndex, T* node, T** slotToReplace) noexcept 624 | { 625 | uint32 bitPos = (uint32)1 << hashIndex; 626 | assert((amt->m_bitmap & bitPos) == 0); 627 | 628 | uint32 numBitsBelow = GetBitCount(amt->m_bitmap & (bitPos - 1)); 629 | ArrayMappedTrie* newAmt = Resize(amt, GetBitCount(amt->m_bitmap), 1, numBitsBelow); 630 | if (newAmt == nullptr) 631 | return nullptr; 632 | amt = newAmt; 633 | amt->m_bitmap |= bitPos; 634 | amt->m_subHash[numBitsBelow] = node; 635 | *slotToReplace = (T *)((uint_ptr)amt | AMT_MARK_BIT); 636 | return amt; 637 | } 638 | 639 | template 640 | typename THashTrie::ArrayMappedTrie* 641 | THashTrie::ArrayMappedTrie::AppendLinear(ArrayMappedTrie* amt, T* node, T** slotToReplace) noexcept 642 | { 643 | ArrayMappedTrie* newAmt = Resize(amt, amt->m_bitmap, 1, amt->m_bitmap); 644 | if (newAmt == nullptr) 645 | return nullptr; 646 | amt = newAmt; 647 | amt->m_subHash[amt->m_bitmap] = node; 648 | amt->m_bitmap++; 649 | *slotToReplace = (T *)((uint_ptr)amt | AMT_MARK_BIT); 650 | return amt; 651 | } 652 | 653 | // memory allocation all in this function. 
(re)allocates n, 654 | // copies old m_data, and inserts space at index 'idx' 655 | template 656 | typename THashTrie::ArrayMappedTrie* 657 | THashTrie::ArrayMappedTrie::Resize(ArrayMappedTrie* amt, int oldSize, int deltaSize, int idx) noexcept 658 | { 659 | assert(deltaSize != 0); 660 | int newSize = oldSize + deltaSize; 661 | assert(newSize > 0); 662 | 663 | // if it shrinks then (idx + deltasize, idx) will be removed 664 | if (deltaSize < 0) 665 | { 666 | memmove(amt->m_subHash + idx, amt->m_subHash + idx - deltaSize, (newSize - idx) * sizeof(T *)); 667 | } 668 | 669 | ArrayMappedTrie* newAmt = (ArrayMappedTrie *)realloc(amt, sizeof(ArrayMappedTrie) + (newSize - 1) * sizeof(T*)); 670 | if (newAmt != nullptr) 671 | { 672 | amt = newAmt; 673 | } 674 | else 675 | { 676 | // if it tried to shrink amt memory and that failed then keep using original memory. 677 | if (deltaSize > 0) 678 | return nullptr; 679 | } 680 | 681 | // If it grows then (idx, idx + deltasize) will be inserted 682 | if (deltaSize > 0) 683 | { 684 | memmove(amt->m_subHash + idx + deltaSize, amt->m_subHash + idx, (oldSize - idx) * sizeof(T *)); // shuffle tail to make room 685 | } 686 | 687 | return amt; 688 | } 689 | 690 | 691 | /* 692 | * Destroy HashTrie including pertaining sub-tries 693 | * NOTE: It does NOT destroy the objects in leaf nodes. 
694 | */ 695 | template 696 | void THashTrie::ArrayMappedTrie::ClearAll( 697 | ArrayMappedTrie* amt, 698 | uint32 depth) noexcept 699 | { 700 | // If this is a leaf node, do nothing 701 | if (((uint_ptr)amt & AMT_MARK_BIT) == 0) 702 | return; 703 | 704 | amt = (ArrayMappedTrie *)((uint_ptr)amt & (~AMT_MARK_BIT)); 705 | if (depth < MAX_HAMT_DEPTH) 706 | { 707 | T** cur = amt->m_subHash; 708 | T** end = amt->m_subHash + GetBitCount(amt->m_bitmap); 709 | for (; cur < end; cur++) 710 | ClearAll((ArrayMappedTrie *)*cur, depth + 1); 711 | } 712 | 713 | free(amt); 714 | } 715 | 716 | /* 717 | * Destroy HashTrie including pertaining sub-tries and containing objects 718 | * NOTE: It DOES destory (delete) the objects in leaf nodes. 719 | */ 720 | template 721 | void THashTrie::ArrayMappedTrie::DestroyAll(ArrayMappedTrie* amt, uint32 depth) noexcept 722 | { 723 | // If this is a leaf node just destroy the conatining object T 724 | if (((uint_ptr)amt & AMT_MARK_BIT) == 0) 725 | { 726 | delete ((T *)amt); 727 | return; 728 | } 729 | 730 | amt = (ArrayMappedTrie *)((uint_ptr)amt & (~AMT_MARK_BIT)); 731 | if (depth < MAX_HAMT_DEPTH) 732 | { 733 | T** cur = amt->m_subHash; 734 | T** end = amt->m_subHash + GetBitCount(amt->m_bitmap); 735 | for (; cur < end; cur++) 736 | DestroyAll((ArrayMappedTrie *)*cur, depth + 1); 737 | } 738 | else 739 | { 740 | T** cur = amt->m_subHash; 741 | T** end = amt->m_subHash + amt->m_bitmap; 742 | for (; cur < end; cur++) 743 | delete ((T *)*cur); 744 | } 745 | 746 | free(amt); 747 | } 748 | 749 | 750 | #if _MSC_VER 751 | inline bool HasAMTMarkBit(uint_ptr ptr) noexcept 752 | { 753 | return _bittest((const long *)&ptr, 0) != 0; 754 | } 755 | #else 756 | inline bool HasAMTMarkBit(uint_ptr ptr) { 757 | return (ptr & 1) != 0; 758 | } 759 | #endif 760 | 761 | //=========================================================================== 762 | // THashTrie Implementation 763 | //=========================================================================== 764 
| 765 | template 766 | inline void THashTrie::Add(T* node) 767 | { 768 | // If hash trie is empty just add value/pair node and set it as root 769 | if (Empty()) 770 | { 771 | m_root = node; 772 | m_count++; 773 | return; 774 | } 775 | 776 | // Get hash value 777 | uint32 hash = node->GetHash(); 778 | uint32 bitShifts = 0; 779 | T** slot = &m_root; // First slot is the root node 780 | for (;;) 781 | { 782 | // Leaf node (a T node pointer)? 783 | if (!HasAMTMarkBit((uint_ptr)*slot)) 784 | { 785 | // Replace if a node already exists with same key. 786 | // Caller is responsible for checking if a different object 787 | // with same key already exists and prevent memory leak. 788 | if (**slot == *node) 789 | { 790 | *slot = node; 791 | return; 792 | } 793 | 794 | // Hash collision detected: 795 | // Replace this leaf with an AMT node to resolve the collision. 796 | // The existing key must be replaced with a sub-hash table and 797 | // the next 5 bit hash of the existing key computed. If there is still 798 | // a collision then this process is repeated until no collision occurs. 799 | // The existing key is then inserted in the new sub-hash table and 800 | // the new key added. 801 | 802 | T* oldNode = *slot; 803 | uint32 oldHash = oldNode->GetHash() >> bitShifts; 804 | 805 | // As long as the hashes match, we have to create single element 806 | // AMT internal nodes. this loop is hopefully nearly always run 0 time. 
807 | while (bitShifts < MAX_HASH_BITS && (oldHash & HASH_INDEX_MASK) == (hash & HASH_INDEX_MASK)) 808 | { 809 | slot = ArrayMappedTrie::Alloc1(hash & HASH_INDEX_MASK, slot); 810 | bitShifts += HASH_INDEX_BITS; 811 | hash >>= HASH_INDEX_BITS; 812 | oldHash >>= HASH_INDEX_BITS; 813 | } 814 | 815 | if (bitShifts < MAX_HASH_BITS) 816 | { 817 | ArrayMappedTrie::Alloc2( 818 | hash & HASH_INDEX_MASK, 819 | node, 820 | oldHash & HASH_INDEX_MASK, 821 | oldNode, 822 | slot); 823 | } 824 | else 825 | { 826 | // Consumed all hash bits, alloc and init a linear search table 827 | ArrayMappedTrie::Alloc2Linear(node, oldNode, slot); 828 | } 829 | 830 | m_count++; 831 | break; 832 | } 833 | 834 | // 835 | // It's an Array Mapped Trie (sub-trie) 836 | // 837 | ArrayMappedTrie* amt = (ArrayMappedTrie *)((uint_ptr)*slot & (~AMT_MARK_BIT)); 838 | T** childSlot; 839 | if (bitShifts >= MAX_HASH_BITS) 840 | { 841 | // Consumed all hash bits. Add to the linear search array. 842 | childSlot = amt->LookupLinear(*node); 843 | if (childSlot == nullptr) 844 | { 845 | ArrayMappedTrie::AppendLinear(amt, node, slot); 846 | m_count++; 847 | } 848 | else 849 | { 850 | *slot = node; // If the same key node already exists then replace 851 | } 852 | break; 853 | } 854 | 855 | childSlot = amt->Lookup(hash & HASH_INDEX_MASK); 856 | if (childSlot == nullptr) 857 | { 858 | amt = ArrayMappedTrie::Insert( 859 | amt, 860 | hash & HASH_INDEX_MASK, 861 | node, 862 | slot); 863 | m_count++; 864 | break; 865 | } 866 | 867 | // Go to next sub-trie level 868 | slot = childSlot; 869 | bitShifts += HASH_INDEX_BITS; 870 | hash >>= HASH_INDEX_BITS; 871 | } // for (;;) 872 | } 873 | 874 | template 875 | T* THashTrie::Find(const K & key) noexcept 876 | { 877 | // Hash trie is empty? 
878 | if (Empty()) 879 | return nullptr; 880 | 881 | // Get hash value 882 | uint32 hash = key.GetHash(); 883 | uint32 bitShifts = 0; 884 | const T* slot = m_root; // First slot is the root node 885 | for (;;) 886 | { 887 | // Leaf node (a T node pointer)? 888 | if (((uint_ptr)slot & AMT_MARK_BIT) == 0) 889 | return (*slot == key) ? (T *)slot : nullptr; 890 | 891 | // 892 | // It's an Array Mapped Trie (sub-trie) 893 | // 894 | ArrayMappedTrie * amt = (ArrayMappedTrie *)((uint_ptr)slot & (~AMT_MARK_BIT)); 895 | if (bitShifts >= MAX_HASH_BITS) 896 | { 897 | // Consumed all hash bits. Run linear search. 898 | T** linearSlot = amt->LookupLinear(key); 899 | return (linearSlot != nullptr) ? *(linearSlot) : nullptr; 900 | } 901 | 902 | T** childSlot = amt->Lookup(hash & HASH_INDEX_MASK); 903 | if (childSlot == nullptr) 904 | return nullptr; 905 | 906 | // Go to next sub-trie level 907 | slot = *childSlot; 908 | bitShifts += HASH_INDEX_BITS; 909 | hash >>= HASH_INDEX_BITS; 910 | } 911 | } 912 | 913 | template 914 | T* THashTrie::Remove(const K & key) noexcept 915 | { 916 | T** slots[MAX_HAMT_DEPTH + 2]; 917 | slots[0] = &m_root; 918 | 919 | ArrayMappedTrie* amts[MAX_HAMT_DEPTH + 2]; 920 | amts[0] = nullptr; 921 | 922 | uint32 hash = key.GetHash(); 923 | // 924 | // First find the leaf node that we want to delete 925 | // 926 | int depth = 0; 927 | for (; depth <= MAX_HAMT_DEPTH; ++depth, hash >>= HASH_INDEX_BITS) 928 | { 929 | // Leaf node? 930 | if (((uint_ptr)*slots[depth] & AMT_MARK_BIT) == 0) 931 | { 932 | amts[depth] = nullptr; 933 | if (!(**slots[depth] == key)) 934 | return nullptr; 935 | break; 936 | } 937 | else 938 | { 939 | // It's an AMT node 940 | ArrayMappedTrie* amt = amts[depth] = (ArrayMappedTrie *)((uint_ptr)*slots[depth] & (~AMT_MARK_BIT)); 941 | slots[depth + 1] = (depth >= MAX_HAMT_DEPTH) ? 
amt->LookupLinear(key) : amt->Lookup(hash & HASH_INDEX_MASK); 942 | if (slots[depth + 1] == nullptr) 943 | return nullptr; 944 | } 945 | } 946 | 947 | // Get the node will be returned 948 | T* ret = *slots[depth]; 949 | 950 | // we are going to have to delete an entry from the internal node at amts[depth] 951 | while (--depth >= 0) 952 | { 953 | int oldsize = depth >= MAX_HAMT_DEPTH ? (int)(amts[depth]->m_bitmap) : (int)(GetBitCount(amts[depth]->m_bitmap)); 954 | int oldidx = (int)(slots[depth + 1] - amts[depth]->m_subHash); 955 | 956 | // the second condition is that the remaining entry is a leaf 957 | if (oldsize == 2 && ((uint_ptr)(amts[depth]->m_subHash[!oldidx]) & AMT_MARK_BIT) == 0) 958 | { 959 | // we no longer need this node; just fold the remaining entry, 960 | // which must be a leaf, into the parent and free this node 961 | *(slots[depth]) = amts[depth]->m_subHash[!oldidx]; 962 | free(amts[depth]); 963 | break; 964 | } 965 | 966 | // resize this node down by a bit, and update the m_usedBitMap bitfield 967 | if (oldsize > 1) 968 | { 969 | // Shrinking AMT won't fail. 970 | ArrayMappedTrie * amt = ArrayMappedTrie::Resize(amts[depth], oldsize, -1, oldidx); 971 | amt->m_bitmap = (depth >= MAX_HAMT_DEPTH) ? (amt->m_bitmap - 1) : ClearNthSetBit(amt->m_bitmap, oldidx); 972 | *(slots[depth]) = (T *)((uint_ptr)amt | AMT_MARK_BIT); // update the parent slot to point to the resized node 973 | break; 974 | } 975 | 976 | free(amts[depth]); // oldsize==1. delete this node, and then loop to kill the parent too! 
977 | } 978 | 979 | // No node exists in the HashTrie any more 980 | if (depth < 0) 981 | m_root = nullptr; 982 | 983 | m_count--; 984 | return ret; 985 | } 986 | 987 | template 988 | inline bool THashTrie::Empty() noexcept 989 | { 990 | return m_root == nullptr; 991 | } 992 | 993 | template 994 | inline void THashTrie::Clear() noexcept 995 | { 996 | if (!Empty()) 997 | { 998 | ArrayMappedTrie::ClearAll((ArrayMappedTrie *)m_root); 999 | 1000 | // HashTrie is now empty 1001 | m_root = nullptr; 1002 | } 1003 | } 1004 | 1005 | template 1006 | inline void THashTrie::Destroy() 1007 | { 1008 | if (!Empty()) 1009 | { 1010 | ArrayMappedTrie::DestroyAll((ArrayMappedTrie *)m_root); 1011 | 1012 | // HashTrie is now empty 1013 | m_root = nullptr; 1014 | } 1015 | } 1016 | 1017 | #endif // if __HASH_TRIE_H__ 1018 | -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(HAMT) 2 | 3 | if (MSVC) 4 | add_definitions(-DHAMT_WIN) 5 | endif() 6 | 7 | include_directories(../include) 8 | include(../cmake/BuildSettings.cmake) 9 | 10 | file(GLOB SRCFILES *.cpp) 11 | file(GLOB INCFILES ../include/*.h) 12 | add_library (HAMT ${SRCFILES} ${INCFILES} ) 13 | 14 | set_target_properties(${PROJECT_NAME} PROPERTIES 15 | CXX_STANDARD 11 16 | CXX_STANDARD_REQUIRED ON 17 | COMPILE_DEFINITIONS "$<$:_SCL_SECURE_NO_WARNINGS>" 18 | COMPILE_OPTIONS "$<$:/EHsc>" 19 | ) 20 | -------------------------------------------------------------------------------- /src/HashTrie.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * File: HashTrie.cpp 3 | * Author: CS Lim 4 | * Purpose: Templates for CPU Cache-aware compact and high performance hash table 5 | * History: 6 | * 2012/5/3: File Created 7 | * 8 | */ 9 | 10 | #include 11 | #include 12 | 13 | //=========================================================================== 14 | // 
// START of MurMurHash3 code
//===========================================================================

// MurMurHash3 code is under MIT license and more info can be found at
// http://code.google.com/p/smhasher/

#include <stdint.h>
#include <string.h>     // memcpy

// Microsoft Visual Studio

#if defined(_MSC_VER)

#define FORCE_INLINE    __forceinline

#include <stdlib.h>     // _rotl, _rotl64

#define ROTL32(x,y)     _rotl(x,y)
#define ROTL64(x,y)     _rotl64(x,y)

#define BIG_CONSTANT(x) (x)

// Other compilers

#else   // defined(_MSC_VER)

// always_inline only takes effect on functions that are also declared inline.
#define FORCE_INLINE inline __attribute__((always_inline))

inline uint32_t rotl32( uint32_t x, int8_t r ) noexcept
{
    return (x << r) | (x >> (32 - r));
}

inline uint64_t rotl64( uint64_t x, int8_t r ) noexcept
{
    return (x << r) | (x >> (64 - r));
}

#define ROTL32(x,y)     rotl32(x,y)
#define ROTL64(x,y)     rotl64(x,y)

#define BIG_CONSTANT(x) (x##LLU)

#endif // !defined(_MSC_VER)

//-----------------------------------------------------------------------------
// Block read - if your platform needs to do endian-swapping or can only
// handle aligned reads, do the conversion here
//
// The key is an arbitrary byte buffer, so a block is read with memcpy instead
// of through a uint32_t/uint64_t pointer: that is valid for any alignment and
// compiles to a plain load where unaligned loads are allowed. Blocks are read
// in native byte order.

// Reads the i-th 32-bit block starting at p.
FORCE_INLINE uint32_t getblock( const uint8_t * p, int i ) noexcept
{
    uint32_t block;
    memcpy(&block, p + (size_t)i * sizeof(block), sizeof(block));
    return block;
}

// Reads the i-th 64-bit block starting at p.
FORCE_INLINE uint64_t getblock64( const uint8_t * p, int i ) noexcept
{
    uint64_t block;
    memcpy(&block, p + (size_t)i * sizeof(block), sizeof(block));
    return block;
}

//-----------------------------------------------------------------------------
// Finalization mix - force all bits of a hash block to avalanche

FORCE_INLINE uint32_t fmix(uint32_t h) noexcept
{
    h ^= h >> 16;
    h *= 0x85ebca6b;
    h ^= h >> 13;
    h *= 0xc2b2ae35;
    h ^= h >> 16;

    return h;
}

//----------

FORCE_INLINE uint64_t fmix(uint64_t k) noexcept
{
    k ^= k >> 33;
    k *= BIG_CONSTANT(0xff51afd7ed558ccd);
    k ^= k >> 33;
    k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
    k ^= k >> 33;

    return k;
}

/**
 * MurmurHash3, x86 32-bit variant.
 *
 * @param key   Start of the bytes to hash; no alignment requirement.
 * @param len   Number of bytes to hash; must not be negative.
 * @param seed  Seed value mixed into the hash.
 * @return the 32-bit hash. (uint32_t is the type the header names uint32.)
 */
uint32_t MurmurHash3_x86_32(const void* key, int len, uint32_t seed) noexcept
{
    const uint8_t * data = (const uint8_t *)key;
    const int nblocks = len / 4;

    uint32_t h1 = seed;

    const uint32_t c1 = 0xcc9e2d51;
    const uint32_t c2 = 0x1b873593;

    //----------
    // body: all complete 4-byte blocks, in order

    for (int i = 0; i < nblocks; i++)
    {
        uint32_t k1 = getblock(data, i);

        k1 *= c1;
        k1 = ROTL32(k1,15);
        k1 *= c2;

        h1 ^= k1;
        h1 = ROTL32(h1, 13);
        h1 = h1 * 5 + 0xe6546b64;
    }

    //----------
    // tail: the 0-3 bytes left after the last complete block

    const uint8_t * tail = data + nblocks * 4;

    uint32_t k1 = 0;

    switch(len & 3)
    {
    case 3: k1 ^= (uint32_t)tail[2] << 16;
            // fall through
    case 2: k1 ^= (uint32_t)tail[1] << 8;
            // fall through
    case 1: k1 ^= tail[0];
            k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
    };

    //----------
    // finalization

    h1 ^= len;

    h1 = fmix(h1);

    return h1;
}

//===========================================================================
// END of MurMurHash3 code
//===========================================================================
// --------------------------------------------------------------------------------