├── .gitattributes ├── LICENSE ├── README.md └── src ├── Hash.compute └── Test.cs /.gitattributes: -------------------------------------------------------------------------------- 1 | *.compute linguist-language=HLSL 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 
23 | 24 | For more information, please refer to <https://unlicense.org> 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SimpleComputeShaderHashTable 2 | 3 | ![hash](https://user-images.githubusercontent.com/68340554/128619452-65042a29-9174-4a14-a0ba-efc0abb0f598.PNG) 4 | 5 | # About This Project 6 | This project is a Unity Compute Shader implementation of a [simple GPU hash table written by David Farrell](https://github.com/nosferalatu/SimpleGPUHashTable) which in turn is based on [Cliff Click's hash table](https://preshing.com/20130605/the-worlds-simplest-lock-free-hash-table/). It uses the [MurmurHash3 function by Austin Appleby](https://github.com/aappleby/smhasher). All of the above works are in the public domain, and free to use, as is this project. 7 | 8 | This code implements a lock-free hash table using linear probing, and achieves thread safety using an atomic function, `InterlockedCompareExchange()`, to insert key/values into the table. Because it uses linear probing, the table is cache-efficient, but performance quickly degrades as the load factor increases. 9 | 10 | # Important notes 11 | * The table uses 32-bit keys and 32-bit values. 12 | * Because we use bitwise AND to cycle through the table when probing, the size of the table must be a power of 2. 13 | * It reserves 0xffffffff as an empty sentinel value for both keys and values. 14 | * I have not included a resizing function, but it would operate exactly like you would expect. A general outline would be something like: cycle through the values of the hashbuffer, and then rehash any non-empty value into the new table. 15 | 16 | # To use this project 17 | To use this project, simply add `Hash.compute` and `Test.cs` to an existing Unity project, attach `Test.cs` to a gameobject in the editor, and attach `Hash.compute` to `Test.cs`. 
* If you want to verify that the HashTable is working properly, tick `Validation` on the gameobject.
* If you want text output when there are validation errors, tick `Validation Text`, though be careful because this will cause a lot of lag on large inputs (1 million+).
* If you want to test the speed of the HashTable, untick `Validation`. Validation and speed testing are mutually exclusive in this demo.
* These scripts were written in a Unity project version 2021.1.5f1, but can probably be used in older versions so long as they support compute shaders.

# To Learn More
If you want to learn more about this hash table and how it was designed I would highly encourage reading [David Farrell's blog post](https://nosferalatu.com/SimpleGPUHashTable.html). If you want to learn more about GPU powered hash tables in general see this scholarly article [WarpCore: A Library for fast Hash Tables on GPUs](https://arxiv.org/pdf/2009.07914.pdf).

--------------------------------------------------------------------------------
/src/Hash.compute:
--------------------------------------------------------------------------------
#pragma kernel Initialize
#pragma kernel Insert
#pragma kernel Lookup
#pragma kernel Delete

// Number of thread groups dispatched per kernel; Test.cs dispatches the same
// count, and every kernel strides by GROUP_SIZE * THREAD_BLOCKS to cover the input.
#define THREAD_BLOCKS 256
// Threads per group (numthreads x-dimension of every kernel).
#define GROUP_SIZE 64
// Sentinel marking an unused key slot or a deleted/missing value.
// 0xffffffff is therefore reserved and must not be used as a key or value.
#define EMPTY 0xffffffff

// Hash table storage: x = key, y = value. Size must be a power of two.
RWStructuredBuffer<uint2> b_hash;
// Values to insert (Insert kernel reads) / lookup results (Lookup kernel writes).
RWStructuredBuffer<uint> b_inputOutput;

extern uint e_hashBufferSize;
extern uint e_inputSize;

//********************************
// Murmurhash3 32Bit Hash Function (finalizer mix)
// Author: Austin Appleby
// Lastmod: 1/8/2016
// License: MIT License
// Added 8/4/2021
//
// Scrambles k and masks it into [0, bufferSize).
// bufferSize must be a power of two for the mask to be a valid modulo.
uint hash(uint k, uint bufferSize)
{
    k ^= k >> 16;
    k *= 0x85ebca6b;
    k ^= k >> 13;
    k *= 0xc2b2ae35;
    k ^= k >> 16;
    return k & (bufferSize - 1);
}

// Inserts (or overwrites) key -> value using linear probing.
// A slot is claimed atomically by swapping EMPTY for the key; a slot that
// already holds the same key is reused. Probing is bounded by bufferSize so a
// completely full table drops the insert instead of spinning forever.
void HashInsert(RWStructuredBuffer<uint2> hashBuffer, uint key, uint value, uint bufferSize)
{
    uint slot = hash(key, bufferSize);
    for (uint probe = 0; probe < bufferSize; probe++)
    {
        uint prev;
        InterlockedCompareExchange(hashBuffer[slot].x, EMPTY, key, prev);
        if (prev == EMPTY || prev == key)
        {
            hashBuffer[slot].y = value;
            return;
        }
        slot = (slot + 1) & (bufferSize - 1);
    }
}

// Returns the value stored for key, or EMPTY when the key is absent
// (an EMPTY key slot ends the probe chain) or was deleted.
// Probing is bounded by bufferSize so a full table without the key terminates.
uint HashLookup(RWStructuredBuffer<uint2> hashBuffer, uint key, uint bufferSize)
{
    uint slot = hash(key, bufferSize);
    for (uint probe = 0; probe < bufferSize; probe++)
    {
        uint2 entry = hashBuffer[slot];
        if (entry.x == key)
        {
            return entry.y;
        }
        if (entry.x == EMPTY)
        {
            return EMPTY;
        }
        slot = (slot + 1) & (bufferSize - 1);
    }
    return EMPTY;
}

// Marks key as deleted by setting its value to EMPTY. The key itself stays in
// place so probe chains that pass through this slot remain intact.
// Probing is bounded by bufferSize so a full table without the key terminates.
void HashDelete(RWStructuredBuffer<uint2> hashBuffer, uint key, uint bufferSize)
{
    uint slot = hash(key, bufferSize);
    for (uint probe = 0; probe < bufferSize; probe++)
    {
        uint storedKey = hashBuffer[slot].x;
        if (storedKey == EMPTY)
        {
            return;
        }
        if (storedKey == key)
        {
            hashBuffer[slot].y = EMPTY;
            return;
        }
        slot = (slot + 1) & (bufferSize - 1);
    }
}

//In order for the Hash Table to work, it must be initialized to the empty sentinel value
[numthreads(GROUP_SIZE, 1, 1)]
void Initialize(uint3 id : SV_DispatchThreadID)
{
    for (uint i = id.x; i < e_hashBufferSize; i += GROUP_SIZE * THREAD_BLOCKS)
    {
        b_hash[i] = uint2(EMPTY, EMPTY);
    }
}

// Inserts element index i as the key with b_inputOutput[i] as its value.
[numthreads(GROUP_SIZE, 1, 1)]
void Insert(uint3 id : SV_DispatchThreadID)
{
    for (uint i = id.x; i < e_inputSize; i += GROUP_SIZE * THREAD_BLOCKS)
    {
        HashInsert(b_hash, i, b_inputOutput[i], e_hashBufferSize);
    }
}

// Looks up key i and writes the result (or EMPTY) to b_inputOutput[i].
[numthreads(GROUP_SIZE, 1, 1)]
void Lookup(uint3 id : SV_DispatchThreadID)
{
    for (uint i = id.x; i < e_inputSize; i += GROUP_SIZE * THREAD_BLOCKS)
    {
        b_inputOutput[i] = HashLookup(b_hash, i, e_hashBufferSize);
    }
}

// Deletes key i from the table.
[numthreads(GROUP_SIZE, 1, 1)]
void Delete(uint3 id : SV_DispatchThreadID)
{
    for (uint i = id.x; i < e_inputSize; i += GROUP_SIZE * THREAD_BLOCKS)
    {
        HashDelete(b_hash, i, e_hashBufferSize);
    }
}
--------------------------------------------------------------------------------
/src/Test.cs:
--------------------------------------------------------------------------------
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using UnityEngine.Rendering;

/// <summary>
/// Demo driver for the compute-shader hash table in Hash.compute.
/// Pressing space either validates insert/lookup/delete round trips or times them,
/// depending on the <c>validation</c> toggle.
/// </summary>
public class Test : MonoBehaviour
{
    // Serialized field names are kept as-is so existing scenes/prefabs keep their values.
    [SerializeField]
    private ComputeShader compute;
    [SerializeField]
    private int inputSize;
    [SerializeField]
    private bool validation;
    [SerializeField]
    private bool validationText;
    [Range(1, 1000)]
    public int batchSizeForTiming;

    private ComputeBuffer b_hash;
    private ComputeBuffer b_inputOutput;

    private int k_init;
    private int k_insert;
    private int k_lookup;
    private int k_delete;

    // Thread groups dispatched per kernel; must match THREAD_BLOCKS in Hash.compute.
    private const int THREAD_BLOCKS = 256;
    // Sentinel the shader uses for "no value"; must match EMPTY in Hash.compute.
    private const uint EmptySentinel = 0xffffffff;
    // Largest input for which SizeToPow still fits in a positive int.
    private const int MaxInputSize = 1 << 30;

    private uint[] inputOutputArray;
    private int hashBufferSize;
    private System.Random rand;

    void Start()
    {
        // ComputeBuffer cannot be created with a zero/negative count, and the
        // shader is required for everything below, so bail out early.
        if (compute == null || inputSize <= 0 || inputSize > MaxInputSize)
        {
            Debug.LogError("Test needs a compute shader and an input size between 1 and " + MaxInputSize);
            enabled = false;
            return;
        }

        //initialize and fill test arrays based on the size of the desired input
        hashBufferSize = SizeToPow(inputSize);
        inputOutputArray = new uint[inputSize];

        compute.SetInt("e_hashBufferSize", hashBufferSize);
        compute.SetInt("e_inputSize", inputSize);

        k_init = compute.FindKernel("Initialize");
        k_insert = compute.FindKernel("Insert");
        k_lookup = compute.FindKernel("Lookup");
        k_delete = compute.FindKernel("Delete");

        b_hash = new ComputeBuffer(hashBufferSize, sizeof(uint) * 2);
        b_inputOutput = new ComputeBuffer(inputSize, sizeof(uint));

        //for simplicity we generate the random numbers on the CPU, then push to the GPU
        // rand.Next(0, int.MaxValue) never produces 0xffffffff, so values cannot
        // collide with the shader's empty sentinel.
        rand = new System.Random();
        for (int i = 0; i < inputSize; i++)
        {
            inputOutputArray[i] = (uint)rand.Next(0, int.MaxValue);
        }
        b_inputOutput.SetData(inputOutputArray);

        //assign the buffers to the kernels
        compute.SetBuffer(k_init, "b_hash", b_hash);

        compute.SetBuffer(k_insert, "b_hash", b_hash);
        compute.SetBuffer(k_insert, "b_inputOutput", b_inputOutput);

        compute.SetBuffer(k_lookup, "b_hash", b_hash);
        compute.SetBuffer(k_lookup, "b_inputOutput", b_inputOutput);

        compute.SetBuffer(k_delete, "b_hash", b_hash);
        compute.SetBuffer(k_delete, "b_inputOutput", b_inputOutput);

        //Dispatch init
        compute.Dispatch(k_init, THREAD_BLOCKS, 1, 1);
        Debug.Log("Initialization Complete, press space to begin test");
    }

    void Update()
    {
        if (Input.GetKeyDown(KeyCode.Space))
        {
            if (validation)
            {
                HashValidation();
            }
            else
            {
                HashTest();
            }
        }
    }

    /// <summary>
    /// Runs insert -> lookup, delete -> lookup, and re-insert -> lookup, checking
    /// each readback against the values that were uploaded.
    /// </summary>
    private void HashValidation()
    {
        // Snapshot the expected values. Assigning the array reference instead
        // would alias inputOutputArray, and GetData would overwrite both,
        // making every comparison trivially succeed.
        uint[] expected = (uint[])inputOutputArray.Clone();
        bool checks = true;

        compute.Dispatch(k_insert, THREAD_BLOCKS, 1, 1);
        compute.Dispatch(k_lookup, THREAD_BLOCKS, 1, 1);
        if (!ReadBackMatches(expected))
        {
            checks = false;
        }

        compute.Dispatch(k_delete, THREAD_BLOCKS, 1, 1);
        compute.Dispatch(k_lookup, THREAD_BLOCKS, 1, 1);
        b_inputOutput.GetData(inputOutputArray);
        for (int i = 0; i < inputSize; i++)
        {
            // After deletion every lookup must return the empty sentinel.
            if (inputOutputArray[i] != EmptySentinel)
            {
                if (validationText)
                {
                    Debug.LogError("EXPECTED EMPTY (0xffffffff): " + inputOutputArray[i]);
                }
                checks = false;
            }
            inputOutputArray[i] = (uint)rand.Next(0, int.MaxValue);
        }
        b_inputOutput.SetData(inputOutputArray);
        // The final pass must be compared against the freshly generated values.
        expected = (uint[])inputOutputArray.Clone();

        compute.Dispatch(k_insert, THREAD_BLOCKS, 1, 1);
        compute.Dispatch(k_lookup, THREAD_BLOCKS, 1, 1);
        if (!ReadBackMatches(expected))
        {
            checks = false;
        }

        if (checks)
        {
            Debug.Log("Complete, all tests passed");
        }
        else
        {
            Debug.LogError("Validation Error, further tests required");
        }
    }

    /// <summary>
    /// Reads b_inputOutput back into inputOutputArray and compares it element-wise
    /// with <paramref name="expected"/>, logging mismatches when validationText is set.
    /// </summary>
    /// <returns>True when every element matches.</returns>
    private bool ReadBackMatches(uint[] expected)
    {
        b_inputOutput.GetData(inputOutputArray);
        bool allMatch = true;
        for (int i = 0; i < inputSize; i++)
        {
            if (inputOutputArray[i] != expected[i])
            {
                if (validationText)
                {
                    Debug.LogError("EXPECTED THE SAME: " + inputOutputArray[i] + ", " + expected[i]);
                }
                allMatch = false;
            }
        }
        return allMatch;
    }

    /// <summary>Logs the test configuration and starts the timing coroutine.</summary>
    private void HashTest()
    {
        // [Range] only clamps values edited through the inspector; a field that
        // is still 0 would yield NaN throughput and a zero divisor below.
        batchSizeForTiming = Mathf.Max(1, batchSizeForTiming);

        Debug.Log("Beginning " + batchSizeForTiming + " iterations of " + inputSize + " elements at: " + (inputSize * 100.0f / hashBufferSize) + "% load factor");
        StartCoroutine(MultiTiming());
    }

    /// <summary>
    /// Times batchSizeForTiming rounds of insert, lookup and delete. Each phase is
    /// measured from dispatch until an async readback of the touched buffer completes,
    /// so the figures include readback and frame latency.
    /// </summary>
    private IEnumerator MultiTiming()
    {
        float insertTotalTime = 0;
        float lookupTotalTime = 0;
        float deletionTotalTime = 0;

        for (int i = 0; i < batchSizeForTiming; i++)
        {
            float time = Time.realtimeSinceStartup;
            compute.Dispatch(k_insert, THREAD_BLOCKS, 1, 1);
            var request = AsyncGPUReadback.Request(b_hash);
            yield return new WaitUntil(() => request.done);
            insertTotalTime += Time.realtimeSinceStartup - time;

            time = Time.realtimeSinceStartup;
            compute.Dispatch(k_lookup, THREAD_BLOCKS, 1, 1);
            request = AsyncGPUReadback.Request(b_inputOutput);
            yield return new WaitUntil(() => request.done);
            lookupTotalTime += Time.realtimeSinceStartup - time;

            time = Time.realtimeSinceStartup;
            compute.Dispatch(k_delete, THREAD_BLOCKS, 1, 1);
            request = AsyncGPUReadback.Request(b_hash);
            yield return new WaitUntil(() => request.done);
            deletionTotalTime += Time.realtimeSinceStartup - time;

            // NOTE(review): progress condition kept from the original; it fires at
            // most once and only when inputSize / batchSizeForTiming < batchSizeForTiming.
            if (i == (inputSize / batchSizeForTiming))
            {
                Debug.Log("Running");
            }
        }

        Debug.Log("Done");
        Debug.Log("Insertion average time: " + inputSize * (batchSizeForTiming / insertTotalTime) + " elements/sec.");
        Debug.Log("Lookup average time: " + inputSize * (batchSizeForTiming / lookupTotalTime) + " elements/sec.");
        Debug.Log("Deletion average time: " + inputSize * (batchSizeForTiming / deletionTotalTime) + " elements/sec.");
    }

    /// <summary>
    /// Returns the smallest power of two that is greater than or equal to
    /// <paramref name="size"/> (1 for sizes of 1 or less). The shader masks with
    /// (size - 1), so the table size must be a power of two.
    /// </summary>
    private static int SizeToPow(int size)
    {
        size--;
        int counter = 1;
        while (size > 0)
        {
            size >>= 1;
            counter <<= 1;
        }
        return counter;
    }

    private void OnDisable()
    {
        // Coroutines keep running on a disabled component; stop the timing loop
        // before the buffers it reads back are released.
        StopAllCoroutines();

        // Buffers are null when Start bailed out before allocating them.
        b_inputOutput?.Release();
        b_inputOutput = null;
        b_hash?.Release();
        b_hash = null;
    }
}

--------------------------------------------------------------------------------