├── LICENSE ├── dmap.h ├── readme.md └── dmap.c /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 jamesnolanverran 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /dmap.h: -------------------------------------------------------------------------------- 1 | #ifndef DMAP_H 2 | #define DMAP_H 3 | #include 4 | #include 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | // #define DMAP_DEBUG 11 | 12 | // todo: make more configurable 13 | 14 | #ifndef DMAP_ALIGNMENT 15 | #define DMAP_ALIGNMENT 16 16 | #endif 17 | 18 | #ifndef DMAP_DEFAULT_MAX_SIZE 19 | // 2GB for testing 20 | #define DMAP_DEFAULT_MAX_SIZE (1ULL << 31) 21 | #endif // DMAP_DEFAULT_MAX_SIZE 22 | 23 | #define DMAP_INITIAL_CAPACITY 16 24 | #define DMAP_LOAD_FACTOR 0.5f 25 | 26 | typedef struct DmapFreeList { 27 | int *data; 28 | int len; 29 | int cap; 30 | } DmapFreeList; 31 | 32 | typedef struct DmapTable DmapTable; 33 | 34 | typedef struct DmapOptions { 35 | void *(*data_allocator_fn)(void *hdr, size_t size); // custom allocator for the data array (default: realloc) 36 | void (*free_key_fn)(void*); // custom free function for keys 37 | unsigned long long (*hash_fn)(void *key, size_t len); 38 | bool (*cmp_fn)(void *a, void *b, size_t len); 39 | int initial_capacity; 40 | bool user_managed_keys; // if true, the user manages string keys; otherwise, dmap copies and frees them on delete 41 | } DmapOptions; 42 | 43 | typedef struct DmapHdr { 44 | DmapTable *table; // the actual hashtable - contains the hash and an index to data[] where the values are stored 45 | unsigned long long hash_seed; 46 | DmapFreeList *free_list; // array of indices to values stored in data[] that have been marked as deleted. 47 | DmapOptions options; 48 | int len; 49 | int cap; 50 | int hash_cap; 51 | int returned_idx; // stores an index, used internally by macros 52 | int key_size; // make sure key sizes are consistent 53 | int val_size; 54 | bool is_string; 55 | _Alignas(DMAP_ALIGNMENT) char data[]; // aligned data array - where values are stored 56 | } DmapHdr; 57 | 58 | #define DMAP_INVALID -1 59 | 60 | #if defined(__cplusplus) 61 | #define DMAP_TYPEOF(d) (decltype((d) + 0)) 62 | #elif defined(__clang__) || defined(__GNUC__) 63 | #define DMAP_TYPEOF(d) (typeof(d)) 64 | #else 65 | #define DMAP_TYPEOF(d) 66 | #endif 67 | /////////////////////// 68 | // These functions are internal but are utilized by macros so need to be declared here. 69 | /////////////////////// 70 | int dmap__get_idx(DmapHdr *d, void *key, size_t key_size); 71 | int dmap__delete(DmapHdr *d, void *key, size_t key_size); 72 | void dmap__insert_entry(DmapHdr *d, void *key, size_t key_size); 73 | void *dmap__getp(DmapHdr *d, void *key, size_t key_size); 74 | void *dmap__grow(DmapHdr *d, size_t elem_size) ; 75 | void *dmap__kstr_grow(DmapHdr *d, size_t elem_size); 76 | void *dmap__init(size_t elem_size, DmapOptions options); 77 | void *dmap__kstr_init(size_t elem_size, DmapOptions options); 78 | 79 | void dmap__free(DmapHdr *d); 80 | 81 | /////////////////////// 82 | #define dmap_hdr(d) ((DmapHdr *)((char *)(d) - offsetof(DmapHdr, data))) 83 | #define dmap_count(d) ((d) ? dmap_hdr(d)->len : 0) // how many valid entries in the dicctionary; not for iterating directly over the data 84 | #define dmap_cap(d) ((d) ? dmap_hdr(d)->cap : 0) 85 | 86 | unsigned long long dmap_hash(void *key, size_t key_size); 87 | 88 | // Helper Macros - Utilized by other macros. 89 | //////////////////////////////////////////// 90 | // allows macros to pass a value 91 | #define dmap__ret_idx(d) (dmap_hdr(d)->returned_idx) // DMAP_EMPTY by default 92 | // resize if n <= capacity 93 | #define dmap__fit(d, n) ((n) <= dmap_cap(d) ? 0 : ((d) = DMAP_TYPEOF(d) dmap__grow((d) ? dmap_hdr(d) : NULL, sizeof(*(d))))) 94 | #define dmap__kstr_fit(d, n) ((n) <= dmap_cap(d) ? 0 : ((d) = DMAP_TYPEOF(d) dmap__kstr_grow((d) ? dmap_hdr(d) : NULL, sizeof(*(d))))) 95 | //////////////////////////////////////////// 96 | 97 | // dmap_init(d, DmapOptions opts) 98 | // ex: dmap_kstr_init(dmap, (DmapOptions){.initial_capacity = 256}); 99 | #define dmap_init(d, ...)((d) = DMAP_TYPEOF(d) dmap__init(sizeof(*(d)), __VA_ARGS__)); 100 | #define dmap_kstr_init(d, ...)((d) = DMAP_TYPEOF(d) dmap__kstr_init(sizeof(*(d)), __VA_ARGS__)); 101 | 102 | // insert or update value 103 | // returns the index in the data array where the value is stored. 104 | // Parameters: 105 | // - 'd' is the hashmap from which to retrieve the value, effectively an array of v's. 106 | // - 'k' key for the value. Keys can be any type 1,2,4,8 bytes; use dmap_kstr_insert for strings and non-builtin types 107 | // - 'v' value -> VAR_ARGS to allow for direct struct initialization: dmap_kstr_insert(d, k, key_size, (MyType){2,33}); 108 | #define dmap_insert(d, k, ...) (dmap__fit((d), dmap_count(d) + 1), dmap__insert_entry(dmap_hdr(d), (k), sizeof(*(k))), ((d)[dmap__ret_idx(d)] = (__VA_ARGS__)), dmap__ret_idx(d)) 109 | // same as above but uses a string as key values 110 | #define dmap_kstr_insert(d, k, key_size, ...) (dmap__kstr_fit((d), dmap_count(d) + 1), dmap__insert_entry(dmap_hdr(d), (k), (key_size)), ((d)[dmap__ret_idx(d)] = (__VA_ARGS__)), dmap__ret_idx(d)) 111 | 112 | // returns index to data or -1 / DMAP_INVALID; indices are always stable 113 | // index can then be used to retrieve the value: d[idx] 114 | #define dmap_get(d,k) ((d) ? dmap__get_idx(dmap_hdr(d), (k), sizeof(*(k))) : -1) 115 | // same as dmap_get but for keys that are strings. 116 | #define dmap_kstr_get(d, k, key_size)((d) ? dmap__get_idx(dmap_hdr(d), (k), (key_size)) : -1) 117 | 118 | // Returns: A pointer to the value corresponding to 'k' in 'd', or NULL if the key is not found. 119 | #define dmap_getp(d, k) ((d) ? DMAP_TYPEOF(d) dmap__getp(dmap_hdr(d), (k), sizeof(*(k))) : NULL) 120 | // Returns: A pointer to the value corresponding to 'k' in 'd', or NULL if the key is not found. 121 | #define dmap_kstr_getp(d, k, key_size) ((d) ? DMAP_TYPEOF(d) dmap__getp(dmap_hdr(d), (k), (key_size)) : NULL) 122 | 123 | // returns the data index of the deleted item or -1 / DMAP_INVALID (SIZE_MAX). 124 | // The user should mark deleted data as invalid if the user intends to iterate over the data array. 125 | #define dmap_kstr_delete(d, k, len)((d) ? dmap__delete(dmap_hdr(d), (k), (len)) : -1) 126 | 127 | #define dmap_delete(d,k) ((d) ? dmap__delete(dmap_hdr(d), (k), sizeof(*(k))) : -1) 128 | // returns index to deleted data or -1 / DMAP_INVALID 129 | 130 | 131 | #define dmap_free(d) ((d) ? (dmap__free(dmap_hdr(d)), (d) = NULL, 1) : 0) 132 | 133 | // for iterating directly over the entire data array, including items marked as deleted 134 | int dmap__range(DmapHdr *d); 135 | #define dmap_range(d)(dmap__range((d) ? dmap_hdr(d) : NULL)) 136 | 137 | #ifdef __cplusplus 138 | } 139 | #endif 140 | 141 | #endif // DMAP_H 142 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # dmap 2 | 3 | ### Dmap is a flexible, lightweight, zero-friction dynamic hashmap implementation in C, designed to be user-friendly without sacrificing performance. 4 | 5 | ## ⚠️ Breaking Changes 6 | 7 | - `dmap_get` (which previously returned a pointer) has been renamed to `dmap_getp`. 8 | - `dmap_get_idx` (which returned an index) is now simply `dmap_get`. 9 | 10 | This change encourages safer usage: indices remain stable across reallocations, unlike pointers. `dmap_getp` is retained for convenience but should be used only when no insertions (and thus no reallocations) are expected. 11 | 12 | - `dmap_init` now takes a `DmapOptions` struct. This includes optional fields for a custom value allocator, key comparison function, key free function, hash function, initial capacity, and a user_managed_keys flag to indicate that keys are managed entirely by the user (i.e., dmap only stores a pointer to them). 13 | 14 | --- 15 | 16 | ## 🚀 Super Easy – Zero Setup Required 17 | 18 | ```c 19 | // Declare a dynamic hashmap of any type, `int` in this case. 20 | int *my_dmap = NULL; 21 | 22 | // Insert the value 33 using an integer key. 23 | int key = 13; 24 | dmap_insert(my_dmap, &key, 33); 25 | 26 | // Retrieve the value. 27 | size_t idx = dmap_get(my_dmap, &key); 28 | 29 | if(idx != -1){ // check if not found 30 | printf("result: %d\n", my_dmap[idx]); // output: result: 33 31 | } 32 | 33 | ``` 34 | 35 | ## 🔧 Features 36 | - **No boilerplate** – Zero setup required. 37 | - **Generic typing** – Supports multiple key and value types. 38 | - **Dynamic memory** – Grows as needed. 39 | - **Cross-platform** – Works on Linux, macOS, and Windows. 40 | - **Good performance** – Competitive with leading hashmap implementations. 41 | 42 | **Supported platforms:** Linux, macOS (untested), and Windows. **64-bit only.** 43 | 44 | --- 45 | 46 | ## ⚡ Performance 47 | **Dmap is designed for simplicity and ease of use, while still outperforming widely-used hashmaps like `uthash` and `std::unordered_map`.** 48 | 49 | - **Stores values directly in a dynamic array** 50 | - **30% to 40% faster than `uthash`** in benchmarks like [UDB3](https://github.com/attractivechaos/udb3). 51 | 52 | --- 53 | 54 | 🚨 **Memory vs. Simplicity Tradeoff** 55 | - `dmap` is built for **flexibility and ease of use**. While it achieves **solid performance**, 56 | it prioritizes **simplicity and flexibility over memory efficiency**. 57 | 58 | --- 59 | 60 | ## 🔍 Keys and Hash Collisions 61 | - Hash collisions are handled by checking hashes first, then comparing keys directly. 62 | - By default, keys are copied. Keys larger than 8 bytes are heap-allocated and freed on deletion. 63 | - Users can opt to manage keys manually. In this case, dmap stores a pointer and optionally calls a user-supplied free_key function (set via `dmap_init`). 64 | - Custom hash and key comparison functions can also be supplied through dmap_init. This is generally required for struct keys due to padding etc. 65 | 66 | --- 67 | 68 | ## ⚠️ Error Handling 69 | - By default, memory allocation failures trigger an error and `exit()`. 70 | - A custom error handler can be set using `dmap_set_error_handler` to handle allocation failures gracefully. 71 | 72 | --- 73 | 74 | ## 📦 Memory Management 75 | Dmap allows **storing complex struct values directly** in the hashmap. **Compound literals** allow inline struct initialization. 76 | 77 | ### Example: Using String Keys with Struct Values 78 | 79 | ```c 80 | #include 81 | #include 82 | #include "dmap.h" // Your dmap header 83 | 84 | // Define a struct to store directly in the hashmap 85 | typedef struct { 86 | int id; 87 | int age; 88 | float balance; 89 | } UserProfile; 90 | 91 | int main() { 92 | UserProfile *user_map = NULL; // Declare a dynamic hashmap 93 | 94 | // Insert user profiles with email addresses as keys 95 | const char *email1 = "alice@example.com"; 96 | const char *email2 = "bob@example.com"; 97 | 98 | UserProfile alice = {1, 28, 1050.75}; 99 | dmap_kstr_insert(user_map, email1, strlen(email1), alice); 100 | // or use compound literals 101 | dmap_kstr_insert(user_map, email2, strlen(email2), (UserProfile){2, 35, 893.42}); 102 | 103 | // Retrieve user profiles 104 | UserProfile *alice_profile = dmap_kstr_getp(user_map, email1, strlen(email1)); 105 | UserProfile *bob_profile = dmap_kstr_getp(user_map, email2, strlen(email2)); 106 | 107 | if (alice_profile) 108 | printf("Alice: ID=%d, Age=%d, Balance=%.2f\n", 109 | alice_profile->id, alice_profile->age, alice_profile->balance); 110 | 111 | if (bob_profile) 112 | printf("Bob: ID=%d, Age=%d, Balance=%.2f\n", 113 | bob_profile->id, bob_profile->age, bob_profile->balance); 114 | 115 | return 0; 116 | } 117 | ``` 118 | 119 | ## 🔄 Efficient Storage & Iteration 120 | Unlike traditional hashmaps that store pointers to data, **Dmap stores values directly in a dynamic array**, allowing **efficient iteration**. 121 | 122 | - **Contiguous storage** — ideal for cache locality and batch operations. 123 | - **Index-based access** — access values like an array. 124 | 125 | --- 126 | 127 | ## ⚠️ Limitations 128 | - 64-bit systems only 129 | - Macro arguments may be evaluated multiple times – avoid expressions with side effects. 130 | - Key size consistency is not enforced at compile time – the user must ensure key types are used consistently. 131 | - Untested on macOS – compatibility is expected but not guaranteed. 132 | - C++ support is a work in progress. 133 | - `dmap_getp` uses `typeof()` (or `decltype()` in C++) for type safety, and falls back to `void*` where unavailable. 134 | - Pointer validity – Pointers returned by `dmap_getp` become invalid after insertions or reallocations. Use `dmap_get` (index-based access) for stable indices. 135 | 136 | --- 137 | 138 | ## TODO: 139 | - Add a full test suite 140 | - Documentation 141 | 142 | ## Full Example: Dmap Usage 143 | 144 | ```c 145 | #include "dmap.h" 146 | #include 147 | #include 148 | 149 | int main() { 150 | 151 | // Declare a dynamic hashmap (can store any type) 152 | int *my_dmap = NULL; 153 | 154 | // Insert values into the hashmap using integer keys 155 | int key_1 = 1; 156 | int key_2 = 2; 157 | dmap_insert(my_dmap, &key_1, 33); 158 | dmap_insert(my_dmap, &key_2, 13); 159 | 160 | // Retrieve a *value using an integer key 161 | int *value = dmap_getp(my_dmap, &key_1); 162 | if (value) { 163 | printf("Value for key_1 1: %d\n", *value); 164 | } 165 | // ================================ 166 | // declare a hashmap that uses strings as keys 167 | int *my_kstr_dmap = NULL; 168 | // Use a C-string as key 169 | char *str_key = "my_key"; 170 | 171 | // Optional: Initialize the key-string hashmap w/ custom allocator 172 | // dmap_kstr_init(my_kstr_dmap, (DmapOptions){.initial_capacity = 1024 * 1024, .data_allocator_fn = v_alloc_realloc}); 173 | 174 | // Insert a value using a string key 175 | dmap_kstr_insert(my_kstr_dmap, str_key, strlen(str_key), 33); // string keys need length param 176 | 177 | // Retrieve a *value using a string key 178 | value = dmap_kstr_getp(my_kstr_dmap, str_key, strlen(str_key)); 179 | if (value) { 180 | printf("Value for key 'str_key': %d\n", *value); 181 | } 182 | // ================================ 183 | // Get an index 184 | // Retrieve an index to a value using an integer key - treat it like an array 185 | size_t idx = dmap_get(my_dmap, &key_1); 186 | if (idx != DMAP_INVALID) { 187 | printf("Index based result for key_1: %d\n", my_dmap[idx]); 188 | } 189 | 190 | // ================================ 191 | // Deletions 192 | 193 | // Delete a key from the hashmap 194 | size_t deleted_index = dmap_delete(my_dmap, &key_2); 195 | if (deleted_index != DMAP_INVALID) { 196 | printf("Deleted key_2, data index: %zu\n", deleted_index); 197 | // Mark the deleted entry as invalid for safe iteration 198 | // Here, we use -1 to represent an invalid state. 199 | my_dmap[deleted_index] = -1; 200 | } 201 | // Check if a key exists after deletion 202 | value = dmap_getp(my_dmap, &key_2); 203 | if (!value) { 204 | printf("key_2 no longer exists in the hashmap.\n"); 205 | } 206 | 207 | // ================================ 208 | // Iterate over the hashmap data - treat it like an array 209 | 210 | // Get the range of valid data indices (including deleted slots) 211 | size_t range = dmap_range(my_dmap); 212 | printf("hashmap data array range: %zu\n", range); 213 | 214 | // Iterate over the data array (including deleted slots) 215 | for (size_t i = 0; i < range; i++) { 216 | if (my_dmap[i] != -1) { // Skip invalid/deleted entries 217 | printf("Data at index %zu: %d\n", i, my_dmap[i]); 218 | } 219 | } 220 | 221 | // Free the hashmap and set the pointer to NULL 222 | dmap_free(my_dmap); 223 | 224 | return 0; 225 | } 226 | ``` 227 | 228 | ## License 229 | 230 | [MIT License](LICENSE) 231 | 232 | ### Credits & Inspiration 233 | *dmap* is inspired by Per Vognsen's dynamic array implementation on his *Bitwise* series, which was itself based on Sean Barrett's [`stb_ds`](https://github.com/nothings/stb/blob/master/stb_ds.h) library. 234 | 235 | [Per Vognsen's Bitwise series on YouTube](https://www.youtube.com/pervognsen). -------------------------------------------------------------------------------- /dmap.c: -------------------------------------------------------------------------------- 1 | #include "dmap.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #ifdef DMAP_DEBUG 9 | #if defined(_MSC_VER) || defined(_WIN32) 10 | #define DEBUGBREAK() __debugbreak() 11 | #else 12 | #define DEBUGBREAK() __builtin_trap() 13 | #endif 14 | 15 | #ifndef dmap_assert 16 | #define dmap_assert(expr) \ 17 | do { \ 18 | if (!(expr)) { \ 19 | common_assert_failed(__FILE__, __LINE__); \ 20 | } \ 21 | } while (0) 22 | #endif // dmap_assert 23 | 24 | #ifndef common_assert_failed 25 | #define common_assert_failed(f, l) \ 26 | do { \ 27 | printf("assert failed at file %s(%d)", f, l); \ 28 | DEBUGBREAK(); \ 29 | } while (0) 30 | #endif // common_assert_failed 31 | #else 32 | #define dmap_assert 33 | #endif 34 | 35 | #if defined(_MSC_VER) || defined(_WIN32) 36 | #include // MSVC intrinsics 37 | #endif 38 | 39 | static inline size_t next_power_of_2(size_t x) { 40 | if (x <= 1) return 1; // ensure minimum value of 1 41 | 42 | #if defined(_MSC_VER) || defined(_WIN32) 43 | unsigned long index; 44 | if (_BitScanReverse64(&index, x - 1)) { 45 | return 1ULL << (index + 1); 46 | } 47 | #else 48 | return 1ULL << (64 - __builtin_clzl(x - 1)); 49 | #endif 50 | 51 | return 1; 52 | } 53 | 54 | #include 55 | #if defined(__linux__) || defined(__APPLE__) 56 | #include 57 | #endif 58 | #ifdef _WIN32 59 | #include 60 | #include 61 | #endif 62 | // todo: needed? 63 | // random hash seed 64 | uint64_t dmap_generate_seed() { 65 | uint64_t seed = 14695981039346656037ULL; 66 | uint64_t timestamp = 0; 67 | #ifdef _WIN32 68 | FILETIME ft; 69 | GetSystemTimeAsFileTime(&ft); 70 | timestamp = ((uint64_t)ft.dwHighDateTime << 32) | ft.dwLowDateTime; 71 | uint64_t pid = (uint64_t)_getpid(); 72 | #else 73 | struct timespec ts; 74 | clock_gettime(CLOCK_MONOTONIC, &ts); 75 | timestamp = ((uint64_t)ts.tv_sec * 1000000000ULL) + ts.tv_nsec; 76 | uint64_t pid = (uint64_t)getpid(); 77 | #endif 78 | 79 | seed ^= timestamp; 80 | seed *= 1099511628211ULL; 81 | seed ^= pid; 82 | seed *= 1099511628211ULL; 83 | 84 | return seed; 85 | } 86 | // 87 | #ifndef MAX 88 | #define MAX(x, y) ((x) >= (y) ? (x) : (y)) 89 | #endif 90 | 91 | #define ALIGN_DOWN(n, a) ((n) & ~((a) - 1)) 92 | #define ALIGN_UP(n, a) ALIGN_DOWN((n) + (a) - 1, (a)) 93 | 94 | #define ALIGN_DOWN_PTR(p, a) ((void *)ALIGN_DOWN((uintptr_t)(p), (a))) 95 | #define ALIGN_UP_PTR(p, a) ((void *)ALIGN_UP((uintptr_t)(p), (a))) 96 | 97 | typedef int8_t s8; 98 | typedef int16_t s16; 99 | typedef int32_t s32; 100 | typedef int64_t s64; 101 | typedef uint8_t u8; 102 | typedef uint16_t u16; 103 | typedef uint32_t u32; 104 | typedef uint64_t u64; 105 | 106 | 107 | // ///////////////////////////////////////////// 108 | // MARK: ERR HANDLER 109 | // ///////////////////////////////////////////// 110 | 111 | // todo: improve default error handler 112 | static void dmap_default_error_handler(char* err_msg) { 113 | perror(err_msg); 114 | exit(1); 115 | } 116 | static void (*dmap_error_handler)(char* err_msg) = dmap_default_error_handler; 117 | 118 | void dmap_set_error_handler(void (*handler)(char* err_msg)) { 119 | dmap_error_handler = handler ? handler : dmap_default_error_handler; // fallback to default 120 | } 121 | 122 | // ///////////////////////////////////////////// 123 | // MARK: DMAP 124 | // ///////////////////////////////////////////// 125 | 126 | struct DmapTable { 127 | u64 hash; 128 | union { 129 | u64 key; 130 | void *ptr; 131 | char *kstr; 132 | char small_kstr[8]; 133 | }; 134 | s32 data_idx; 135 | s32 kstr_len; 136 | }; 137 | 138 | #define DMAP_EMPTY INT32_MAX 139 | #define DMAP_DELETED (INT32_MAX - 1) 140 | #define DMAP_MAX_CAPACITY ((size_t)INT32_MAX - 2) 141 | 142 | 143 | // declare hash functions 144 | #ifdef __cplusplus 145 | #define RAPIDHASH_NOEXCEPT noexcept 146 | #define RAPIDHASH_CONSTEXPR constexpr 147 | #ifndef RAPIDHASH_INLINE 148 | #define RAPIDHASH_INLINE inline 149 | #endif 150 | #else 151 | #define RAPIDHASH_NOEXCEPT 152 | #define RAPIDHASH_CONSTEXPR static const 153 | #ifndef RAPIDHASH_INLINE 154 | #define RAPIDHASH_INLINE static inline 155 | #endif 156 | #endif 157 | static inline u64 rapidhash_internal(const void *key, size_t len, u64 seed, const u64 *secret) RAPIDHASH_NOEXCEPT; 158 | static inline u64 rapidhash(const void *key, size_t len) RAPIDHASH_NOEXCEPT; 159 | 160 | static const u64 RAPIDHASH_SECRET[3] = { 161 | 0x9E3779B97F4A7C15ULL, 162 | 0xD6E8FEB86659FD93ULL, 163 | 0xCA9B0C7EBA1DA115ULL 164 | }; 165 | 166 | unsigned long long dmap_hash(void *key, size_t len){ 167 | return rapidhash(key, len); 168 | } 169 | static unsigned long long dmap_generate_hash(void *key, size_t key_size, unsigned long long seed) { 170 | return rapidhash_internal(key, key_size, seed, RAPIDHASH_SECRET); 171 | } 172 | 173 | static void dmap_freelist_push(DmapHdr *dh, s32 index) { 174 | if(!dh->free_list){ 175 | dh->free_list = (DmapFreeList*)malloc(sizeof(DmapFreeList)); 176 | if(!dh->free_list){ 177 | dmap_error_handler("malloc failed at freelist"); 178 | } 179 | dh->free_list->cap = 16; 180 | dh->free_list->len = 0; 181 | dh->free_list->data = (s32*)malloc(dh->cap * sizeof(s32)); 182 | if(!dh->free_list->data){ 183 | dmap_error_handler("malloc failed at freelist"); 184 | } 185 | } 186 | if (dh->free_list->len == dh->free_list->cap) { 187 | dh->free_list->cap = (dh->free_list->cap * 3) / 2 + 1; 188 | dh->free_list->data = (s32*)realloc(dh->free_list->data, dh->free_list->cap * sizeof(s32)); 189 | if(!dh->free_list->data){ 190 | dmap_error_handler("realloc failed at freelist"); 191 | } 192 | } 193 | dh->free_list->data[dh->free_list->len++] = index; 194 | } 195 | static u32 dmap_freelist_pop(DmapHdr *dh) { 196 | if (dh->free_list && dh->free_list->len > 0) { 197 | return dh->free_list->data[--dh->free_list->len]; 198 | } 199 | return DMAP_EMPTY; // no free slots available 200 | } 201 | static bool keys_match(DmapHdr *d, size_t idx, void *key, size_t key_size) { 202 | if (d->is_string && d->table[idx].kstr_len != (s32)key_size) { 203 | return false; 204 | } 205 | void *stored = (!d->options.user_managed_keys && key_size <= 8) 206 | ? (void*)&d->table[idx].key 207 | : d->table[idx].ptr; 208 | if (d->options.cmp_fn) { 209 | return d->options.cmp_fn(stored, key, key_size); 210 | } 211 | return memcmp(stored, key, key_size) == 0; 212 | } 213 | 214 | // grows the entry array of the hashmap to accommodate more elements 215 | static void dmap_grow_table(DmapHdr *d, size_t new_hash_cap, size_t old_hash_cap) { 216 | // size_t new_size_in_bytes = new_hash_cap * sizeof(DmapTable); 217 | DmapTable *new_table = (DmapTable*)calloc(new_hash_cap, sizeof(DmapTable)); 218 | if (!new_table) { 219 | dmap_error_handler("Out of memory 1"); 220 | } 221 | for(size_t i = 0; i < new_hash_cap; i++){ 222 | new_table[i].data_idx = DMAP_EMPTY; 223 | } 224 | // if the hashmap has existing table, rehash them into the new entry array 225 | if (d->len) { 226 | for (size_t i = 0; i < old_hash_cap; i++) { 227 | if(d->table[i].data_idx == DMAP_EMPTY) continue; 228 | size_t idx = d->table[i].hash & (new_hash_cap - 1); 229 | // size_t j = new_hash_cap; 230 | while(true){ 231 | // dmap_assert(j-- != 0); // unreachable, suggests no empty slot was found 232 | if(new_table[idx].data_idx == DMAP_EMPTY){ 233 | new_table[idx] = d->table[i]; 234 | break; 235 | } 236 | idx = (idx + 1) & (new_hash_cap - 1); 237 | } 238 | } 239 | } 240 | // replace the old entry array with the new one 241 | free(d->table); 242 | d->table = new_table; 243 | } 244 | static void *dmap__grow_internal(DmapHdr *d, size_t elem_size) { 245 | DmapHdr *new_hdr = NULL; 246 | size_t old_hash_cap = d->hash_cap; 247 | size_t new_hash_cap = old_hash_cap * 2; 248 | size_t new_cap = (size_t)((float)new_hash_cap * DMAP_LOAD_FACTOR); 249 | size_t total_size_in_bytes = offsetof(DmapHdr, data) + (new_cap * elem_size); 250 | if (new_cap > DMAP_MAX_CAPACITY) { 251 | dmap_error_handler("Error: Max capacity exceeded.\n"); 252 | } 253 | if(total_size_in_bytes > DMAP_DEFAULT_MAX_SIZE){ 254 | dmap_error_handler("Error: Max size exceeded. #define DMAP_DEFAULT_MAX_SIZE to overide default."); 255 | } 256 | 257 | new_hdr = (DmapHdr*)d->options.data_allocator_fn(d, total_size_in_bytes); 258 | 259 | if(!new_hdr) { 260 | dmap_error_handler("Out of memory 2"); 261 | } 262 | // grow the table to fit into the newly allocated space 263 | dmap_grow_table(new_hdr, new_hash_cap, old_hash_cap); 264 | 265 | new_hdr->cap = (u32)new_cap; 266 | new_hdr->hash_cap = (u32)new_hash_cap; 267 | 268 | dmap_assert(((uintptr_t)&new_hdr->data & (DMAP_ALIGNMENT - 1)) == 0); // ensure alignment 269 | return new_hdr->data; // return the aligned data pointer 270 | } 271 | 272 | static void *dmap__init_internal(size_t elem_size, bool is_string, DmapOptions options){ 273 | DmapHdr *new_hdr = NULL; 274 | 275 | s32 capacity = options.initial_capacity; 276 | size_t table_capacity = next_power_of_2(capacity); 277 | while ((size_t)((float)table_capacity * DMAP_LOAD_FACTOR) < (size_t)capacity) { 278 | if (table_capacity > SIZE_MAX / 2) { // Prevent overflow 279 | dmap_error_handler("Error: exceeded max capacity"); 280 | } 281 | table_capacity *= 2; 282 | } 283 | capacity = (size_t)((float)table_capacity * DMAP_LOAD_FACTOR); 284 | size_t size_in_bytes = offsetof(DmapHdr, data) + (capacity * elem_size); 285 | if(capacity > DMAP_MAX_CAPACITY){ 286 | dmap_error_handler("Error: Max capacity exceeded.\n"); 287 | } 288 | if(size_in_bytes > DMAP_DEFAULT_MAX_SIZE){ 289 | dmap_error_handler("Error: Max size exceeded. #define DMAP_DEFAULT_MAX_SIZE to overide default."); 290 | } 291 | if(!options.data_allocator_fn){ 292 | options.data_allocator_fn = realloc; 293 | } 294 | new_hdr = (DmapHdr*)options.data_allocator_fn(NULL, size_in_bytes); 295 | if(!new_hdr){ 296 | dmap_error_handler("Out of memory 3"); 297 | } 298 | if(options.free_key_fn){ 299 | options.user_managed_keys = true; 300 | } 301 | new_hdr->options = options; 302 | new_hdr->len = 0; 303 | new_hdr->cap = (u32)capacity; 304 | new_hdr->hash_cap = (u32)table_capacity; 305 | new_hdr->returned_idx = DMAP_EMPTY; 306 | new_hdr->table = NULL; 307 | new_hdr->free_list = NULL; 308 | new_hdr->key_size = 0; 309 | new_hdr->val_size = (u32)elem_size; 310 | new_hdr->hash_seed = dmap_generate_seed(); 311 | new_hdr->is_string = is_string; 312 | 313 | dmap_grow_table(new_hdr, new_hdr->hash_cap, 0); 314 | dmap_assert(((uintptr_t)&new_hdr->data & (DMAP_ALIGNMENT - 1)) == 0); // ensure alignment 315 | return new_hdr->data; 316 | } 317 | void *dmap__init(size_t elem_size, DmapOptions options){ 318 | return dmap__init_internal(elem_size, false, options); 319 | } 320 | void *dmap__kstr_init(size_t elem_size, DmapOptions options){ 321 | return dmap__init_internal(elem_size, true, options); 322 | } 323 | static inline DmapOptions dmap_default_options(void) { 324 | return (DmapOptions){ 325 | .data_allocator_fn = NULL, 326 | .free_key_fn = NULL, 327 | .initial_capacity = DMAP_INITIAL_CAPACITY, 328 | .user_managed_keys = false, 329 | }; 330 | } 331 | // grows the hashmap to a new capacity 332 | void *dmap__grow(DmapHdr *d, size_t elem_size) { 333 | if (!d) { 334 | // when this is the case we just want the defaults 335 | return dmap__init(elem_size, dmap_default_options()); 336 | } 337 | return dmap__grow_internal(d, elem_size); 338 | } 339 | void *dmap__kstr_grow(DmapHdr *d, size_t elem_size) { 340 | if (!d) { 341 | // when this is the case we just want the defaults 342 | return dmap__kstr_init(elem_size, dmap_default_options()); 343 | } 344 | return dmap__grow_internal(d, elem_size); 345 | } 346 | void dmap__free(DmapHdr *d){ 347 | if(d){ 348 | if(d->table) { 349 | if(d->options.user_managed_keys && d->options.free_key_fn){ // user manages keys & provided a free function 350 | for(s32 i = 0; i < d->hash_cap; i++){ 351 | if(d->table[i].ptr != NULL){ 352 | d->options.free_key_fn(d->table[i].ptr); 353 | } 354 | } 355 | } 356 | else if(!d->options.user_managed_keys){ 357 | if(d->is_string){ 358 | for(s32 i = 0; i < d->hash_cap; i++){ 359 | if(d->table[i].kstr_len > 8 && d->table[i].kstr != NULL){ 360 | free(d->table[i].kstr); 361 | } 362 | } 363 | } 364 | else { 365 | for(s32 i = 0; i < d->hash_cap; i++){ 366 | if(d->key_size > 8 && d->table[i].ptr != NULL){ 367 | free(d->table[i].ptr); 368 | } 369 | } 370 | } 371 | } 372 | free(d->table); 373 | } 374 | if(d->free_list){ 375 | if(d->free_list->data) { 376 | free(d->free_list->data); 377 | } 378 | free(d->free_list); 379 | } 380 | d->options.data_allocator_fn(d, 0); 381 | } 382 | } 383 | s32 dmap__get_entry_index(DmapHdr *d, void *key, size_t key_size){ 384 | s32 result = DMAP_INVALID; 385 | if(d->cap != 0) { 386 | u64 hash = d->options.hash_fn ? d->options.hash_fn(key, key_size) : dmap_generate_hash(key, key_size, d->hash_seed); // generate a hash value for the given key 387 | s32 idx = hash & (d->hash_cap - 1); 388 | // size_t j = d->hash_cap; // counter to ensure the loop doesn't iterate more than the capacity of the hashmap 389 | while(true) { // loop to search for the key in the hashmap 390 | // dmap_assert(j-- != 0); // unreachable -- suggests table is full 391 | if(d->table[idx].data_idx == DMAP_EMPTY){ // if the entry is empty, the key is not in the hashmap 392 | break; 393 | } 394 | if(d->table[idx].data_idx != DMAP_DELETED && d->table[idx].hash == hash) { 395 | if(keys_match(d, idx, key, key_size)){ 396 | result = idx; 397 | break; 398 | } 399 | } 400 | idx = (idx + 1) & (d->hash_cap - 1); // move to the next index, wrapping around to the start if necessary 401 | } 402 | } 403 | return result; 404 | } 405 | static char *dmap_strdup(char *src, size_t len) { 406 | char *dst = (char*)malloc(len + 1); 407 | if (!dst) { 408 | return NULL; 409 | } 410 | memcpy(dst, src, len); 411 | dst[len] = '\0'; 412 | return dst; 413 | } 414 | static void *dmap_dup_struct(const void *src, size_t len) { 415 | void *dst = malloc(len); 416 | if (!dst) { 417 | return NULL; 418 | } 419 | memcpy(dst, src, len); 420 | return dst; 421 | } 422 | 423 | 424 | void dmap__insert_entry(DmapHdr *d, void *key, size_t key_size){ 425 | if(d->key_size == 0){ 426 | if(d->is_string) 427 | d->key_size = -1; // strings 428 | else 429 | d->key_size = (s32)key_size; 430 | } 431 | else if(d->key_size != (s32)key_size && d->key_size != -1){ 432 | dmap_error_handler("Error: key is not the correct size"); 433 | } 434 | u64 hash = d->options.hash_fn ? d->options.hash_fn(key, key_size) : dmap_generate_hash(key, key_size, d->hash_seed); 435 | u32 idx = hash & (d->hash_cap - 1); 436 | // size_t j = d->hash_cap; 437 | while(true){ 438 | // dmap_assert(j-- != 0); // unreachable - suggests there were no empty slots 439 | if(d->table[idx].data_idx == DMAP_EMPTY || d->table[idx].data_idx == DMAP_DELETED){ // insert in empty or deleted 440 | break; 441 | } 442 | if(d->table[idx].hash == hash){ 443 | if(keys_match(d, idx, key, key_size)){ // modify existing entry 444 | break; 445 | } 446 | } 447 | idx = (idx + 1) & (d->hash_cap - 1); 448 | } 449 | if(d->table[idx].data_idx != DMAP_EMPTY && d->table[idx].data_idx != DMAP_DELETED){ 450 | d->returned_idx = d->table[idx].data_idx; 451 | } 452 | else { 453 | 454 | d->returned_idx = d->free_list && d->free_list->len > 0 ? dmap_freelist_pop(d) : d->len; 455 | d->len += 1; 456 | 457 | DmapTable *entry = &d->table[idx]; 458 | entry->hash = hash; 459 | entry->data_idx = d->returned_idx; 460 | if(d->is_string) { 461 | entry->kstr_len = (s32)key_size; 462 | if(d->options.user_managed_keys) { // user managed/allocated keys 463 | entry->kstr = key; 464 | } 465 | else if(key_size <= 8) { // otherwise dmap copies string keys 466 | memcpy(entry->small_kstr, key, key_size); 467 | } 468 | else { 469 | entry->kstr = dmap_strdup(key, key_size); 470 | if(!entry->kstr){ 471 | dmap_error_handler("Error: dmap_strdup - malloc failed"); 472 | } 473 | } 474 | } 475 | else { 476 | dmap_assert(d->key_size == (s32)key_size); 477 | entry->kstr_len = (s32)key_size; 478 | if(d->options.user_managed_keys) { // user managed/allocated keys 479 | entry->ptr = key; 480 | } 481 | else if(key_size <= 8){ // copy small keys directly to table 482 | entry->key = 0; // zero-out first 483 | memcpy(&entry->key, key, key_size); 484 | } 485 | else { 486 | entry->ptr = dmap_dup_struct(key, key_size); // allocate copies > 8 bytes 487 | if(!entry->ptr){ 488 | dmap_error_handler("Error: dmap_dup_struct - malloc failed"); 489 | } 490 | } 491 | } 492 | } 493 | return; 494 | } 495 | 496 | void* dmap__getp(DmapHdr *d, void *key, size_t key_size){ 497 | if(d->key_size != (s32)key_size && d->key_size != -1){ // -1 indicates a string key 498 | dmap_error_handler("Error: key is not the correct size"); 499 | } 500 | size_t idx = dmap__get_entry_index(d, key, key_size); 501 | if(idx == DMAP_INVALID) { 502 | return NULL; // entry is not found 503 | } 504 | return d->data + d->table[idx].data_idx * d->val_size; 505 | } 506 | // returns: int - The index of the data associated with the key, or DMAP_INVALID (-1) if the key is not found 507 | s32 dmap__get_idx(DmapHdr *d, void *key, size_t key_size){ 508 | s32 idx = dmap__get_entry_index(d, key, key_size); 509 | if(idx == DMAP_INVALID) { 510 | return DMAP_INVALID; 511 | } 512 | return d->table[idx].data_idx; 513 | } 514 | 515 | // returns the data index of the deleted entry. Caller may wish to mark data as invalid 516 | s32 dmap__delete(DmapHdr *d, void *key, size_t key_size){ 517 | s32 idx = dmap__get_entry_index(d, key, key_size); 518 | if(idx == DMAP_INVALID) { 519 | return DMAP_INVALID; 520 | } 521 | s32 data_index = d->table[idx].data_idx; 522 | dmap_freelist_push(d, data_index); 523 | d->table[idx].data_idx = DMAP_DELETED; 524 | 525 | if(!d->options.user_managed_keys && key_size > 8){ // dmap copies keys 526 | free(d->table[idx].ptr); 527 | d->table[idx].ptr = NULL; 528 | } 529 | else if(d->options.user_managed_keys && d->options.free_key_fn){ // user supplied free_key 530 | d->options.free_key_fn(d->table[idx].ptr); 531 | d->table[idx].ptr = NULL; 532 | } 533 | else { // user managed 534 | d->table[idx].kstr = NULL; 535 | } 536 | d->len -= 1; 537 | return data_index; 538 | } 539 | // len of the data array, including invalid table. For iterating 540 | s32 dmap__range(DmapHdr *d){ 541 | return d ? d->len + d->free_list->len : 0; 542 | } 543 | 544 | // MARK: hash function: 545 | // - rapidhash source repository: https://github.com/Nicoshev/rapidhash 546 | 547 | /* 548 | * rapidhash - Very fast, high quality, platform-independent hashing algorithm. 549 | * Copyright (C) 2024 Nicolas De Carli 550 | * 551 | * Based on 'wyhash', by Wang Yi 552 | * 553 | * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php) 554 | * 555 | * Redistribution and use in source and binary forms, with or without 556 | * modification, are permitted provided that the following conditions are 557 | * met: 558 | * 559 | * * Redistributions of source code must retain the above copyright 560 | * notice, this list of conditions and the following disclaimer. 561 | * * Redistributions in binary form must reproduce the above 562 | * copyright notice, this list of conditions and the following disclaimer 563 | * in the documentation and/or other materials provided with the 564 | * distribution. 565 | * 566 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 567 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 568 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 569 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 570 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 571 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 572 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 573 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 574 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 575 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 576 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 577 | * 578 | * You can contact the author at: 579 | * - rapidhash source repository: https://github.com/Nicoshev/rapidhash 580 | */ 581 | 582 | /* 583 | * Includes. 584 | */ 585 | #include 586 | #include 587 | #if defined(_MSC_VER) 588 | #include 589 | #if defined(_M_X64) && !defined(_M_ARM64EC) 590 | #pragma intrinsic(_umul128) 591 | #endif 592 | #endif 593 | 594 | /* 595 | * C++ macros. 596 | * 597 | * RAPIDHASH_INLINE can be overridden to be stronger than a hint, i.e. by adding __attribute__((always_inline)). 598 | */ 599 | 600 | /* 601 | * Protection macro, alters behaviour of rapid_mum multiplication function. 602 | * 603 | * RAPIDHASH_FAST: Normal behavior, max speed. 604 | * RAPIDHASH_PROTECTED: Extra protection against entropy loss. 605 | */ 606 | #ifndef RAPIDHASH_PROTECTED 607 | #define RAPIDHASH_FAST 608 | #elif defined(RAPIDHASH_FAST) 609 | #error "cannot define RAPIDHASH_PROTECTED and RAPIDHASH_FAST simultaneously." 610 | #endif 611 | 612 | /* 613 | * Unrolling macros, changes code definition for main hash function. 614 | * 615 | * RAPIDHASH_COMPACT: Legacy variant, each loop process 48 bytes. 616 | * RAPIDHASH_UNROLLED: Unrolled variant, each loop process 96 bytes. 617 | * 618 | * Most modern CPUs should benefit from having RAPIDHASH_UNROLLED. 619 | * 620 | * These macros do not alter the output hash. 621 | */ 622 | #ifndef RAPIDHASH_COMPACT 623 | #define RAPIDHASH_UNROLLED 624 | #elif defined(RAPIDHASH_UNROLLED) 625 | #error "cannot define RAPIDHASH_COMPACT and RAPIDHASH_UNROLLED simultaneously." 626 | #endif 627 | 628 | /* 629 | * Likely and unlikely macros. 630 | */ 631 | #if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__) 632 | #define _likely_(x) __builtin_expect(x,1) 633 | #define _unlikely_(x) __builtin_expect(x,0) 634 | #else 635 | #define _likely_(x) (x) 636 | #define _unlikely_(x) (x) 637 | #endif 638 | 639 | /* 640 | * Endianness macros. 641 | */ 642 | #ifndef RAPIDHASH_LITTLE_ENDIAN 643 | #if defined(_WIN32) || defined(__LITTLE_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) 644 | #define RAPIDHASH_LITTLE_ENDIAN 645 | #elif defined(__BIG_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) 646 | #define RAPIDHASH_BIG_ENDIAN 647 | #else 648 | #warning "could not determine endianness! Falling back to little endian." 649 | #define RAPIDHASH_LITTLE_ENDIAN 650 | #endif 651 | #endif 652 | 653 | /* 654 | * Default seed. 655 | */ 656 | #define RAPID_SEED (0xbdd89aa982704029ull) 657 | 658 | /* 659 | * Default secret parameters. 660 | */ 661 | RAPIDHASH_CONSTEXPR uint64_t rapid_secret[3] = {0x2d358dccaa6c78a5ull, 0x8bb84b93962eacc9ull, 0x4b33a62ed433d4a3ull}; 662 | 663 | /* 664 | * 64*64 -> 128bit multiply function. 665 | * 666 | * @param A Address of 64-bit number. 667 | * @param B Address of 64-bit number. 668 | * 669 | * Calculates 128-bit C = *A * *B. 670 | * 671 | * When RAPIDHASH_FAST is defined: 672 | * Overwrites A contents with C's low 64 bits. 673 | * Overwrites B contents with C's high 64 bits. 674 | * 675 | * When RAPIDHASH_PROTECTED is defined: 676 | * Xors and overwrites A contents with C's low 64 bits. 677 | * Xors and overwrites B contents with C's high 64 bits. 678 | */ 679 | RAPIDHASH_INLINE void rapid_mum(uint64_t *A, uint64_t *B) RAPIDHASH_NOEXCEPT { 680 | #if defined(__SIZEOF_INT128__) 681 | __uint128_t r=*A; r*=*B; 682 | #ifdef RAPIDHASH_PROTECTED 683 | *A^=(uint64_t)r; *B^=(uint64_t)(r>>64); 684 | #else 685 | *A=(uint64_t)r; *B=(uint64_t)(r>>64); 686 | #endif 687 | #elif defined(_MSC_VER) && (defined(_WIN64) || defined(_M_HYBRID_CHPE_ARM64)) 688 | #if defined(_M_X64) 689 | #ifdef RAPIDHASH_PROTECTED 690 | uint64_t a, b; 691 | a=_umul128(*A,*B,&b); 692 | *A^=a; *B^=b; 693 | #else 694 | *A=_umul128(*A,*B,B); 695 | #endif 696 | #else 697 | #ifdef RAPIDHASH_PROTECTED 698 | uint64_t a, b; 699 | b = __umulh(*A, *B); 700 | a = *A * *B; 701 | *A^=a; *B^=b; 702 | #else 703 | uint64_t c = __umulh(*A, *B); 704 | *A = *A * *B; 705 | *B = c; 706 | #endif 707 | #endif 708 | #else 709 | uint64_t ha=*A>>32, hb=*B>>32, la=(uint32_t)*A, lb=(uint32_t)*B, hi, lo; 710 | uint64_t rh=ha*hb, rm0=ha*lb, rm1=hb*la, rl=la*lb, t=rl+(rm0<<32), c=t>32)+(rm1>>32)+c; 712 | #ifdef RAPIDHASH_PROTECTED 713 | *A^=lo; *B^=hi; 714 | #else 715 | *A=lo; *B=hi; 716 | #endif 717 | #endif 718 | } 719 | 720 | /* 721 | * Multiply and xor mix function. 722 | * 723 | * @param A 64-bit number. 724 | * @param B 64-bit number. 725 | * 726 | * Calculates 128-bit C = A * B. 727 | * Returns 64-bit xor between high and low 64 bits of C. 728 | */ 729 | RAPIDHASH_INLINE uint64_t rapid_mix(uint64_t A, uint64_t B) RAPIDHASH_NOEXCEPT { rapid_mum(&A,&B); return A^B; } 730 | 731 | /* 732 | * Read functions. 733 | */ 734 | #ifdef RAPIDHASH_LITTLE_ENDIAN 735 | RAPIDHASH_INLINE uint64_t rapid_read64(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint64_t v; memcpy(&v, p, sizeof(uint64_t)); return v;} 736 | RAPIDHASH_INLINE uint64_t rapid_read32(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint32_t v; memcpy(&v, p, sizeof(uint32_t)); return v;} 737 | #elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__) 738 | RAPIDHASH_INLINE uint64_t rapid_read64(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint64_t v; memcpy(&v, p, sizeof(uint64_t)); return __builtin_bswap64(v);} 739 | RAPIDHASH_INLINE uint64_t rapid_read32(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint32_t v; memcpy(&v, p, sizeof(uint32_t)); return __builtin_bswap32(v);} 740 | #elif defined(_MSC_VER) 741 | RAPIDHASH_INLINE uint64_t rapid_read64(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint64_t v; memcpy(&v, p, sizeof(uint64_t)); return _byteswap_uint64(v);} 742 | RAPIDHASH_INLINE uint64_t rapid_read32(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint32_t v; memcpy(&v, p, sizeof(uint32_t)); return _byteswap_ulong(v);} 743 | #else 744 | RAPIDHASH_INLINE uint64_t rapid_read64(const uint8_t *p) RAPIDHASH_NOEXCEPT { 745 | uint64_t v; memcpy(&v, p, 8); 746 | return (((v >> 56) & 0xff)| ((v >> 40) & 0xff00)| ((v >> 24) & 0xff0000)| ((v >> 8) & 0xff000000)| ((v << 8) & 0xff00000000)| ((v << 24) & 0xff0000000000)| ((v << 40) & 0xff000000000000)| ((v << 56) & 0xff00000000000000)); 747 | } 748 | RAPIDHASH_INLINE uint64_t rapid_read32(const uint8_t *p) RAPIDHASH_NOEXCEPT { 749 | uint32_t v; memcpy(&v, p, 4); 750 | return (((v >> 24) & 0xff)| ((v >> 8) & 0xff00)| ((v << 8) & 0xff0000)| ((v << 24) & 0xff000000)); 751 | } 752 | #endif 753 | 754 | /* 755 | * Reads and combines 3 bytes of input. 756 | * 757 | * @param p Buffer to read from. 758 | * @param k Length of @p, in bytes. 759 | * 760 | * Always reads and combines 3 bytes from memory. 761 | * Guarantees to read each buffer position at least once. 762 | * 763 | * Returns a 64-bit value containing all three bytes read. 764 | */ 765 | RAPIDHASH_INLINE uint64_t rapid_readSmall(const uint8_t *p, size_t k) RAPIDHASH_NOEXCEPT { return (((uint64_t)p[0])<<56)|(((uint64_t)p[k>>1])<<32)|p[k-1];} 766 | 767 | /* 768 | * rapidhash main function. 769 | * 770 | * @param key Buffer to be hashed. 771 | * @param len @key length, in bytes. 772 | * @param seed 64-bit seed used to alter the hash result predictably. 773 | * @param secret Triplet of 64-bit secrets used to alter hash result predictably. 774 | * 775 | * Returns a 64-bit hash. 776 | */ 777 | RAPIDHASH_INLINE uint64_t rapidhash_internal(const void *key, size_t len, uint64_t seed, const uint64_t* secret) RAPIDHASH_NOEXCEPT { 778 | const uint8_t *p=(const uint8_t *)key; seed^=rapid_mix(seed^secret[0],secret[1])^len; uint64_t a, b; 779 | if(_likely_(len<=16)){ 780 | if(_likely_(len>=4)){ 781 | const uint8_t * plast = p + len - 4; 782 | a = (rapid_read32(p) << 32) | rapid_read32(plast); 783 | const uint64_t delta = ((len&24)>>(len>>3)); 784 | b = ((rapid_read32(p + delta) << 32) | rapid_read32(plast - delta)); } 785 | else if(_likely_(len>0)){ a=rapid_readSmall(p,len); b=0;} 786 | else a=b=0; 787 | } 788 | else{ 789 | size_t i=len; 790 | if(_unlikely_(i>48)){ 791 | uint64_t see1=seed, see2=seed; 792 | #ifdef RAPIDHASH_UNROLLED 793 | while(_likely_(i>=96)){ 794 | seed=rapid_mix(rapid_read64(p)^secret[0],rapid_read64(p+8)^seed); 795 | see1=rapid_mix(rapid_read64(p+16)^secret[1],rapid_read64(p+24)^see1); 796 | see2=rapid_mix(rapid_read64(p+32)^secret[2],rapid_read64(p+40)^see2); 797 | seed=rapid_mix(rapid_read64(p+48)^secret[0],rapid_read64(p+56)^seed); 798 | see1=rapid_mix(rapid_read64(p+64)^secret[1],rapid_read64(p+72)^see1); 799 | see2=rapid_mix(rapid_read64(p+80)^secret[2],rapid_read64(p+88)^see2); 800 | p+=96; i-=96; 801 | } 802 | if(_unlikely_(i>=48)){ 803 | seed=rapid_mix(rapid_read64(p)^secret[0],rapid_read64(p+8)^seed); 804 | see1=rapid_mix(rapid_read64(p+16)^secret[1],rapid_read64(p+24)^see1); 805 | see2=rapid_mix(rapid_read64(p+32)^secret[2],rapid_read64(p+40)^see2); 806 | p+=48; i-=48; 807 | } 808 | #else 809 | do { 810 | seed=rapid_mix(rapid_read64(p)^secret[0],rapid_read64(p+8)^seed); 811 | see1=rapid_mix(rapid_read64(p+16)^secret[1],rapid_read64(p+24)^see1); 812 | see2=rapid_mix(rapid_read64(p+32)^secret[2],rapid_read64(p+40)^see2); 813 | p+=48; i-=48; 814 | } while (_likely_(i>=48)); 815 | #endif 816 | seed^=see1^see2; 817 | } 818 | if(i>16){ 819 | seed=rapid_mix(rapid_read64(p)^secret[2],rapid_read64(p+8)^seed^secret[1]); 820 | if(i>32) 821 | seed=rapid_mix(rapid_read64(p+16)^secret[2],rapid_read64(p+24)^seed); 822 | } 823 | a=rapid_read64(p+i-16); b=rapid_read64(p+i-8); 824 | } 825 | a^=secret[1]; b^=seed; rapid_mum(&a,&b); 826 | return rapid_mix(a^secret[0]^len,b^secret[1]); 827 | } 828 | 829 | /* 830 | * rapidhash default seeded hash function. 831 | * 832 | * @param key Buffer to be hashed. 833 | * @param len @key length, in bytes. 834 | * @param seed 64-bit seed used to alter the hash result predictably. 835 | * 836 | * Calls rapidhash_internal using provided parameters and default secrets. 837 | * 838 | * Returns a 64-bit hash. 839 | */ 840 | RAPIDHASH_INLINE uint64_t rapidhash_withSeed(const void *key, size_t len, uint64_t seed) RAPIDHASH_NOEXCEPT { 841 | return rapidhash_internal(key, len, seed, rapid_secret); 842 | } 843 | 844 | /* 845 | * rapidhash default hash function. 846 | * 847 | * @param key Buffer to be hashed. 848 | * @param len @key length, in bytes. 849 | * 850 | * Calls rapidhash_withSeed using provided parameters and the default seed. 851 | * 852 | * Returns a 64-bit hash. 853 | */ 854 | RAPIDHASH_INLINE uint64_t rapidhash(const void *key, size_t len) RAPIDHASH_NOEXCEPT { 855 | return rapidhash_withSeed(key, len, RAPID_SEED); 856 | } 857 | 858 | --------------------------------------------------------------------------------