├── .clang-format ├── .github └── workflows │ └── ci.yml ├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── _config.yml ├── clib.json ├── cmake └── HashMapConfig.cmake.in ├── examples ├── CMakeLists.txt └── hashmap_example.c ├── include ├── hashmap.h └── hashmap_base.h ├── src └── hashmap.c └── tests ├── CMakeLists.txt └── hashmap_test.cpp /.clang-format: -------------------------------------------------------------------------------- 1 | AlignConsecutiveMacros: AcrossEmptyLinesAndComments 2 | AllowShortBlocksOnASingleLine: false 3 | AllowShortFunctionsOnASingleLine: Empty 4 | AllowShortIfStatementsOnASingleLine: Never 5 | AllowShortLambdasOnASingleLine: Empty 6 | AllowShortLoopsOnASingleLine: false 7 | AlwaysBreakTemplateDeclarations: Yes 8 | BreakBeforeTernaryOperators: false 9 | BreakBeforeBraces: Custom 10 | BraceWrapping: 11 | AfterFunction: true 12 | ColumnLimit: 120 13 | ForEachMacros: 14 | [ 15 | hashmap_foreach, 16 | hashmap_foreach_safe, 17 | hashmap_foreach_key, 18 | hashmap_foreach_key_safe, 19 | hashmap_foreach_data, 20 | hashmap_foreach_data_safe, 21 | ] 22 | IndentWidth: 4 23 | InsertBraces: true 24 | KeepEmptyLinesAtTheStartOfBlocks: false 25 | WhitespaceSensitiveMacros: [hashmap_*] 26 | SpaceBeforeParens: ControlStatementsExceptForEachMacros 27 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [master] 6 | pull_request: 7 | branches: [master] 8 | 9 | env: 10 | BUILD_DIR: ${{ github.workspace }}/build 11 | 12 | jobs: 13 | build: 14 | runs-on: ${{ matrix.os }} 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | os: [ubuntu-latest] 19 | compiler: [gcc, clang] 20 | build_type: [Debug, Release] 21 | 22 | steps: 23 | - uses: actions/checkout@v4 24 | 25 | - name: Create build directory 26 | run: mkdir -p ${{ env.BUILD_DIR }} 27 | 28 | - name: Install 
Clang 29 | if: matrix.compiler == 'clang' 30 | run: | 31 | sudo apt-get install -y clang 32 | sudo update-alternatives --remove-all cc 33 | sudo update-alternatives --install /usr/bin/cc cc /usr/bin/clang 15 34 | 35 | - name: Configure CMake 36 | run: cmake -B ${{ env.BUILD_DIR }} -S ${{ github.workspace }} -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DHASHMAP_BUILD_TESTS=ON -DHASHMAP_BUILD_EXAMPLES=ON 37 | 38 | - name: Build 39 | run: cmake --build ${{ env.BUILD_DIR }} --config ${{ matrix.build_type }} 40 | 41 | - name: Install 42 | run: sudo cmake --install ${{ env.BUILD_DIR }} --config ${{ matrix.build_type }} 43 | 44 | - name: Test 45 | run: ctest --output-on-failure --test-dir ${{ env.BUILD_DIR }} 46 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Object files 5 | *.o 6 | *.ko 7 | *.obj 8 | *.elf 9 | 10 | # Linker output 11 | *.ilk 12 | *.map 13 | *.exp 14 | 15 | # Precompiled Headers 16 | *.gch 17 | *.pch 18 | 19 | # Libraries 20 | *.lib 21 | *.a 22 | *.la 23 | *.lo 24 | 25 | # Shared objects (inc. 
Windows DLLs) 26 | *.dll 27 | *.so 28 | *.so.* 29 | *.dylib 30 | 31 | # Executables 32 | *.exe 33 | *.out 34 | *.app 35 | *.i*86 36 | *.x86_64 37 | *.hex 38 | 39 | # Debug files 40 | *.dSYM/ 41 | *.su 42 | *.idb 43 | *.pdb 44 | 45 | # Kernel Module Compile Results 46 | *.mod* 47 | *.cmd 48 | .tmp_versions/ 49 | modules.order 50 | Module.symvers 51 | Mkfile.old 52 | dkms.conf 53 | /build/ 54 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.16) 2 | project(hashmap VERSION 2.1.0 LANGUAGES C) 3 | 4 | if(NOT DEFINED CMAKE_C_STANDARD) 5 | set(CMAKE_C_STANDARD 23) 6 | set(CMAKE_C_EXTENSIONS OFF) 7 | endif() 8 | 9 | ############################################## 10 | # Build options 11 | 12 | option(HASHMAP_BUILD_TESTS "Build tests" OFF) 13 | option(HASHMAP_BUILD_EXAMPLES "Build examples" OFF) 14 | 15 | ############################################## 16 | # Set default build to release 17 | 18 | if(NOT CMAKE_BUILD_TYPE) 19 | set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose Release or Debug" FORCE) 20 | endif() 21 | 22 | ############################################## 23 | # Create target and set properties 24 | 25 | add_library(hashmap 26 | src/hashmap.c 27 | ) 28 | 29 | # Add an alias so that library can be used inside the build tree, 30 | # e.g. 
when testing 31 | add_library(HashMap::HashMap ALIAS hashmap) 32 | 33 | # Set target properties: consumers see include/ from the source tree when building in-tree and from the install prefix after installation 34 | target_include_directories(hashmap 35 | PUBLIC 36 | $<INSTALL_INTERFACE:include> 37 | $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include> 38 | PRIVATE 39 | ${CMAKE_CURRENT_SOURCE_DIR}/src 40 | ) 41 | target_compile_options(hashmap 42 | PRIVATE -Wall -Werror 43 | ) 44 | 45 | ############################################## 46 | # Installation instructions 47 | 48 | include(GNUInstallDirs) 49 | set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/HashMap) 50 | 51 | install(TARGETS hashmap 52 | EXPORT hashmap-targets 53 | LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} 54 | ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} 55 | ) 56 | 57 | # Ensure the exported target has the name HashMap and not hashmap 58 | # and if this is linked into a shared library, ensure it is PIC 59 | set_target_properties(hashmap 60 | PROPERTIES 61 | EXPORT_NAME HashMap 62 | POSITION_INDEPENDENT_CODE ON 63 | ) 64 | 65 | install(DIRECTORY include/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) 66 | 67 | # Export the targets to a script 68 | install(EXPORT hashmap-targets 69 | FILE 70 | HashMapTargets.cmake 71 | NAMESPACE 72 | HashMap:: 73 | DESTINATION 74 | ${INSTALL_CONFIGDIR} 75 | ) 76 | 77 | # Create a ConfigVersion.cmake file 78 | include(CMakePackageConfigHelpers) 79 | write_basic_package_version_file( 80 | ${CMAKE_CURRENT_BINARY_DIR}/HashMapConfigVersion.cmake 81 | VERSION ${PROJECT_VERSION} 82 | COMPATIBILITY AnyNewerVersion 83 | ) 84 | 85 | configure_package_config_file( 86 | ${CMAKE_CURRENT_LIST_DIR}/cmake/HashMapConfig.cmake.in 87 | ${CMAKE_CURRENT_BINARY_DIR}/HashMapConfig.cmake 88 | INSTALL_DESTINATION ${INSTALL_CONFIGDIR} 89 | ) 90 | 91 | # Install the config, configversion and custom find modules 92 | install(FILES 93 | ${CMAKE_CURRENT_BINARY_DIR}/HashMapConfig.cmake 94 | ${CMAKE_CURRENT_BINARY_DIR}/HashMapConfigVersion.cmake 95 | DESTINATION ${INSTALL_CONFIGDIR} 96 | ) 97 | 98 | ############################################## 99 | # Exporting from the build tree 100 | 101 | 
export(EXPORT hashmap-targets 102 | FILE ${CMAKE_CURRENT_BINARY_DIR}/HashMapTargets.cmake 103 | NAMESPACE HashMap:: 104 | ) 105 | 106 | # Register package in user's package registry 107 | export(PACKAGE HashMap) 108 | 109 | ############################################## 110 | # Build unit test 111 | 112 | if(HASHMAP_BUILD_TESTS) 113 | enable_testing() 114 | add_subdirectory(tests) 115 | endif() 116 | 117 | ############################################## 118 | # Build examples 119 | 120 | if(HASHMAP_BUILD_EXAMPLES) 121 | add_subdirectory(examples) 122 | endif() 123 | 124 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016-2020 David Leeds 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # hashmap 2 | 3 | [![ci](https://github.com/DavidLeeds/hashmap/workflows/CI/badge.svg)](https://github.com/DavidLeeds/hashmap/actions/workflows/ci.yml) 4 | 5 | Templated type-safe hashmap implementation in C using open addressing and linear probing for collision resolution. 6 | 7 | ## Summary 8 | 9 | This project came into existence because there is a notable lack of flexible and easy to use data structures available in C. C data structures with efficient, type-safe interfaces are virtually non-existent. Higher level languages have built-in libraries and templated classes, but plenty of embedded projects or higher level libraries are implemented in C. When it is undesirable to depend on a bulky library like Glib or grapple with a restrictive license agreement, this is the library for you. 10 | 11 | ## Goals 12 | 13 | * **To scale gracefully to the full capacity of the numeric primitives in use.** We should be able to load enough entries to consume all memory on the system without hitting any bugs relating to integer overflows. Lookups on a hashtable with hundreds of millions of entries should be performed in close to constant time, no different than lookups in a hashtable with 20 entries. Automatic rehashing occurs and maintains a load factor of 0.75 or less. 14 | * **To provide a clean and easy-to-use interface.** C data structures often struggle to strike a balance between flexibility and ease of use. To this end, I wrapped a generic C backend implementation with light-weight pre-processor macros to create a templated interface that enables the compiler to type-check all function arguments and return values. All required type information is encoded in the hashmap declaration using the `HASHMAP()` macro. 
Unlike with header-only macro libraries, there is no code duplication or performance disadvantage over a traditional library with a non-type-safe `void *` interface. 15 | * **To enable easy iteration and safe entry removal during iteration.** Applications often need these features, and the data structure should not hold them back. Easy to use `hashmap_foreach()` macros and a more flexible iterator interface are provided. This hashmap also uses an open addressing scheme, which has superior iteration performance to a similar hashmap implemented using separate chaining (buckets with linked lists). This is because fewer instructions are needed per iteration, and array traversal has superior cache performance than linked list traversal. 16 | * **To use an unrestrictive software license.** I chose the MIT license because it is the most common open source license in use, and it grants full rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell the code. Basically, take this code and do what you want with it. Just be nice and leave the license comment and my name at top of the file. Feel free to add your name as a contributor if you are significantly modifying and redistributing. 17 | 18 | ## API Examples 19 | 20 | ### Declaring a type-specific hashmap 21 | 22 | Use the `HASHMAP(key_type, value_type)` macro to declare a hashmap state struct specific to your needs. Keys and values are always passed in by pointer. Keys are const. 23 | 24 | ```C 25 | /* Map with string key (const char *) and integer value (int *) */ 26 | HASHMAP(char, int) map1; 27 | 28 | /* Map with uint64 key (const uint64_t *) and struct value (struct my_value *) */ 29 | HASHMAP(uint64_t, struct my_value) map2; 30 | ``` 31 | 32 | The structure defined by the `HASHMAP()` macro may be used directly, or named using `typedef`. 
For example: 33 | 34 | ```C 35 | typedef HASHMAP(char, struct my_value) value_map_t; 36 | ``` 37 | 38 | ### Initialization and cleanup 39 | 40 | Maps must be initialized with a key hash function and a key comparator. 41 | 42 | ```C 43 | /* Initialize the map structure */ 44 | hashmap_init(&map, my_key_hash, my_key_compare); 45 | 46 | /* Use the map... */ 47 | 48 | /* Free resources associated with the map */ 49 | hashmap_cleanup(&map); 50 | ``` 51 | 52 | This library provides some hash functions, so you may not have to write your own: 53 | 54 | * [hashmap_hash_string()](https://github.com/DavidLeeds/hashmap/blob/137d60b3818c22c79d2be5560150eb2eff981a68/include/hashmap_base.h#L54) - Case sensitive string hash 55 | * [hashmap_hash_string_i()](https://github.com/DavidLeeds/hashmap/blob/137d60b3818c22c79d2be5560150eb2eff981a68/include/hashmap_base.h#L55) - Case insensitive string hash 56 | * [hashmap_hash_default()](https://github.com/DavidLeeds/hashmap/blob/137d60b3818c22c79d2be5560150eb2eff981a68/include/hashmap_base.h#L53) - Hash function for arbitrary bytes that can be used by a user-defined hash function 57 | 58 | I recommend using these, unless you have very specific needs. 59 | 60 | ```C 61 | /* Initialize a map with case-sensitive string keys */ 62 | hashmap_init(&map, hashmap_hash_string, strcmp); 63 | ``` 64 | 65 | Note that memory associated with map keys and values is not managed by the map, so you may need to free this before calling `hashmap_cleanup()`. Keys are often stored in the same structure as the value, but it is possible to have the map manage key memory allocation internally, by calling `hashmap_set_key_alloc_funcs()`. 66 | 67 | ### Value insertion 68 | 69 | ```C 70 | struct my_value *val = /* ... 
*/; 71 | 72 | /* Add a my_value (fails and returns -EEXIST if the key already exists) */ 73 | int result1 = hashmap_put(&map, "KeyABC", val); 74 | 75 | /* Add or update a my_value (assigns previous value to old_data if the key already exists) */ 76 | struct my_value *old_val; 77 | int result2 = hashmap_insert(&map, "KeyABC", val, &old_val); 78 | ``` 79 | 80 | ### Value access 81 | 82 | ```C 83 | /* Access the value with a given key */ 84 | struct my_value *val1 = hashmap_get(&map, "KeyABC"); 85 | 86 | /* Access the key or value with an iterator */ 87 | HASHMAP_ITER(map) iter = hashmap_iter_find(&map, "keyABC"); 88 | const char *key = hashmap_iter_get_key(&iter); 89 | struct my_value *val2 = hashmap_iter_get_data(&iter); 90 | 91 | /* Check if an entry with the given key exists */ 92 | bool present = hashmap_contains(&map, "KeyABC"); 93 | ``` 94 | 95 | ### Value removal 96 | 97 | ```C 98 | /* Erase the entry with the given key */ 99 | struct my_value *val = hashmap_remove(&map, "KeyABC"); 100 | 101 | /* Erase the entry with an iterator */ 102 | HASHMAP_ITER(map) iter = hashmap_iter_find(&map, "keyABC"); 103 | hashmap_iter_remove(&iter); 104 | 105 | /* Erase all entries */ 106 | hashmap_clear(&map); 107 | 108 | /* Erase all entries and reset the hash table heap allocation to its initial size */ 109 | hashmap_reset(&map); 110 | ``` 111 | 112 | ### Iteration 113 | 114 | Iteration may be accomplished using the "convenience" `foreach` macros, or by using the iterator interface directly. Generally, the `foreach` macros are the most intuitive and convenient. 
115 | 116 | ```C 117 | const char *key; 118 | struct my_value *val; 119 | 120 | /* Iterate over all map entries and access both keys and values */ 121 | hashmap_foreach(key, val, &map) { 122 | /* Access each entry */ 123 | } 124 | 125 | /* Iterate over all map entries and access just keys */ 126 | hashmap_foreach_key(key, &map) { 127 | /* Access each entry */ 128 | } 129 | 130 | /* Iterate over all map entries and access just values */ 131 | hashmap_foreach_data(val, &map) { 132 | /* Access each entry */ 133 | } 134 | ``` 135 | 136 | The above iteration macros are only safe for read-only access. To safely remove the current element during iteration, use the macros with a `_safe` suffix. These require an additional pointer parameter. For example: 137 | 138 | ```C 139 | const char *key; 140 | struct my_value *val; 141 | void *pos; 142 | 143 | /* Okay */ 144 | hashmap_foreach_key_safe(key, &map, pos) { 145 | hashmap_remove(&map, key); 146 | } 147 | ``` 148 | 149 | Iteration using the iterator interface. 150 | 151 | ```C 152 | HASHMAP_ITER(map) it; 153 | 154 | for (it = hashmap_iter(&map); hashmap_iter_valid(&it); hashmap_iter_next(&it)) { 155 | /* 156 | * Access entry using: 157 | * hashmap_iter_get_key() 158 | * hashmap_iter_get_data() 159 | * hashmap_iter_set_data() 160 | */ 161 | } 162 | ``` 163 | 164 | ### Additional examples 165 | 166 | Additional examples are located in the [examples](https://github.com/DavidLeeds/hashmap/tree/master/examples) directory in the source tree. 167 | 168 | ## How to Build and Install 169 | 170 | This project uses CMake to orchestrate the build and installation process. 171 | 172 | ### CMake Options 173 | 174 | * `HASHMAP_BUILD_TESTS` - Set to `ON` to generate unit tests. 175 | * `HASHMAP_BUILD_EXAMPLES` - Set to `ON` to build example code. 176 | 177 | ### How to build from source 178 | 179 | To build and install on your host system, follow these easy steps: 180 | 181 | 1. 
`git clone https://github.com/DavidLeeds/hashmap.git` - download the source 182 | 2. `mkdir build-hashmap && cd build-hashmap` - create a build directory outside the source tree 183 | 3. `cmake ../hashmap` - run CMake to set up the build 184 | 4. `make` - compile the code 185 | 5. `make test` - run the unit tests (if enabled) 186 | 6. `sudo make install` - _OPTIONAL_ install the library on this system 187 | 188 | ### How to integrate with an existing CMake project 189 | 190 | Clone and build this repository: 191 | 192 | ```cmake 193 | include(FetchContent) 194 | 195 | FetchContent_Declare( 196 | hashmap 197 | GIT_REPOSITORY https://github.com/DavidLeeds/hashmap.git 198 | GIT_SHALLOW ON 199 | ) 200 | FetchContent_MakeAvailable(hashmap) 201 | ``` 202 | 203 | Add `HashMap::HashMap` as a dependency, e.g.: 204 | 205 | ```cmake 206 | add_executable(my_app main.c) 207 | target_link_libraries(my_app PRIVATE HashMap::HashMap) 208 | ``` 209 | 210 | ## Contributions and Questions 211 | 212 | I welcome all questions and contributions. Feel free to e-mail me, or put up a pull request. The core algorithm is stable, but I'm happy to consider CMake improvements, compiler compatibility fixes, or API additions. 
213 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-slate -------------------------------------------------------------------------------- /clib.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "templated-hashmap", 3 | "version": "v2.1.0", 4 | "repo": "DavidLeeds/hashmap", 5 | "description": " Templated type-safe hashmap implementation in C using open addressing and linear probing for collision resolution.", 6 | "keywords": ["hashmap", "dictionary", "templated"], 7 | "license": "MIT", 8 | "src": ["src/hashmap.c", "include/hashmap.h", "include/hashmap_base.h"] 9 | } 10 | -------------------------------------------------------------------------------- /cmake/HashMapConfig.cmake.in: -------------------------------------------------------------------------------- 1 | get_filename_component(HashMap_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) 2 | include(CMakeFindDependencyMacro) 3 | 4 | if(NOT TARGET HashMap::HashMap) 5 | include("${HashMap_CMAKE_DIR}/HashMapTargets.cmake") 6 | endif() 7 | 8 | set(HashMap_LIBRARIES HashMap::HashMap) 9 | -------------------------------------------------------------------------------- /examples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Hashmap example 2 | add_executable(hashmap_example hashmap_example.c) 3 | target_compile_options(hashmap_example PRIVATE -Wall -Werror) 4 | target_link_libraries(hashmap_example PRIVATE HashMap::HashMap) 5 | 6 | -------------------------------------------------------------------------------- /examples/hashmap_example.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2020 David Leeds 3 | * 4 | * Hashmap is free software; you can redistribute it and/or modify 5 | * it under the 
terms of the MIT license. See LICENSE for details. 6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include 15 | 16 | /* Some sample data structure with a string key */ 17 | struct blob { 18 | char key[32]; 19 | size_t data_len; 20 | unsigned char data[1024]; 21 | }; 22 | 23 | /* 24 | * Contrived function to allocate blob structures and populate 25 | * them with randomized data. 26 | * 27 | * Returns NULL when there are no more blobs to load. 28 | */ 29 | struct blob *blob_load(void) 30 | { 31 | static size_t count = 0; 32 | struct blob *b; 33 | 34 | if (count++ > 100) { 35 | return NULL; 36 | } 37 | 38 | if ((b = malloc(sizeof(*b))) == NULL) { 39 | return NULL; 40 | } 41 | snprintf(b->key, sizeof(b->key), "%02x", rand() % 100); 42 | b->data_len = rand() % 10; 43 | memset(b->data, rand(), b->data_len); 44 | 45 | return b; 46 | } 47 | 48 | int main(int argc, char **argv) 49 | { 50 | /* Declare type-specific hashmap structure */ 51 | HASHMAP(char, struct blob) map; 52 | const char *key; 53 | struct blob *b; 54 | void *temp; 55 | int r; 56 | 57 | /* Initialize with default string key hash function and comparator */ 58 | hashmap_init(&map, hashmap_hash_string, strcmp); 59 | 60 | /* Load some sample data into the map and discard duplicates */ 61 | while ((b = blob_load()) != NULL) { 62 | r = hashmap_put(&map, b->key, b); 63 | if (r < 0) { 64 | /* Expect -EEXIST return value for duplicates */ 65 | printf("putting blob[%s] failed: %s\n", b->key, strerror(-r)); 66 | free(b); 67 | } 68 | } 69 | 70 | /* Lookup a blob with key "AbCdEf" */ 71 | b = hashmap_get(&map, "AbCdEf"); 72 | if (b) { 73 | printf("Found blob[%s]\n", b->key); 74 | } 75 | 76 | /* Iterate through all blobs and print each one */ 77 | hashmap_foreach(key, b, &map) { 78 | printf("blob[%s]: data_len %zu bytes\n", key, b->data_len); 79 | } 80 | 81 | /* Remove all blobs with no data (using remove-safe foreach macro) */ 82 | hashmap_foreach_data_safe(b, &map, temp) { 83 | 
if (b->data_len == 0) { 84 | printf("Discarding blob[%s] with no data\n", b->key); 85 | hashmap_remove(&map, b->key); 86 | free(b); 87 | } 88 | } 89 | 90 | /* Cleanup time: free all the blobs, and destruct the hashmap */ 91 | hashmap_foreach_data(b, &map) { 92 | free(b); 93 | } 94 | hashmap_cleanup(&map); 95 | 96 | return 0; 97 | } 98 | -------------------------------------------------------------------------------- /include/hashmap.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2020 David Leeds 3 | * 4 | * Hashmap is free software; you can redistribute it and/or modify 5 | * it under the terms of the MIT license. See LICENSE for details. 6 | */ 7 | 8 | #pragma once 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | #include 15 | 16 | #include "hashmap_base.h" 17 | 18 | /* 19 | * INTERNAL USE ONLY: Updates an iterator structure after the current element was removed. 20 | */ 21 | #define __HASHMAP_ITER_RESET(iter) \ 22 | ({ ((iter)->iter_pos = hashmap_base_iter((iter)->iter_map, (iter)->iter_pos)) != NULL; }) 23 | 24 | /* 25 | * INTERNAL USE ONLY: foreach macro internals. 
26 | */ 27 | #define __HASHMAP_CONCAT_2(x, y) x##y 28 | #define __HASHMAP_CONCAT(x, y) __HASHMAP_CONCAT_2(x, y) 29 | #define __HASHMAP_MAKE_UNIQUE(prefix) __HASHMAP_CONCAT(__HASHMAP_CONCAT(prefix, __COUNTER__), _) 30 | #define __HASHMAP_UNIQUE(unique, name) __HASHMAP_CONCAT(unique, name) 31 | #define __HASHMAP_FOREACH(x, key, data, h) \ 32 | for (HASHMAP_ITER(*(h)) __HASHMAP_UNIQUE(x, it) = hashmap_iter(h); \ 33 | ((key) = hashmap_iter_get_key(&__HASHMAP_UNIQUE(x, it))) && \ 34 | ((data) = hashmap_iter_get_data(&__HASHMAP_UNIQUE(x, it))); \ 35 | hashmap_iter_next(&__HASHMAP_UNIQUE(x, it))) 36 | #define __HASHMAP_FOREACH_SAFE(x, key, data, h, pos) \ 37 | for (HASHMAP_ITER(*(h)) __HASHMAP_UNIQUE(x, it) = hashmap_iter(h); \ 38 | ((pos) = (void *)((key) = hashmap_iter_get_key(&__HASHMAP_UNIQUE(x, it)))) && \ 39 | ((data) = hashmap_iter_get_data(&__HASHMAP_UNIQUE(x, it))); \ 40 | ((pos) == (void *)hashmap_iter_get_key(&__HASHMAP_UNIQUE(x, it))) ? \ 41 | hashmap_iter_next(&__HASHMAP_UNIQUE(x, it)) : \ 42 | __HASHMAP_ITER_RESET(&__HASHMAP_UNIQUE(x, it))) 43 | #define __HASHMAP_FOREACH_KEY(x, key, h) \ 44 | for (HASHMAP_ITER(*(h)) __HASHMAP_UNIQUE(x, it) = hashmap_iter(h); \ 45 | (key = hashmap_iter_get_key(&__HASHMAP_UNIQUE(x, it))); hashmap_iter_next(&__HASHMAP_UNIQUE(x, it))) 46 | #define __HASHMAP_FOREACH_KEY_SAFE(x, key, h, pos) \ 47 | for (HASHMAP_ITER(*(h)) __HASHMAP_UNIQUE(x, it) = hashmap_iter(h); \ 48 | ((pos) = (void *)((key) = hashmap_iter_get_key(&__HASHMAP_UNIQUE(x, it)))); \ 49 | ((pos) == (void *)hashmap_iter_get_key(&__HASHMAP_UNIQUE(x, it))) ? 
\ 50 | hashmap_iter_next(&__HASHMAP_UNIQUE(x, it)) : \ 51 | __HASHMAP_ITER_RESET(&__HASHMAP_UNIQUE(x, it))) 52 | #define __HASHMAP_FOREACH_DATA(x, data, h) \ 53 | for (HASHMAP_ITER(*(h)) __HASHMAP_UNIQUE(x, it) = hashmap_iter(h); \ 54 | (data = hashmap_iter_get_data(&__HASHMAP_UNIQUE(x, it))); hashmap_iter_next(&__HASHMAP_UNIQUE(x, it))) 55 | #define __HASHMAP_FOREACH_DATA_SAFE(x, data, h, pos) \ 56 | for (HASHMAP_ITER(*(h)) __HASHMAP_UNIQUE(x, it) = hashmap_iter(h); \ 57 | ((pos) = (void *)hashmap_iter_get_key(&__HASHMAP_UNIQUE(x, it))) && \ 58 | ((data) = hashmap_iter_get_data(&__HASHMAP_UNIQUE(x, it))); \ 59 | ((pos) == (void *)hashmap_iter_get_key(&__HASHMAP_UNIQUE(x, it))) ? \ 60 | hashmap_iter_next(&__HASHMAP_UNIQUE(x, it)) : \ 61 | __HASHMAP_ITER_RESET(&__HASHMAP_UNIQUE(x, it))) 62 | 63 | /* 64 | * Template macro to define a type-specific hashmap. 65 | * 66 | * Example declarations: 67 | * HASHMAP(int, struct foo) map1; 68 | * // key_type: const int * 69 | * // data_type: struct foo * 70 | * 71 | * HASHMAP(char, char) map2; 72 | * // key_type: const char * 73 | * // data_type: char * 74 | */ 75 | #define HASHMAP(key_type, data_type) \ 76 | struct { \ 77 | struct hashmap_base map_base; \ 78 | struct { \ 79 | const key_type *t_key; \ 80 | data_type *t_data; \ 81 | size_t (*t_hash_func)(const key_type *); \ 82 | int (*t_compare_func)(const key_type *, const key_type *); \ 83 | key_type *(*t_key_dup_func)(const key_type *); \ 84 | void (*t_key_free_func)(key_type *); \ 85 | int (*t_foreach_func)(const key_type *, data_type *, void *); \ 86 | struct { \ 87 | struct hashmap_base *iter_map; \ 88 | struct hashmap_entry *iter_pos; \ 89 | struct { \ 90 | const key_type *t_key; \ 91 | data_type *t_data; \ 92 | } iter_types[0]; \ 93 | } t_iterator; \ 94 | } map_types[0]; \ 95 | } 96 | 97 | /* 98 | * Template macro to define a hashmap iterator. 
99 | * 100 | * Example declarations: 101 | * HASHMAP_ITER(my_hashmap) iter; 102 | */ 103 | #define HASHMAP_ITER(hashmap_type) typeof((hashmap_type).map_types->t_iterator) 104 | 105 | /* 106 | * Initialize an empty hashmap. 107 | * 108 | * Parameters: 109 | * HASHMAP(, ) *h - hashmap pointer 110 | * size_t (*hash_func)(const *) - hash function that should return an 111 | * even distribution of numbers between 0 and SIZE_MAX varying on the key provided. 112 | * int (*compare_func)(const *, const *) - key comparison function that 113 | * should return 0 if the keys match, and non-zero otherwise. 114 | * 115 | * This library provides some basic hash functions: 116 | * size_t hashmap_hash_default(const void *data, size_t len) - Jenkins one-at-a-time hash for 117 | * keys of any data type. Create a type-specific wrapper function to pass to hashmap_init(). 118 | * size_t hashmap_hash_string(const char *key) - case sensitive string hash function. 119 | * Pass this directly to hashmap_init(). 120 | * size_t hashmap_hash_string_i(const char *key) - non-case sensitive string hash function. 121 | * Pass this directly to hashmap_init(). 122 | */ 123 | #define hashmap_init(h, hash_func, compare_func) \ 124 | do { \ 125 | typeof((h)->map_types->t_hash_func) __map_hash = (hash_func); \ 126 | typeof((h)->map_types->t_compare_func) __map_compare = (compare_func); \ 127 | hashmap_base_init(&(h)->map_base, (size_t (*)(const void *))__map_hash, \ 128 | (int (*)(const void *, const void *))__map_compare); \ 129 | } while (0) 130 | 131 | /* 132 | * Free the hashmap and all associated memory. 133 | * 134 | * Parameters: 135 | * HASHMAP(, ) *h - hashmap pointer 136 | */ 137 | #define hashmap_cleanup(h) hashmap_base_cleanup(&(h)->map_base) 138 | 139 | /* 140 | * Enable internal memory allocation and management for hash keys. 
141 | * 142 | * Parameters: 143 | * HASHMAP(<key_type>, <data_type>) *h - hashmap pointer 144 | * <key_type> *(*key_dup_func)(const <key_type> *) - allocate a copy of the key to be 145 | * managed internally by the hashmap. 146 | * void (*key_free_func)(<key_type> *) - free resources associated with a key 147 | */ 148 | #define hashmap_set_key_alloc_funcs(h, key_dup_func, key_free_func) \ 149 | do { \ 150 | typeof((h)->map_types->t_key_dup_func) __map_key_dup = (key_dup_func); \ 151 | typeof((h)->map_types->t_key_free_func) __map_key_free = (key_free_func); \ 152 | hashmap_base_set_key_alloc_funcs(&(h)->map_base, (void *(*)(const void *))__map_key_dup, \ 153 | (void (*)(void *))__map_key_free); \ 154 | } while (0) 155 | 156 | /* 157 | * Return the number of entries in the hashmap. 158 | * 159 | * Parameters: 160 | * const HASHMAP(<key_type>, <data_type>) *h - hashmap pointer 161 | */ 162 | #define hashmap_size(h) ((typeof((h)->map_base.size))(h)->map_base.size) 163 | 164 | /* 165 | * Return true if the hashmap is empty. 166 | * 167 | * Parameters: 168 | * const HASHMAP(<key_type>, <data_type>) *h - hashmap pointer 169 | */ 170 | #define hashmap_empty(h) (hashmap_size(h) == 0) 171 | 172 | /* 173 | * Set the hashmap's initial allocation size such that no rehashes are 174 | * required to fit the specified number of entries. 175 | * 176 | * Parameters: 177 | * HASHMAP(<key_type>, <data_type>) *h - hashmap pointer 178 | * size_t capacity - number of entries. 179 | * 180 | * Returns 0 on success, or -errno on failure. 181 | */ 182 | #define hashmap_reserve(h, capacity) hashmap_base_reserve(&(h)->map_base, capacity) 183 | 184 | /* 185 | * Get the hashmap's present allocation size. 186 | * 187 | * Parameters: 188 | * HASHMAP(<key_type>, <data_type>) *h - hashmap pointer 189 | * 190 | * Returns the current value of the map's table_size field (this macro cannot fail). 191 | */ 192 | #define hashmap_capacity(h) ((typeof((h)->map_base.table_size))(h)->map_base.table_size) 193 | 194 | /* 195 | * Add a new entry to the hashmap. If an entry with a matching key is already 196 | * present, -EEXIST is returned. 
197 | * 198 | * Parameters: 199 | * HASHMAP(, ) *h - hashmap pointer 200 | * *key - pointer to the entry's key 201 | * *data - pointer to the entry's data 202 | * 203 | * Returns 0 on success, or -errno on failure. 204 | */ 205 | #define hashmap_put(h, key, data) \ 206 | ({ \ 207 | typeof((h)->map_types->t_key) __map_key = (key); \ 208 | typeof((h)->map_types->t_data) __map_data = (data); \ 209 | hashmap_base_put(&(h)->map_base, (const void *)__map_key, (void *)__map_data); \ 210 | }) 211 | 212 | /* 213 | * Add a new entry to the hashmap, or update an existing entry. If an entry 214 | * with a matching key is already present, its data is updated. If old_data 215 | * is non-null, the previous data pointer is assigned to it. 216 | * 217 | * Parameters: 218 | * HASHMAP(, ) *h - hashmap pointer 219 | * *key - pointer to the entry's key 220 | * *data - pointer to the entry's data 221 | * **old_data - optional pointer to assign the previous data to 222 | * 223 | * Returns 1 on add, 0 on update, or -errno on failure. 224 | */ 225 | #define hashmap_insert(h, key, data, old_data) \ 226 | ({ \ 227 | typeof((h)->map_types->t_key) __map_key = (key); \ 228 | typeof((h)->map_types->t_data) __map_data = (data); \ 229 | typeof((h)->map_types->t_data) *__map_old_data = (old_data); \ 230 | hashmap_base_insert(&(h)->map_base, (const void *)__map_key, (void *)__map_data, (void **)__map_old_data); \ 231 | }) 232 | 233 | /* 234 | * Do a constant-time lookup of a hashmap entry. 235 | * 236 | * Parameters: 237 | * HASHMAP(, ) *h - hashmap pointer 238 | * *key - pointer to the key to lookup 239 | * 240 | * Return the data pointer, or NULL if no entry exists. 241 | */ 242 | #define hashmap_get(h, key) \ 243 | ({ \ 244 | typeof((h)->map_types->t_key) __map_key = (key); \ 245 | (typeof((h)->map_types->t_data))hashmap_base_get(&(h)->map_base, (const void *)__map_key); \ 246 | }) 247 | 248 | /* 249 | * Return true if the hashmap contains an entry with the specified key. 
250 | * 251 | * Parameters: 252 | * const HASHMAP(, ) *h - hashmap pointer 253 | */ 254 | #define hashmap_contains(h, key) (hashmap_get(h, key) != NULL) 255 | 256 | /* 257 | * Remove an entry with the specified key from the map. 258 | * 259 | * Parameters: 260 | * HASHMAP(, ) *h - hashmap pointer 261 | * *key - pointer to the key to remove 262 | * 263 | * Returns the data pointer, or NULL, if no entry was found. 264 | * 265 | * Note: it is not safe to call this function while iterating, unless 266 | * the "safe" variant of the foreach macro is used, and only the current 267 | * key is removed. 268 | */ 269 | #define hashmap_remove(h, key) \ 270 | ({ \ 271 | typeof((h)->map_types->t_key) __map_key = (key); \ 272 | (typeof((h)->map_types->t_data))hashmap_base_remove(&(h)->map_base, (const void *)__map_key); \ 273 | }) 274 | 275 | /* 276 | * Remove all entries. 277 | * 278 | * Parameters: 279 | * HASHMAP(, ) *h - hashmap pointer 280 | */ 281 | #define hashmap_clear(h) hashmap_base_clear(&(h)->map_base) 282 | 283 | /* 284 | * Remove all entries and reset the hash table to its initial size. 285 | * 286 | * Parameters: 287 | * HASHMAP(, ) *h - hashmap pointer 288 | */ 289 | #define hashmap_reset(h) hashmap_base_reset(&(h)->map_base) 290 | 291 | /* 292 | * Return an iterator for this hashmap. The iterator is a type-specific 293 | * structure that may be declared using the HASHMAP_ITER() macro. 294 | * 295 | * Parameters: 296 | * HASHMAP(, ) *h - hashmap pointer 297 | */ 298 | #define hashmap_iter(h) ((HASHMAP_ITER(*(h))){&(h)->map_base, hashmap_base_iter(&(h)->map_base, NULL)}) 299 | 300 | /* 301 | * Return true if an iterator is valid and safe to use. 302 | * 303 | * Parameters: 304 | * HASHMAP_ITER() *iter - iterator pointer 305 | */ 306 | #define hashmap_iter_valid(iter) hashmap_base_iter_valid((iter)->iter_map, (iter)->iter_pos) 307 | 308 | /* 309 | * Advance an iterator to the next hashmap entry. 
310 | * 311 | * Parameters: 312 | * HASHMAP_ITER() *iter - iterator pointer 313 | * 314 | * Returns true if the iterator is valid after the operation. 315 | */ 316 | #define hashmap_iter_next(iter) hashmap_base_iter_next((iter)->iter_map, &(iter)->iter_pos) 317 | 318 | /* 319 | * This function behaves like hashmap_get(), but returns an iterator. 320 | * This provides an efficient way to access and remove an entry without 321 | * performing two lookups. 322 | * 323 | * Parameters: 324 | * HASHMAP(, ) *h - hashmap pointer 325 | * *key - pointer to the key to lookup 326 | * 327 | * Returns a valid iterator if the key exists, otherwise an invalid iterator. 328 | */ 329 | #define hashmap_iter_find(h, key) ((HASHMAP_ITER(*(h))){&(h)->map_base, hashmap_base_iter_find(&(h)->map_base, key)}) 330 | 331 | /* 332 | * Remove the hashmap entry pointed to by this iterator and advance the 333 | * iterator to the next entry. 334 | * 335 | * Parameters: 336 | * HASHMAP_ITER() *iter - iterator pointer 337 | * 338 | * Returns true if the iterator is valid after the operation. 339 | */ 340 | #define hashmap_iter_remove(iter) hashmap_base_iter_remove((iter)->iter_map, &(iter)->iter_pos) 341 | 342 | /* 343 | * Return the key of the entry pointed to by the iterator. 344 | * 345 | * Parameters: 346 | * HASHMAP_ITER() *iter - iterator pointer 347 | */ 348 | #define hashmap_iter_get_key(iter) ((typeof((iter)->iter_types->t_key))hashmap_base_iter_get_key((iter)->iter_pos)) 349 | 350 | /* 351 | * Return the data of the entry pointed to by the iterator. 352 | * 353 | * Parameters: 354 | * HASHMAP_ITER() *iter - iterator pointer 355 | */ 356 | #define hashmap_iter_get_data(iter) ((typeof((iter)->iter_types->t_data))hashmap_base_iter_get_data((iter)->iter_pos)) 357 | 358 | /* 359 | * Set the data pointer of the entry pointed to by the iterator. 
360 | * 361 | * Parameters: 362 | * HASHMAP_ITER() *iter - iterator pointer 363 | * *data - new data pointer 364 | */ 365 | #define hashmap_iter_set_data(iter, data) \ 366 | ({ \ 367 | (typeof((iter)->iter_types->t_data))__map_data = (data); \ 368 | hashmap_base_iter_set_data((iter)->iter_pos), (void *)__map_data); \ 369 | }) 370 | 371 | /* 372 | * Convenience macro to iterate through the contents of a hashmap. 373 | * key and data are assigned pointers to the current hashmap entry. 374 | * It is NOT safe to modify the hashmap while iterating. 375 | * 376 | * Parameters: 377 | * const *key - key pointer assigned on each iteration 378 | * *data - data pointer assigned on each iteration 379 | * HASHMAP(, ) *h - hashmap pointer 380 | */ 381 | #define hashmap_foreach(key, data, h) __HASHMAP_FOREACH(__HASHMAP_MAKE_UNIQUE(__map), (key), (data), (h)) 382 | 383 | /* 384 | * Convenience macro to iterate through the contents of a hashmap. 385 | * key and data are assigned pointers to the current hashmap entry. 386 | * Unlike hashmap_foreach(), it is safe to call hashmap_remove() on the 387 | * current entry. 388 | * 389 | * Parameters: 390 | * const *key - key pointer assigned on each iteration 391 | * *data - data pointer assigned on each iteration 392 | * HASHMAP(, ) *h - hashmap pointer 393 | * void *pos - opaque pointer assigned on each iteration 394 | */ 395 | #define hashmap_foreach_safe(key, data, h, pos) \ 396 | __HASHMAP_FOREACH_SAFE(__HASHMAP_MAKE_UNIQUE(__map), (key), (data), (h), (pos)) 397 | 398 | /* 399 | * Convenience macro to iterate through the keys of a hashmap. 400 | * key is assigned a pointer to the current hashmap entry. 401 | * It is NOT safe to modify the hashmap while iterating. 
402 | * 403 | * Parameters: 404 | * const *key - key pointer assigned on each iteration 405 | * HASHMAP(, ) *h - hashmap pointer 406 | */ 407 | #define hashmap_foreach_key(key, h) __HASHMAP_FOREACH_KEY(__HASHMAP_MAKE_UNIQUE(__map), (key), (h)) 408 | 409 | /* 410 | * Convenience macro to iterate through the keys of a hashmap. 411 | * key is assigned a pointer to the current hashmap entry. 412 | * Unlike hashmap_foreach_key(), it is safe to call hashmap_remove() on the 413 | * current entry. 414 | * 415 | * Parameters: 416 | * const *key - key pointer assigned on each iteration 417 | * HASHMAP(, ) *h - hashmap pointer 418 | * void *pos - opaque pointer assigned on each iteration 419 | */ 420 | #define hashmap_foreach_key_safe(key, h, pos) \ 421 | __HASHMAP_FOREACH_KEY_SAFE(__HASHMAP_MAKE_UNIQUE(__map), (key), (h), (pos)) 422 | 423 | /* 424 | * Convenience macro to iterate through the data of a hashmap. 425 | * data is assigned a pointer to the current hashmap entry. 426 | * It is NOT safe to modify the hashmap while iterating. 427 | * 428 | * Parameters: 429 | * *data - data pointer assigned on each iteration 430 | * HASHMAP(, ) *h - hashmap pointer 431 | */ 432 | #define hashmap_foreach_data(data, h) __HASHMAP_FOREACH_DATA(__HASHMAP_MAKE_UNIQUE(__map), (data), (h)) 433 | 434 | /* 435 | * Convenience macro to iterate through the data of a hashmap. 436 | * data is assigned a pointer to the current hashmap entry. 437 | * Unlike hashmap_foreach_data(), it is safe to call hashmap_remove() on the 438 | * current entry. 439 | * 440 | * Parameters: 441 | * *data - data pointer assigned on each iteration 442 | * HASHMAP(, ) *h - hashmap pointer 443 | * void *pos - opaque pointer assigned on each iteration 444 | */ 445 | #define hashmap_foreach_data_safe(data, h, pos) \ 446 | __HASHMAP_FOREACH_DATA_SAFE(__HASHMAP_MAKE_UNIQUE(__map), (data), (h), (pos)) 447 | 448 | /* 449 | * Return the load factor. 
450 | * 451 | * Parameters: 452 | * HASHMAP(, ) *h - hashmap pointer 453 | */ 454 | #define hashmap_load_factor(h) hashmap_base_load_factor(&(h)->map_base) 455 | 456 | /* 457 | * Return the number of collisions for this key. 458 | * This would always be 0 if a perfect hash function was used, but in ordinary 459 | * usage, there may be a few collisions, depending on the hash function and 460 | * load factor. 461 | * 462 | * Parameters: 463 | * HASHMAP(, ) *h - hashmap pointer 464 | * *key - pointer to the entry's key 465 | */ 466 | #define hashmap_collisions(h, key) \ 467 | ({ \ 468 | typeof((h)->map_types->t_key) __map_key = (key); \ 469 | hashmap_base_collisions(&(h)->map_base, (const void *)__map_key); \ 470 | }) 471 | 472 | /* 473 | * Return the average number of collisions per entry. 474 | * 475 | * Parameters: 476 | * HASHMAP(, ) *h - hashmap pointer 477 | */ 478 | #define hashmap_collisions_mean(h) hashmap_base_collisions_mean(&(h)->map_base) 479 | 480 | /* 481 | * Return the variance between entry collisions. The higher the variance, 482 | * the more likely the hash function is poor and is resulting in clustering. 483 | * 484 | * Parameters: 485 | * HASHMAP(, ) *h - hashmap pointer 486 | */ 487 | #define hashmap_collisions_variance(h) hashmap_base_collisions_variance(&(h)->map_base) 488 | 489 | #ifdef __cplusplus 490 | } 491 | #endif 492 | -------------------------------------------------------------------------------- /include/hashmap_base.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2020 David Leeds 3 | * 4 | * Hashmap is free software; you can redistribute it and/or modify 5 | * it under the terms of the MIT license. See LICENSE for details. 
6 | */ 7 | 8 | #pragma once 9 | 10 | #include 11 | #include 12 | 13 | struct hashmap_entry; 14 | 15 | struct hashmap_base { 16 | size_t table_size_init; 17 | size_t table_size; 18 | size_t size; 19 | struct hashmap_entry *table; 20 | size_t (*hash)(const void *); 21 | int (*compare)(const void *, const void *); 22 | void *(*key_dup)(const void *); 23 | void (*key_free)(void *); 24 | }; 25 | 26 | void hashmap_base_init(struct hashmap_base *hb, size_t (*hash_func)(const void *), 27 | int (*compare_func)(const void *, const void *)); 28 | void hashmap_base_cleanup(struct hashmap_base *hb); 29 | 30 | void hashmap_base_set_key_alloc_funcs(struct hashmap_base *hb, void *(*key_dup_func)(const void *), 31 | void (*key_free_func)(void *)); 32 | 33 | int hashmap_base_reserve(struct hashmap_base *hb, size_t capacity); 34 | 35 | int hashmap_base_put(struct hashmap_base *hb, const void *key, void *data); 36 | int hashmap_base_insert(struct hashmap_base *hb, const void *key, void *data, void **old_data); 37 | void *hashmap_base_get(const struct hashmap_base *hb, const void *key); 38 | void *hashmap_base_remove(struct hashmap_base *hb, const void *key); 39 | 40 | void hashmap_base_clear(struct hashmap_base *hb); 41 | void hashmap_base_reset(struct hashmap_base *hb); 42 | 43 | struct hashmap_entry *hashmap_base_iter(const struct hashmap_base *hb, const struct hashmap_entry *pos); 44 | bool hashmap_base_iter_valid(const struct hashmap_base *hb, const struct hashmap_entry *iter); 45 | bool hashmap_base_iter_next(const struct hashmap_base *hb, struct hashmap_entry **iter); 46 | struct hashmap_entry *hashmap_base_iter_find(const struct hashmap_base *hb, const void *key); 47 | bool hashmap_base_iter_remove(struct hashmap_base *hb, struct hashmap_entry **iter); 48 | const void *hashmap_base_iter_get_key(const struct hashmap_entry *iter); 49 | void *hashmap_base_iter_get_data(const struct hashmap_entry *iter); 50 | int hashmap_base_iter_set_data(struct hashmap_entry *iter, void *data); 
51 | 52 | double hashmap_base_load_factor(const struct hashmap_base *hb); 53 | size_t hashmap_base_collisions(const struct hashmap_base *hb, const void *key); 54 | double hashmap_base_collisions_mean(const struct hashmap_base *hb); 55 | double hashmap_base_collisions_variance(const struct hashmap_base *hb); 56 | 57 | size_t hashmap_hash_default(const void *data, size_t len); 58 | size_t hashmap_hash_string(const char *key); 59 | size_t hashmap_hash_string_i(const char *key); 60 | -------------------------------------------------------------------------------- /src/hashmap.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2020 David Leeds 3 | * 4 | * Hashmap is free software; you can redistribute it and/or modify 5 | * it under the terms of the MIT license. See LICENSE for details. 6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include "hashmap_base.h" 17 | 18 | /* Table sizes must be powers of 2 */ 19 | #define HASHMAP_SIZE_MIN 32 20 | #define HASHMAP_SIZE_DEFAULT 128 21 | #define HASHMAP_SIZE_MOD(map, val) ((val) & ((map)->table_size - 1)) 22 | 23 | /* Return the next linear probe index */ 24 | #define HASHMAP_PROBE_NEXT(map, index) HASHMAP_SIZE_MOD(map, (index) + 1) 25 | 26 | struct hashmap_entry { 27 | void *key; 28 | void *data; 29 | }; 30 | 31 | /* 32 | * Calculate the optimal table size, given the specified max number 33 | * of elements. 
34 | */ 35 | static inline size_t hashmap_calc_table_size(const struct hashmap_base *hb, size_t size) 36 | { 37 | size_t table_size; 38 | 39 | /* Enforce a maximum 0.75 load factor */ 40 | table_size = size + (size / 3); 41 | 42 | /* Ensure capacity is not lower than the hashmap initial size */ 43 | if (table_size < hb->table_size_init) { 44 | table_size = hb->table_size_init; 45 | } else { 46 | /* Round table size up to nearest power of 2 */ 47 | table_size = 1 << ((sizeof(unsigned long) << 3) - __builtin_clzl(table_size - 1)); 48 | } 49 | 50 | return table_size; 51 | } 52 | 53 | /* 54 | * Get a valid hash table index from a key. 55 | */ 56 | static inline size_t hashmap_calc_index(const struct hashmap_base *hb, const void *key) 57 | { 58 | size_t index = hb->hash(key); 59 | 60 | /* 61 | * Run a secondary hash on the index. This is a small performance hit, but 62 | * reduces clustering and provides more consistent performance if a poor 63 | * hash function is used. 64 | */ 65 | index = hashmap_hash_default(&index, sizeof(index)); 66 | 67 | return HASHMAP_SIZE_MOD(hb, index); 68 | } 69 | 70 | /* 71 | * Return the next populated entry, starting with the specified one. 72 | * Returns NULL if there are no more valid entries. 73 | */ 74 | static struct hashmap_entry *hashmap_entry_get_populated(const struct hashmap_base *hb, 75 | const struct hashmap_entry *entry) 76 | { 77 | if (hb->size > 0 && entry >= hb->table) { 78 | for (; entry < &hb->table[hb->table_size]; ++entry) { 79 | if (entry->key) { 80 | return (struct hashmap_entry *)entry; 81 | } 82 | } 83 | } 84 | return NULL; 85 | } 86 | 87 | /* 88 | * Find the hashmap entry with the specified key, or an empty slot. 89 | * Returns NULL if the entire table has been searched without finding a match. 
90 | */ 91 | static struct hashmap_entry *hashmap_entry_find(const struct hashmap_base *hb, const void *key, bool find_empty) 92 | { 93 | size_t i; 94 | size_t index; 95 | struct hashmap_entry *entry; 96 | 97 | index = hashmap_calc_index(hb, key); 98 | 99 | /* Linear probing */ 100 | for (i = 0; i < hb->table_size; ++i) { 101 | entry = &hb->table[index]; 102 | if (!entry->key) { 103 | if (find_empty) { 104 | return entry; 105 | } 106 | return NULL; 107 | } 108 | if (hb->compare(key, entry->key) == 0) { 109 | return entry; 110 | } 111 | index = HASHMAP_PROBE_NEXT(hb, index); 112 | } 113 | return NULL; 114 | } 115 | 116 | /* 117 | * Removes the specified entry and processes the following entries to 118 | * keep the chain contiguous. This is a required step for hashmaps 119 | * using linear probing. 120 | */ 121 | static void hashmap_entry_remove(struct hashmap_base *hb, struct hashmap_entry *removed_entry) 122 | { 123 | size_t i; 124 | size_t index; 125 | size_t entry_index; 126 | size_t removed_index = (removed_entry - hb->table); 127 | struct hashmap_entry *entry; 128 | 129 | /* Free the key */ 130 | if (hb->key_free) { 131 | hb->key_free(removed_entry->key); 132 | } 133 | 134 | --hb->size; 135 | 136 | /* Fill the free slot in the chain */ 137 | index = HASHMAP_PROBE_NEXT(hb, removed_index); 138 | for (i = 0; i < hb->size; ++i) { 139 | entry = &hb->table[index]; 140 | if (!entry->key) { 141 | /* Reached end of chain */ 142 | break; 143 | } 144 | entry_index = hashmap_calc_index(hb, entry->key); 145 | /* Shift in entries in the chain with an index at or before the removed slot */ 146 | if (HASHMAP_SIZE_MOD(hb, index - entry_index) > HASHMAP_SIZE_MOD(hb, removed_index - entry_index)) { 147 | *removed_entry = *entry; 148 | removed_index = index; 149 | removed_entry = entry; 150 | } 151 | index = HASHMAP_PROBE_NEXT(hb, index); 152 | } 153 | /* Clear the last removed entry */ 154 | memset(removed_entry, 0, sizeof(*removed_entry)); 155 | } 156 | 157 | /* 158 | * 
Reallocates the hash table to the new size and rehashes all entries. 159 | * new_size MUST be a power of 2. 160 | * Returns 0 on success and -errno on allocation or hash function failure. 161 | */ 162 | static int hashmap_rehash(struct hashmap_base *hb, size_t table_size) 163 | { 164 | size_t old_size; 165 | struct hashmap_entry *old_table; 166 | struct hashmap_entry *new_table; 167 | struct hashmap_entry *entry; 168 | struct hashmap_entry *new_entry; 169 | 170 | assert((table_size & (table_size - 1)) == 0); 171 | assert(table_size >= hb->size); 172 | 173 | new_table = (struct hashmap_entry *)calloc(table_size, sizeof(struct hashmap_entry)); 174 | if (!new_table) { 175 | return -ENOMEM; 176 | } 177 | old_size = hb->table_size; 178 | old_table = hb->table; 179 | hb->table_size = table_size; 180 | hb->table = new_table; 181 | 182 | /* Rehash */ 183 | for (entry = old_table; entry < old_table + old_size; ++entry) { 184 | if (!entry->key) { 185 | continue; 186 | } 187 | new_entry = hashmap_entry_find(hb, entry->key, true); 188 | /* Failure indicates an algorithm bug */ 189 | assert(new_entry != NULL); 190 | 191 | /* Shallow copy */ 192 | *new_entry = *entry; 193 | } 194 | free(old_table); 195 | return 0; 196 | } 197 | 198 | /* 199 | * Iterate through all entries and free all keys. 200 | */ 201 | static void hashmap_free_keys(struct hashmap_base *hb) 202 | { 203 | struct hashmap_entry *entry; 204 | 205 | if (!hb->key_free || hb->size == 0) { 206 | return; 207 | } 208 | for (entry = hb->table; entry < &hb->table[hb->table_size]; ++entry) { 209 | if (entry->key) { 210 | hb->key_free(entry->key); 211 | } 212 | } 213 | } 214 | 215 | /* 216 | * Initialize an empty hashmap. 217 | * 218 | * hash_func should return an even distribution of numbers between 0 219 | * and SIZE_MAX varying on the key provided. 220 | * 221 | * compare_func should return 0 if the keys match, and non-zero otherwise. 
222 | */ 223 | void hashmap_base_init(struct hashmap_base *hb, size_t (*hash_func)(const void *), 224 | int (*compare_func)(const void *, const void *)) 225 | { 226 | assert(hash_func != NULL); 227 | assert(compare_func != NULL); 228 | 229 | memset(hb, 0, sizeof(*hb)); 230 | 231 | hb->table_size_init = HASHMAP_SIZE_DEFAULT; 232 | hb->hash = hash_func; 233 | hb->compare = compare_func; 234 | } 235 | 236 | /* 237 | * Free the hashmap and all associated memory. 238 | */ 239 | void hashmap_base_cleanup(struct hashmap_base *hb) 240 | { 241 | if (!hb) { 242 | return; 243 | } 244 | hashmap_free_keys(hb); 245 | free(hb->table); 246 | memset(hb, 0, sizeof(*hb)); 247 | } 248 | 249 | /* 250 | * Enable internal memory management of hash keys. 251 | */ 252 | void hashmap_base_set_key_alloc_funcs(struct hashmap_base *hb, void *(*key_dup_func)(const void *), 253 | void (*key_free_func)(void *)) 254 | { 255 | assert(hb->size == 0); 256 | 257 | hb->key_dup = key_dup_func; 258 | hb->key_free = key_free_func; 259 | } 260 | 261 | /* 262 | * Set the hashmap's initial allocation size such that no rehashes are 263 | * required to fit the specified number of entries. 264 | * Returns 0 on success, or -errno on failure. 265 | */ 266 | int hashmap_base_reserve(struct hashmap_base *hb, size_t capacity) 267 | { 268 | size_t old_size_init; 269 | int r = 0; 270 | 271 | /* Backup original init size in case of failure */ 272 | old_size_init = hb->table_size_init; 273 | 274 | /* Set the minimal table init size to support the specified capacity */ 275 | hb->table_size_init = HASHMAP_SIZE_MIN; 276 | hb->table_size_init = hashmap_calc_table_size(hb, capacity); 277 | 278 | if (hb->table_size_init > hb->table_size) { 279 | r = hashmap_rehash(hb, hb->table_size_init); 280 | if (r < 0) { 281 | hb->table_size_init = old_size_init; 282 | } 283 | } 284 | return r; 285 | } 286 | 287 | /* 288 | * Add a new entry to the hashmap. If an entry with a matching key 289 | * is already present, -EEXIST is returned. 
290 | * Returns 0 on success, or -errno on failure. 291 | */ 292 | int hashmap_base_put(struct hashmap_base *hb, const void *key, void *data) 293 | { 294 | struct hashmap_entry *entry; 295 | size_t table_size; 296 | int r = 0; 297 | 298 | if (!key || !data) { 299 | return -EINVAL; 300 | } 301 | 302 | /* Preemptively rehash with 2x capacity if load factor is approaching 0.75 */ 303 | table_size = hashmap_calc_table_size(hb, hb->size); 304 | if (table_size > hb->table_size) { 305 | r = hashmap_rehash(hb, table_size); 306 | } 307 | 308 | /* Get the entry for this key */ 309 | entry = hashmap_entry_find(hb, key, true); 310 | if (!entry) { 311 | /* 312 | * Cannot find an empty slot. Either out of memory, 313 | * or hash or compare functions are malfunctioning. 314 | */ 315 | if (r < 0) { 316 | /* Return rehash error, if set */ 317 | return r; 318 | } 319 | return -EADDRNOTAVAIL; 320 | } 321 | 322 | if (entry->key) { 323 | /* Do not overwrite existing data */ 324 | return -EEXIST; 325 | } 326 | 327 | if (hb->key_dup) { 328 | /* Allocate copy of key to simplify memory management */ 329 | entry->key = hb->key_dup(key); 330 | if (!entry->key) { 331 | return -ENOMEM; 332 | } 333 | } else { 334 | entry->key = (void *)key; 335 | } 336 | entry->data = data; 337 | ++hb->size; 338 | return 0; 339 | } 340 | 341 | /* 342 | * Add a new entry to the hashmap, or update an existing entry. If an entry 343 | * with a matching key is already present, its data is updated. If old_data 344 | * is non-null, the previous data pointer is assigned to it. 345 | * Returns 1 on add, 0 on update, or -errno on failure. 
346 | */ 347 | int hashmap_base_insert(struct hashmap_base *hb, const void *key, void *data, void **old_data) 348 | { 349 | struct hashmap_entry *entry; 350 | size_t table_size; 351 | int r = 0; 352 | 353 | if (!key || !data) { 354 | return -EINVAL; 355 | } 356 | 357 | /* Preemptively rehash with 2x capacity if load factor is approaching 0.75 */ 358 | table_size = hashmap_calc_table_size(hb, hb->size); 359 | if (table_size > hb->table_size) { 360 | r = hashmap_rehash(hb, table_size); 361 | } 362 | 363 | /* Get the entry for this key */ 364 | entry = hashmap_entry_find(hb, key, true); 365 | if (!entry) { 366 | /* 367 | * Cannot find an empty slot. Either out of memory, 368 | * or hash or compare functions are malfunctioning. 369 | */ 370 | if (r < 0) { 371 | /* Return rehash error, if set */ 372 | return r; 373 | } 374 | return -EADDRNOTAVAIL; 375 | } 376 | 377 | if (!entry->key) { 378 | /* Adding a new entry */ 379 | if (hb->key_dup) { 380 | /* Allocate copy of key to simplify memory management */ 381 | entry->key = hb->key_dup(key); 382 | if (!entry->key) { 383 | return -ENOMEM; 384 | } 385 | } else { 386 | entry->key = (void *)key; 387 | } 388 | ++hb->size; 389 | r = 1; 390 | } 391 | 392 | /* Assign the previous data pointer if data was updated, otherwise NULL */ 393 | if (old_data) { 394 | if (data == entry->data) { 395 | *old_data = NULL; 396 | } else { 397 | *old_data = entry->data; 398 | } 399 | } 400 | 401 | entry->data = data; 402 | return r; 403 | } 404 | 405 | /* 406 | * Return the data pointer, or NULL if no entry exists. 407 | */ 408 | void *hashmap_base_get(const struct hashmap_base *hb, const void *key) 409 | { 410 | struct hashmap_entry *entry; 411 | 412 | if (!key) { 413 | return NULL; 414 | } 415 | 416 | entry = hashmap_entry_find(hb, key, false); 417 | if (!entry) { 418 | return NULL; 419 | } 420 | return entry->data; 421 | } 422 | 423 | /* 424 | * Remove an entry with the specified key from the map. 
425 | * Returns the data pointer, or NULL, if no entry was found. 426 | */ 427 | void *hashmap_base_remove(struct hashmap_base *hb, const void *key) 428 | { 429 | struct hashmap_entry *entry; 430 | void *data; 431 | 432 | if (!key) { 433 | return NULL; 434 | } 435 | 436 | entry = hashmap_entry_find(hb, key, false); 437 | if (!entry) { 438 | return NULL; 439 | } 440 | data = entry->data; 441 | /* Clear the entry and make the chain contiguous */ 442 | hashmap_entry_remove(hb, entry); 443 | return data; 444 | } 445 | 446 | /* 447 | * Remove all entries. 448 | */ 449 | void hashmap_base_clear(struct hashmap_base *hb) 450 | { 451 | hashmap_free_keys(hb); 452 | hb->size = 0; 453 | memset(hb->table, 0, sizeof(struct hashmap_entry) * hb->table_size); 454 | } 455 | 456 | /* 457 | * Remove all entries and reset the hash table to its initial size. 458 | */ 459 | void hashmap_base_reset(struct hashmap_base *hb) 460 | { 461 | struct hashmap_entry *new_table; 462 | 463 | hashmap_free_keys(hb); 464 | hb->size = 0; 465 | if (hb->table_size != hb->table_size_init) { 466 | new_table = (struct hashmap_entry *)realloc(hb->table, sizeof(struct hashmap_entry) * hb->table_size_init); 467 | if (new_table) { 468 | hb->table = new_table; 469 | hb->table_size = hb->table_size_init; 470 | } 471 | } 472 | memset(hb->table, 0, sizeof(struct hashmap_entry) * hb->table_size); 473 | } 474 | 475 | /* 476 | * Get a new hashmap iterator. The iterator is an opaque 477 | * pointer that may be used with hashmap_iter_*() functions. 478 | * Hashmap iterators are INVALID after a put or remove operation is performed. 479 | * hashmap_iter_remove() allows safe removal during iteration. 480 | */ 481 | struct hashmap_entry *hashmap_base_iter(const struct hashmap_base *hb, const struct hashmap_entry *pos) 482 | { 483 | if (!pos) { 484 | pos = hb->table; 485 | } 486 | return hashmap_entry_get_populated(hb, pos); 487 | } 488 | 489 | /* 490 | * Return true if an iterator is valid and safe to use. 
491 | */ 492 | bool hashmap_base_iter_valid(const struct hashmap_base *hb, const struct hashmap_entry *iter) 493 | { 494 | return hb && iter && iter->key && iter >= hb->table && iter < hb->table + hb->table_size; 495 | } 496 | 497 | /* 498 | * Advance an iterator to the next hashmap entry. 499 | * Returns false if there are no more entries. 500 | */ 501 | bool hashmap_base_iter_next(const struct hashmap_base *hb, struct hashmap_entry **iter) 502 | { 503 | if (!*iter) { 504 | return false; 505 | } 506 | *iter = hashmap_entry_get_populated(hb, *iter + 1); 507 | return *iter != NULL; 508 | } 509 | 510 | /* 511 | * Returns an iterator to the hashmap entry with the specified key. 512 | * Returns NULL if there is no matching entry. 513 | */ 514 | struct hashmap_entry *hashmap_base_iter_find(const struct hashmap_base *hb, const void *key) 515 | { 516 | if (!key) { 517 | return NULL; 518 | } 519 | return hashmap_entry_find(hb, key, false); 520 | } 521 | 522 | /* 523 | * Remove the hashmap entry pointed to by this iterator and advance the 524 | * iterator to the next entry. 525 | * Returns true if the iterator is valid after the operation. 526 | */ 527 | bool hashmap_base_iter_remove(struct hashmap_base *hb, struct hashmap_entry **iter) 528 | { 529 | if (!*iter) { 530 | return false; 531 | } 532 | if ((*iter)->key) { 533 | /* Remove entry if iterator is valid */ 534 | hashmap_entry_remove(hb, *iter); 535 | } 536 | *iter = hashmap_entry_get_populated(hb, *iter); 537 | return *iter != NULL; 538 | } 539 | 540 | /* 541 | * Return the key of the entry pointed to by the iterator. 542 | */ 543 | const void *hashmap_base_iter_get_key(const struct hashmap_entry *iter) 544 | { 545 | if (!iter) { 546 | return NULL; 547 | } 548 | return (const void *)iter->key; 549 | } 550 | 551 | /* 552 | * Return the data of the entry pointed to by the iterator. 
553 | */ 554 | void *hashmap_base_iter_get_data(const struct hashmap_entry *iter) 555 | { 556 | if (!iter) { 557 | return NULL; 558 | } 559 | return iter->data; 560 | } 561 | 562 | /* 563 | * Set the data pointer of the entry pointed to by the iterator. 564 | */ 565 | int hashmap_base_iter_set_data(struct hashmap_entry *iter, void *data) 566 | { 567 | if (!iter) { 568 | return -EFAULT; 569 | } 570 | if (!data) { 571 | return -EINVAL; 572 | } 573 | iter->data = data; 574 | return 0; 575 | } 576 | 577 | /* 578 | * Return the load factor. 579 | */ 580 | double hashmap_base_load_factor(const struct hashmap_base *hb) 581 | { 582 | if (!hb->table_size) { 583 | return 0; 584 | } 585 | return (double)hb->size / hb->table_size; 586 | } 587 | 588 | /* 589 | * Return the number of collisions for this key. 590 | * This would always be 0 if a perfect hash function was used, but in ordinary 591 | * usage, there may be a few collisions, depending on the hash function and 592 | * load factor. 593 | */ 594 | size_t hashmap_base_collisions(const struct hashmap_base *hb, const void *key) 595 | { 596 | size_t i; 597 | size_t index; 598 | struct hashmap_entry *entry; 599 | 600 | if (!key) { 601 | return 0; 602 | } 603 | 604 | index = hashmap_calc_index(hb, key); 605 | 606 | /* Linear probing */ 607 | for (i = 0; i < hb->table_size; ++i) { 608 | entry = &hb->table[index]; 609 | if (!entry->key) { 610 | /* Key does not exist */ 611 | return 0; 612 | } 613 | if (hb->compare(key, entry->key) == 0) { 614 | break; 615 | } 616 | index = HASHMAP_PROBE_NEXT(hb, index); 617 | } 618 | 619 | return i; 620 | } 621 | 622 | /* 623 | * Return the average number of collisions per entry. 
624 | */ 625 | double hashmap_base_collisions_mean(const struct hashmap_base *hb) 626 | { 627 | struct hashmap_entry *entry; 628 | size_t total_collisions = 0; 629 | 630 | if (!hb->size) { 631 | return 0; 632 | } 633 | for (entry = hb->table; entry < &hb->table[hb->table_size]; ++entry) { 634 | if (!entry->key) { 635 | continue; 636 | } 637 | 638 | total_collisions += hashmap_base_collisions(hb, entry->key); 639 | } 640 | return (double)total_collisions / hb->size; 641 | } 642 | 643 | /* 644 | * Return the variance between entry collisions. The higher the variance, 645 | * the more likely the hash function is poor and is resulting in clustering. 646 | */ 647 | double hashmap_base_collisions_variance(const struct hashmap_base *hb) 648 | { 649 | struct hashmap_entry *entry; 650 | double mean_collisions; 651 | double variance; 652 | double total_variance = 0; 653 | 654 | if (!hb->size) { 655 | return 0; 656 | } 657 | mean_collisions = hashmap_base_collisions_mean(hb); 658 | for (entry = hb->table; entry < &hb->table[hb->table_size]; ++entry) { 659 | if (!entry->key) { 660 | continue; 661 | } 662 | variance = (double)hashmap_base_collisions(hb, entry->key) - mean_collisions; 663 | total_variance += variance * variance; 664 | } 665 | return total_variance / hb->size; 666 | } 667 | 668 | /* 669 | * Recommended hash function for data keys. 670 | * 671 | * This is an implementation of the well-documented Jenkins one-at-a-time 672 | * hash function. See https://en.wikipedia.org/wiki/Jenkins_hash_function 673 | */ 674 | size_t hashmap_hash_default(const void *data, size_t len) 675 | { 676 | const uint8_t *byte = (const uint8_t *)data; 677 | size_t hash = 0; 678 | 679 | for (size_t i = 0; i < len; ++i) { 680 | hash += *byte++; 681 | hash += (hash << 10); 682 | hash ^= (hash >> 6); 683 | } 684 | hash += (hash << 3); 685 | hash ^= (hash >> 11); 686 | hash += (hash << 15); 687 | return hash; 688 | } 689 | 690 | /* 691 | * Recommended hash function for string keys. 
692 | * 693 | * This is an implementation of the well-documented Jenkins one-at-a-time 694 | * hash function. See https://en.wikipedia.org/wiki/Jenkins_hash_function 695 | */ 696 | size_t hashmap_hash_string(const char *key) 697 | { 698 | size_t hash = 0; 699 | 700 | for (; *key; ++key) { 701 | hash += *key; 702 | hash += (hash << 10); 703 | hash ^= (hash >> 6); 704 | } 705 | hash += (hash << 3); 706 | hash ^= (hash >> 11); 707 | hash += (hash << 15); 708 | return hash; 709 | } 710 | 711 | /* 712 | * Case insensitive hash function for string keys. 713 | */ 714 | size_t hashmap_hash_string_i(const char *key) 715 | { 716 | size_t hash = 0; 717 | 718 | for (; *key; ++key) { 719 | hash += tolower(*key); 720 | hash += (hash << 10); 721 | hash ^= (hash >> 6); 722 | } 723 | hash += (hash << 3); 724 | hash ^= (hash >> 11); 725 | hash += (hash << 15); 726 | return hash; 727 | } 728 | -------------------------------------------------------------------------------- /tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.19) 2 | project(hashmap_test) 3 | 4 | if(NOT DEFINED CMAKE_C_STANDARD) 5 | set(CMAKE_C_STANDARD 23) 6 | set(CMAKE_C_EXTENSIONS OFF) 7 | endif() 8 | 9 | if(NOT DEFINED CMAKE_CXX_STANDARD) 10 | set(CMAKE_CXX_STANDARD 20) 11 | endif() 12 | 13 | include(FetchContent) 14 | 15 | # Fetch Catch2 unit test framework at configure time 16 | FetchContent_Declare( 17 | Catch2 18 | GIT_REPOSITORY https://github.com/catchorg/Catch2.git 19 | GIT_TAG v3.8.1 20 | GIT_SHALLOW ON 21 | ) 22 | FetchContent_MakeAvailable(Catch2) 23 | list(APPEND CMAKE_MODULE_PATH ${catch2_SOURCE_DIR}/extras) 24 | include(CTest) 25 | include(Catch) 26 | 27 | # Fetch Sanitizer CMake helpers 28 | FetchContent_Declare( 29 | SanitizersCMake 30 | GIT_REPOSITORY https://github.com/arsenm/sanitizers-cmake.git 31 | GIT_SHALLOW ON 32 | ) 33 | FetchContent_GetProperties(SanitizersCMake) 34 | if(NOT SanitizersCMake_POPULATED) 
35 | FetchContent_Populate(SanitizersCMake) 36 | list(APPEND CMAKE_MODULE_PATH ${sanitizerscmake_SOURCE_DIR}/cmake) 37 | endif() 38 | 39 | # Enable ASan and UBSan 40 | find_package(Sanitizers) 41 | set(SANITIZE_ADDRESS TRUE) 42 | set(SANITIZE_UNDEFINED TRUE) 43 | 44 | # Hashmap unit test 45 | add_executable(hashmap_test hashmap_test.cpp) 46 | target_compile_options(hashmap_test PRIVATE -Wall -Werror) 47 | target_link_libraries(hashmap_test PRIVATE Catch2::Catch2WithMain HashMap::HashMap) 48 | add_sanitizers(hashmap_test) 49 | catch_discover_tests(hashmap_test) 50 | -------------------------------------------------------------------------------- /tests/hashmap_test.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025 David Leeds 3 | * 4 | * Hashmap is free software; you can redistribute it and/or modify 5 | * it under the terms of the MIT license. See LICENSE for details. 6 | */ 7 | 8 | #include 9 | 10 | #include 11 | #include 12 | 13 | #include 14 | 15 | using namespace std::literals; 16 | 17 | static std::unordered_map make_kvs(size_t count) 18 | { 19 | std::unordered_map input; 20 | 21 | for (size_t i = 0; i < count; ++i) { 22 | input.emplace("key" + std::to_string(i), "value" + std::to_string(i)); 23 | } 24 | 25 | return input; 26 | } 27 | 28 | static void fill_map(auto *map, const std::unordered_map &kvs) 29 | { 30 | hashmap_clear(map); 31 | 32 | for (auto &[k, v] : kvs) { 33 | CAPTURE(k, v); 34 | REQUIRE(hashmap_put(map, k.c_str(), v.c_str()) == 0); 35 | } 36 | } 37 | 38 | TEST_CASE("hashmap", "[hashmap]") 39 | { 40 | /* Create a hashmap with string keys and values */ 41 | HASHMAP(char, const char) map; 42 | hashmap_init(&map, hashmap_hash_string, strcmp); 43 | 44 | SECTION("initial state") 45 | { 46 | REQUIRE(hashmap_empty(&map)); 47 | REQUIRE(hashmap_size(&map) == 0); 48 | 49 | /* No allocation is performed prior to use */ 50 | REQUIRE(hashmap_capacity(&map) == 0); 51 | } 52 | 53 | 
    /* Pre-allocation: reserving must succeed and provide the requested room */
    SECTION("reserve")
    {
        /* Reserve space for at least 1000 elements */
        constexpr size_t CAPACITY = 1000;
        REQUIRE(hashmap_reserve(&map, CAPACITY) == 0);

        /* Check that at least the requested capacity was allocated */
        REQUIRE(hashmap_capacity(&map) >= CAPACITY);
    }

    /* Every pair added with hashmap_put() must be retrievable afterwards */
    SECTION("put and get")
    {
        /* Input is large enough to prompt rehashes */
        auto input = make_kvs(1000);

        for (auto &[k, v] : input) {
            CAPTURE(k, v);
            REQUIRE(hashmap_put(&map, k.c_str(), v.c_str()) == 0);
        }

        REQUIRE_FALSE(hashmap_empty(&map));
        REQUIRE(hashmap_size(&map) == input.size());

        for (auto &[k, v] : input) {
            CAPTURE(k, v);
            /* v is a std::string, so this compares contents, not pointers */
            REQUIRE(hashmap_get(&map, k.c_str()) == v);
        }
    }

    /* Same as above, but adding entries through hashmap_insert() */
    SECTION("insert and get")
    {
        /* Input is large enough to prompt rehashes */
        auto input = make_kvs(1000);

        for (auto &[k, v] : input) {
            CAPTURE(k, v);
            /* Expect 1 for a key not yet present; old-data output is not requested */
            REQUIRE(hashmap_insert(&map, k.c_str(), v.c_str(), nullptr) == 1);
        }

        REQUIRE_FALSE(hashmap_empty(&map));
        REQUIRE(hashmap_size(&map) == input.size());

        for (auto &[k, v] : input) {
            CAPTURE(k, v);
            REQUIRE(hashmap_get(&map, k.c_str()) == v);
        }
    }

    /* A second put with an existing key must fail with -EEXIST and add nothing */
    SECTION("put duplicate entry")
    {
        REQUIRE(hashmap_put(&map, "key1", "value1") == 0);
        REQUIRE(hashmap_put(&map, "key1", "value2") == -EEXIST);
        REQUIRE(hashmap_size(&map) == 1);
    }

    /*
     * hashmap_insert() with an existing key: checks the return value and what
     * is reported through the old-data output pointer.
     */
    SECTION("insert duplicate entry")
    {
        const char *val1 = "value1";
        const char *val2 = "value2";
        const char *old_val;

        /*
         * old_val is preset to a sentinel before each call so the checks can
         * tell that hashmap_insert() actually wrote to it.
         */

        /* New key */
        old_val = "invalid";
        REQUIRE(hashmap_insert(&map, "key1", val1, &old_val) == 1);
        REQUIRE(old_val == nullptr);

        /* Existing key, same value */
        old_val = "invalid";
        REQUIRE(hashmap_insert(&map, "key1", val1, &old_val) == 0);
        REQUIRE(old_val == nullptr);

        /* Existing key, new value */
        old_val = "invalid";
        REQUIRE(hashmap_insert(&map, "key1", val2, &old_val) == 0);
        /* Pointer comparison: the previously stored data pointer is expected back */
        REQUIRE(old_val == val1);

        /* None of the inserts above may have added a second entry */
        REQUIRE(hashmap_size(&map) == 1);
    }

    /* Looking up a missing key must yield nullptr */
    SECTION("get nonexistent entry")
    {
        /* Empty map */
        REQUIRE(hashmap_get(&map, "key1") == nullptr);

        /* Non-empty map */
        REQUIRE(hashmap_put(&map, "key2", "value2") == 0);
        REQUIRE(hashmap_get(&map, "key1") == nullptr);
    }

    /* hashmap_contains() reports presence for stored keys only */
    SECTION("contains")
    {
        REQUIRE(hashmap_put(&map, "key1", "value1") == 0);

        REQUIRE(hashmap_contains(&map, "key1"));
        REQUIRE_FALSE(hashmap_contains(&map, "key2"));
    }

    /* Remove entries one at a time, checking size and lookups at every step */
    SECTION("remove")
    {
        auto input = make_kvs(1000);

        fill_map(&map, input);

        size_t remaining = input.size();
        for (auto &[k, v] : input) {
            CAPTURE(k, v);

            /* Before removal: entry is present and size is unchanged */
            REQUIRE(hashmap_size(&map) == remaining);
            REQUIRE(hashmap_get(&map, k.c_str()) == v);

            /* hashmap_remove() is expected to hand back the removed data */
            REQUIRE(hashmap_remove(&map, k.c_str()) == v);
            --remaining;

            /* After removal: entry is gone and size dropped by one */
            REQUIRE(hashmap_get(&map, k.c_str()) == nullptr);
            REQUIRE(hashmap_size(&map) == remaining);
        }
    }

    /* hashmap_clear() empties the map but keeps its capacity */
    SECTION("clear")
    {
        auto input = make_kvs(1000);

        /* Capacity snapshots: before filling, after filling, after clearing */
        size_t empty_capacity = hashmap_capacity(&map);

        fill_map(&map, input);

        size_t full_capacity = hashmap_capacity(&map);

        hashmap_clear(&map);

        size_t cleared_capacity = hashmap_capacity(&map);

        /* All elements removed */
        REQUIRE(hashmap_empty(&map));

        /* Should not reduce allocated space */
        REQUIRE(full_capacity > empty_capacity);
        REQUIRE(cleared_capacity == full_capacity);
    }

    /* hashmap_reset() empties the map and gives capacity back */
    SECTION("reset")
    {
        auto input = make_kvs(1000);

        /* Capacity snapshots: before filling, after filling, after resetting */
        size_t empty_capacity = hashmap_capacity(&map);

        fill_map(&map, input);

        size_t full_capacity = hashmap_capacity(&map);

        hashmap_reset(&map);

        size_t cleared_capacity = hashmap_capacity(&map);

        /* All elements removed */
        REQUIRE(hashmap_empty(&map));

        /* Should reset allocated space to a smaller initial size */
        REQUIRE(full_capacity > empty_capacity);
        REQUIRE(cleared_capacity >= empty_capacity);
        REQUIRE(cleared_capacity < full_capacity);
    }

    /* Walk the whole map with an explicit iterator and visit every entry once */
    SECTION("iteration with iterator")
    {
        auto input = make_kvs(200);

        fill_map(&map, input);

        HASHMAP_ITER(map) iter = hashmap_iter(&map);

        size_t count = 0;
        /* The map is non-empty here, so the first iterator position must be valid */
        do {
            REQUIRE(hashmap_iter_valid(&iter));

            const char *k = hashmap_iter_get_key(&iter);
            const char *v = hashmap_iter_get_data(&iter);

            REQUIRE(k != nullptr);
            REQUIRE(v != nullptr);

            /* Each visited pair must match what was put into the map */
            REQUIRE(input.contains(k));
            REQUIRE(input.at(k) == v);

            ++count;
        } while (hashmap_iter_next(&iter));

        REQUIRE(count == input.size());
    }

    /* Remove every entry through the iterator while walking the map */
    SECTION("iteration with iterator and remove all")
    {
        auto input = make_kvs(200);

        fill_map(&map, input);

        HASHMAP_ITER(map) iter = hashmap_iter(&map);

        size_t count = 0;

        /*
         * There is no explicit hashmap_iter_next() call in this loop; the test
         * relies on hashmap_iter_remove() leaving the iterator on the next
         * entry (or invalid once the map is drained).
         */
        while (hashmap_iter_valid(&iter)) {
            const char *k = hashmap_iter_get_key(&iter);
            const char *v = hashmap_iter_get_data(&iter);

            REQUIRE(k != nullptr);
            REQUIRE(v != nullptr);

            REQUIRE(input.contains(k));
            REQUIRE(input.at(k) == v);

            hashmap_iter_remove(&iter);

            ++count;
        }

        REQUIRE(count == input.size());
        REQUIRE(hashmap_empty(&map));
    }

    /* Remove half of the entries through the iterator while walking the map */
    SECTION("iteration with iterator and remove some")
    {
        auto input = make_kvs(200);

        fill_map(&map, input);

        HASHMAP_ITER(map) iter = hashmap_iter(&map);

        size_t count = 0;

        while (hashmap_iter_valid(&iter)) {
            const char *k = hashmap_iter_get_key(&iter);
            const char *v = hashmap_iter_get_data(&iter);

            REQUIRE(k != nullptr);
            REQUIRE(v != nullptr);

            REQUIRE(input.contains(k));
            REQUIRE(input.at(k) == v);

            /* Remove every other entry */
            if (count % 2 == 0) {
                hashmap_iter_remove(&iter);
            } else {
                /* Entries that are kept need an explicit advance */
                hashmap_iter_next(&iter);
            }

            ++count;
        }

        /* Every entry was visited once and exactly half were removed */
        REQUIRE(count == input.size());
        REQUIRE(hashmap_size(&map) == input.size() / 2);
    }

    /* hashmap_iter_find() positions an iterator on a key, or yields an invalid one */
    SECTION("find with iterator")
    {
        HASHMAP_ITER(map) iter;

        REQUIRE(hashmap_put(&map, "key1", "value1") == 0);

        /* Found */
        iter = hashmap_iter_find(&map, "key1");
        REQUIRE(hashmap_iter_valid(&iter));
        /* The ""s literals make these std::string content comparisons */
        REQUIRE(hashmap_iter_get_key(&iter) == "key1"s);
        REQUIRE(hashmap_iter_get_data(&iter) == "value1"s);

        /* Not found */
        iter = hashmap_iter_find(&map, "key2");
        REQUIRE_FALSE(hashmap_iter_valid(&iter));
    }

    /* Read-only iteration using the three foreach macro variants */
    SECTION("iteration with foreach macros")
    {
        auto input = make_kvs(200);

        fill_map(&map, input);

        /* Loop variables assigned by the foreach macros */
        const char *key;
        const char *value;

        /* foreach */
        {
            size_t count = 0;
            hashmap_foreach(key, value, &map) {
                REQUIRE(key != nullptr);
                REQUIRE(value != nullptr);

                REQUIRE(input.contains(key));
                REQUIRE(input.at(key) == value);

                ++count;
            }
            REQUIRE(count == input.size());
        }

        /* foreach_key */
        {
            size_t count = 0;
            hashmap_foreach_key(key, &map) {
                REQUIRE(key != nullptr);
                REQUIRE(input.contains(key));
                ++count;
            }
            REQUIRE(count == input.size());
        }

        /* foreach_data */
        {
            size_t count = 0;
            hashmap_foreach_data(value, &map) {
                REQUIRE(value != nullptr);
                ++count;
            }
            REQUIRE(count == input.size());
        }
    }

    /* The _safe foreach variants must tolerate removal of the current entry */
    SECTION("iteration and removal with safe foreach macros")
    {
        auto input = make_kvs(200);

        /* Loop variables assigned by the foreach macros */
        const char *key;
        const char *value;
        /* Extra cursor argument taken by the _safe macro variants */
        const void *pos;

        /* safe foreach */
        {
            size_t count = 0;

            /* fill_map() clears first, so each sub-test starts from a full map */
            fill_map(&map, input);

            hashmap_foreach_safe(key, value, &map, pos) {
                REQUIRE(key != nullptr);
                REQUIRE(value != nullptr);

                REQUIRE(input.contains(key));
                REQUIRE(input.at(key) == value);

                /* Remove every other entry */
                if (count % 2 == 0) {
                    hashmap_remove(&map, key);
                }

                ++count;
            }
            /* Every entry was still visited exactly once; half remain */
            REQUIRE(count == input.size());
            REQUIRE(hashmap_size(&map) == input.size() / 2);
        }

        /* safe foreach_key */
        {
            size_t count = 0;

            fill_map(&map, input);

            hashmap_foreach_key_safe(key, &map, pos) {
                REQUIRE(key != nullptr);
                REQUIRE(input.contains(key));

                /* Remove every other entry */
                if (count % 2 == 1) {
                    hashmap_remove(&map, key);
                }

                ++count;
            }
            REQUIRE(count == input.size());
            REQUIRE(hashmap_size(&map) == input.size() / 2);
        }

        /* safe foreach_data */
        {
            size_t count = 0;

            fill_map(&map, input);

            /* Nothing is removed in this variant, so the size stays unchanged */
            hashmap_foreach_data_safe(value, &map, pos) {
                REQUIRE(value != nullptr);
                ++count;
            }
            REQUIRE(count == input.size());
            REQUIRE(hashmap_size(&map) == input.size());
        }
    }

    /* With key alloc/free callbacks installed, the map must store its own key copy */
    SECTION("internal key allocation")
    {
        const char *key = "key1";
        /* Adapter so free() can be passed where a char * argument is expected */
        auto strfree = [](char *k) {
            free(k);
        };

        hashmap_set_key_alloc_funcs(&map, strdup, strfree);

        REQUIRE(hashmap_put(&map, key, key) == 0);

        auto iter = hashmap_iter_find(&map, key);

        REQUIRE(hashmap_iter_valid(&iter));
        /* Pointer inequality: the stored key is a different object than the caller's */
        REQUIRE(hashmap_iter_get_key(&iter) != key);

        /* Remove the entry before the map is torn down at the end of the test case */
        hashmap_iter_remove(&iter);
    }

    /* The map must stay correct even when the hash function distributes poorly */
    SECTION("bad hash functions")
    {
        auto cmp = [](const int *a, const
int *b) -> int { 459 | return a - b; 460 | }; 461 | 462 | static std::unordered_map input; 463 | 464 | for (int i = 0; i < 200; ++i) { 465 | input.emplace(i, "value" + std::to_string(i)); 466 | } 467 | 468 | /* Should be functional (albeit slower) when poor hash functions are used */ 469 | auto test = [&](size_t (*hash)(const int *)) { 470 | HASHMAP(int, const char) int_map; 471 | hashmap_init(&int_map, hash, cmp); 472 | 473 | /* Put */ 474 | for (auto &[k, v] : input) { 475 | CAPTURE(k, v); 476 | REQUIRE(hashmap_put(&int_map, &k, v.c_str()) == 0); 477 | } 478 | 479 | /* Get */ 480 | for (auto &[k, v] : input) { 481 | CAPTURE(k, v); 482 | REQUIRE(hashmap_get(&int_map, &k) == v); 483 | } 484 | 485 | /* Remove */ 486 | for (auto &[k, v] : input) { 487 | CAPTURE(k); 488 | REQUIRE(hashmap_remove(&int_map, &k) == v); 489 | } 490 | }; 491 | 492 | SECTION("worst") 493 | { 494 | /* Hash lookup collides with every entry */ 495 | auto hash = [](const int *) -> size_t { 496 | return 0; 497 | }; 498 | 499 | test(hash); 500 | } 501 | 502 | SECTION("bad 1") 503 | { 504 | /* Could cause clustering depending on implementation */ 505 | auto hash = [](const int *k) -> size_t { 506 | return *k; 507 | }; 508 | 509 | test(hash); 510 | } 511 | 512 | SECTION("bad 2") 513 | { 514 | /* Could cause clustering depending on implementation */ 515 | auto hash = [](const int *k) -> size_t { 516 | return *k + *k; 517 | }; 518 | 519 | test(hash); 520 | } 521 | } 522 | 523 | hashmap_cleanup(&map); 524 | } 525 | --------------------------------------------------------------------------------