├── .gitignore ├── LICENSE.txt ├── README.md ├── ht.c ├── ht.h └── samples ├── bsearch.c ├── demo.c ├── dump.c ├── gensimilar.py ├── lsearch.c ├── output ├── bsearch.txt ├── demo.txt ├── dump.txt ├── lsearch.txt ├── stats-similar.txt └── stats-words.txt ├── perfget.c ├── perfget.go ├── perflbh.c ├── perfset.c ├── perfset.go ├── perftest.sh ├── stats.c ├── testall.sh └── words.txt /.gitignore: -------------------------------------------------------------------------------- 1 | /wordfreq 2 | /bsearch 3 | /lsearch 4 | /demo 5 | /perflbh 6 | /perfget-c 7 | /perfget-go 8 | /perfset-c 9 | /perfset-go 10 | /stats 11 | /dump 12 | samples/similar.txt 13 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Ben Hoyt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | Simple hash table written in C. To go with my article **[How to implement a hash table (in C)](https://benhoyt.com/writings/hash-table-in-c/)**. 3 | 4 | This is a learning exercise, not a battle-tested data structure library -- you have been warned! :-) 5 | -------------------------------------------------------------------------------- /ht.c: -------------------------------------------------------------------------------- 1 | // Simple hash table implemented in C. 2 | 3 | #include "ht.h" 4 | 5 | #include <assert.h> 6 | #include <stdint.h> 7 | #include <stdlib.h> 8 | #include <string.h> 9 | 10 | // Hash table entry (slot may be filled or empty). 11 | typedef struct { 12 | const char* key; // key is NULL if this slot is empty 13 | void* value; 14 | } ht_entry; 15 | 16 | // Hash table structure: create with ht_create, free with ht_destroy. 17 | struct ht { 18 | ht_entry* entries; // hash slots 19 | size_t capacity; // size of entries array 20 | size_t length; // number of items in hash table 21 | }; 22 | 23 | #define INITIAL_CAPACITY 16 // must be a nonzero power of 2 (index is hash & (capacity-1)) 24 | 25 | ht* ht_create(void) { 26 | // Allocate space for hash table struct. 27 | ht* table = malloc(sizeof(ht)); 28 | if (table == NULL) { 29 | return NULL; 30 | } 31 | table->length = 0; 32 | table->capacity = INITIAL_CAPACITY; 33 | 34 | // Allocate (zero'd) space for entry buckets. 35 | table->entries = calloc(table->capacity, sizeof(ht_entry)); 36 | if (table->entries == NULL) { 37 | free(table); // error, free table before we return! 38 | return NULL; 39 | } 40 | return table; 41 | } 42 | 43 | void ht_destroy(ht* table) { 44 | // First free allocated keys. 45 | for (size_t i = 0; i < table->capacity; i++) { 46 | free((void*)table->entries[i].key); 47 | } 48 | 49 | // Then free entries array and table itself. 
50 | free(table->entries); 51 | free(table); 52 | } 53 | 54 | #define FNV_OFFSET 14695981039346656037UL 55 | #define FNV_PRIME 1099511628211UL 56 | 57 | // Return 64-bit FNV-1a hash for key (NUL-terminated). See description: 58 | // https://en.wikipedia.org/wiki/Fowler–Noll–Vo_hash_function 59 | static uint64_t hash_key(const char* key) { 60 | uint64_t hash = FNV_OFFSET; 61 | for (const char* p = key; *p; p++) { 62 | hash ^= (uint64_t)(unsigned char)(*p); 63 | hash *= FNV_PRIME; 64 | } 65 | return hash; 66 | } 67 | 68 | void* ht_get(ht* table, const char* key) { 69 | // AND hash with capacity-1 to ensure it's within entries array. 70 | uint64_t hash = hash_key(key); 71 | size_t index = (size_t)(hash & (uint64_t)(table->capacity - 1)); 72 | 73 | // Loop till we find an empty entry. 74 | while (table->entries[index].key != NULL) { 75 | if (strcmp(key, table->entries[index].key) == 0) { 76 | // Found key, return value. 77 | return table->entries[index].value; 78 | } 79 | // Key wasn't in this slot, move to next (linear probing). 80 | index++; 81 | if (index >= table->capacity) { 82 | // At end of entries array, wrap around. 83 | index = 0; 84 | } 85 | } 86 | return NULL; 87 | } 88 | 89 | // Internal function to set an entry (without expanding table). 90 | static const char* ht_set_entry(ht_entry* entries, size_t capacity, 91 | const char* key, void* value, size_t* plength) { 92 | // AND hash with capacity-1 to ensure it's within entries array. 93 | uint64_t hash = hash_key(key); 94 | size_t index = (size_t)(hash & (uint64_t)(capacity - 1)); 95 | 96 | // Loop till we find an empty entry. 97 | while (entries[index].key != NULL) { 98 | if (strcmp(key, entries[index].key) == 0) { 99 | // Found key (it already exists), update value. 100 | entries[index].value = value; 101 | return entries[index].key; 102 | } 103 | // Key wasn't in this slot, move to next (linear probing). 104 | index++; 105 | if (index >= capacity) { 106 | // At end of entries array, wrap around. 
107 | index = 0; 108 | } 109 | } 110 | 111 | // Didn't find key, allocate+copy if needed, then insert it. 112 | if (plength != NULL) { 113 | key = strdup(key); 114 | if (key == NULL) { 115 | return NULL; 116 | } 117 | (*plength)++; 118 | } 119 | entries[index].key = (char*)key; 120 | entries[index].value = value; 121 | return key; 122 | } 123 | 124 | // Expand hash table to twice its current size. Return true on success, 125 | // false if out of memory. 126 | static bool ht_expand(ht* table) { 127 | // Allocate new entries array. 128 | size_t new_capacity = table->capacity * 2; 129 | if (new_capacity < table->capacity) { 130 | return false; // overflow (capacity would be too big) 131 | } 132 | ht_entry* new_entries = calloc(new_capacity, sizeof(ht_entry)); 133 | if (new_entries == NULL) { 134 | return false; 135 | } 136 | 137 | // Iterate entries, move all non-empty ones to new table's entries. 138 | for (size_t i = 0; i < table->capacity; i++) { 139 | ht_entry entry = table->entries[i]; 140 | if (entry.key != NULL) { 141 | ht_set_entry(new_entries, new_capacity, entry.key, 142 | entry.value, NULL); 143 | } 144 | } 145 | 146 | // Free old entries array and update this table's details. 147 | free(table->entries); 148 | table->entries = new_entries; 149 | table->capacity = new_capacity; 150 | return true; 151 | } 152 | 153 | const char* ht_set(ht* table, const char* key, void* value) { 154 | assert(value != NULL); 155 | if (value == NULL) { 156 | return NULL; 157 | } 158 | 159 | // If length will exceed half of current capacity, expand it. 160 | if (table->length >= table->capacity / 2) { 161 | if (!ht_expand(table)) { 162 | return NULL; 163 | } 164 | } 165 | 166 | // Set entry and update length. 
167 | return ht_set_entry(table->entries, table->capacity, key, value, 168 | &table->length); 169 | } 170 | 171 | size_t ht_length(ht* table) { 172 | return table->length; 173 | } 174 | 175 | hti ht_iterator(ht* table) { 176 | hti it; 177 | it._table = table; 178 | it._index = 0; 179 | return it; 180 | } 181 | 182 | bool ht_next(hti* it) { 183 | // Loop till we've hit end of entries array. 184 | ht* table = it->_table; 185 | while (it->_index < table->capacity) { 186 | size_t i = it->_index; 187 | it->_index++; 188 | if (table->entries[i].key != NULL) { 189 | // Found next non-empty item, update iterator key and value. 190 | ht_entry entry = table->entries[i]; 191 | it->key = entry.key; 192 | it->value = entry.value; 193 | return true; 194 | } 195 | } 196 | return false; 197 | } 198 | -------------------------------------------------------------------------------- /ht.h: -------------------------------------------------------------------------------- 1 | // Simple hash table implemented in C. 2 | 3 | #ifndef _HT_H 4 | #define _HT_H 5 | 6 | #include <stdbool.h> 7 | #include <stddef.h> 8 | 9 | // Hash table structure: create with ht_create, free with ht_destroy. 10 | typedef struct ht ht; 11 | 12 | // Create hash table and return pointer to it, or NULL if out of memory. 13 | ht* ht_create(void); 14 | 15 | // Free memory allocated for hash table, including allocated keys. 16 | void ht_destroy(ht* table); 17 | 18 | // Get item with given key (NUL-terminated) from hash table. Return 19 | // value (which was set with ht_set), or NULL if key not found. 20 | void* ht_get(ht* table, const char* key); 21 | 22 | // Set item with given key (NUL-terminated) to value (which must not 23 | // be NULL). If not already present in table, key is copied to newly 24 | // allocated memory (keys are freed automatically when ht_destroy is 25 | // called). Return address of copied key, or NULL if out of memory. 
26 | const char* ht_set(ht* table, const char* key, void* value); 27 | 28 | // Return number of items in hash table. 29 | size_t ht_length(ht* table); 30 | 31 | // Hash table iterator: create with ht_iterator, iterate with ht_next. 32 | typedef struct { 33 | const char* key; // current key 34 | void* value; // current value 35 | 36 | // Don't use these fields directly. 37 | ht* _table; // reference to hash table being iterated 38 | size_t _index; // current index into the table's entries array 39 | } hti; 40 | 41 | // Return new hash table iterator (for use with ht_next). 42 | hti ht_iterator(ht* table); 43 | 44 | // Move iterator to next item in hash table, update iterator's key 45 | // and value to current item, and return true. If there are no more 46 | // items, return false. Don't call ht_set during iteration. 47 | bool ht_next(hti* it); 48 | 49 | #endif // _HT_H 50 | -------------------------------------------------------------------------------- /samples/bsearch.c: -------------------------------------------------------------------------------- 1 | // Examples of binary search (with and without bsearch) 2 | 3 | /* 4 | 5 | $ gcc -O2 -o bsearch samples/bsearch.c && ./bsearch 6 | bsearch: value of 'bob' is 11 7 | binary_search: value of 'bob' is 11 8 | 9 | */ 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | typedef struct { 17 | char* key; 18 | int value; 19 | } item; 20 | 21 | int cmp(const void* a, const void* b) { 22 | item* item_a = (item*)a; 23 | item* item_b = (item*)b; 24 | return strcmp(item_a->key, item_b->key); 25 | } 26 | 27 | item* binary_search(item* items, size_t size, const char* key) { 28 | if (size + size < size) { 29 | return NULL; // size too big; avoid overflow 30 | } 31 | size_t low = 0; 32 | size_t high = size; 33 | while (low < high) { 34 | size_t mid = (low + high) / 2; 35 | int c = strcmp(items[mid].key, key); 36 | if (c == 0) { 37 | return &items[mid]; 38 | } 39 | if (c < 0) { 40 | low = mid + 1; // eliminate low half of array 
41 | } else { 42 | high = mid; // eliminate high half of array 43 | } 44 | } 45 | // Entire array has been eliminated, key not found. 46 | return NULL; 47 | } 48 | 49 | int main(void) { 50 | item items[] = { 51 | {"bar", 42}, {"bazz", 36}, {"bob", 11}, {"buzz", 7}, 52 | {"foo", 10}, {"jane", 100}, {"x", 200}}; 53 | size_t num_items = sizeof(items) / sizeof(item); 54 | 55 | item key = {"bob", 0}; 56 | item* found = bsearch(&key, items, num_items, sizeof(item), cmp); 57 | if (found == NULL) { 58 | return 1; 59 | } 60 | printf("bsearch: value of 'bob' is %d\n", found->value); 61 | 62 | found = binary_search(items, num_items, "bob"); 63 | if (found == NULL) { 64 | return 1; 65 | } 66 | printf("binary_search: value of 'bob' is %d\n", found->value); 67 | return 0; 68 | } 69 | -------------------------------------------------------------------------------- /samples/demo.c: -------------------------------------------------------------------------------- 1 | // Simple hash table demonstration program 2 | 3 | /* 4 | 5 | $ gcc -O2 -Wall -o demo samples/demo.c ht.c 6 | $ echo 'foo bar the bar bar bar the' | ./demo 7 | 8 | See also: 9 | https://stackoverflow.com/questions/5134891/how-do-i-use-valgrind-to-find-memory-leaks 10 | 11 | */ 12 | 13 | #include "../ht.h" 14 | 15 | #include 16 | #include 17 | #include 18 | 19 | // Example: 20 | // $ echo 'foo bar the bar bar bar the' | ./demo 21 | // foo 1 22 | // bar 4 23 | // the 2 24 | // 3 25 | 26 | void exit_nomem(void) { 27 | fprintf(stderr, "out of memory\n"); 28 | exit(1); 29 | } 30 | 31 | int main(void) { 32 | ht* counts = ht_create(); 33 | if (counts == NULL) { 34 | exit_nomem(); 35 | } 36 | 37 | // Read next word from stdin (at most 100 chars long). 38 | char word[101]; 39 | while (scanf("%100s", word) != EOF) { 40 | // Look up word. 41 | void* value = ht_get(counts, word); 42 | if (value != NULL) { 43 | // Already exists, increment int that value points to. 
44 | int* pcount = (int*)value; 45 | (*pcount)++; 46 | continue; 47 | } 48 | 49 | // Word not found, allocate space for new int and set to 1. 50 | int* pcount = malloc(sizeof(int)); 51 | if (pcount == NULL) { 52 | exit_nomem(); 53 | } 54 | *pcount = 1; 55 | if (ht_set(counts, word, pcount) == NULL) { 56 | exit_nomem(); 57 | } 58 | } 59 | 60 | // Print out words and frequencies, freeing values as we go. 61 | hti it = ht_iterator(counts); 62 | while (ht_next(&it)) { 63 | printf("%s %d\n", it.key, *(int*)it.value); 64 | free(it.value); 65 | } 66 | 67 | // Show the number of unique words. 68 | printf("%d\n", (int)ht_length(counts)); 69 | 70 | ht_destroy(counts); 71 | return 0; 72 | } 73 | -------------------------------------------------------------------------------- /samples/dump.c: -------------------------------------------------------------------------------- 1 | // Print out some table buckets for use in the article 2 | 3 | /* 4 | $ gcc -Wall -O2 -o dump samples/dump.c ht.c && ./dump 5 | index 0: empty 6 | index 1: key jane, value 100 7 | index 2: empty 8 | index 3: empty 9 | index 4: key bob, value 11 10 | index 5: empty 11 | index 6: empty 12 | index 7: key foo, value 10 13 | index 8: key bazz, value 36 14 | index 9: key x, value 200 15 | index 10: key bar, value 42 16 | index 11: empty 17 | index 12: key buzz, value 7 18 | index 13: empty 19 | index 14: empty 20 | index 15: empty 21 | */ 22 | 23 | #include "../ht.h" 24 | 25 | #include 26 | #include 27 | 28 | // Copied from ht.c 29 | typedef struct { 30 | char* key; // key is NULL if this slot is empty 31 | void* value; 32 | } ht_entry; 33 | 34 | struct ht { 35 | ht_entry* entries; // hash slots 36 | size_t capacity; // size of _entries array 37 | size_t length; // number of items in hash table 38 | }; 39 | 40 | typedef struct { 41 | char* key; 42 | int value; 43 | } item; 44 | 45 | void exit_nomem(void) { 46 | fprintf(stderr, "out of memory\n"); 47 | exit(1); 48 | } 49 | 50 | int main(void) { 51 | item items[] 
= { 52 | {"foo", 10}, {"bar", 42}, {"bazz", 36}, {"buzz", 7}, 53 | {"bob", 11}, {"jane", 100}, {"x", 200}}; 54 | size_t num_items = sizeof(items) / sizeof(item); 55 | 56 | ht* table = ht_create(); 57 | if (table == NULL) { 58 | exit_nomem(); 59 | } 60 | 61 | for (int i = 0; i < num_items; i++) { 62 | if (ht_set(table, items[i].key, &items[i].value) == NULL) { 63 | exit_nomem(); 64 | } 65 | } 66 | 67 | for (int i = 0; i < table->capacity; i++) { 68 | if (table->entries[i].key != NULL) { 69 | printf("index %d: key %s, value %d\n", 70 | i, table->entries[i].key, *(int*)table->entries[i].value); 71 | } else { 72 | printf("index %d: empty\n", i); 73 | } 74 | } 75 | 76 | return 0; 77 | } 78 | -------------------------------------------------------------------------------- /samples/gensimilar.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | 4 | if len(sys.argv) < 2: 5 | print('usage: gensimilar.py num', file=sys.stderr) 6 | sys.exit(1) 7 | 8 | for i in range(int(sys.argv[1])): 9 | print('word{}'.format(i+1)) 10 | -------------------------------------------------------------------------------- /samples/lsearch.c: -------------------------------------------------------------------------------- 1 | // Example of linear search 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | typedef struct { 8 | char* key; 9 | int value; 10 | } item; 11 | 12 | item* linear_search(item* items, size_t size, const char* key) { 13 | for (size_t i = 0; i < size; i++) { 14 | if (strcmp(items[i].key, key) == 0) { 15 | return &items[i]; 16 | } 17 | } 18 | return NULL; 19 | } 20 | 21 | int main(void) { 22 | item items[] = { 23 | {"foo", 10}, {"bar", 42}, {"bazz", 36}, {"buzz", 7}, 24 | {"bob", 11}, {"jane", 100}, {"x", 200}}; 25 | size_t num_items = sizeof(items) / sizeof(item); 26 | 27 | item* found = linear_search(items, num_items, "bob"); 28 | if (found == NULL) { 29 | return 1; 30 | } 31 | printf("linear_search: value of 'bob' is %d\n", 
found->value); 32 | return 0; 33 | } 34 | -------------------------------------------------------------------------------- /samples/output/bsearch.txt: -------------------------------------------------------------------------------- 1 | bsearch: value of 'bob' is 11 2 | binary_search: value of 'bob' is 11 3 | -------------------------------------------------------------------------------- /samples/output/demo.txt: -------------------------------------------------------------------------------- 1 | foo 1 2 | bar 4 3 | the 2 4 | 3 5 | -------------------------------------------------------------------------------- /samples/output/dump.txt: -------------------------------------------------------------------------------- 1 | index 0: empty 2 | index 1: key jane, value 100 3 | index 2: empty 4 | index 3: empty 5 | index 4: key bob, value 11 6 | index 5: empty 7 | index 6: empty 8 | index 7: key foo, value 10 9 | index 8: key bazz, value 36 10 | index 9: key x, value 200 11 | index 10: key bar, value 42 12 | index 11: empty 13 | index 12: key buzz, value 7 14 | index 13: empty 15 | index 14: empty 16 | index 15: empty 17 | -------------------------------------------------------------------------------- /samples/output/lsearch.txt: -------------------------------------------------------------------------------- 1 | linear_search: value of 'bob' is 11 2 | -------------------------------------------------------------------------------- /samples/output/stats-similar.txt: -------------------------------------------------------------------------------- 1 | len=466550 cap=1048576 avgprobe=1.378 2 | -------------------------------------------------------------------------------- /samples/output/stats-words.txt: -------------------------------------------------------------------------------- 1 | len=466550 cap=1048576 avgprobe=1.402 2 | -------------------------------------------------------------------------------- /samples/perfget.c: 
-------------------------------------------------------------------------------- 1 | // Simple performance test of C hash table get 2 | 3 | // See perftest.sh for results 4 | 5 | #include "../ht.h" 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | void exit_nomem(void) { 13 | fprintf(stderr, "out of memory\n"); 14 | exit(1); 15 | } 16 | 17 | void* found; 18 | 19 | int main(int argc, char **argv) { 20 | if (argc < 2) { 21 | fprintf(stderr, "usage: perftest file\n"); 22 | return 1; 23 | } 24 | 25 | // Read entire file into memory. 26 | FILE* f = fopen(argv[1], "rb"); 27 | if (f == NULL) { 28 | fprintf(stderr, "can't open file: %s\n", argv[1]); 29 | return 1; 30 | } 31 | fseek(f, 0, SEEK_END); 32 | long size = ftell(f); 33 | fseek(f, 0, SEEK_SET); 34 | char* contents = (char*)malloc(size + 1); 35 | if (contents == NULL) { 36 | exit_nomem(); 37 | } 38 | size_t nread = fread(contents, 1, size, f); 39 | if ((long)nread != size) { 40 | fprintf(stderr, "read %ld bytes instead of %ld", (long)nread, size); 41 | return 1; 42 | } 43 | fclose(f); 44 | contents[size] = 0; 45 | 46 | ht* counts = ht_create(); 47 | if (counts == NULL) { 48 | exit_nomem(); 49 | } 50 | 51 | for (char* p = contents; *p;) { 52 | // Skip whitespace. 53 | while (*p && *p <= ' ') { 54 | p++; 55 | } 56 | char* word = p; 57 | 58 | // Find end of word. 59 | while (*p && *p > ' ') { 60 | p++; 61 | } 62 | if (*p != 0) { 63 | *p = 0; 64 | p++; 65 | } 66 | 67 | // Look up word. 68 | void* value = ht_get(counts, word); 69 | if (value != NULL) { 70 | // Already exists, increment int that value points to. 71 | int* pcount = (int*)value; 72 | (*pcount)++; 73 | continue; 74 | } 75 | 76 | // Word not found, allocate space for new int and set to 1. 
77 | int* pcount = malloc(sizeof(int)); 78 | if (pcount == NULL) { 79 | exit_nomem(); 80 | } 81 | *pcount = 1; 82 | if (ht_set(counts, word, pcount) == NULL) { 83 | exit_nomem(); 84 | } 85 | } 86 | 87 | // Copy keys to array 88 | const char** keys = malloc(ht_length(counts) * sizeof(char*)); 89 | if (keys == NULL) { 90 | exit_nomem(); 91 | } 92 | hti it = ht_iterator(counts); 93 | int i = 0; 94 | while (ht_next(&it)) { 95 | keys[i] = it.key; 96 | i++; 97 | } 98 | 99 | int runs = 10; 100 | clock_t start = clock(); 101 | for (int run=0; run ' ' { 34 | i++ 35 | } 36 | if i > start { 37 | word := contents[start:i] 38 | counts[string(word)]++ 39 | } 40 | } 41 | 42 | // Copy keys to slice 43 | keys := make([]string, 0, len(counts)) 44 | for k := range counts { 45 | keys = append(keys, k) 46 | } 47 | 48 | const runs = 10 49 | start := time.Now() 50 | for run := 0; run < runs; run++ { 51 | for i := 0; i < len(counts); i++ { 52 | _ = counts[keys[i]] 53 | } 54 | } 55 | end := time.Now() 56 | elapsed := end.Sub(start) 57 | fmt.Printf("%d runs getting %d keys: %v\n", runs, len(counts), elapsed) 58 | } 59 | -------------------------------------------------------------------------------- /samples/perflbh.c: -------------------------------------------------------------------------------- 1 | // Performance comparison of lookups: linear search, binary search, hash table 2 | 3 | /* 4 | 5 | $ gcc -O2 -Wall -o perflbh samples/perflbh.c ht.c 6 | $ ./perflbh 7 | NUM ITEMS: 1 8 | linear, 5000000 runs: 0.046263000s 9 | binary, 5000000 runs: 0.038028000s 10 | hash , 5000000 runs: 0.055019000s 11 | NUM ITEMS: 2 12 | linear, 5000000 runs: 0.029790500s 13 | binary, 5000000 runs: 0.042562000s 14 | hash , 5000000 runs: 0.051395500s 15 | NUM ITEMS: 3 16 | linear, 5000000 runs: 0.034900667s 17 | binary, 5000000 runs: 0.045250667s 18 | hash , 5000000 runs: 0.050218000s 19 | NUM ITEMS: 4 20 | linear, 5000000 runs: 0.042225750s 21 | binary, 5000000 runs: 0.053457000s 22 | hash , 5000000 runs: 
0.049959500s 23 | NUM ITEMS: 5 24 | linear, 5000000 runs: 0.048300800s 25 | binary, 5000000 runs: 0.054513400s 26 | hash , 5000000 runs: 0.056074000s 27 | NUM ITEMS: 6 28 | linear, 5000000 runs: 0.056408000s 29 | binary, 5000000 runs: 0.062688833s 30 | hash , 5000000 runs: 0.059385833s 31 | NUM ITEMS: 7 32 | linear, 5000000 runs: 0.062235571s # this is where linear starts getting slower 33 | binary, 5000000 runs: 0.059203714s 34 | hash , 5000000 runs: 0.058669571s 35 | NUM ITEMS: 8 36 | linear, 5000000 runs: 0.074767000s 37 | binary, 5000000 runs: 0.063110250s 38 | hash , 5000000 runs: 0.054628375s 39 | NUM ITEMS: 9 40 | linear, 5000000 runs: 0.086145111s 41 | binary, 5000000 runs: 0.066840778s 42 | hash , 5000000 runs: 0.057300111s 43 | NUM ITEMS: 10 44 | linear, 5000000 runs: 0.085599600s 45 | binary, 5000000 runs: 0.069974600s 46 | hash , 5000000 runs: 0.052592700s 47 | ... 48 | 49 | */ 50 | 51 | #include "../ht.h" 52 | 53 | #include 54 | #include 55 | #include 56 | #include 57 | #include 58 | 59 | void exit_nomem(void) { 60 | fprintf(stderr, "out of memory\n"); 61 | exit(1); 62 | } 63 | 64 | typedef struct { 65 | char* key; 66 | int value; 67 | } item; 68 | 69 | item* linear_search(item* items, size_t size, const char* key) { 70 | for (size_t i=0; i 8 | #include 9 | #include 10 | #include 11 | 12 | void exit_nomem(void) { 13 | fprintf(stderr, "out of memory\n"); 14 | exit(1); 15 | } 16 | 17 | void* found; 18 | 19 | int main(int argc, char **argv) { 20 | if (argc < 2) { 21 | fprintf(stderr, "usage: perftest file\n"); 22 | return 1; 23 | } 24 | 25 | // Read entire file into memory. 
26 | FILE* f = fopen(argv[1], "rb"); 27 | if (f == NULL) { 28 | fprintf(stderr, "can't open file: %s\n", argv[1]); 29 | return 1; 30 | } 31 | fseek(f, 0, SEEK_END); 32 | long size = ftell(f); 33 | fseek(f, 0, SEEK_SET); 34 | char* contents = (char*)malloc(size + 1); 35 | if (contents == NULL) { 36 | exit_nomem(); 37 | } 38 | size_t nread = fread(contents, 1, size, f); 39 | if ((long)nread != size) { 40 | fprintf(stderr, "read %ld bytes instead of %ld", (long)nread, size); 41 | return 1; 42 | } 43 | fclose(f); 44 | contents[size] = 0; 45 | 46 | ht* counts = ht_create(); 47 | if (counts == NULL) { 48 | exit_nomem(); 49 | } 50 | 51 | for (char* p = contents; *p;) { 52 | // Skip whitespace. 53 | while (*p && *p <= ' ') { 54 | p++; 55 | } 56 | char* word = p; 57 | 58 | // Find end of word. 59 | while (*p && *p > ' ') { 60 | p++; 61 | } 62 | if (*p != 0) { 63 | *p = 0; 64 | p++; 65 | } 66 | 67 | // Look up word. 68 | void* value = ht_get(counts, word); 69 | if (value != NULL) { 70 | // Already exists, increment int that value points to. 71 | int* pcount = (int*)value; 72 | (*pcount)++; 73 | continue; 74 | } 75 | 76 | // Word not found, allocate space for new int and set to 1. 
77 | int* pcount = malloc(sizeof(int)); 78 | if (pcount == NULL) { 79 | exit_nomem(); 80 | } 81 | *pcount = 1; 82 | if (ht_set(counts, word, pcount) == NULL) { 83 | exit_nomem(); 84 | } 85 | } 86 | 87 | // Copy keys to array 88 | const char** keys = malloc(ht_length(counts) * sizeof(char*)); 89 | if (keys == NULL) { 90 | exit_nomem(); 91 | } 92 | hti it = ht_iterator(counts); 93 | int i = 0; 94 | while (ht_next(&it)) { 95 | keys[i] = it.key; 96 | i++; 97 | } 98 | 99 | ht* table = ht_create(); 100 | if (table == NULL) { 101 | exit_nomem(); 102 | } 103 | 104 | int value = 1; // dummy value 105 | clock_t start = clock(); 106 | for (int i=0; i ' ' { 34 | i++ 35 | } 36 | if i > start { 37 | word := contents[start:i] 38 | counts[string(word)]++ 39 | } 40 | } 41 | 42 | // Copy keys to slice 43 | keys := make([]string, 0, len(counts)) 44 | for k := range counts { 45 | keys = append(keys, k) 46 | } 47 | 48 | table := make(map[string]int) 49 | start := time.Now() 50 | for i := 0; i < len(counts); i++ { 51 | table[keys[i]] = 1 52 | } 53 | end := time.Now() 54 | elapsed := end.Sub(start) 55 | fmt.Printf("setting %d keys: %v\n", len(counts), elapsed) 56 | } 57 | -------------------------------------------------------------------------------- /samples/perftest.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # perfget - C version 4 | # 10 runs getting 466550 keys: 472.597ms 5 | # 10 runs getting 466550 keys: 481.157ms 6 | # 10 runs getting 466550 keys: 470.343ms 7 | # 10 runs getting 466550 keys: 464.349ms 8 | # 10 runs getting 466550 keys: 465.120ms 9 | # 10 runs getting 466550 keys: 465.255ms 10 | # 10 runs getting 466550 keys: 463.070ms 11 | # MINIMUM TIME: 463.1ms (51% slower than Go) 12 | 13 | # perfget - Go version 14 | # 10 runs getting 466550 keys: 308.543747ms 15 | # 10 runs getting 466550 keys: 349.240019ms 16 | # 10 runs getting 466550 keys: 306.092176ms 17 | # 10 runs getting 466550 keys: 
308.505962ms 18 | # 10 runs getting 466550 keys: 334.631387ms 19 | # 10 runs getting 466550 keys: 313.506291ms 20 | # 10 runs getting 466550 keys: 337.60812ms 21 | # MINIMUM TIME: 306.1ms 22 | 23 | # perfset - C version 24 | # setting 466550 keys: 120.224000000s 25 | # setting 466550 keys: 121.672000000s 26 | # setting 466550 keys: 119.682000000s 27 | # setting 466550 keys: 124.031000000s 28 | # setting 466550 keys: 114.645000000s 29 | # setting 466550 keys: 116.240000000s 30 | # setting 466550 keys: 118.942000000s 31 | # MINIMUM TIME: 114.6ms (40% faster than Go) 32 | 33 | # perfset - Go version 34 | # setting 466550 keys: 193.470927ms 35 | # setting 466550 keys: 211.229354ms 36 | # setting 466550 keys: 194.889048ms 37 | # setting 466550 keys: 191.71137ms 38 | # setting 466550 keys: 200.308761ms 39 | # setting 466550 keys: 199.717919ms 40 | # setting 466550 keys: 207.069321ms 41 | # MINIMUM TIME: 191.7ms 42 | 43 | # NOTE - words.txt is from here (public domain): 44 | # https://github.com/dwyl/english-words/ 45 | 46 | set -e 47 | 48 | echo 'perfget - C version' 49 | gcc -Wall -O2 -o perfget-c samples/perfget.c ht.c 50 | ./perfget-c samples/words.txt 51 | ./perfget-c samples/words.txt 52 | ./perfget-c samples/words.txt 53 | ./perfget-c samples/words.txt 54 | ./perfget-c samples/words.txt 55 | ./perfget-c samples/words.txt 56 | ./perfget-c samples/words.txt 57 | 58 | echo 'perfget - Go version' 59 | go build -o perfget-go samples/perfget.go 60 | ./perfget-go samples/words.txt 61 | ./perfget-go samples/words.txt 62 | ./perfget-go samples/words.txt 63 | ./perfget-go samples/words.txt 64 | ./perfget-go samples/words.txt 65 | ./perfget-go samples/words.txt 66 | ./perfget-go samples/words.txt 67 | 68 | echo 69 | 70 | echo 'perfset - C version' 71 | gcc -Wall -O2 -o perfset-c samples/perfset.c ht.c 72 | ./perfset-c samples/words.txt 73 | ./perfset-c samples/words.txt 74 | ./perfset-c samples/words.txt 75 | ./perfset-c samples/words.txt 76 | ./perfset-c samples/words.txt 77 
| ./perfset-c samples/words.txt 78 | ./perfset-c samples/words.txt 79 | 80 | echo 'perfset - Go version' 81 | go build -o perfset-go samples/perfset.go 82 | ./perfset-go samples/words.txt 83 | ./perfset-go samples/words.txt 84 | ./perfset-go samples/words.txt 85 | ./perfset-go samples/words.txt 86 | ./perfset-go samples/words.txt 87 | ./perfset-go samples/words.txt 88 | ./perfset-go samples/words.txt 89 | -------------------------------------------------------------------------------- /samples/stats.c: -------------------------------------------------------------------------------- 1 | // Show statistics of hash table 2 | 3 | /* 4 | 5 | $ python3 samples/gensimilar.py 466550 >samples/similar.txt 6 | $ gcc -O2 -Wall -o stats samples/stats.c ht.c 7 | $ ./stats 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | void exit_nomem(void) { 23 | fprintf(stderr, "out of memory\n"); 24 | exit(1); 25 | } 26 | 27 | // Copied from ht.c 28 | typedef struct { 29 | char* key; // key is NULL if this slot is empty 30 | void* value; 31 | } ht_entry; 32 | 33 | struct ht { 34 | ht_entry* entries; // hash slots 35 | size_t capacity; // size of _entries array 36 | size_t length; // number of items in hash table 37 | }; 38 | 39 | #define FNV_OFFSET 14695981039346656037UL 40 | #define FNV_PRIME 1099511628211UL 41 | 42 | static uint64_t hash_key(const char* key) { 43 | uint64_t hash = FNV_OFFSET; 44 | for (const char* p = key; *p; p++) { 45 | hash ^= (uint64_t)(unsigned char)(*p); 46 | hash *= FNV_PRIME; 47 | } 48 | return hash; 49 | } 50 | 51 | // Copied from ht_get, but return probe length instead of value. 
52 | size_t get_probe_len(ht* table, const char* key) { 53 | uint64_t hash = hash_key(key); 54 | size_t index = (size_t)(hash & (uint64_t)(table->capacity - 1)); 55 | 56 | size_t probe_len = 0; 57 | while (table->entries[index].key != NULL) { 58 | probe_len++; 59 | if (strcmp(key, table->entries[index].key) == 0) { 60 | return probe_len; 61 | } 62 | index++; 63 | if (index >= table->capacity) { 64 | index = 0; 65 | } 66 | } 67 | return probe_len; 68 | } 69 | 70 | int main(void) { 71 | ht* counts = ht_create(); 72 | if (counts == NULL) { 73 | exit_nomem(); 74 | } 75 | 76 | // Read next word from stdin (at most 100 chars long). 77 | char word[101]; 78 | while (scanf("%100s", word) != EOF) { 79 | // Look up word. 80 | void* value = ht_get(counts, word); 81 | if (value != NULL) { 82 | // Already exists, increment int that value points to. 83 | int* pcount = (int*)value; 84 | (*pcount)++; 85 | continue; 86 | } 87 | 88 | // Word not found, allocate space for new int and set to 1. 89 | int* pcount = malloc(sizeof(int)); 90 | if (pcount == NULL) { 91 | exit_nomem(); 92 | } 93 | *pcount = 1; 94 | if (ht_set(counts, word, pcount) == NULL) { 95 | exit_nomem(); 96 | } 97 | } 98 | 99 | // Calculate average probe length. 
100 | hti it = ht_iterator(counts); 101 | size_t total_probes = 0; 102 | while (ht_next(&it)) { 103 | total_probes += get_probe_len(counts, it.key); 104 | free(it.value); 105 | } 106 | 107 | printf("len=%lu cap=%lu avgprobe=%.3f\n", 108 | ht_length(counts), counts->capacity, (double)total_probes / ht_length(counts)); 109 | 110 | ht_destroy(counts); 111 | return 0; 112 | } 113 | -------------------------------------------------------------------------------- /samples/testall.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | gcc -Wall -O2 -o perfget-c samples/perfget.c ht.c 6 | go build -o perfget-go samples/perfget.go 7 | gcc -Wall -O2 -o perfset-c samples/perfset.c ht.c 8 | go build -o perfset-go samples/perfset.go 9 | gcc -O2 -Wall -o perflbh samples/perflbh.c ht.c 10 | 11 | gcc -Wall -O2 -o lsearch samples/lsearch.c && ./lsearch >samples/output/lsearch.txt 12 | 13 | gcc -Wall -O2 -o bsearch samples/bsearch.c && ./bsearch >samples/output/bsearch.txt 14 | 15 | gcc -Wall -O2 -o demo samples/demo.c ht.c 16 | echo 'foo bar the bar bar bar the' | ./demo >samples/output/demo.txt 17 | 18 | gcc -Wall -O2 -o dump samples/dump.c ht.c && ./dump >samples/output/dump.txt 19 | 20 | python3 samples/gensimilar.py 466550 >samples/similar.txt 21 | gcc -O2 -Wall -o stats samples/stats.c ht.c 22 | ./stats samples/output/stats-words.txt 23 | ./stats samples/output/stats-similar.txt 24 | 25 | git diff --exit-code samples/output/* 26 | echo 'All good!' 27 | --------------------------------------------------------------------------------