├── khashp ├── Makefile ├── test_str.c ├── test_int.c ├── khashp.h └── khashp.c ├── examples ├── Makefile ├── 11int.c ├── 21int_ens.c ├── 12str.c └── 23large_ens.c ├── README.md └── khashl.h /khashp/Makefile: -------------------------------------------------------------------------------- 1 | CFLAGS=-O2 -std=c99 -Wall -Wc++-compat #-fsanitize=address,undefined 2 | EXE=test_int test_str 3 | 4 | all:$(EXE) 5 | 6 | test_int:test_int.c khashp.c khashp.h 7 | $(CC) $(CFLAGS) -o $@ $< khashp.c 8 | 9 | test_str:test_str.c khashp.c khashp.h 10 | $(CC) $(CFLAGS) -o $@ $< khashp.c 11 | 12 | clean: 13 | rm -f $(EXE) 14 | -------------------------------------------------------------------------------- /examples/Makefile: -------------------------------------------------------------------------------- 1 | CFLAGS=-O2 -Wall 2 | EXE=11int 12str 21int_ens 23large_ens 3 | 4 | all:$(EXE) 5 | 6 | 11int:11int.c ../khashl.h 7 | $(CC) $(CFLAGS) -I.. -o $@ $< 8 | 9 | 12str:12str.c ../khashl.h 10 | $(CC) $(CFLAGS) -I.. -o $@ $< 11 | 12 | 21int_ens:21int_ens.c ../khashl.h 13 | $(CC) $(CFLAGS) -I.. -o $@ $< 14 | 15 | 23large_ens:23large_ens.c ../khashl.h 16 | $(CC) $(CFLAGS) -I.. 
-o $@ $< 17 | 18 | clean: 19 | rm -f $(EXE) 20 | -------------------------------------------------------------------------------- /khashp/test_str.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "khashp.h" 4 | 5 | int main(int argc, char *argv[]) 6 | { 7 | char s[4096]; // max string length: 4095 characters 8 | khashp_t *h = khp_str_init(sizeof(int32_t), 1); 9 | while (scanf("%s", s) > 0) { 10 | int absent; 11 | khint_t k = khp_str_put(h, s, &absent); 12 | int32_t c = 0; 13 | if (!absent) khp_get_val(h, k, &c); 14 | ++c; 15 | khp_set_val(h, k, &c); 16 | } 17 | printf("# of distinct words: %d\n", khp_size(h)); 18 | khp_str_destroy(h); 19 | return 0; 20 | } 21 | -------------------------------------------------------------------------------- /examples/11int.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "khashl.h" 5 | 6 | KHASHL_MAP_INIT(KH_LOCAL, map64_t, map64, uint64_t, int, kh_hash_uint64, kh_eq_generic) 7 | 8 | int main(void) 9 | { 10 | int absent; 11 | khint_t k; 12 | map64_t *h; 13 | 14 | h = map64_init(); 15 | 16 | // put 17 | k = map64_put(h, 20, &absent); 18 | kh_val(h, k) = 2; 19 | k = map64_put(h, 50, &absent); 20 | kh_val(h, k) = 5; 21 | 22 | // get 23 | k = map64_get(h, 30); 24 | assert(k == kh_end(h)); // not found 25 | k = map64_get(h, 20); 26 | assert(k < kh_end(h)); // found 27 | 28 | // iterate 29 | kh_foreach(h, k) { 30 | printf("h[%lu]=%d\n", (unsigned long)kh_key(h, k), kh_val(h, k)); 31 | } 32 | 33 | map64_destroy(h); 34 | return 0; 35 | } 36 | -------------------------------------------------------------------------------- /examples/21int_ens.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "khashl.h" 5 | 6 | KHASHE_MAP_INIT(KH_LOCAL, map64_t, map64, uint64_t, int, kh_hash_uint64, kh_eq_generic) 7 
| 8 | int main(void) 9 | { 10 | int absent; 11 | kh_ensitr_t k; 12 | map64_t *h; 13 | 14 | h = map64_init(6); 15 | 16 | // put 17 | k = map64_put(h, 20, &absent); 18 | kh_ens_val(h, k) = 2; 19 | k = map64_put(h, 50, &absent); 20 | kh_ens_val(h, k) = 5; 21 | 22 | // get 23 | k = map64_get(h, 30); 24 | assert(kh_ens_is_end(k)); 25 | k = map64_get(h, 20); 26 | assert(!kh_ens_is_end(k)); 27 | 28 | // iterate 29 | kh_ens_foreach(h, k) { 30 | printf("h[%lu]=%d\n", (unsigned long)kh_ens_key(h, k), kh_ens_val(h, k)); 31 | } 32 | 33 | map64_destroy(h); 34 | return 0; 35 | } 36 | -------------------------------------------------------------------------------- /examples/12str.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "khashl.h" 6 | 7 | KHASHL_MAP_INIT(KH_LOCAL, strmap_t, strmap, const char*, int, kh_hash_str, kh_eq_str) 8 | 9 | int main(void) 10 | { 11 | int absent; 12 | khint_t k; 13 | strmap_t *h; 14 | 15 | h = strmap_init(); 16 | 17 | // put 18 | k = strmap_put(h, strdup("abc"), &absent); 19 | kh_val(h, k) = 2; 20 | k = strmap_put(h, strdup("def"), &absent); 21 | kh_val(h, k) = 5; 22 | k = strmap_put(h, "ghi", &absent); 23 | if (absent) { // if not already in the table 24 | kh_key(h, k) = strdup("ghi"); 25 | kh_val(h, k) = 7; 26 | } 27 | 28 | // get 29 | k = strmap_get(h, "xyz"); 30 | assert(k == kh_end(h)); // not found 31 | k = strmap_get(h, "abc"); 32 | assert(k < kh_end(h)); // found 33 | 34 | // iterate 35 | kh_foreach(h, k) { 36 | printf("h[%s]=%d\n", kh_key(h, k), kh_val(h, k)); 37 | free((void*)kh_key(h, k)); // free memory allocated by strdup() 38 | } 39 | 40 | strmap_destroy(h); 41 | return 0; 42 | } 43 | -------------------------------------------------------------------------------- /examples/23large_ens.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "khashl.h" 5 | 6 | 
KHASHE_MAP_INIT(KH_LOCAL, map64_t, map64, uint64_t, uint64_t, kh_hash_uint64, kh_eq_generic) 7 | 8 | static double udb_cputime(void) 9 | { 10 | struct rusage r; 11 | getrusage(RUSAGE_SELF, &r); 12 | return r.ru_utime.tv_sec + r.ru_stime.tv_sec + 1e-6 * (r.ru_utime.tv_usec + r.ru_stime.tv_usec); 13 | } 14 | 15 | static long udb_peakrss(void) 16 | { 17 | struct rusage r; 18 | getrusage(RUSAGE_SELF, &r); 19 | #ifdef __linux__ 20 | return r.ru_maxrss * 1024; 21 | #else 22 | return r.ru_maxrss; 23 | #endif 24 | } 25 | 26 | uint64_t splitmix64(uint64_t *x) 27 | { 28 | uint64_t z = ((*x) += 0x9e3779b97f4a7c15ULL); 29 | z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9ULL; 30 | z = (z ^ (z >> 27)) * 0x94d049bb133111ebULL; 31 | return z ^ (z >> 31); 32 | } 33 | 34 | int main(int argc, char *argv[]) 35 | { 36 | int i, n = 30000000; 37 | uint64_t x = 11, sum = 0; 38 | map64_t *h; 39 | kh_ensitr_t k; 40 | double t; 41 | 42 | if (argc > 1) n = atol(argv[1]); 43 | t = udb_cputime(); 44 | h = map64_init(6); 45 | for (i = 0; i < n; ++i) { 46 | uint64_t z, key; 47 | int absent; 48 | z = splitmix64(&x); 49 | key = z % (n>>2); 50 | k = map64_put(h, key, &absent); 51 | if (absent) kh_ens_val(h, k) = z; 52 | else map64_del(h, k); 53 | } 54 | kh_ens_foreach(h, k) { 55 | sum += kh_ens_val(h, k); 56 | } 57 | printf("elements: %lu\n", (long)kh_ens_size(h)); 58 | printf("checksum: %llx\n", sum); 59 | printf("CPU time: %.3f sec\n", udb_cputime() - t); 60 | printf("Peak RSS: %.3f MB\n", udb_peakrss() / 1024. 
/ 1024.); 61 | map64_destroy(h); 62 | return 0; 63 | } 64 | -------------------------------------------------------------------------------- /khashp/test_int.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #if 0 8 | #define KHASHP_STATIC 9 | #include "khashp.c" 10 | #else 11 | #include "khashp.h" 12 | #endif 13 | 14 | static double udb_cputime(void) 15 | { 16 | struct rusage r; 17 | getrusage(RUSAGE_SELF, &r); 18 | return r.ru_utime.tv_sec + r.ru_stime.tv_sec + 1e-6 * (r.ru_utime.tv_usec + r.ru_stime.tv_usec); 19 | } 20 | 21 | static long udb_peakrss(void) 22 | { 23 | struct rusage r; 24 | getrusage(RUSAGE_SELF, &r); 25 | #ifdef __linux__ 26 | return r.ru_maxrss * 1024; 27 | #else 28 | return r.ru_maxrss; 29 | #endif 30 | } 31 | 32 | uint64_t splitmix64(uint64_t *x) 33 | { 34 | uint64_t z = ((*x) += 0x9e3779b97f4a7c15ULL); 35 | z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9ULL; 36 | z = (z ^ (z >> 27)) * 0x94d049bb133111ebULL; 37 | return z ^ (z >> 31); 38 | } 39 | 40 | static khint_t hash_fn64(const void *p, uint32_t key_len) 41 | { 42 | uint64_t x = *(uint64_t*)p; 43 | x ^= x >> 30; 44 | x *= 0xbf58476d1ce4e5b9ULL; 45 | x ^= x >> 27; 46 | x *= 0x94d049bb133111ebULL; 47 | x ^= x >> 31; 48 | return (khint_t)x; 49 | } 50 | 51 | static int key_eq64(const void *p1, const void *p2, uint32_t key_len) 52 | { 53 | return *(uint64_t*)p1 == *(uint64_t*)p2; 54 | } 55 | 56 | int main(int argc, char *argv[]) 57 | { 58 | int i, n = 30000000; 59 | uint64_t x = 11, sum = 0; 60 | khashp_t *h; 61 | double t; 62 | khint_t k; 63 | 64 | if (argc > 1) n = atol(argv[1]); 65 | t = udb_cputime(); 66 | h = khp_init(8, 8, hash_fn64, key_eq64); 67 | for (i = 0; i < n; ++i) { 68 | uint64_t z, key; 69 | int absent; 70 | z = splitmix64(&x); 71 | key = z % (n>>2); 72 | k = khp_put(h, &key, &absent); 73 | if (absent) khp_set_val(h, k, &z); 74 | else khp_del(h, k); 75 | } 76 | khp_foreach(h, k) { 
77 | uint64_t v; 78 | khp_get_val(h, k, &v); 79 | sum += v; 80 | } 81 | printf("elements: %lu\n", (long)khp_size(h)); 82 | printf("checksum: %llx\n", sum); 83 | printf("CPU time: %.3f sec\n", udb_cputime() - t); 84 | printf("Peak RSS: %.3f MB\n", udb_peakrss() / 1024. / 1024.); 85 | khp_destroy(h); 86 | return 0; 87 | } 88 | -------------------------------------------------------------------------------- /khashp/khashp.h: -------------------------------------------------------------------------------- 1 | #ifndef __AC_KHASHP_H 2 | #define __AC_KHASHP_H 3 | 4 | #define AC_VERSION_KHASHP_H "r35" 5 | 6 | #include 7 | #include 8 | 9 | typedef uint32_t khint_t; 10 | typedef khint_t (*khp_hash_fn_t)(const void *key, uint32_t key_len); 11 | typedef int (*khp_key_eq_t)(const void *key1, const void *key2, uint32_t key_len); 12 | 13 | typedef struct { 14 | uint32_t key_len, val_len; // key and value lengths in bytes 15 | uint16_t bits; // the capacity of the hash table is 1<b? 1U<bits : 0U; } 132 | 133 | /** End "iterator" */ 134 | static inline khint_t khp_end(const khashp_t *h) { return khp_capacity(h); } 135 | 136 | /** Get the number of elements in a hash table */ 137 | static inline khint_t khp_size(const khashp_t *h) { return h->count; } 138 | 139 | /** Test whether a bucket is occupied */ 140 | static inline int khp_exist(const khashp_t *h, khint_t x) { return h->used[x>>5] >> (x&0x1fU) & 1U; } 141 | 142 | /** Iterate over a hash table */ 143 | #define khp_foreach(h, x) for ((x) = 0; (x) != khp_end(h); ++(x)) if (khp_exist((h), (x))) 144 | 145 | static inline void *khp_get_bucket(const khashp_t *h, khint_t i) 146 | { 147 | return &h->b[(h->key_len + h->val_len) * i]; 148 | } 149 | 150 | #ifdef __cplusplus 151 | } 152 | #endif 153 | 154 | #endif // defined(__AC_KHASHP_H) 155 | -------------------------------------------------------------------------------- /khashp/khashp.c: -------------------------------------------------------------------------------- 1 | #include 2 
| #include 3 | #include "khashp.h" 4 | 5 | #ifdef KHASHP_STATIC 6 | #define KHP_SCOPE static inline 7 | #else 8 | #define KHP_SCOPE 9 | #endif 10 | 11 | #define kh_max_count(cap) (((cap)>>1) + ((cap)>>2)) /* default load factor: 75% */ 12 | 13 | #define MALLOC(type, cnt) ((type*)malloc((cnt) * sizeof(type))) 14 | #define CALLOC(type, cnt) ((type*)calloc((cnt), sizeof(type))) 15 | #define REALLOC(type, ptr, cnt) ((type*)realloc((ptr), (cnt) * sizeof(type))) 16 | 17 | #define __kh_used(flag, i) (flag[i>>5] >> (i&0x1fU) & 1U) 18 | #define __kh_set_used(flag, i) (flag[i>>5] |= 1U<<(i&0x1fU)) 19 | #define __kh_set_unused(flag, i) (flag[i>>5] &= ~(1U<<(i&0x1fU))) 20 | 21 | #define __kh_fsize(m) ((m) < 32? 1 : (m)>>5) 22 | 23 | static inline khint_t __kh_h2b(khint_t hash, khint_t bits) { return hash * 2654435769U >> (32 - bits); } // Fibonacci hashing 24 | 25 | static khint_t khp_hash_fn0(const void *p, uint32_t len) // FNV-1a as generic hash function 26 | { 27 | const uint8_t *s = (const uint8_t*)p; 28 | khint_t h = 2166136261U; 29 | uint32_t i; 30 | for (i = 0; i < len; ++i) 31 | h ^= s[i], h *= 16777619; 32 | return h; 33 | } 34 | 35 | static int khp_key_eq0(const void *key1, const void *key2, uint32_t key_len) // generic equality function 36 | { 37 | return memcmp(key1, key2, key_len) == 0; 38 | } 39 | 40 | KHP_SCOPE khashp_t *khp_init(uint32_t key_len, uint32_t val_len, khp_hash_fn_t fn, khp_key_eq_t eq) 41 | { 42 | khashp_t *h = CALLOC(khashp_t, 1); 43 | h->key_len = key_len, h->val_len = val_len; 44 | h->hash_fn = fn? fn : khp_hash_fn0; 45 | h->key_eq = eq? 
eq : khp_key_eq0; 46 | return h; 47 | } 48 | 49 | KHP_SCOPE void khp_destroy(khashp_t *h) 50 | { 51 | if (h == 0) return; 52 | free(h->b); free(h); 53 | } 54 | 55 | KHP_SCOPE void khp_clear(khashp_t *h) 56 | { 57 | if (h == 0 || h->used == 0) return; 58 | khint_t n_buckets = (khint_t)1U << h->bits; 59 | memset(h->used, 0, __kh_fsize(n_buckets) * sizeof(uint32_t)); 60 | h->count = 0; 61 | } 62 | 63 | KHP_SCOPE khint_t khp_get(const khashp_t *h, const void *key) 64 | { 65 | khint_t i, last, n_buckets, mask, hash; 66 | if (h->b == 0) return 0; 67 | hash = h->hash_fn(key, h->key_len); 68 | n_buckets = (khint_t)1U << h->bits; 69 | mask = n_buckets - 1U; 70 | i = last = __kh_h2b(hash, h->bits); 71 | while (__kh_used(h->used, i) && h->key_eq(khp_get_bucket(h, i), key, h->key_len) != 0) { 72 | i = (i + 1U) & mask; 73 | if (i == last) return n_buckets; 74 | } 75 | return !__kh_used(h->used, i)? n_buckets : i; 76 | } 77 | 78 | KHP_SCOPE int khp_resize(khashp_t *h, khint_t new_n_buckets) 79 | { 80 | uint32_t *new_used = 0; 81 | uint8_t *tmp; 82 | khint_t j = 0, x = new_n_buckets, n_buckets, new_bits, new_mask; 83 | while ((x >>= 1) != 0) ++j; 84 | if (new_n_buckets & (new_n_buckets - 1)) ++j; 85 | new_bits = j > 2? j : 2; 86 | new_n_buckets = (khint_t)1U << new_bits; 87 | if (h->count > kh_max_count(new_n_buckets)) return 0; /* requested size is too small */ 88 | new_used = MALLOC(uint32_t, __kh_fsize(new_n_buckets)); 89 | memset(new_used, 0, __kh_fsize(new_n_buckets) * sizeof(uint32_t)); 90 | if (!new_used) return -1; /* not enough memory */ 91 | n_buckets = h->b? 
(khint_t)1U<bits : 0U; 92 | if (n_buckets < new_n_buckets) { /* expand */ 93 | uint8_t *new_b = REALLOC(uint8_t, h->b, new_n_buckets * (h->key_len + h->val_len)); 94 | if (!new_b) { free(new_used); return -1; } 95 | h->b = new_b; 96 | } /* otherwise shrink */ 97 | new_mask = new_n_buckets - 1; 98 | tmp = MALLOC(uint8_t, h->key_len + h->val_len); 99 | for (j = 0; j != n_buckets; ++j) { 100 | void *key; 101 | if (!__kh_used(h->used, j)) continue; 102 | key = khp_get_bucket(h, j); 103 | __kh_set_unused(h->used, j); 104 | while (1) { /* kick-out process; sort of like in Cuckoo hashing */ 105 | khint_t i; 106 | i = __kh_h2b(h->hash_fn(key, h->key_len), new_bits); 107 | while (__kh_used(new_used, i)) i = (i + 1) & new_mask; 108 | __kh_set_used(new_used, i); 109 | if (i < n_buckets && __kh_used(h->used, i)) { /* kick out the existing element */ 110 | void *keyi = khp_get_bucket(h, i); 111 | memcpy(tmp, keyi, h->key_len + h->val_len); 112 | memcpy(keyi, key, h->key_len + h->val_len); 113 | memcpy(key, tmp, h->key_len + h->val_len); 114 | __kh_set_unused(h->used, i); /* mark it as deleted in the old hash table */ 115 | } else { /* write the element and jump out of the loop */ 116 | memcpy(khp_get_bucket(h, i), key, h->key_len + h->val_len); 117 | break; 118 | } 119 | } 120 | } 121 | free(tmp); 122 | if (n_buckets > new_n_buckets) /* shrink the hash table */ 123 | h->b = REALLOC(uint8_t, h->b, new_n_buckets * (h->key_len + h->val_len)); 124 | free(h->used); /* free the working space */ 125 | h->used = new_used, h->bits = new_bits; 126 | return 0; 127 | } 128 | 129 | KHP_SCOPE khint_t khp_put(khashp_t *h, const void *key, int *absent) 130 | { 131 | khint_t n_buckets, i, last, mask, hash; 132 | n_buckets = h->b? 
(khint_t)1U<bits : 0U; 133 | *absent = -1; 134 | if (h->count >= kh_max_count(n_buckets)) { /* rehashing */ 135 | if (khp_resize(h, n_buckets + 1U) < 0) 136 | return n_buckets; 137 | n_buckets = (khint_t)1U<bits; 138 | } /* TODO: to implement automatically shrinking; resize() already support shrinking */ 139 | mask = n_buckets - 1; 140 | hash = h->hash_fn(key, h->key_len); 141 | i = last = __kh_h2b(hash, h->bits); 142 | while (__kh_used(h->used, i) && !h->key_eq(khp_get_bucket(h, i), key, h->key_len)) { 143 | i = (i + 1U) & mask; 144 | if (i == last) break; 145 | } 146 | if (!__kh_used(h->used, i)) { /* not present at all */ 147 | memcpy(khp_get_bucket(h, i), key, h->key_len); 148 | __kh_set_used(h->used, i); 149 | ++h->count; 150 | *absent = 1; 151 | } else *absent = 0; /* Don't touch h->b[i] if present */ 152 | return i; 153 | } 154 | 155 | KHP_SCOPE int khp_del(khashp_t *h, khint_t i) 156 | { 157 | khint_t j = i, k, mask, n_buckets; 158 | if (h->b == 0) return 0; 159 | n_buckets = (khint_t)1U<bits; 160 | mask = n_buckets - 1U; 161 | while (1) { 162 | j = (j + 1U) & mask; 163 | if (j == i || !__kh_used(h->used, j)) break; /* j==i only when the table is completely full */ 164 | k = __kh_h2b(h->hash_fn(khp_get_bucket(h, j), h->key_len), h->bits); 165 | if ((j > i && (k <= i || k > j)) || (j < i && (k <= i && k > j))) 166 | memcpy(khp_get_bucket(h, i), khp_get_bucket(h, j), h->key_len + h->val_len), i = j; 167 | } 168 | __kh_set_unused(h->used, i); 169 | --h->count; 170 | return 1; 171 | } 172 | 173 | KHP_SCOPE void khp_get_val(const khashp_t *h, khint_t i, void *v) 174 | { 175 | uint8_t *p = (uint8_t*)khp_get_bucket(h, i) + h->key_len; 176 | if (h->val_len > 0) memcpy(v, p, h->val_len); 177 | } 178 | 179 | KHP_SCOPE void khp_set_val(const khashp_t *h, khint_t i, const void *v) 180 | { 181 | uint8_t *p = (uint8_t*)khp_get_bucket(h, i) + h->key_len; 182 | if (h->val_len > 0) memcpy(p, v, h->val_len); 183 | } 184 | 185 | KHP_SCOPE void khp_get_key(const khashp_t *h, 
khint_t i, void *p) 186 | { 187 | memcpy(p, khp_get_bucket(h, i), h->key_len); 188 | } 189 | 190 | /********************* 191 | * String hash table * 192 | *********************/ 193 | 194 | static khint_t khp_str_hash_fn(const void *s, uint32_t key_len) // FNV-1a 195 | { 196 | const uint8_t *p; 197 | memcpy(&p, s, key_len); // get the address to the string 198 | khint_t h = 2166136261U; 199 | for (; *p; ++p) 200 | h ^= *p, h *= 16777619; 201 | return h; 202 | } 203 | 204 | static int kh_str_key_eq(const void *s1, const void *s2, uint32_t key_len) 205 | { 206 | const char *p1, *p2; 207 | memcpy(&p1, s1, key_len); 208 | memcpy(&p2, s2, key_len); 209 | return strcmp(p1, p2) == 0; 210 | } 211 | 212 | KHP_SCOPE khashp_t *khp_str_init(uint32_t val_len, int dup) 213 | { 214 | khashp_t *h = khp_init(sizeof(void*), val_len, khp_str_hash_fn, kh_str_key_eq); 215 | h->dup = !!dup; 216 | return h; 217 | } 218 | 219 | KHP_SCOPE void khp_str_destroy(khashp_t *h) 220 | { 221 | if (h->dup) { 222 | khint_t k; 223 | khp_foreach(h, k) { 224 | char *p; 225 | khp_get_key(h, k, &p); 226 | free(p); // free 227 | } 228 | } 229 | khp_destroy(h); 230 | } 231 | 232 | KHP_SCOPE khint_t khp_str_get(const khashp_t *h, const char *key) 233 | { 234 | return khp_get(h, &key); 235 | } 236 | 237 | KHP_SCOPE khint_t khp_str_put(khashp_t *h, const char *key, int *absent) 238 | { 239 | khint_t k = khp_put(h, &key, absent); 240 | if (*absent) { 241 | if (h->dup) { 242 | size_t len = strlen(key); 243 | char *q = MALLOC(char, len + 1); 244 | memcpy(q, key, len + 1); 245 | memcpy(khp_get_bucket(h, k), &q, h->key_len); // the bucket keeps the address to the string 246 | } else { 247 | memcpy(khp_get_bucket(h, k), &key, h->key_len); 248 | } 249 | } 250 | return k; 251 | } 252 | 253 | KHP_SCOPE int khp_str_del(khashp_t *h, khint_t i) 254 | { 255 | if (h->b == 0) return 0; 256 | if (h->dup) { 257 | char *p; 258 | khp_get_key(h, i, &p); 259 | free(p); 260 | } 261 | return khp_del(h, i); 262 | } 263 | 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Table of Contents 2 | 3 | - [Introduction](#intro) 4 | - [Usage](#use) 5 | - [Integer keys](#int) 6 | - [String keys](#str) 7 | - [Custom keys](#custom) 8 | - [Algorithm](#algo) 9 | - [Ensemble of hash tables](#ensemble) 10 | - [Rationale](#rationale) 11 | - [Use ensemble](#use-ens) 12 | - [Performance](#perf) 13 | 14 | ## Introduction 15 | 16 | Khashl is a single-header macro-based generic hash table library in C. It is an 17 | improved version of [khash][khash] from [klib][klib] and is one of the faster 18 | hash table implementations in C/C++. Klib also has a copy of khashl for 19 | historical reason. This repo provides more [examples][ex] and better 20 | documentation. 21 | 22 | ## Usage 23 | 24 | ### Integer keys 25 | 26 | Here is a small example for integer keys: 27 | ```c 28 | #include 29 | #include 30 | #include "khashl.h" 31 | // Instantiate 32 | KHASHL_MAP_INIT(KH_LOCAL, map32_t, map32, uint32_t, int, kh_hash_uint32, kh_eq_generic) 33 | 34 | int main(void) { 35 | int absent; 36 | khint_t k; 37 | map32_t *h = map32_init(); 38 | k = map32_put(h, 20, &absent); // get iterator to the new bucket 39 | kh_val(h, k) = 2; // set value 40 | k = map32_get(h, 30); // query the hash table 41 | if (k < kh_end(h)) printf("found key '30'\n"); 42 | kh_foreach(h, k) { // iterate 43 | printf("h[%u]=%d\n", kh_key(h, k), kh_val(h, k)); 44 | } 45 | map32_destroy(h); // free 46 | return 0; 47 | } 48 | ``` 49 | 50 | To use khashl, you need to instantiate functions specific to your types with 51 | ```c 52 | KHASHL_MAP_INIT(scope, table_type, prefix, key_type, val_type, hash_func, eq_func) 53 | ``` 54 | where: 55 | * `scope` is the scope of instantiated functions. It can be empty for global 56 | visibility or `KH_LOCAL`. 57 | * `table_type` is the type of the hash table. 
It can be any symbol that has not 58 | been used. 59 | * `prefix` is the prefix of instantiated functions (see below) 60 | * `key_type` is the key type 61 | * `val_type` is the value type 62 | * `hash_func` is the hash function. Khashl provides hash functions for 32-bit 63 | integers, 64-bit integers and strings. See [khashl.h][khashl.h] for details. 64 | * `eq_func` is the equality function. For primitive types, use the 65 | `kh_eq_generic()` macro; for strings, use `kh_eq_str()`. 66 | 67 | After instantiation, you will be able to use the following functions: 68 | * `table_type *prefix_init(void)`: initialize an empty hash table. 69 | * `khint_t prefix_put(table_type*, key_type, int*)`: put a key into the 70 | table. It returns the position in the table. The last parameter tells you 71 | whether the key is new. 72 | * `khint_t prefix_get(table_type*, key_type)`: query a key. It returns 73 | the position of the key if the key is present; otherwise the function 74 | returns `kh_end(table)`. 75 | * `prefix_del(table_type*, khint_t)`: delete the key at a position. 76 | * `prefix_destroy(table_type*)`: deallocate the entire table. 77 | 78 | In khashl, a position is like an iterator. `prefix_get()` and `prefix_put()` 79 | return iterators. Khashl additionally provides the following macros: 80 | * `key_type kh_key(table, pos)`: access or modify keys. It can be an L-value. 81 | Don't modify the content of keys. If the `key_type` is a pointer, you may 82 | change the value of the pointer but not the content the pointer points to. 83 | * `val_type kh_val(table, pos)`: access or modify values. 84 | * `kh_size(table)`: return the size of the table 85 | * `kh_end(table)`: return the capacity of the table 86 | * `kh_exist(table, pos)`: test whether the bucket at `pos` is occupied. `pos` 87 | must be smaller than `kh_end(table)`. 88 | * `kh_foreach(table, pos) { }`: iterate a table. `pos` should be declared with the 89 | `khint_t` type before this macro. 
Note that because `prefix_put()` and 90 | `prefix_del()` may change the content of the hash table, please do not call 91 | these two functions inside a foreach loop. 92 | 93 | ### String keys 94 | 95 | It is important to note that khashl only keeps the pointers to strings. You are 96 | responsible for managing the memory allocated to the strings. 97 | 98 | Here is an example for counting the number of distinct words on the command 99 | line: 100 | ```c 101 | // To run this program: `./this_prog abc bc abc a bc` 102 | #include 103 | #include 104 | #include "khashl.h" 105 | KHASHL_SET_INIT(KH_LOCAL, strmap_t, strmap, const char*, kh_hash_str, kh_eq_str) 106 | 107 | int main(int argc, char *argv[]) 108 | { 109 | strmap_t *h; 110 | int i, absent; 111 | h = strmap_init(); 112 | for (i = 1; i < argc; ++i) 113 | strmap_put(h, argv[i], &absent); 114 | printf("# of distinct words: %d\n", kh_size(h)); 115 | strmap_destroy(h); 116 | return 0; 117 | } 118 | ``` 119 | In this example, the string contents are already stored in the `argv[]` array. 120 | You don't need to worry about memory management. The following demonstrates 121 | how to insert string pointers and their contents into a hash table. 
122 | ```c 123 | // To run this program: `echo a bc a cd bc|./this_prog` 124 | #include 125 | #include 126 | #include "khashl.h" 127 | KHASHL_MAP_INIT(KH_LOCAL, strmap_t, strmap, const char*, int, kh_hash_str, kh_eq_str) 128 | 129 | int main(int argc, char *argv[]) 130 | { 131 | char s[4096]; // max string length: 4095 characters 132 | strmap_t *h; 133 | khint_t k; 134 | h = strmap_init(); 135 | while (scanf("%s", s) > 0) { 136 | int absent; 137 | k = strmap_put(h, s, &absent); 138 | if (absent) kh_key(h, k) = strdup(s), kh_val(h, k) = 0; 139 | // else, the key is not touched; we do nothing 140 | ++kh_val(h, k); 141 | } 142 | printf("# of distinct words: %d\n", kh_size(h)); 143 | // IMPORTANT: free memory allocated by strdup() above 144 | kh_foreach(h, k) { 145 | printf("%s: %d\n", kh_key(h, k), kh_val(h, k)); 146 | free((char*)kh_key(h, k)); 147 | } 148 | strmap_destroy(h); 149 | return 0; 150 | } 151 | ``` 152 | 153 | ### Custom keys 154 | 155 | You can put C `struct` into a hash table as long as you provide a hash function 156 | and an equality function. You can use macro functions. 157 | 158 | ## Algorithm 159 | 160 | Khashl uses linear probing and power-of-2 capacity. It applies [Fibonacci 161 | hashing][fib-hash] to protect against bad hash functions and implements 162 | [deletion without tombstones][no-tombstone]. Khashl uses one bit per bucket 163 | to indicate whether a bucket is empty. It has minimal memory overhead though 164 | this comes at the cost of one extra cache miss per query. Khashl does not use 165 | SIMD. 166 | 167 | ## Ensemble of hash tables 168 | 169 | Khashl uses 32-bit hashes, which means it cannot directly store more than 4 170 | billion keys. Nonetheless, it has a special way to handle billions of keys: 171 | ensemble of hash tables. 172 | 173 | ### Rationale 174 | 175 | Suppose a hash table consists of `n` smaller sub hash tables. A key `x` is 176 | located in sub-table `hash(x) % n`. 
Because it is rare for all sub-tables to 177 | rehash at the same time, the peak memory can be reduced. You can find more 178 | explanation in [this blog][ensemble]. In my opinion, **using an ensemble of 179 | hash tables is the best strategy for huge hash tables**. 180 | 181 | We can implement a hash table ensemble in the user space for any libraries. I 182 | have been using the idea since 2015. Nonetheless, it is more convenient to 183 | hide the details behind the library code such that users can use familiar hash 184 | table APIs. [phmap][phmap] is perhaps the first library to do this. Now khashl 185 | has this functionality as well. 186 | 187 | ### Use ensemble 188 | 189 | The [integer example above](#int) becomes: 190 | ```c 191 | #include 192 | #include 193 | #include "khashl.h" 194 | // use "KHASHE" for instantiation 195 | KHASHE_MAP_INIT(KH_LOCAL, map32_t, map32, uint32_t, int, kh_hash_uint32, kh_eq_generic) 196 | int main(void) { 197 | int absent; 198 | kh_ensitr_t k; // use kh_ensitr_t instead of khint_t 199 | map32_t *h = map32_init(6); // use 2**6=64 sub hash tables 200 | k = map32_put(h, 20, &absent); // get iterator to the new bucket 201 | kh_ens_val(h, k) = 2; // use kh_ens_val() instead of kh_val() 202 | k = map32_get(h, 30); // query the hash table 203 | if (!kh_ens_is_end(k)) printf("found key '30'\n"); // use kh_ens_is_end() 204 | kh_ens_foreach(h, k) { // use kh_ens_foreach() instead of kh_foreach() 205 | printf("h[%u]=%d\n", kh_ens_key(h, k), kh_ens_val(h, k)); 206 | } 207 | map32_destroy(h); 208 | return 0; 209 | } 210 | ``` 211 | You will have to change most macros and iteration: 212 | * `khint_t` → `kh_ensitr_t` (iterator type) 213 | * `kh_key()` → `kh_ens_key()` 214 | * `kh_val()` → `kh_ens_val()` 215 | * `kh_foreach()` → `kh_ens_foreach()` 216 | * `prefix_init(void)` → `prefix_init(int b)`, which enables `2**b` sub hash tables. 
217 | * `k == kh_end(h)` → `kh_ens_is_end(k)` for testing the presence of a key 218 | 219 | ## Performance 220 | 221 | 222 | 223 | For details, see [udb3][udb3]. 224 | 225 | [klib]: https://github.com/attractivechaos/klib 226 | [khash]: https://github.com/attractivechaos/klib/blob/master/khash.h 227 | [ex]: https://github.com/attractivechaos/khashl/tree/main/examples 228 | [khashl.h]: https://github.com/attractivechaos/khashl/blob/main/khashl.h 229 | [fib-hash]: https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/ 230 | [no-tombstone]: https://attractivechaos.wordpress.com/2019/12/28/deletion-from-hash-tables-without-tombstones/ 231 | [ensemble]: https://greg7mdp.github.io/parallel-hashmap/ 232 | [phmap]: https://github.com/greg7mdp/parallel-hashmap 233 | [udb3]: https://github.com/attractivechaos/udb3/ 234 | -------------------------------------------------------------------------------- /khashl.h: -------------------------------------------------------------------------------- 1 | /* The MIT License 2 | 3 | Copyright (c) 2019- by Attractive Chaos 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 20 | BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 21 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 22 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. 24 | */ 25 | 26 | #ifndef __AC_KHASHL_H 27 | #define __AC_KHASHL_H 28 | 29 | #define AC_VERSION_KHASHL_H "r36" 30 | 31 | #include 32 | #include 33 | #include 34 | 35 | /************************************ 36 | * Compiler specific configurations * 37 | ************************************/ 38 | 39 | #if UINT_MAX == 0xffffffffu 40 | typedef unsigned int khint32_t; 41 | #elif ULONG_MAX == 0xffffffffu 42 | typedef unsigned long khint32_t; 43 | #endif 44 | 45 | #if ULONG_MAX == ULLONG_MAX 46 | typedef unsigned long khint64_t; 47 | #else 48 | typedef unsigned long long khint64_t; 49 | #endif 50 | 51 | #ifndef kh_inline 52 | #ifdef _MSC_VER 53 | #define kh_inline __inline 54 | #else 55 | #define kh_inline inline 56 | #endif 57 | #endif /* kh_inline */ 58 | 59 | #ifndef klib_unused 60 | #if (defined __clang__ && __clang_major__ >= 3) || (defined __GNUC__ && __GNUC__ >= 3) 61 | #define klib_unused __attribute__ ((__unused__)) 62 | #else 63 | #define klib_unused 64 | #endif 65 | #endif /* klib_unused */ 66 | 67 | #define KH_LOCAL static kh_inline klib_unused 68 | 69 | typedef khint32_t khint_t; 70 | typedef const char *kh_cstr_t; 71 | 72 | /*********************** 73 | * Configurable macros * 74 | ***********************/ 75 | 76 | #ifndef kh_max_count /* set the max load factor */ 77 | #define kh_max_count(cap) (((cap)>>1) + ((cap)>>2)) /* default load factor: 75% */ 78 | #endif 79 | 80 | #ifndef kh_packed /* pack the key-value struct */ 81 | #define kh_packed __attribute__ ((__packed__)) 82 | #endif 83 | 84 | #if !defined(Kmalloc) || !defined(Kcalloc) || !defined(Krealloc) || !defined(Kfree) 85 | #define Kmalloc(km, type, cnt) ((type*)malloc((cnt) * sizeof(type))) 86 | #define Kcalloc(km, type, 
cnt) ((type*)calloc((cnt), sizeof(type))) 87 | #define Krealloc(km, type, ptr, cnt) ((type*)realloc((ptr), (cnt) * sizeof(type))) 88 | #define Kfree(km, ptr) free(ptr) 89 | #endif 90 | 91 | /**************************** 92 | * Simple private functions * 93 | ****************************/ 94 | 95 | #define __kh_used(flag, i) (flag[i>>5] >> (i&0x1fU) & 1U) 96 | #define __kh_set_used(flag, i) (flag[i>>5] |= 1U<<(i&0x1fU)) 97 | #define __kh_set_unused(flag, i) (flag[i>>5] &= ~(1U<<(i&0x1fU))) 98 | 99 | #define __kh_fsize(m) ((m) < 32? 1 : (m)>>5) 100 | 101 | static kh_inline khint_t __kh_h2b(khint_t hash, khint_t bits) { return hash * 2654435769U >> (32 - bits); } /* Fibonacci hashing */ 102 | 103 | /******************* 104 | * Hash table base * 105 | *******************/ 106 | 107 | #define __KHASHL_TYPE(HType, khkey_t) \ 108 | typedef struct HType { \ 109 | void *km; \ 110 | khint_t bits, count; \ 111 | khint32_t *used; \ 112 | khkey_t *keys; \ 113 | } HType; 114 | 115 | #define __KHASHL_PROTOTYPES(HType, prefix, khkey_t) \ 116 | extern HType *prefix##_init(void); \ 117 | extern HType *prefix##_init2(void *km); \ 118 | extern void prefix##_destroy(HType *h); \ 119 | extern void prefix##_clear(HType *h); \ 120 | extern khint_t prefix##_getp(const HType *h, const khkey_t *key); \ 121 | extern int prefix##_resize(HType *h, khint_t new_n_buckets); \ 122 | extern khint_t prefix##_putp(HType *h, const khkey_t *key, int *absent); \ 123 | extern void prefix##_del(HType *h, khint_t k); 124 | 125 | #define __KHASHL_IMPL_BASIC(SCOPE, HType, prefix) \ 126 | SCOPE HType *prefix##_init2(void *km) { \ 127 | HType *h = Kcalloc(km, HType, 1); \ 128 | h->km = km; \ 129 | return h; \ 130 | } \ 131 | SCOPE HType *prefix##_init(void) { return prefix##_init2(0); } \ 132 | SCOPE void prefix##_destroy(HType *h) { \ 133 | if (!h) return; \ 134 | Kfree(h->km, (void*)h->keys); Kfree(h->km, h->used); \ 135 | Kfree(h->km, h); \ 136 | } \ 137 | SCOPE void prefix##_clear(HType *h) { \ 138 | if (h 
&& h->used) { \ 139 | khint_t n_buckets = (khint_t)1U << h->bits; \ 140 | memset(h->used, 0, __kh_fsize(n_buckets) * sizeof(khint32_t)); \ 141 | h->count = 0; \ 142 | } \ 143 | } 144 | 145 | #define __KHASHL_IMPL_GET(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ 146 | SCOPE khint_t prefix##_getp_core(const HType *h, const khkey_t *key, khint_t hash) { \ 147 | khint_t i, last, n_buckets, mask; \ 148 | if (h->keys == 0) return 0; \ 149 | n_buckets = (khint_t)1U << h->bits; \ 150 | mask = n_buckets - 1U; \ 151 | i = last = __kh_h2b(hash, h->bits); \ 152 | while (__kh_used(h->used, i) && !__hash_eq(h->keys[i], *key)) { \ 153 | i = (i + 1U) & mask; \ 154 | if (i == last) return n_buckets; \ 155 | } \ 156 | return !__kh_used(h->used, i)? n_buckets : i; \ 157 | } \ 158 | SCOPE khint_t prefix##_getp(const HType *h, const khkey_t *key) { return prefix##_getp_core(h, key, __hash_fn(*key)); } \ 159 | SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { return prefix##_getp_core(h, &key, __hash_fn(key)); } 160 | 161 | #define __KHASHL_IMPL_RESIZE(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ 162 | SCOPE int prefix##_resize(HType *h, khint_t new_n_buckets) { \ 163 | khint32_t *new_used = 0; \ 164 | khint_t j = 0, x = new_n_buckets, n_buckets, new_bits, new_mask; \ 165 | while ((x >>= 1) != 0) ++j; \ 166 | if (new_n_buckets & (new_n_buckets - 1)) ++j; \ 167 | new_bits = j > 2? j : 2; \ 168 | new_n_buckets = (khint_t)1U << new_bits; \ 169 | if (h->count > kh_max_count(new_n_buckets)) return 0; /* requested size is too small */ \ 170 | new_used = Kmalloc(h->km, khint32_t, __kh_fsize(new_n_buckets)); \ 171 | if (!new_used) return -1; /* not enough memory */ \ 172 | memset(new_used, 0, __kh_fsize(new_n_buckets) * sizeof(khint32_t)); \ 173 | n_buckets = h->keys? 
(khint_t)1U<bits : 0U; \ 174 | if (n_buckets < new_n_buckets) { /* expand */ \ 175 | khkey_t *new_keys = Krealloc(h->km, khkey_t, h->keys, new_n_buckets); \ 176 | if (!new_keys) { Kfree(h->km, new_used); return -1; } \ 177 | h->keys = new_keys; \ 178 | } \ 179 | new_mask = new_n_buckets - 1; \ 180 | for (j = 0; j != n_buckets; ++j) { \ 181 | khkey_t key; \ 182 | if (!__kh_used(h->used, j)) continue; \ 183 | key = h->keys[j]; \ 184 | __kh_set_unused(h->used, j); \ 185 | while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \ 186 | khint_t i; \ 187 | i = __kh_h2b(__hash_fn(key), new_bits); \ 188 | while (__kh_used(new_used, i)) i = (i + 1) & new_mask; \ 189 | __kh_set_used(new_used, i); \ 190 | if (i < n_buckets && __kh_used(h->used, i)) { /* kick out the existing element */ \ 191 | { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \ 192 | __kh_set_unused(h->used, i); /* mark it as deleted in the old hash table */ \ 193 | } else { /* write the element and jump out of the loop */ \ 194 | h->keys[i] = key; \ 195 | break; \ 196 | } \ 197 | } \ 198 | } \ 199 | if (n_buckets > new_n_buckets) { /* shrink the hash table */ \ 200 | khkey_t *new_keys = Krealloc(h->km, khkey_t, h->keys, new_n_buckets); \ 201 | if (!new_keys) { Kfree(h->km, new_used); return -1; } \ 202 | h->keys = new_keys; \ 203 | } \ 204 | Kfree(h->km, h->used); /* free the working space */ \ 205 | h->used = new_used, h->bits = new_bits; \ 206 | return 0; \ 207 | } 208 | 209 | #define __KHASHL_IMPL_PUT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ 210 | SCOPE khint_t prefix##_putp_core(HType *h, const khkey_t *key, khint_t hash, int *absent) { \ 211 | khint_t n_buckets, i, last, mask; \ 212 | n_buckets = h->keys? 
(khint_t)1U<bits : 0U; \ 213 | *absent = -1; \ 214 | if (h->count >= kh_max_count(n_buckets)) { /* rehashing */ \ 215 | if (prefix##_resize(h, n_buckets + 1U) < 0) \ 216 | return n_buckets; \ 217 | n_buckets = (khint_t)1U<bits; \ 218 | } /* TODO: to implement automatically shrinking; resize() already support shrinking */ \ 219 | mask = n_buckets - 1; \ 220 | i = last = __kh_h2b(hash, h->bits); \ 221 | while (__kh_used(h->used, i) && !__hash_eq(h->keys[i], *key)) { \ 222 | i = (i + 1U) & mask; \ 223 | if (i == last) break; \ 224 | } \ 225 | if (!__kh_used(h->used, i)) { /* not present at all */ \ 226 | h->keys[i] = *key; \ 227 | __kh_set_used(h->used, i); \ 228 | ++h->count; \ 229 | *absent = 1; \ 230 | } else *absent = 0; /* Don't touch h->keys[i] if present */ \ 231 | return i; \ 232 | } \ 233 | SCOPE khint_t prefix##_putp(HType *h, const khkey_t *key, int *absent) { return prefix##_putp_core(h, key, __hash_fn(*key), absent); } \ 234 | SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { return prefix##_putp_core(h, &key, __hash_fn(key), absent); } 235 | 236 | #define __KHASHL_IMPL_DEL(SCOPE, HType, prefix, khkey_t, __hash_fn) \ 237 | SCOPE int prefix##_del(HType *h, khint_t i) { \ 238 | khint_t j = i, k, mask, n_buckets; \ 239 | if (h->keys == 0) return 0; \ 240 | n_buckets = (khint_t)1U<bits; \ 241 | mask = n_buckets - 1U; \ 242 | while (1) { \ 243 | j = (j + 1U) & mask; \ 244 | if (j == i || !__kh_used(h->used, j)) break; /* j==i only when the table is completely full */ \ 245 | k = __kh_h2b(__hash_fn(h->keys[j]), h->bits); \ 246 | if ((j > i && (k <= i || k > j)) || (j < i && (k <= i && k > j))) \ 247 | h->keys[i] = h->keys[j], i = j; \ 248 | } \ 249 | __kh_set_unused(h->used, i); \ 250 | --h->count; \ 251 | return 1; \ 252 | } 253 | 254 | #define KHASHL_DECLARE(HType, prefix, khkey_t) \ 255 | __KHASHL_TYPE(HType, khkey_t) \ 256 | __KHASHL_PROTOTYPES(HType, prefix, khkey_t) 257 | 258 | #define KHASHL_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, 
__hash_eq) \ 259 | __KHASHL_TYPE(HType, khkey_t) \ 260 | __KHASHL_IMPL_BASIC(SCOPE, HType, prefix) \ 261 | __KHASHL_IMPL_GET(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ 262 | __KHASHL_IMPL_RESIZE(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ 263 | __KHASHL_IMPL_PUT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ 264 | __KHASHL_IMPL_DEL(SCOPE, HType, prefix, khkey_t, __hash_fn) 265 | 266 | /*************************** 267 | * Ensemble of hash tables * 268 | ***************************/ 269 | 270 | typedef struct { 271 | khint_t sub, pos; 272 | } kh_ensitr_t; 273 | 274 | #define KHASHE_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ 275 | KHASHL_INIT(KH_LOCAL, HType##_sub, prefix##_sub, khkey_t, __hash_fn, __hash_eq) \ 276 | typedef struct HType { \ 277 | void *km; \ 278 | khint64_t count:54, bits:8; \ 279 | HType##_sub *sub; \ 280 | } HType; \ 281 | SCOPE HType *prefix##_init2(void *km, int bits) { \ 282 | HType *g; \ 283 | g = Kcalloc(km, HType, 1); \ 284 | if (!g) return 0; \ 285 | g->bits = bits, g->km = km; \ 286 | g->sub = Kcalloc(km, HType##_sub, 1U<bits; ++t) { Kfree(g->km, (void*)g->sub[t].keys); Kfree(g->km, g->sub[t].used); } \ 294 | Kfree(g->km, g->sub); Kfree(g->km, g); \ 295 | } \ 296 | SCOPE kh_ensitr_t prefix##_getp(const HType *g, const khkey_t *key) { \ 297 | khint_t hash, low, ret; \ 298 | kh_ensitr_t r; \ 299 | HType##_sub *h; \ 300 | hash = __hash_fn(*key); \ 301 | low = hash & ((1U<bits) - 1); \ 302 | h = &g->sub[low]; \ 303 | ret = prefix##_sub_getp_core(h, key, hash); \ 304 | if (ret == kh_end(h)) r.sub = low, r.pos = (khint_t)-1; \ 305 | else r.sub = low, r.pos = ret; \ 306 | return r; \ 307 | } \ 308 | SCOPE kh_ensitr_t prefix##_get(const HType *g, const khkey_t key) { return prefix##_getp(g, &key); } \ 309 | SCOPE kh_ensitr_t prefix##_putp(HType *g, const khkey_t *key, int *absent) { \ 310 | khint_t hash, low, ret; \ 311 | kh_ensitr_t r; \ 312 | HType##_sub *h; \ 313 | hash = __hash_fn(*key); \ 314 | 
low = hash & ((1U<bits) - 1); \ 315 | h = &g->sub[low]; \ 316 | ret = prefix##_sub_putp_core(h, key, hash, absent); \ 317 | if (*absent) ++g->count; \ 318 | r.sub = low, r.pos = ret; \ 319 | return r; \ 320 | } \ 321 | SCOPE kh_ensitr_t prefix##_put(HType *g, const khkey_t key, int *absent) { return prefix##_putp(g, &key, absent); } \ 322 | SCOPE int prefix##_del(HType *g, kh_ensitr_t itr) { \ 323 | HType##_sub *h = &g->sub[itr.sub]; \ 324 | int ret; \ 325 | ret = prefix##_sub_del(h, itr.pos); \ 326 | if (ret) --g->count; \ 327 | return ret; \ 328 | } \ 329 | SCOPE void prefix##_clear(HType *g) { \ 330 | int i; \ 331 | for (i = 0; i < 1U<bits; ++i) prefix##_sub_clear(&g->sub[i]); \ 332 | g->count = 0; \ 333 | } 334 | 335 | /***************************** 336 | * More convenient interface * 337 | *****************************/ 338 | 339 | /* common */ 340 | 341 | #define KHASHL_SET_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ 342 | typedef struct { khkey_t key; } kh_packed HType##_s_bucket_t; \ 343 | static kh_inline khint_t prefix##_s_hash(HType##_s_bucket_t x) { return __hash_fn(x.key); } \ 344 | static kh_inline int prefix##_s_eq(HType##_s_bucket_t x, HType##_s_bucket_t y) { return __hash_eq(x.key, y.key); } \ 345 | KHASHL_INIT(KH_LOCAL, HType, prefix##_s, HType##_s_bucket_t, prefix##_s_hash, prefix##_s_eq) \ 346 | SCOPE HType *prefix##_init(void) { return prefix##_s_init(); } \ 347 | SCOPE HType *prefix##_init2(void *km) { return prefix##_s_init2(km); } \ 348 | SCOPE void prefix##_destroy(HType *h) { prefix##_s_destroy(h); } \ 349 | SCOPE void prefix##_resize(HType *h, khint_t new_n_buckets) { prefix##_s_resize(h, new_n_buckets); } \ 350 | SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_s_bucket_t t; t.key = key; return prefix##_s_getp(h, &t); } \ 351 | SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_s_del(h, k); } \ 352 | SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_s_bucket_t t; t.key = 
key; return prefix##_s_putp(h, &t, absent); } \ 353 | SCOPE void prefix##_clear(HType *h) { prefix##_s_clear(h); } 354 | 355 | #define KHASHL_MAP_INIT(SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \ 356 | typedef struct { khkey_t key; kh_val_t val; } kh_packed HType##_m_bucket_t; \ 357 | static kh_inline khint_t prefix##_m_hash(HType##_m_bucket_t x) { return __hash_fn(x.key); } \ 358 | static kh_inline int prefix##_m_eq(HType##_m_bucket_t x, HType##_m_bucket_t y) { return __hash_eq(x.key, y.key); } \ 359 | KHASHL_INIT(KH_LOCAL, HType, prefix##_m, HType##_m_bucket_t, prefix##_m_hash, prefix##_m_eq) \ 360 | SCOPE HType *prefix##_init(void) { return prefix##_m_init(); } \ 361 | SCOPE HType *prefix##_init2(void *km) { return prefix##_m_init2(km); } \ 362 | SCOPE void prefix##_destroy(HType *h) { prefix##_m_destroy(h); } \ 363 | SCOPE void prefix##_resize(HType *h, khint_t new_n_buckets) { prefix##_m_resize(h, new_n_buckets); } \ 364 | SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_m_bucket_t t; t.key = key; return prefix##_m_getp(h, &t); } \ 365 | SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_m_del(h, k); } \ 366 | SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_m_bucket_t t; t.key = key; return prefix##_m_putp(h, &t, absent); } \ 367 | SCOPE void prefix##_clear(HType *h) { prefix##_m_clear(h); } 368 | 369 | /* cached hashes to trade memory for performance when hashing and comparison are expensive */ 370 | 371 | #define __kh_cached_hash(x) ((x).hash) 372 | 373 | #define KHASHL_CSET_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ 374 | typedef struct { khkey_t key; khint_t hash; } kh_packed HType##_cs_bucket_t; \ 375 | static kh_inline int prefix##_cs_eq(HType##_cs_bucket_t x, HType##_cs_bucket_t y) { return x.hash == y.hash && __hash_eq(x.key, y.key); } \ 376 | KHASHL_INIT(KH_LOCAL, HType, prefix##_cs, HType##_cs_bucket_t, __kh_cached_hash, prefix##_cs_eq) \ 377 | SCOPE HType 
*prefix##_init(void) { return prefix##_cs_init(); } \ 378 | SCOPE void prefix##_destroy(HType *h) { prefix##_cs_destroy(h); } \ 379 | SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_cs_bucket_t t; t.key = key; t.hash = __hash_fn(key); return prefix##_cs_getp(h, &t); } \ 380 | SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_cs_del(h, k); } \ 381 | SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_cs_bucket_t t; t.key = key, t.hash = __hash_fn(key); return prefix##_cs_putp(h, &t, absent); } \ 382 | SCOPE void prefix##_clear(HType *h) { prefix##_cs_clear(h); } 383 | 384 | #define KHASHL_CMAP_INIT(SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \ 385 | typedef struct { khkey_t key; kh_val_t val; khint_t hash; } kh_packed HType##_cm_bucket_t; \ 386 | static kh_inline int prefix##_cm_eq(HType##_cm_bucket_t x, HType##_cm_bucket_t y) { return x.hash == y.hash && __hash_eq(x.key, y.key); } \ 387 | KHASHL_INIT(KH_LOCAL, HType, prefix##_cm, HType##_cm_bucket_t, __kh_cached_hash, prefix##_cm_eq) \ 388 | SCOPE HType *prefix##_init(void) { return prefix##_cm_init(); } \ 389 | SCOPE void prefix##_destroy(HType *h) { prefix##_cm_destroy(h); } \ 390 | SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_cm_bucket_t t; t.key = key; t.hash = __hash_fn(key); return prefix##_cm_getp(h, &t); } \ 391 | SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_cm_del(h, k); } \ 392 | SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_cm_bucket_t t; t.key = key, t.hash = __hash_fn(key); return prefix##_cm_putp(h, &t, absent); } \ 393 | SCOPE void prefix##_clear(HType *h) { prefix##_cm_clear(h); } 394 | 395 | /* ensemble for huge hash tables */ 396 | 397 | #define KHASHE_SET_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ 398 | typedef struct { khkey_t key; } kh_packed HType##_es_bucket_t; \ 399 | static kh_inline khint_t prefix##_es_hash(HType##_es_bucket_t x) { return 
__hash_fn(x.key); } \ 400 | static kh_inline int prefix##_es_eq(HType##_es_bucket_t x, HType##_es_bucket_t y) { return __hash_eq(x.key, y.key); } \ 401 | KHASHE_INIT(KH_LOCAL, HType, prefix##_es, HType##_es_bucket_t, prefix##_es_hash, prefix##_es_eq) \ 402 | SCOPE HType *prefix##_init(int bits) { return prefix##_es_init(bits); } \ 403 | SCOPE void prefix##_destroy(HType *h) { prefix##_es_destroy(h); } \ 404 | SCOPE kh_ensitr_t prefix##_get(const HType *h, khkey_t key) { HType##_es_bucket_t t; t.key = key; return prefix##_es_getp(h, &t); } \ 405 | SCOPE int prefix##_del(HType *h, kh_ensitr_t k) { return prefix##_es_del(h, k); } \ 406 | SCOPE kh_ensitr_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_es_bucket_t t; t.key = key; return prefix##_es_putp(h, &t, absent); } \ 407 | SCOPE void prefix##_clear(HType *h) { prefix##_es_clear(h); } 408 | 409 | #define KHASHE_MAP_INIT(SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \ 410 | typedef struct { khkey_t key; kh_val_t val; } kh_packed HType##_em_bucket_t; \ 411 | static kh_inline khint_t prefix##_em_hash(HType##_em_bucket_t x) { return __hash_fn(x.key); } \ 412 | static kh_inline int prefix##_em_eq(HType##_em_bucket_t x, HType##_em_bucket_t y) { return __hash_eq(x.key, y.key); } \ 413 | KHASHE_INIT(KH_LOCAL, HType, prefix##_em, HType##_em_bucket_t, prefix##_em_hash, prefix##_em_eq) \ 414 | SCOPE HType *prefix##_init(int bits) { return prefix##_em_init(bits); } \ 415 | SCOPE void prefix##_destroy(HType *h) { prefix##_em_destroy(h); } \ 416 | SCOPE kh_ensitr_t prefix##_get(const HType *h, khkey_t key) { HType##_em_bucket_t t; t.key = key; return prefix##_em_getp(h, &t); } \ 417 | SCOPE int prefix##_del(HType *h, kh_ensitr_t k) { return prefix##_em_del(h, k); } \ 418 | SCOPE kh_ensitr_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_em_bucket_t t; t.key = key; return prefix##_em_putp(h, &t, absent); } \ 419 | SCOPE void prefix##_clear(HType *h) { prefix##_em_clear(h); } 420 | 421 | 
/************************** 422 | * Public macro functions * 423 | **************************/ 424 | 425 | #define kh_bucket(h, x) ((h)->keys[x]) 426 | #define kh_size(h) ((h)->count) 427 | #define kh_capacity(h) ((h)->keys? 1U<<(h)->bits : 0U) 428 | #define kh_end(h) kh_capacity(h) 429 | 430 | #define kh_key(h, x) ((h)->keys[x].key) 431 | #define kh_val(h, x) ((h)->keys[x].val) 432 | #define kh_exist(h, x) __kh_used((h)->used, (x)) 433 | 434 | #define kh_foreach(h, x) for ((x) = 0; (x) != kh_end(h); ++(x)) if (kh_exist((h), (x))) 435 | 436 | #define kh_ens_key(g, x) kh_key(&(g)->sub[(x).sub], (x).pos) 437 | #define kh_ens_val(g, x) kh_val(&(g)->sub[(x).sub], (x).pos) 438 | #define kh_ens_exist(g, x) kh_exist(&(g)->sub[(x).sub], (x).pos) 439 | #define kh_ens_is_end(x) ((x).pos == (khint_t)-1) 440 | #define kh_ens_size(g) ((g)->count) 441 | 442 | #define kh_ens_foreach(g, x) for ((x).sub = 0; (x).sub != 1<<(g)->bits; ++(x).sub) for ((x).pos = 0; (x).pos != kh_end(&(g)->sub[(x).sub]); ++(x).pos) if (kh_ens_exist((g), (x))) 443 | 444 | /************************************** 445 | * Common hash and equality functions * 446 | **************************************/ 447 | 448 | #define kh_eq_generic(a, b) ((a) == (b)) 449 | #define kh_eq_str(a, b) (strcmp((a), (b)) == 0) 450 | #define kh_hash_dummy(x) ((khint_t)(x)) 451 | 452 | static kh_inline khint_t kh_hash_uint32(khint_t x) { /* murmur finishing */ 453 | x ^= x >> 16; 454 | x *= 0x85ebca6bU; 455 | x ^= x >> 13; 456 | x *= 0xc2b2ae35U; 457 | x ^= x >> 16; 458 | return x; 459 | } 460 | 461 | static kh_inline khint_t kh_hash_uint64(khint64_t x) { /* splitmix64; see https://nullprogram.com/blog/2018/07/31/ for inversion */ 462 | x ^= x >> 30; 463 | x *= 0xbf58476d1ce4e5b9ULL; 464 | x ^= x >> 27; 465 | x *= 0x94d049bb133111ebULL; 466 | x ^= x >> 31; 467 | return (khint_t)x; 468 | } 469 | 470 | static kh_inline khint_t kh_hash_str(kh_cstr_t s) { /* FNV1a */ 471 | khint_t h = 2166136261U; 472 | const unsigned char *t = 
(const unsigned char*)s; 473 | for (; *t; ++t) 474 | h ^= *t, h *= 16777619; 475 | return h; 476 | } 477 | 478 | static kh_inline khint_t kh_hash_bytes(int len, const unsigned char *s) { 479 | khint_t h = 2166136261U; 480 | int i; 481 | for (i = 0; i < len; ++i) 482 | h ^= s[i], h *= 16777619; 483 | return h; 484 | } 485 | 486 | #endif /* __AC_KHASHL_H */ 487 | --------------------------------------------------------------------------------