├── .gitignore ├── README.md ├── khash_int2int.h ├── example.ipynb └── khash.h /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.swo 3 | *.o 4 | *.so 5 | 6 | .ipynb_checkpoints/ 7 | 8 | _khash_ffi.c 9 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # khash_numba 2 | Proof-of-concept of using a khash-based hashmap in Numba 3 | 4 | Run `python build_khash_cffi.py` to build cffi shared library. Example usage in `example.ipynb`. 5 | -------------------------------------------------------------------------------- /khash_int2int.h: -------------------------------------------------------------------------------- 1 | #ifndef KHASH_INT2INT_H 2 | #define KHASH_INT2INT_H 3 | 4 | #include "khash.h" 5 | 6 | KHASH_MAP_INIT_INT64(i2i, khint64_t) 7 | 8 | static inline void *khash_int2int_init(void) { 9 | return kh_init(i2i); 10 | } 11 | 12 | static void khash_int2int_destroy(void *_hash) { 13 | khash_t(i2i) *hash = (khash_t(i2i)*)_hash; 14 | if (hash) kh_destroy(i2i, hash); 15 | } 16 | 17 | static inline khint64_t khash_int2int_get(void *_hash, khint64_t key, khint64_t default_value) { 18 | khash_t(i2i) *hash = (khash_t(i2i)*)_hash; 19 | khint_t k = kh_get(i2i, hash, key); 20 | if ( k == kh_end(hash) ) return default_value; 21 | return kh_val(hash, k); 22 | } 23 | 24 | static inline int khash_int2int_set(void *_hash, khint64_t key, khint64_t value) 25 | { 26 | khint_t k; 27 | int ret; 28 | khash_t(i2i) *hash = (khash_t(i2i)*)_hash; 29 | if ( !hash ) return -1; 30 | k = kh_put(i2i, hash, key, &ret); 31 | kh_val(hash,k) = value; 32 | return k; 33 | } 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numba as nb\n", 10 | "import numpy as np\n", 11 | "\n", 12 | "import _khash_ffi\n", 13 | "\n", 14 | "from numba import cffi_support\n", 15 | "\n", 16 | "cffi_support.register_module(_khash_ffi)\n", 17 | "\n", 18 | "khash_init = _khash_ffi.lib.khash_int2int_init\n", 19 | "khash_get = _khash_ffi.lib.khash_int2int_get\n", 20 | "khash_set = _khash_ffi.lib.khash_int2int_set\n", 21 | "khash_destroy = _khash_ffi.lib.khash_int2int_destroy\n" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 28, 27 | "metadata": { 28 | "collapsed": true 29 | }, 30 | "outputs": [], 31 | "source": [ 32 | "@nb.jit\n", 33 | "def test1(fid, values, fetch_ids):\n", 34 | " # Build map of fid's (non-continuous) to fix (continuous compact)\n", 35 | " fid2fix = np.zeros(np.max(fid)+1,dtype=np.int64)\n", 36 | " fid2fix[np.unique(fid)] = np.arange(len(np.unique(fid)),dtype=np.int64)\n", 37 | "\n", 38 | " # Now fetch a selection of values \n", 39 | " s = np.empty_like(fetch_ids, dtype=np.float64)\n", 40 | " for i in range(fetch_ids.shape[0]):\n", 41 | " ii = fid2fix[fetch_ids[i]]\n", 42 | " s[i] = values[ii]\n", 43 | " \n", 44 | " return s" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 37, 50 | "metadata": { 51 | "collapsed": true 52 | }, 53 | "outputs": [], 54 | "source": [ 55 | "@nb.njit\n", 56 | "def test2(fid, values, fetch_ids):\n", 57 | " d = khash_init()\n", 58 | " \n", 59 | " fix = 0\n", 60 | " for i in range(fid.shape[0]):\n", 61 | " khash_set(d, fid[i], fix)\n", 62 | " fix += 1 \n", 63 | " \n", 64 | " s = np.empty_like(fetch_ids, dtype=np.float64)\n", 65 | " for j in range(fetch_ids.shape[0]):\n", 66 | " ii = khash_get(d, fetch_ids[j], -99)\n", 67 | " s[j] = values[ii]\n", 68 | " \n", 69 | " khash_destroy(d)\n", 70 | " return s" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 112, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "max_fid = 215000\n", 80 | "n_fids = 130\n", 81 | "n_fetch = 100\n", 82 | "\n", 83 | "_fids = np.arange(max_fid)\n", 84 | "np.random.shuffle(_fids)\n", 85 | "fids = np.empty(n_fids, dtype=np.int64)\n", 86 | "fids[-1] = max_fid\n", 87 | "fids[:-1] = np.sort(_fids[:n_fids -1])\n", 88 | "\n", 89 | "values = np.random.normal(size=(n_fids))\n", 90 | "fetch_ids = np.random.choice(fids, size=(n_fetch,), replace=True)" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 113, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "s1 = test1(fids, values, fetch_ids)" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 114, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "s2 = test2(fids, values, fetch_ids)" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 115, 114 | "metadata": {}, 115 | "outputs": [ 116 | { 117 | "data": { 118 | "text/plain": [ 119 | "True" 120 | ] 121 | }, 122 | "execution_count": 115, 123 | "metadata": {}, 124 | "output_type": "execute_result" 125 | } 126 | ], 127 | "source": [ 128 | "np.allclose(s1, s2)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 116, 134 | "metadata": {}, 135 | "outputs": [ 136 | { 137 | "name": "stdout", 138 | "output_type": "stream", 139 | "text": [ 140 | "10000 loops, best of 3: 86.4 µs per loop\n" 141 | ] 142 | } 143 | ], 144 | "source": [ 145 | "%timeit s1 = test1(fids, values, fetch_ids)" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 117, 151 | "metadata": {}, 152 | "outputs": [ 153 | { 154 | "name": "stdout", 155 | "output_type": "stream", 156 | "text": [ 157 | "100000 loops, best of 3: 7.59 µs per loop\n" 158 | ] 159 | } 160 | ], 161 | "source": [ 162 | "%timeit s2 = test2(fids, values, fetch_ids)" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": { 169 | "collapsed": true 170 | }, 171 | "outputs": [], 172 | "source": [] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | "metadata": { 178 | "collapsed": true 179 | }, 180 | "outputs": [], 181 | "source": [] 182 | } 183 | ], 184 | "metadata": { 185 | "kernelspec": { 186 | "display_name": "Python 2", 187 | "language": "python", 188 | "name": "python2" 189 | }, 190 | "language_info": { 191 | "codemirror_mode": { 192 | "name": "ipython", 193 | "version": 2 194 | }, 195 | "file_extension": ".py", 196 | "mimetype": "text/x-python", 197 | "name": "python", 198 | "nbconvert_exporter": "python", 199 | "pygments_lexer": "ipython2", 200 | "version": "2.7.13" 201 | } 202 | }, 203 | "nbformat": 4, 204 | "nbformat_minor": 2 205 | } 206 | -------------------------------------------------------------------------------- /khash.h: -------------------------------------------------------------------------------- 1 | /* The MIT License 2 | 3 | Copyright (c) 2008, 2009, 2011 by Attractive Chaos 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 20 | BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 21 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 22 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. 24 | */ 25 | 26 | /* 27 | An example: 28 | 29 | #include "khash.h" 30 | KHASH_MAP_INIT_INT(32, char) 31 | int main() { 32 | int ret, is_missing; 33 | khiter_t k; 34 | khash_t(32) *h = kh_init(32); 35 | k = kh_put(32, h, 5, &ret); 36 | kh_value(h, k) = 10; 37 | k = kh_get(32, h, 10); 38 | is_missing = (k == kh_end(h)); 39 | k = kh_get(32, h, 5); 40 | kh_del(32, h, k); 41 | for (k = kh_begin(h); k != kh_end(h); ++k) 42 | if (kh_exist(h, k)) kh_value(h, k) = 1; 43 | kh_destroy(32, h); 44 | return 0; 45 | } 46 | */ 47 | 48 | /* 49 | 2013-05-02 (0.2.8): 50 | 51 | * Use quadratic probing. When the capacity is power of 2, stepping function 52 | i*(i+1)/2 guarantees to traverse each bucket. It is better than double 53 | hashing on cache performance and is more robust than linear probing. 54 | 55 | In theory, double hashing should be more robust than quadratic probing. 56 | However, my implementation is probably not for large hash tables, because 57 | the second hash function is closely tied to the first hash function, 58 | which reduce the effectiveness of double hashing. 59 | 60 | Reference: http://research.cs.vt.edu/AVresearch/hashing/quadratic.php 61 | 62 | 2011-12-29 (0.2.7): 63 | 64 | * Minor code clean up; no actual effect. 65 | 66 | 2011-09-16 (0.2.6): 67 | 68 | * The capacity is a power of 2. This seems to dramatically improve the 69 | speed for simple keys. Thank Zilong Tan for the suggestion. Reference: 70 | 71 | - http://code.google.com/p/ulib/ 72 | - http://nothings.org/computer/judy/ 73 | 74 | * Allow to optionally use linear probing which usually has better 75 | performance for random input. Double hashing is still the default as it 76 | is more robust to certain non-random input. 77 | 78 | * Added Wang's integer hash function (not used by default). This hash 79 | function is more robust to certain non-random input. 80 | 81 | 2011-02-14 (0.2.5): 82 | 83 | * Allow to declare global functions. 84 | 85 | 2009-09-26 (0.2.4): 86 | 87 | * Improve portability 88 | 89 | 2008-09-19 (0.2.3): 90 | 91 | * Corrected the example 92 | * Improved interfaces 93 | 94 | 2008-09-11 (0.2.2): 95 | 96 | * Improved speed a little in kh_put() 97 | 98 | 2008-09-10 (0.2.1): 99 | 100 | * Added kh_clear() 101 | * Fixed a compiling error 102 | 103 | 2008-09-02 (0.2.0): 104 | 105 | * Changed to token concatenation which increases flexibility. 106 | 107 | 2008-08-31 (0.1.2): 108 | 109 | * Fixed a bug in kh_get(), which has not been tested previously. 110 | 111 | 2008-08-31 (0.1.1): 112 | 113 | * Added destructor 114 | */ 115 | 116 | 117 | #ifndef __AC_KHASH_H 118 | #define __AC_KHASH_H 119 | 120 | /*! 121 | @header 122 | 123 | Generic hash table library. 124 | */ 125 | 126 | #define AC_VERSION_KHASH_H "0.2.8" 127 | 128 | #include 129 | #include 130 | #include 131 | 132 | /* compiler specific configuration */ 133 | 134 | #if UINT_MAX == 0xffffffffu 135 | typedef unsigned int khint32_t; 136 | #elif ULONG_MAX == 0xffffffffu 137 | typedef unsigned long khint32_t; 138 | #endif 139 | 140 | #if ULONG_MAX == ULLONG_MAX 141 | typedef unsigned long khuint64_t; 142 | typedef signed long khint64_t; 143 | #else 144 | typedef unsigned long long khuint64_t; 145 | typedef signed long long khint64_t; 146 | #endif 147 | 148 | #ifndef kh_inline 149 | #ifdef _MSC_VER 150 | #define kh_inline __inline 151 | #else 152 | #define kh_inline inline 153 | #endif 154 | #endif /* kh_inline */ 155 | 156 | #ifndef klib_unused 157 | #if (defined __clang__ && __clang_major__ >= 3) || (defined __GNUC__ && __GNUC__ >= 3) 158 | #define klib_unused __attribute__ ((__unused__)) 159 | #else 160 | #define klib_unused 161 | #endif 162 | #endif /* klib_unused */ 163 | 164 | typedef khint32_t khint_t; 165 | typedef khint_t khiter_t; 166 | 167 | #define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2) 168 | #define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1) 169 | #define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3) 170 | #define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1))) 171 | #define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1))) 172 | #define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1))) 173 | #define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1)) 174 | 175 | #define __ac_fsize(m) ((m) < 16? 1 : (m)>>4) 176 | 177 | #ifndef kroundup32 178 | #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) 179 | #endif 180 | 181 | #ifndef kcalloc 182 | #define kcalloc(N,Z) calloc(N,Z) 183 | #endif 184 | #ifndef kmalloc 185 | #define kmalloc(Z) malloc(Z) 186 | #endif 187 | #ifndef krealloc 188 | #define krealloc(P,Z) realloc(P,Z) 189 | #endif 190 | #ifndef kfree 191 | #define kfree(P) free(P) 192 | #endif 193 | 194 | static const double __ac_HASH_UPPER = 0.77; 195 | 196 | #define __KHASH_TYPE(name, khkey_t, khval_t) \ 197 | typedef struct kh_##name##_s { \ 198 | khint_t n_buckets, size, n_occupied, upper_bound; \ 199 | khint32_t *flags; \ 200 | khkey_t *keys; \ 201 | khval_t *vals; \ 202 | } kh_##name##_t; 203 | 204 | #define __KHASH_PROTOTYPES(name, khkey_t, khval_t) \ 205 | extern kh_##name##_t *kh_init_##name(void); \ 206 | extern void kh_destroy_##name(kh_##name##_t *h); \ 207 | extern void kh_clear_##name(kh_##name##_t *h); \ 208 | extern khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \ 209 | extern int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \ 210 | extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \ 211 | extern void kh_del_##name(kh_##name##_t *h, khint_t x); 212 | 213 | #define __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ 214 | SCOPE kh_##name##_t *kh_init_##name(void) { \ 215 | return (kh_##name##_t*)kcalloc(1, sizeof(kh_##name##_t)); \ 216 | } \ 217 | SCOPE void kh_destroy_##name(kh_##name##_t *h) \ 218 | { \ 219 | if (h) { \ 220 | kfree((void *)h->keys); kfree(h->flags); \ 221 | kfree((void *)h->vals); \ 222 | kfree(h); \ 223 | } \ 224 | } \ 225 | SCOPE void kh_clear_##name(kh_##name##_t *h) \ 226 | { \ 227 | if (h && h->flags) { \ 228 | memset(h->flags, 0xaa, __ac_fsize(h->n_buckets) * sizeof(khint32_t)); \ 229 | h->size = h->n_occupied = 0; \ 230 | } \ 231 | } \ 232 | SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \ 233 | { \ 234 | if (h->n_buckets) { \ 235 | khint_t k, i, last, mask, step = 0; \ 236 | mask = h->n_buckets - 1; \ 237 | k = __hash_func(key); i = k & mask; \ 238 | last = i; \ 239 | while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ 240 | i = (i + (++step)) & mask; \ 241 | if (i == last) return h->n_buckets; \ 242 | } \ 243 | return __ac_iseither(h->flags, i)? h->n_buckets : i; \ 244 | } else return 0; \ 245 | } \ 246 | SCOPE int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \ 247 | { /* This function uses 0.25*n_buckets bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */ \ 248 | khint32_t *new_flags = 0; \ 249 | khint_t j = 1; \ 250 | { \ 251 | kroundup32(new_n_buckets); \ 252 | if (new_n_buckets < 4) new_n_buckets = 4; \ 253 | if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; /* requested size is too small */ \ 254 | else { /* hash table size to be changed (shrink or expand); rehash */ \ 255 | new_flags = (khint32_t*)kmalloc(__ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ 256 | if (!new_flags) return -1; \ 257 | memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ 258 | if (h->n_buckets < new_n_buckets) { /* expand */ \ 259 | khkey_t *new_keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \ 260 | if (!new_keys) { kfree(new_flags); return -1; } \ 261 | h->keys = new_keys; \ 262 | if (kh_is_map) { \ 263 | khval_t *new_vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \ 264 | if (!new_vals) { kfree(new_flags); return -1; } \ 265 | h->vals = new_vals; \ 266 | } \ 267 | } /* otherwise shrink */ \ 268 | } \ 269 | } \ 270 | if (j) { /* rehashing is needed */ \ 271 | for (j = 0; j != h->n_buckets; ++j) { \ 272 | if (__ac_iseither(h->flags, j) == 0) { \ 273 | khkey_t key = h->keys[j]; \ 274 | khval_t val; \ 275 | khint_t new_mask; \ 276 | new_mask = new_n_buckets - 1; \ 277 | if (kh_is_map) val = h->vals[j]; \ 278 | __ac_set_isdel_true(h->flags, j); \ 279 | while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \ 280 | khint_t k, i, step = 0; \ 281 | k = __hash_func(key); \ 282 | i = k & new_mask; \ 283 | while (!__ac_isempty(new_flags, i)) i = (i + (++step)) & new_mask; \ 284 | __ac_set_isempty_false(new_flags, i); \ 285 | if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the existing element */ \ 286 | { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \ 287 | if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \ 288 | __ac_set_isdel_true(h->flags, i); /* mark it as deleted in the old hash table */ \ 289 | } else { /* write the element and jump out of the loop */ \ 290 | h->keys[i] = key; \ 291 | if (kh_is_map) h->vals[i] = val; \ 292 | break; \ 293 | } \ 294 | } \ 295 | } \ 296 | } \ 297 | if (h->n_buckets > new_n_buckets) { /* shrink the hash table */ \ 298 | h->keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \ 299 | if (kh_is_map) h->vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \ 300 | } \ 301 | kfree(h->flags); /* free the working space */ \ 302 | h->flags = new_flags; \ 303 | h->n_buckets = new_n_buckets; \ 304 | h->n_occupied = h->size; \ 305 | h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \ 306 | } \ 307 | return 0; \ 308 | } \ 309 | SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \ 310 | { \ 311 | khint_t x; \ 312 | if (h->n_occupied >= h->upper_bound) { /* update the hash table */ \ 313 | if (h->n_buckets > (h->size<<1)) { \ 314 | if (kh_resize_##name(h, h->n_buckets - 1) < 0) { /* clear "deleted" elements */ \ 315 | *ret = -1; return h->n_buckets; \ 316 | } \ 317 | } else if (kh_resize_##name(h, h->n_buckets + 1) < 0) { /* expand the hash table */ \ 318 | *ret = -1; return h->n_buckets; \ 319 | } \ 320 | } /* TODO: to implement automatically shrinking; resize() already support shrinking */ \ 321 | { \ 322 | khint_t k, i, site, last, mask = h->n_buckets - 1, step = 0; \ 323 | x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \ 324 | if (__ac_isempty(h->flags, i)) x = i; /* for speed up */ \ 325 | else { \ 326 | last = i; \ 327 | while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ 328 | if (__ac_isdel(h->flags, i)) site = i; \ 329 | i = (i + (++step)) & mask; \ 330 | if (i == last) { x = site; break; } \ 331 | } \ 332 | if (x == h->n_buckets) { \ 333 | if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \ 334 | else x = i; \ 335 | } \ 336 | } \ 337 | } \ 338 | if (__ac_isempty(h->flags, x)) { /* not present at all */ \ 339 | h->keys[x] = key; \ 340 | __ac_set_isboth_false(h->flags, x); \ 341 | ++h->size; ++h->n_occupied; \ 342 | *ret = 1; \ 343 | } else if (__ac_isdel(h->flags, x)) { /* deleted */ \ 344 | h->keys[x] = key; \ 345 | __ac_set_isboth_false(h->flags, x); \ 346 | ++h->size; \ 347 | *ret = 2; \ 348 | } else *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ \ 349 | return x; \ 350 | } \ 351 | SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x) \ 352 | { \ 353 | if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \ 354 | __ac_set_isdel_true(h->flags, x); \ 355 | --h->size; \ 356 | } \ 357 | } 358 | 359 | #define KHASH_DECLARE(name, khkey_t, khval_t) \ 360 | __KHASH_TYPE(name, khkey_t, khval_t) \ 361 | __KHASH_PROTOTYPES(name, khkey_t, khval_t) 362 | 363 | #define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ 364 | __KHASH_TYPE(name, khkey_t, khval_t) \ 365 | __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) 366 | 367 | #define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ 368 | KHASH_INIT2(name, static kh_inline klib_unused, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) 369 | 370 | /* --- BEGIN OF HASH FUNCTIONS --- */ 371 | 372 | /*! @function 373 | @abstract Integer hash function 374 | @param key The integer [khint32_t] 375 | @return The hash value [khint_t] 376 | */ 377 | #define kh_int_hash_func(key) (khint32_t)(key) 378 | /*! @function 379 | @abstract Integer comparison function 380 | */ 381 | #define kh_int_hash_equal(a, b) ((a) == (b)) 382 | /*! @function 383 | @abstract 64-bit integer hash function 384 | @param key The integer [khint64_t] 385 | @return The hash value [khint_t] 386 | */ 387 | #define kh_int64_hash_func(key) (khint32_t)((key)>>33^(key)^(key)<<11) 388 | /*! @function 389 | @abstract 64-bit integer comparison function 390 | */ 391 | #define kh_int64_hash_equal(a, b) ((a) == (b)) 392 | /*! @function 393 | @abstract const char* hash function 394 | @param s Pointer to a null terminated string 395 | @return The hash value 396 | */ 397 | static kh_inline khint_t __ac_X31_hash_string(const char *s) 398 | { 399 | khint_t h = (khint_t)*s; 400 | if (h) for (++s ; *s; ++s) h = (h << 5) - h + (khint_t)*s; 401 | return h; 402 | } 403 | /*! @function 404 | @abstract Another interface to const char* hash function 405 | @param key Pointer to a null terminated string [const char*] 406 | @return The hash value [khint_t] 407 | */ 408 | #define kh_str_hash_func(key) __ac_X31_hash_string(key) 409 | /*! @function 410 | @abstract Const char* comparison function 411 | */ 412 | #define kh_str_hash_equal(a, b) (strcmp(a, b) == 0) 413 | 414 | static kh_inline khint_t __ac_Wang_hash(khint_t key) 415 | { 416 | key += ~(key << 15); 417 | key ^= (key >> 10); 418 | key += (key << 3); 419 | key ^= (key >> 6); 420 | key += ~(key << 11); 421 | key ^= (key >> 16); 422 | return key; 423 | } 424 | #define kh_int_hash_func2(key) __ac_Wang_hash((khint_t)key) 425 | 426 | /* --- END OF HASH FUNCTIONS --- */ 427 | 428 | /* Other convenient macros... */ 429 | 430 | /*! 431 | @abstract Type of the hash table. 432 | @param name Name of the hash table [symbol] 433 | */ 434 | #define khash_t(name) kh_##name##_t 435 | 436 | /*! @function 437 | @abstract Initiate a hash table. 438 | @param name Name of the hash table [symbol] 439 | @return Pointer to the hash table [khash_t(name)*] 440 | */ 441 | #define kh_init(name) kh_init_##name() 442 | 443 | /*! @function 444 | @abstract Destroy a hash table. 445 | @param name Name of the hash table [symbol] 446 | @param h Pointer to the hash table [khash_t(name)*] 447 | */ 448 | #define kh_destroy(name, h) kh_destroy_##name(h) 449 | 450 | /*! @function 451 | @abstract Reset a hash table without deallocating memory. 452 | @param name Name of the hash table [symbol] 453 | @param h Pointer to the hash table [khash_t(name)*] 454 | */ 455 | #define kh_clear(name, h) kh_clear_##name(h) 456 | 457 | /*! @function 458 | @abstract Resize a hash table. 459 | @param name Name of the hash table [symbol] 460 | @param h Pointer to the hash table [khash_t(name)*] 461 | @param s New size [khint_t] 462 | */ 463 | #define kh_resize(name, h, s) kh_resize_##name(h, s) 464 | 465 | /*! @function 466 | @abstract Insert a key to the hash table. 467 | @param name Name of the hash table [symbol] 468 | @param h Pointer to the hash table [khash_t(name)*] 469 | @param k Key [type of keys] 470 | @param r Extra return code: -1 if the operation failed; 471 | 0 if the key is present in the hash table; 472 | 1 if the bucket is empty (never used); 2 if the element in 473 | the bucket has been deleted [int*] 474 | @return Iterator to the inserted element [khint_t] 475 | */ 476 | #define kh_put(name, h, k, r) kh_put_##name(h, k, r) 477 | 478 | /*! @function 479 | @abstract Retrieve a key from the hash table. 480 | @param name Name of the hash table [symbol] 481 | @param h Pointer to the hash table [khash_t(name)*] 482 | @param k Key [type of keys] 483 | @return Iterator to the found element, or kh_end(h) if the element is absent [khint_t] 484 | */ 485 | #define kh_get(name, h, k) kh_get_##name(h, k) 486 | 487 | /*! @function 488 | @abstract Remove a key from the hash table. 489 | @param name Name of the hash table [symbol] 490 | @param h Pointer to the hash table [khash_t(name)*] 491 | @param k Iterator to the element to be deleted [khint_t] 492 | */ 493 | #define kh_del(name, h, k) kh_del_##name(h, k) 494 | 495 | /*! @function 496 | @abstract Test whether a bucket contains data. 497 | @param h Pointer to the hash table [khash_t(name)*] 498 | @param x Iterator to the bucket [khint_t] 499 | @return 1 if containing data; 0 otherwise [int] 500 | */ 501 | #define kh_exist(h, x) (!__ac_iseither((h)->flags, (x))) 502 | 503 | /*! @function 504 | @abstract Get key given an iterator 505 | @param h Pointer to the hash table [khash_t(name)*] 506 | @param x Iterator to the bucket [khint_t] 507 | @return Key [type of keys] 508 | */ 509 | #define kh_key(h, x) ((h)->keys[x]) 510 | 511 | /*! @function 512 | @abstract Get value given an iterator 513 | @param h Pointer to the hash table [khash_t(name)*] 514 | @param x Iterator to the bucket [khint_t] 515 | @return Value [type of values] 516 | @discussion For hash sets, calling this results in segfault. 517 | */ 518 | #define kh_val(h, x) ((h)->vals[x]) 519 | 520 | /*! @function 521 | @abstract Alias of kh_val() 522 | */ 523 | #define kh_value(h, x) ((h)->vals[x]) 524 | 525 | /*! @function 526 | @abstract Get the start iterator 527 | @param h Pointer to the hash table [khash_t(name)*] 528 | @return The start iterator [khint_t] 529 | */ 530 | #define kh_begin(h) (khint_t)(0) 531 | 532 | /*! @function 533 | @abstract Get the end iterator 534 | @param h Pointer to the hash table [khash_t(name)*] 535 | @return The end iterator [khint_t] 536 | */ 537 | #define kh_end(h) ((h)->n_buckets) 538 | 539 | /*! @function 540 | @abstract Get the number of elements in the hash table 541 | @param h Pointer to the hash table [khash_t(name)*] 542 | @return Number of elements in the hash table [khint_t] 543 | */ 544 | #define kh_size(h) ((h)->size) 545 | 546 | /*! @function 547 | @abstract Get the number of buckets in the hash table 548 | @param h Pointer to the hash table [khash_t(name)*] 549 | @return Number of buckets in the hash table [khint_t] 550 | */ 551 | #define kh_n_buckets(h) ((h)->n_buckets) 552 | 553 | /*! @function 554 | @abstract Iterate over the entries in the hash table 555 | @param h Pointer to the hash table [khash_t(name)*] 556 | @param kvar Variable to which key will be assigned 557 | @param vvar Variable to which value will be assigned 558 | @param code Block of code to execute 559 | */ 560 | #define kh_foreach(h, kvar, vvar, code) { khint_t __i; \ 561 | for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \ 562 | if (!kh_exist(h,__i)) continue; \ 563 | (kvar) = kh_key(h,__i); \ 564 | (vvar) = kh_val(h,__i); \ 565 | code; \ 566 | } } 567 | 568 | /*! @function 569 | @abstract Iterate over the values in the hash table 570 | @param h Pointer to the hash table [khash_t(name)*] 571 | @param vvar Variable to which value will be assigned 572 | @param code Block of code to execute 573 | */ 574 | #define kh_foreach_value(h, vvar, code) { khint_t __i; \ 575 | for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \ 576 | if (!kh_exist(h,__i)) continue; \ 577 | (vvar) = kh_val(h,__i); \ 578 | code; \ 579 | } } 580 | 581 | /* More conenient interfaces */ 582 | 583 | /*! @function 584 | @abstract Instantiate a hash set containing integer keys 585 | @param name Name of the hash table [symbol] 586 | */ 587 | #define KHASH_SET_INIT_INT(name) \ 588 | KHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal) 589 | 590 | /*! @function 591 | @abstract Instantiate a hash map containing integer keys 592 | @param name Name of the hash table [symbol] 593 | @param khval_t Type of values [type] 594 | */ 595 | #define KHASH_MAP_INIT_INT(name, khval_t) \ 596 | KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal) 597 | 598 | /*! @function 599 | @abstract Instantiate a hash map containing 64-bit integer keys 600 | @param name Name of the hash table [symbol] 601 | */ 602 | #define KHASH_SET_INIT_UINT64(name) \ 603 | KHASH_INIT(name, khuint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal) 604 | 605 | #define KHASH_SET_INIT_INT64(name) \ 606 | KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal) 607 | 608 | /*! @function 609 | @abstract Instantiate a hash map containing 64-bit integer keys 610 | @param name Name of the hash table [symbol] 611 | @param khval_t Type of values [type] 612 | */ 613 | #define KHASH_MAP_INIT_UINT64(name, khval_t) \ 614 | KHASH_INIT(name, khuint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal) 615 | 616 | #define KHASH_MAP_INIT_INT64(name, khval_t) \ 617 | KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal) 618 | 619 | typedef const char *kh_cstr_t; 620 | /*! @function 621 | @abstract Instantiate a hash map containing const char* keys 622 | @param name Name of the hash table [symbol] 623 | */ 624 | #define KHASH_SET_INIT_STR(name) \ 625 | KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal) 626 | 627 | /*! @function 628 | @abstract Instantiate a hash map containing const char* keys 629 | @param name Name of the hash table [symbol] 630 | @param khval_t Type of values [type] 631 | */ 632 | #define KHASH_MAP_INIT_STR(name, khval_t) \ 633 | KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal) 634 | 635 | #endif /* __AC_KHASH_H */ 636 | --------------------------------------------------------------------------------