├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── include └── simple_sparsehash.h ├── run_tests.sh └── src ├── simple_sparsehash.c └── test.c /.gitignore: -------------------------------------------------------------------------------- 1 | *.sw[op] 2 | libsimple-sparsehash.so 3 | sparsehash_test 4 | *.o 5 | .gdb_history 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Quinlan Pfiffer 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | VERSION=0.1 2 | SOVERSION=0 3 | CFLAGS=-std=c99 -Wextra -Wno-ignored-qualifiers -O3 -g -Werror -Wall 4 | NAME=libsimple-sparsehash.so 5 | TESTNAME=sparsehash_test 6 | OBJS=simple_sparsehash.o 7 | INCLUDES=-I./include/ 8 | LIBINCLUDES=-L. 9 | 10 | PREFIX?=/usr/local 11 | INSTALL_LIB=$(PREFIX)/lib/ 12 | INSTALL_INCLUDE=$(PREFIX)/include/ 13 | 14 | uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not') 15 | LDCONFIG= 16 | ifeq ($(uname_S),Darwin) 17 | LDCONFIG=echo 18 | else 19 | LDCONFIG=ldconfig 20 | endif 21 | 22 | all: $(NAME) $(TESTNAME) 23 | 24 | clean: 25 | rm *.o 26 | rm $(TESTNAME) 27 | rm $(NAME) 28 | 29 | $(TESTNAME): test.o $(NAME) 30 | $(CC) $(CFLAGS) $(INCLUDES) $(LIBINCLUDES) -o $(TESTNAME) $< -lsimple-sparsehash 31 | 32 | %.o: ./src/%.c 33 | $(CC) $(CFLAGS) $(INCLUDES) -fPIC -c $< 34 | 35 | $(NAME): $(OBJS) 36 | $(CC) -shared -fPIC $(CFLAGS) $(INCLUDES) -o $(NAME) $^ 37 | 38 | uninstall: 39 | rm -rf $(INSTALL_LIB)$(NAME)* 40 | rm -rf $(INSTALL_INCLUDE)/simple_sparsehash.h 41 | 42 | install: 43 | @mkdir -p $(INSTALL_LIB) 44 | @mkdir -p $(INSTALL_INCLUDE) 45 | @install $(NAME) $(INSTALL_LIB)$(NAME).$(VERSION) 46 | @ln -fs $(INSTALL_LIB)$(NAME).$(VERSION) $(INSTALL_LIB)$(NAME) 47 | @ln -fs $(INSTALL_LIB)$(NAME).$(VERSION) $(INSTALL_LIB)$(NAME).$(SOVERSION) 48 | @install ./include/*.h $(INSTALL_INCLUDE) 49 | @$(LDCONFIG) $(INSTALL_LIB) 50 | @echo "$(NAME) installed to $(PREFIX) :^)." 51 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## What is it? 2 | 3 | This is a simple reimplementation of Google's [SparseHash](https://code.google.com/p/sparsehash/) 4 | library intended as both a learning and teaching excercise. 5 | 6 | ## How do I use it? 7 | 8 | Either copy `./include/simple_sparsehash.h` and `./src/simple_sparsehash.c` into 9 | your project and start using them, or: 10 | 11 | ``` 12 | make 13 | sudo make install 14 | ``` 15 | 16 | Then when you build your project just link to the shared library with 17 | `-lsimple-sparsehash`. 18 | 19 | ## Tests 20 | 21 | Just `make && ./run_tests.sh`. 22 | 23 | ## Differences between the official version 24 | 25 | * Doesn't support many of the things that the official version does, like 26 | iterators, swapping, deletion, etc. 27 | * There are no 'default values' of sparse arrays. You access something that 28 | isn't real? You get `NULL`. 29 | 30 | ## Eventual TODO 31 | 32 | * Store actual items in the arrays, not pointers to items. 33 | * Resize the table down when it reaches an inverse occupancy or something. 34 | * Store object size in the dictionary, so that we can make assumptions about 35 | array size. Right now it accepts any value, and is slightly slower due to not 36 | having any locality of reference, and having to jump to an extra location in 37 | memory. Maybe two different versions? 38 | * Be able to delete things from the hashtable 39 | * Refactor the get/set/rehash methods. They've got some really similar code. 40 | * Speed it up, it's currently pretty damn slow. 41 | -------------------------------------------------------------------------------- /include/simple_sparsehash.h: -------------------------------------------------------------------------------- 1 | /* vim: noet ts=4 sw=4 2 | */ 3 | #pragma once 4 | #include 5 | #include 6 | 7 | /* The maximum size of each sparse_array_group. */ 8 | #define GROUP_SIZE 48 9 | 10 | /* The default size of the hash table. Used to init bucket_max. */ 11 | #define STARTING_SIZE 32 12 | 13 | /* The default 'should we resize' percentage, out of 100 percent. */ 14 | #define RESIZE_PERCENT 80 15 | 16 | /* The math here is, I believe, so that we 17 | * store exactly enough bits for our group size. The math returns the 18 | * minimum number of bytes to hold all the bits we need. 19 | */ 20 | #define BITCHUNK_SIZE (sizeof(uint32_t) * 8) 21 | #define BITMAP_SIZE (GROUP_SIZE-1)/BITCHUNK_SIZE + 1 22 | 23 | /* These are the objects that get stored in the sparse arrays that 24 | * make up a sparse dictionary. 25 | */ 26 | struct sparse_bucket { 27 | char *key; 28 | const size_t klen; 29 | void *val; 30 | const size_t vlen; 31 | const uint64_t hash; 32 | }; 33 | 34 | struct sparse_array_group { 35 | uint32_t count; /* The number of items currently in this vector. */ 36 | size_t elem_size; /* The maximum size of each element. */ 37 | void * group; /* The place where we actually store things. */ 38 | uint32_t bitmap[BITMAP_SIZE]; /* This is how we store the state of what is occupied in group. */ 39 | /* bitmap requires some explanation. We use the bitmap to store which 40 | * `offsets` in the array are occupied. We do this through a series 41 | * of bit-testing functions. 42 | */ 43 | }; 44 | 45 | struct sparse_array { 46 | const size_t maximum; /* The maximum number of items that can be in this array. */ 47 | struct sparse_array_group *groups; /* The number of groups we have. This is (num_buckets/GROUP_SIZE). */ 48 | }; 49 | 50 | struct sparse_dict { 51 | size_t bucket_max; /* The current maximum number of buckets in this dictionary. */ 52 | size_t bucket_count; /* The number of occupied buckets in this dictionary. */ 53 | struct sparse_array *buckets; /* Array of `sparse_array` objects. Defaults to STARTING_SIZE elements in length. */ 54 | }; 55 | 56 | /* ------------ */ 57 | /* Sparse Array */ 58 | /* ------------ */ 59 | 60 | struct sparse_array *sparse_array_init(const size_t element_size, const uint32_t maximum); 61 | const int sparse_array_set(struct sparse_array *arr, const uint32_t i, 62 | const void *val, const size_t vlen); 63 | const void *sparse_array_get(struct sparse_array *arr, const uint32_t i, size_t *outsize); 64 | const int sparse_array_free(struct sparse_array *arr); 65 | 66 | 67 | /* ----------------- */ 68 | /* Sparse Dictionary */ 69 | /* ----------------- */ 70 | 71 | /* Creates a new sparse dictionary. */ 72 | struct sparse_dict *sparse_dict_init(); 73 | 74 | /* Copies `value` into `dict`. */ 75 | const int sparse_dict_set(struct sparse_dict *dict, 76 | const char *key, const size_t klen, 77 | const void *value, const size_t vlen); 78 | 79 | /* Returns the value of `key` from `dict`. *outsize will be filled out if it 80 | * is non-null. 81 | */ 82 | const void *sparse_dict_get(struct sparse_dict *dict, const char *key, 83 | const size_t klen, size_t *outsize); 84 | 85 | /* Frees and cleans up a sparse_dict created with sparse_dict_init(). */ 86 | const int sparse_dict_free(struct sparse_dict *dict); 87 | -------------------------------------------------------------------------------- /run_tests.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | export LD_LIBRARY_PATH=./:$LD_LIBRARY_PATH 4 | 5 | CMD=./sparsehash_test 6 | if [ $# -eq 0 ]; then 7 | $CMD 8 | else 9 | if [ $1 == "gdb" ]; then 10 | gdb --args $CMD 11 | elif [ $1 == "valgrind" ]; then 12 | valgrind --track-origins=yes --leak-check=full $CMD 13 | else 14 | $CMD 15 | fi 16 | fi 17 | -------------------------------------------------------------------------------- /src/simple_sparsehash.c: -------------------------------------------------------------------------------- 1 | /* vim: noet ts=4 sw=4 2 | */ 3 | #include 4 | #include 5 | #include "simple_sparsehash.h" 6 | 7 | #define FULL_ELEM_SIZE (arr->elem_size + sizeof(size_t)) 8 | #define MAX_ARR_SIZE ((arr->maximum - 1)/GROUP_SIZE + 1) 9 | #define QUADRATIC_PROBE(maximum) (key_hash + num_probes * num_probes) & (maximum - 1) 10 | 11 | /* One of the simplest hashing functions, FNV-1a. See the wikipedia article for more info: 12 | * http://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function 13 | */ 14 | static const uint64_t hash_fnv1a(const char *key, const size_t klen) { 15 | static const uint64_t fnv_prime = 1099511628211ULL; 16 | static const uint64_t fnv_offset_bias = 14695981039346656037ULL; 17 | 18 | const int iterations = klen; 19 | 20 | uint8_t i; 21 | uint64_t hash = fnv_offset_bias; 22 | 23 | for(i = 0; i < iterations; i++) { 24 | hash = hash ^ key[i]; 25 | hash = hash * fnv_prime; 26 | } 27 | 28 | return hash; 29 | } 30 | 31 | /* TODO: Figure out better names for charbit/modbit */ 32 | static const uint32_t charbit(const uint32_t position) { 33 | /* Get enough bits to store 0 - 31. */ 34 | return position >> 5; 35 | } 36 | 37 | static const uint32_t modbit(const uint32_t position) { 38 | /* Get the number of bits of this number that are 0 - 31, 39 | * or something like that. 40 | */ 41 | return 1 << (position & 31); 42 | } 43 | 44 | /* This is one of the popcount implementations from Wikipedia. 45 | * http://en.wikipedia.org/wiki/Hamming_weight 46 | */ 47 | static inline uint32_t popcount_32(uint32_t x) { 48 | const uint32_t m1 = 0x55555555; 49 | const uint32_t m2 = 0x33333333; 50 | const uint32_t m4 = 0x0f0f0f0f; 51 | x -= (x >> 1) & m1; 52 | x = (x & m2) + ((x >> 2) & m2); 53 | x = (x + (x >> 4)) & m4; 54 | x += x >> 8; 55 | return (x + (x >> 16)) & 0x3f; 56 | } 57 | 58 | /* This function is used to map an item's 'position' (the user-facing index 59 | * into the array) with the 'offset' which is the actual position in the 60 | * array, memory-wise. 61 | * 62 | * The way we do this is by counting the number of 1s in the bitmap from 63 | * 0 .. i-1 in the bitmap. The original implementation uses a big table for the 64 | * popcount. 65 | */ 66 | static const uint32_t position_to_offset(const uint32_t *bitmap, 67 | const uint32_t position) { 68 | uint32_t retval = 0; 69 | uint32_t pos = position; 70 | uint32_t bitmap_iter = 0; 71 | 72 | /* Here we loop through the bitmap a uint32_t at a time, and count the number 73 | * of 1s in that chunk. 74 | */ 75 | for (; pos >= BITCHUNK_SIZE; pos -= BITCHUNK_SIZE) 76 | retval += popcount_32(bitmap[bitmap_iter++]); 77 | 78 | /* This last bit does the same thing as above, but takes care of the 79 | * remainder that didn't fit cleanly into the 32 x 32 x 32 ... loop above. That 80 | * is to say, it grabs the last 0 - 7 bits and adds the number of 1s in it to 81 | * retval. 82 | */ 83 | return retval + popcount_32(bitmap[bitmap_iter] & (((uint32_t)1 << pos) - 1u)); 84 | } 85 | 86 | /* Simple check to see whether a slot in the array is occupied or not. */ 87 | static const int is_position_occupied(const uint32_t *bitmap, 88 | const uint32_t position) { 89 | return bitmap[charbit(position)] & modbit(position); 90 | } 91 | 92 | static void set_position(uint32_t *bitmap, const uint32_t position) { 93 | bitmap[charbit(position)] |= modbit(position); 94 | } 95 | 96 | /* Sparse Array */ 97 | static const int _sparse_array_group_set(struct sparse_array_group *arr, const uint32_t i, 98 | const void *val, const size_t vlen) { 99 | uint32_t offset = 0; 100 | void *destination = NULL; 101 | if (vlen > arr->elem_size) 102 | return 0; 103 | /* So what needs to happen in this function: 104 | * 1. Convert the position (i) to the 'offset' 105 | * 2. Check to see if this slot is already occupied (bmtest). 106 | * overwrite the old element if this is the case. 107 | * 3. Otherwise, expand the array by a single element and increase 108 | * our bucket count (arr->count). Finally, OR the bit in our state 109 | * bitmap that shows this position is occupied. 110 | * 4. After doing all that, create a copy of val and stick it in the right 111 | * position in our array. 112 | */ 113 | 114 | offset = position_to_offset(arr->bitmap, i); 115 | if (!is_position_occupied(arr->bitmap, i)) { 116 | const size_t to_move_siz = (arr->count - offset) * FULL_ELEM_SIZE; 117 | /* Reallocate the array to hold the new item */ 118 | void *new_group = realloc(arr->group, (arr->count + 1) * FULL_ELEM_SIZE); 119 | if (new_group == NULL) 120 | return 0; 121 | 122 | /* Now take all of the old items and move them up a slot: */ 123 | if (to_move_siz > 0) { 124 | memmove((unsigned char *)(new_group) + ((offset + 1) * FULL_ELEM_SIZE), 125 | (unsigned char *)(new_group) + (offset * FULL_ELEM_SIZE), 126 | to_move_siz); 127 | } 128 | 129 | /* Increase the bucket count because we've expanded: */ 130 | arr->count++; 131 | arr->group = new_group; 132 | /* Remember to modify the bitmap: */ 133 | set_position(arr->bitmap, i); 134 | } 135 | 136 | /* Copy the size into the position, fighting -pedantic the whole 137 | * time. 138 | */ 139 | destination = (unsigned char *)(arr->group) + (offset * FULL_ELEM_SIZE); 140 | memcpy(destination, &vlen, sizeof(vlen)); 141 | 142 | /* Here we mutate a variable because we're writing C and we don't respect 143 | * anything. 144 | */ 145 | destination = (unsigned char *)destination + sizeof(vlen); 146 | memcpy(destination, val, vlen); 147 | 148 | return 1; 149 | } 150 | 151 | static const void *_sparse_array_group_get(struct sparse_array_group *arr, 152 | const uint32_t i, size_t *outsize) { 153 | const uint32_t offset = position_to_offset(arr->bitmap, i); 154 | const unsigned char *item_siz = (unsigned char *)(arr->group) + (offset * FULL_ELEM_SIZE); 155 | const void *item = item_siz + sizeof(size_t); 156 | 157 | if (!is_position_occupied(arr->bitmap, i)) 158 | return NULL; 159 | 160 | /* In a perfect world you could store 0 sized items and have that mean 161 | * something, but I'll tolerate none of that right now. 162 | */ 163 | if (*(size_t *)item_siz == 0) 164 | return NULL; 165 | 166 | /* If the user wants to know the size (outsize is non-null), write it 167 | * out. 168 | */ 169 | if (outsize) 170 | memcpy(outsize, item_siz, sizeof(size_t)); 171 | 172 | return item; 173 | } 174 | 175 | static const int _sparse_array_group_free(struct sparse_array_group *arr) { 176 | free(arr->group); 177 | return 1; 178 | } 179 | 180 | struct sparse_array *sparse_array_init(const size_t element_size, const uint32_t maximum) { 181 | unsigned int i = 0; 182 | struct sparse_array *arr = NULL; 183 | /* CHECK YOUR SYSCALL RETURNS. Listen to djb. */ 184 | arr = calloc(1, sizeof(struct sparse_array)); 185 | if (arr == NULL) 186 | return NULL; 187 | 188 | /* This is a non-obvious hack I use. If we have const variables in a 189 | * struct then to initialize them we can either cast them or use an 190 | * initializer like this. 191 | * Then we copy it into a heap-allocated blob. The compiler lets us 192 | * do this. 193 | */ 194 | struct sparse_array stack_array = { 195 | .maximum = maximum, 196 | }; 197 | 198 | memcpy(arr, &stack_array, sizeof(struct sparse_array)); 199 | arr->groups = calloc(MAX_ARR_SIZE, sizeof(struct sparse_array_group)); 200 | if (arr->groups == NULL) { 201 | free(arr); 202 | return NULL; 203 | } 204 | 205 | for (i = 0; i < MAX_ARR_SIZE; i++) { 206 | struct sparse_array_group *sag = &arr->groups[i]; 207 | sag->elem_size = element_size; 208 | } 209 | 210 | return arr; 211 | } 212 | 213 | const int sparse_array_set(struct sparse_array *arr, const uint32_t i, 214 | const void *val, const size_t vlen) { 215 | /* Don't let users set outside the bounds of the array. */ 216 | if (i > arr->maximum) 217 | return 0; 218 | /* Since our hashtable is divided into many arrays, we need to pick the one 219 | * relevant to `i` in this case: 220 | */ 221 | struct sparse_array_group *operating_group = &arr->groups[i / GROUP_SIZE]; 222 | const int position = i % GROUP_SIZE; 223 | return _sparse_array_group_set(operating_group, position, val, vlen); 224 | } 225 | 226 | const void *sparse_array_get(struct sparse_array *arr, const uint32_t i, size_t *outsize) { 227 | if (i > arr->maximum) 228 | return NULL; 229 | struct sparse_array_group *operating_group = &arr->groups[i / GROUP_SIZE]; 230 | const int position = i % GROUP_SIZE; 231 | return _sparse_array_group_get(operating_group, position, outsize); 232 | } 233 | 234 | const int sparse_array_free(struct sparse_array *arr) { 235 | unsigned int i = 0; 236 | for (; i < MAX_ARR_SIZE; i++) { 237 | struct sparse_array_group *sag = &arr->groups[i]; 238 | _sparse_array_group_free(sag); 239 | } 240 | free(arr->groups); 241 | free(arr); 242 | return 1; 243 | } 244 | 245 | /* Sparse Dictionary */ 246 | struct sparse_dict *sparse_dict_init() { 247 | struct sparse_dict *new = NULL; 248 | new = calloc(1, sizeof(struct sparse_dict)); 249 | if (new == NULL) 250 | return NULL; 251 | 252 | new->bucket_max = STARTING_SIZE; 253 | new->bucket_count = 0; 254 | new->buckets = sparse_array_init(sizeof(struct sparse_bucket), STARTING_SIZE); 255 | if (new->buckets == NULL) 256 | goto error; 257 | 258 | return new; 259 | 260 | error: 261 | free(new); 262 | return NULL; 263 | } 264 | 265 | static const int _create_and_insert_new_bucket( 266 | struct sparse_array *array, const unsigned int i, 267 | const char *key, const size_t klen, 268 | const void *value, const size_t vlen, 269 | const uint64_t key_hash) { 270 | void *copied_value = NULL; 271 | char *copied_key = NULL; 272 | 273 | copied_value = malloc(vlen + klen); 274 | if (copied_value == NULL) 275 | goto error; 276 | memcpy(copied_value, value, vlen); 277 | 278 | copied_key = copied_value + vlen; 279 | strncpy(copied_key, key, klen); 280 | 281 | struct sparse_bucket bct = { 282 | .key = copied_key, 283 | .klen = klen, 284 | .val = copied_value, 285 | .vlen = vlen, 286 | .hash = key_hash 287 | }; 288 | 289 | if (!sparse_array_set(array, i, &bct, sizeof(bct))) 290 | goto error; 291 | 292 | return 1; 293 | 294 | error: 295 | free(copied_value); 296 | return 0; 297 | } 298 | 299 | static const int _rehash_and_grow_table(struct sparse_dict *dict) { 300 | /* We've reached our chosen 'rehash the table' point, so 301 | * we need to resize the table now. 302 | */ 303 | unsigned int i = 0, buckets_rehashed = 0; 304 | const size_t new_bucket_max = dict->bucket_max * 2; 305 | struct sparse_array *new_buckets = NULL; 306 | 307 | new_buckets = sparse_array_init(sizeof(struct sparse_bucket), new_bucket_max); 308 | if (new_buckets == NULL) 309 | goto error; 310 | 311 | /* Loop through each bucket and stick it into the new array. */ 312 | for (i = 0; i < dict->bucket_max; i++) { 313 | size_t bucket_siz = 0; 314 | const struct sparse_bucket *bucket = sparse_array_get(dict->buckets, i, &bucket_siz); 315 | 316 | if (bucket_siz != 0 && bucket != NULL) { 317 | /* We found a bucket. */ 318 | unsigned int probed_val = 0, num_probes = 0; 319 | uint64_t key_hash = bucket->hash; 320 | while (1) { 321 | /* Quadratically probe along the hash table for an empty slot. */ 322 | probed_val = QUADRATIC_PROBE(new_bucket_max); 323 | size_t current_value_siz = 0; 324 | const void *current_value = sparse_array_get(new_buckets, probed_val, ¤t_value_siz); 325 | 326 | if (current_value_siz == 0 && current_value == NULL) 327 | break; 328 | 329 | /* If the following ever happens, there are deeply troubling 330 | * things that no longer make sense in the universe. 331 | */ 332 | if (num_probes > dict->bucket_count) 333 | goto error; 334 | 335 | num_probes++; 336 | } 337 | if (!sparse_array_set(new_buckets, probed_val, 338 | bucket, sizeof(struct sparse_bucket))) 339 | goto error; 340 | buckets_rehashed++; 341 | } 342 | 343 | /* Short circuit to see if we can quit early: */ 344 | if (buckets_rehashed == dict->bucket_count) 345 | break; 346 | } 347 | 348 | /* Finally, swap out the old array with the new one: */ 349 | sparse_array_free(dict->buckets); 350 | dict->buckets = new_buckets; 351 | dict->bucket_max = new_bucket_max; 352 | 353 | return 1; 354 | 355 | error: 356 | if (new_buckets) 357 | sparse_array_free(new_buckets); 358 | return 0; 359 | } 360 | 361 | const int sparse_dict_set(struct sparse_dict *dict, 362 | const char *key, const size_t klen, 363 | const void *value, const size_t vlen) { 364 | const uint64_t key_hash = hash_fnv1a(key, klen); 365 | unsigned int num_probes = 0; 366 | 367 | /* First check the array to see if we have an object already stored in 368 | * 'out' position. 369 | */ 370 | while (1) { 371 | size_t current_value_siz = 0; 372 | /* Use quadratic probing here to insert into the table. 373 | * Further reading: https://en.wikipedia.org/wiki/Quadratic_probing 374 | */ 375 | const unsigned int probed_val = QUADRATIC_PROBE(dict->bucket_max); 376 | const void *current_value = sparse_array_get(dict->buckets, probed_val, ¤t_value_siz); 377 | 378 | if (current_value_siz == 0 && current_value == NULL) { 379 | /* Awesome, the slot we want is empty. Insert as normal. */ 380 | if (_create_and_insert_new_bucket(dict->buckets, probed_val, key, klen, value, vlen, key_hash)) 381 | break; 382 | else 383 | goto error; 384 | } else { 385 | /* We found a bucket. Check to see if it has the same key as we do. */ 386 | struct sparse_bucket *existing_bucket = (struct sparse_bucket *)current_value; 387 | if (existing_bucket->hash == key_hash && 388 | existing_bucket->klen == klen && 389 | strncmp(existing_bucket->key, key, klen) == 0) { 390 | /* Great, we probed along the hashtable and found a bucket with the same key as 391 | * the key we want to insert. Replace it. */ 392 | char *existing_key = existing_bucket->key; 393 | void *existing_val = existing_bucket->val; 394 | if (_create_and_insert_new_bucket(dict->buckets, probed_val, key, klen, value, vlen, key_hash)) { 395 | /* We return here because we don't want to execute the 'resize the table' 396 | * logic. We overwrote a bucket instead of adding a new one, so we know 397 | * we don't need to resize anything. 398 | */ 399 | free(existing_key); 400 | free(existing_val); 401 | return 1; 402 | } else { 403 | goto error; 404 | } 405 | } 406 | } 407 | 408 | num_probes++; 409 | 410 | if (num_probes > dict->bucket_count) { 411 | /* If this ever happens something has gone very, very wrong. 412 | * The hash table is full. 413 | */ 414 | printf("Could not find an open slot in the table.\n"); 415 | goto error; 416 | } 417 | } 418 | 419 | dict->bucket_count++; 420 | 421 | /* See if we've hit our 'we should rehash the table' occupancy number: */ 422 | if (dict->bucket_count / (float)dict->bucket_max >= RESIZE_PERCENT/100.0f) 423 | return _rehash_and_grow_table(dict); 424 | 425 | return 1; 426 | 427 | error: 428 | return 0; 429 | } 430 | 431 | const void *sparse_dict_get(struct sparse_dict *dict, const char *key, 432 | const size_t klen, size_t *outsize) { 433 | const uint64_t key_hash = hash_fnv1a(key, klen); 434 | unsigned int num_probes = 0; 435 | 436 | while (1) { 437 | size_t current_value_siz = 0; 438 | const unsigned int probed_val = QUADRATIC_PROBE(dict->bucket_max); 439 | const void *current_value = sparse_array_get(dict->buckets, probed_val, ¤t_value_siz); 440 | 441 | if (current_value_siz != 0 && current_value != NULL) { 442 | /* We have to do a string comparison here because we use quadratic probing. 443 | * The value we pulled from the underlying array could be anything. 444 | */ 445 | struct sparse_bucket *existing_bucket = (struct sparse_bucket *)current_value; 446 | if (existing_bucket->hash == key_hash && 447 | existing_bucket->klen == klen && 448 | strncmp(existing_bucket->key, key, klen) == 0) { 449 | if (outsize) 450 | memcpy(outsize, &existing_bucket->vlen, sizeof(existing_bucket->vlen)); 451 | 452 | return existing_bucket->val; 453 | } 454 | } else { 455 | /* We found nothing where we expected something. */ 456 | return NULL; 457 | } 458 | 459 | num_probes++; 460 | 461 | if (num_probes > dict->bucket_count) 462 | return NULL; 463 | } 464 | 465 | return NULL; 466 | } 467 | 468 | const int sparse_dict_free(struct sparse_dict *dict) { 469 | unsigned int i = 0; 470 | for (i = 0; i < dict->bucket_max; i++) { 471 | size_t current_value_siz = 0; 472 | const void *current_value = sparse_array_get(dict->buckets, i, ¤t_value_siz); 473 | 474 | if (current_value_siz != 0 && current_value != NULL) { 475 | struct sparse_bucket *existing_bucket = (struct sparse_bucket *)current_value; 476 | free(existing_bucket->val); 477 | } 478 | } 479 | sparse_array_free(dict->buckets); 480 | free(dict); 481 | return 1; 482 | } 483 | -------------------------------------------------------------------------------- /src/test.c: -------------------------------------------------------------------------------- 1 | /* vim: noet ts=4 sw=4 2 | */ 3 | #include 4 | #include 5 | #include "simple_sparsehash.h" 6 | 7 | #define begin_tests() int test_return_val = 0;\ 8 | int tests_failed = 0;\ 9 | int tests_run = 0; 10 | #define run_test(test) test_return_val = test();\ 11 | if (!test_return_val) {\ 12 | tests_failed++;\ 13 | printf("%c[%dmFailed%c[%dm: %s\n", 0x1B, 31, 0x1B, 0, #test);\ 14 | } else {\ 15 | tests_run++;\ 16 | printf("%c[%dmPassed%c[%dm: %s\n", 0x1B, 32, 0x1B, 0, #test);\ 17 | } 18 | #define finish_tests() printf("\n-----\nTests passed: (%i/%i)\n", tests_run,\ 19 | tests_run + tests_failed); 20 | #define assert(x) if (!(x)) {\ 21 | printf("%i: ", __LINE__);\ 22 | return 0;\ 23 | } 24 | 25 | 26 | int test_empty_array_does_not_blow_up() { 27 | struct sparse_array *arr = NULL; 28 | arr = sparse_array_init(sizeof(uint64_t), 32); 29 | assert(arr); 30 | 31 | assert(!sparse_array_get(arr, 0, NULL)); 32 | 33 | assert(sparse_array_free(arr)); 34 | return 1; 35 | } 36 | 37 | int test_cannot_set_outside_bounds() { 38 | struct sparse_array *arr = NULL; 39 | const uint64_t test_num = 666; 40 | arr = sparse_array_init(sizeof(uint64_t), 32); 41 | assert(arr); 42 | 43 | assert(sparse_array_set(arr, 35, &test_num, sizeof(test_num)) == 0); 44 | 45 | assert(sparse_array_free(arr)); 46 | return 1; 47 | } 48 | 49 | int test_cannot_get_outside_bounds() { 50 | struct sparse_array *arr = NULL; 51 | arr = sparse_array_init(sizeof(uint64_t), 32); 52 | assert(arr); 53 | 54 | assert(!sparse_array_get(arr, 35, NULL)); 55 | 56 | assert(sparse_array_free(arr)); 57 | return 1; 58 | } 59 | 60 | int test_cannot_set_bigger_elements() { 61 | struct sparse_array *arr = NULL; 62 | const uint64_t test_num = 666; 63 | arr = sparse_array_init(sizeof(char), 100); 64 | assert(arr); 65 | 66 | assert(sparse_array_set(arr, 0, &test_num, sizeof(test_num)) == 0); 67 | 68 | assert(sparse_array_free(arr)); 69 | return 1; 70 | 71 | } 72 | 73 | int test_array_set_backwards() { 74 | int i; 75 | const int array_size = 120; 76 | struct sparse_array *arr = NULL; 77 | arr = sparse_array_init(sizeof(int), array_size); 78 | assert(arr); 79 | 80 | for (i = array_size - 1; i >= 0; i--) { 81 | int *returned = NULL; 82 | size_t siz = 0; 83 | assert(sparse_array_set(arr, i, &i, sizeof(i))); 84 | returned = (int *)sparse_array_get(arr, i, &siz); 85 | assert(returned); 86 | assert(*returned == i); 87 | assert(siz == sizeof(int)); 88 | } 89 | 90 | for (i = array_size - 1; i >= 0; i--) { 91 | int *returned = NULL; 92 | size_t siz = 0; 93 | returned = (int *)sparse_array_get(arr, i, &siz); 94 | assert(*returned == i); 95 | assert(siz == sizeof(int)); 96 | } 97 | 98 | assert(sparse_array_free(arr)); 99 | return 1; 100 | } 101 | 102 | int test_array_set() { 103 | int i; 104 | const int array_size = 130; 105 | struct sparse_array *arr = NULL; 106 | arr = sparse_array_init(sizeof(int), array_size); 107 | assert(arr); 108 | 109 | for (i = 0; i < array_size; i++) { 110 | int *returned = NULL; 111 | size_t siz = 0; 112 | assert(sparse_array_set(arr, i, &i, sizeof(i))); 113 | returned = (int *)sparse_array_get(arr, i, &siz); 114 | assert(*returned == i); 115 | assert(siz == sizeof(int)); 116 | } 117 | 118 | for (i = 0; i < array_size; i++) { 119 | /* Loop through again just to make sure. */ 120 | int *returned = NULL; 121 | size_t siz = 0; 122 | returned = (int *)sparse_array_get(arr, i, &siz); 123 | assert(*returned == i); 124 | assert(siz == sizeof(int)); 125 | } 126 | 127 | assert(sparse_array_free(arr)); 128 | return 1; 129 | } 130 | 131 | int test_array_set_high_num() { 132 | const int test_num = 65555555; 133 | const int index = GROUP_SIZE - 1; 134 | int *returned = NULL; 135 | size_t siz = 0; 136 | struct sparse_array *arr = NULL; 137 | 138 | arr = sparse_array_init(sizeof(int), 140); 139 | assert(arr); 140 | 141 | assert(sparse_array_set(arr, index, &test_num, sizeof(test_num))); 142 | returned = (int *)sparse_array_get(arr, index, &siz); 143 | assert(returned); 144 | assert(*returned == test_num); 145 | assert(siz == sizeof(int)); 146 | 147 | assert(sparse_array_free(arr)); 148 | return 1; 149 | } 150 | 151 | int test_array_set_overwrites_old_values() { 152 | struct sparse_array *arr = NULL; 153 | const int test_num = 666; 154 | const int test_num2 = 1024; 155 | arr = sparse_array_init(sizeof(int), 150); 156 | assert(arr); 157 | 158 | assert(sparse_array_set(arr, 0, &test_num, sizeof(test_num))); 159 | assert(sparse_array_set(arr, 0, &test_num2, sizeof(test_num2))); 160 | 161 | assert(*(const int *)sparse_array_get(arr, 0, NULL) == 1024); 162 | 163 | assert(sparse_array_free(arr)); 164 | return 1; 165 | } 166 | 167 | int test_array_get() { 168 | struct sparse_array *arr = NULL; 169 | const int test_num = 666; 170 | size_t item_size = 0; 171 | arr = sparse_array_init(sizeof(int), 200); 172 | assert(arr); 173 | 174 | assert(sparse_array_set(arr, 0, &test_num, sizeof(test_num))); 175 | assert(*(const int *)sparse_array_get(arr, 0, &item_size) == 666); 176 | assert(item_size == sizeof(int)); 177 | 178 | assert(sparse_array_free(arr)); 179 | return 1; 180 | } 181 | 182 | int test_dict_set() { 183 | struct sparse_dict *dict = NULL; 184 | dict = sparse_dict_init(); 185 | assert(dict); 186 | 187 | assert(sparse_dict_set(dict, "key", strlen("key"), "value", strlen("value"))); 188 | 189 | assert(sparse_dict_free(dict)); 190 | return 1; 191 | } 192 | 193 | int test_dict_get() { 194 | struct sparse_dict *dict = NULL; 195 | size_t outsize = 0; 196 | const char *value = NULL; 197 | 198 | dict = sparse_dict_init(); 199 | assert(dict); 200 | 201 | assert(sparse_dict_set(dict, "key", strlen("key"), "value", strlen("value"))); 202 | 203 | 204 | value = sparse_dict_get(dict, "key", strlen("key"), &outsize); 205 | assert(value); 206 | assert(outsize == strlen("value")); 207 | assert(strncmp(value, "value", outsize) == 0); 208 | 209 | assert(sparse_dict_free(dict)); 210 | return 1; 211 | } 212 | 213 | int test_dict_lots_of_set() { 214 | struct sparse_dict *dict = NULL; 215 | int i = 0; 216 | 217 | dict = sparse_dict_init(); 218 | assert(dict); 219 | 220 | const int iterations = 1000000; 221 | for (i = 0; i < iterations; i++) { 222 | char key[64] = {0}; 223 | snprintf(key, sizeof(key), "crazy hash%i", i); 224 | 225 | char val[64] = {0}; 226 | snprintf(val, sizeof(val), "value%i", i); 227 | 228 | assert(sparse_dict_set(dict, key, strlen(key), val, strlen(val))); 229 | assert(dict->bucket_count == (unsigned int)(i + 1)); 230 | 231 | size_t outsize = 0; 232 | const char *retrieved_value = sparse_dict_get(dict, key, strlen(key), &outsize); 233 | assert(retrieved_value); 234 | assert(outsize == strlen(val)); 235 | assert(strncmp(retrieved_value, val, outsize) == 0); 236 | } 237 | 238 | for (i = iterations - 1; i >= 0; i--) { 239 | /* Do they same thing but just retrieve values. */ 240 | char key[64] = {0}; 241 | snprintf(key, sizeof(key), "crazy hash%i", i); 242 | 243 | char val[64] = {0}; 244 | snprintf(val, sizeof(val), "value%i", i); 245 | 246 | size_t outsize = 0; 247 | const char *retrieved_value = sparse_dict_get(dict, key, strlen(key), &outsize); 248 | assert(retrieved_value); 249 | assert(outsize == strlen(val)); 250 | assert(strncmp(retrieved_value, val, outsize) == 0); 251 | } 252 | 253 | assert(sparse_dict_free(dict)); 254 | return 1; 255 | } 256 | 257 | int main(int argc, char *argv[]) { 258 | (void)argc; 259 | (void)argv; 260 | 261 | begin_tests(); 262 | run_test(test_cannot_set_bigger_elements); 263 | run_test(test_cannot_set_outside_bounds); 264 | run_test(test_cannot_get_outside_bounds); 265 | run_test(test_empty_array_does_not_blow_up); 266 | run_test(test_array_set); 267 | run_test(test_array_set_backwards); 268 | run_test(test_array_set_overwrites_old_values); 269 | run_test(test_array_set_high_num); 270 | run_test(test_array_get); 271 | run_test(test_dict_set); 272 | run_test(test_dict_get); 273 | run_test(test_dict_lots_of_set); 274 | finish_tests(); 275 | 276 | return 0; 277 | } 278 | --------------------------------------------------------------------------------