├── .gitignore
├── LICENSE
├── Makefile
├── README.md
├── include
    └── simple_sparsehash.h
├── run_tests.sh
└── src
    ├── simple_sparsehash.c
    └── test.c


/.gitignore:
--------------------------------------------------------------------------------
1 | *.sw[op]
2 | libsimple-sparsehash.so
3 | sparsehash_test
4 | *.o
5 | .gdb_history
6 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2015 Quinlan Pfiffer
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | VERSION=0.1
 2 | SOVERSION=0
 3 | CFLAGS=-std=c99 -Wextra -Wno-ignored-qualifiers -O3 -g -Werror -Wall
 4 | NAME=libsimple-sparsehash.so
 5 | TESTNAME=sparsehash_test
 6 | OBJS=simple_sparsehash.o
 7 | INCLUDES=-I./include/
 8 | LIBINCLUDES=-L.
 9 | 
10 | PREFIX?=/usr/local
11 | INSTALL_LIB=$(PREFIX)/lib/
12 | INSTALL_INCLUDE=$(PREFIX)/include/
13 | 
14 | uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not')
15 | LDCONFIG=
16 | ifeq ($(uname_S),Darwin)
17 | 	LDCONFIG=echo
18 | else
19 | 	LDCONFIG=ldconfig
20 | endif
21 | 
22 | all: $(NAME) $(TESTNAME)
23 | 
24 | clean:
25 | 	rm *.o
26 | 	rm $(TESTNAME)
27 | 	rm $(NAME)
28 | 
29 | $(TESTNAME): test.o $(NAME)
30 | 	$(CC) $(CFLAGS) $(INCLUDES) $(LIBINCLUDES) -o $(TESTNAME) $< -lsimple-sparsehash
31 | 
32 | %.o: ./src/%.c
33 | 	$(CC) $(CFLAGS) $(INCLUDES) -fPIC -c $<
34 | 
35 | $(NAME): $(OBJS)
36 | 	$(CC) -shared -fPIC $(CFLAGS) $(INCLUDES) -o $(NAME) $^
37 | 
38 | uninstall:
39 | 	rm -rf $(INSTALL_LIB)$(NAME)*
40 | 	rm -rf $(INSTALL_INCLUDE)/simple_sparsehash.h
41 | 
42 | install:
43 | 	@mkdir -p $(INSTALL_LIB)
44 | 	@mkdir -p $(INSTALL_INCLUDE)
45 | 	@install $(NAME) $(INSTALL_LIB)$(NAME).$(VERSION)
46 | 	@ln -fs $(INSTALL_LIB)$(NAME).$(VERSION) $(INSTALL_LIB)$(NAME)
47 | 	@ln -fs $(INSTALL_LIB)$(NAME).$(VERSION) $(INSTALL_LIB)$(NAME).$(SOVERSION)
48 | 	@install ./include/*.h $(INSTALL_INCLUDE)
49 | 	@$(LDCONFIG) $(INSTALL_LIB)
50 | 	@echo "$(NAME) installed to $(PREFIX) :^)."
51 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ## What is it?
 2 | 
 3 | This is a simple reimplementation of Google's [SparseHash](https://code.google.com/p/sparsehash/)
 4 | library intended as both a learning and teaching exercise.
 5 | 
 6 | ## How do I use it?
 7 | 
 8 | Either copy `./include/simple_sparsehash.h` and `./src/simple_sparsehash.c` into
 9 | your project and start using them, or:
10 | 
11 | ```
12 |     make
13 |     sudo make install
14 | ```
15 | 
16 | Then when you build your project just link to the shared library with
17 | `-lsimple-sparsehash`.
18 | 
19 | ## Tests
20 | 
21 | Just `make && ./run_tests.sh`.
22 | 
23 | ## Differences from the official version
24 | 
25 | * Doesn't support many of the things that the official version does, like
26 |   iterators, swapping, deletion, etc.
27 | * There are no 'default values' of sparse arrays. You access something that
28 |   isn't real? You get `NULL`.
29 | 
30 | ## Eventual TODO
31 | 
32 | * Store actual items in the arrays, not pointers to items.
33 | * Resize the table down when it reaches an inverse occupancy or something.
34 | * Store object size in the dictionary, so that we can make assumptions about
35 |   array size. Right now it accepts any value, and is slightly slower due to not
36 |   having any locality of reference, and having to jump to an extra location in
37 |   memory. Maybe two different versions?
38 | * Be able to delete things from the hashtable
39 | * Refactor the get/set/rehash methods. They've got some really similar code.
40 | * Speed it up, it's currently pretty damn slow.
41 | 


--------------------------------------------------------------------------------
/include/simple_sparsehash.h:
--------------------------------------------------------------------------------
 1 | /* vim: noet ts=4 sw=4
 2 | */
 3 | #pragma once
 4 | #include <inttypes.h>
 5 | #include <stdio.h>
 6 | 
 7 | /* The maximum size of each sparse_array_group. */
 8 | #define GROUP_SIZE 48
 9 | 
10 | /* The default size of the hash table. Used to init bucket_max. */
11 | #define STARTING_SIZE 32
12 | 
13 | /* The default 'should we resize' percentage, out of 100 percent. */
14 | #define RESIZE_PERCENT 80
15 | 
/* Number of bits held by one bitmap word (a uint32_t). */
#define BITCHUNK_SIZE (sizeof(uint32_t) * 8)
/* Number of uint32_t words needed to hold GROUP_SIZE occupancy bits, i.e.
 * GROUP_SIZE / BITCHUNK_SIZE rounded up. The expansion is fully parenthesized
 * so it stays correct inside larger expressions such as `n * BITMAP_SIZE`.
 */
#define BITMAP_SIZE ((GROUP_SIZE - 1) / BITCHUNK_SIZE + 1)
22 | 
/* These are the objects that get stored in the sparse arrays that
 * make up a sparse dictionary. One bucket describes one key/value pair.
 */
struct sparse_bucket {
	char			*key;	/* Copy of the key bytes. Lives in the same heap block as `val`, right after the value. */
	const size_t	klen;	/* Length of `key` in bytes. */
	void			*val;	/* Copy of the value bytes; start of the heap block that is eventually free()d. */
	const size_t	vlen;	/* Length of `val` in bytes. */
	const uint64_t	hash;	/* Hash of the key, cached so rehashing and lookups need not recompute it. */
};
33 | 
/* One group of a sparse array: a packed store of only the occupied slots. */
struct sparse_array_group {
	uint32_t		count;							/* The number of items currently stored in `group`. */
	size_t			elem_size;						/* The maximum payload size of each element, in bytes. */
	void *			group;							/* Packed storage; each slot is a size_t length followed by elem_size payload bytes. */
	uint32_t		bitmap[BITMAP_SIZE];			/* One bit per position: set when that position is occupied. */
	/* bitmap requires some explanation. We use the bitmap to store which
	 * positions in the group are occupied. The packed offset of a position
	 * is the number of set bits below it, which the bit-testing helpers
	 * in simple_sparsehash.c compute.
	 */
};
44 | 
/* A fixed-capacity sparse array, split into groups of GROUP_SIZE positions. */
struct sparse_array {
	const size_t					maximum;		/* The maximum number of items that can be in this array. */
	struct sparse_array_group		*groups;		/* Array of groups; there are (maximum - 1)/GROUP_SIZE + 1 of them. */
};
49 | 
/* A hash table whose slots live in a single sparse array of sparse_bucket items. */
struct sparse_dict {
	size_t bucket_max;					/* The current maximum number of buckets in this dictionary. */
	size_t bucket_count;				/* The number of occupied buckets in this dictionary. */
	struct sparse_array *buckets;		/* The sparse array holding the buckets. Starts with STARTING_SIZE slots. */
};
55 | 
56 | /* ------------ */
57 | /* Sparse Array */
58 | /* ------------ */
59 | 
60 | struct sparse_array *sparse_array_init(const size_t element_size, const uint32_t maximum);
61 | const int sparse_array_set(struct sparse_array *arr, const uint32_t i,
62 | 						   const void *val, const size_t vlen);
63 | const void *sparse_array_get(struct sparse_array *arr, const uint32_t i, size_t *outsize);
64 | const int sparse_array_free(struct sparse_array *arr);
65 | 
66 | 
67 | /* ----------------- */
68 | /* Sparse Dictionary */
69 | /* ----------------- */
70 | 
71 | /* Creates a new sparse dictionary. */
72 | struct sparse_dict *sparse_dict_init();
73 | 
74 | /* Copies `value` into `dict`. */
75 | const int sparse_dict_set(struct sparse_dict *dict,
76 | 						  const char *key, const size_t klen,
77 | 						  const void *value, const size_t vlen);
78 | 
79 | /* Returns the value of `key` from `dict`. *outsize will be filled out if it
80 |  * is non-null.
81 |  */
82 | const void *sparse_dict_get(struct sparse_dict *dict, const char *key,
83 | 							const size_t klen, size_t *outsize);
84 | 
85 | /* Frees and cleans up a sparse_dict created with sparse_dict_init(). */
86 | const int sparse_dict_free(struct sparse_dict *dict);
87 | 


--------------------------------------------------------------------------------
/run_tests.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | export LD_LIBRARY_PATH=./:$LD_LIBRARY_PATH
 4 | 
 5 | CMD=./sparsehash_test
 6 | if [ $# -eq 0 ]; then
 7 |     $CMD
 8 | else
 9 |     if [ $1 == "gdb" ]; then
10 |         gdb --args $CMD
11 |     elif [ $1 == "valgrind" ]; then
12 |         valgrind --track-origins=yes --leak-check=full $CMD
13 |     else
14 |         $CMD
15 |     fi
16 | fi
17 | 


--------------------------------------------------------------------------------
/src/simple_sparsehash.c:
--------------------------------------------------------------------------------
  1 | /* vim: noet ts=4 sw=4
  2 | */
  3 | #include <stdlib.h>
  4 | #include <string.h>
  5 | #include "simple_sparsehash.h"
  6 | 
  7 | #define FULL_ELEM_SIZE (arr->elem_size + sizeof(size_t))
  8 | #define MAX_ARR_SIZE ((arr->maximum - 1)/GROUP_SIZE + 1)
  9 | #define QUADRATIC_PROBE(maximum) (key_hash + num_probes * num_probes) & (maximum - 1)
 10 | 
 11 | /* One of the simplest hashing functions, FNV-1a. See the wikipedia article for more info:
 12 |  * http://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
 13 |  */
 14 | static const uint64_t hash_fnv1a(const char *key, const size_t klen) {
 15 | 	static const uint64_t fnv_prime = 1099511628211ULL;
 16 | 	static const uint64_t fnv_offset_bias = 14695981039346656037ULL;
 17 | 
 18 | 	const int iterations = klen;
 19 | 
 20 | 	uint8_t i;
 21 | 	uint64_t hash = fnv_offset_bias;
 22 | 
 23 | 	for(i = 0; i < iterations; i++) {
 24 | 		hash = hash ^ key[i];
 25 | 		hash = hash * fnv_prime;
 26 | 	}
 27 | 
 28 | 	return hash;
 29 | }
 30 | 
 31 | /* TODO: Figure out better names for charbit/modbit */
 32 | static const uint32_t charbit(const uint32_t position) {
 33 | 	/* Get enough bits to store 0 - 31. */
 34 | 	return position >> 5;
 35 | }
 36 | 
 37 | static const uint32_t modbit(const uint32_t position) {
 38 | 	/* Get the number of bits of this number that are 0 - 31,
 39 | 	 * or something like that.
 40 | 	 */
 41 | 	return 1 << (position & 31);
 42 | }
 43 | 
 44 | /* This is one of the popcount implementations from Wikipedia.
 45 |  * http://en.wikipedia.org/wiki/Hamming_weight
 46 |  */
 47 | static inline uint32_t popcount_32(uint32_t x) {
 48 | 	const uint32_t m1 = 0x55555555;
 49 | 	const uint32_t m2 = 0x33333333;
 50 | 	const uint32_t m4 = 0x0f0f0f0f;
 51 | 	x -= (x >> 1) & m1;
 52 | 	x = (x & m2) + ((x >> 2) & m2);
 53 | 	x = (x + (x >> 4)) & m4;
 54 | 	x += x >>  8;
 55 | 	return (x + (x >> 16)) & 0x3f;
 56 | }
 57 | 
/* This function is used to map an item's 'position' (the user-facing index
 * into the group) to its 'offset', which is the actual slot in the packed
 * storage, memory-wise.
 *
 * The way we do this is by counting the number of 1s in the bitmap for
 * positions 0 .. position-1. The original implementation uses a big table for
 * the popcount.
 */
static const uint32_t position_to_offset(const uint32_t *bitmap,
									   const uint32_t position) {
	uint32_t retval = 0;
	uint32_t pos = position;
	uint32_t bitmap_iter = 0;

	/* Here we loop through the bitmap a uint32_t at a time, and count the number
	 * of 1s in each whole word that lies entirely below `position`.
	 */
	for (; pos >= BITCHUNK_SIZE; pos -= BITCHUNK_SIZE)
		retval += popcount_32(bitmap[bitmap_iter++]);

	/* This last bit does the same thing as above, but takes care of the
	 * remainder that didn't fill a whole word. At this point pos is 0 - 31,
	 * so the mask keeps only the `pos` lowest bits of the current word and we
	 * add the number of 1s among them to retval.
	 */
	return retval + popcount_32(bitmap[bitmap_iter] & (((uint32_t)1 << pos) - 1u));
}
 85 | 
 86 | /* Simple check to see whether a slot in the array is occupied or not. */
 87 | static const int is_position_occupied(const uint32_t *bitmap,
 88 | 							 const uint32_t position) {
 89 | 	return bitmap[charbit(position)] & modbit(position);
 90 | }
 91 | 
 92 | static void set_position(uint32_t *bitmap, const uint32_t position) {
 93 | 	bitmap[charbit(position)] |= modbit(position);
 94 | }
 95 | 
 96 | /* Sparse Array */
/* Stores a copy of `val` (vlen bytes) at position `i` of one group.
 *
 * arr:  the group to modify.
 * i:    position within the group.
 * val:  bytes to copy in.
 * vlen: number of bytes; must not exceed arr->elem_size.
 *
 * Returns 1 on success, 0 if vlen is too large or the reallocation fails.
 */
static const int _sparse_array_group_set(struct sparse_array_group *arr, const uint32_t i,
						   const void *val, const size_t vlen) {
	uint32_t offset = 0;
	void *destination = NULL;
	if (vlen > arr->elem_size)
		return 0;
	/* So what needs to happen in this function:
	 * 1. Convert the position (i) to the 'offset'
	 * 2. Check to see if this slot is already occupied
	 *    (is_position_occupied). Overwrite the old element if this is the
	 *    case.
	 * 3. Otherwise, expand the array by a single element and increase
	 *    our bucket count (arr->count). Finally, OR the bit in our state
	 *    bitmap that shows this position is occupied.
	 * 4. After doing all that, create a copy of val and stick it in the right
	 *    position in our array.
	 */

	offset = position_to_offset(arr->bitmap, i);
	if (!is_position_occupied(arr->bitmap, i)) {
		/* Bytes occupied by the elements at or after the insertion offset. */
		const size_t to_move_siz = (arr->count - offset) * FULL_ELEM_SIZE;
		/* Reallocate the array to hold the new item */
		void *new_group = realloc(arr->group, (arr->count + 1) * FULL_ELEM_SIZE);
		if (new_group == NULL)
			return 0;

		/* Now take all of the old items and move them up a slot: */
		if (to_move_siz > 0) {
			memmove((unsigned char *)(new_group) + ((offset + 1) * FULL_ELEM_SIZE),
					(unsigned char *)(new_group) + (offset * FULL_ELEM_SIZE),
					to_move_siz);
		}

		/* Increase the bucket count because we've expanded: */
		arr->count++;
		arr->group = new_group;
		/* Remember to modify the bitmap: */
		set_position(arr->bitmap, i);
	}

	/* Write the size header of the slot. memcpy is used instead of a typed
	 * store so the slot address does not need to be aligned for size_t.
	 */
	destination = (unsigned char *)(arr->group) + (offset * FULL_ELEM_SIZE);
	memcpy(destination, &vlen, sizeof(vlen));

	/* Advance past the size header and copy the payload right after it. */
	destination = (unsigned char *)destination + sizeof(vlen);
	memcpy(destination, val, vlen);

	return 1;
}
150 | 
151 | static const void *_sparse_array_group_get(struct sparse_array_group *arr,
152 | 							 const uint32_t i, size_t *outsize) {
153 | 	const uint32_t offset = position_to_offset(arr->bitmap, i);
154 | 	const unsigned char *item_siz = (unsigned char *)(arr->group) + (offset * FULL_ELEM_SIZE);
155 | 	const void *item = item_siz + sizeof(size_t);
156 | 
157 | 	if (!is_position_occupied(arr->bitmap, i))
158 | 		return NULL;
159 | 
160 | 	/* In a perfect world you could store 0 sized items and have that mean
161 | 	 * something, but I'll tolerate none of that right now.
162 | 	 */
163 | 	if (*(size_t *)item_siz == 0)
164 | 		return NULL;
165 | 
166 | 	/* If the user wants to know the size (outsize is non-null), write it
167 | 	 * out.
168 | 	 */
169 | 	if (outsize)
170 | 		memcpy(outsize, item_siz, sizeof(size_t));
171 | 
172 | 	return item;
173 | }
174 | 
175 | static const int _sparse_array_group_free(struct sparse_array_group *arr) {
176 | 	free(arr->group);
177 | 	return 1;
178 | }
179 | 
180 | struct sparse_array *sparse_array_init(const size_t element_size, const uint32_t maximum) {
181 | 	unsigned int i = 0;
182 | 	struct sparse_array *arr = NULL;
183 | 	/* CHECK YOUR SYSCALL RETURNS. Listen to djb. */
184 | 	arr = calloc(1, sizeof(struct sparse_array));
185 | 	if (arr == NULL)
186 | 		return NULL;
187 | 
188 | 	/* This is a non-obvious hack I use. If we have const variables in a
189 | 	 * struct then to initialize them we can either cast them or use an
190 | 	 * initializer like this.
191 | 	 * Then we copy it into a heap-allocated blob. The compiler lets us
192 | 	 * do this.
193 | 	 */
194 | 	struct sparse_array stack_array = {
195 | 		.maximum = maximum,
196 | 	};
197 | 
198 | 	memcpy(arr, &stack_array, sizeof(struct sparse_array));
199 | 	arr->groups = calloc(MAX_ARR_SIZE, sizeof(struct sparse_array_group));
200 | 	if (arr->groups == NULL) {
201 | 		free(arr);
202 | 		return NULL;
203 | 	}
204 | 
205 | 	for (i = 0; i < MAX_ARR_SIZE; i++) {
206 | 		struct sparse_array_group *sag = &arr->groups[i];
207 | 		sag->elem_size = element_size;
208 | 	}
209 | 
210 | 	return arr;
211 | }
212 | 
213 | const int sparse_array_set(struct sparse_array *arr, const uint32_t i,
214 | 						   const void *val, const size_t vlen) {
215 | 	/* Don't let users set outside the bounds of the array. */
216 | 	if (i > arr->maximum)
217 | 		return 0;
218 | 	/* Since our hashtable is divided into many arrays, we need to pick the one
219 | 	 * relevant to `i` in this case:
220 | 	 */
221 | 	struct sparse_array_group *operating_group = &arr->groups[i / GROUP_SIZE];
222 | 	const int position = i % GROUP_SIZE;
223 | 	return _sparse_array_group_set(operating_group, position, val, vlen);
224 | }
225 | 
226 | const void *sparse_array_get(struct sparse_array *arr, const uint32_t i, size_t *outsize) {
227 | 	if (i > arr->maximum)
228 | 		return NULL;
229 | 	struct sparse_array_group *operating_group = &arr->groups[i / GROUP_SIZE];
230 | 	const int position = i % GROUP_SIZE;
231 | 	return _sparse_array_group_get(operating_group, position, outsize);
232 | }
233 | 
234 | const int sparse_array_free(struct sparse_array *arr) {
235 | 	unsigned int i = 0;
236 | 	for (; i < MAX_ARR_SIZE; i++) {
237 | 		struct sparse_array_group *sag = &arr->groups[i];
238 | 		_sparse_array_group_free(sag);
239 | 	}
240 | 	free(arr->groups);
241 | 	free(arr);
242 | 	return 1;
243 | }
244 | 
245 | /* Sparse Dictionary */
246 | struct sparse_dict *sparse_dict_init() {
247 | 	struct sparse_dict *new = NULL;
248 | 	new = calloc(1, sizeof(struct sparse_dict));
249 | 	if (new == NULL)
250 | 		return NULL;
251 | 
252 | 	new->bucket_max = STARTING_SIZE;
253 | 	new->bucket_count = 0;
254 | 	new->buckets = sparse_array_init(sizeof(struct sparse_bucket), STARTING_SIZE);
255 | 	if (new->buckets == NULL)
256 | 		goto error;
257 | 
258 | 	return new;
259 | 
260 | error:
261 | 	free(new);
262 | 	return NULL;
263 | }
264 | 
265 | static const int _create_and_insert_new_bucket(
266 | 						struct sparse_array *array, const unsigned int i,
267 | 						const char *key, const size_t klen,
268 | 						const void *value, const size_t vlen,
269 | 						const uint64_t key_hash) {
270 | 	void *copied_value = NULL;
271 | 	char *copied_key = NULL;
272 | 
273 | 	copied_value = malloc(vlen + klen);
274 | 	if (copied_value == NULL)
275 | 		goto error;
276 | 	memcpy(copied_value, value, vlen);
277 | 
278 | 	copied_key = copied_value + vlen;
279 | 	strncpy(copied_key, key, klen);
280 | 
281 | 	struct sparse_bucket bct = {
282 | 		.key = copied_key,
283 | 		.klen = klen,
284 | 		.val = copied_value,
285 | 		.vlen = vlen,
286 | 		.hash = key_hash
287 | 	};
288 | 
289 | 	if (!sparse_array_set(array, i, &bct, sizeof(bct)))
290 | 		goto error;
291 | 
292 | 	return 1;
293 | 
294 | error:
295 | 	free(copied_value);
296 | 	return 0;
297 | }
298 | 
/* Doubles the capacity of `dict` and re-inserts every existing bucket into
 * the new, larger array.
 *
 * Buckets are copied by value; the key/value memory they point at is not
 * duplicated, so it simply changes owner from the old array to the new one.
 *
 * Returns 1 on success. On failure returns 0 and leaves `dict` pointing at
 * its original, unmodified array.
 */
static const int _rehash_and_grow_table(struct sparse_dict *dict) {
	/* We've reached our chosen 'rehash the table' point, so
	 * we need to resize the table now.
	 */
	unsigned int i = 0, buckets_rehashed = 0;
	const size_t new_bucket_max = dict->bucket_max * 2;
	struct sparse_array *new_buckets = NULL;

	new_buckets = sparse_array_init(sizeof(struct sparse_bucket), new_bucket_max);
	if (new_buckets == NULL)
		goto error;

	/* Loop through each bucket and stick it into the new array. */
	for (i = 0; i < dict->bucket_max; i++) {
		size_t bucket_siz = 0;
		const struct sparse_bucket *bucket = sparse_array_get(dict->buckets, i, &bucket_siz);

		if (bucket_siz != 0 && bucket != NULL) {
			/* We found a bucket. */
			unsigned int probed_val = 0, num_probes = 0;
			/* QUADRATIC_PROBE reads the locals `key_hash` and `num_probes`. */
			uint64_t key_hash = bucket->hash;
			while (1) {
				/* Quadratically probe along the hash table for an empty slot. */
				probed_val = QUADRATIC_PROBE(new_bucket_max);
				size_t current_value_siz = 0;
				const void *current_value = sparse_array_get(new_buckets, probed_val, &current_value_siz);

				if (current_value_siz == 0 && current_value == NULL)
					break;

				/* If the following ever happens, there are deeply troubling
				 * things that no longer make sense in the universe.
				 */
				if (num_probes > dict->bucket_count)
					goto error;

				num_probes++;
			}
			if (!sparse_array_set(new_buckets, probed_val,
						bucket, sizeof(struct sparse_bucket)))
				goto error;
			buckets_rehashed++;
		}

		/* Short circuit to see if we can quit early: */
		if (buckets_rehashed == dict->bucket_count)
			break;
	}

	/* Finally, swap out the old array with the new one: */
	sparse_array_free(dict->buckets);
	dict->buckets = new_buckets;
	dict->bucket_max = new_bucket_max;

	return 1;

error:
	/* Only the new array is released; the old one still owns every bucket. */
	if (new_buckets)
		sparse_array_free(new_buckets);
	return 0;
}
360 | 
361 | const int sparse_dict_set(struct sparse_dict *dict,
362 | 						  const char *key, const size_t klen,
363 | 						  const void *value, const size_t vlen) {
364 | 	const uint64_t key_hash = hash_fnv1a(key, klen);
365 | 	unsigned int num_probes = 0;
366 | 
367 | 	/* First check the array to see if we have an object already stored in
368 | 	 * 'out' position.
369 | 	 */
370 | 	while (1) {
371 | 		size_t current_value_siz = 0;
372 | 		/* Use quadratic probing here to insert into the table.
373 | 		 * Further reading: https://en.wikipedia.org/wiki/Quadratic_probing
374 | 		 */
375 | 		const unsigned int probed_val = QUADRATIC_PROBE(dict->bucket_max);
376 | 		const void *current_value = sparse_array_get(dict->buckets, probed_val, &current_value_siz);
377 | 
378 | 		if (current_value_siz == 0 && current_value == NULL) {
379 | 			/* Awesome, the slot we want is empty. Insert as normal. */
380 | 			if (_create_and_insert_new_bucket(dict->buckets, probed_val, key, klen, value, vlen, key_hash))
381 | 				break;
382 | 			else
383 | 				goto error;
384 | 		} else {
385 | 			/* We found a bucket. Check to see if it has the same key as we do. */
386 | 			struct sparse_bucket *existing_bucket = (struct sparse_bucket *)current_value;
387 | 			if (existing_bucket->hash == key_hash &&
388 | 					existing_bucket->klen == klen &&
389 | 					strncmp(existing_bucket->key, key, klen) == 0) {
390 | 				/* Great, we probed along the hashtable and found a bucket with the same key as
391 | 				 * the key we want to insert. Replace it. */
392 | 				char *existing_key = existing_bucket->key;
393 | 				void *existing_val = existing_bucket->val;
394 | 				if (_create_and_insert_new_bucket(dict->buckets, probed_val, key, klen, value, vlen, key_hash)) {
395 | 					/* We return here because we don't want to execute the 'resize the table'
396 | 					 * logic. We overwrote a bucket instead of adding a new one, so we know
397 | 					 * we don't need to resize anything.
398 | 					 */
399 | 					free(existing_key);
400 | 					free(existing_val);
401 | 					return 1;
402 | 				} else {
403 | 					goto error;
404 | 				}
405 | 			}
406 | 		}
407 | 
408 | 		num_probes++;
409 | 
410 | 		if (num_probes > dict->bucket_count) {
411 | 			/* If this ever happens something has gone very, very wrong.
412 | 			 * The hash table is full.
413 | 			 */
414 | 			printf("Could not find an open slot in the table.\n");
415 | 			goto error;
416 | 		}
417 | 	}
418 | 
419 | 	dict->bucket_count++;
420 | 
421 | 	/* See if we've hit our 'we should rehash the table' occupancy number: */
422 | 	if (dict->bucket_count / (float)dict->bucket_max >= RESIZE_PERCENT/100.0f)
423 | 		return _rehash_and_grow_table(dict);
424 | 
425 | 	return 1;
426 | 
427 | error:
428 | 	return 0;
429 | }
430 | 
/* Looks up `key` (klen bytes) in `dict`.
 *
 * Returns a pointer to the stored value, or NULL if the key is not present.
 * When the key is found and `outsize` is non-NULL, *outsize receives the
 * value's length in bytes. The returned memory is owned by the dictionary.
 */
const void *sparse_dict_get(struct sparse_dict *dict, const char *key,
							const size_t klen, size_t *outsize) {
	/* QUADRATIC_PROBE reads the locals `key_hash` and `num_probes`. */
	const uint64_t key_hash = hash_fnv1a(key, klen);
	unsigned int num_probes = 0;

	while (1) {
		size_t current_value_siz = 0;
		const unsigned int probed_val = QUADRATIC_PROBE(dict->bucket_max);
		const void *current_value = sparse_array_get(dict->buckets, probed_val, &current_value_siz);

		if (current_value_siz != 0 && current_value != NULL) {
			/* We have to do a string comparison here because we use quadratic probing.
			 * The value we pulled from the underlying array could be anything.
			 */
			struct sparse_bucket *existing_bucket = (struct sparse_bucket *)current_value;
			if (existing_bucket->hash == key_hash &&
					existing_bucket->klen == klen &&
					strncmp(existing_bucket->key, key, klen) == 0) {
				if (outsize)
					memcpy(outsize, &existing_bucket->vlen, sizeof(existing_bucket->vlen));

				return existing_bucket->val;
			}
		} else {
			/* An empty slot on the probe sequence means the key was never
			 * inserted.
			 */
			return NULL;
		}

		num_probes++;

		/* Give up after probing more times than there are stored buckets. */
		if (num_probes > dict->bucket_count)
			return NULL;
	}

	return NULL;
}
467 | 
468 | const int sparse_dict_free(struct sparse_dict *dict) {
469 | 	unsigned int i = 0;
470 | 	for (i = 0; i < dict->bucket_max; i++) {
471 | 		size_t current_value_siz = 0;
472 | 		const void *current_value = sparse_array_get(dict->buckets, i, &current_value_siz);
473 | 
474 | 		if (current_value_siz != 0 && current_value != NULL) {
475 | 			struct sparse_bucket *existing_bucket = (struct sparse_bucket *)current_value;
476 | 			free(existing_bucket->val);
477 | 		}
478 | 	}
479 | 	sparse_array_free(dict->buckets);
480 | 	free(dict);
481 | 	return 1;
482 | }
483 | 


--------------------------------------------------------------------------------
/src/test.c:
--------------------------------------------------------------------------------
  1 | /* vim: noet ts=4 sw=4
  2 | */
  3 | #include <stdio.h>
  4 | #include <string.h>
  5 | #include "simple_sparsehash.h"
  6 | 
  7 | #define begin_tests() int test_return_val = 0;\
  8 | 					  int tests_failed = 0;\
  9 | 					  int tests_run = 0;
 10 | #define run_test(test) test_return_val = test();\
 11 | 	if (!test_return_val) {\
 12 | 		tests_failed++;\
 13 | 		printf("%c[%dmFailed%c[%dm: %s\n", 0x1B, 31, 0x1B, 0, #test);\
 14 | 	} else {\
 15 | 		tests_run++;\
 16 | 		printf("%c[%dmPassed%c[%dm: %s\n", 0x1B, 32, 0x1B, 0, #test);\
 17 | 	}
 18 | #define finish_tests() printf("\n-----\nTests passed: (%i/%i)\n", tests_run,\
 19 | 							  tests_run + tests_failed);
 20 | #define assert(x) if (!(x)) {\
 21 | 		printf("%i: ", __LINE__);\
 22 | 		return 0;\
 23 | 	}
 24 | 
 25 | 
 26 | int test_empty_array_does_not_blow_up() {
 27 | 	struct sparse_array *arr = NULL;
 28 | 	arr = sparse_array_init(sizeof(uint64_t), 32);
 29 | 	assert(arr);
 30 | 
 31 | 	assert(!sparse_array_get(arr, 0, NULL));
 32 | 
 33 | 	assert(sparse_array_free(arr));
 34 | 	return 1;
 35 | }
 36 | 
 37 | int test_cannot_set_outside_bounds() {
 38 | 	struct sparse_array *arr = NULL;
 39 | 	const uint64_t test_num = 666;
 40 | 	arr = sparse_array_init(sizeof(uint64_t), 32);
 41 | 	assert(arr);
 42 | 
 43 | 	assert(sparse_array_set(arr, 35, &test_num, sizeof(test_num)) == 0);
 44 | 
 45 | 	assert(sparse_array_free(arr));
 46 | 	return 1;
 47 | }
 48 | 
 49 | int test_cannot_get_outside_bounds() {
 50 | 	struct sparse_array *arr = NULL;
 51 | 	arr = sparse_array_init(sizeof(uint64_t), 32);
 52 | 	assert(arr);
 53 | 
 54 | 	assert(!sparse_array_get(arr, 35, NULL));
 55 | 
 56 | 	assert(sparse_array_free(arr));
 57 | 	return 1;
 58 | }
 59 | 
 60 | int test_cannot_set_bigger_elements() {
 61 | 	struct sparse_array *arr = NULL;
 62 | 	const uint64_t test_num = 666;
 63 | 	arr = sparse_array_init(sizeof(char), 100);
 64 | 	assert(arr);
 65 | 
 66 | 	assert(sparse_array_set(arr, 0, &test_num, sizeof(test_num)) == 0);
 67 | 
 68 | 	assert(sparse_array_free(arr));
 69 | 	return 1;
 70 | 
 71 | }
 72 | 
 73 | int test_array_set_backwards() {
 74 | 	int i;
 75 | 	const int array_size = 120;
 76 | 	struct sparse_array *arr = NULL;
 77 | 	arr = sparse_array_init(sizeof(int), array_size);
 78 | 	assert(arr);
 79 | 
 80 | 	for (i = array_size - 1; i >= 0; i--) {
 81 | 		int *returned = NULL;
 82 | 		size_t siz = 0;
 83 | 		assert(sparse_array_set(arr, i, &i, sizeof(i)));
 84 | 		returned = (int *)sparse_array_get(arr, i, &siz);
 85 | 		assert(returned);
 86 | 		assert(*returned == i);
 87 | 		assert(siz == sizeof(int));
 88 | 	}
 89 | 
 90 | 	for (i = array_size - 1; i >= 0; i--) {
 91 | 		int *returned = NULL;
 92 | 		size_t siz = 0;
 93 | 		returned = (int *)sparse_array_get(arr, i, &siz);
 94 | 		assert(*returned == i);
 95 | 		assert(siz == sizeof(int));
 96 | 	}
 97 | 
 98 | 	assert(sparse_array_free(arr));
 99 | 	return 1;
100 | }
101 | 
/* Fills the array from position 0 upwards, checking each value right after
 * it is stored and again once everything has been inserted.
 */
int test_array_set() {
	int i;
	const int array_size = 130;
	struct sparse_array *arr = NULL;
	arr = sparse_array_init(sizeof(int), array_size);
	assert(arr);

	for (i = 0; i < array_size; i++) {
		int *returned = NULL;
		size_t siz = 0;
		assert(sparse_array_set(arr, i, &i, sizeof(i)));
		returned = (int *)sparse_array_get(arr, i, &siz);
		/* Fail the test instead of crashing if the lookup finds nothing. */
		assert(returned);
		assert(*returned == i);
		assert(siz == sizeof(int));
	}

	for (i = 0; i < array_size; i++) {
		/* Loop through again just to make sure. */
		int *returned = NULL;
		size_t siz = 0;
		returned = (int *)sparse_array_get(arr, i, &siz);
		assert(returned);
		assert(*returned == i);
		assert(siz == sizeof(int));
	}

	assert(sparse_array_free(arr));
	return 1;
}
130 | 
131 | int test_array_set_high_num() {
132 | 	const int test_num = 65555555;
133 | 	const int index = GROUP_SIZE - 1;
134 | 	int *returned = NULL;
135 | 	size_t siz = 0;
136 | 	struct sparse_array *arr = NULL;
137 | 
138 | 	arr = sparse_array_init(sizeof(int), 140);
139 | 	assert(arr);
140 | 
141 | 	assert(sparse_array_set(arr, index, &test_num, sizeof(test_num)));
142 | 	returned = (int *)sparse_array_get(arr, index, &siz);
143 | 	assert(returned);
144 | 	assert(*returned == test_num);
145 | 	assert(siz == sizeof(int));
146 | 
147 | 	assert(sparse_array_free(arr));
148 | 	return 1;
149 | }
150 | 
/* Setting the same position twice must leave the second value in place. */
int test_array_set_overwrites_old_values() {
	struct sparse_array *arr = NULL;
	const int test_num = 666;
	const int test_num2 = 1024;
	const int *returned = NULL;
	arr = sparse_array_init(sizeof(int), 150);
	assert(arr);

	assert(sparse_array_set(arr, 0, &test_num, sizeof(test_num)));
	assert(sparse_array_set(arr, 0, &test_num2, sizeof(test_num2)));

	returned = (const int *)sparse_array_get(arr, 0, NULL);
	/* Fail the test instead of crashing if the lookup finds nothing. */
	assert(returned);
	assert(*returned == 1024);

	assert(sparse_array_free(arr));
	return 1;
}
166 | 
/* A stored value must be returned together with its size. */
int test_array_get() {
	struct sparse_array *arr = NULL;
	const int test_num = 666;
	const int *returned = NULL;
	size_t item_size = 0;
	arr = sparse_array_init(sizeof(int), 200);
	assert(arr);

	assert(sparse_array_set(arr, 0, &test_num, sizeof(test_num)));

	returned = (const int *)sparse_array_get(arr, 0, &item_size);
	/* Fail the test instead of crashing if the lookup finds nothing. */
	assert(returned);
	assert(*returned == 666);
	assert(item_size == sizeof(int));

	assert(sparse_array_free(arr));
	return 1;
}
181 | 
/* Inserting a single key/value pair into a new dictionary must succeed. */
int test_dict_set() {
	struct sparse_dict *dict = sparse_dict_init();
	assert(dict);

	assert(sparse_dict_set(dict, "key", strlen("key"), "value", strlen("value")));

	assert(sparse_dict_free(dict));
	return 1;
}
192 | 
/* A value stored under a key must come back with the same bytes and size. */
int test_dict_get() {
	size_t outsize = 0;
	const char *value = NULL;
	struct sparse_dict *dict = sparse_dict_init();
	assert(dict);

	assert(sparse_dict_set(dict, "key", strlen("key"), "value", strlen("value")));

	value = sparse_dict_get(dict, "key", strlen("key"), &outsize);
	assert(value);
	assert(outsize == strlen("value"));
	/* Stored values are not NUL-terminated, so compare by length. */
	assert(strncmp(value, "value", outsize) == 0);

	assert(sparse_dict_free(dict));
	return 1;
}
212 | 
/* Stress test: inserts one million distinct keys, checking after every
 * insert that the bucket count grew by one and the value can be read back,
 * then reads every key again in reverse order.
 */
int test_dict_lots_of_set() {
	struct sparse_dict *dict = NULL;
	int i = 0;

	dict = sparse_dict_init();
	assert(dict);

	const int iterations = 1000000;
	for (i = 0; i < iterations; i++) {
		char key[64] = {0};
		snprintf(key, sizeof(key), "crazy hash%i", i);

		char val[64] = {0};
		snprintf(val, sizeof(val), "value%i", i);

		assert(sparse_dict_set(dict, key, strlen(key), val, strlen(val)));
		/* Every key is new, so each insert adds exactly one bucket. */
		assert(dict->bucket_count == (unsigned int)(i + 1));

		size_t outsize = 0;
		const char *retrieved_value = sparse_dict_get(dict, key, strlen(key), &outsize);
		assert(retrieved_value);
		assert(outsize == strlen(val));
		assert(strncmp(retrieved_value, val, outsize) == 0);
	}

	for (i = iterations - 1; i >= 0; i--) {
		/* Do the same thing but just retrieve values. */
		char key[64] = {0};
		snprintf(key, sizeof(key), "crazy hash%i", i);

		char val[64] = {0};
		snprintf(val, sizeof(val), "value%i", i);

		size_t outsize = 0;
		const char *retrieved_value = sparse_dict_get(dict, key, strlen(key), &outsize);
		assert(retrieved_value);
		assert(outsize == strlen(val));
		assert(strncmp(retrieved_value, val, outsize) == 0);
	}

	assert(sparse_dict_free(dict));
	return 1;
}
256 | 
257 | int main(int argc, char *argv[]) {
258 | 	(void)argc;
259 | 	(void)argv;
260 | 
261 | 	begin_tests();
262 | 	run_test(test_cannot_set_bigger_elements);
263 | 	run_test(test_cannot_set_outside_bounds);
264 | 	run_test(test_cannot_get_outside_bounds);
265 | 	run_test(test_empty_array_does_not_blow_up);
266 | 	run_test(test_array_set);
267 | 	run_test(test_array_set_backwards);
268 | 	run_test(test_array_set_overwrites_old_values);
269 | 	run_test(test_array_set_high_num);
270 | 	run_test(test_array_get);
271 | 	run_test(test_dict_set);
272 | 	run_test(test_dict_get);
273 | 	run_test(test_dict_lots_of_set);
274 | 	finish_tests();
275 | 
276 | 	return 0;
277 | }
278 | 


--------------------------------------------------------------------------------