├── .gitignore
├── LICENSE
├── README.md
├── bistack_allocator.c
├── buddy_allocator.c
├── bytes_to_cstring.py
├── capsule_triangle_sweep.glsl
├── ccd.c
├── dxbc_input_layout_bytecode_generator.c
├── escape_string.c
├── ffmpeg.sh
├── freelist.c
├── generic_list.c
├── generic_set.c
├── generic_table.c
├── handle_manager.c
├── hash_set.c
├── hash_table.c
├── mpmc_queue.c
├── mpmc_queue.cpp
├── mpsc_queue.c
├── mpsc_queue.cpp
├── normalize_path.c
├── platform_detection.c
├── precise_sleep.c
├── priority_queue.c
├── slab_allocator.c
├── snprintf.c
├── stack_allocator.c
├── string_buffer.c
├── string_set.c
├── string_slab.c
├── string_table.c
├── strtod.c
├── tlsf_allocator.c
├── win32_list_directory.c
├── win32_stacktrace.c
├── win32_thread_queue.c
├── win32_websocket_client.c
├── win32_websocket_server.c
├── x86_cpuid.c
└── x86_rdtsc_seconds.c


/.gitignore:
--------------------------------------------------------------------------------
1 | .vs
2 | bin
3 | *.sln
4 | *.vcxproj*
5 | *.natvis


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | This is free and unencumbered software released into the public domain.
 2 | 
 3 | Anyone is free to copy, modify, publish, use, compile, sell, or
 4 | distribute this software, either in source code form or as a compiled
 5 | binary, for any purpose, commercial or non-commercial, and by any
 6 | means.
 7 | 
 8 | In jurisdictions that recognize copyright laws, the author or authors
 9 | of this software dedicate any and all copyright interest in the
10 | software to the public domain. We make this dedication for the benefit
11 | of the public at large and to the detriment of our heirs and
12 | successors. We intend this dedication to be an overt act of
13 | relinquishment in perpetuity of all present and future rights to this
14 | software under copyright law.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | OTHER DEALINGS IN THE SOFTWARE.
23 | 
24 | For more information, please refer to <https://unlicense.org>
25 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Snippets
2 | 
3 | Collection of short C code snippets for copy-pasting and reference.
4 | 


--------------------------------------------------------------------------------
/bistack_allocator.c:
--------------------------------------------------------------------------------
  1 | #include <stdint.h> // uintptr_t
  2 | #include <string.h> // memcpy - only needed for realloc
  3 | 
// State of a double-ended bump allocator over one fixed buffer.
// Left allocations are carved from the start of the buffer going up,
// right allocations from the end of the buffer going down.
struct allocator {
	void *buffer; // Start of the backing memory.
	int capacity; // Size of the backing memory in bytes.
	int lcursor;  // Bytes consumed from the start of the buffer by left allocations.
	int rcursor;  // Bytes consumed from the end of the buffer by right allocations.
};
 10 | 
 11 | void *allocate_left(struct allocator *allocator, int size, int alignment) {
 12 | 	uintptr_t mask = (uintptr_t)alignment - 1; // Alignment must be a power of 2.
 13 | 	uintptr_t unaligned = (uintptr_t)allocator->buffer + allocator->lcursor;
 14 | 	uintptr_t aligned = (unaligned + mask) & ~mask;
 15 | 	int new_lcursor = allocator->lcursor + size + (int)(aligned - unaligned);
 16 | 	if (new_lcursor >= allocator->capacity - allocator->rcursor)
 17 | 		return 0;
 18 | 
 19 | 	allocator->lcursor = new_lcursor;
 20 | 	return (void *)aligned;
 21 | }
 22 | 
 23 | void *allocate_right(struct allocator *allocator, int size, int alignment) {
 24 | 	uintptr_t mask = (uintptr_t)alignment - 1; // Alignment must be a power of 2.
 25 | 	uintptr_t unaligned = (uintptr_t)allocator->buffer + allocator->capacity - allocator->rcursor - size - alignment;
 26 | 	uintptr_t aligned = (unaligned + alignment) & ~mask;
 27 | 	int new_rcursor = allocator->rcursor + size + (int)(unaligned + alignment - aligned);
 28 | 	if (allocator->lcursor >= allocator->capacity - new_rcursor)
 29 | 		return 0;
 30 | 
 31 | 	allocator->rcursor = new_rcursor;
 32 | 	return (void *)aligned;
 33 | }
 34 | 
 35 | void deallocate_left(struct allocator *allocator, void *block, int size) {
 36 | 	if ((char *)block + size == (char *)allocator->buffer + allocator->lcursor)
 37 | 		allocator->lcursor -= size;
 38 | }
 39 | 
 40 | void deallocate_right(struct allocator *allocator, void *block, int size) {
 41 | 	if (block == (char *)allocator->buffer + allocator->capacity - allocator->rcursor)
 42 | 		allocator->rcursor -= size;
 43 | }
 44 | 
 45 | void *reallocate_left(struct allocator *allocator, void *block, int old_size, int new_size, int alignment) {
 46 | 	uintptr_t mask = (uintptr_t)alignment - 1;
 47 | 	if ((char *)block + old_size == (char *)allocator->buffer + allocator->lcursor && ((uintptr_t)block & mask) == 0) {
 48 | 		int new_lcursor = allocator->lcursor + new_size - old_size;
 49 | 		if (new_lcursor >= allocator->capacity - allocator->rcursor)
 50 | 			return 0;
 51 | 		allocator->lcursor = new_lcursor;
 52 | 		return block;
 53 | 	}
 54 | 
 55 | 	void *result = allocate_left(allocator, new_size, alignment);
 56 | 	if (result) {
 57 | 		int to_copy = new_size < old_size ? new_size : old_size;
 58 | 		memcpy(result, block, (size_t)to_copy);
 59 | 	}
 60 | 	return result;
 61 | }
 62 | 
 63 | void *reallocate_right(struct allocator *allocator, void *block, int old_size, int new_size, int alignment) {
 64 | 	uintptr_t mask = (uintptr_t)alignment - 1;
 65 | 	if (block == (char *)allocator->buffer + allocator->capacity - allocator->rcursor && ((uintptr_t)block & mask) == 0) {
 66 | 		int new_rcursor = allocator->rcursor + new_size - old_size;
 67 | 		if (allocator->lcursor >= allocator->capacity - new_rcursor)
 68 | 			return 0;
 69 | 		allocator->rcursor = new_rcursor;
 70 | 		return block;
 71 | 	}
 72 | 
 73 | 	void *result = allocate_right(allocator, new_size, alignment);
 74 | 	if (result) {
 75 | 		int to_copy = new_size < old_size ? new_size : old_size;
 76 | 		memcpy(result, block, (size_t)to_copy);
 77 | 	}
 78 | 	return result;
 79 | }
 80 | 
 81 | #include <assert.h>
 82 | int main(void) {
 83 | 	{
 84 | 		struct allocator allocator = { 0 };
 85 | 		assert(!allocate_left(&allocator, 1, 1));
 86 | 		assert(!allocate_right(&allocator, 1, 1));
 87 | 		assert(!reallocate_left(&allocator, 0, 0, 1, 1));
 88 | 		assert(!reallocate_right(&allocator, 0, 0, 1, 1));
 89 | 		deallocate_left(&allocator, NULL, 0);
 90 | 		deallocate_right(&allocator, NULL, 0);
 91 | 	}
 92 | 
 93 | 	{
 94 | 		_Alignas(16) char buffer[17];
 95 | 		struct allocator allocator = { .buffer = buffer, .capacity = sizeof buffer };
 96 | 
 97 | 		char *c = allocate_left(&allocator, sizeof(char), _Alignof(char));
 98 | 		short *s = allocate_left(&allocator, sizeof(short), _Alignof(short));
 99 | 		int *i = allocate_left(&allocator, sizeof(int), _Alignof(int));
100 | 		long long *l = allocate_left(&allocator, sizeof(long long), _Alignof(long long));
101 | 		long long *null = allocate_left(&allocator, sizeof(long long), _Alignof(long long));
102 | 		assert(c && (uintptr_t)c % _Alignof(char) == 0);
103 | 		assert(s && (uintptr_t)s % _Alignof(short) == 0);
104 | 		assert(i && (uintptr_t)i % _Alignof(int) == 0);
105 | 		assert(l && (uintptr_t)l % _Alignof(long long) == 0);
106 | 		assert(!null);
107 | 	}
108 | 
109 | 	{
110 | 		_Alignas(16) char buffer[23];
111 | 		struct allocator allocator = { .buffer = buffer, .capacity = sizeof buffer };
112 | 
113 | 		char *c = allocate_right(&allocator, sizeof(char), _Alignof(char));
114 | 		short *s = allocate_right(&allocator, sizeof(short), _Alignof(short));
115 | 		int *i = allocate_right(&allocator, sizeof(int), _Alignof(int));
116 | 		long long *l = allocate_right(&allocator, sizeof(long long), _Alignof(long long));
117 | 		long long *null = allocate_right(&allocator, sizeof(long long), _Alignof(long long));
118 | 		assert(c && (uintptr_t)c % _Alignof(char) == 0);
119 | 		assert(s && (uintptr_t)s % _Alignof(short) == 0);
120 | 		assert(i && (uintptr_t)i % _Alignof(int) == 0);
121 | 		assert(l && (uintptr_t)l % _Alignof(long long) == 0);
122 | 		assert(!null);
123 | 	}
124 | 
125 | 	{
126 | 		_Alignas(16) char buffer[40];
127 | 		struct allocator allocator = { .buffer = buffer, .capacity = sizeof buffer };
128 | 
129 | 		char *lc = allocate_left(&allocator, sizeof(char), _Alignof(char));
130 | 		char *rc = allocate_right(&allocator, sizeof(char), _Alignof(char));
131 | 		short *ls = allocate_left(&allocator, sizeof(short), _Alignof(short));
132 | 		short *rs = allocate_right(&allocator, sizeof(short), _Alignof(short));
133 | 		int *li = allocate_left(&allocator, sizeof(int), _Alignof(int));
134 | 		int *ri = allocate_right(&allocator, sizeof(int), _Alignof(int));
135 | 		long long *ll = allocate_left(&allocator, sizeof(long long), _Alignof(long long));
136 | 		long long *rl = allocate_right(&allocator, sizeof(long long), _Alignof(long long));
137 | 		long long *lnull = allocate_left(&allocator, sizeof(long long), _Alignof(long long));
138 | 		long long *rnull = allocate_right(&allocator, sizeof(long long), _Alignof(long long));
139 | 		assert(lc && (uintptr_t)lc % _Alignof(char) == 0);
140 | 		assert(rc && (uintptr_t)rc % _Alignof(char) == 0);
141 | 		assert(ls && (uintptr_t)ls % _Alignof(short) == 0);
142 | 		assert(rs && (uintptr_t)rs % _Alignof(short) == 0);
143 | 		assert(li && (uintptr_t)li % _Alignof(int) == 0);
144 | 		assert(ri && (uintptr_t)ri % _Alignof(int) == 0);
145 | 		assert(ll && (uintptr_t)ll % _Alignof(long long) == 0);
146 | 		assert(rl && (uintptr_t)rl % _Alignof(long long) == 0);
147 | 		assert(!lnull);
148 | 		assert(!rnull);
149 | 	}
150 | 
151 | 	{
152 | 		char buffer[3];
153 | 		struct allocator allocator = { .buffer = buffer, .capacity = sizeof buffer };
154 | 		char *l = allocate_left(&allocator, 1, 1);
155 | 		char *r = allocate_right(&allocator, 1, 1);
156 | 		assert(l && r && l != r);
157 | 	}
158 | 
159 | 	{
160 | 		_Alignas(8) char buffer[17];
161 | 		struct allocator allocator = { .buffer = buffer + 1, .capacity = sizeof buffer - 1 };
162 | 		char *c = reallocate_left(&allocator, NULL, 0, sizeof(char), _Alignof(char));
163 | 		assert(c && (uintptr_t)c % _Alignof(char) == 0);
164 | 		short *s = reallocate_left(&allocator, c, sizeof(char), sizeof(short), _Alignof(short));
165 | 		assert(s && (uintptr_t)s % _Alignof(short) == 0);
166 | 		int *i = reallocate_left(&allocator, s, sizeof(short), sizeof(int), _Alignof(int));
167 | 		assert(i && (uintptr_t)i % _Alignof(int) == 0);
168 | 		long *l = reallocate_left(&allocator, i, sizeof(int), sizeof(long long), _Alignof(long long));
169 | 		assert(l && (uintptr_t)l % _Alignof(long long) == 0);
170 | 		int mark = allocator.lcursor;
171 | 		deallocate_left(&allocator, l, sizeof(long long));
172 | 		assert(mark - allocator.lcursor >= sizeof(long long));
173 | 	}
174 | 
175 | 	{
176 | 		_Alignas(8) char buffer[32];
177 | 		struct allocator allocator = { .buffer = buffer, .capacity = sizeof buffer };
178 | 		char *c = reallocate_right(&allocator, NULL, 0, sizeof(char), _Alignof(char));
179 | 		assert(c && (uintptr_t)c % _Alignof(char) == 0);
180 | 		short *s = reallocate_right(&allocator, c, sizeof(char), sizeof(short), _Alignof(short));
181 | 		assert(s && (uintptr_t)s % _Alignof(short) == 0);
182 | 		int *i = reallocate_right(&allocator, s, sizeof(short), sizeof(int), _Alignof(int));
183 | 		assert(i && (uintptr_t)i % _Alignof(int) == 0);
184 | 		long *l = reallocate_right(&allocator, i, sizeof(int), sizeof(long long), _Alignof(long long));
185 | 		assert(l && (uintptr_t)l % _Alignof(long long) == 0);
186 | 		int mark = allocator.rcursor;
187 | 		deallocate_right(&allocator, l, sizeof(long long));
188 | 		assert(mark - allocator.rcursor >= sizeof(long long));
189 | 	}
190 | }


--------------------------------------------------------------------------------
/buddy_allocator.c:
--------------------------------------------------------------------------------
  1 | // O(log N) allocation and deallocation
  2 | // 1/4 memory wasted on average, best fit
  3 | // 2 pointer header, 16/8 byte on 64/32-bit
  4 | // 16/8 byte min allocation on 64/32-bit
  5 | // cannot be expanded at runtime
  6 | 
  7 | #include <stdint.h> // intptr_t
  8 | #include <string.h> // memcpy
  9 | #include <assert.h>
 10 | 
 11 | union node {
 12 | 	struct usednode {
 13 | 		intptr_t free;
 14 | 		intptr_t size;
 15 | 	};
 16 | 	struct freenode {
 17 | 		union node *next;
 18 | 		union node *prev;
 19 | 	};
 20 | };
 21 | 
// Buddy allocator state.
struct heap {
	void *memory; // backing memory handed to initialize()
	int capacity; // size of `memory` in bytes; initialize() requires a power of 2
	union node freelists[32]; // freelists[k] is the sentinel of the circular list of free blocks of size 1 << k
};
 27 | 
 28 | int ceillog2(int x) {
 29 | 	int log2 = 0;
 30 | 	while ((1 << log2) < x)
 31 | 		++log2;
 32 | 	return log2;
 33 | }
 34 | 
 35 | void initialize(struct heap *heap, void *memory, int capacity) {
 36 | 	// capacity must be a power of 2
 37 | 	assert(capacity > 0 && (capacity & (capacity - 1)) == 0);
 38 | 
 39 | 	heap->memory = memory;
 40 | 	heap->capacity = capacity;
 41 | 	for (int i = 0; i < 32; ++i) {
 42 | 		union node *list = &heap->freelists[i];
 43 | 		list->next = list;
 44 | 		list->prev = list;
 45 | 	}
 46 | 
 47 | 	int available = capacity - sizeof(union node);
 48 | 	int log2 = ceillog2(available);
 49 | 	union node *list = &heap->freelists[log2];
 50 | 	union node *node = memory;
 51 | 	list->next = node;
 52 | 	list->prev = node;
 53 | 	node->next = list;
 54 | 	node->prev = list;
 55 | }
 56 | void *allocate(struct heap *heap, int size) {
 57 | 	// you could clamp to 0, or return NULL
 58 | 	assert(size >= 0);
 59 | 
 60 | 	int needed = size + sizeof(union node);
 61 | 	for (int log2 = ceillog2(needed); log2 < 32; ++log2) {
 62 | 		union node *list = &heap->freelists[log2];
 63 | 		if (list->next == list)
 64 | 			continue;
 65 | 
 66 | 		union node *node = list->next;
 67 | 		list->next = node->next;
 68 | 		list->next->prev = list;
 69 | 		assert(node->free);
 70 | 
 71 | 		// split node to smallest size that fits
 72 | 		while ((1 << (log2 - 1)) >= needed) {
 73 | 			--log2;
 74 | 			void *memory = (char *)node + ((intptr_t)1 << log2);
 75 | 			union node *buddy = memory;
 76 | 			list = &heap->freelists[log2];
 77 | 			buddy->next = list->next;
 78 | 			buddy->prev = list;
 79 | 			list->next->prev = buddy;
 80 | 			list->next = buddy;
 81 | 		}
 82 | 
 83 | 		node->free = 0;
 84 | 		node->size = (intptr_t)1 << log2;
 85 | 		return (char *)node + sizeof(union node);
 86 | 	}
 87 | 	return 0;
 88 | }
 89 | void deallocate(struct heap *heap, void *block) {
 90 | 	if (!block)
 91 | 		return;
 92 | 
 93 | 	assert(block >= heap->memory); // block isn't from this heap
 94 | 
 95 | 	void *header = (char *)block - sizeof(union node);
 96 | 	union node *node = header;
 97 | 
 98 | 	assert(!node->free); // double free
 99 | 	assert((char *)node + node->size <= (char *)heap->memory + heap->capacity); // block isn't from this heap.
100 | 
101 | 	// combine neighboring free nodes
102 | 	while (node->size < heap->capacity) {
103 | 		// the buddy node is always just a bitflip away
104 | 		uintptr_t base = (uintptr_t)heap->memory;
105 | 		uintptr_t nodep = (uintptr_t)node - base;
106 | 		uintptr_t buddyp = nodep ^ node->size;
107 | 		union node *buddy = (union node *)(buddyp + base);
108 | 		if (!buddy->free)
109 | 			break;
110 | 
111 | 		buddy->next->prev = buddy->prev;
112 | 		buddy->prev->next = buddy->next;
113 | 
114 | 		intptr_t size = node->size;
115 | 		node = node < buddy ? node : buddy;
116 | 		node->size = 2 * size;
117 | 	}
118 | 
119 | 	int log2 = ceillog2((int)node->size);
120 | 	union node *list = &heap->freelists[log2];
121 | 	node->next = list->next;
122 | 	node->prev = list;
123 | 	list->next->prev = node;
124 | 	list->next = node;
125 | }
126 | void *reallocate(struct heap *heap, void *block, int size) {
127 | 	// you could clamp to 0, or return NULL
128 | 	assert(size >= 0);
129 | 
130 | 	if (!block)
131 | 		return allocate(heap, size);
132 | 	if (!size) {
133 | 		deallocate(heap, block);
134 | 		return 0;
135 | 	}
136 | 
137 | 	assert(block >= heap->memory); // block isn't from this heap
138 | 
139 | 	void *header = (char *)block - sizeof(union node);
140 | 	union node *node = header;
141 | 
142 | 	assert(!node->free); // double free
143 | 	assert((char *)node + node->size <= (char *)heap->memory + heap->capacity); // block isn't from this heap.
144 | 
145 | 	int needed = size + sizeof(union node);
146 | 	if (needed > node->size) {
147 | 		if (needed > heap->capacity)
148 | 			return 0; // allocation doesn't fit in the heap
149 | 
150 | 		// try to merge with neighboring free buddies
151 | 		int oldsize = (int)node->size;
152 | 		for (;;) {
153 | 			// we can only merge with the buddy if we are the "left" buddy
154 | 			uintptr_t base = (uintptr_t)heap->memory;
155 | 			uintptr_t nodep = (uintptr_t)node - base;
156 | 			if (nodep & node->size)
157 | 				break; // we are the "right" buddy so we can't merge
158 | 
159 | 			uintptr_t buddyp = nodep ^ node->size;
160 | 			union node *buddy = (union node *)(buddyp + base);
161 | 			if (!buddy->free)
162 | 				break; // buddy isn't free so we can't merge
163 | 
164 | 			// ok we can merge with this buddy
165 | 			buddy->next->prev = buddy->prev;
166 | 			buddy->prev->next = buddy->next;
167 | 			node->size *= 2;
168 | 
169 | 			if (node->size >= needed)
170 | 				return block;
171 | 		}
172 | 
173 | 		// we couldn't reallocate in-place so undo any growth we've done
174 | 		while (node->size > oldsize) {
175 | 			node->size /= 2;
176 | 
177 | 			void *memory = (char *)node + node->size;
178 | 			union node *buddy = memory;
179 | 			buddy->free = 1;
180 | 			buddy->size = node->size;
181 | 
182 | 			// add buddy back to the freelist
183 | 			int log2 = ceillog2((int)node->size);
184 | 			union node *list = &heap->freelists[log2];
185 | 			buddy->next = list->next;
186 | 			buddy->prev = list;
187 | 			list->next->prev = buddy;
188 | 			list->next = buddy;
189 | 		}
190 | 
191 | 		// make a new allocation and copy the old one
192 | 		void *copy = allocate(heap, size);
193 | 		if (!copy)
194 | 			return 0; // out of memory
195 | 		memcpy(copy, block, (size_t)node->size);
196 | 		deallocate(heap, block);
197 | 		return copy;
198 | 	}
199 | 	else {
200 | 		// split off as many buddies from the node as we can
201 | 		int log2 = ceillog2((int)node->size);
202 | 		while ((1 << (log2 - 1)) >= needed) {
203 | 			--log2;
204 | 			void *memory = (char *)node + ((intptr_t)1 << log2);
205 | 			union node *buddy = memory;
206 | 			union node *list = &heap->freelists[log2];
207 | 			buddy->next = list->next;
208 | 			buddy->prev = list;
209 | 			list->next->prev = buddy;
210 | 			list->next = buddy;
211 | 		}
212 | 		return block;
213 | 	}
214 | }
215 | 
216 | int main(void) {
217 | 	static char memory[1024];
218 | 	struct heap heap;
219 | 	initialize(&heap, memory, sizeof memory);
220 | 
221 | 	char *a = allocate(&heap, 256); memset(a, 1, 256);
222 | 	char *b = allocate(&heap, 256); memset(b, 1, 256);
223 | 	deallocate(&heap, a);
224 | 	char *c = allocate(&heap, 256); memset(c, 1, 256);
225 | 	deallocate(&heap, c);
226 | 	deallocate(&heap, b);
227 | 
228 | 	char *d = allocate(&heap, 0); memset(d, 1, 0);
229 | 	char *e = allocate(&heap, 1); memset(e, 1, 1);
230 | 	char *f = allocate(&heap, 2); memset(f, 1, 2);
231 | 	char *g = allocate(&heap, 3); memset(g, 1, 3);
232 | 	char *h = allocate(&heap, 4); memset(h, 1, 4);
233 | 	char *i = allocate(&heap, 5); memset(i, 1, 5);
234 | 	d = reallocate(&heap, d, 256); memset(d, 1, 256);
235 | 	i = reallocate(&heap, i, 100); memset(i, 1, 100);
236 | 	deallocate(&heap, d);
237 | 	deallocate(&heap, i);
238 | 	deallocate(&heap, e);
239 | 	deallocate(&heap, h);
240 | 	deallocate(&heap, f);
241 | 	deallocate(&heap, g);
242 | }
243 | 


--------------------------------------------------------------------------------
/bytes_to_cstring.py:
--------------------------------------------------------------------------------
 1 | # Convert python byte array into a C string literal.
 2 | # Useful for baking data directly into C executable.
 3 | #
 4 | # The generated C string is NOT 0 terminated by default.
 5 | # If you want 0 termination, append b'\0' at the end of the input.
 6 | #
 7 | # The generated string literal is close to optimal in terms of
 8 | # source code length. It's possible to get it slightly shorter,
 9 | # but not in a way that's portable or doesn't produce compiler warnings.
10 | 
def bytes_to_cstring(name: str, data: bytes, maxwidth: int) -> str:
	"""Return C source defining `name` as an unsigned char array holding `data`.

	name:     identifier of the generated array.
	data:     bytes to embed; the array is declared with exactly len(data)
	          elements, so no terminating 0 is stored.
	maxwidth: a new string literal line is started once the current line's
	          content is longer than this many characters (checked before each
	          byte, so a line can exceed it by one emitted item).
	"""
	ESCAPE = {
		ord('\a'): '\\a',
		ord('\b'): '\\b',
		ord('\f'): '\\f',
		ord('\n'): '\\n',
		ord('\r'): '\\r',
		ord('\t'): '\\t',
		ord('\v'): '\\v',
		ord('\\'): '\\\\',
		ord('\"'): '\\"',
	}
	lines = []
	line = ''
	prevoct = False  # last item was an octal escape: a following digit would extend it
	prevq = False    # last source character was '?': another '?' could start a trigraph
	for byte in data:
		if len(line) > maxwidth:
			lines.append('\t"'+line+'"')
			line = ''
			prevoct = False
			prevq = False
		if byte in ESCAPE:
			line += ESCAPE[byte]
			prevoct = False
			prevq = False
		elif ord('0') <= byte <= ord('9'):
			# Escape digits after an octal escape so they are not read as part of it.
			if prevoct:
				line += '\\%o' % byte
			else:
				line += chr(byte)
			prevq = False
		elif byte == ord('?') and prevq:
			# Never emit "??": in C up to C17 it can begin a trigraph such as "??=".
			# The escaped '?' still ends in a '?' character, so prevq stays set.
			line += '\\?'
			prevoct = False
			prevq = True
		elif ord(' ') <= byte <= ord('~'):
			line += chr(byte)
			prevoct = False
			prevq = byte == ord('?')
		else:
			line += '\\%o' % byte
			prevoct = True
			prevq = False
	if len(line) > 0:
		lines.append('\t"'+line+'"')
	result = ''
	result += 'static const unsigned char '+name+'['+str(len(data))+'] =\n'
	result += '\n'.join(lines)+';\n'
	return result
52 | 
# Demo: print the C definition generated for a small sample byte string.
TEST = bytes([55, 138, 87, 147, 13, 123, 230, 172, 237, 133])
print(bytes_to_cstring('TEST', TEST, 80))


--------------------------------------------------------------------------------
/capsule_triangle_sweep.glsl:
--------------------------------------------------------------------------------
  1 | #version 460
  2 | 
  3 | #define EPSILON 1e-5 // Used to test if float is close to 0. Tweak this if you get problems.
  4 | 
// Result of a sweep test. The sweep functions take it as inout: they only
// report a contact that happens strictly before the incoming `time`, and
// overwrite all fields when they do.
struct Sweep {
	float time;  // Non-negative time of first contact.
	float depth; // Non-negative penetration depth if objects start initially colliding.
	vec3 point;  // Point of first-contact. Only updated when contact occurs.
	vec3 normal; // Unit-length collision normal. Only updated when contact occurs.
};
 11 | 
 12 | // Return whether point P is contained inside 3D region delimited by triangle T0,T1,T2 edges.
 13 | bool pointInsideTriangle(vec3 p, vec3 t0, vec3 t1, vec3 t2) {
 14 | 	// Real-Time Collision Detection: 3.4: Barycentric Coordinates (pages 46-52).
 15 | 	//
 16 | 	// The book also has a subsection dedicated to point inside triangle tests:
 17 | 	// Real-Time Collision Detection: 5.4.2: Testing Point in Triangle (pages 203-206).
 18 | 	// But those tests only work for CCW triangles. This seems to work for either orientation.
 19 | 	vec3 t01 = t1 - t0;
 20 | 	vec3 t02 = t2 - t0;
 21 | 	vec3 t0p = p - t0;
 22 | 	float t01t01 = dot(t01,t01);
 23 | 	float t01t02 = dot(t01,t02);
 24 | 	float t02t02 = dot(t02,t02);
 25 | 	float t0pt01 = dot(t0p,t01);
 26 | 	float t0pt02 = dot(t0p,t02);
 27 | 	float denom = t01t01*t02t02 - t01t02*t01t02;
 28 | 	
 29 | 	// Normally I would have to divide vd,wd by denom to get v,w. But divisions are
 30 | 	// expensive and cause troubles around 0. If denom isn't negative then we don't
 31 | 	// ever need to divide. If in the future it does turn out denom can be negative
 32 | 	// then we can always multiply by denom instead of dividing to keep sign the same.
 33 | 	float vd = t02t02*t0pt01 - t01t02*t0pt02;
 34 | 	float wd = t01t01*t0pt02 - t01t02*t0pt01;
 35 | 	return vd >= 0 && wd >= 0 && vd + wd <= denom;
 36 | }
 37 | // Return whether point P is contained inside 3D region delimited by parallelogram P0,P1,P2 edges.
 38 | bool pointInsideParallelogram(vec3 p, vec3 p0, vec3 p1, vec3 p2) {
 39 | 	// There may be a better way.
 40 | 	// https://math.stackexchange.com/questions/4381852/point-in-parallelogram-in-3d-space
 41 | 	vec3 p3 = p2 + (p1 - p0);
 42 | 	return pointInsideTriangle(p,p0,p1,p2) || pointInsideTriangle(p,p1,p3,p2);
 43 | }
 44 | // Return whether point P is contained inside a triangular prism A0,A1,A2-B0,B1,B2.
 45 | bool pointInsideTriangularPrism(vec3 p, vec3 a0, vec3 a1, vec3 a2, vec3 b0, vec3 b1, vec3 b2) {
 46 | 	vec3 faces[5][3] = { { a0,a1,a2 }, { b0,b2,b1 }, { a0,b0,a1 }, { a1,b1,a2 }, { a2,b2,a0 } };
 47 | 	float sgn = 0;
 48 | 	for (int i = 0; i < faces.length(); i++) {
 49 | 		vec3 p0 = faces[i][0];
 50 | 		vec3 p1 = faces[i][1];
 51 | 		vec3 p2 = faces[i][2];
 52 | 		
 53 | 		// Check which side of plane point is in. If it's always on the same side, it's colliding.
 54 | 		vec3 p01 = p1 - p0;
 55 | 		vec3 p02 = p2 - p0;
 56 | 		vec3 n = cross(p01,p02);
 57 | 		float d = dot(n,p - p0);
 58 | 		if (i == 0) sgn = d;
 59 | 		if (sgn*d <= 0) 
 60 | 			return false;
 61 | 	}
 62 | 	return true;
 63 | }
 64 | // Sweep sphere C,r with velocity Sv against plane N of triangle T0,T1,T2, ignoring edges.
 65 | bool sweepSphereTrianglePlane(inout Sweep sweep, vec3 c, float r, vec3 v, vec3 t0, vec3 t1, vec3 t2, vec3 n) {
 66 | 	// Real-Time Collision Detection 5.5.3: Intersecting Moving Sphere Against Plane (pages 219-223).
 67 | 	float t;
 68 | 	float d = dot(n,c - t0);
 69 | 	float pen = r - d;
 70 | 	if (pen > 0)
 71 | 		t = 0; // Sphere already starts coliding with triangle plane.
 72 | 	else {
 73 | 		// Sphere isn't immediately colliding with the plane. Check if it's moving away.
 74 | 		float denom = dot(n,v);
 75 | 		if (denom >= 0)
 76 | 			return false; // Sphere is moving away from plane.
 77 | 		
 78 | 		// Sphere will collide with plane at some point.
 79 | 		t = (r - d)/denom;
 80 | 		pen = 0;
 81 | 	}
 82 | 	
 83 | 	// If sphere misses entire triangle plane, then it definitely misses the triangle too.
 84 | 	if (t >= sweep.time)
 85 | 		return false;
 86 | 	
 87 | 	// Is the plane collision point inside the triangle?
 88 | 	// Real-Time Collision Detection: 5.4.2: Testing Point in Triangle (pg 203-206).
 89 | 	vec3 collision = c + t*v - r*n;
 90 | 	if (!pointInsideTriangle(collision,t0,t1,t2))
 91 | 		return false;
 92 | 		
 93 | 	// Plane collision point is inside the triangle. So the sphere collides with the triangle.
 94 | 	sweep.time = t;
 95 | 	sweep.depth = pen;
 96 | 	sweep.point = collision;
 97 | 	sweep.normal = n;
 98 | 	return true;
 99 | }
// Sweep sphere C,r with velocity V against plane N of parallelogram P0,P1,P2, ignoring edges.
// N is used as the plane normal through P0; the contact point computation
// assumes it is unit length and points towards the sphere (TODO confirm with callers).
// Returns true and updates sweep only for a contact earlier than sweep.time
// whose plane contact point lies inside the parallelogram.
bool sweepSphereParallelogramPlane(inout Sweep sweep, vec3 c, float r, vec3 v, vec3 p0, vec3 p1, vec3 p2, vec3 n) {
	// Real-Time Collision Detection 5.5.3: Intersecting Moving Sphere Against Plane (pages 219-223).
	float t;
	// Signed distance of the sphere center from the plane: project the offset
	// from a point on the plane (P0) onto the normal.
	float d = dot(n,c - p0);
	float pen = r - d;
	if (pen > 0)
		t = 0; // Sphere already starts colliding with the quad plane.
	else {
		// Sphere isn't immediately colliding with the plane. Check if it's moving away.
		float denom = dot(n,v);
		if (denom >= 0)
			return false; // Sphere is moving away from plane.

		// Sphere will collide with plane at some point.
		t = (r - d)/denom;
		pen = 0;
	}

	// If sphere misses entire quad plane, then it definitely misses the quad too.
	if (t >= sweep.time)
		return false;

	// Is the plane collision point inside the quad?
	// Real-Time Collision Detection: 5.4.2: Testing Point in Triangle (pages 203-206).
	vec3 collision = c + t*v - r*n;
	if (!pointInsideParallelogram(collision,p0,p1,p2))
		return false;

	// Plane collision point is inside the quad. So the sphere collides with the quad.
	sweep.time = t;
	sweep.depth = pen;
	sweep.point = collision;
	sweep.normal = n;
	return true;
}
// Sweep point P with velocity V against sphere S,r.
// fallbackNormal is reported when the contact point coincides with the sphere
// center, where no normal can be derived.
// Returns true and updates sweep only for a contact earlier than sweep.time.
// A point that starts inside the sphere makes contact at time 0 with a
// positive depth.
bool sweepPointSphere(inout Sweep sweep, vec3 p, vec3 v, vec3 s, float r, vec3 fallbackNormal) {
	// Real-Time Collision Detection 5.3.2: Intersecting Ray or Segment Against Sphere (pages 177-179).

	// Set up quadratic equation.
	vec3 d = p - s;
	float b = dot(d,v);
	float c = dot(d,d) - r*r;
	if (c > 0 && b > 0)
		return false; // Point starts outside (c > 0) and moves away from sphere (b > 0).
	float a = dot(v,v);

	float t;
	if (a <= 0.0) {
		// The point is not moving, so the quadratic degenerates (0/0 below).
		// It collides only if it already starts inside the sphere.
		if (c > 0)
			return false;
		t = 0;
	} else {
		float discr = b*b - a*c;
		if (discr < 0)
			return false; // Point misses sphere.

		// Point hits sphere. Compute time of first impact.
		t = (-b - sqrt(discr))/a;
	}
	if (t >= sweep.time)
		return false;

	// The sphere is the first thing the point hits so far.
	t = max(t, 0.0); // A negative time means the point started inside the sphere.
	vec3 collision = p + t*v;
	vec3 vec = collision - s;
	float len = length(vec);
	sweep.time = t;
	sweep.depth = t > 0 ? 0 : r - len;
	sweep.point = collision;
	sweep.normal = len >= EPSILON ? vec/len : fallbackNormal;
	return true;
}
// Sweep point P with velocity V against cylinder C0,C1,r, ignoring the endcaps.
// Only the side surface between C0 and C1 counts: contacts with the infinite
// extension of the cylinder beyond either end are rejected.
// fallbackNormal is reported when the contact point lies on the cylinder axis.
// The endcap early-outs compare the positions at t = 0 and t = 1, so the sweep
// is presumably expected to end at t = 1 (TODO confirm with callers).
// Returns true and updates sweep only for a contact at a time in [0, sweep.time).
bool sweepPointUncappedCylinder(inout Sweep sweep, vec3 p, vec3 v, vec3 c0, vec3 c1, float r, vec3 fallbackNormal) {
	// Real-Time Collision Detection 5.3.7: Intersecting Ray or Segment Against Cylinder (pages 194-198).

	// Test if swept point is fully outside of either endcap.
	vec3 n = c1 - c0;
	vec3 d = p - c0;
	float dn = dot(d,n);
	float vn = dot(v,n);
	float nn = dot(n,n);
	if (dn < 0 && dn + vn < 0)
		return false; // Fully outside c0 end of cylinder.
	if (dn > nn && dn + vn > nn)
		return false; // Fully outside c1 end of cylinder.

	// Set up quadratic equations and check if sweep direction is parallel to cylinder.
	float t;
	float vv = dot(v,v);
	float dv = dot(d,v);
	float dd = dot(d,d);
	float a = nn*vv - vn*vn;
	float c = nn*(dd - r*r) - dn*dn;
	if (a < EPSILON) {
		// Sweep direction is parallel to cylinder.
		if (c > 0)
			return false; // Point starts outside of cylinder, so it never collides.
		if (dn < 0)
			return false; // Point starts outside of c0 endcap.
		if (dn > nn)
			return false; // Point starts outside of c1 endcap.
		t = 0;
	} else {
		// Sweep direction is not parallel to cylinder. Solve for time of first contact.
		float b = nn*dv - vn*dn;
		float discr = b*b - a*c;
		if (discr < 0)
			return false; // Sweep misses cylinder.
		t = (-b - sqrt(discr))/a;
	}

	// Check if the sweep missed, or if it hits but another collision happens sooner.
	if (t < 0 || t >= sweep.time)
		return false;

	// The quadratic describes the infinite cylinder. Reject contacts whose
	// projection onto the axis falls outside the C0-C1 segment.
	float hn = dn + t*vn;
	if (hn < 0 || hn > nn)
		return false;

	// This is the first collision. Find the closest point on the center of the cylinder.
	vec3 collision = p + t*v;
	vec3 center;
	if (nn < EPSILON)
		center = c0; // The cylinder is actually a circle.
	else
		center = c0 + (dot(collision - c0,n)/nn)*n;

	// Update collision time, depth, and normal.
	vec3 vec = collision - center;
	float len = length(vec);
	float depth = r - len;
	sweep.time = t;
	sweep.depth = t > 0 ? 0 : depth;
	sweep.point = collision;
	sweep.normal = len >= EPSILON ? vec/len : fallbackNormal;
	return true;
}
229 | 
// Sweep a capsule C0,C1,Cr with velocity Cv against the triangle T0,T1,T2.
//   s          sweep result, updated in place whenever a collision is recorded
//   c0,c1      capsule line segment endpoints
//   r          capsule radius
//   v          capsule velocity
//   t0,t1,t2   3 triangle vertices
//   returns    whether the capsule and triangle intersect
bool sweepCapsuleTriangle(inout Sweep s, vec3 c0, vec3 c1, float r, vec3 v, vec3 t0, vec3 t1, vec3 t2) {
	// Compute triangle plane equation.
	vec3 t01 = t1 - t0;
	vec3 t02 = t2 - t0;
	vec3 normal = normalize(cross(t01,t02));
	
	// Extrude triangle along capsule direction.
	vec3 c01 = c1 - c0;
	vec3 a0 = t0;
	vec3 a1 = t1;
	vec3 a2 = t2;
	vec3 b0 = t0 - c01;
	vec3 b1 = t1 - c01;
	vec3 b2 = t2 - c01;
	
	// Test for initial collision with the extruded triangle prism.
	if (pointInsideTriangularPrism(c0,a0,a1,a2,b0,b1,b2)) {
		// Capsule starts off penetrating triangle. Push it out from the triangle plane.
		// The endpoint closest to the plane decides the push direction and depth.
		float d0 = dot(normal,c0 - t0);
		float d1 = dot(normal,c1 - t0);
		float d = abs(d0) <= abs(d1) ? d0 : d1;
		vec3 n = d >= 0 ? normal : -normal;
		s.time = 0;
		s.depth = abs(d) + r;
		s.normal = n;
		// NOTE(review): the projection of c0 onto the triangle plane would be c0 - d0*normal;
		// confirm that the '+' here is intended.
		s.point = c0 + d0*normal;
		return true;
	}
	
	// Decompose capsule triangle sweep into: 2 sphere-triangle + 3 sphere-parallelogram + 9 point-cylinder + 6 point-sphere sweeps.
	//
	// Every sub-sweep must run, because each one may replace the result in s with an earlier
	// contact. GLSL's || only evaluates its right operand when the left one is false, so the
	// sweep call is always written as the LEFT operand below; `hit || sweep(...)` would stop
	// testing as soon as the first sub-sweep reported a hit.
	bool hit = false;
	vec3 triangles[2][3] = {{a0,a1,a2}, {b0,b1,b2}};
	vec3 parallelograms[3][3] = {{a0,a1,b0}, {a1,a2,b1}, {a2,a0,b2}};
	vec3 cylinders[9][2] = {{a0,a1}, {a1,a2}, {a2,a0}, {b0,b1}, {b1,b2}, {b2,b0}, {a0,b0}, {a1,b1}, {a2,b2}};
	vec3 spheres[6] = {a0, a1, a2, b0, b1, b2};
	
	// Do sphere-triangle sweeps.
	vec3 triangleNormals[2];
	for (int i = 0; i < triangles.length(); i++) {
		vec3 p0 = triangles[i][0];
		vec3 p1 = triangles[i][1];
		vec3 p2 = triangles[i][2];
		
		// Compute triangle plane normal.
		vec3 n = normal;
		if (dot(n,c0 - p0) < 0) n = -n; // Orient towards sphere.
		triangleNormals[i] = n;
		
		// Test for triangle-plane sphere intersection.
		hit = sweepSphereTrianglePlane(s,c0,r,v,p0,p1,p2,n) || hit;
	}
	
	// Do sphere-parallelogram sweeps.
	vec3 parallelogramNormals[3];
	for (int i = 0; i < parallelograms.length(); i++) {
		vec3 p0 = parallelograms[i][0];
		vec3 p1 = parallelograms[i][1];
		vec3 p2 = parallelograms[i][2];
		
		// Check if quad is degenerate. Happens when triangle edge completely parallel to capsule.
		vec3 p01 = p1 - p0;
		vec3 p02 = p2 - p0;
		vec3 c = cross(p01,p02);
		float len = length(c);
		if (len > EPSILON) {
			// Compute quad plane equation.
			vec3 n = c/len;
			if (dot(n,c0 - p0) < 0) n = -n; // Orient towards sphere.
			parallelogramNormals[i] = n;
			
			// Do the sweep test.
			hit = sweepSphereParallelogramPlane(s,c0,r,v,p0,p1,p2,n) || hit;
		}
		else parallelogramNormals[i] = triangleNormals[0]; // Fallback normal for the cylinders below.
	}
	
	// Do point-cylinder sweeps. Cylinders 0-5 are triangle edges (3 per triangle) and
	// 6-8 are the extrusion edges; each gets a fallback normal from the matching face.
	for (int i = 0; i < cylinders.length(); i++) {
		vec3 p0 = cylinders[i][0];
		vec3 p1 = cylinders[i][1];
		vec3 n;
		if (i < 6)
			n = triangleNormals[i/3];
		else
			n = parallelogramNormals[i - 6];
		hit = sweepPointUncappedCylinder(s,c0,v,p0,p1,r,n) || hit;
	}
	
	// Do point-sphere sweeps. Spheres 0-2 sit on the original triangle, 3-5 on the extruded one.
	for (int i = 0; i < spheres.length(); i++) {
		vec3 c = spheres[i];
		vec3 n = triangleNormals[i/3];
		hit = sweepPointSphere(s,c0,v,c,r,n) || hit;
	}
	
	return hit;
}
333 | 
// Move a capsule and resolve any triangle collisions encountered along the way.
//   p         - capsule base position (updated in place)
//   v         - capsule velocity (updated in place when the capsule slides along a surface)
//   h         - capsule height
//   r         - capsule radius
//   dt        - time-step length
//   triangles - list of triangles to collide with
void resolveCollisions(inout vec3 p, inout vec3 v, float h, float r, float dt, vec3 triangles[999][3]) {
	// Store the leftover movement in this vector.
	vec3 u = dt*v;

	// Move and resolve collisions while there is still motion. But cap max iterations to ensure simulation terminates.
	const int MAX_ITER = 16;
	for (int iter = 0; iter < MAX_ITER && dot(u,u) > 0; iter++) {
		// Compute capsule endpoints.
		vec3 c0 = p;
		vec3 c1 = p;
		c0.y += r;
		c1.y += h - r;
		
		// Perform the sweep test against all triangles. Start from a fully initialized
		// "no collision" result so depth and normal are well defined when nothing is hit.
		Sweep s;
		s.time = 1;
		s.depth = 0;
		s.point = vec3(0);
		s.normal = vec3(0);
		for (int i = 0; i < triangles.length(); i++) {
			vec3 t0 = triangles[i][0];
			vec3 t1 = triangles[i][1];
			vec3 t2 = triangles[i][2];
			sweepCapsuleTriangle(s, c0, c1, r, u, t0, t1, t2);
		}

		// Stop objects from intersecting.
		if (s.depth > 0)
			p += (s.depth + EPSILON)*s.normal;

		// Advance the cylinder until the first contact time.
		vec3 dp = s.time*u;
		p += dp;

		// If there were no collisions, entire motion is complete and we can terminate early.
		if (s.time >= 1)
			break;

		// Cancel out motion parallel to the normal. This causes capsule to slide along surface.
		// Subtracting the projection onto the normal leaves only the tangential component.
		u -= dp;
		u -= dot(u,s.normal)*s.normal;
		v -= dot(v,s.normal)*s.normal;

		// Nudge the position and velocity slightly away from surface to avoid another collision.
		vec3 offset = EPSILON*s.normal;
		p += offset;
		v += offset;
		u += offset;
	}
}
388 | 


--------------------------------------------------------------------------------
/ccd.c:
--------------------------------------------------------------------------------
  1 | // 2D continuous collision detection tests.
  2 | // 
  3 | // These are very similar to ray tracing routines, but they're used for
  4 | // collision detection in 2D. Instead of moving the object and then testing
  5 | // and correcting for collisions after the fact, you can use these routines
  6 | // to get the exact time point when the collision will occur and stop right
  7 | // before.
  8 | // 
  9 | // Here is an example loop you can use for moving a circular player in a
 10 | // world full of rectangle colliders:
 11 | //
 12 | // for (int iter = 0; iter < MAX_ITER && (player.vx != 0 || player.vy != 0); iter++) {
 13 | // 	Hit nearest = { 1 };
 14 | // 	for (int i = 0; i < numColliders; i++) {
 15 | // 		Hit hit = circleRect(player.x, player.y, player.radius, player.vx, player.vy, collider[i].x, collider[i].y, collider[i].rx, collider[i].ry);
 16 | // 		if (hit.t < nearest.t) nearest = hit;
 17 | // 	}
 18 | // 	player.x += player.vx * nearest.t;
 19 | // 	player.y += player.vy * nearest.t;
 20 | // 	player.vx *= (1 - nearest.t);
 21 | // 	player.vy *= (1 - nearest.t);
 22 | // 	if (nearest.t < 1) {
 23 | // 		float dot = player.vx * nearest.nx + player.vy * nearest.ny;
 24 | // 		player.vx -= nearest.nx * dot;
 25 | // 		player.vy -= nearest.ny * dot;
 26 | // 		player.x += nearest.nx * EPSILON;
 27 | // 		player.y += nearest.ny * EPSILON;
 28 | // 	}
 29 | // }
 30 | 
 31 | #include <math.h>
 32 | 
// Result of a sweep test. The routines below create it as `{ 1 }`, i.e. "no collision"
// with a zero normal, so the field order matters.
typedef struct Hit {
	float t; // Time of collision. 0 <= t < 1. If no collision: t >= 1.
	float nx; // Collision normal. 0 if no collision.
	float ny;
} Hit;
 38 | 
 39 | // Moving point vs stationary circle.
 40 | // x,y = point starting position
 41 | // vx,vy = point velocity
 42 | // cx,cy = circle center position
 43 | // r = circle radius
 44 | Hit pointCircle(float x, float y, float vx, float vy, float cx, float cy, float r) {
 45 | 	Hit hit = { 1 };
 46 | 
 47 | 	// First, check if the ray starts inside of the circle already.
 48 | 	float dx = x - cx;
 49 | 	float dy = y - cy;
 50 | 	float d2 = dx * dx + dy * dy;
 51 | 	float r2 = r * r;
 52 | 	if (d2 < r2) { // Ray already starts inside of circle and collides immediately.
 53 | 		hit.t = 0;
 54 | 		float d = sqrtf(d2);
 55 | 		if (d > 0) {
 56 | 			hit.nx = dx / d;
 57 | 			hit.ny = dy / d;
 58 | 		}
 59 | 		else { // Ray is directly at circle center. Normal is arbitrary.
 60 | 			hit.nx = 1;
 61 | 			hit.ny = 0;
 62 | 		}
 63 | 		return hit;
 64 | 	}
 65 | 
 66 | 	// Now solve quadratic to find the intersection points and get the closest one.
 67 | 	float a = vx * vx + vy * vy;
 68 | 	float b = vx * dx + vy * dy;
 69 | 	float c = d2 - r2;
 70 | 	float disc = b * b - a * c;
 71 | 	float root = sqrtf(b * b - a * c);
 72 | 	float t0 = (-b - root) / a;
 73 | 	float t1 = (-b + root) / a;
 74 | 	float t = t0 >= 0 ? t0 : t1;
 75 | 	if (!(0 <= t && t < 1)) return hit; // No hit. Relies on IEEE NaN behavior.
 76 | 
 77 | 	hit.t = t;
 78 | 	hit.nx = (dx + vx * t) / r;
 79 | 	hit.ny = (dy + vy * t) / r;
 80 | 	return hit;
 81 | }
 82 | 
 83 | // Moving point vs stationary rectangle.
 84 | // x,y = point starting position
 85 | // vx,vy = point velocity
 86 | // cx,cy = rectangle center position
 87 | // rx,ry = rectangle radius (width/2,height/2)
 88 | Hit pointRect(float x, float y, float vx, float vy, float cx, float cy, float rx, float ry) {
 89 | 	Hit hit = { 1 };
 90 | 
 91 | 	// First, check if the point starts inside of the rectangle already.
 92 | 	float dx = x - cx;
 93 | 	float dy = y - cy;
 94 | 	float absx = dx < 0 ? -dx : +dx;
 95 | 	float absy = dy < 0 ? -dy : +dy;
 96 | 	if (absx < rx && absy < ry) {
 97 | 		hit.t = 0;
 98 | 		float penx = rx - absx;
 99 | 		float peny = ry - absy;
100 | 		if (penx <= peny)
101 | 			hit.nx = dx < 0 ? -1.0f : +1.0f;
102 | 		else
103 | 			hit.ny = dy < 0 ? -1.0f : +1.0f;
104 | 		return hit;
105 | 	}
106 | 
107 | 	// Find when collisions with 4 rectangle edges happen.
108 | 	float sx = vx < 0 ? -1.0f : +1.0f;
109 | 	float sy = vy < 0 ? -1.0f : +1.0f;
110 | 	float tx0 = (-sx * rx - dx) / vx;
111 | 	float tx1 = (+sx * rx - dx) / vx;
112 | 	float ty0 = (-sy * ry - dy) / vy;
113 | 	float ty1 = (+sy * ry - dy) / vy;
114 | 
115 | 	// Find time of entry and exit.
116 | 	float tmin = 0;
117 | 	float tmax = INFINITY;
118 | 	tmin = tx0 > tmin ? tx0 : tmin;
119 | 	tmin = ty0 > tmin ? ty0 : tmin;
120 | 	tmax = tx1 < tmax ? tx0 : tmax;
121 | 	tmax = ty1 < tmax ? tx1 : tmax;
122 | 	if (!(tmin < tmax && tmin < 1)) return hit; // No hit.
123 | 
124 | 	hit.t = tmin;
125 | 	if (tx0 >= ty0)
126 | 		hit.nx = -sx;
127 | 	else
128 | 		hit.ny = -sy;
129 | 	return hit;
130 | }
131 | 
132 | // Moving point vs stationary rectangle with rounded corners.
133 | // x,y = point starting position
134 | // vx,vy = point velocity
135 | // cx,cy = rectangle center position
136 | // rx,ry = rectangle radius (width/2, height/2)
137 | // r = rectangle corner radius
138 | Hit pointRoundRect(float x, float y, float vx, float vy, float cx, float cy, float rx, float ry, float r) {
139 | 	// First test against the bounding rect.
140 | 	Hit hit = pointRect(x, y, vx, vy, cx, cy, rx, ry);
141 | 	if (hit.t >= 1) return hit; // No hit.
142 | 
143 | 	// Find where the ray hits the bounding rect.
144 | 	float dx = x - cx;
145 | 	float dy = y - cy;
146 | 	float hx = dx + vx * hit.t;
147 | 	float hy = dy + vy * hit.t;
148 | 
149 | 	// Quadrant correction.
150 | 	float qx = hx < 0 ? -1.0f : +1.0f;
151 | 	float qy = hy < 0 ? -1.0f : +1.0f;
152 | 	hx *= qx;
153 | 	hy *= qy;
154 | 
155 | 	// If ray hits the non-circular part, then we're already done.
156 | 	float circx = rx - r;
157 | 	float circy = ry - r;
158 | 	if (hx <= circx || hy <= circy) return hit;
159 | 
160 | 	// Test against the circular corner. Quadrant correct the hit normal.
161 | 	dx *= qx;
162 | 	dy *= qy;
163 | 	vx *= qx;
164 | 	vy *= qy;
165 | 	hit = pointCircle(dx, dy, vx, vy, circx, circy, r);
166 | 	hit.nx *= qx;
167 | 	hit.ny *= qy;
168 | 	return hit;
169 | }
170 | 
171 | // By taking the Minkowski sum, these other tests can all be implemented using the routines above.
172 | 
173 | Hit rectCircle(float x, float y, float rx, float ry, float vx, float vy, float cx, float cy, float r) {
174 | 	return pointRoundRect(x, y, vx, vy, cx, cy, rx + r, ry + r, r);
175 | }
176 | Hit rectRect(float ax, float ay, float arx, float ary, float vx, float vy, float bx, float by, float brx, float bry) {
177 | 	return pointRect(ax, ay, vx, vy, bx, by, brx + arx, bry + ary);
178 | }
179 | Hit rectRoundRect(float ax, float ay, float arx, float ary, float vx, float vy, float bx, float by, float brx, float bry, float br) {
180 | 	return pointRoundRect(ax, ay, vx, vy, bx, by, brx + arx, bry + ary, br);
181 | }
182 | Hit circleCircle(float ax, float ay, float ar, float vx, float vy, float bx, float by, float br) {
183 | 	return pointCircle(ax, ay, vx, vy, bx, by, br + ar);
184 | }
185 | Hit circleRect(float x, float y, float r, float vx, float vy, float cx, float cy, float rx, float ry) {
186 | 	return pointRoundRect(x, y, vx, vy, cx + r, cy + r, rx, ry, r);
187 | }
188 | Hit circleRoundRect(float x, float y, float r, float vx, float vy, float cx, float cy, float rx, float ry, float br) {
189 | 	return pointRoundRect(x, y, vx, vy, cx, cy, rx + r, ry + r, r + br);
190 | }
191 | 
192 | // A similar strategy can be used to test against 2 moving shapes.
193 | 
194 | Hit pointMovingCircle(float ax, float ay, float avx, float avy, float cx, float cy, float r, float bvx, float bvy) {
195 | 	return pointCircle(ax, ay, avx - bvx, avy - bvy, cx, cy, r);
196 | }
197 | 
// Placeholder entry point so the file builds as a standalone program; it runs nothing.
int main(void) {
	return 0;
}
201 | 


--------------------------------------------------------------------------------
/escape_string.c:
--------------------------------------------------------------------------------
  1 | // escape C string, source and destination cannot overlap, does NOT zero terminate.
  2 | int escape(char* restrict dst, const char* restrict src, int len) {
  3 | 	int cursor = 0;
  4 | 	for (int i = 0; i < len; i++) {
  5 | 		unsigned char c = src[i];
  6 | 		if (c >= ' ' && c <= '~' && c != '"' && c != '\'' && c != '\\')
  7 | 			dst[cursor++] = c;
  8 | 		else {
  9 | 			dst[cursor++] = '\\';
 10 | 			switch (c) {
 11 | 				case '\a': dst[cursor++] = 'a'; break;
 12 | 				case '\b': dst[cursor++] = 'b'; break;
 13 | 				case '\t': dst[cursor++] = 't'; break;
 14 | 				case '\n': dst[cursor++] = 'n'; break;
 15 | 				case '\v': dst[cursor++] = 'v'; break;
 16 | 				case '\f': dst[cursor++] = 'f'; break;
 17 | 				case '\r': dst[cursor++] = 'r'; break;
 18 | 				case '\"': dst[cursor++] = '"'; break;
 19 | 				case '\'': dst[cursor++] = '\''; break;
 20 | 				case '\\': dst[cursor++] = '\\'; break;
 21 | 				default:
 22 | 					dst[cursor++] = 'x';
 23 | 					dst[cursor++] = "0123456789ABCDEF"[(c >> 4) & 0xF];
 24 | 					dst[cursor++] = "0123456789ABCDEF"[(c >> 0) & 0xF];
 25 | 					break;
 26 | 			}
 27 | 		}
 28 | 	}
 29 | 	return cursor;
 30 | }
 31 | 
 32 | // unescape C string, source and destination can overlap, does NOT zero terminate.
 33 | int unescape(char* dst, const char* src, int len) {
 34 | 	int cursor = 0;
 35 | 	for (int i = 0; i < len; i++) {
 36 | 		if (src[i] == '\\' && i < len - 1) {
 37 | 			switch (src[++i]) {
 38 | 				case 'a': dst[cursor++] = '\a'; break;
 39 | 				case 'b': dst[cursor++] = '\b'; break;
 40 | 				case 'e': dst[cursor++] = '\x1B'; break;
 41 | 				case 'f': dst[cursor++] = '\f'; break;
 42 | 				case 'n': dst[cursor++] = '\n'; break;
 43 | 				case 'r': dst[cursor++] = '\r'; break;
 44 | 				case 't': dst[cursor++] = '\t'; break;
 45 | 				case 'v': dst[cursor++] = '\v'; break;
 46 | 				case 'x': {
 47 | 					int one = 0; // track if we have at least one valid hex char.
 48 | 					int hex = 0;
 49 | 					for (; i < len - 1; i++) {
 50 | 						char c = src[i + 1];
 51 | 						int dig;
 52 | 						if (c >= '0' && c <= '9')
 53 | 							dig = c - '0';
 54 | 						else if (c >= 'A' && c <= 'F')
 55 | 							dig = c - 'A' + 10;
 56 | 						else if (c >= 'a' && c <= 'f')
 57 | 							dig = c - 'a' + 10;
 58 | 						else
 59 | 							break;
 60 | 						hex = (hex << 4) | dig;
 61 | 						if (hex > 0xFF)
 62 | 							hex = 0xFF;
 63 | 						one = 1;
 64 | 					}
 65 | 					dst[cursor++] = one ? (char)hex : 'x'; // "\x" without any following hex chars unescapes to "x".
 66 | 				} break;
 67 | 				case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
 68 | 					int oct = src[i] - '0';
 69 | 					int max = i + 2;
 70 | 					if (max > len - 1)
 71 | 						max = len - 1;
 72 | 					for (; i < max; i++) {
 73 | 						unsigned dig = src[i + 1] - '0';
 74 | 						if (dig >= 8)
 75 | 							break;
 76 | 						oct = (oct << 3) | dig;
 77 | 					}
 78 | 					if (oct > 0xFF)
 79 | 						oct = 0xFF;
 80 | 					dst[cursor++] = (char)oct;
 81 | 				} break;
 82 | 				default: // handles \" \' \? \\ and invalid escapes
 83 | 					dst[cursor++] = src[i];
 84 | 					break;
 85 | 			}
 86 | 		}
 87 | 		else dst[cursor++] = src[i];
 88 | 	}
 89 | 	return cursor;
 90 | }
 91 | 
 92 | // === TESTS ===
 93 | 
 94 | #include <assert.h>
 95 | #include <string.h>
 96 | #include <stdbool.h>
 97 | 
 98 | bool escape_equal(const char* string, int string_length, const char* expected, int expected_length) {
 99 | 	char escaped[9999];
100 | 	int escaped_length = escape(escaped, string, string_length);
101 | 	return escaped_length == expected_length && !memcmp(escaped, expected, expected_length);
102 | }
// Test helper: unescapes `string` and reports whether the result is exactly `expected`.
bool unescape_equal(const char* string, int string_length, const char* expected, int expected_length) {
	char unescaped[9999];
	// unescape() never writes more bytes than it reads. Reject inputs larger than the buffer.
	if (string_length < 0 || string_length > (int)sizeof unescaped)
		return false;
	int unescaped_length = unescape(unescaped, string, string_length);
	return unescaped_length == expected_length && !memcmp(unescaped, expected, (size_t)expected_length);
}
108 | 
// Test driver: checks escape() on every byte value and unescape() on a table of
// hand-written cases. Failures trip an assert.
int main(void) {
	// exhaustively test all possible bytes
	char ascii[256];
	for (int i = 0; i <= 255; i++)
		ascii[i] = (char)i;
	// Expected escape() output for bytes 0x00..0xFF in order, 16 bytes per row.
	char expected[] =
		"\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\a\\b\\t\\n\\v\\f\\r\\x0E\\x0F"
		"\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1A\\x1B\\x1C\\x1D\\x1E\\x1F"
		" !\\\"#$%&\\'()*+,-./"
		"0123456789:;<=>?"
		"@ABCDEFGHIJKLMNO"
		"PQRSTUVWXYZ[\\\\]^_"
		"`abcdefghijklmno"
		"pqrstuvwxyz{|}~\\x7F"
		"\\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8A\\x8B\\x8C\\x8D\\x8E\\x8F"
		"\\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9A\\x9B\\x9C\\x9D\\x9E\\x9F"
		"\\xA0\\xA1\\xA2\\xA3\\xA4\\xA5\\xA6\\xA7\\xA8\\xA9\\xAA\\xAB\\xAC\\xAD\\xAE\\xAF"
		"\\xB0\\xB1\\xB2\\xB3\\xB4\\xB5\\xB6\\xB7\\xB8\\xB9\\xBA\\xBB\\xBC\\xBD\\xBE\\xBF"
		"\\xC0\\xC1\\xC2\\xC3\\xC4\\xC5\\xC6\\xC7\\xC8\\xC9\\xCA\\xCB\\xCC\\xCD\\xCE\\xCF"
		"\\xD0\\xD1\\xD2\\xD3\\xD4\\xD5\\xD6\\xD7\\xD8\\xD9\\xDA\\xDB\\xDC\\xDD\\xDE\\xDF"
		"\\xE0\\xE1\\xE2\\xE3\\xE4\\xE5\\xE6\\xE7\\xE8\\xE9\\xEA\\xEB\\xEC\\xED\\xEE\\xEF"
		"\\xF0\\xF1\\xF2\\xF3\\xF4\\xF5\\xF6\\xF7\\xF8\\xF9\\xFA\\xFB\\xFC\\xFD\\xFE\\xFF";
	assert(escape_equal(ascii, sizeof ascii, expected, sizeof expected - 1));

	// Both arguments must be string literals: sizeof - 1 gives their length without the
	// terminator, which lets the cases contain embedded zero bytes.
	#define test_unescape(string, expected) assert(unescape_equal(string, sizeof(string) - 1, expected, sizeof(expected) - 1))
	// Single-character escapes.
	test_unescape("\\a", "\a");
	test_unescape("\\b", "\b");
	test_unescape("\\e", "\x1B");
	test_unescape("\\f", "\f");
	test_unescape("\\n", "\n");
	test_unescape("\\r", "\r");
	test_unescape("\\t", "\t");
	test_unescape("\\v", "\v");
	test_unescape("\\\\", "\\");
	test_unescape("\\\'", "\'");
	test_unescape("\\\"", "\"");
	test_unescape("\\?", "?");
	// A lone trailing backslash and unknown escapes.
	test_unescape("\\", "\\");
	test_unescape("\\%", "%");
	// Octal escapes: at most three digits, clamped to 0xFF.
	test_unescape("\\0", "\0");
	test_unescape("\\00", "\0");
	test_unescape("\\000", "\0");
	test_unescape("\\0000", "\x00\x30");
	test_unescape("\\123", "\123");
	test_unescape("\\777", "\xFF");
	test_unescape("\\8", "8");
	test_unescape("\\78", "\7\x38");
	// Hex escapes: any number of digits, clamped to 0xFF.
	test_unescape("\\x", "x");
	test_unescape("\\X", "X");
	test_unescape("\\x0", "\x0");
	test_unescape("\\x00", "\x00");
	test_unescape("\\x000", "\x00");
	test_unescape("\\x1", "\x1");
	test_unescape("\\x11", "\x11");
	test_unescape("\\x111", "\xFF");
	test_unescape("\\xF", "\xF");
	test_unescape("\\xFF", "\xFF");
	test_unescape("\\xFFF", "\xFF");
	test_unescape("\\x01\\x23\\x45\\x67\\x89", "\x01\x23\x45\x67\x89");
	test_unescape("\\xAB\\xCD\\xEF", "\xAB\xCD\xEF");
	test_unescape("\\xab\\xcd\\xef", "\xab\xcd\xef");
	test_unescape("\\xFG", "\xFG");
	test_unescape("\\xfg", "\xfg");
	// Plain text and mixed input.
	test_unescape("abcABC123+-( ~{}", "abcABC123+-( ~{}");
	test_unescape("abc\\", "abc\\");
	test_unescape("abc\\r\\n\\a\\\\\\123\\xF\\xfa", "abc\r\n\a\\\123\xF\xfa");
}


--------------------------------------------------------------------------------
/ffmpeg.sh:
--------------------------------------------------------------------------------
# Convert input.mp4 to output.gif, overwriting any existing output (-y).
# The filter graph builds a palette from the video itself (palettegen) and maps the
# frames onto it (paletteuse), which improves GIF quality.
ffmpeg -y \
	-filter_complex "[0:v] split [a][b];[a] palettegen [p];[b][p] paletteuse" \
	-i input.mp4 \
	output.gif


--------------------------------------------------------------------------------
/freelist.c:
--------------------------------------------------------------------------------
 1 | void *allocate(void **freelist) {
 2 | 	void *result = *freelist;
 3 | 	if (*freelist)
 4 | 		*freelist = **(void ***)freelist;
 5 | 	return result;
 6 | }
 7 | 
 8 | void deallocate(void **freelist, void *item) {
 9 | 	*(void **)item = *freelist;
10 | 	*freelist = item;
11 | }
12 | 
13 | #include <assert.h>
// Test driver for the free list: checks the empty case, last-in-first-out ordering,
// and immediate reuse of a single released item.
int main(void) {
	enum { NUM_SLOTS = 10 };
	void *slots[NUM_SLOTS];
	void *head = 0;

	// An empty list has nothing to hand out.
	assert(allocate(&head) == 0);

	// Release every slot, then expect them back in reverse order.
	for (int n = 0; n < NUM_SLOTS; n++)
		deallocate(&head, &slots[n]);
	for (int expected = NUM_SLOTS - 1; expected >= 0; expected--) {
		void **slot = allocate(&head);
		assert((int)(slot - slots) == expected);
	}
	assert(allocate(&head) == 0);
	assert(allocate(&head) == 0);

	// A single released slot is handed straight back and the list is empty again.
	for (int n = 0; n < NUM_SLOTS; n++) {
		deallocate(&head, &slots[n]);
		void **slot = allocate(&head);
		assert((int)(slot - slots) == n);
		assert(allocate(&head) == 0);
	}
}


--------------------------------------------------------------------------------
/generic_list.c:
--------------------------------------------------------------------------------
 1 | #include <stdlib.h> // realloc, free
 2 | 
 3 | #define list(T) T*
 4 | 
 5 | #define reserve(plist, num_items)\
 6 | 	private__reserve((plist), (num_items), sizeof *(*plist))
 7 | 
 8 | #define add(plist, item) do{\
 9 | 	int private__index = count(*(plist));\
10 | 	reserve((plist), private__index + 1);\
11 | 	(*plist)[private__index] = (item);\
12 | 	(plist)[++((int*)(*(plist)))[-1]];\
13 | }while(0)
14 | 
15 | #define pop(plist)\
16 | 	(((int*)(*(plist)))[-1]--, (*(plist))[((int*)(*(plist)))[-1]])
17 | 
18 | #define swap_delete(list, index)do{\
19 | 	if(index < count(list))\
20 | 		(list)[index] = (list)[--((int*)(list))[-1]];\
21 | }while(0)
22 | 
23 | int count(const list(void) list) {
24 | 	return list ? ((int *)list)[-1] : 0;
25 | }
26 | 
27 | int capacity(const list(void) list) {
28 | 	return list ? ((int *)list)[-2] : 0;
29 | }
30 | 
31 | void destroy(list(void) *plist) {
32 | 	free((int *)(*plist) - 4);
33 | 	*plist = NULL;
34 | }
35 | 
36 | static void private__reserve(list(void) *plist, int min_capacity, int item_size) {
37 | 	int cap = capacity(*plist);
38 | 	if (cap < min_capacity) {
39 | 		cap *= 2;
40 | 		if (cap < 64)
41 | 			cap = 64;
42 | 		while (cap < min_capacity)
43 | 			cap *= 2;
44 | 		// Overallocate by 4 ints to keep overall alignment to 16 bytes.
45 | 		int cnt = count(*plist);
46 | 		int *newlist = (int *)realloc(*plist ? (int *)(*plist) - 4 : NULL, cap * item_size + 4 * sizeof(int)) + 4;
47 | 		newlist[-2] = cap;
48 | 		newlist[-1] = cnt;
49 | 		*plist = newlist;
50 | 	}
51 | }
52 | 
53 | #include <assert.h>
54 | int main(void) {
55 | 	{
56 | 		list(int) *ints = NULL;
57 | 		assert(count(ints) == 0);
58 | 		assert(capacity(ints) == 0);
59 | 
60 | 		for (int i = 0; i < 1024; ++i)
61 | 			add(&ints, i);
62 | 		assert(count(ints) == 1024);
63 | 
64 | 		for (int i = 0; i < 1024; ++i)
65 | 			assert(ints[i] == i);
66 | 
67 | 		for (int i = 1023; i >= 0; --i)
68 | 			assert(pop(&ints) == i);
69 | 		assert(count(ints) == 0);
70 | 
71 | 		destroy(&ints);
72 | 		assert(!ints);
73 | 	}
74 | 	
75 | 	{
76 | 		// This shouldn't leak.
77 | 		for (int i = 0; i < 100000; ++i) {
78 | 			list(int) *ints = NULL;
79 | 			for (int j = 0; j < 10000; ++j)
80 | 				add(&ints, j);
81 | 			destroy(&ints);
82 | 		}
83 | 	}
84 | }


--------------------------------------------------------------------------------
/handle_manager.c:
--------------------------------------------------------------------------------
// Bookkeeping for a caller-provided array of items that are handed out through
// generation-checked handles.
struct manager {
	void *items; // items[0] is a reserved sentinel.
	struct metadata *metadata; // One entry per item slot, indexed like items.
	unsigned short freelist; // Index of the first free slot. 0 means there are no free slots.
	unsigned short num_items; // Number of slots, including the sentinel slot 0.
	unsigned short item_size; // Size of one item in bytes.
};
  8 | 
// Per-slot bookkeeping. Allocated slots form a doubly linked list that starts and ends
// at the sentinel slot 0; free slots are chained through `next` only.
struct metadata {
	unsigned short generation; // Incremented each time the slot is deallocated, which invalidates old handles.
	unsigned short prev; // Previous allocated slot, or 0 for the sentinel.
	unsigned short next; // Next allocated slot (or next free slot while on the freelist). 0 ends the chain.
};
 14 | 
// Reference to an allocated slot. A handle whose `value` is 0 is ignored by deallocate().
// NOTE(review): `value` is presumably meant to cover both 16-bit fields exactly, which
// assumes a 32-bit unsigned and a 16-bit unsigned short — confirm for the target platform.
union handle {
	unsigned value;
	struct { unsigned short index, generation; } fields; // Slot index and the slot's generation at allocation time.
};
 19 | 
 20 | struct manager create(void *items, struct metadata *metadata, unsigned num_items, unsigned item_size) {
 21 | 	metadata[0].prev = 0;
 22 | 	metadata[0].next = 0;
 23 | 	metadata[0].generation = 0;
 24 | 	for (unsigned i = 1; i < num_items - 1; ++i) {
 25 | 		metadata[i].prev = 0;
 26 | 		metadata[i].next = (unsigned short)(i + 1);
 27 | 		metadata[i].generation = 0;
 28 | 	}
 29 | 	metadata[num_items - 1].prev = 0;
 30 | 	metadata[num_items - 1].next = 0;
 31 | 	metadata[num_items - 1].generation = 0;
 32 | 	
 33 | 	return (struct manager) {
 34 | 		.items = items,
 35 | 		.metadata = metadata,
 36 | 		.freelist = 1,
 37 | 		.num_items = (unsigned short)num_items,
 38 | 		.item_size = (unsigned short)item_size,
 39 | 	};
 40 | }
 41 | 
 42 | union handle allocate(struct manager *manager) {
 43 | 	unsigned short index = manager->freelist;
 44 | 	if (index)
 45 | 		manager->freelist = manager->metadata[index].next;
 46 | 	
 47 | 	manager->metadata[index].prev = manager->metadata[0].prev;
 48 | 	manager->metadata[index].next = 0;
 49 | 	manager->metadata[manager->metadata[0].prev].next = index;
 50 | 	manager->metadata[0].prev = index;
 51 | 	
 52 | 	return (union handle) { .fields = { 
 53 | 		.index = (unsigned short)index, 
 54 | 		.generation = manager->metadata[index].generation 
 55 | 	}};
 56 | }
 57 | 
 58 | void deallocate(struct manager *manager, union handle handle) {
 59 | 	unsigned short index = handle.fields.index;
 60 | 	if (!handle.value)
 61 | 		return;
 62 | 	if (index >= manager->num_items || handle.fields.generation != manager->metadata[index].generation)
 63 | 		return; // Handle is invalid.
 64 | 
 65 | 	unsigned short next = manager->metadata[index].next;
 66 | 	unsigned short prev = manager->metadata[index].prev;
 67 | 	manager->metadata[prev].next = next;
 68 | 	manager->metadata[next].prev = prev;
 69 | 	++manager->metadata[index].generation;
 70 | 	manager->metadata[index].next = (unsigned short)manager->freelist;
 71 | 	manager->freelist = index;
 72 | }
 73 | 
 74 | int is_valid(struct manager manager, union handle handle) {
 75 | 	unsigned index = handle.fields.index;
 76 | 	return index < manager.num_items && handle.fields.generation == manager.metadata[index].generation;
 77 | }
 78 | 
 79 | void *get_item_from_handle(struct manager manager, union handle handle) {
 80 | 	unsigned index = handle.fields.index;
 81 | 	if (index >= manager.num_items || handle.fields.generation != manager.metadata[index].generation)
 82 | 		index = 0; // Handle is invalid.
 83 | 
 84 | 	return (char *)manager.items + index * manager.item_size;
 85 | }
 86 | 
 87 | #include <assert.h>
// Test driver for the handle manager. Slot 0 holds the sentinel value -999; every other
// item holds its own index so the asserts can tell which slot a handle refers to.
int main(void) {
	int items[10] = { -999, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
	struct metadata metadata[10];
	struct manager manager = create(items, metadata, 10, sizeof items[0]);
	union handle handles[10];

	// A fresh manager hands out slots 1..9 in order.
	for (int i = 1; i < 10; ++i) {
		handles[i] = allocate(&manager);
		assert(is_valid(manager, handles[i]));
		int *item = get_item_from_handle(manager, handles[i]);
		assert(*item == items[i]);
	}
	
	// Deallocating invalidates the handle.
	for (int i = 1; i < 10; ++i) {
		assert(is_valid(manager, handles[i]));
		deallocate(&manager, handles[i]);
		assert(!is_valid(manager, handles[i]));
	}
	
	// A released slot is reused by the next allocation, but the old handle stays invalid.
	for (int i = 0; i < 10; ++i) {
		union handle handle = allocate(&manager);
		assert(is_valid(manager, handle));
		int item = *(int *)get_item_from_handle(manager, handle);
		deallocate(&manager, handle);
		assert(!is_valid(manager, handle));
		union handle new_handle = allocate(&manager);
		assert(!is_valid(manager, handle));
		assert(item == *(int *)get_item_from_handle(manager, new_handle));
		deallocate(&manager, new_handle);
	}

	// Once all 9 slots are taken, further allocations resolve to the sentinel item.
	for (int i = 1; i < 10; ++i)
		handles[i] = allocate(&manager);
	assert(*(int *)get_item_from_handle(manager, allocate(&manager)) == items[0]);
	assert(*(int *)get_item_from_handle(manager, allocate(&manager)) == items[0]);
	assert(*(int *)get_item_from_handle(manager, allocate(&manager)) == items[0]);
	for (int i = 1; i < 10; ++i)
		deallocate(&manager, handles[i]);

	for (int i = 1; i < 5; ++i)
		handles[i] = allocate(&manager);

	// Walking the allocated list from the sentinel visits the slots in allocation order.
	for (unsigned index = metadata[0].next, i = 1; index; index = metadata[index].next, ++i) {
		assert(i < 5);
		assert(items[index] == (int)i);
	}

	for (int i = 5; i < 10; ++i)
		handles[i] = allocate(&manager);
	for (unsigned index = metadata[0].next, i = 1; index; index = metadata[index].next, ++i) {
		assert(i < 10);
		assert(items[index] == (int)i);
	}
	for (int i = 1; i < 10; ++i)
		deallocate(&manager, handles[i]);

	// With everything released the allocated list must be empty.
	for (unsigned index = metadata[0].next; index; index = metadata[index].next)
		assert(0);
}


--------------------------------------------------------------------------------
/hash_set.c:
--------------------------------------------------------------------------------
  1 | #include <stdlib.h> // calloc, free
  2 | 
  3 | // For simplicity and efficiency, this set doesn't actually store the items.
  4 | // It only stores the item hashes. You'd better have a good hash function, because
  5 | // if two items happen to hash to the same value you're in big trouble: they will
  6 | // overwrite each other. In practice, if you have a decent hash function, the
  7 | // likelihood of this happening is really small with 64-bit hashes. Note that
  8 | // this also means that you cannot iterate over all of the items in the set, since
  9 | // we only store the hashes.
// Open-addressing hash set with linear probing. A slot holds 0 when it is empty,
// TOMBSTONE when its hash was removed, and the stored hash otherwise.
struct set {
	unsigned long long *hashes; // Slot array with `capacity` entries.
	int capacity; // Always a power of 2 or 0.
	int count; // Number of hashes currently stored.
	int num_tombstones; // Number of slots currently marked TOMBSTONE.
};
 16 | 
 17 | #define TOMBSTONE 1
 18 | 
 19 | void resize(struct set *set, int capacity) {
 20 | 	if (capacity <= set->count)
 21 | 		capacity = set->count + 1;
 22 | 	
 23 | 	int pow2; // Round up capacity to a power of 2.
 24 | 	for (pow2 = 1; (1 << pow2) < capacity; ++pow2);
 25 | 	capacity = (1 << pow2);
 26 | 
 27 | 	unsigned long long *new_hashes = calloc((size_t)capacity, sizeof new_hashes[0]);
 28 | 	unsigned mask = (unsigned)capacity - 1;
 29 | 	for (int i = 0; i < set->capacity; ++i) {
 30 | 		unsigned long long hash = set->hashes[i];
 31 | 		if (hash > TOMBSTONE) {
 32 | 			for (unsigned j = (unsigned)hash & mask;; j = (j + 1) & mask) {
 33 | 				if (!new_hashes[j]) {
 34 | 					new_hashes[j] = hash;
 35 | 					break;
 36 | 				}
 37 | 			}
 38 | 		}
 39 | 	}
 40 | 
 41 | 	free(set->hashes);
 42 | 	set->hashes = new_hashes;
 43 | 	set->capacity = capacity;
 44 | 	set->num_tombstones = 0;
 45 | }
 46 | 
 47 | void reserve(struct set *set, int min_capacity) {
 48 | 	if (3 * set->capacity < 4 * min_capacity) {
 49 | 		int capacity = 4 * min_capacity / 3;
 50 | 		if (capacity < 64)
 51 | 			capacity = 64;
 52 | 		resize(set, capacity);
 53 | 	}
 54 | }
 55 | 
 56 | void add(struct set *set, unsigned long long hash) {
 57 | 	hash += (hash <= TOMBSTONE) ? 2 : 0;
 58 | 	reserve(set, set->count + 1);
 59 | 	unsigned mask = (unsigned)set->capacity - 1;
 60 | 	unsigned index = (unsigned)-1;
 61 | 	for (unsigned i = (unsigned)hash & mask;; i = (i + 1) & mask) {
 62 | 		if (set->hashes[i] == hash)
 63 | 			return;
 64 | 		if (!set->hashes[i]) {
 65 | 			index = min(index, i);
 66 | 			break;
 67 | 		}
 68 | 		if (set->hashes[i] == TOMBSTONE)
 69 | 			index = min(index, i);
 70 | 	}
 71 | 	if (set->hashes[index] == TOMBSTONE)
 72 | 		--set->num_tombstones;
 73 | 	set->hashes[index] = hash;
 74 | 	set->count++;
 75 | }
 76 | 
 77 | void remove(struct set *set, unsigned long long hash) {
 78 | 	if (!set->count)
 79 | 		return;
 80 | 
 81 | 	hash += (hash <= TOMBSTONE) ? 2 : 0;
 82 | 	unsigned mask = (unsigned)set->capacity - 1;
 83 | 	for (unsigned i = (unsigned)hash & mask; set->hashes[i]; i = (i + 1) & mask) {
 84 | 		if (set->hashes[i] == hash) {
 85 | 			set->hashes[i] = TOMBSTONE;
 86 | 			set->num_tombstones++;
 87 | 			set->count--;
 88 | 			if (8 * set->num_tombstones > set->capacity)
 89 | 				resize(set, set->capacity); // Get rid of tombstones.
 90 | 			return;
 91 | 		}
 92 | 	}
 93 | }
 94 | 
 95 | int contains(struct set set, unsigned long long hash) {
 96 | 	if (!set.count)
 97 | 		return 0;
 98 | 	
 99 | 	hash += (hash <= TOMBSTONE) ? 2 : 0;
100 | 	unsigned mask = (unsigned)set.capacity - 1;
101 | 	for (unsigned i = (unsigned)hash & mask; set.hashes[i]; i = (i + 1) & mask)
102 | 		if (set.hashes[i] == hash)
103 | 			return 1;
104 | 	
105 | 	return 0;
106 | }
107 | 
108 | void destroy(struct set *set) {
109 | 	free(set->hashes);
110 | 	set->capacity = 0;
111 | 	set->count = 0;
112 | 	set->hashes = NULL;
113 | }
114 | 
115 | #include <assert.h>
// Returns the 64-bit FNV-1a hash of the NUL-terminated `string`.
unsigned long long hash(const char *string) {
	// FNV-1a https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function#FNV-1a_hash
	unsigned long long hash = 14695981039346656037ull; // FNV offset basis.
	// Read bytes as unsigned char: a signed plain char would sign-extend bytes
	// >= 0x80 and XOR into the high bits, which is not FNV-1a.
	for (const unsigned char *byte = (const unsigned char *)string; *byte; ++byte)
		hash = (hash ^ *byte) * 1099511628211ull; // XOR the byte in, then multiply by the FNV prime.
	return hash;
}
123 | int main(void) {
124 | 	{
125 | 		struct set set = { 0 };
126 | 		assert(!contains(set, hash("Hi")));
127 | 		remove(&set, hash("Hi"));
128 | 		destroy(&set);
129 | 	}
130 | 
131 | 	{
132 | 		struct set set = { 0 };
133 | 
134 | 		add(&set, hash("abcd"));
135 | 		add(&set, hash("efgh"));
136 | 		add(&set, hash("ijkl"));
137 | 		add(&set, hash("mnop"));
138 | 		assert(contains(set, hash("abcd")));
139 | 		assert(contains(set, hash("efgh")));
140 | 		assert(contains(set, hash("ijkl")));
141 | 		assert(contains(set, hash("mnop")));
142 | 		assert(!contains(set, hash("qrst")));
143 | 
144 | 		remove(&set, hash("abcd"));
145 | 		assert(!contains(set, hash("abcd")));
146 | 		assert(contains(set, hash("efgh")));
147 | 		assert(contains(set, hash("ijkl")));
148 | 		assert(contains(set, hash("mnop")));
149 | 
150 | 		remove(&set, hash("abcd"));
151 | 		assert(!contains(set, hash("abcd")));
152 | 		assert(contains(set, hash("efgh")));
153 | 		assert(contains(set, hash("ijkl")));
154 | 		assert(contains(set, hash("mnop")));
155 | 
156 | 		remove(&set, hash("efgh"));
157 | 		remove(&set, hash("ijkl"));
158 | 		remove(&set, hash("mnop"));
159 | 		assert(!contains(set, hash("abcd")));
160 | 		assert(!contains(set, hash("efgh")));
161 | 		assert(!contains(set, hash("ijkl")));
162 | 		assert(!contains(set, hash("mnop")));
163 | 
164 | 		destroy(&set);
165 | 	}
166 | 
167 | 	{
168 | 		static unsigned long long items[1048576];
169 | 		int n = sizeof items / sizeof items[0];
170 | 		for (int i = 0; i < n; ++i) {
171 | 			int x = i;
172 | 			char key[8] = { 0 };
173 | 			for (int j = 0; j < 7; ++j) {
174 | 				key[6 - j] = '0' + x % 10;
175 | 				x /= 10;
176 | 			}
177 | 			items[i] = hash(key);
178 | 		}
179 | 
180 | 		struct set set = { 0 };
181 | 		for (int i = 0; i < n; ++i)
182 | 			assert(!contains(set, items[i]));
183 | 		for (int i = 0; i < n; ++i)
184 | 			add(&set, items[i]);
185 | 		for (int i = 0; i < n; ++i)
186 | 			assert(contains(set, items[i]));
187 | 		for (int i = 0; i < n; ++i)
188 | 			add(&set, items[i]);
189 | 		for (int i = 0; i < n; ++i)
190 | 			remove(&set, items[i]);
191 | 		for (int i = 0; i < n; ++i)
192 | 			assert(!contains(set, items[i]));
193 | 		for (int i = 0; i < n; ++i)
194 | 			add(&set, items[i]);
195 | 		for (int i = 0; i < n; ++i)
196 | 			assert(contains(set, items[i]));
197 | 
198 | 		destroy(&set);
199 | 		for (int i = 0; i < n / 2; ++i)
200 | 			add(&set, items[i]);
201 | 		for (int i = n / 2; i < n; ++i)
202 | 			assert(!contains(set, items[i]));
203 | 		for (int i = 0; i < n / 2; ++i)
204 | 			assert(contains(set, items[i]));
205 | 		for (int i = 0; i < n / 4; ++i)
206 | 			remove(&set, items[i]);
207 | 		for (int i = 0; i < n; ++i)
208 | 			assert(contains(set, items[i]) == (i >= n / 4 && i < n / 2));
209 | 
210 | 		for (int i = 0; i < n; ++i)
211 | 			remove(&set, items[i]);
212 | 		assert(set.count == 0);
213 | 		for (int i = 0; i < n; ++i)
214 | 			add(&set, items[i]);
215 | 
216 | 		destroy(&set);
217 | 	}
218 | 
219 | 	{
220 | 		// Potential pathological case: create a bunch of items and then delete them 
221 | 		// to leave tombstones, then lookup each item. If we don't clean tombstones this is O(n^2).
222 | 		struct set set = { 0 };
223 | 		for (unsigned long long i = 2; i <= 1048577; ++i)
224 | 			add(&set, i);
225 | 		//resize(&set, set.count + 1);
226 | 		for (unsigned long long i = 3; i <= 1048577; ++i)
227 | 			remove(&set, i);
228 | 		assert(set.count == 1);
229 | 		for (unsigned long long i = 3; i <= 1048577; ++i)
230 | 			assert(!contains(set, i));
231 | 	}
232 | 
233 | 	{
234 | 		// This shouldn't leak.
235 | 		for (int i = 0; i < 10000; ++i) {
236 | 			struct set set = { 0 };
237 | 			for (int j = 0; j < 10000; ++j) {
238 | 				char item[5] = { 0 };
239 | 				int x = j;
240 | 				item[3] = x % 10; x /= 10;
241 | 				item[2] = x % 10; x /= 10;
242 | 				item[1] = x % 10; x /= 10;
243 | 				item[0] = x % 10; x /= 10;
244 | 				add(&set, hash(item));
245 | 			}
246 | 			destroy(&set);
247 | 		}
248 | 	}
249 | }


--------------------------------------------------------------------------------
/hash_table.c:
--------------------------------------------------------------------------------
  1 | #include <stdlib.h> // malloc, free
  2 | 
// For simplicity and efficiency, this table doesn't actually store the keys.
// It only stores the key hashes. You'd better have a good hash function, because
// if two keys happen to hash to the same value you're in big trouble. They will
// overwrite each other. In practice, if you have a decent hash function the
// likelihood of this happening is really small with 64-bit hashes.
struct table {
	unsigned long long *hashes; // Slot array: 0 = empty, TOMBSTONE = removed, anything else = a stored key hash.
	unsigned long long *values; // values[i] belongs to hashes[i]; lives in the same allocation, right after the hashes.
	int capacity; // Always a power of 2 or 0.
	int count; // Number of key hashes currently stored.
	int num_tombstones; // Number of slots currently holding TOMBSTONE.
};

// Slot value marking a removed entry. Incoming hashes 0 and 1 are shifted up by 2
// before use so they never collide with the empty (0) or TOMBSTONE (1) markers.
#define TOMBSTONE 1
 17 | 
 18 | void resize(struct table *table, int capacity) {
 19 | 	if (capacity <= table->count)
 20 | 		return;
 21 | 	
 22 | 	int pow2; // Round up capacity to a power of 2.
 23 | 	for (pow2 = 1; (1 << pow2) < capacity; ++pow2);
 24 | 	capacity = (1 << pow2);
 25 | 
 26 | 	unsigned long long *new_memory = malloc((size_t)capacity * 2 * sizeof new_memory[0]);
 27 | 	unsigned long long *new_hashes = new_memory;
 28 | 	unsigned long long *new_values = new_hashes + capacity;
 29 | 	for (int i = 0; i < capacity; ++i)
 30 | 		new_hashes[i] = 0;
 31 | 
 32 | 	unsigned mask = (unsigned)capacity - 1;
 33 | 	for (int i = 0; i < table->capacity; ++i) {
 34 | 		unsigned long long hash = table->hashes[i];
 35 | 		if (hash > TOMBSTONE) {
 36 | 			for (unsigned j = (unsigned)hash & mask;; j = (j + 1) & mask) {
 37 | 				if (!new_hashes[j]) {
 38 | 					new_hashes[j] = hash;
 39 | 					new_values[j] = table->values[i];
 40 | 					break;
 41 | 				}
 42 | 			}
 43 | 		}
 44 | 	}
 45 | 
 46 | 	free(table->hashes); // This also frees the values.
 47 | 	table->hashes = new_hashes;
 48 | 	table->values = new_values;
 49 | 	table->capacity = capacity;
 50 | 	table->num_tombstones = 0;
 51 | }
 52 | 
 53 | void reserve(struct table *table, int min_capacity) {
 54 | 	if (3 * table->capacity < 4 * min_capacity) {
 55 | 		int capacity = 4 * min_capacity / 3;
 56 | 		if (capacity < 64)
 57 | 			capacity = 64;
 58 | 		resize(table, capacity);
 59 | 	}
 60 | }
 61 | 
 62 | void add(struct table *table, unsigned long long hash, unsigned long long value) {
 63 | 	hash += (hash <= TOMBSTONE) ? 2 : 0;
 64 | 	reserve(table, table->count + 1);
 65 | 	unsigned mask = (unsigned)table->capacity - 1;
 66 | 	unsigned index = (unsigned)-1;
 67 | 	for (unsigned i = (unsigned)hash & mask;; i = (i + 1) & mask) {
 68 | 		if (table->hashes[i] == hash) {
 69 | 			table->values[i] = value;
 70 | 			return;
 71 | 		}
 72 | 		if (!table->hashes[i]) {
 73 | 			index = min(index, i);
 74 | 			break;
 75 | 		}
 76 | 		if (table->hashes[i] == TOMBSTONE)
 77 | 			index = min(index, i);
 78 | 	}
 79 | 
 80 | 	if (table->hashes[index] == TOMBSTONE)
 81 | 		table->num_tombstones--;
 82 | 	table->hashes[index] = hash;
 83 | 	table->values[index] = value;
 84 | 	table->count++;
 85 | }
 86 | 
 87 | void remove(struct table *table, unsigned long long hash) {
 88 | 	if (!table->count)
 89 | 		return;
 90 | 
 91 | 	hash += (hash <= TOMBSTONE) ? 2 : 0;
 92 | 	unsigned mask = (unsigned)table->capacity - 1;
 93 | 	for (unsigned i = (unsigned)hash & mask; table->hashes[i]; i = (i + 1) & mask) {
 94 | 		if (table->hashes[i] == hash) {
 95 | 			table->hashes[i] = TOMBSTONE;
 96 | 			table->count--;
 97 | 			table->num_tombstones++;
 98 | 			if (8 * table->num_tombstones > table->capacity)
 99 | 				resize(table, table->capacity); // Get rid of tombstones.
100 | 			return;
101 | 		}
102 | 	}
103 | }
104 | 
105 | unsigned long long *get(struct table table, unsigned long long hash) {
106 | 	if (!table.count)
107 | 		return NULL;
108 | 
109 | 	hash += (hash <= TOMBSTONE) ? 2 : 0;
110 | 	unsigned mask = (unsigned)table.capacity - 1;
111 | 	for (unsigned i = (unsigned)hash & mask; table.hashes[i]; i = (i + 1) & mask)
112 | 		if (table.hashes[i] == hash)
113 | 			return &table.values[i];
114 | 
115 | 	return NULL;
116 | }
117 | 
118 | int first_index(struct table table) {
119 | 	for (int i = 0; i < table.capacity; ++i)
120 | 		if (table.hashes[i] > TOMBSTONE)
121 | 			return i;
122 | 	return -1;
123 | }
124 | 
125 | int next_index(struct table table, int index) {
126 | 	for (int i = index + 1; i < table.capacity; ++i)
127 | 		if (table.hashes[i] > TOMBSTONE)
128 | 			return i;
129 | 	return -1;
130 | }
131 | 
132 | void destroy(struct table *table) {
133 | 	free(table->hashes); // This also frees the values.
134 | 	table->capacity = 0;
135 | 	table->count = 0;
136 | 	table->hashes = NULL;
137 | 	table->values = NULL;
138 | }
139 | 
140 | #include <assert.h>
// Returns the 64-bit FNV-1a hash of the NUL-terminated `string`.
unsigned long long hash(const char *string) {
	// FNV-1a https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function#FNV-1a_hash
	unsigned long long hash = 14695981039346656037ull; // FNV offset basis.
	// Read bytes as unsigned char: a signed plain char would sign-extend bytes
	// >= 0x80 and XOR into the high bits, which is not FNV-1a.
	for (const unsigned char *byte = (const unsigned char *)string; *byte; ++byte)
		hash = (hash ^ *byte) * 1099511628211ull; // XOR the byte in, then multiply by the FNV prime.
	return hash;
}
148 | int main(void) {
149 | 	{
150 | 		struct table table = { 0 };
151 | 		assert(!get(table, 123));
152 | 		assert(first_index(table) == -1);
153 | 		destroy(&table);
154 | 	}
155 | 
156 | 	{
157 | 		const char *strings[4] = {
158 | 			"Hello, sailor!",
159 | 			"Three jumping wizards box quickly",
160 | 			"Third",
161 | 			"Eyyo",
162 | 		};
163 | 
164 | 		struct table table = { 0 };
165 | 		for (int i = 0; i < 4; ++i)
166 | 			add(&table, hash(strings[i]), (unsigned)i);
167 | 		
168 | 		assert(table.count == 4);
169 | 		for (int i = 0; i < 4; ++i)
170 | 			assert(*get(table, hash(strings[i])) == (unsigned)i);
171 | 
172 | 		int remaining[4] = { 0, 1, 2, 3 };
173 | 		int num_remaining = 4;
174 | 		for (int i = first_index(table); i >= 0; i = next_index(table, i)) {
175 | 			int value = (int)table.values[i];
176 | 			for (int i = 0; i < num_remaining; ++i) {
177 | 				if (remaining[i] == value) {
178 | 					remaining[i] = remaining[--num_remaining];
179 | 					break;
180 | 				}
181 | 			}
182 | 		}
183 | 		assert(num_remaining == 0);
184 | 
185 | 		destroy(&table);
186 | 		assert(!table.capacity && !table.count && !table.hashes && !table.values);
187 | 	}
188 | 
189 | 	{
190 | 		static unsigned long long hashes[1048576];
191 | 		int n = sizeof hashes / sizeof hashes[0];
192 | 		unsigned long long seed = 42;
193 | 		for (int i = 0; i < n; ++i) {
194 | 			seed ^= seed >> 12;
195 | 			seed ^= seed << 25;
196 | 			seed ^= seed >> 27;
197 | 			hashes[i] = seed * 0x2545F4914F6CDD1Du;
198 | 		}
199 | 
200 | 		struct table table = { 0 };
201 | 		for (int i = 0; i < n; ++i)
202 | 			add(&table, hashes[i], (unsigned)i);
203 | 
204 | 		assert(table.count == n);
205 | 		for (int i = 0; i < n; ++i)
206 | 			assert(*get(table, hashes[i]) == (unsigned)i);
207 | 
208 | 		static int remaining[sizeof hashes / sizeof hashes[0]];
209 | 		for (int i = 0; i < n; ++i)
210 | 			remaining[i] = 1;
211 | 		for (int i = first_index(table); i >= 0; i = next_index(table, i)) {
212 | 			int value = (int)table.values[i];
213 | 			remaining[value] -= 1;
214 | 		}
215 | 		int num_remaining = 0;
216 | 		for (int i = 0; i < n; ++i)
217 | 			num_remaining += remaining[i];
218 | 		assert(num_remaining == 0);
219 | 
220 | 		for (int i = 0; i < n / 2; ++i)
221 | 			remove(&table, hashes[i]);
222 | 		assert(table.count == n / 2);
223 | 		for (int i = n / 2; i < n; ++i)
224 | 			assert(*get(table, hashes[i]) == (unsigned)i);
225 | 
226 | 		for (int i = 0; i < n; ++i)
227 | 			remaining[i] = 1;
228 | 		for (int i = first_index(table); i >= 0; i = next_index(table, i)) {
229 | 			int value = (int)table.values[i];
230 | 			remaining[value] -= 1;
231 | 		}
232 | 		int num_remaining1 = 0;
233 | 		int num_remaining2 = 0;
234 | 		for (int i = 0; i < n / 2; ++i)
235 | 			num_remaining1 += remaining[i];
236 | 		for (int i = n / 2; i < n; ++i)
237 | 			num_remaining2 += remaining[i];
238 | 		assert(num_remaining1 == n / 2);
239 | 		assert(num_remaining2 == 0);
240 | 
241 | 		for (int i = 0; i < n / 2; ++i)
242 | 			add(&table, hashes[i], (unsigned)i);
243 | 		for (int i = 0; i < n; ++i)
244 | 			remaining[i] = 1;
245 | 		for (int i = first_index(table); i >= 0; i = next_index(table, i)) {
246 | 			int value = (int)table.values[i];
247 | 			remaining[value] -= 1;
248 | 		}
249 | 		num_remaining = 0;
250 | 		for (int i = 0; i < n; ++i)
251 | 			num_remaining += remaining[i];
252 | 		assert(num_remaining == 0);
253 | 
254 | 		destroy(&table);
255 | 	}
256 | 
257 | 	{
258 | 		// Potential pathological case: create a bunch of items and then delete them 
259 | 		// to leave tombstones, then lookup each item. If we don't clean tombstones this is O(n^2).
260 | 		struct table table = { 0 };
261 | 		for (unsigned i = 2; i <= 1048577; ++i)
262 | 			add(&table, i, i);
263 | 		for (unsigned i = 2; i <= 1048577; ++i)
264 | 			remove(&table, i, i);
265 | 		assert(table.count == 0);
266 | 		for (unsigned i = 2; i <= 1048577; ++i)
267 | 			assert(!get(table, i));
268 | 	}
269 | 
270 | 	{
271 | 		// This shouldn't leak.
272 | 		for (int i = 0; i < 10000; ++i) {
273 | 			struct table table = { 0 };
274 | 			for (int j = 0; j < 10000; ++j)
275 | 				add(&table, (unsigned)j, (unsigned)j);
276 | 			destroy(&table);
277 | 		}
278 | 	}
279 | }


--------------------------------------------------------------------------------
/mpmc_queue.c:
--------------------------------------------------------------------------------
  1 | // Concurrent multi-producer-multi-consumer wait-free-ish ring buffer queue (what a mouthful!).
  2 | // 
  3 | // - Wait-free unless the queue is full on write or empty on read.
  4 | // - If full on write or empty on read, caller yields to the OS scheduler. Increases latency but conserves power.
  5 | // - Only 1 atomic increment and 2 serialization points per call in the fast case.
  6 | // - Only 2 bytes overhead per queue slot.
  7 | // - Polling versions of calls are possible.
  8 | // - Queue is initialized to all 0.
  9 | // - No memory allocations or thread local storage.
 10 | // - Slightly modified version of https://github.com/rigtorp/MPMCQueue, which is battle tested.
 11 | 
 12 | #include <Windows.h>
 13 | #pragma comment(lib, "Synchronization.lib")
 14 | 
 15 | #define CAPACITY 16384 // Must be a power of 2.
 16 | 
// Fixed-capacity ring buffer shared by all producers and consumers.
// An all-zero instance is an empty queue. The tickets and the slot array are
// each aligned to 64 bytes.
struct Queue
{
	__declspec(align(64)) UINT32 WriteTicket; // Next ticket for a writer: ticket % CAPACITY is the slot, ticket / CAPACITY the turn.
	__declspec(align(64)) UINT32 ReadTicket; // Next ticket for a reader, decoded the same way.
	__declspec(align(64)) struct
	{
		UINT8 WriteTurn; // Turn whose writer may fill this slot; set by the reader that empties it.
		UINT8 ReadTurn; // Turn whose reader may empty this slot; set by the writer that fills it.
		int Item; // You can put anything you want here.
	} Slots[CAPACITY];
};
 28 | 
 29 | // Blocking API
 30 | 
// Appends `item` to the queue. Takes a write ticket, then blocks in
// WaitOnAddress until the ticket's slot reaches this ticket's write turn
// (i.e. while the queue is full), stores the item and wakes any waiting reader.
void Enqueue(volatile struct Queue *queue, int item)
{
	UINT32 ticket = InterlockedIncrementNoFence((volatile LONG *)&queue->WriteTicket) - 1; // Serialization with all writers
	UINT32 slot = ticket % CAPACITY;
	UINT8 turn = (UINT8)(ticket / CAPACITY); // Write turns start at 0.

	UINT8 currentTurn;
	while ((currentTurn = queue->Slots[slot].WriteTurn) != turn) // Acquire, Serialization with 1 reader.
		WaitOnAddress(&queue->Slots[slot].WriteTurn, &currentTurn, sizeof currentTurn, INFINITE); // Block while queue is full.

	queue->Slots[slot].Item = item;
	queue->Slots[slot].ReadTurn = turn + 1; // Release, serialization with 1 reader.
	WakeByAddressAll((void *)&queue->Slots[slot].ReadTurn); // Hash table crawl.
}
// Removes and returns the oldest item. Takes a read ticket, then blocks in
// WaitOnAddress until the ticket's slot reaches this ticket's read turn
// (i.e. while the queue is empty), reads the item, hands the slot to the next
// turn's writer and wakes any waiting writer.
int Dequeue(volatile struct Queue *queue)
{
	UINT32 ticket = InterlockedIncrementNoFence((volatile LONG *)&queue->ReadTicket) - 1; // Acquire, serialization with all readers.
	UINT32 slot = ticket % CAPACITY;
	UINT8 turn = (UINT8)(ticket / CAPACITY + 1); // Read turns start at 1.

	UINT8 currentTurn;
	while ((currentTurn = queue->Slots[slot].ReadTurn) != turn) // Acquire, serialization with 1 writer.
		WaitOnAddress(&queue->Slots[slot].ReadTurn, &currentTurn, sizeof currentTurn, INFINITE); // Block while queue is empty.

	int item = queue->Slots[slot].Item;
	queue->Slots[slot].WriteTurn = turn; // Release, serialization with 1 writer.
	WakeByAddressAll((void *)&queue->Slots[slot].WriteTurn); // Hash table crawl.
	return item;
}
 60 | 
 61 | // Polling API
 62 | 
 63 | BOOL TryEnqueue(volatile struct Queue *queue, int item)
 64 | {
 65 | 	UINT32 tryTicket = queue->WriteTicket; // Atomic load relaxed. Serialization with all writers.
 66 | 	for (;;)
 67 | 	{
 68 | 		UINT32 slot = tryTicket % CAPACITY;
 69 | 		UINT8 turn = (UINT8)(tryTicket / CAPACITY); // Write turns start at 0.
 70 | 		UINT8 currentTurn = queue->Slots[slot].WriteTurn; // Acquire, serialization with 1 reader.
 71 | 		
 72 | 		int turnsRemaining = (int)(turn - currentTurn);
 73 | 		if (turnsRemaining > 0)
 74 | 			return FALSE; // Queue is full.
 75 | 		if (turnsRemaining == 0)
 76 | 		{
 77 | 			UINT32 ticket = InterlockedCompareExchangeNoFence((volatile LONG *)&queue->WriteTicket, tryTicket + 1, tryTicket); // Serialization with all readers.
 78 | 			if (ticket == tryTicket)
 79 | 			{
 80 | 				queue->Slots[slot].Item = item;
 81 | 				queue->Slots[slot].ReadTurn = turn + 1; // Release, serialization with 1 reader.
 82 | 				WakeByAddressAll((void *)&queue->Slots[slot].ReadTurn); // Hash table crawl. Remove this if you only use Polling and not Blocking.
 83 | 				return TRUE;
 84 | 			}
 85 | 			tryTicket = ticket;
 86 | 		}
 87 | 		else tryTicket = queue->WriteTicket; // Another writer beat us to it, try again.
 88 | 	}
 89 | }
 90 | BOOL TryDequeue(volatile struct Queue *queue, int *outItem)
 91 | {
 92 | 	UINT32 tryTicket = queue->ReadTicket; // Atomic load relaxed. Serialization with all readers.
 93 | 	for (;;)
 94 | 	{
 95 | 		UINT32 slot = tryTicket % CAPACITY;
 96 | 		UINT8 turn = (UINT8)(tryTicket / CAPACITY + 1); // Read turns start at 1.
 97 | 		UINT8 currentTurn = queue->Slots[slot].ReadTurn; // Acquire, serialization with 1 writer.
 98 | 
 99 | 		int turnsRemaining = (int)(turn - currentTurn);
100 | 		if (turnsRemaining > 0)
101 | 			return FALSE; // Queue is empty.
102 | 		if (turnsRemaining == 0)
103 | 		{
104 | 			UINT32 ticket = InterlockedCompareExchangeNoFence((volatile LONG *)&queue->ReadTicket, tryTicket + 1, tryTicket); // Serialization with all readers.
105 | 			if (ticket == tryTicket)
106 | 			{
107 | 				(*outItem) = queue->Slots[slot].Item;
108 | 				queue->Slots[slot].WriteTurn = turn; // Release, serialization with 1 writer.
109 | 				WakeByAddressAll((void *)&queue->Slots[slot].WriteTurn); // Hash table crawl. Remove this if you only use Polling and not Blocking.
110 | 				return TRUE;
111 | 			}
112 | 			tryTicket = ticket;
113 | 		}
114 | 		else tryTicket = queue->ReadTicket; // Another reader beat us to it, try again.
115 | 	}
116 | }
117 | 
118 | // Test
119 | 
120 | #include <assert.h>
121 | 
// Test consumer. Dequeues 1,000,000 items (the first half with the blocking
// API, the second half by polling), checks that each writer's items arrive in
// increasing order, then waits for all three readers and verifies that every
// item was received exactly once.
DWORD __stdcall ReaderThread(void *parameter)
{
	struct Queue *queue = parameter;
	static volatile LONG counters[3][1000000]; // Shared by all readers: times each (writer, data) pair was received.
	int lastWriterData[3] = { -1, -1, -1 }; // Per reader: last data value seen from each writer.
	for (int i = 0; i < 1000000; ++i)
	{
		int item;
		if (i < 500000)
			item = Dequeue(queue);
		else
			while (!TryDequeue(queue, &item));
		// Items are decoded as writer * 1000000 + data.
		int writer = item / 1000000;
		int data = item % 1000000;
		assert(writer < 3); // Ensure no data corruption.
		InterlockedIncrement(&counters[writer][data]);
		assert(lastWriterData[writer] < data); // Ensure data is correctly sequenced FIFO.
		lastWriterData[writer] = data;
	}

	// Wait for all readers to finish.
	static volatile LONG doneCounter;
	InterlockedIncrement(&doneCounter);
	WakeByAddressAll((void *)&doneCounter);
	LONG numDone;
	while ((numDone = doneCounter) != 3)
		WaitOnAddress(&doneCounter, &numDone, sizeof numDone, INFINITE);

	for (int writer = 0; writer < 3; ++writer)
		for (int i = 0; i < 1000000; ++i)
			assert(counters[writer][i] == 1); // Ensure all items have been properly received.

	return EXIT_SUCCESS;
}
156 | DWORD __stdcall WriterThread(void *parameter)
157 | {
158 | 	struct Queue *queue = parameter;
159 | 	static volatile LONG idDispenser;
160 | 	LONG id = InterlockedIncrement(&idDispenser) - 1;
161 | 	for (int i = 0; i < 500000; ++i)
162 | 		Enqueue(queue, id * 1000000 + i);
163 | 	for (int i = 500000; i < 1000000; ++i)
164 | 		while (!TryEnqueue(queue, id * 1000000 + i));
165 | 	return EXIT_SUCCESS;
166 | }
167 | int main(void)
168 | {
169 | 	static struct Queue queue;
170 | 	HANDLE threads[6];
171 | 	threads[0] = CreateThread(NULL, 0, ReaderThread, &queue, 0, NULL);
172 | 	threads[1] = CreateThread(NULL, 0, ReaderThread, &queue, 0, NULL);
173 | 	threads[2] = CreateThread(NULL, 0, ReaderThread, &queue, 0, NULL);
174 | 	threads[3] = CreateThread(NULL, 0, WriterThread, &queue, 0, NULL);
175 | 	threads[4] = CreateThread(NULL, 0, WriterThread, &queue, 0, NULL);
176 | 	threads[5] = CreateThread(NULL, 0, WriterThread, &queue, 0, NULL);
177 | 	WaitForMultipleObjects(6, threads, TRUE, INFINITE);
178 | 	__debugbreak();
179 | }
180 | 


--------------------------------------------------------------------------------
/mpmc_queue.cpp:
--------------------------------------------------------------------------------
  1 | // Concurrent multi-producer-multi-consumer wait-free-ish ring buffer queue (what a mouthful!).
  2 | // 
  3 | // - Wait-free unless the queue is full on write or empty on read.
  4 | // - If full on write or empty on read, caller yields to the OS scheduler. Increases latency but conserves power.
  5 | // - Only 1 atomic increment and 2 serialization points per call in the fast case.
  6 | // - Only 2 bytes overhead per queue slot.
  7 | // - Polling versions of calls are possible.
  8 | // - Queue is initialized to all 0.
  9 | // - No memory allocations or thread local storage.
 10 | // - Slightly modified version of https://github.com/rigtorp/MPMCQueue, which is battle tested.
 11 | 
 12 | #include <stdint.h>
 13 | #include <atomic>
 14 | using namespace std;
 15 | using enum std::memory_order;
 16 | 
 17 | #define CAPACITY 16384 // Must be a power of 2.
 18 | 
// Fixed-capacity ring buffer shared by all producers and consumers.
// The tickets and each slot are aligned to 64 bytes.
struct Queue {
	alignas(64) atomic<uint32_t> write_ticket = 0; // Next ticket for a writer: ticket % CAPACITY is the slot, ticket / CAPACITY the turn.
	alignas(64) atomic<uint32_t> read_ticket = 0; // Next ticket for a reader, decoded the same way.
	struct {
		alignas(64)
		atomic<uint8_t> write_turn = 0; // Turn whose writer may fill this slot; set by the reader that empties it.
		atomic<uint8_t> read_turn = 0; // Turn whose reader may empty this slot; set by the writer that fills it.
		int item = 0; // Payload.
	} slots[CAPACITY];
};
 29 | 
 30 | // Blocking API
 31 | 
// Appends `item` to the queue. Takes a write ticket, then blocks in
// atomic::wait until the ticket's slot reaches this ticket's write turn
// (i.e. while the queue is full), stores the item and notifies waiting readers.
void enqueue(Queue *queue, int item) {
	uint32_t ticket = queue->write_ticket.fetch_add(1, relaxed); // Serialization with all writers.
	uint32_t slot = ticket % CAPACITY;
	uint8_t turn = (uint8_t)(ticket / CAPACITY); // Write turns start at 0.

	uint8_t current_turn;
	while ((current_turn = queue->slots[slot].write_turn.load(acquire)) != turn) // Serialization with 1 reader.
		queue->slots[slot].write_turn.wait(current_turn, acquire); // Block while queue is full.

	queue->slots[slot].item = item;
	queue->slots[slot].read_turn.store(turn + 1, release); // Serialization with 1 reader.
	queue->slots[slot].read_turn.notify_all(); // Hash table crawl.
}
// Removes and returns the oldest item. Takes a read ticket, then blocks in
// atomic::wait until the ticket's slot reaches this ticket's read turn
// (i.e. while the queue is empty), reads the item, hands the slot to the next
// turn's writer and notifies waiting writers.
int dequeue(Queue *queue) {
	uint32_t ticket = queue->read_ticket.fetch_add(1, relaxed); // Serialization with all readers.
	uint32_t slot = ticket % CAPACITY;
	uint8_t turn = (uint8_t)(ticket / CAPACITY + 1); // Read turns start at 1.

	uint8_t current_turn;
	while ((current_turn = queue->slots[slot].read_turn.load(acquire)) != turn) // Serialization with 1 writer.
		queue->slots[slot].read_turn.wait(current_turn, acquire); // Block while queue is empty.

	int item = queue->slots[slot].item;
	queue->slots[slot].write_turn.store(turn, release); // Serialization with 1 writer.
	queue->slots[slot].write_turn.notify_all(); // Hash table crawl.
	return item;
}
 59 | 
 60 | // Polling API
 61 | 
 62 | bool try_enqueue(Queue *queue, int item) {
 63 | 	uint32_t try_ticket = queue->write_ticket.load(relaxed); // Serialization with all writers.
 64 | 	for (;;) {
 65 | 		uint32_t slot = try_ticket % CAPACITY;
 66 | 		uint8_t turn = (uint8_t)(try_ticket / CAPACITY); // Write turns start at 0.
 67 | 		uint8_t current_turn = queue->slots[slot].write_turn.load(acquire); // Serialization with 1 reader.
 68 | 
 69 | 		int turns_remaining = (int)(turn - current_turn);
 70 | 		if (turns_remaining > 0)
 71 | 			return false; // Queue is full.
 72 | 		else if (turns_remaining < 0)
 73 | 			try_ticket = queue->write_ticket.load(relaxed); // Another writer lapped us, try again.
 74 | 		else if (queue->write_ticket.compare_exchange_weak(try_ticket, try_ticket + 1, relaxed)) {
 75 | 			queue->slots[slot].item = item;
 76 | 			queue->slots[slot].read_turn.store(turn + 1, release); // Serialization with 1 reader.
 77 | 			queue->slots[slot].read_turn.notify_all(); // Hash table crawl.
 78 | 			return true;
 79 | 		}
 80 | 	}
 81 | }
 82 | bool try_dequeue(Queue *queue, int *out_item) {
 83 | 	uint32_t try_ticket = queue->read_ticket.load(relaxed); // Serialization with all readers.
 84 | 	for (;;) {
 85 | 		uint32_t slot = try_ticket % CAPACITY;
 86 | 		uint8_t turn = (uint8_t)(try_ticket / CAPACITY + 1); // Read turns start at 1.
 87 | 		uint8_t current_turn = queue->slots[slot].read_turn.load(acquire); // Serialization with 1 writer.
 88 | 
 89 | 		int turns_remaining = (int)(turn - current_turn);
 90 | 		if (turns_remaining > 0)
 91 | 			return false; // Queue is empty.
 92 | 		else if (turns_remaining < 0)
 93 | 			try_ticket = queue->read_ticket.load(relaxed); // Another reader lapped us, try again.
 94 | 		else if (queue->read_ticket.compare_exchange_weak(try_ticket, try_ticket + 1, relaxed)) {
 95 | 			(*out_item) = queue->slots[slot].item;
 96 | 			queue->slots[slot].write_turn.store(turn, release); // Serialization with 1 writer.
 97 | 			queue->slots[slot].write_turn.notify_all(); // Hash table crawl.
 98 | 			return true;
 99 | 		}
100 | 	}
101 | }
102 | 
103 | // Test
104 | 
105 | #include <thread>
106 | #include <assert.h>
107 | 
// Test consumer. Dequeues 1,000,000 items (the first half with the blocking
// API, the second half by polling), checks that each writer's items arrive in
// increasing order, then waits for all three readers and verifies that every
// item was received exactly once.
void reader_thread(Queue *queue) {
	static atomic<int> counters[3][1000000]; // Shared by all readers: times each (writer, data) pair was received.
	int last_writer_data[3] = { -1, -1, -1 }; // Per reader: last data value seen from each writer.
	for (int i = 0; i < 1000000; ++i) {
		int item;
		if (i < 500000)
			item = dequeue(queue);
		else
			while (!try_dequeue(queue, &item));
		// Items are decoded as writer_id * 1000000 + data.
		int writer_id = item / 1000000;
		int data = item % 1000000;
		assert(writer_id < 3); // Ensure no data corruption.
		counters[writer_id][data].fetch_add(1);
		assert(last_writer_data[writer_id] < data); // Ensure data is correctly sequenced FIFO.
		last_writer_data[writer_id] = data;
	}

	// Wait for all readers to finish.
	static atomic<int> done_counter;
	done_counter.fetch_add(1);
	done_counter.notify_all();
	int num_done;
	while ((num_done = done_counter.load()) != 3)
		done_counter.wait(num_done);

	for (int writer_id = 0; writer_id < 3; ++writer_id)
		for (int i = 0; i < 1000000; ++i)
			assert(counters[writer_id][i] == 1); // Ensure all items have been properly received.
}
137 | void writer_thread(Queue *queue) {
138 | 	static atomic<int> id_dispenser;
139 | 	int id = id_dispenser.fetch_add(1);
140 | 	for (int i = 0; i < 500000; ++i)
141 | 		enqueue(queue, id * 1000000 + i);
142 | 	for (int i = 500000; i < 1000000; ++i)
143 | 		while (!try_enqueue(queue, id * 1000000 + i));
144 | }
145 | int main() {
146 | 	static Queue queue;
147 | 	thread reader0(reader_thread, &queue);
148 | 	thread reader1(reader_thread, &queue);
149 | 	thread reader2(reader_thread, &queue);
150 | 	thread writer0(writer_thread, &queue);
151 | 	thread writer1(writer_thread, &queue);
152 | 	thread writer2(writer_thread, &queue);
153 | 	reader0.join();
154 | 	reader1.join();
155 | 	reader2.join();
156 | 	writer0.join();
157 | 	writer1.join();
158 | 	writer2.join();
159 | }


--------------------------------------------------------------------------------
/mpsc_queue.c:
--------------------------------------------------------------------------------
  1 | #include <Windows.h>
  2 | #pragma comment(lib, "Synchronization.lib")
  3 | 
  4 | #define CAPACITY 16384 // Must be a power of 2.
  5 | 
// Fixed-capacity ring buffer. The tickets and the slot array are each aligned to 64 bytes.
struct Queue
{
	__declspec(align(64)) UINT32 WriteTicket; // Next ticket for a writer: ticket % CAPACITY is the slot, ticket / CAPACITY the turn.
	__declspec(align(64)) UINT32 ReadTicket; // Next ticket for the reader, decoded the same way.
	// Turn: turn whose writer may fill the slot (advanced by Dequeue).
	// Full: set by Enqueue once Item is stored, cleared by Dequeue.
	__declspec(align(64)) struct { UINT8 Turn, Full; int Item; } Slots[CAPACITY];
};
 12 | 
 13 | // Blocking API
 14 | 
// Blocking enqueue. Takes the next write ticket, waits until the ticket's slot
// has reached the ticket's turn, then stores the item and flags the slot full.
//   queue: queue to append to.
//   item:  value to store.
// NOTE(review): the Acquire/Release annotations below are attached to plain
// volatile accesses; confirm the target compiler gives volatile those semantics.
void Enqueue(volatile struct Queue *queue, int item)
{
	UINT32 ticket = InterlockedIncrementNoFence((volatile long *)&queue->WriteTicket) - 1; // Serialization with writers. 
	UINT32 slot = ticket % CAPACITY;
	UINT8 turn = (UINT8)(ticket / CAPACITY);

	// Re-read Turn after every wake-up; only proceed once it equals our turn.
	UINT8 currentTurn;
	while ((currentTurn = queue->Slots[slot].Turn) != turn) // Acquire, serialization with reader.
		WaitOnAddress(&queue->Slots[slot].Turn, &currentTurn, sizeof currentTurn, INFINITE); // Block while queue is full.
	
	// Publish the payload first, then the Full flag, then wake a waiter on Full.
	queue->Slots[slot].Item = item;
	queue->Slots[slot].Full = TRUE; // Release, serialization with reader.
	WakeByAddressSingle((void *)&queue->Slots[slot].Full); // Hash table lookup.
}
// Blocking dequeue. Consumes the next read ticket, waits until that ticket's
// slot is flagged full, takes the item, then clears the flag and advances the
// slot's turn so the writer holding the next turn for this slot may proceed.
//   queue: queue to take from.
// Returns the dequeued value.
// ReadTicket is incremented with a plain (non-interlocked) operation.
int Dequeue(volatile struct Queue *queue)
{
	UINT32 ticket = queue->ReadTicket++;
	UINT32 slot = ticket % CAPACITY;
	UINT8 turn = (UINT8)(ticket / CAPACITY);

	// notFull is the comparison value for WaitOnAddress: wait while Full is still FALSE.
	UINT8 notFull = FALSE;
	while (!queue->Slots[slot].Full) // Acquire, serialization with 1 writer.
		WaitOnAddress(&queue->Slots[slot].Full, &notFull, sizeof notFull, INFINITE); // Block while queue is empty.
	
	int item = queue->Slots[slot].Item;
	queue->Slots[slot].Full = FALSE;
	queue->Slots[slot].Turn = turn + 1; // Release, serialization with 1 writer.
	WakeByAddressAll((void *)&queue->Slots[slot].Turn); // Hash table crawl.
	return item;
}
 45 | 
 46 | // Polling API
 47 | 
 48 | BOOL TryEnqueue(volatile struct Queue *queue, int item)
 49 | {
 50 | 	UINT32 tryTicket = queue->WriteTicket; // Atomic load relaxed. Serialization with writers.
 51 | 	for (;;)
 52 | 	{
 53 | 		UINT32 slot = tryTicket % CAPACITY;
 54 | 		UINT8 turn = (UINT8)(tryTicket / CAPACITY);
 55 | 		UINT8 currentTurn = queue->Slots[slot].Turn; // Acquire, serialization with reader.
 56 | 
 57 | 		int turnsRemaining = (int)turn - (int)currentTurn;
 58 | 		if (turnsRemaining > 0)
 59 | 			return FALSE; // Queue is full.
 60 | 		else if (turnsRemaining < 0)
 61 | 			tryTicket = queue->WriteTicket; // Another writer lapped us, try again.
 62 | 		else
 63 | 		{
 64 | 			UINT32 ticket = InterlockedCompareExchangeNoFence((volatile LONG *)&queue->WriteTicket, tryTicket + 1, tryTicket); // Serialization with writers.
 65 | 			if (ticket == tryTicket)
 66 | 			{
 67 | 				queue->Slots[slot].Item = item;
 68 | 				queue->Slots[slot].Full = TRUE; // Release, serialization with reader.
 69 | 				WakeByAddressSingle((void *)&queue->Slots[slot].Full); // Hash table lookup. Remove this if you only use Polling and not Blocking.
 70 | 				return TRUE;
 71 | 			}
 72 | 			tryTicket = ticket;
 73 | 		}
 74 | 	}
 75 | }
// Non-blocking dequeue. Looks at the slot of the current read ticket and takes
// its item only if the slot is already flagged full.
//   queue:   queue to take from.
//   outItem: receives the dequeued value; written only when TRUE is returned.
// Returns TRUE if an item was taken, FALSE if the queue is empty.
// ReadTicket is read and incremented with plain (non-interlocked) operations.
BOOL TryDequeue(volatile struct Queue *queue, int *outItem)
{
	UINT32 ticket = queue->ReadTicket;
	UINT32 slot = ticket % CAPACITY;
	if (!queue->Slots[slot].Full) // Acquire, serialization with 1 writer.
		return FALSE; // Queue is empty.

	UINT8 turn = (UINT8)(ticket / CAPACITY);
	(*outItem) = queue->Slots[slot].Item;
	queue->Slots[slot].Full = FALSE;
	queue->Slots[slot].Turn = turn + 1; // Release, serialization with 1 writer.
	WakeByAddressAll((void *)&queue->Slots[slot].Turn); // Hash table crawl.
	++(queue->ReadTicket); // The ticket is consumed only after the slot has been handed back.
	return TRUE;
}
 91 | 
 92 | // Test
 93 | 
 94 | #include <assert.h>
 95 | 
 96 | DWORD __stdcall ReaderThread(void *parameter)
 97 | {
 98 | 	struct Queue *queue = parameter;
 99 | 	static LONG counters[5][1000000];
100 | 	int lastWriterData[5] = { -1, -1, -1, -1, -1 };
101 | 	for (int i = 0; i < 5000000; ++i)
102 | 	{
103 | 		int item;
104 | 		if (i < 2500000)
105 | 			item = Dequeue(queue);
106 | 		else
107 | 			while (!TryDequeue(queue, &item));
108 | 		int writer = item / 1000000;
109 | 		int data = item % 1000000;
110 | 		assert(writer < 5); // Ensure no data corruption corruption.
111 | 		++(counters[writer][data]);
112 | 		assert(lastWriterData[writer] < data); // Ensure data is correctly sequenced FIFO.
113 | 		lastWriterData[writer] = data;
114 | 	}
115 | 	for (int writerId = 0; writerId < 5; ++writerId)
116 | 		for (int i = 0; i < 1000000; ++i)
117 | 			assert(counters[writerId][i] == 1); // Ensure all items have been properly received.
118 | 
119 | 	return EXIT_SUCCESS;
120 | }
121 | DWORD __stdcall WriterThread(void *parameter)
122 | {
123 | 	struct Queue *queue = parameter;
124 | 	static volatile LONG idDispenser;
125 | 	LONG id = InterlockedIncrement(&idDispenser) - 1;
126 | 	for (int i = 0; i < 500000; ++i)
127 | 		Enqueue(queue, id * 1000000 + i);
128 | 	for (int i = 500000; i < 1000000; ++i)
129 | 		while (!TryEnqueue(queue, id * 1000000 + i));
130 | 	return EXIT_SUCCESS;
131 | }
132 | int main(void)
133 | {
134 | 	static struct Queue queue;
135 | 	HANDLE threads[6];
136 | 	threads[0] = CreateThread(NULL, 0, ReaderThread, &queue, 0, NULL);
137 | 	threads[1] = CreateThread(NULL, 0, WriterThread, &queue, 0, NULL);
138 | 	threads[2] = CreateThread(NULL, 0, WriterThread, &queue, 0, NULL);
139 | 	threads[3] = CreateThread(NULL, 0, WriterThread, &queue, 0, NULL);
140 | 	threads[4] = CreateThread(NULL, 0, WriterThread, &queue, 0, NULL);
141 | 	threads[5] = CreateThread(NULL, 0, WriterThread, &queue, 0, NULL);
142 | 	WaitForMultipleObjects(6, threads, TRUE, INFINITE);
143 | 	__debugbreak();
144 | }
145 | 


--------------------------------------------------------------------------------
/mpsc_queue.cpp:
--------------------------------------------------------------------------------
  1 | #include <atomic>
  2 | #include <stdint.h>
  3 | using namespace std;
  4 | using enum std::memory_order;
  5 | 
  6 | #define CAPACITY 16384 // Must be a power of 2.
  7 | 
// Fixed-capacity queue of ints built on tickets: a ticket maps to slot
// (ticket % CAPACITY) and to turn (low 8 bits of ticket / CAPACITY).
struct Queue {
	alignas(64) atomic<uint32_t> write_ticket = 0; // Next ticket handed to a writer.
	alignas(64) uint32_t read_ticket = 0; // Next ticket to consume; a plain (non-atomic) integer.
	struct { 
		alignas(64) // Applied to the first member, which makes every slot 64-byte aligned.
		atomic<uint8_t> turn = 0; // Turn whose writer may fill this slot.
		atomic<bool> full = false; // True while `item` holds a value that has not been consumed.
		int item = 0; // Payload; written before `full` is set, read after `full` is observed.
	} slots[CAPACITY];
};
 18 | 
 19 | // Blocking API
 20 | 
// Blocking enqueue. Takes the next write ticket, waits until the ticket's slot
// has reached the ticket's turn, then stores the item and flags the slot full.
//   queue: queue to append to.
//   item:  value to store.
void enqueue(Queue *queue, int item) {
	uint32_t ticket = queue->write_ticket.fetch_add(1, relaxed); // Serialization with writers. 
	uint32_t slot = ticket % CAPACITY;
	uint8_t turn = (uint8_t)(ticket / CAPACITY);

	// Re-read `turn` after every wake-up; only proceed once it equals our turn.
	uint8_t current_turn;
	while ((current_turn = queue->slots[slot].turn.load(acquire)) != turn) // Serialization with reader.
		queue->slots[slot].turn.wait(current_turn, acquire); // Block while queue is full.
	
	// Publish the payload first, then the flag (release), then wake a waiter on the flag.
	queue->slots[slot].item = item;
	queue->slots[slot].full.store(true, release); // Serialization with reader.
	queue->slots[slot].full.notify_one();
}
// Blocking dequeue. Consumes the next read ticket, waits until that ticket's
// slot is flagged full, takes the item, then clears the flag and advances the
// slot's turn so the writer holding the next turn for this slot may proceed.
//   queue: queue to take from.
// Returns the dequeued value.
// read_ticket is a plain integer and is incremented without atomics.
int dequeue(Queue *queue) {
	uint32_t ticket = queue->read_ticket++;
	uint32_t slot = ticket % CAPACITY;
	uint8_t turn = (uint8_t)(ticket / CAPACITY);
	queue->slots[slot].full.wait(false, acquire); // Block while queue is empty.
	int item = queue->slots[slot].item;
	queue->slots[slot].full.store(false, relaxed);
	queue->slots[slot].turn.store(turn + 1, release); // Serialization with 1 writer.
	queue->slots[slot].turn.notify_all(); // Wake every writer waiting on this slot's turn.
	return item;
}
 45 | 
 46 | // Polling API
 47 | 
 48 | bool try_enqueue(Queue *queue, int item) {
 49 | 	uint32_t try_ticket = queue->write_ticket.load(relaxed); // Serialization with writers.
 50 | 	for (;;) {
 51 | 		uint32_t slot = try_ticket % CAPACITY;
 52 | 		uint8_t turn = (uint8_t)(try_ticket / CAPACITY);
 53 | 		uint8_t current_turn = queue->slots[slot].turn.load(acquire); // Serialization with reader.
 54 | 		int turns_remaining = (int)turn - (int)current_turn;
 55 | 		if (turns_remaining > 0)
 56 | 			return false; // Queue is full.
 57 | 		else if (turns_remaining < 0)
 58 | 			try_ticket = queue->write_ticket; // Another writer lapped us, try again.
 59 | 		else if (queue->write_ticket.compare_exchange_weak(try_ticket, try_ticket + 1, relaxed)) {
 60 | 			queue->slots[slot].item = item;
 61 | 			queue->slots[slot].full.store(true, release); // Serialization with reader.
 62 | 			queue->slots[slot].full.notify_one(); // Hash table lookup. Remove this if you only use Polling and not Blocking.
 63 | 			return true;
 64 | 		}
 65 | 	}
 66 | }
// Non-blocking dequeue. Looks at the slot of the current read ticket and takes
// its item only if the slot is already flagged full.
//   queue:    queue to take from.
//   out_item: receives the dequeued value; written only when true is returned.
// Returns true if an item was taken, false if the queue is empty.
// read_ticket is a plain integer and is incremented without atomics.
bool try_dequeue(Queue *queue, int *out_item) {
	uint32_t ticket = queue->read_ticket;
	uint32_t slot = ticket % CAPACITY;
	if (!queue->slots[slot].full.load(acquire)) // Serialization with 1 writer.
		return false; // Queue is empty.

	uint8_t turn = (uint8_t)(ticket / CAPACITY);
	(*out_item) = queue->slots[slot].item;
	queue->slots[slot].full.store(false, relaxed);
	queue->slots[slot].turn.store(turn + 1, release); // Serialization with 1 writer.
	queue->slots[slot].turn.notify_all(); // Hash table crawl. Remove this if you only use Polling and not Blocking.
	++(queue->read_ticket); // The ticket is consumed only after the slot has been handed back.
	return true;
}
 81 | 
 82 | // Test
 83 | 
 84 | #include <thread>
 85 | #include <assert.h>
 86 | 
 87 | void reader_thread(Queue *queue) {
 88 | 	static int counters[5][1000000];
 89 | 	int last_writer_data[5] = { -1, -1, -1, -1, -1 };
 90 | 	for (int i = 0; i < 5000000; ++i) {
 91 | 		int item;
 92 | 		if (i < 2500000)
 93 | 			item = dequeue(queue);
 94 | 		else
 95 | 			while (!try_dequeue(queue, &item));
 96 | 		int writer = item / 1000000;
 97 | 		int data = item % 1000000;
 98 | 		assert(writer < 5); // Ensure no data corruption corruption.
 99 | 		++(counters[writer][data]);
100 | 		assert(last_writer_data[writer] < data); // Ensure data is correctly sequenced FIFO.
101 | 		last_writer_data[writer] = data;
102 | 	}
103 | 	for (int writer = 0; writer < 5; ++writer)
104 | 		for (int i = 0; i < 1000000; ++i)
105 | 			assert(counters[writer][i] == 1); // Ensure all items have been properly received.
106 | }
107 | void writer_thread(Queue *queue) {
108 | 	static atomic<int> id_dispenser;
109 | 	int id = id_dispenser.fetch_add(1);
110 | 	for (int i = 0; i < 500000; ++i)
111 | 		enqueue(queue, id * 1000000 + i);
112 | 	for (int i = 500000; i < 1000000; ++i)
113 | 		while (!try_enqueue(queue, id * 1000000 + i));
114 | }
115 | int main(void) {
116 | 	static struct Queue queue;
117 | 	thread reader(reader_thread, &queue);
118 | 	thread writer0(writer_thread, &queue);
119 | 	thread writer1(writer_thread, &queue);
120 | 	thread writer2(writer_thread, &queue);
121 | 	thread writer3(writer_thread, &queue);
122 | 	thread writer4(writer_thread, &queue);
123 | 	reader.join();
124 | 	writer0.join();
125 | 	writer1.join();
126 | 	writer2.join();
127 | 	writer3.join();
128 | 	writer4.join();
129 | }
130 | 


--------------------------------------------------------------------------------
/normalize_path.c:
--------------------------------------------------------------------------------
 1 | // Normalizes file paths into a canonical form by
 2 | // - removing '.' components
 3 | // - resolving '..' components
 4 | // - replacing backslashes with forward slashes
 5 | // - merge consecutive path separators
 6 | // - removing trailing slashes
 7 | // Modifies the input string in place, the resulting string is always the same length or shorter.
 8 | void normalize(char path[]) {
 9 | 	char* src = path;
10 | 	char* dst = path;
11 | 	char* start = dst;
12 | 	for (;;) {
13 | 		if (*src == '/' || *src == '\\' || !*src) {
14 | 			int slash = *src == '/' || *src == '\\';
15 | 			while (*src == '/' || *src == '\\') src++; // merge consecutive path separators
16 | 			int exit = *src == '\0'; // we might temporarily replace this with '/'.
17 | 			if (start + 1 == dst && start[0] == '.') { // remove '.' component
18 | 				dst = start;
19 | 			}
20 | 			else if (start + 2 == dst && start[0] == '.' && start[1] == '.' && start > path) { // resolve '..' component
21 | 				start--; // skip over last separator
22 | 				if (start > path && start[-1] != ':') while (start > path && start[-1] != '/') start--; // find the separator before that, and continue from there
23 | 				if (start[0] == '.' && start[1] == '.' && start[2] == '/') { // don't remove leading '..'
24 | 					start += 5; 
25 | 					*start++ = '/';
26 | 				}
27 | 				else if ((start == path && *start == '/') || (start > path && start[-1] == ':')) start++; // don't remove absolute path
28 | 				dst = start;
29 | 			}
30 | 			else if (slash) { // replace windows '\' with unix '/' separators
31 | 				*dst++ = '/';
32 | 			}
33 | 			if (exit) break;
34 | 			start = dst;
35 | 		}
36 | 		else *dst++ = *src++;
37 | 	}
38 | 	if (dst > path + 1 && dst[-1] == '/' && dst[-2] != ':') dst--; // remove trailing separator
39 | 	*dst = '\0';
40 | }
41 | 
42 | // === TESTS ===
43 | 
44 | #include <assert.h>
45 | #include <string.h>
46 | #include <stdlib.h>
47 | 
// Returns a newly malloc'ed, normalized copy of `path`, leaving `path` itself
// untouched. Returns NULL if the allocation fails.
// The caller owns the returned string and must free() it.
char* normalize_alloc(const char* path) {
	size_t size = strlen(path) + 1; // length including the terminator, computed once
	char* copy = malloc(size);
	if (!copy)
		return NULL; // out of memory: nothing to normalize
	memcpy(copy, path, size);
	normalize(copy);
	return copy;
}
54 | 
// Runs normalize() over a table of inputs and asserts the expected result for
// each one. Every normalized copy is freed after it has been checked.
int main(void) {
	static const struct { const char* input; const char* expected; } cases[] = {
		// already canonical
		{ "file", "file" },
		{ "dir/subdir/file", "dir/subdir/file" },

		// basic usage
		{ "dir\\subdir\\file", "dir/subdir/file" },
		{ "dir/subdir/../file", "dir/file" },
		{ "dir/subdir/../../file", "file" },
		{ "dir/subdir/./file", "dir/subdir/file" },
		{ "dir/subdir///file", "dir/subdir/file" },
		{ "dir/subdir/file/", "dir/subdir/file" },

		// unix absolute paths
		{ "/file", "/file" },
		{ "/dir/subdir/file", "/dir/subdir/file" },
		{ "/", "/" },
		{ "/..", "/" },
		{ "/../..", "/" },

		// windows absolute paths
		{ "C:/file", "C:/file" },
		{ "C:/", "C:/" },
		{ "C:/..", "C:/" },
		{ "C:/../..", "C:/" },

		// edge cases
		{ "", "" },
		{ ".", "" },
		{ "..", ".." },
		{ "./", "" },
		{ "../", ".." },
		{ "/.", "/" },
		{ "/..", "/" },
		{ ".a", ".a" },
		{ "a.", "a." },
		{ "..a", "..a" },
		{ "a..", "a.." },
		{ "../..", "../.." },
		{ "../../..", "../../.." },
		{ "a/b/c/../../../../../", "../.." },
		{ "C:", "C:" }, // adding a trailing slash would make the string longer
	};

	for (size_t i = 0; i < sizeof cases / sizeof cases[0]; i++) {
		char* actual = normalize_alloc(cases[i].input);
		assert(actual && !strcmp(actual, cases[i].expected));
		free(actual); // the copy is owned by this test
	}
}
98 | 


--------------------------------------------------------------------------------
/platform_detection.c:
--------------------------------------------------------------------------------
 1 | // Source: https://sourceforge.net/p/predef/wiki/Home/
 2 | //         https://abseil.io/docs/cpp/platforms/macros
 3 | 
// Compiler detection: defines exactly one COMPILER_* macro. The first matching
// branch wins, so any compiler that defines _MSC_VER is reported as MSVC, and
// __INTEL_COMPILER and __clang__ are tested before __GNUC__ (presumably because
// those compilers may define __GNUC__ as well - confirm against the sources above).
#ifdef _MSC_VER
#	define COMPILER_MSVC
#elif defined __EMSCRIPTEN__
#	define COMPILER_EMSCRIPTEN
#elif defined __INTEL_COMPILER
#	define COMPILER_INTEL
#elif defined __clang__
#	define COMPILER_CLANG
#elif defined __GNUC__
#	define COMPILER_GCC
#elif defined __TINYC__
#	define COMPILER_TINYC
#else
#	error Unknown compiler.
#endif

// Operating system detection: defines exactly one PLATFORM_* macro. More
// specific targets are tested first: __ANDROID__ comes before __linux__, and
// Apple targets are split into iPhone and Mac via TargetConditionals.h.
#if defined _WIN32
#	define PLATFORM_WINDOWS
#elif defined __EMSCRIPTEN__
#	define PLATFORM_WEB
#elif defined __ANDROID__
#	define PLATFORM_ANDROID
#elif defined __APPLE__
#	include <TargetConditionals.h>
#	if TARGET_OS_IPHONE
#		define PLATFORM_IPHONE
#	else
#		define PLATFORM_MAC
#	endif
#elif defined __FreeBSD__ || defined __NetBSD__ || defined __OpenBSD__ || defined __bsdi__ || defined __DragonFly__
#	define PLATFORM_BSD
#elif defined __linux__
#	define PLATFORM_LINUX
#else
#	error Unknown platform.
#endif

// CPU architecture detection: defines exactly one ARCH_* macro. Each branch
// accepts both the MSVC-style (_M_*) and the GCC/Clang-style macro names.
#if defined _M_X64 || defined __x86_64__
#	define ARCH_X64
#elif defined _M_IX86 || defined __i386__
#	define ARCH_X86
#elif defined _M_ARM64 || defined __aarch64__
#	define ARCH_ARM64
#elif defined __arm__ || defined _M_ARM
#	define ARCH_ARM32
#elif defined __EMSCRIPTEN__
#	define ARCH_WASM32
#else
#	error Unknown CPU architecture.
#endif


--------------------------------------------------------------------------------
/precise_sleep.c:
--------------------------------------------------------------------------------
 1 | // The PERFECT sleeping function for Windows.
 2 | // - Sleep times accurate to 1 microsecond
 3 | // - Low CPU usage
 4 | // - Runs on Windows Vista and up
 5 | 
 6 | #include <Windows.h>
 7 | #include <stdio.h>
 8 | #pragma comment(lib, "Winmm.lib") // timeGetDevCaps, timeBeginPeriod
 9 | 
HANDLE Timer; // Waitable timer created in main; when it is NULL, PreciseSleep falls back to Sleep().
int SchedulerPeriodMs; // Timer period in milliseconds, set in main from TIMECAPS.wPeriodMin.
INT64 QpcPerSecond; // QueryPerformanceCounter ticks per second, set in main from QueryPerformanceFrequency.
13 | 
14 | void PreciseSleep(double seconds)
15 | {
16 | 	LARGE_INTEGER qpc;
17 | 	QueryPerformanceCounter(&qpc);
18 | 	INT64 targetQpc = (INT64)(qpc.QuadPart + seconds * QpcPerSecond);
19 | 
20 | 	if (Timer) // Try using a high resolution timer first.
21 | 	{
22 | 		const double TOLERANCE = 0.001'02;
23 | 		INT64 maxTicks = (INT64)SchedulerPeriodMs * 9'500;
24 | 		for (;;) // Break sleep up into parts that are lower than scheduler period.
25 | 		{
26 | 			double remainingSeconds = (targetQpc - qpc.QuadPart) / (double)QpcPerSecond;
27 | 			INT64 sleepTicks = (INT64)((remainingSeconds - TOLERANCE) * 10'000'000);
28 | 			if (sleepTicks <= 0)
29 | 				break;
30 | 
31 | 			LARGE_INTEGER due;
32 | 			due.QuadPart = -(sleepTicks > maxTicks ? maxTicks : sleepTicks);
33 | 			SetWaitableTimerEx(Timer, &due, 0, NULL, NULL, NULL, 0);
34 | 			WaitForSingleObject(Timer, INFINITE);
35 | 			QueryPerformanceCounter(&qpc);
36 | 		}
37 | 	}
38 | 	else // Fallback to Sleep.
39 | 	{
40 | 		const double TOLERANCE = 0.000'02;
41 | 		double sleepMs = (seconds - TOLERANCE) * 1000 - SchedulerPeriodMs; // Sleep for 1 scheduler period less than requested.
42 | 		int sleepSlices = (int)(sleepMs / SchedulerPeriodMs);
43 | 		if (sleepSlices > 0)
44 | 			Sleep((DWORD)sleepSlices * SchedulerPeriodMs);
45 | 		QueryPerformanceCounter(&qpc);
46 | 	}
47 | 
48 | 	while (qpc.QuadPart < targetQpc) // Spin for any remaining time.
49 | 	{
50 | 		YieldProcessor();
51 | 		QueryPerformanceCounter(&qpc);
52 | 	}
53 | }
54 | 
55 | int main(void)
56 | {
57 | 	// Initialization
58 | 	Timer = CreateWaitableTimerExW(NULL, NULL, CREATE_WAITABLE_TIMER_HIGH_RESOLUTION, TIMER_ALL_ACCESS);
59 | 	TIMECAPS caps;
60 | 	timeGetDevCaps(&caps, sizeof caps);
61 | 	timeBeginPeriod(caps.wPeriodMin);
62 | 	SchedulerPeriodMs = (int)caps.wPeriodMin;
63 | 	LARGE_INTEGER qpf;
64 | 	QueryPerformanceFrequency(&qpf);
65 | 	QpcPerSecond = qpf.QuadPart;
66 | 
67 | 	// Game loop
68 | 	for (int i = 0; i < 100; ++i)
69 | 	{
70 | 		LARGE_INTEGER qpc0, qpc1;
71 | 		QueryPerformanceCounter(&qpc0);
72 | 		PreciseSleep(1 / 60.0);
73 | 		QueryPerformanceCounter(&qpc1);
74 | 		double dt = (qpc1.QuadPart - qpc0.QuadPart) / (double)QpcPerSecond;
75 | 		printf("Slept for %.2f ms\n", 1000 * dt);
76 | 	}
77 | }
78 | 


--------------------------------------------------------------------------------
/priority_queue.c:
--------------------------------------------------------------------------------
  1 | #include <stdlib.h> // realloc, free
  2 | 
// Priority queue stored as a binary max heap in a growable array.
// A zero-initialized struct is a valid empty queue.
struct queue { // max heap
	struct item *items; // Heap array; items[0] has the highest priority. Grown with realloc.
	int capacity; // Number of items the array can hold.
	int count; // Number of items currently stored.
};
  8 | 
// One heap entry: a value together with the priority it is ordered by.
struct item {
	int priority; // Ordering key; larger values are popped first.
	int value; // Payload returned by pop and friends.
};
 13 | 
 14 | #define LEFT_CHILD(index) (2*(index)+1)
 15 | #define RIGHT_CHILD(index) (2*(index)+2)
 16 | #define PARENT(index) ((index-1)/2)
 17 | 
 18 | void upheap(struct item *items, int index) {
 19 | 	for (; index > 0 && items[index].priority > items[PARENT(index)].priority; index = PARENT(index)) {
 20 | 		struct item temp = items[index];
 21 | 		items[index] = items[PARENT(index)];
 22 | 		items[PARENT(index)] = temp;
 23 | 	}
 24 | }
 25 | 
 26 | void downheap(struct item *items, int index, int count) {
 27 | 	while (LEFT_CHILD(index) < count) {
 28 | 		int l = LEFT_CHILD(index);
 29 | 		int r = RIGHT_CHILD(index);
 30 | 		int max_child = l;
 31 | 		if (r < count && items[r].priority >= items[l].priority)
 32 | 			max_child = r;
 33 | 
 34 | 		if (items[index].priority >= items[max_child].priority)
 35 | 			break;
 36 | 
 37 | 		struct item temp = items[max_child];
 38 | 		items[max_child] = items[index];
 39 | 		items[index] = temp;
 40 | 		index = max_child;
 41 | 	}
 42 | }
 43 | 
 44 | void reserve(struct queue *queue, int min_capacity) {
 45 | 	if (queue->capacity < min_capacity) {
 46 | 		int new_capacity = 2 * queue->capacity;
 47 | 		if (new_capacity < 128)
 48 | 			new_capacity = 128;
 49 | 		while (new_capacity < min_capacity)
 50 | 			new_capacity *= 2;
 51 | 		
 52 | 		queue->items = realloc(queue->items, (int)new_capacity * sizeof queue->items[0]);
 53 | 		queue->capacity = new_capacity;
 54 | 	}
 55 | }
 56 | 
 57 | void push(struct queue *queue, int item, int priority) {
 58 | 	reserve(queue, queue->count + 1);
 59 | 	int index = queue->count++;
 60 | 	queue->items[index].priority = priority;
 61 | 	queue->items[index].value = item;
 62 | 	upheap(queue->items, index);
 63 | }
 64 | 
 65 | int pop(struct queue *queue) {
 66 | 	if (!queue->count)
 67 | 		return 0; // Tried to pop from an empty queue.
 68 | 	
 69 | 	int result = queue->items[0].value;
 70 | 	queue->items[0] = queue->items[--queue->count];
 71 | 	downheap(queue->items, 0, queue->count);
 72 | 	return result;
 73 | }
 74 | 
 75 | int push_pop(struct queue *queue, int item, int priority) {
 76 | 	if (!queue->count || priority >= queue->items[0].priority)
 77 | 		return item;
 78 | 
 79 | 	int result = queue->items[0].value;
 80 | 	queue->items[0].priority = priority;
 81 | 	queue->items[0].value = item;
 82 | 	downheap(queue->items, 0, queue->count);
 83 | 	return result;
 84 | }
 85 | 
 86 | int pop_push(struct queue *queue, int item, int priority) {
 87 | 	if (!queue->count) {
 88 | 		push(queue, item, priority);
 89 | 		return 0; // Tried to pop from an empty queue.
 90 | 	}
 91 | 
 92 | 	int result = queue->items[0].value;
 93 | 	queue->items[0].value = item;
 94 | 	queue->items[0].priority = priority;
 95 | 	downheap(queue->items, 0, queue->count);
 96 | 	return result;
 97 | }
 98 | 
 99 | void change_priority(struct queue *queue, int index, int new_priority) {
100 | 	if (index < queue->count) {
101 | 		int old_priority = queue->items[index].priority;
102 | 		queue->items[index].priority = new_priority;
103 | 		if (new_priority > old_priority)
104 | 			upheap(queue->items, index);
105 | 		else if (new_priority < old_priority)
106 | 			downheap(queue->items, index, queue->count);
107 | 	}
108 | }
109 | 
110 | void destroy(struct queue *queue) {
111 | 	free(queue->items);
112 | 	queue->items = NULL;
113 | 	queue->capacity = 0;
114 | 	queue->count = 0;
115 | }
116 | 
117 | #include <assert.h>
118 | int main(void) {
119 | 	{
120 | 		struct queue queue = { 0 };
121 | 		for (int i = 0; i < 10; ++i)
122 | 			push(&queue, i, i);
123 | 		assert(queue.count == 10);
124 | 		
125 | 		for (int i = 9; i >= 0; --i)
126 | 			assert(pop(&queue) == i);
127 | 		assert(queue.count == 0);
128 | 
129 | 		destroy(&queue);
130 | 	}
131 | 
132 | 	{
133 | 		static int priorities[10000];
134 | 		for (int i = 0; i < 10000; ++i)
135 | 			priorities[i] = rand();
136 | 
137 | 		struct queue queue = { 0 };
138 | 		for (int i = 0; i < 10000; ++i)
139 | 			push(&queue, i, priorities[i]);
140 | 		assert(queue.count == 10000);
141 | 
142 | 		int prev = -1;
143 | 		while (queue.count > 0) {
144 | 			int index = pop(&queue);
145 | 			assert(prev == -1 || priorities[prev] >= priorities[index]);
146 | 		}
147 | 
148 | 		destroy(&queue);
149 | 	}
150 | 
151 | 	{
152 | 		struct queue queue = { 0 };
153 | 		push(&queue, 0, 0);
154 | 		push(&queue, 1, 1);
155 | 		push(&queue, 2, 2);
156 | 		change_priority(&queue, 0, -99);
157 | 		change_priority(&queue, 1, 99);
158 | 		assert(pop(&queue) == 0);
159 | 		assert(pop(&queue) == 1);
160 | 		assert(pop(&queue) == 2);
161 | 		destroy(&queue);
162 | 	}
163 | }


--------------------------------------------------------------------------------
/slab_allocator.c:
--------------------------------------------------------------------------------
  1 | #include <stdlib.h> // malloc, free, size_t
  2 | #include <string.h> // memcpy
  3 | 
  4 | #define SLAB_SIZE (64*1024)
  5 | 
// Bump allocator over a doubly linked chain of slabs.
struct allocator {
	struct slab *slab; // Slab that allocations are currently served from.
	int cursor; // Running byte total across the chain; the value reset() takes to roll back to.
};
 10 | 
// One link of the slab chain. Slabs created by allocate() are a single malloc
// block holding this header followed directly by `capacity` bytes of storage.
struct slab {
	struct slab *prev; // Previous slab in the chain, NULL for the first one.
	struct slab *next; // Next slab in the chain, NULL for the last one.
	void *memory; // Start of this slab's storage.
	int capacity; // Size of the storage in bytes; destroy() only frees slabs whose capacity is non-zero.
	int cursor; // Bytes of the storage already handed out (or skipped).
};
 18 | 
 19 | void *allocate(struct allocator *allocator, int size, int alignment) {
 20 | 	size_t mask = (size_t)alignment - 1;
 21 | 	for (;;) {
 22 | 		size_t unaligned = (size_t)allocator->slab->memory + allocator->slab->cursor;
 23 | 		size_t aligned = (unaligned + mask) & ~mask;
 24 | 		int needed = size + (int)(aligned - unaligned);
 25 | 		int remaining = allocator->slab->capacity - allocator->slab->cursor;
 26 | 		if (needed <= remaining) {
 27 | 			allocator->slab->cursor += needed;
 28 | 			allocator->cursor += needed;
 29 | 			return (void *)aligned;
 30 | 		}
 31 | 
 32 | 		struct slab *next = allocator->slab->next;
 33 | 		if (!next) {
 34 | 			int worst_case = size + alignment - 1;
 35 | 			int consecutive_slabs = (worst_case + SLAB_SIZE - 1) / SLAB_SIZE;
 36 | 			int capacity = consecutive_slabs * SLAB_SIZE;
 37 | 			next = malloc(sizeof next[0] + capacity);
 38 | 			next->prev = allocator->slab;
 39 | 			next->next = NULL;
 40 | 			next->memory = next + 1;
 41 | 			next->capacity = capacity;
 42 | 			next->cursor = 0;
 43 | 			allocator->slab->next = next;
 44 | 		}
 45 | 
 46 | 		allocator->cursor += remaining;
 47 | 		allocator->slab->cursor += remaining;
 48 | 		allocator->slab = next;
 49 | 	}
 50 | }
 51 | 
 52 | void deallocate(struct allocator *allocator, void *block, int size) {
 53 | 	char *end = (char *)block + size;
 54 | 	char *top = (char *)allocator->slab->memory + allocator->slab->cursor;
 55 | 	if (end == top) {
 56 | 		allocator->slab->cursor -= size;
 57 | 		allocator->cursor -= size;
 58 | 	}
 59 | }
 60 | 
 61 | void *reallocate(struct allocator *allocator, void *block, int old_size, int new_size, int alignment) {
 62 | 	size_t mask = (size_t)alignment - 1;
 63 | 	if (!((size_t)block & mask)) {
 64 | 		char *end = (char *)block + old_size;
 65 | 		char *top = (char *)allocator->slab->memory + allocator->slab->cursor;
 66 | 		int delta = new_size - old_size;
 67 | 		if (end == top && allocator->slab->cursor + delta <= allocator->slab->capacity) {
 68 | 			allocator->slab->cursor += delta;
 69 | 			allocator->cursor += delta;
 70 | 			return block;
 71 | 		}
 72 | 		if (new_size < old_size)
 73 | 			return block;
 74 | 	}
 75 | 
 76 | 	void *copy = allocate(allocator, new_size, alignment);
 77 | 	int to_copy = new_size;
 78 | 	if (to_copy > old_size)
 79 | 		to_copy = old_size;
 80 | 	memcpy(copy, block, (size_t)to_copy);
 81 | 	return copy;
 82 | }
 83 | 
 84 | void reset(struct allocator *allocator, int cursor) {
 85 | 	for (;;) {
 86 | 		int remaining = allocator->cursor - cursor;
 87 | 		if (remaining <= allocator->slab->cursor) {
 88 | 			allocator->slab->cursor -= remaining;
 89 | 			allocator->cursor = cursor;
 90 | 			return;
 91 | 		}
 92 | 
 93 | 		allocator->cursor -= allocator->slab->cursor;
 94 | 		allocator->slab->cursor = 0;
 95 | 		if (allocator->slab->prev)
 96 | 			allocator->slab = allocator->slab->prev;
 97 | 	}
 98 | }
 99 | 
100 | void trim(struct allocator *allocator) {
101 | 	struct slab *slab = allocator->slab->next;
102 | 	allocator->slab->next = NULL;
103 | 	while (slab) {
104 | 		struct slab *next = slab->next;
105 | 		free(slab);
106 | 		slab = next;
107 | 	}
108 | }
109 | 
110 | void destroy(struct allocator *allocator) {
111 | 	struct slab *slab = allocator->slab;
112 | 	while (slab->prev)
113 | 		slab = slab->prev;
114 | 	while (slab) {
115 | 		struct slab *next = slab->next;
116 | 		if (slab->capacity)
117 | 			free(slab);
118 | 		slab = next;
119 | 	}
120 | }
121 | 
122 | #include <assert.h>
// Self-test for the slab allocator. Both scenarios start from an allocator
// whose only slab is a zero-capacity compound literal.
int main(void) {
	// Scenario 1: zero-sized and NULL operations on an empty allocator, run twice
	// on the same allocator (so it is used again after trim and destroy).
	{
		struct allocator allocator = { .slab = &(struct slab) { 0 } };
		// None of these should crash.
		for (int i = 0; i < 2; ++i) {
			allocate(&allocator, 0, 1);
			assert(allocator.cursor == 0);
			reallocate(&allocator, NULL, 0, 0, 1);
			assert(allocator.cursor == 0);
			deallocate(&allocator, NULL, 0);
			assert(allocator.cursor == 0);
			trim(&allocator);
			destroy(&allocator);
		}
	}

	// Scenario 2: a longer script of allocations, reallocations and resets.
	{
		struct allocator allocator = { .slab = &(struct slab) { 0 } };

		// Three int arrays a, b, c: the cursor advances by at least the
		// requested size each time and earlier contents stay intact.
		assert(allocator.cursor == 0);
		int ne = 999;
		int nb = ne * sizeof(int);
		int *a = allocate(&allocator, nb, _Alignof(int));
		int marka = allocator.cursor;
		assert(marka >= nb && marka < nb + _Alignof(int) && marka == allocator.cursor);
		for (int i = 0; i < ne; ++i)
			a[i] = i;

		int *b = allocate(&allocator, nb, _Alignof(int));
		int markb = allocator.cursor;
		assert(markb >= 2 * nb && markb < 2 * (nb + _Alignof(int)));
		for (int i = 0; i < ne; ++i) {
			assert(a[i] == i);
			b[i] = 2 * i;
		}

		int *c = allocate(&allocator, nb, _Alignof(int));
		int markc = allocator.cursor;
		assert(markc >= 3 * nb && markc < 3 * (nb + _Alignof(int)));
		for (int i = 0; i < ne; ++i) {
			assert(a[i] == i);
			assert(b[i] == 2 * i);
			c[i] = 3 * i;
		}

		// Growing a (not the most recent allocation): the cursor grows by the
		// new size and d holds a's contents while a itself stays readable.
		int mark = allocator.cursor;
		int *d = reallocate(&allocator, a, nb, 2 * nb, _Alignof(int));
		int markd = allocator.cursor;
		assert(markd >= 5 * nb && markd < 5 * nb + 4 * _Alignof(int));
		for (int i = 0; i < ne; ++i) {
			assert(a[i] == i);
			assert(b[i] == 2 * i);
			assert(c[i] == 3 * i);
			assert(d[i] == i);
		}
		for (int i = ne; i < 2 * ne; ++i)
			d[i] = i;

		// Growing d (the most recent allocation) must happen in place.
		int *e = reallocate(&allocator, d, 2 * nb, 3 * nb, _Alignof(int));
		int marke = allocator.cursor;
		assert(marke >= 6 * nb && marke < 6 * nb + 4 * _Alignof(int));
		assert(e == d);
		for (int i = 0; i < 2 * ne; ++i)
			assert(e[i] == i);
		for (int i = 2 * ne; i < 3 * ne; ++i)
			e[i] = i;

		// Deallocating c (not the most recent allocation) must change nothing.
		int mark1 = allocator.cursor;
		deallocate(&allocator, c, nb);
		assert(allocator.cursor == mark1);
		for (int i = 0; i < ne; ++i) {
			assert(a[i] == i);
			assert(b[i] == 2 * i);
		}
		for (int i = 0; i < 3 * ne; ++i)
			assert(e[i] == i);

		// Rolling back to the mark taken before d keeps the older data readable.
		reset(&allocator, mark);
		assert(allocator.cursor == mark);
		for (int i = 0; i < ne; ++i)
			assert(b[i] == 2 * i);

		reset(&allocator, 0);
		assert(allocator.cursor == 0);

		// Requests larger than SLAB_SIZE.
		char *f = allocate(&allocator, SLAB_SIZE + 1024, 1);
		int fmark = allocator.cursor;
		assert(fmark >= 2 * SLAB_SIZE + 1024 && fmark <= 2 * SLAB_SIZE + 1025);
		memset(f, 'f', SLAB_SIZE + 1024);

		char *g = allocate(&allocator, 2 * SLAB_SIZE + 1024, 1);
		int gmark = allocator.cursor;
		assert(gmark >= 5 * SLAB_SIZE + 1024 && gmark < 5 * SLAB_SIZE + 1024 + 64);
		memset(g, 'g', 2 * SLAB_SIZE + 1024);

		// Growing f: h must start with f's bytes, and f and g must be untouched.
		char *h = reallocate(&allocator, f, SLAB_SIZE + 1024, 3 * SLAB_SIZE + 1024, 1);
		int hmark = allocator.cursor;
		for (int i = 0; i < SLAB_SIZE + 1024; ++i)
			assert(f[i] == 'f');
		for (int i = 0; i < 2 * SLAB_SIZE + 1024; ++i)
			assert(g[i] == 'g');
		for (int i = 0; i < SLAB_SIZE + 1024; ++i)
			assert(h[i] == 'f');
		memset(h, 'h', 3 * SLAB_SIZE + 1024);

		// Deallocating h (the most recent allocation) lowers the cursor, and
		// the next allocation reuses the same address.
		deallocate(&allocator, h, 3 * SLAB_SIZE + 1024);
		assert(allocator.cursor >= gmark && allocator.cursor < hmark);

		char *k = allocate(&allocator, SLAB_SIZE, 1);
		assert(k == h);
		memset(k, 'k', SLAB_SIZE);

		reset(&allocator, gmark);
		assert(allocator.cursor == gmark);

		allocate(&allocator, 2 * SLAB_SIZE, 2);
		reset(&allocator, 0);
		assert(allocator.cursor == 0);

		// Every returned pointer must honor the requested power-of-two alignment.
		for (int i = 0; i < 1000; ++i) {
			for (int align = 2048; align >= 1; align /= 2) {
				void *ptr = allocate(&allocator, 1, align);
				assert(!((size_t)ptr & (size_t)(align - 1)));
			}
		}

		// Tear down: release everything, free the unused slabs, then the rest.
		reset(&allocator, 0);
		assert(allocator.cursor == 0);
		trim(&allocator);
		destroy(&allocator);
	}
}


--------------------------------------------------------------------------------
/stack_allocator.c:
--------------------------------------------------------------------------------
  1 | #include <stdint.h> // uintptr_t
  2 | #include <string.h> // memcpy - only needed for realloc
  3 | 
// A linear (stack/bump) allocator over a caller-provided buffer.
// Allocations are carved off the front of `buffer` by advancing `cursor`.
struct allocator {
	void *buffer; // Start of the backing memory. May be null when capacity is 0.
	int capacity; // Size of `buffer` in bytes.
	int cursor; // Offset in bytes of the first free byte in `buffer`.
};
  9 | 
 10 | void *allocate(struct allocator *allocator, int size, int alignment) {
 11 | 	uintptr_t mask = (uintptr_t)alignment - 1; // Alignment must be a power of 2.
 12 | 	uintptr_t unaligned = (uintptr_t)allocator->buffer + allocator->cursor;
 13 | 	uintptr_t aligned = (unaligned + mask) & ~mask;
 14 | 	int new_cursor = allocator->cursor + size + (int)(aligned - unaligned);
 15 | 	if (new_cursor > allocator->capacity)
 16 | 		return 0;
 17 | 
 18 | 	allocator->cursor = new_cursor;
 19 | 	return (void *)aligned;
 20 | }
 21 | 
 22 | void deallocate(struct allocator *allocator, void *block, int size) {
 23 | 	if ((char *)block + size == (char *)allocator->buffer + allocator->cursor)
 24 | 		allocator->cursor -= size;
 25 | }
 26 | 
 27 | void *reallocate(struct allocator *allocator, void *block, int old_size, int new_size, int alignment) {
 28 | 	uintptr_t mask = (uintptr_t)alignment - 1;
 29 | 	if ((char *)block + old_size == (char *)allocator->buffer + allocator->cursor && ((uintptr_t)block & mask) == 0) {
 30 | 		int new_cursor = allocator->cursor + new_size - old_size;
 31 | 		if (new_cursor > allocator->capacity)
 32 | 			return 0;
 33 | 		allocator->cursor = new_cursor;
 34 | 		return block;
 35 | 	}
 36 | 
 37 | 	void *result = allocate(allocator, new_size, alignment);
 38 | 	if (result) {
 39 | 		int to_copy = new_size < old_size ? new_size : old_size;
 40 | 		memcpy(result, block, (size_t)to_copy);
 41 | 	}
 42 | 	return result;
 43 | }
 44 | 
 45 | #include <assert.h>
 46 | int main(void) {
 47 | 	struct allocator allocator = { 0 };
 48 | 	assert(!allocate(&allocator, 1, 1));
 49 | 	assert(!allocate(&allocator, 1, 1));
 50 | 	deallocate(&allocator, 0, 0);
 51 | 	assert(!reallocate(&allocator, 0, 0, 1, 1));
 52 | 
 53 | 	_Alignas(16) char buffer[16];
 54 | 	allocator = (struct allocator){ .buffer = buffer, .capacity = sizeof buffer };
 55 | 	char *c = allocate(&allocator, sizeof(char), _Alignof(char));
 56 | 	short *s = allocate(&allocator, sizeof(short), _Alignof(short));
 57 | 	int *i = allocate(&allocator, sizeof(int), _Alignof(int));
 58 | 	long long *l = allocate(&allocator, sizeof(long long), _Alignof(long long));
 59 | 	long long *null = allocate(&allocator, sizeof(long long), _Alignof(long long));
 60 | 	assert(c && (uintptr_t)c % _Alignof(char) == 0);
 61 | 	assert(s && (uintptr_t)s % _Alignof(short) == 0);
 62 | 	assert(i && (uintptr_t)i % _Alignof(int) == 0);
 63 | 	assert(l && (uintptr_t)l % _Alignof(long long) == 0);
 64 | 	assert(!null);
 65 | 
 66 | 	deallocate(&allocator, l, sizeof(long long));
 67 | 	l = allocate(&allocator, sizeof(long long), _Alignof(long long));
 68 | 	assert(l);
 69 | 
 70 | 	deallocate(&allocator, l, sizeof(long long));
 71 | 	deallocate(&allocator, i, sizeof(int));
 72 | 	int *ints = allocate(&allocator, 3 * sizeof(int), _Alignof(int));
 73 | 	assert(ints);
 74 | 	ints[0] = ints[1] = ints[2] = 42;
 75 | 
 76 | 	long long big_buffer[1024];
 77 | 	allocator = (struct allocator){ .buffer = big_buffer, .capacity = sizeof big_buffer };
 78 | 	assert(!allocate(&allocator, 1024 * sizeof(long long) + 1, 1));
 79 | 	l = allocate(&allocator, 1024 * sizeof(long long), _Alignof(long long));
 80 | 	assert(l);
 81 | 	deallocate(&allocator, l, 1024 * sizeof(long long));
 82 | 	l = allocate(&allocator, 1024 * sizeof(long long), _Alignof(long long));
 83 | 	assert(l);
 84 | 	
 85 | 	i = reallocate(&allocator, l, 1024 * sizeof(long long), 0, 1);
 86 | 	assert(allocator.cursor == 0);
 87 | 	i = reallocate(&allocator, i, 0, sizeof(int), _Alignof(int));
 88 | 	*i = 42;
 89 | 	assert(allocator.cursor == sizeof(int));
 90 | 	i = reallocate(&allocator, i, sizeof(int), 10 * sizeof(int), _Alignof(int));
 91 | 	assert(allocator.cursor == 10 * sizeof(int));
 92 | 	i = reallocate(&allocator, i, 10 * sizeof(int), 2048 * sizeof(int), _Alignof(int));
 93 | 	assert(allocator.cursor == 2048 * sizeof(int));
 94 | 	i = reallocate(&allocator, i, 2048 * sizeof(int), 11 * sizeof(int), _Alignof(int));
 95 | 	assert(allocator.cursor == 11 * sizeof(int));
 96 | 	for (int j = 0; j < 11; ++j)
 97 | 		i[j] = j;
 98 | 	l = reallocate(&allocator, NULL, 0, 1, _Alignof(long long));
 99 | 	int *i1 = reallocate(&allocator, i, 11 * sizeof(int), 12 * sizeof(int), _Alignof(int));
100 | 	assert(i1 != i);
101 | 	for (int j = 0; j < 11; ++j)
102 | 		assert(i1[j] == j);
103 | 	allocate(&allocator, 2, _Alignof(char));
104 | 	int *i2 = reallocate(&allocator, i1, 12 * sizeof(int), 3 * sizeof(int), _Alignof(int));
105 | 	assert(i2 != i1);
106 | 	for (int j = 0; j < 3; ++j)
107 | 		assert(i2[j] == j);
108 | 	int *i3 = reallocate(&allocator, i2, 3 * sizeof(int), 3 * sizeof(int), 64);
109 | 	assert(i3 != i2);
110 | 	for (int j = 0; j < 3; ++j)
111 | 		assert(i2[j] == j);
112 | }


--------------------------------------------------------------------------------
/string_buffer.c:
--------------------------------------------------------------------------------
  1 | #include <stdio.h> // snprintf
  2 | #include <string.h> // memcpy
  3 | #include <stdarg.h> // va_list, va_start, va_end
  4 | 
// A fixed-capacity string builder over caller-provided memory.
// Appends that don't fit are truncated, but `bytes_needed` keeps counting
// so the caller can tell how large the buffer would have to be.
struct buffer {
	char *buffer; // Always kept null terminated.
	int cursor; // Number of chars written so far; index of the null terminator.
	int capacity; // Size of `buffer` in bytes, including room for the null terminator.
	int bytes_needed; // Includes null terminator.
};
 11 | 
 12 | struct buffer create(char *buffer, int capacity) {
 13 | 	if (capacity > 0)
 14 | 		buffer[0] = 0;
 15 | 	return (struct buffer) {
 16 | 		.buffer = buffer,
 17 | 		.capacity = capacity,
 18 | 		.bytes_needed = 1,
 19 | 	};
 20 | }
 21 | 
 22 | void append_char(struct buffer *buffer, char c) {
 23 | 	++buffer->bytes_needed;
 24 | 	if (buffer->cursor + 1 < buffer->capacity) {
 25 | 		buffer->buffer[buffer->cursor++] = c;
 26 | 		buffer->buffer[buffer->cursor] = 0;
 27 | 	}
 28 | }
 29 | 
 30 | void append_char_repeated(struct buffer *buffer, char c, int count) {
 31 | 	int remaining_chars = buffer->capacity - buffer->cursor - 1;
 32 | 	if (remaining_chars < 0)
 33 | 		remaining_chars = 0;
 34 | 	int bytes_to_set = count;
 35 | 	if (bytes_to_set > remaining_chars)
 36 | 		bytes_to_set = remaining_chars;
 37 | 	memset(buffer->buffer + buffer->cursor, c, (size_t)bytes_to_set);
 38 | 	buffer->bytes_needed += count;
 39 | 	buffer->cursor += bytes_to_set;
 40 | 	if (bytes_to_set > 0)
 41 | 		buffer->buffer[buffer->cursor] = 0;
 42 | }
 43 | 
 44 | void append_bytes(struct buffer *buffer, const void *bytes, int size) {
 45 | 	int remaining_chars = buffer->capacity - buffer->cursor - 1;
 46 | 	if (remaining_chars < 0)
 47 | 		remaining_chars = 0;
 48 | 	int bytes_to_copy = size;
 49 | 	if (bytes_to_copy > remaining_chars)
 50 | 		bytes_to_copy = remaining_chars;
 51 | 	memcpy(buffer->buffer + buffer->cursor, bytes, (size_t)bytes_to_copy);
 52 | 	buffer->bytes_needed += size;
 53 | 	buffer->cursor += bytes_to_copy;
 54 | 	if (bytes_to_copy > 0)
 55 | 		buffer->buffer[buffer->cursor] = 0;
 56 | }
 57 | 
 58 | void append_string(struct buffer *buffer, const char *string) {
 59 | 	int remaining_chars = buffer->capacity - buffer->cursor - 1;
 60 | 	if (remaining_chars < 0)
 61 | 		remaining_chars = 0;
 62 | 	int length;
 63 | 	for (length = 0; length < remaining_chars && string[length]; ++length)
 64 | 		buffer->buffer[buffer->cursor++] = string[length];
 65 | 	buffer->bytes_needed += length + (int)strlen(string + length);
 66 | 	if (remaining_chars > 0)
 67 | 		buffer->buffer[buffer->cursor] = 0;
 68 | }
 69 | 
 70 | void append_format_va(struct buffer *buffer, const char *format, va_list args) {
 71 | 	int remaining_bytes = buffer->capacity - buffer->cursor;
 72 | 	int chars_needed = vsnprintf(buffer->buffer + buffer->cursor, (size_t)remaining_bytes, format, args);
 73 | 	int chars_written = chars_needed;
 74 | 	if (chars_written > remaining_bytes - 1)
 75 | 		chars_written = remaining_bytes - 1;
 76 | 	if (chars_written > 0)
 77 | 		buffer->cursor += chars_written;
 78 | 	buffer->bytes_needed += chars_needed;
 79 | }
 80 | 
 81 | void append_format(struct buffer *buffer, const char *format, ...) {
 82 | 	va_list args;
 83 | 	va_start(args, format);
 84 | 	append_format_va(buffer, format, args);
 85 | 	va_end(args);
 86 | }
 87 | 
 88 | #include <assert.h>
// Creates a string buffer backed by a zeroed compound-literal array of `capacity` chars.
#define BUFFER_ON_STACK(capacity) create((char[capacity]){0},(capacity))
// Self-test for the string buffer. Each section exercises one function: normal appends,
// truncation when the buffer is full, a null buffer with capacity 0 (only `bytes_needed`
// changes), and, where applicable, embedded null chars.
int main(void) {
	// create
	{
		struct buffer sb;

		sb = create(NULL, 0);
		assert(!sb.buffer && sb.capacity == 0 && sb.cursor == 0 && sb.bytes_needed == 1);

		// Creating a buffer must write the terminator into the provided memory.
		char a[3] = { 1, 2, 3 };
		sb = create(a + 1, 1);
		assert(sb.buffer == a + 1 && sb.capacity == 1 && sb.cursor == 0 && sb.bytes_needed == 1);
		assert(a[1] == 0);

		sb = BUFFER_ON_STACK(1);
		assert(sb.buffer && sb.buffer[0] == 0 && sb.capacity == 1 && sb.cursor == 0 && sb.bytes_needed == 1);

		sb = BUFFER_ON_STACK(42);
		assert(sb.buffer && sb.buffer[0] == 0 && sb.capacity == 42 && sb.cursor == 0 && sb.bytes_needed == 1);
	}

	// append_char
	{
		struct buffer sb;

		sb = BUFFER_ON_STACK(4);
		append_char(&sb, 'a');
		assert(!strcmp(sb.buffer, "a"));
		assert(sb.cursor == 1 && sb.bytes_needed == 2);

		append_char(&sb, 'b');
		append_char(&sb, 'c');
		assert(!strcmp(sb.buffer, "abc"));
		assert(sb.cursor == 3 && sb.bytes_needed == 4);

		// The buffer is full: further chars are dropped but still counted.
		append_char(&sb, 'd');
		append_char(&sb, 'e');
		append_char(&sb, 'f');
		assert(!strcmp(sb.buffer, "abc"));
		assert(sb.cursor == 3 && sb.bytes_needed == 7);

		sb = create(NULL, 0);
		for (int i = 0; i < 100; ++i)
			append_char(&sb, (char)i);
		assert(sb.cursor == 0 && sb.bytes_needed == 101);

		// An appended '\0' is stored like any other char.
		sb = BUFFER_ON_STACK(8);
		append_char(&sb, 'a');
		append_char(&sb, 'b');
		append_char(&sb, 'c');
		append_char(&sb, '\0');
		assert(!strcmp(sb.buffer, "abc"));
		assert(sb.cursor == 4 && sb.bytes_needed == 5);

		append_char(&sb, 'd');
		append_char(&sb, 'e');
		assert(!strcmp(sb.buffer, "abc"));
		assert(!strcmp(sb.buffer + 4, "de"));
		assert(sb.cursor == 6 && sb.bytes_needed == 7);
	}

	// append_char_repeated
	{
		struct buffer sb;

		sb = BUFFER_ON_STACK(8);
		append_char_repeated(&sb, 'a', 3);
		assert(!strcmp(sb.buffer, "aaa"));
		assert(sb.cursor == 3 && sb.bytes_needed == 4);

		append_char_repeated(&sb, 'b', 1);
		assert(!strcmp(sb.buffer, "aaab"));
		assert(sb.cursor == 4 && sb.bytes_needed == 5);

		append_char_repeated(&sb, 'c', 0);
		assert(!strcmp(sb.buffer, "aaab"));
		assert(sb.cursor == 4 && sb.bytes_needed == 5);

		// Only 3 of the 4 chars fit; all 4 are counted.
		append_char_repeated(&sb, 'd', 4);
		assert(!strcmp(sb.buffer, "aaabddd"));
		assert(sb.cursor == 7 && sb.bytes_needed == 9);

		append_char_repeated(&sb, 'e', 100);
		assert(!strcmp(sb.buffer, "aaabddd"));
		assert(sb.cursor == 7 && sb.bytes_needed == 109);

		sb = create(NULL, 0);
		for (int i = 0; i < 100; ++i)
			append_char_repeated(&sb, (char)i, 100);
		assert(sb.cursor == 0 && sb.bytes_needed == 100 * 100 + 1);

		sb = BUFFER_ON_STACK(8);
		append_char_repeated(&sb, 'a', 3);
		append_char_repeated(&sb, '\0', 3);
		assert(sb.cursor == 6 && sb.bytes_needed == 7);
		assert(!strcmp(sb.buffer, "aaa") && !memcmp(sb.buffer + 3, "\0\0\0\0", 4));
		append_char_repeated(&sb, 'b', 3);
		assert(sb.cursor == 7 && sb.bytes_needed == 10);
		assert(!strcmp(sb.buffer + 6, "b"));
	}

	// append_bytes
	{
		struct buffer sb;

		sb = BUFFER_ON_STACK(8);
		append_bytes(&sb, "123", 3);
		assert(!strcmp(sb.buffer, "123"));
		assert(sb.cursor == 3 && sb.bytes_needed == 4);

		append_bytes(&sb, "", 0);
		assert(!strcmp(sb.buffer, "123"));
		assert(sb.cursor == 3 && sb.bytes_needed == 4);

		append_bytes(&sb, "4567", 4);
		assert(!strcmp(sb.buffer, "1234567"));
		assert(sb.cursor == 7 && sb.bytes_needed == 8);

		// The buffer is full: nothing is copied but the bytes are still counted.
		append_bytes(&sb, "890", 3);
		assert(!strcmp(sb.buffer, "1234567"));
		assert(sb.cursor == 7 && sb.bytes_needed == 11);

		sb = create(NULL, 0);
		for (int i = 0; i < 100; ++i)
			append_bytes(&sb, "1234", 4);
		assert(sb.cursor == 0 && sb.bytes_needed == 401);

		sb = BUFFER_ON_STACK(12);
		append_bytes(&sb, "12345", 5);
		append_bytes(&sb, "\0\0\0\0\0", 5);
		assert(!strcmp(sb.buffer, "12345"));
		assert(!memcmp(sb.buffer + 5, "\0\0\0\0\0\0", 6));
		assert(sb.cursor == 10 && sb.bytes_needed == 11);

		append_bytes(&sb, "6789", 4);
		assert(!strcmp(sb.buffer + 10, "6"));
		assert(sb.cursor == 11 && sb.bytes_needed == 15);
	}

	// append_string
	{
		struct buffer sb;

		sb = BUFFER_ON_STACK(8);
		append_string(&sb, "123");
		assert(!strcmp(sb.buffer, "123"));
		assert(sb.cursor == 3 && sb.bytes_needed == 4);

		append_string(&sb, "");
		assert(!strcmp(sb.buffer, "123"));
		assert(sb.cursor == 3 && sb.bytes_needed == 4);

		append_string(&sb, "4567");
		assert(!strcmp(sb.buffer, "1234567"));
		assert(sb.cursor == 7 && sb.bytes_needed == 8);

		append_string(&sb, "890");
		assert(!strcmp(sb.buffer, "1234567"));
		assert(sb.cursor == 7 && sb.bytes_needed == 11);

		sb = create(NULL, 0);
		for (int i = 0; i < 100; ++i)
			append_string(&sb, "1234");
		assert(sb.cursor == 0 && sb.bytes_needed == 401);

		// A string longer than the buffer is truncated to capacity - 1 chars.
		sb = BUFFER_ON_STACK(7);
		append_string(&sb, "123456789");
		assert(!strcmp(sb.buffer, "123456"));
		assert(sb.cursor == 6 && sb.bytes_needed == 10);
	}

	// append_format
	{
		struct buffer sb;

		sb = BUFFER_ON_STACK(8);
		append_format(&sb, "123");
		assert(!strcmp(sb.buffer, "123"));
		assert(sb.cursor == 3 && sb.bytes_needed == 4);

		append_format(&sb, "");
		assert(!strcmp(sb.buffer, "123"));
		assert(sb.cursor == 3 && sb.bytes_needed == 4);

		append_format(&sb, "%d", 4567);
		assert(!strcmp(sb.buffer, "1234567"));
		assert(sb.cursor == 7 && sb.bytes_needed == 8);

		append_format(&sb, "890");
		assert(!strcmp(sb.buffer, "1234567"));
		assert(sb.cursor == 7 && sb.bytes_needed == 11);

		sb = create(NULL, 0);
		for (int i = 0; i < 100; ++i)
			append_format(&sb, "1234");
		assert(sb.cursor == 0 && sb.bytes_needed == 401);

		sb = BUFFER_ON_STACK(7);
		append_format(&sb, "%s", "123456789");
		assert(!strcmp(sb.buffer, "123456"));
		assert(sb.cursor == 6 && sb.bytes_needed == 10);

		sb = BUFFER_ON_STACK(256);
		append_format(&sb, "Hello%c ", '!');
		append_format(&sb, "You are '%s' number %d.", "sailor", 42);
		assert(!strcmp(sb.buffer, "Hello! You are 'sailor' number 42."));
	}
}


--------------------------------------------------------------------------------
/string_set.c:
--------------------------------------------------------------------------------
  1 | #include <stdlib.h> // malloc, free
  2 | #include <string.h> // strlen, strcmp, memcpy, memset
  3 | 
// A hash set of strings using open addressing with linear probing.
// The set stores its own copies of the strings in a chain of slabs.
struct set {
	char **items; // `capacity` slots: null = empty, TOMBSTONE = removed, anything else = a stored string.
	struct slab *slab; // Newest slab of string storage; older slabs are linked through `prev`.
	int count; // Number of strings currently in the set.
	int capacity; // Number of slots in `items`; a power of 2 once allocated.
	int num_tombstones; // Number of TOMBSTONE slots in `items`.
};
 11 | 
// Header of one chunk of string storage. Strings are packed back to back in the
// memory that follows the header.
struct slab {
	struct slab *prev; // Older slab in the chain, or null for the oldest one.
	int cursor; // Number of bytes used in this slab's memory.
	int capacity; // Number of bytes available in this slab's memory.
	// Memory comes right after this.
};
 18 | 
// Marker stored (cast to a pointer) in a slot whose string was removed, so that lookups
// keep probing past the slot instead of stopping as they do at an empty (null) slot.
#define TOMBSTONE 1
 20 | 
 21 | unsigned long long hash_string(const char *string) {
 22 | 	unsigned long long hash = 14695981039346656037u;
 23 | 	for (int i = 0; string[i]; ++i)
 24 | 		hash = (hash ^ string[i]) * 1099511628211u;
 25 | 	return hash;
 26 | }
 27 | 
 28 | char *copy_string(struct slab **slab, const char *string) {
 29 | 	int size = 1 + (int)strlen(string);
 30 | 	if ((*slab)->capacity - (*slab)->cursor < size) {
 31 | 		int new_capacity = 1024;
 32 | 		while (new_capacity < size)
 33 | 			new_capacity *= 2;
 34 | 		struct slab *new_slab = malloc(sizeof new_slab[0] + new_capacity);
 35 | 		new_slab->capacity = new_capacity;
 36 | 		new_slab->cursor = 0;
 37 | 		new_slab->prev = *slab;
 38 | 		*slab = new_slab;
 39 | 	}
 40 | 	char *copy = (char *)(*slab + 1) + (*slab)->cursor;
 41 | 	(*slab)->cursor += size;
 42 | 	memcpy(copy, string, (size_t)size);
 43 | 	return copy;
 44 | }
 45 | 
 46 | void resize(struct set *set, int capacity) {
 47 | 	if (capacity <= set->count)
 48 | 		capacity = set->count + 1;
 49 | 	
 50 | 	int pow2;
 51 | 	for (pow2 = 0; (1 << pow2) < capacity; ++pow2);
 52 | 	capacity = 1 << pow2;
 53 | 
 54 | 	int total_string_size = 0;
 55 | 	for (struct slab *slab = set->slab; slab; slab = slab->prev)
 56 | 		total_string_size += slab->cursor;
 57 | 
 58 | 	int first_slab_capacity = 1024;
 59 | 	while (first_slab_capacity < total_string_size)
 60 | 		first_slab_capacity *= 2;
 61 | 
 62 | 	void *new_memory = malloc(capacity * sizeof set->items[0] + sizeof set->slab[0] + first_slab_capacity);
 63 | 	char **new_items = new_memory;
 64 | 	memset(new_items, 0, capacity * sizeof set->items[0]);
 65 | 	struct slab *new_slab = (struct slab *)(new_items + capacity);
 66 | 	new_slab->prev = NULL;
 67 | 	new_slab->capacity = first_slab_capacity;
 68 | 	new_slab->cursor = 0;
 69 | 
 70 | 	unsigned mask = capacity - 1;
 71 | 	for (int i = 0; i < set->capacity; ++i) {
 72 | 		if ((size_t)set->items[i] > TOMBSTONE) {
 73 | 			char *item = copy_string(&new_slab, set->items[i]);
 74 | 			unsigned long long hash = hash_string(item);
 75 | 			for (unsigned j = (unsigned)hash & mask;; j = (j + 1) & mask) {
 76 | 				if (!new_items[j]) {
 77 | 					new_items[j] = item;
 78 | 					break;
 79 | 				}
 80 | 			}
 81 | 		}
 82 | 	}
 83 | 
 84 | 	for (struct slab *slab = set->slab; slab && slab->prev;) {
 85 | 		struct slab *prev = slab->prev;
 86 | 		free(slab);
 87 | 		slab = prev;
 88 | 	}
 89 | 	free(set->items); // This also frees the metadata, and slab.
 90 | 	set->items = new_items;
 91 | 	set->slab = new_slab;
 92 | 	set->capacity = capacity;
 93 | 	set->num_tombstones = 0;
 94 | }
 95 | 
 96 | void reserve(struct set *set, int min_capacity) {
 97 | 	if (3 * min_capacity > 2 * set->capacity) {
 98 | 		int new_capacity = 3 * min_capacity / 2;
 99 | 		if (new_capacity < 64)
100 | 			new_capacity = 64;
101 | 		resize(set, new_capacity);
102 | 	}
103 | }
104 | 
105 | void add(struct set *set, const char *item) {
106 | 	reserve(set, set->count + 1);
107 | 	unsigned long long hash = hash_string(item);
108 | 	unsigned mask = (unsigned)set->capacity - 1;
109 | 	unsigned index = (unsigned)-1;
110 | 	for (unsigned i = (unsigned)hash & mask;; i = (i + 1) & mask) {
111 | 		if (!set->items[i]) {
112 | 			index = min(index, i);
113 | 			break;
114 | 		}
115 | 		if (set->items[i] == (void *)TOMBSTONE)
116 | 			index = min(index, i);
117 | 		else if (strcmp(set->items[i], item) == 0)
118 | 			return;
119 | 	}
120 | 	if (set->items[index] == (void *)TOMBSTONE)
121 | 		--set->num_tombstones;
122 | 	set->count++;
123 | 	set->items[index] = copy_string(&set->slab, item);
124 | }
125 | 
126 | void remove(struct set *set, const char *item) {
127 | 	if (!set->count)
128 | 		return;
129 | 	
130 | 	unsigned long long hash = hash_string(item);
131 | 	unsigned mask = (unsigned)set->capacity - 1;
132 | 	for (unsigned i = (unsigned)hash & mask; set->items[i]; i = (i + 1) & mask) {
133 | 		if (set->items[i] != (void *)TOMBSTONE && strcmp(set->items[i], item) == 0) {
134 | 			set->items[i] = (void *)TOMBSTONE;
135 | 			set->count--;
136 | 			set->num_tombstones++;
137 | 			if (8 * set->num_tombstones > set->capacity)
138 | 				resize(set, set->capacity); // Get rid of tombstones.
139 | 			return;
140 | 		}
141 | 	}
142 | }
143 | 
144 | int contains(struct set set, const char *item) {
145 | 	if (!set.count)
146 | 		return 0;
147 | 
148 | 	unsigned long long hash = hash_string(item);
149 | 	unsigned mask = (unsigned)set.capacity - 1;
150 | 	for (unsigned i = (unsigned)hash & mask; set.items[i]; i = (i + 1) & mask)
151 | 		if (set.items[i] != (void *)TOMBSTONE && strcmp(set.items[i], item) == 0)
152 | 			return 1;
153 | 	return 0;
154 | }
155 | 
156 | int first_index(struct set set) {
157 | 	for (int i = 0; i < set.capacity; ++i)
158 | 		if ((size_t)set.items[i] > TOMBSTONE)
159 | 			return i;
160 | 	return -1;
161 | }
162 | 
163 | int next_index(struct set set, int index) {
164 | 	for (int i = index + 1; i < set.capacity; ++i)
165 | 		if ((size_t)set.items[i] > TOMBSTONE)
166 | 			return i;
167 | 	return -1;
168 | }
169 | 
170 | void destroy(struct set *set) {
171 | 	for (struct slab *slab = set->slab; slab && slab->prev;) {
172 | 		struct slab *prev = slab->prev;
173 | 		free(slab);
174 | 		slab = prev;
175 | 	}
176 | 	free(set->items); // This also frees the slab.
177 | 	memset(set, 0, sizeof set[0]);
178 | }
179 | 
180 | #include <assert.h>
// Self-test for the string set: empty set, basic add/remove/contains, bulk operations
// with ~1M distinct strings, iteration, tombstone cleanup, and repeated create/destroy.
int main(void) {
	// items[i] is the 7 digit, zero padded decimal representation of i.
	static char items[1048576][8] = { 0 };
	int n = sizeof items / sizeof items[0];
	for (int i = 0; i < n; ++i) {
		int x = i;
		for (int j = 0; j < 7; ++j) {
			items[i][6 - j] = '0' + x % 10;
			x /= 10;
		}
	}

	// Every operation must be safe on a zeroed set that has never allocated.
	{
		struct set set = { 0 };
		assert(!contains(set, "Hi"));
		assert(first_index(set) < 0);
		remove(&set, "Hi");
		destroy(&set);
	}

	{
		struct set set = { 0 };

		add(&set, "abcd");
		add(&set, "efgh");
		add(&set, "ijkl");
		add(&set, "mnop");
		assert(contains(set, "abcd"));
		assert(contains(set, "efgh"));
		assert(contains(set, "ijkl"));
		assert(contains(set, "mnop"));
		assert(!contains(set, "qrst"));

		remove(&set, "abcd");
		assert(!contains(set, "abcd"));
		assert(contains(set, "efgh"));
		assert(contains(set, "ijkl"));
		assert(contains(set, "mnop"));

		// Removing an item that is no longer present must be a no-op.
		remove(&set, "abcd");
		assert(!contains(set, "abcd"));
		assert(contains(set, "efgh"));
		assert(contains(set, "ijkl"));
		assert(contains(set, "mnop"));

		remove(&set, "efgh");
		remove(&set, "ijkl");
		remove(&set, "mnop");
		assert(!contains(set, "abcd"));
		assert(!contains(set, "efgh"));
		assert(!contains(set, "ijkl"));
		assert(!contains(set, "mnop"));

		destroy(&set);
	}

	{
		struct set set = { 0 };
		for (int i = 0; i < n; ++i)
			assert(!contains(set, items[i]));
		for (int i = 0; i < n; ++i)
			add(&set, items[i]);
		for (int i = 0; i < n; ++i)
			assert(contains(set, items[i]));
		for (int i = 0; i < n; ++i)
			add(&set, items[i]);
		for (int i = 0; i < n; ++i)
			remove(&set, items[i]);
		for (int i = 0; i < n; ++i)
			assert(!contains(set, items[i]));
		for (int i = 0; i < n; ++i)
			add(&set, items[i]);
		for (int i = 0; i < n; ++i)
			assert(contains(set, items[i]));
		
		// A destroyed set is zeroed and must be usable again.
		destroy(&set);
		for (int i = 0; i < n / 2; ++i)
			add(&set, items[i]);
		for (int i = n / 2; i < n; ++i)
			assert(!contains(set, items[i]));
		for (int i = 0; i < n / 2; ++i)
			assert(contains(set, items[i]));
		for (int i = 0; i < n / 4; ++i)
			remove(&set, items[i]);
		for (int i = 0; i < n; ++i)
			assert(contains(set, items[i]) == (i >= n / 4 && i < n / 2));

		for (int i = 0; i < n; ++i)
			remove(&set, items[i]);
		assert(set.count == 0);
		for (int i = 0; i < n; ++i)
			add(&set, items[i]);

		// Iteration must visit every item exactly once.
		static int total[1048576] = { 0 };
		for (int i = first_index(set); i >= 0; i = next_index(set, i)) {
			char *item = set.items[i];
			int x = 0;
			for (int j = 0; j < 7; ++j) {
				x *= 10;
				x += item[j] - '0';
			}
			total[x]++;
		}
		for (int i = 0; i < n; ++i)
			assert(total[i] == 1);

		destroy(&set);
	}

	{
		// Potential pathological case: create a bunch of items and then delete them 
		// to leave tombstones, then lookup each item. If we don't clean tombstones this is O(n^2).
		struct set set = { 0 };
		for (int i = 0; i < n - 1; ++i)
			add(&set, items[i]);
		resize(&set, set.count + 1);
		for (int i = 1; i < n - 1; ++i)
			remove(&set, items[i]);
		assert(set.count == 1);
		for (int i = 1; i < n - 1; ++i)
			assert(!contains(set, items[i]));
		destroy(&set);
	}

	{
		// This shouldn't leak.
		for (int i = 0; i < 10000; ++i) {
			struct set set = { 0 };
			for (int j = 0; j < 10000; ++j) {
				// The digits are stored as raw values 0-9 rather than ASCII, so a 0 digit
				// terminates the string early. Nothing here checks the contents.
				char item[5] = { 0 };
				int x = j;
				item[3] = x % 10; x /= 10;
				item[2] = x % 10; x /= 10;
				item[1] = x % 10; x /= 10;
				item[0] = x % 10; x /= 10;
				add(&set, item);
			}
			destroy(&set);
		}
	}
}


--------------------------------------------------------------------------------
/string_slab.c:
--------------------------------------------------------------------------------
 1 | #include <string.h> // strlen, memcpy
 2 | #include <stdlib.h> // malloc, free
 3 | 
// Granularity of slab allocations in bytes: every slab's capacity is a multiple of this.
#define SLAB_SIZE (64*1024)
 5 | 
// One chunk of storage in a chain of slabs. Memory is handed out by advancing `cursor`.
// A slab with capacity 0 is treated as a placeholder that was not allocated with malloc.
struct slab {
	struct slab *prev; // Older slab in the chain, or null for the oldest one.
	char *buffer; // Start of this slab's memory.
	int capacity; // Size of `buffer` in bytes.
	int cursor; // Number of bytes of `buffer` already handed out.
};
12 | 
13 | char *allocate(struct slab **slab, int size) {
14 | 	int remaining = (*slab)->capacity - (*slab)->cursor;
15 | 	if (remaining < size) {
16 | 		int capacity = SLAB_SIZE * ((size + SLAB_SIZE + 1) / SLAB_SIZE);
17 | 		struct slab *next = malloc(sizeof next[0] + capacity);
18 | 		next->prev = *slab;
19 | 		next->buffer = (char *)(next + 1);
20 | 		next->capacity = capacity;
21 | 		next->cursor = 0;
22 | 		*slab = next;
23 | 	}
24 | 	char *result = (*slab)->buffer + (*slab)->cursor;
25 | 	(*slab)->cursor += size;
26 | 	return result;
27 | }
28 | 
// Copies `string`, including its terminator, into slab memory and returns the copy.
char *copy_string(struct slab **slab, const char *string) {
	size_t length = strlen(string);
	int size = 1 + (int)length; // Room for the null terminator.
	char *destination = allocate(slab, size);
	memcpy(destination, string, (size_t)size);
	return destination;
}
35 | 
36 | void deallocate_all(struct slab **slab) {
37 | 	for (;;) {
38 | 		struct slab *prev = (*slab)->prev;
39 | 		if ((*slab)->capacity)
40 | 			free(*slab);
41 | 		if (!prev)
42 | 			return;
43 | 		*slab = prev;
44 | 	}
45 | }
46 | 
47 | #include <assert.h>
48 | int main(void) {
49 | 	struct slab *slab = &(struct slab) { 0 };
50 | 	assert(strcmp(copy_string(&slab, "Hello, sailor!"), "Hello, sailor!") == 0);
51 | 	assert(strcmp(copy_string(&slab, ""), "") == 0);
52 | 
53 | 	char *large_string = malloc(2 * SLAB_SIZE + 1);
54 | 	memset(large_string, 'A', 2 * SLAB_SIZE);
55 | 	large_string[2 * SLAB_SIZE] = 0;
56 | 	assert(strcmp(copy_string(&slab, large_string), large_string) == 0);
57 | 	
58 | 	assert(slab->prev);
59 | 	deallocate_all(&slab);
60 | 	assert(!slab->prev);
61 | 
62 | 	// This shouldn't leak.
63 | 	for (int i = 0; i < 10000; ++i) {
64 | 		slab = &(struct slab) { 0 };
65 | 		for (int j = 0; j < 10000; ++j)
66 | 			copy_string(&slab, "ABCDEFGHIJKLMOP");
67 | 		deallocate_all(&slab);
68 | 	}
69 | }


--------------------------------------------------------------------------------
/string_table.c:
--------------------------------------------------------------------------------
  1 | #include <stdlib.h> // malloc, free
  2 | #include <string.h> // strlen, strcmp, memcpy, memset
  3 | 
// A hash table mapping strings to strings, using open addressing with linear probing.
// The table stores its own copies of keys and values in a chain of slabs.
struct table {
	char **keys; // `capacity` slots: null = empty, TOMBSTONE = removed, anything else = a stored key.
	char **vals; // `capacity` values parallel to `keys`; lives in the same allocation, right after the keys.
	struct slab *slab; // Newest slab of string storage; older slabs are linked through `prev`.
	int count; // Number of live key/value pairs.
	int capacity; // Number of slots; a power of 2 once allocated.
	int num_tombstones; // Number of TOMBSTONE slots in `keys`.
};
 12 | 
// Header of one chunk of string storage. Strings are packed back to back in the
// memory that follows the header.
struct slab {
	struct slab *prev; // Older slab in the chain, or null for the oldest one.
	int cursor; // Number of bytes used in this slab's memory.
	int capacity; // Number of bytes available in this slab's memory.
	// Memory comes right after this.
};
 19 | 
// Marker stored (cast to a pointer) in a key slot whose entry was removed. Slots whose
// pointer value is greater than this hold a live key.
#define TOMBSTONE 1
 21 | 
 22 | unsigned long long hash_string(const char *string) {
 23 | 	unsigned long long hash = 14695981039346656037u;
 24 | 	for (int i = 0; string[i]; ++i)
 25 | 		hash = (hash ^ string[i]) * 1099511628211u;
 26 | 	return hash;
 27 | }
 28 | 
 29 | char *copy_string(struct slab **slab, const char *string) {
 30 | 	int size = 1 + (int)strlen(string);
 31 | 	if ((*slab)->capacity - (*slab)->cursor < size) {
 32 | 		int new_capacity = 1024;
 33 | 		while (new_capacity < size)
 34 | 			new_capacity *= 2;
 35 | 		struct slab *new_slab = malloc(sizeof new_slab[0] + new_capacity);
 36 | 		new_slab->capacity = new_capacity;
 37 | 		new_slab->cursor = 0;
 38 | 		new_slab->prev = *slab;
 39 | 		*slab = new_slab;
 40 | 	}
 41 | 	char *copy = (char *)(*slab + 1) + (*slab)->cursor;
 42 | 	(*slab)->cursor += size;
 43 | 	memcpy(copy, string, (size_t)size);
 44 | 	return copy;
 45 | }
 46 | 
 47 | void resize(struct table *table, int capacity) {
 48 | 	if (capacity <= table->count)
 49 | 		capacity = table->count + 1;
 50 | 	
 51 | 	int pow2;
 52 | 	for (pow2 = 0; (1 << pow2) < capacity; ++pow2);
 53 | 	capacity = 1 << pow2;
 54 | 
 55 | 	int total_string_size = 0;
 56 | 	for (struct slab *slab = table->slab; slab; slab = slab->prev)
 57 | 		total_string_size += slab->cursor;
 58 | 
 59 | 	int first_slab_capacity = 1024;
 60 | 	while (first_slab_capacity < total_string_size)
 61 | 		first_slab_capacity *= 2;
 62 | 
 63 | 	void *new_memory = malloc(capacity * (sizeof table->keys[0] + sizeof table->vals[0]) + sizeof table->slab[0] + first_slab_capacity);
 64 | 	char **new_keys = new_memory;
 65 | 	char **new_vals = new_keys + capacity;
 66 | 	memset(new_keys, 0, (size_t)capacity * sizeof new_keys[0]);
 67 | 	struct slab *new_slab = (struct slab *)(new_vals + capacity);
 68 | 	new_slab->prev = NULL;
 69 | 	new_slab->capacity = first_slab_capacity;
 70 | 	new_slab->cursor = 0;
 71 | 
 72 | 	unsigned mask = capacity - 1;
 73 | 	for (int i = 0; i < table->capacity; ++i) {
 74 | 		if ((size_t)table->keys[i] > TOMBSTONE) {
 75 | 			char *key = copy_string(&new_slab, table->keys[i]);
 76 | 			char *val = copy_string(&new_slab, table->vals[i]);
 77 | 			unsigned long long hash = hash_string(key);
 78 | 			for (unsigned j = (unsigned)hash & mask;; j = (j + 1) & mask) {
 79 | 				if (!new_keys[j]) {
 80 | 					new_keys[j] = key;
 81 | 					new_vals[j] = val;
 82 | 					break;
 83 | 				}
 84 | 			}
 85 | 		}
 86 | 	}
 87 | 
 88 | 	for (struct slab *slab = table->slab; slab && slab->prev;) {
 89 | 		struct slab *prev = slab->prev;
 90 | 		free(slab);
 91 | 		slab = prev;
 92 | 	}
 93 | 	free(table->keys); // This also frees the values, metadata, and slab.
 94 | 	table->keys = new_keys;
 95 | 	table->vals = new_vals;
 96 | 	table->slab = new_slab;
 97 | 	table->capacity = capacity;
 98 | 	table->num_tombstones = 0;
 99 | }
100 | 
101 | void reserve(struct table *table, int min_capacity) {
102 | 	if (2 * table->capacity < 3 * min_capacity) {
103 | 		int new_capacity = 3 * min_capacity / 2;
104 | 		if (new_capacity < 64)
105 | 			new_capacity = 64;
106 | 		resize(table, new_capacity);
107 | 	}
108 | }
109 | 
110 | void add(struct table *table, const char *key, const char *val) {
111 | 	reserve(table, table->count + 1);
112 | 	unsigned long long hash = hash_string(key);
113 | 	unsigned mask = (unsigned)table->capacity - 1;
114 | 	unsigned index = (unsigned)-1;
115 | 	for (unsigned i = (unsigned)hash & mask;; i = (i + 1) & mask) {
116 | 		if (!table->keys[i]) {
117 | 			index = min(index, i);
118 | 			break;
119 | 		}
120 | 		if (table->keys[i] == (void *)TOMBSTONE)
121 | 			index = min(index, i);
122 | 		else if (strcmp(table->keys[i], key) == 0) {
123 | 			table->vals[i] = copy_string(&table->slab, val);
124 | 			return;
125 | 		}
126 | 	}
127 | 	table->count++;
128 | 	table->keys[index] = copy_string(&table->slab, key);
129 | 	table->vals[index] = copy_string(&table->slab, val);
130 | }
131 | 
132 | void remove(struct table *table, const char *key) {
133 | 	if (!table->count)
134 | 		return;
135 | 
136 | 	unsigned long long hash = hash_string(key);
137 | 	unsigned mask = (unsigned)table->capacity - 1;
138 | 	for (unsigned i = (unsigned)hash & mask; table->keys[i]; i = (i + 1) & mask) {
139 | 		if (table->keys[i] != (void *)TOMBSTONE && strcmp(table->keys[i], key) == 0) {
140 | 			table->keys[i] = (void *)TOMBSTONE;
141 | 			table->count--;
142 | 			table->num_tombstones++;
143 | 			if (8 * table->num_tombstones > table->capacity)
144 | 				resize(table, table->capacity); // Get rid of tombstones.
145 | 		}
146 | 	}
147 | }
148 | 
149 | const char *get(struct table table, const char *key) {
150 | 	if (!table.count)
151 | 		return NULL;
152 | 
153 | 	unsigned long long hash = hash_string(key);
154 | 	unsigned mask = (unsigned)table.capacity - 1;
155 | 	for (unsigned i = (unsigned)hash & mask; table.keys[i]; i = (i + 1) & mask)
156 | 		if (table.keys[i] != (void *)TOMBSTONE && strcmp(table.keys[i], key) == 0)
157 | 			return table.vals[i];
158 | 
159 | 	return NULL;
160 | }
161 | 
162 | int first_index(struct table table) {
163 | 	for (int i = 0; i < table.capacity; ++i)
164 | 		if ((size_t)table.keys[i] > TOMBSTONE)
165 | 			return i;
166 | 	return -1;
167 | }
168 | 
169 | int next_index(struct table table, int index) {
170 | 	for (int i = index + 1; i < table.capacity; ++i)
171 | 		if ((size_t)table.keys[i] > TOMBSTONE)
172 | 			return i;
173 | 	return -1;
174 | }
175 | 
176 | void destroy(struct table *table) {
177 | 	for (struct slab *slab = table->slab; slab && slab->prev;) {
178 | 		struct slab *prev = slab->prev;
179 | 		free(slab);
180 | 		slab = prev;
181 | 	}
182 | 	free(table->keys); // This also frees the values, metadata, and slab.
183 | 	memset(table, 0, sizeof table[0]);
184 | }
185 | 
186 | #include <assert.h>
187 | int main(void) {
188 | 	static char keys[1048576][9];
189 | 	static char vals[1048576][9];
190 | 	int n = sizeof keys / sizeof keys[0];
191 | 	for (int i = 0; i < n; ++i) {
192 | 		keys[i][0] = 'k';
193 | 		vals[i][0] = 'v';
194 | 		int x = i;
195 | 		for (int j = 0; j < 7; ++j) {
196 | 			keys[i][7 - j] = '0' + x % 10;
197 | 			vals[i][7 - j] = '0' + x % 10;
198 | 			x /= 10;
199 | 		}
200 | 		keys[i][8] = 0;
201 | 		vals[i][8] = 0;
202 | 	}
203 | 
204 | 	{
205 | 		struct table table = { 0 };
206 | 		assert(!get(table, ""));
207 | 		assert(first_index(table) < 0);
208 | 		destroy(&table);
209 | 	}
210 | 
211 | 	{
212 | 		struct table table = { 0 };
213 | 		add(&table, "Key0", "Val0");
214 | 		add(&table, "Key1", "Val1");
215 | 		add(&table, "Key2", "Val2");
216 | 		add(&table, "Key3", "Val3");
217 | 		assert(strcmp(get(table, "Key0"), "Val0") == 0);
218 | 		assert(strcmp(get(table, "Key1"), "Val1") == 0);
219 | 		assert(strcmp(get(table, "Key2"), "Val2") == 0);
220 | 		assert(strcmp(get(table, "Key3"), "Val3") == 0);
221 | 
222 | 		int remaining[4] = { 1, 1, 1, 1 };
223 | 		for (int i = first_index(table); i >= 0; i = next_index(table, i)) {
224 | 			char *val = table.vals[i];
225 | 			remaining[val[3] - '0']--;
226 | 		}
227 | 		assert(remaining[0] == 0 && remaining[1] == 0 && remaining[2] == 0 && remaining[3] == 0);
228 | 
229 | 		destroy(&table);
230 | 		assert(!table.capacity && !table.count && !table.keys && !table.vals && !table.slab);
231 | 	}
232 | 
233 | 	{
234 | 		struct table table = { 0 };
235 | 		for (int i = 0; i < n; ++i)
236 | 			add(&table, keys[i], vals[i]);
237 | 		assert(table.count == n);
238 | 
239 | 		static int remaining[sizeof keys / sizeof keys[0]];
240 | 		for (int i = 0; i < n; ++i)
241 | 			remaining[i] = 1;
242 | 		for (int i = first_index(table); i >= 0; i = next_index(table, i)) {
243 | 			char *key = table.keys[i];
244 | 			char *val = table.vals[i];
245 | 			assert(key[0] == 'k' && val[0] == 'v');
246 | 			++key;
247 | 			++val;
248 | 			assert(strcmp(key, val) == 0);
249 | 			int x = 0;
250 | 			for (int j = 0; j < 7; ++j) {
251 | 				x *= 10;
252 | 				x += key[j] - '0';
253 | 			}
254 | 			remaining[x] -= 1;
255 | 		}
256 | 		for (int i = 0; i < n; ++i)
257 | 			assert(!remaining[i]);
258 | 
259 | 		for (int i = 0; i < n / 2; ++i)
260 | 			remove(&table, keys[i]);
261 | 		assert(table.count == n / 2);
262 | 		for (int i = 0; i < n; ++i)
263 | 			remaining[i] = 1;
264 | 		for (int i = first_index(table); i >= 0; i = next_index(table, i)) {
265 | 			char *key = table.keys[i];
266 | 			char *val = table.vals[i];
267 | 			assert(key[0] == 'k' && val[0] == 'v');
268 | 			++key;
269 | 			++val;
270 | 			assert(strcmp(key, val) == 0);
271 | 			int x = 0;
272 | 			for (int j = 0; j < 7; ++j)
273 | 			{
274 | 				x *= 10;
275 | 				x += key[j] - '0';
276 | 			}
277 | 			remaining[x] -= 1;
278 | 		}
279 | 		for (int i = 0; i < n / 2; ++i)
280 | 			assert(remaining[i] == 1);
281 | 		for (int i = n / 2; i < n; ++i)
282 | 			assert(!remaining[i]);
283 | 
284 | 		for (int i = 0; i < n / 2; ++i)
285 | 			add(&table, keys[i], vals[i]);
286 | 		assert(table.count == n);
287 | 		for (int i = 0; i < n; ++i)
288 | 			remaining[i] = 1;
289 | 		for (int i = first_index(table); i >= 0; i = next_index(table, i)) {
290 | 			char *key = table.keys[i];
291 | 			char *val = table.vals[i];
292 | 			assert(key[0] == 'k' && val[0] == 'v');
293 | 			++key;
294 | 			++val;
295 | 			assert(strcmp(key, val) == 0);
296 | 			int x = 0;
297 | 			for (int j = 0; j < 7; ++j) {
298 | 				x *= 10;
299 | 				x += key[j] - '0';
300 | 			}
301 | 			remaining[x] -= 1;
302 | 		}
303 | 		for (int i = 0; i < n; ++i)
304 | 			assert(!remaining[i]);
305 | 
306 | 		destroy(&table);
307 | 	}
308 | 
309 | 	{
310 | 		// Potential pathological case: create a bunch of items and then delete them 
311 | 		// to leave tombstones, then lookup each item. If we don't clean tombstones this is O(n^2).
312 | 		struct table table = { 0 };
313 | 		for (int i = 0; i < n - 1; ++i)
314 | 			add(&table, keys[i], vals[i]);
315 | 		//resize(&table, table.count + 1);
316 | 		for (int i = 1; i < n - 1; ++i)
317 | 			remove(&table, keys[i]);
318 | 		assert(table.count == 1);
319 | 		for (int i = 1; i < n - 1; ++i)
320 | 			assert(!get(table, keys[i]));
321 | 		destroy(&table);
322 | 	}
323 | 
324 | 	{
325 | 		// This shouldn't leak.
326 | 		for (int i = 0; i < 10000; ++i) {
327 | 			struct table table = { 0 };
328 | 			for (int j = 0; j < 10000; ++j) {
329 | 				char keyval[5] = { 0 };
330 | 				int x = j;
331 | 				keyval[3] = x % 10; x /= 10;
332 | 				keyval[2] = x % 10; x /= 10;
333 | 				keyval[1] = x % 10; x /= 10;
334 | 				keyval[0] = x % 10; x /= 10;
335 | 				add(&table, keyval, keyval);
336 | 			}
337 | 			destroy(&table);
338 | 		}
339 | 	}
340 | }


--------------------------------------------------------------------------------
/strtod.c:
--------------------------------------------------------------------------------
  1 | // 180 line strtod replacement.
  2 | // - no dependencies at all.
  3 | // - supports fixed-point, scientific, and hex-float notation.
  4 | // - hex-floats can round-trip.
  5 | // - fixed-point and scientific are accurate to ~15 decimal places.
  6 | // - largest issue is that DBL_MAX parses as INFINITY.
  7 | 
  8 | #include <math.h> // Only for INFINITY && NAN.
  9 | #include <stdbool.h>
 10 | 
 11 | static double bb_strtod(const char* str, char** end) {
 12 | 	if (end) *end = (char*)str;
 13 | 
 14 | 	// Skip leading whitespace.
 15 | 	while (*str == ' ' || (*str >= '\t' && *str <= '\r'))
 16 | 		str++;
 17 | 
 18 | 	// Parse optional sign.
 19 | 	bool negative = *str == '-';
 20 | 	str += *str == '-' || *str == '+';
 21 | 
 22 | 	// Determine if this is a NaN, infinity, or normal number.
 23 | 	double result = 0;
 24 | 	if ((str[0] == 'n' || str[0] == 'N') && (str[1] == 'a' || str[1] == 'A') && (str[2] == 'n' || str[2] == 'N')) {
 25 | 		str += 3;
 26 | 		if (*str == '(') {
 27 | 			// Parse optional NaN character sequence.
 28 | 			const char* backup = str++;
 29 | 			while ((*str >= '0' && *str <= '9') || (*str >= 'A' && *str <= 'Z') || (*str >= 'a' && *str <= 'z') || *str == '_')
 30 | 				str++;
 31 | 			if (*str == ')')
 32 | 				str++;
 33 | 			else
 34 | 				str = backup;
 35 | 		}
 36 | 		result = NAN;
 37 | 	}
 38 | 	else if ((str[0] == 'i' || str[0] == 'I') && (str[1] == 'n' || str[1] == 'N') && (str[2] == 'f' || str[2] == 'F')) {
 39 | 		if ((str[3] == 'i' || str[3] == 'I') &&
 40 | 			(str[4] == 'n' || str[4] == 'N') &&
 41 | 			(str[5] == 'i' || str[5] == 'I') &&
 42 | 			(str[6] == 't' || str[6] == 'T') &&
 43 | 			(str[7] == 'y' || str[7] == 'Y')) {
 44 | 			str += 8;
 45 | 		}
 46 | 		else str += 3;
 47 | 		result = INFINITY;
 48 | 	} else {
 49 | 		// This is a normal float, not a NaN or infinity.
 50 | 		// Parse the base. We support decimal and hex floats.
 51 | 		unsigned base;
 52 | 		int max_digits;
 53 | 		char exponent_separator;
 54 | 		if (str[0] == '0' && (str[1] == 'x' || str[1] == 'X')) {
 55 | 			base = 16;
 56 | 			max_digits = 14;
 57 | 			exponent_separator = 'p';
 58 | 			str += 2;
 59 | 		} else {
 60 | 			base = 10;
 61 | 			max_digits = 19;
 62 | 			exponent_separator = 'e';
 63 | 		}
 64 | 
 65 | 		// Check if there's at least 1 digit.
 66 | 		if ((unsigned)*str - '0' >= base)
 67 | 			return 0;
 68 | 
 69 | 		// Skip leading zeros.
 70 | 		while (*str == '0') str++;
 71 | 		bool dot = *str == '.';
 72 | 		str += dot;
 73 | 		int num_leading_zeros_after_dot = 0;
 74 | 		while (*str == '0') {
 75 | 			str++;
 76 | 			num_leading_zeros_after_dot++;
 77 | 		}
 78 | 
 79 | 		// Parse digits before exponent.
 80 | 		unsigned long long digits = 0;
 81 | 		int num_digits = 0;
 82 | 		int num_digits_after_dot = 0;
 83 | 		int num_truncated_digits_before_dot = 0;
 84 | 		for (;;) {
 85 | 			if (*str == '.') {
 86 | 				if (dot) break; // Second dot.
 87 | 				dot = true;
 88 | 				str++;
 89 | 			} else {
 90 | 				unsigned digit = *str;
 91 | 				if (digit >= '0' && digit <= '9') digit -= '0';
 92 | 				else if (base == 16 && digit >= 'A' && digit <= 'F') digit = digit - 'A' + 10;
 93 | 				else if (base == 16 && digit >= 'a' && digit <= 'f') digit = digit - 'a' + 10;
 94 | 				else break;
 95 | 				if (num_digits < max_digits) {
 96 | 					digits = digits * base + digit;
 97 | 					num_digits++;
 98 | 					num_digits_after_dot += dot;
 99 | 				} else num_truncated_digits_before_dot += !dot;
100 | 				str++;
101 | 			}
102 | 		}
103 | 
104 | 		// Parse optional exponent.
105 | 		int exponent = 0;
106 | 		if (*str == exponent_separator || *str == exponent_separator - 'a' + 'A') {
107 | 			// Backup in case exponent parsing fails.
108 | 			const char* backup = str++;
109 | 
110 | 			// Parse optional exponent sign.
111 | 			bool negative_exp = *str == '-';
112 | 			str += *str == '-' || *str == '+';
113 | 
114 | 			// Check if we actually have a valid exponent.
115 | 			if (*str >= '0' && *str <= '9') {
116 | 				// Parse the exponent.
117 | 				do {
118 | 					exponent = exponent * 10 + (*str++ - '0');
119 | 					if (exponent > 9999) exponent = 9999; // Prevent overflow.
120 | 				} while (*str >= '0' && *str <= '9');
121 | 				if (negative_exp) exponent = -exponent;
122 | 			} else str = backup;
123 | 		}
124 | 
125 | 		// Now assemble the result!
126 | 		if (digits != 0) {
127 | 			if (base == 16) {
128 | 				// Move dot after the first digit.
129 | 				int shift = (num_digits + num_truncated_digits_before_dot) - num_digits_after_dot - 1;
130 | 				if (num_leading_zeros_after_dot > 0)
131 | 					shift -= num_leading_zeros_after_dot;
132 | 				exponent += shift * 4;
133 | 
134 | 				// Move first hex digit before floating point. The exponent was already adjusted for this.
135 | 				while (!(digits & 0xF0000000000000))
136 | 					digits <<= 4;
137 | 
138 | 				// Truncate to 53 bit double mantissa.
139 | 				while (digits & 0xE0000000000000) {
140 | 					digits >>= 1;
141 | 					exponent++;
142 | 				}
143 | 
144 | 				// Produce denormal floats.
145 | 				while (exponent < -1023 && digits) {
146 | 					digits >>= 1;
147 | 					exponent++;
148 | 				}
149 | 
150 | 				// Check for overflow to infinity or underflow to denormal.
151 | 				if (exponent > 1023) {
152 | 					exponent = 1024;
153 | 					digits = 0;
154 | 				}
155 | 				if (exponent < -1023) exponent = -1023;
156 | 
157 | 				// Assemble the float.
158 | 				unsigned long long exp = (unsigned long long)(exponent + 1023);
159 | 				union { unsigned long long u; double f; } fu = { (exp << 52) | (digits & 0xFFFFFFFFFFFFF) };
160 | 				return fu.f;
161 | 			} else {
162 | 				// Adjust exponent to account for leading zeros and truncated digits.
163 | 				exponent += num_truncated_digits_before_dot;
164 | 				exponent -= num_leading_zeros_after_dot;
165 | 
166 | 				// Right shift digits to correct decimal place.
167 | 				unsigned long long shift = 1;
168 | 				for (int i = 0; i < num_digits_after_dot; i++)
169 | 					shift *= 10;
170 | 				result = (double)digits / (double)shift;
171 | 
172 | 				if (exponent) {
173 | 					// Compute 10^abs(exponent) using binary exponentiation.
174 | 					int exp = exponent;
175 | 					if (exp < 0) exp = -exp;
176 | 					double scale = 1;
177 | 					static const double BINARY_POWERS_OF_10[9] = { 1e256, 1e128, 1e64, 1e32, 1e16, 1e8, 1e4, 1e2, 1e1 };
178 | 					for (int i = 0, decrement = 256; i < 9; i++, decrement >>= 1) {
179 | 						if (exp >= decrement) {
180 | 							exp -= decrement;
181 | 							scale *= BINARY_POWERS_OF_10[i];
182 | 						}
183 | 					}
184 | 
185 | 					// Scale by the exponent.
186 | 					if (exponent >= 0)
187 | 						result *= scale;
188 | 					else
189 | 						result /= scale;
190 | 				}
191 | 			}
192 | 		}
193 | 	}
194 | 
195 | 	if (end) *end = (char*)str;
196 | 	return negative ? -result : +result;
197 | }
198 | 
199 | // === testing ===
200 | 
201 | #include <stdio.h>
202 | 
// Output formats used by the demo below.
enum bb_print_style { BB_PRINT_F, BB_PRINT_F20, BB_PRINT_E, BB_PRINT_A };

// Parses each input with bb_strtod and prints the result on its own line in the given style.
static void bb_print_parsed(enum bb_print_style style, const char *const *inputs, int count) {
	for (int i = 0; i < count; i++) {
		double value = bb_strtod(inputs[i], NULL);
		switch (style) {
		case BB_PRINT_F:   printf("%f\n", value); break;
		case BB_PRINT_F20: printf("%.20f\n", value); break;
		case BB_PRINT_E:   printf("%e\n", value); break;
		case BB_PRINT_A:   printf("%a\n", value); break;
		}
	}
}

// Prints the parse results for a fixed list of inputs so they can be inspected by eye.
int main(void) {
	// fixed point
	static const char *const fixed_basic[] = {
		"123", "+123", "-123", "123.456", "0",
	};
	static const char *const fixed_precise[] = {
		"0.1234567890",
		"1234567890.0",
		"1234567890.1234567890",
		"999999999999999999999999999999999.0",
		"0.999999999999999999999999999999999",
	};
	static const char *const fixed_powers[] = {
		"100000000000000",
		"10000000000000",
		"1000000000000",
		"100000000000",
		"10000000000",
		"1000000000",
		"100000000",
		"10000000",
		"1000000",
		"100000",
		"10000",
		"1000",
		"100",
		"10",
		"1",
		"0.1",
		"0.01",
		"0.001",
		"0.0001",
		"0.00001",
		"0.000001",
		"0.0000001",
		"0.00000001",
		"0.000000001",
		"0.0000000001",
		"0.00000000001",
		"0.000000000001",
		"0.0000000000001",
		"0.00000000000001",
		"0.000000000000001",
		"0.0000000000000001",
		"0.00000000000000001",
		"0.000000000000000001",
		"0.0000000000000000001",
		"0.00000000000000000001",
		"0.000000000000000000001",
		"0.0000000000000000000001",
		"0.00000000000000000000001",
		"0.000000000000000000000001",
		"0.0000000000000000000000001",
		"0.00000000000000000000000001",
		"0.000000000000000000000000001",
		"0.0000000000000000000000000001",
		"0.00000000000000000000000000001",
		"0.000000000000000000000000000001",
		"0.0000000000000000000000000000001",
	};
	static const char *const fixed_zero_padded[] = {
		"01",
		"001",
		"0001",
		"00001",
		"000001",
		"000001.000",
		"00001.000",
		"0001.000",
		"001.000",
		"01.000",
		"000.100",
		"000.010",
		"000.001",
		"000.101",
	};
	bb_print_parsed(BB_PRINT_F, fixed_basic, (int)(sizeof fixed_basic / sizeof fixed_basic[0]));
	bb_print_parsed(BB_PRINT_F20, fixed_precise, (int)(sizeof fixed_precise / sizeof fixed_precise[0]));
	bb_print_parsed(BB_PRINT_E, fixed_powers, (int)(sizeof fixed_powers / sizeof fixed_powers[0]));
	bb_print_parsed(BB_PRINT_F, fixed_zero_padded, (int)(sizeof fixed_zero_padded / sizeof fixed_zero_padded[0]));

	// edge cases
	static const char *const edge_cases[] = {
		"-0",
		"nan",
		"-NAN",
		"inf",
		"-INF",
		"infinity",
		"-INFINITY",
		"-INFINITY",
	};
	static const char *const edge_dbl_max[] = {
		"1.7976931348623157e+308", // Unfortunately DBL_MAX parses as INFINITY.
	};
	bb_print_parsed(BB_PRINT_F, edge_cases, (int)(sizeof edge_cases / sizeof edge_cases[0]));
	bb_print_parsed(BB_PRINT_E, edge_dbl_max, (int)(sizeof edge_dbl_max / sizeof edge_dbl_max[0]));

	// scientific notation
	static const char *const scientific[] = {
		"1e0",
		"1e1",
		"1e+1",
		"1e-1",
		"1.23e+45",
		"0e0",
		"1.234567e300",
		"1.234567e-300",
		"1e999",
		"-1e999",
		"1e-999",
		"1.797693e+308",
		"2.225073e-308",
		"1e-309",
	};
	bb_print_parsed(BB_PRINT_E, scientific, (int)(sizeof scientific / sizeof scientific[0]));

	// hexfloat
	static const char *const hexfloat[] = {
		"0x1.FFFFFFFFFFFFFp+1023",
		"0x2.0000000000000p+1023",
		"0x1.FFFFFFFFFFFFFFp+1023",
		"0x1.0000000000000p+1024",
		"0x1.0000000000000p-1022",
		"0x0.0000000000001p-1023",
		"0x0.DE00000000000p-1023",
		"0x0.000DE00000000p-1023",
		"0x1.0000000000000p-1075",
		"0x10.0000000000000p-1079",
		"0x0.0000000000001p-1024",
		"0x0.00000000000001p-1023",
		"0x0.00000000000000001p-1023",
		"0x1FFFFFFFFFFFFF.0p+971",
		"0x1FFFFFFFFFFFFF0.0p+967",
		"0x1FFFFFFFFFFFFF00.0p+963",
		"0x1FFFFFFFFFFFFF000.0p+959",
		"0x1FFFFFFFFFFFFF0000.0p+955",
		"0x123.456p+78",
	};
	bb_print_parsed(BB_PRINT_A, hexfloat, (int)(sizeof hexfloat / sizeof hexfloat[0]));
}
324 | 
325 | 


--------------------------------------------------------------------------------
/tlsf_allocator.c:
--------------------------------------------------------------------------------
  1 | // O(1) allocation and deallocation
  2 | // 1/32 memory wasted on average, good-fit
  3 | // 4 byte header
  4 | // 32/16 byte min allocation on 64/32-bit
  5 | // can be expanded at runtime
  6 | 
  7 | #include <stdint.h> // intptr_t
  8 | #include <string.h> // memcpy
  9 | #include <assert.h>
 10 | 
 11 | #define ALIGNMENT 4 // only 4, 8, or 16 allowed
 12 | #define FREE_BIT (1 << 0)
 13 | #define PREV_FREE_BIT (1 << 1)
 14 | #define SIZE_MASK (~(FREE_BIT | PREV_FREE_BIT))
 15 | 
// Header describing one contiguous piece of heap memory (allocated or free).
// The user block starts inside this struct (see node2block), so `next` and `prev`
// overlap user data while the node is allocated.
struct node {
	struct node *prevnode; // this is actually at the end of the *previous* node's block, only valid if previous node is free
	int size; // includes size of node; the low 2 bits are used as flags: FREE_BIT | PREV_FREE_BIT
	struct node *next; // only valid if node is free
	struct node *prev; // only valid if node is free
};
 22 | 
// Allocator state: segregated free lists plus bitmaps that say which of them are non-empty.
struct heap {
	int listmap; // bit i is set while slotmaps[i] is non-zero
	int slotmaps[32]; // bit j of slotmaps[i] is set while freelists[i][j] holds at least one node
	struct node freelists[32][4]; // list heads of circular doubly linked lists, indexed by [listid][slotid] from findslot()
};
 28 | 
 29 | void *node2block(struct node *n) {
 30 | 	return (char *)n + sizeof(struct node *) + ALIGNMENT;
 31 | }
 32 | struct node *block2node(void* block) {
 33 | 	return (struct node *)((char *)block - (sizeof(struct node *) + ALIGNMENT));
 34 | }
 35 | struct node *nextnode(struct node *n) {
 36 | 	return (struct node *)((char *)n + (n->size & SIZE_MASK));
 37 | }
 38 | 
 39 | int findfirstset(int x) {
 40 | 	// _BitScanForward(&i, x) on msvc, __builtin_ffs(x) - 1 on gcc/clang
 41 | 	for (int i = 0; i < 32; ++i)
 42 | 		if (x & (1 << i))
 43 | 			return i;
 44 | 	return -1;
 45 | }
 46 | int floorlog2(int x) {
 47 | 	// _BitScanReverse(&i, x) on msvc, __builtin_fls(x) - 1 on gcc/clang
 48 | 	for (int i = 31; i >= 0; --i)
 49 | 		if (x & (1 << i))
 50 | 			return i;
 51 | 	return -1;
 52 | }
 53 | 
 54 | void findslot(int size, int *listid, int *slotid) {
 55 | 	int log2 = floorlog2(size);
 56 | 	int pow2 = 1 << log2;
 57 | 	int left = size - pow2;
 58 | 	(*listid) = log2;
 59 | 	(*slotid) = left >> (log2 - 2); // (4 * left) / pow2
 60 | }
 61 | void add(struct heap *heap, struct node *node, int size) {
 62 | 	// mark the node as free
 63 | 	assert((size & SIZE_MASK) > 0);
 64 | 	node->size = size | FREE_BIT;
 65 | 
 66 | 	// write the footer
 67 | 	struct node *next = nextnode(node);
 68 | 	next->prevnode = node;
 69 | 	next->size |= PREV_FREE_BIT;
 70 | 
 71 | 	// find where the node goes
 72 | 	int listid, slotid;
 73 | 	findslot(size, &listid, &slotid);
 74 | 	struct node *list = &heap->freelists[listid][slotid];
 75 | 
 76 | 	// add the node to the list
 77 | 	node->next = list->next;
 78 | 	node->prev = list;
 79 | 	list->next->prev = node;
 80 | 	list->next = node;
 81 | 
 82 | 	// mark the list and slot as full
 83 | 	heap->listmap |= (1 << listid);
 84 | 	heap->slotmaps[listid] |= (1 << slotid);
 85 | }
 86 | void remove(struct heap *heap, struct node *node) {
 87 | 	// find where the node goes
 88 | 	int listid, slotid;
 89 | 	findslot(node->size, &listid, &slotid);
 90 | 	struct node *list = &heap->freelists[listid][slotid];
 91 | 	int *slotmap = &heap->slotmaps[listid];
 92 | 
 93 | 	// remove the node from the freelist
 94 | 	assert(node->size & FREE_BIT);
 95 | 	node->size &= ~FREE_BIT;
 96 | 	node->prev->next = node->next;
 97 | 	node->next->prev = node->prev;
 98 | 
 99 | 	// if the slot becomes empty, clear it's bitmap bit
100 | 	if (list->next == list)
101 | 		(*slotmap) &= ~(1 << slotid);
102 | 
103 | 	// and if the list becomes empty, clear it's bitmap bit too
104 | 	if (!(*slotmap))
105 | 		heap->listmap &= ~(1 << listid);
106 | 
107 | 	struct node *next = nextnode(node);
108 | 	assert(next->size & PREV_FREE_BIT);
109 | 	next->size &= ~PREV_FREE_BIT;
110 | }
111 | 
112 | void grow(struct heap *heap, void *memory, int size) {
113 | 	assert(size > sizeof(struct node));
114 | 	assert(size % sizeof(struct node) == 0);
115 | 
116 | 	// carve out a sentinel node with just the size flags at the end
117 | 	struct node *sentinel = block2node((char *)memory + size);
118 | 	sentinel->size = 0;
119 | 
120 | 	// add the root node to the list
121 | 	void *p = (char *)memory - sizeof(struct node *);
122 | 	struct node *root = p;
123 | 	add(heap, root, size - ALIGNMENT);
124 | }
125 | void initialize(struct heap *heap) {
126 | 	memset(heap, 0, sizeof(struct heap));
127 | 
128 | 	// clear freelists
129 | 	for (int i = 0; i < 32; ++i) {
130 | 		for (int j = 0; j < 4; ++j) {
131 | 			struct node *list = &heap->freelists[i][j];
132 | 			list->next = list;
133 | 			list->prev = list;
134 | 		}
135 | 	}
136 | }
137 | void *allocate(struct heap *heap, int size) {
138 | 	assert(size >= 0); // you could clamp to 0, or return NULL
139 | 
140 | 	// need extra space for size and to align allocation
141 | 	int needed = size + ALIGNMENT;
142 | 	if (needed < sizeof(struct node))
143 | 		needed = sizeof(struct node);
144 | 
145 | 	// align up
146 | 	needed = (needed + ALIGNMENT - 1) & ~(ALIGNMENT - 1);
147 | 
148 | 	// first check the exact size range for the needed amount
149 | 	// special findslot that rounds up instead of down
150 | 	int log2 = floorlog2(needed);
151 | 	int pow2 = 1 << log2;
152 | 	int left = needed - pow2;
153 | 	int listid = log2;
154 | 	int slotid = left >> (log2 - 2); // (4 * left / pow2)
155 | 	if (left) {
156 | 		++slotid;
157 | 		if (slotid == 4) {
158 | 			slotid = 0;
159 | 			++listid;
160 | 		}
161 | 	}
162 | 
163 | 	int slotmask = ~((1 << slotid) - 1);
164 | 	if (!(heap->slotmaps[listid] & slotmask)) {
165 | 		// the best fitting size range is empty so don't consider it
166 | 		++listid;
167 | 		slotmask = 0xFFFFFFFF;
168 | 	}
169 | 
170 | 	// find first free node big enough to hold the allocation
171 | 	int listmask = ~((1 << listid) - 1);
172 | 	int listmap = heap->listmap & listmask;
173 | 	listid = findfirstset(listmap);
174 | 	if (listid < 0)
175 | 		return 0; // out of memory
176 | 
177 | 	int slotmap = heap->slotmaps[listid] & slotmask;
178 | 	slotid = findfirstset(slotmap);
179 | 
180 | 	// remove the node from the freelist
181 | 	struct node *list = &heap->freelists[listid][slotid];
182 | 	struct node *node = list->next;
183 | 	assert(node->size >= needed);
184 | 	remove(heap, node);
185 | 
186 | 	// trim the excess off
187 | 	int excess = node->size - needed;
188 | 	if (excess >= sizeof(struct node)) {
189 | 		node->size -= excess;
190 | 		struct node *leftover = nextnode(node);
191 | 		add(heap, leftover, excess);
192 | 	}
193 | 
194 | 	return node2block(node);
195 | }
196 | void deallocate(struct heap *heap, void *block) {
197 | 	if (!block)
198 | 		return;
199 | 
200 | 	struct node *node = block2node(block);
201 | 	assert(!(node->size & FREE_BIT)); // double free
202 | 
203 | 	// merge with previous free node
204 | 	if (node->size & PREV_FREE_BIT) {
205 | 		struct node *prev = node->prevnode;
206 | 		assert(prev->size & FREE_BIT); // we think it's free but it disagrees
207 | 		assert(!(prev->size & PREV_FREE_BIT)); // there shouldn't be 2 consecutive free nodes
208 | 		remove(heap, prev);
209 | 		prev->size += (node->size & SIZE_MASK);
210 | 		node = prev;
211 | 	}
212 | 
213 | 	// merge with next free node
214 | 	struct node *next = nextnode(node);
215 | 	if (next->size & FREE_BIT) {
216 | 		assert(!(next->size & PREV_FREE_BIT)); // next node thinks we're free but we aren't
217 | 		remove(heap, next);
218 | 		node->size += next->size;
219 | 		next = nextnode(node);
220 | 		assert(!(next->size & FREE_BIT)); // there shouldn't be 2 consecutive free nodes
221 | 	}
222 | 
223 | 	// mark on the next node that we are free
224 | 	assert(!(next->size & PREV_FREE_BIT)); // corruption
225 | 	next->size |= PREV_FREE_BIT;
226 | 
227 | 	add(heap, node, node->size);
228 | }
229 | void *reallocate(struct heap *heap, void *block, int size) {
230 | 	// you could clamp to 0, or return NULL
231 | 	assert(size >= 0);
232 | 
233 | 	if (!block)
234 | 		return allocate(heap, size);
235 | 	if (!size) {
236 | 		deallocate(heap, block);
237 | 		return 0;
238 | 	}
239 | 
240 | 	struct node *node = block2node(block);
241 | 	assert(!(node->size & FREE_BIT)); // use after free
242 | 
243 | 	// need extra space for size and to align allocation
244 | 	int needed = size + ALIGNMENT;
245 | 	if (needed < sizeof(struct node))
246 | 		needed = sizeof(struct node);
247 | 
248 | 	// align up
249 | 	needed = (needed + ALIGNMENT - 1) & ~(ALIGNMENT - 1);
250 | 
251 | 	if (needed > (node->size & SIZE_MASK)) {
252 | 		// we need to grow, try expanding into the next block if it's free
253 | 		struct node *next = nextnode(node);
254 | 		assert(!(next->size & PREV_FREE_BIT)); // mistake, this node is not really free
255 | 
256 | 		if (!(next->size & FREE_BIT) || (node->size & SIZE_MASK) + (next->size & SIZE_MASK) < needed) {
257 | 			// bad luck, we can't grow in-place
258 | 			void *copy = allocate(heap, size);
259 | 			if (!copy)
260 | 				return 0; // out of memory
261 | 			memcpy(copy, block, (size_t)(node->size & SIZE_MASK) - ALIGNMENT);
262 | 			deallocate(heap, block);
263 | 			return copy;
264 | 		}
265 | 
266 | 		// good luck! we can grow in place
267 | 		remove(heap, next);
268 | 		node->size += next->size;
269 | 	}
270 | 
271 | 	// trim off any excess
272 | 	int excess = (node->size & SIZE_MASK) - needed;
273 | 	if (excess >= sizeof(struct node)) {
274 | 		node->size -= excess;
275 | 		struct node *left = nextnode(node);
276 | 		left->size = excess;
277 | 		// merge with next free node
278 | 		struct node *next = nextnode(left);
279 | 		if (next->size & FREE_BIT) {
280 | 			remove(heap, next);
281 | 			left->size += (next->size & SIZE_MASK);
282 | 		}
283 | 		add(heap, left, left->size);
284 | 	}
285 | 
286 | 	return block;
287 | }
288 | 
289 | void verify(struct heap *heap) {
290 | 	// if a slotmap isn't empty the corresponding listmap bit should be set
291 | 	for (int i = 0; i < 32; ++i) {
292 | 		int slotmap = heap->slotmaps[i] != 0;
293 | 		int listmap = (heap->listmap & (1 << i)) != 0;
294 | 		assert(slotmap == listmap);
295 | 	}
296 | 
297 | 	// the bitmaps should correspond to which freelists are empty
298 | 	for (int i = 0; i < 32; ++i) {
299 | 		int slotmap = heap->slotmaps[i];
300 | 		for (int j = 0; j < 4; ++j) {
301 | 			struct node *list = &heap->freelists[i][j];
302 | 			if (slotmap & (1 << j)) {
303 | 				assert(list->next != list);
304 | 				assert(list->prev != list);
305 | 			}
306 | 		}
307 | 	}
308 | 
309 | 	for (int i = 0; i < 32; ++i) {
310 | 		for (int j = 0; j < 4; ++j) {
311 | 			struct node *list = &heap->freelists[i][j];
312 | 			for (struct node *node = list->next; node != list; node = node->next) {
313 | 				// every node in the freelist should be free
314 | 				assert(node->size & FREE_BIT);
315 | 
316 | 				// free nodes cannot be empty
317 | 				assert(node->size & SIZE_MASK);
318 | 
319 | 				// the next node needs to know if we're free
320 | 				struct node *next = nextnode(node);
321 | 				assert(next->size & PREV_FREE_BIT);
322 | 
323 | 				// there should never be 2 consecutive free nodes - they should be combined
324 | 				assert(!(node->size & PREV_FREE_BIT));
325 | 				assert(!(next->size & FREE_BIT));
326 | 
327 | 				// the node should be properly aligned.
328 | 				uintptr_t block = (uintptr_t)node2block(node);
329 | 				uintptr_t nextblock = (uintptr_t)node2block(next);
330 | 				assert(block % ALIGNMENT == 0);
331 | 				assert(nextblock % ALIGNMENT == 0);
332 | 			}
333 | 		}
334 | 	}
335 | }
// Returns 1 when each of the first `count` bytes of `bytes` equals `value`,
// otherwise 0. Nothing is compared when `count` is zero or negative, so the
// result is 1 in that case.
int equal(char *bytes, char value, int count) {
	assert(bytes);
	int matching = 0;
	while (matching < count && bytes[matching] == value)
		++matching;
	return matching >= count;
}
343 | 
// Self-test for the allocator: runs a fixed sequence of allocate / reallocate /
// deallocate calls against a heap backed by static buffers, calling verify()
// after each heap operation and checking with equal() that block contents
// survive the operations performed on other blocks.
int main(void) {
	struct heap heap;
	initialize(&heap);

	// the heap starts out backed by this 1 KiB static buffer
	static char memory[1024];
	grow(&heap, memory, sizeof memory);

	// basic allocate / free interleaving: neighbouring blocks must keep their fill pattern
	char *a = allocate(&heap, 256); verify(&heap); memset(a, 1, 256);
	char *b = allocate(&heap, 256); verify(&heap); memset(b, 2, 256);
	assert(equal(a, 1, 256));
	deallocate(&heap, a); verify(&heap);
	char *c = allocate(&heap, 256); verify(&heap); memset(c, 3, 256);
	deallocate(&heap, c); verify(&heap);
	assert(equal(b, 2, 256));
	deallocate(&heap, b); verify(&heap);
	
	// many small allocations of different sizes, each filled with its own value
	char *d = allocate(&heap, 0); verify(&heap); memset(d, 4, 0);
	char *e = allocate(&heap, 1); verify(&heap); memset(e, 5, 1);
	char *f = allocate(&heap, 2); verify(&heap); memset(f, 6, 2);
	char *g = allocate(&heap, 3); verify(&heap); memset(g, 7, 3);
	char *h = allocate(&heap, 4); verify(&heap); memset(h, 8, 4);
	char *i = allocate(&heap, 5); verify(&heap); memset(i, 9, 5);
	char *j = allocate(&heap, 23); verify(&heap); memset(j, 10, 23);
	// grow two of them and shrink one again; the untouched blocks must be unchanged
	i = reallocate(&heap, i, 100); verify(&heap); memset(i, 11, 100);
	d = reallocate(&heap, d, 256); verify(&heap); memset(d, 12, 256);
	i = reallocate(&heap, i, 5); verify(&heap); memset(i, 13, 5);
	assert(equal(d, 12, 256));
	assert(equal(e, 5, 1));
	assert(equal(f, 6, 2));
	assert(equal(g, 7, 3));
	assert(equal(h, 8, 4));
	assert(equal(i, 13, 5));
	assert(equal(j, 10, 23));
	
	// free everything in an order different from the allocation order
	deallocate(&heap, d); verify(&heap);
	deallocate(&heap, i); verify(&heap);
	deallocate(&heap, e); verify(&heap);
	deallocate(&heap, h); verify(&heap);
	deallocate(&heap, f); verify(&heap);
	deallocate(&heap, g); verify(&heap);
	deallocate(&heap, j); verify(&heap);

	// stress tests

	int maxsize = 500;
	char *x = NULL;

	// one up
	// each step grows x by one byte and checks that the previous fill was preserved
	for (int size = 0; size < maxsize; ++size) {
		x = reallocate(&heap, x, size); verify(&heap);
		assert(size == 0 || equal(x, size - 1, size - 1));
		memset(x, size, size);
		verify(&heap);
	}
	x = reallocate(&heap, x, 0);
	verify(&heap);

	// one down
	// ezis counts down from maxsize, so each step shrinks x by one byte
	for (int size = 0; size < maxsize; ++size) {
		int ezis = maxsize - size;
		x = reallocate(&heap, x, ezis); verify(&heap);
		assert(size == 0 || equal(x, size - 1, ezis));
		memset(x, size, ezis);
		verify(&heap);
	}
	x = reallocate(&heap, x, 0);
	verify(&heap);

	// grow

	// hand the heap a second static buffer before running two blocks side by side
	static char extra[1024];
	grow(&heap, extra, sizeof extra);
	char *y = NULL;

	// both up
	for (int size = 0; size < maxsize; ++size) {
		verify(&heap);
		x = reallocate(&heap, x, size); verify(&heap);
		assert(size == 0 || equal(x, size - 1, size - 1));
		assert(size == 0 || equal(y, size - 1, size - 1));
		y = reallocate(&heap, y, size); verify(&heap);
		assert(size == 0 || equal(x, size - 1, size - 1));
		assert(size == 0 || equal(y, size - 1, size - 1));
		memset(x, size, size);
		memset(y, size, size);
		verify(&heap);
	}
	x = reallocate(&heap, x, 0);
	y = reallocate(&heap, y, 0);
	verify(&heap);

	// both down
	for (int size = 0; size < maxsize; ++size) {
		int ezis = maxsize - size;
		x = reallocate(&heap, x, ezis); verify(&heap);
		assert(size == 0 || equal(x, size - 1, ezis));
		// y has not been shrunk yet in this iteration, so it is still one byte longer
		assert(size == 0 || equal(y, size - 1, ezis + 1));
		y = reallocate(&heap, y, ezis); verify(&heap);
		assert(size == 0 || equal(x, size - 1, ezis));
		assert(size == 0 || equal(y, size - 1, ezis));
		memset(x, size, ezis);
		memset(y, size, ezis);
		verify(&heap);
	}
	x = reallocate(&heap, x, 0);
	y = reallocate(&heap, y, 0);
	verify(&heap);

	// one up, one down
	for (int size = 0; size < maxsize; ++size) {
		int ezis = maxsize - size;
		x = reallocate(&heap, x, size); verify(&heap);
		assert(size == 0 || equal(x, size - 1, size - 1));
		assert(size == 0 || equal(y, size - 1, ezis + 1));
		y = reallocate(&heap, y, ezis); verify(&heap);
		assert(size == 0 || equal(x, size - 1, size - 1));
		assert(size == 0 || equal(y, size - 1, ezis));
		memset(x, size, size);
		memset(y, size, ezis);
		verify(&heap);
	}
	x = reallocate(&heap, x, 0);
	y = reallocate(&heap, y, 0);
	verify(&heap);
}
469 | 


--------------------------------------------------------------------------------
/win32_list_directory.c:
--------------------------------------------------------------------------------
 1 | #include <Windows.h>
 2 | #include <stdio.h> // printf
 3 | 
 4 | void list_directory(const char *path) {
 5 | 	char buffer[1024];
 6 | 	wsprintfA(buffer, "%s/*", path);
 7 | 
 8 | 	WIN32_FIND_DATAA data;
 9 | 	HANDLE find = FindFirstFileA(buffer, &data);
10 | 	if (find == INVALID_HANDLE_VALUE)
11 | 		return;
12 | 
13 | 	do {
14 | 		wsprintfA(buffer, "%s/%s", path, data.cFileName);
15 | 		if (data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
16 | 			int is_special = data.cFileName[0] == '.' && (!data.cFileName[1] || (data.cFileName[1] == '.' && !data.cFileName[2]));
17 | 			if (!is_special)
18 | 				list_directory(buffer);
19 | 		} else printf("%s\n", buffer);
20 | 	} while (FindNextFileA(find, &data));
21 | }
22 | 
// Lists every file below the current working directory.
int main(void) {
	const char *root = ".";
	list_directory(root);
	return 0;
}


--------------------------------------------------------------------------------
/win32_stacktrace.c:
--------------------------------------------------------------------------------
  1 | #include <Windows.h>
  2 | #include <TlHelp32.h>
  3 | #include <ImageHlp.h>
  4 | #include <stdio.h> // printf
  5 | 
  6 | typedef BOOL(WINAPI *SymGetSymFromAddr64_Func)(HANDLE hProcess, DWORD64 qwAddr, PDWORD64 pdwDisplacement, PIMAGEHLP_SYMBOL64 Symbol);
  7 | typedef BOOL(WINAPI *SymGetSymFromAddr64_Func)(HANDLE hProcess, DWORD64 qwAddr, PDWORD64 pdwDisplacement, PIMAGEHLP_SYMBOL64 Symbol);
  8 | typedef DWORD(WINAPI *UnDecorateSymbolName_Func)(PCSTR name, PSTR outputString, DWORD maxStringLength, DWORD flags);
  9 | typedef BOOL(WINAPI *SymGetLineFromAddr64_Func)(HANDLE hProcess, DWORD64 qwAddr, PDWORD pdwDisplacement, PIMAGEHLP_LINE64 Line64);
 10 | typedef BOOL(WINAPI *SymGetModuleInfo64_Func)(HANDLE hProcess, DWORD64 qwAddr, PIMAGEHLP_MODULE64 ModuleInfo);
 11 | typedef BOOL(WINAPI *StackWalk64_Func)(
 12 | 	DWORD MachineType,
 13 | 	HANDLE hProcess,
 14 | 	HANDLE hThread,
 15 | 	LPSTACKFRAME64 StackFrame,
 16 | 	PVOID ContextRecord,
 17 | 	PREAD_PROCESS_MEMORY_ROUTINE64 ReadMemoryRoutine,
 18 | 	PFUNCTION_TABLE_ACCESS_ROUTINE64 FunctionTableAccessRoutine,
 19 | 	PGET_MODULE_BASE_ROUTINE64 GetModuleBaseRoutine,
 20 | 	PTRANSLATE_ADDRESS_ROUTINE64 TranslateAddress);
 21 | SymGetSymFromAddr64_Func SymGetSymFromAddr64_func;
 22 | UnDecorateSymbolName_Func UnDecorateSymbolName_func;
 23 | SymGetLineFromAddr64_Func SymGetLineFromAddr64_func;
 24 | SymGetModuleInfo64_Func SymGetModuleInfo64_func;
 25 | StackWalk64_Func StackWalk64_func;
 26 | #define SymGetSymFromAddr64 SymGetSymFromAddr64_func
 27 | #define UnDecorateSymbolName UnDecorateSymbolName_func
 28 | #define SymGetLineFromAddr64 SymGetLineFromAddr64_func
 29 | #define SymGetModuleInfo64 SymGetModuleInfo64_func
 30 | #define StackWalk64 StackWalk64_func
 31 | 
 32 | void init_dbghelp_dll(void) {
 33 | 	static BOOL already_tried_to_init;
 34 | 	if (already_tried_to_init)
 35 | 		return;
 36 | 	already_tried_to_init = TRUE;
 37 | 
 38 | 	HMODULE dbghelp_dll = LoadLibraryA("DbgHelp.dll");
 39 | 	if (!dbghelp_dll)
 40 | 		return;
 41 | 
 42 | 	typedef BOOL(WINAPI *SymInitialize_Func)(HANDLE hProcess, PCSTR UserSearchPath, BOOL fInvadeProcess);
 43 | 	typedef DWORD(WINAPI *SymGetOptions_Func)(void);
 44 | 	typedef DWORD(WINAPI *SymSetOptions_Func)(DWORD SymOptions);
 45 | 	typedef DWORD64(WINAPI *SymLoadModule64_Func)(HANDLE hProcess, HANDLE hFile, PCSTR ImageName, PCSTR ModuleName, DWORD64 BaseOfDll, DWORD SizeOfDll);
 46 | 	SymInitialize_Func SymInitialize_func = NULL;
 47 | 	SymGetOptions_Func SymGetOptions_func = NULL;
 48 | 	SymSetOptions_Func SymSetOptions_func = NULL;
 49 | 	SymLoadModule64_Func SymLoadModule64_func = NULL;
 50 | 	#define SymInitialize SymInitialize_func
 51 | 	#define SymGetOptions SymGetOptions_func
 52 | 	#define SymSetOptions SymSetOptions_func
 53 | 	#define SymLoadModule64 SymLoadModule64_func
 54 | 
 55 | 	SymInitialize = (SymInitialize_Func)GetProcAddress(dbghelp_dll, "SymInitialize");
 56 | 	SymGetOptions = (SymGetOptions_Func)GetProcAddress(dbghelp_dll, "SymGetOptions");
 57 | 	SymSetOptions = (SymSetOptions_Func)GetProcAddress(dbghelp_dll, "SymSetOptions");
 58 | 	SymLoadModule64 = (SymLoadModule64_Func)GetProcAddress(dbghelp_dll, "SymLoadModule64");
 59 | 	SymGetSymFromAddr64 = (SymGetSymFromAddr64_Func)GetProcAddress(dbghelp_dll, "SymGetSymFromAddr64");
 60 | 	UnDecorateSymbolName = (UnDecorateSymbolName_Func)GetProcAddress(dbghelp_dll, "UnDecorateSymbolName");
 61 | 	SymGetLineFromAddr64 = (SymGetLineFromAddr64_Func)GetProcAddress(dbghelp_dll, "SymGetLineFromAddr64");
 62 | 	SymGetModuleInfo64 = (SymGetModuleInfo64_Func)GetProcAddress(dbghelp_dll, "SymGetModuleInfo64");
 63 | 	StackWalk64 = (StackWalk64_Func)GetProcAddress(dbghelp_dll, "StackWalk64");
 64 | 
 65 | 	HANDLE process = GetCurrentProcess();
 66 | 	if (SymInitialize && SymGetOptions && SymSetOptions && SymLoadModule64 && SymInitialize(process, NULL, FALSE)) {
 67 | 		DWORD options = SymGetOptions();
 68 | 		options |= SYMOPT_LOAD_LINES;
 69 | 		options |= SYMOPT_FAIL_CRITICAL_ERRORS;
 70 | 		options |= SYMOPT_DEFERRED_LOADS;
 71 | 		options = SymSetOptions(options);
 72 | 
 73 | 		HANDLE snapshot = CreateToolhelp32Snapshot(TH32CS_SNAPMODULE, GetCurrentProcessId());
 74 | 		if (snapshot != INVALID_HANDLE_VALUE) {
 75 | 			#undef MODULEENTRY32 // Windows.h literally renames these to their wchar counterpart.
 76 | 			#undef Module32First
 77 | 			#undef Module32Next
 78 | 			MODULEENTRY32 entry = { sizeof(entry) };
 79 | 			for (BOOL keep_going = Module32First(snapshot, &entry); keep_going; keep_going = Module32Next(snapshot, &entry))
 80 | 				SymLoadModule64(process, NULL, entry.szExePath, entry.szModule, (DWORD64)entry.modBaseAddr, entry.modBaseSize);
 81 | 			CloseHandle(snapshot);
 82 | 		}
 83 | 	}
 84 | }
 85 | 
 86 | int generate_stacktrace(void *buffer[], int buffer_count, int frames_to_skip) {
 87 | 	// RtlCaptureStackBackTrace is orders of magnitude faster than StackWalk64, but it's
 88 | 	// undocumented and sometimes unreliable. Therefore we always try RtlCaptureStackBackTrace
 89 | 	// first, and then fall back on StackWalk64.
 90 | 	int num_frames = RtlCaptureStackBackTrace((DWORD)frames_to_skip + 1, buffer_count, buffer, NULL);
 91 | 	if (num_frames <= 1) {
 92 | 		init_dbghelp_dll();
 93 | 		if (!StackWalk64)
 94 | 			return 0;
 95 | 
 96 | 		CONTEXT context = { .ContextFlags = CONTEXT_FULL }; // Or CONTEXT_ALL? Or is there even any difference??
 97 | 		RtlCaptureContext(&context);
 98 | 
 99 | 		DWORD machine_type;
100 | 		STACKFRAME64 frame = {
101 | 			.AddrPC.Mode    = AddrModeFlat,
102 | 			.AddrFrame.Mode = AddrModeFlat,
103 | 			.AddrStack.Mode = AddrModeFlat,
104 | 		};
105 | 		#ifdef _M_X64
106 | 		{
107 | 			machine_type = IMAGE_FILE_MACHINE_AMD64;
108 | 			frame.AddrPC.Offset    = context.Rip;
109 | 			frame.AddrFrame.Offset = context.Rbp;
110 | 			frame.AddrStack.Offset = context.Rsp;
111 | 
112 | 			// Apparently StackWalk64 doesn't capture the frame of the functio that calls 
113 | 			// it in 32-bit code and so we only need to do this in x64. I'm not sure why.
114 | 			frames_to_skip += 1;
115 | 		}
116 | 		#elif defined _M_IX86
117 | 		{
118 | 			machine_type = IMAGE_FILE_MACHINE_I386;
119 | 			frame.AddrPC.Offset    = context.Eip;
120 | 			frame.AddrFrame.Offset = context.Ebp;
121 | 			frame.AddrStack.Offset = context.Esp;
122 | 		}
123 | 		#else
124 | 		{
125 | 			return 0; // Stacktraces not supported on ARM.
126 | 		}
127 | 		#endif
128 | 
129 | 		HANDLE process = GetCurrentProcess();
130 | 		HANDLE thread = GetCurrentThread();
131 | 		num_frames = 0;
132 | 		for (int i = 0; StackWalk64(machine_type, process, thread, &frame, &context, NULL, NULL, NULL, NULL); ++i) {
133 | 			if (frame.AddrPC.Offset == 0)
134 | 				break;
135 | 			if (i >= (int)frames_to_skip && num_frames < buffer_count)
136 | 				buffer[num_frames++] = (void *)(uintptr_t)frame.AddrPC.Offset;
137 | 		}
138 | 	}
139 | 
140 | 	// The PC will have advanced by 1 (or more) by the point we get the stack trace - we have to undo that otherwise we get wrong info!
141 | 	for (int i = 0; i < num_frames; ++i)
142 | 		buffer[i] = (char *)buffer[i] + 1;
143 | 	return num_frames;
144 | }
145 | 
146 | void print_stacktrace(void *const stackframes[], int num_frames) {
147 | 	init_dbghelp_dll();
148 | 	HANDLE process = GetCurrentProcess();
149 | 	for (int i = 0; i < num_frames; ++i) {
150 | 		// Print either:
151 | 		// 1) function() in file, line x
152 | 		// 2) function() in !module
153 | 		// 3) 0xaddress in file, line x
154 | 		// 4) 0xaddress in !module
155 | 		// 5) 0xaddress
156 | 
157 | 		DWORD64 address = (DWORD64)stackframes[i];
158 | 		DWORD64 symbol_buffer[64] = { 0 };
159 | 		IMAGEHLP_SYMBOL64 *symbol = (IMAGEHLP_SYMBOL64 *)symbol_buffer;
160 | 		symbol->SizeOfStruct = sizeof symbol[0];
161 | 		symbol->MaxNameLength = sizeof symbol_buffer - sizeof symbol[0];
162 | 
163 | 		if (SymGetSymFromAddr64 && SymGetSymFromAddr64(process, address, &(DWORD64){0}, symbol)) {
164 | 			const char *function = symbol->Name;
165 | 			char undecorated[512];
166 | 			if (UnDecorateSymbolName) {
167 | 				UnDecorateSymbolName(function, undecorated, sizeof undecorated, UNDNAME_NAME_ONLY);
168 | 				undecorated[sizeof undecorated - 1] = 0;
169 | 				function = undecorated;
170 | 			}
171 | 			printf("%s()", function);
172 | 		} else printf("0x%p", stackframes[i]);
173 | 
174 | 		IMAGEHLP_LINE64 line_info = { .SizeOfStruct = sizeof line_info };
175 | 		IMAGEHLP_MODULE64 module_info = { .SizeOfStruct = sizeof module_info };
176 | 		if (SymGetLineFromAddr64 && SymGetLineFromAddr64(process, address, &(DWORD){0}, &line_info)) {
177 | 			int line = (int)line_info.LineNumber;
178 | 			const char *file = line_info.FileName;
179 | 			printf(" in %s, line %d", file, line);
180 | 		} else if (SymGetModuleInfo64 && SymGetModuleInfo64(process, address, &module_info)) {
181 | 			const char *module = module_info.ModuleName;
182 | 			printf(" in !%s", module);
183 | 		}
184 | 
185 | 		printf("\n");
186 | 	}
187 | }
188 | 
// Captures the current call stack and prints it.
int main(void) {
	void *frames[128];
	int capacity = (int)(sizeof frames / sizeof frames[0]);
	int captured = generate_stacktrace(frames, capacity, 0);
	print_stacktrace(frames, captured);
	return 0;
}


--------------------------------------------------------------------------------
/win32_thread_queue.c:
--------------------------------------------------------------------------------
 1 | #include <Windows.h>
 2 | #include <stdio.h>
 3 | 
 4 | volatile int cursor;
 5 | BOOL is_prime[1048576];
 6 | 
 7 | BOOL prime(int x) {
 8 | 	if (x == 2)
 9 | 		return TRUE;
10 | 	if (x <= 1 || !(x % 2))
11 | 		return FALSE;
12 | 	for (INT64 i = 3; i * i <= x; i += 2)
13 | 		if (!(x % i))
14 | 			return FALSE;
15 | 	return TRUE;
16 | }
17 | DWORD CALLBACK thread_function(void *param) {
18 | 	for (;;) {
19 | 		int index = InterlockedIncrement(&cursor) - 1;
20 | 		if (index >= _countof(is_prime))
21 | 			return 0;
22 | 		is_prime[index] = prime(index);
23 | 	}
24 | }
25 | 
26 | int main(void) {
27 | 	SYSTEM_INFO info;
28 | 	GetSystemInfo(&info);
29 | 	
30 | 	HANDLE threads[MAXIMUM_WAIT_OBJECTS];
31 | 	int num_extra_threads = (int)info.dwNumberOfProcessors - 1;
32 | 	if (num_extra_threads > _countof(threads))
33 | 		num_extra_threads = _countof(threads);
34 | 	
35 | 	printf("Creating %d worker threads.\n", num_extra_threads);
36 | 	for (int i = 0; i < num_extra_threads; ++i)
37 | 		threads[i] = CreateThread(NULL, 0, thread_function, NULL, 0, NULL);
38 | 	
39 | 	thread_function(NULL);
40 | 	WaitForMultipleObjects((DWORD)num_extra_threads, threads, TRUE, INFINITE);
41 | 
42 | 	for (int i = 0; i < _countof(is_prime); ++i)
43 | 		if (is_prime[i])
44 | 			printf("%d is prime.\n", i);
45 | }


--------------------------------------------------------------------------------
/win32_websocket_client.c:
--------------------------------------------------------------------------------
 1 | #include <Windows.h>
 2 | #include <winhttp.h>
 3 | #include <stdio.h>
 4 | #pragma comment(lib, "winhttp.lib")
 5 | 
 6 | void checkHr(HRESULT hr) {
 7 | 	if (FAILED(hr)) {
 8 | 		char message[256] = { 0 };
 9 | 		FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, hr, MAKELANGID(LANG_ENGLISH, SUBLANG_ENGLISH_US), message, 256, NULL);
10 | 		printf("HRESULT = 0x%08X: %s\n", hr, message);
11 | 		__debugbreak();
12 | 	}
13 | }
14 | void checkCond(BOOL cond) {
15 | 	if (!cond) {
16 | 		DWORD error = GetLastError();
17 | 		HRESULT hr = HRESULT_FROM_WIN32(error);
18 | 		checkHr(hr);
19 | 	}
20 | }
21 | void check(DWORD error) {
22 | 	HRESULT hr = HRESULT_FROM_WIN32(error);
23 | 	checkHr(hr);
24 | }
25 | 
26 | int main(void) {
27 | 	HINTERNET session = WinHttpOpen(L"Websocket Client Test User Agent", WINHTTP_ACCESS_TYPE_AUTOMATIC_PROXY, WINHTTP_NO_PROXY_NAME, WINHTTP_NO_PROXY_BYPASS, 0);
28 | 	checkCond(session != NULL);
29 | 
30 | 	HINTERNET connection = WinHttpConnect(session, L"localhost", 9999, 0);
31 | 	checkCond(connection != NULL);
32 | 	printf("Connected to server.\n");
33 | 
34 | 	HINTERNET request = WinHttpOpenRequest(connection, L"GET", L"", L"HTTP/1.1", WINHTTP_NO_REFERER, WINHTTP_DEFAULT_ACCEPT_TYPES, 0);
35 | 	checkCond(request != NULL);
36 | 
37 | 	printf("Starting websocket upgrade handshake.\n");
38 | 	checkCond(WinHttpSetOption(request, WINHTTP_OPTION_UPGRADE_TO_WEB_SOCKET, NULL, 0));
39 | 	checkCond(WinHttpSendRequest(request, WINHTTP_NO_ADDITIONAL_HEADERS, 0, NULL, 0, 0, 0));
40 | 	checkCond(WinHttpReceiveResponse(request, NULL));
41 | 
42 | 	HINTERNET websocket = WinHttpWebSocketCompleteUpgrade(request, NULL);
43 | 	checkCond(websocket != NULL);
44 | 	checkCond(WinHttpCloseHandle(request));
45 | 	printf("Websocket upgrade completed.\n");
46 | 
47 | 	const char* message = "Hello, sailor!";
48 | 	check(WinHttpWebSocketSend(websocket, WINHTTP_WEB_SOCKET_BINARY_MESSAGE_BUFFER_TYPE, message, strlen(message)));
49 | 	printf("Sent message to server.\n");
50 | 
51 | 	char buffer[999] = { 0 };
52 | 	int length = 0;
53 | 	for (;;) {
54 | 		unsigned long bytesRead = 0;
55 | 		WINHTTP_WEB_SOCKET_BUFFER_TYPE bufferType = 0;
56 | 		check(WinHttpWebSocketReceive(websocket, buffer + length, sizeof buffer - length, &bytesRead, &bufferType));
57 | 		length += bytesRead;
58 | 		if (bufferType != WINHTTP_WEB_SOCKET_BINARY_FRAGMENT_BUFFER_TYPE) break;
59 | 	}
60 | 	printf("Received response from server: \"%.*s\".\n", length, buffer);
61 | 
62 | 	check(WinHttpWebSocketClose(websocket, WINHTTP_WEB_SOCKET_SUCCESS_CLOSE_STATUS, NULL, 0));
63 | 	unsigned short status = 0;
64 | 	char reason[999];
65 | 	unsigned long reasonLength = 0;
66 | 	WinHttpWebSocketQueryCloseStatus(websocket, &status, reason, sizeof reason, &reasonLength);
67 | 	printf("Closed connection with status %d and reason \"%.*s\".", status, reasonLength, reason);
68 | 
69 | 	printf("Done");
70 | }
71 | 


--------------------------------------------------------------------------------
/win32_websocket_server.c:
--------------------------------------------------------------------------------
  1 | // Minimal websocket server setup using HTTP.sys and the Windows websocket API.
  2 | // This is just a minimal example using synchronous calls and minimal error checking.
  3 | // Don't use this in production, it's just for reference.
  4 | // 
  5 | // You can test it with this python program:
  6 | // 
  7 | // $ pip install websockets
  8 | // 
  9 | // import websockets.sync.client
 10 | // with websockets.sync.client.connect("ws://localhost:9999/server") as websocket:
 11 | //   message = websocket.recv()
 12 | //   print(f"Received: {message}")
 13 | //   websocket.send("Hello from client!")
 14 | 
 15 | #define WIN32_LEAN_AND_MEAN
 16 | #include <Windows.h>
 17 | #include <websocket.h>
 18 | #include <http.h>
 19 | #include <stdio.h>
 20 | #include <assert.h>
 21 | #pragma comment(lib, "httpapi.lib")
 22 | #pragma comment(lib, "websocket.lib")
 23 | 
 24 | void checkHr(HRESULT hr) {
 25 | 	if (FAILED(hr)) {
 26 | 		char message[256] = { 0 };
 27 | 		FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, hr, MAKELANGID(LANG_ENGLISH, SUBLANG_ENGLISH_US), message, 256, NULL);
 28 | 		printf("HRESULT = 0x%08X: %s\n", hr, message);
 29 | 		__debugbreak();
 30 | 	}
 31 | }
 32 | void check(unsigned error) {
 33 | 	HRESULT hr = HRESULT_FROM_WIN32(error);
 34 | 	checkHr(hr);
 35 | }
 36 | 
 37 | int main(void) {
 38 | 	HTTPAPI_VERSION version = HTTPAPI_VERSION_2;
 39 | 	check(HttpInitialize(version, HTTP_INITIALIZE_SERVER, NULL));
 40 | 
 41 | 	HTTP_SERVER_SESSION_ID session = 0;
 42 | 	check(HttpCreateServerSession(version, &session, 0));
 43 | 
 44 | 	HTTP_URL_GROUP_ID urlGroup = 0;
 45 | 	check(HttpCreateUrlGroup(session, &urlGroup, 0));
 46 | 	check(HttpAddUrlToUrlGroup(urlGroup, L"http://localhost:9999/server", 0, 0));
 47 | 
 48 | 	HANDLE requestQueue = NULL;
 49 | 	check(HttpCreateRequestQueue(version, NULL, NULL, 0, &requestQueue));
 50 | 
 51 | 	HTTP_BINDING_INFO binding = { 0 };
 52 | 	binding.Flags.Present = TRUE;
 53 | 	binding.RequestQueueHandle = requestQueue;
 54 | 	check(HttpSetUrlGroupProperty(urlGroup, HttpServerBindingProperty, &binding, sizeof binding));
 55 | 
 56 | 	printf("Waiting for connection.\n");
 57 | 	__declspec(align(8)) char requestBuffer[4096] = { 0 };
 58 | 	HTTP_REQUEST_V2* request = (HTTP_REQUEST_V2*)requestBuffer;
 59 | 	unsigned long requestSize = 0;
 60 | 	check(HttpReceiveHttpRequest(requestQueue, HTTP_NULL_ID, 0, request, sizeof requestBuffer, &requestSize, NULL));
 61 | 	printf("Received HTTP request.\n");
 62 | 
 63 | 	WEB_SOCKET_HTTP_HEADER wsRequestHeaders[99] = { 0 };
 64 | 	unsigned long numRequestHeaders = 0;
 65 | 	for (unsigned i = 0; i < HttpHeaderRequestMaximum; i++) {
 66 | 		HTTP_KNOWN_HEADER* src = &request->Headers.KnownHeaders[i];
 67 | 		if (src->RawValueLength) {
 68 | 			static const char* const REQUEST_HEADER_NAMES[HttpHeaderRequestMaximum] = {
 69 | 				"CacheControl",
 70 | 				"Connection",
 71 | 				"Date",
 72 | 				"KeepAlive",
 73 | 				"Pragma",
 74 | 				"Trailer",
 75 | 				"TransferEncoding",
 76 | 				"Upgrade",
 77 | 				"Via",
 78 | 				"Warning",
 79 | 				"Allow",
 80 | 				"ContentLength",
 81 | 				"ContentType",
 82 | 				"ContentEncoding",
 83 | 				"ContentLanguage",
 84 | 				"ContentLocation",
 85 | 				"ContentMd5",
 86 | 				"ContentRange",
 87 | 				"Expires",
 88 | 				"LastModified",
 89 | 				"Accept",
 90 | 				"AcceptCharset",
 91 | 				"AcceptEncoding",
 92 | 				"AcceptLanguage",
 93 | 				"Authorization",
 94 | 				"Cookie",
 95 | 				"Expect",
 96 | 				"From",
 97 | 				"Host",
 98 | 				"IfMatch",
 99 | 				"IfModifiedSince",
100 | 				"IfNoneMatch",
101 | 				"IfRange",
102 | 				"IfUnmodifiedSince",
103 | 				"MaxForwards",
104 | 				"ProxyAuthorization",
105 | 				"Referer",
106 | 				"Range",
107 | 				"Te",
108 | 				"Translate",
109 | 				"UserAgent",
110 | 			};
111 | 
112 | 			WEB_SOCKET_HTTP_HEADER* dst = &wsRequestHeaders[numRequestHeaders++];
113 | 			dst->pcName = (char*)REQUEST_HEADER_NAMES[i];
114 | 			dst->ulNameLength = (unsigned)strlen(dst->pcName);
115 | 			dst->pcValue = (char*)src->pRawValue;
116 | 			dst->ulValueLength = src->RawValueLength;
117 | 		}
118 | 	}
119 | 	for (unsigned i = 0; i < request->Headers.UnknownHeaderCount; i++) {
120 | 		HTTP_UNKNOWN_HEADER* src = &request->Headers.pUnknownHeaders[i];
121 | 		WEB_SOCKET_HTTP_HEADER* dst = &wsRequestHeaders[numRequestHeaders++];
122 | 		dst->pcName = (char*)src->pName;
123 | 		dst->ulNameLength = src->NameLength;
124 | 		dst->pcValue = (char*)src->pRawValue;
125 | 		dst->ulValueLength = src->RawValueLength;
126 | 	}
127 | 
128 | 	WEB_SOCKET_HANDLE websocket = NULL;
129 | 	checkHr(WebSocketCreateServerHandle(NULL, 0, &websocket));
130 | 
131 | 	printf("Performing websocket handshake.\n");
132 | 	WEB_SOCKET_HTTP_HEADER* wsResponseHeaders = NULL;
133 | 	unsigned long numResponseHeaders = 0;
134 | 	checkHr(WebSocketBeginServerHandshake(websocket, NULL, NULL, 0, wsRequestHeaders, numRequestHeaders, &wsResponseHeaders, &numResponseHeaders));
135 | 
136 | 	HTTP_UNKNOWN_HEADER responseHeaders[99] = { 0 };
137 | 	HTTP_RESPONSE_V2 response = { 0 };
138 | 	response.StatusCode = 101;
139 | 	response.pReason = "Switching Protocols";
140 | 	response.ReasonLength = sizeof "Switching Protocols" - 1;
141 | 	response.Headers.pUnknownHeaders = responseHeaders;
142 | 	for (unsigned i = 0; i < numResponseHeaders; i++) {
143 | 		WEB_SOCKET_HTTP_HEADER* src = &wsResponseHeaders[i];
144 | 		BOOL isKnownHeader = FALSE;
145 | 		for (int j = 0; j < HttpHeaderResponseMaximum; j++) {
146 | 			static const char* const RESPONSE_HEADER_NAMES[HttpHeaderResponseMaximum] = {
147 | 				"CacheControl",
148 | 				"Connection",
149 | 				"Date",
150 | 				"KeepAlive",
151 | 				"Pragma",
152 | 				"Trailer",
153 | 				"TransferEncoding",
154 | 				"Upgrade",
155 | 				"Via",
156 | 				"Warning",
157 | 				"Allow",
158 | 				"ContentLength",
159 | 				"ContentType",
160 | 				"ContentEncoding",
161 | 				"ContentLanguage",
162 | 				"ContentLocation",
163 | 				"ContentMd5",
164 | 				"ContentRange",
165 | 				"Expires",
166 | 				"LastModified",
167 | 				"AcceptRanges",
168 | 				"Age",
169 | 				"Etag",
170 | 				"Location",
171 | 				"ProxyAuthenticate",
172 | 				"RetryAfter",
173 | 				"Server",
174 | 				"SetCookie",
175 | 				"Vary",
176 | 				"WwwAuthenticate",
177 | 			};
178 | 			const char* name = RESPONSE_HEADER_NAMES[j];
179 | 			size_t length = strlen(name);
180 | 			if (src->ulNameLength == length && memcmp(src->pcName, name, length) == 0) {
181 | 				isKnownHeader = TRUE;
182 | 				HTTP_KNOWN_HEADER* dst = &response.Headers.KnownHeaders[j];
183 | 				dst->pRawValue = src->pcValue;
184 | 				dst->RawValueLength = (unsigned short)src->ulValueLength;
185 | 				break;
186 | 			}
187 | 		}
188 | 		if (!isKnownHeader) {
189 | 			HTTP_UNKNOWN_HEADER* dst = &response.Headers.pUnknownHeaders[response.Headers.UnknownHeaderCount++];
190 | 			dst->pName = src->pcName;
191 | 			dst->NameLength = (unsigned short)src->ulNameLength;
192 | 			dst->pRawValue = src->pcValue;
193 | 			dst->RawValueLength = (unsigned short)src->ulValueLength;
194 | 		}
195 | 	}
196 | 
197 | 	//@HACK For some reason HttpSendResponse doesn't seem to send the Connection: Upgrade header unless it's set
198 | 	//      as both an HTTP_KNOWN_HEADER and an HTTP_UNKNOWN_HEADER. No idea why, but it just ignores it.
199 | 	//      We already set it as a known header in the loop above, so now just add it as an unknown header.
200 | 	HTTP_UNKNOWN_HEADER* connectionHeader = &response.Headers.pUnknownHeaders[response.Headers.UnknownHeaderCount++];
201 | 	connectionHeader->pName = "Connection";
202 | 	connectionHeader->NameLength = sizeof "Connection" - 1;
203 | 	connectionHeader->pRawValue = "Upgrade";
204 | 	connectionHeader->RawValueLength = sizeof "Upgrade" - 1;
205 | 
206 | 	HTTP_REQUEST_ID requestId = request->RequestId;
207 | 	unsigned long responseBytesSent = 0;
208 | 	check(HttpSendHttpResponse(requestQueue, requestId, HTTP_SEND_RESPONSE_FLAG_OPAQUE | HTTP_SEND_RESPONSE_FLAG_MORE_DATA, &response, NULL, &responseBytesSent, NULL, 0, NULL, NULL));
209 | 
210 | 	checkHr(WebSocketEndServerHandshake(websocket));
211 | 	printf("Websocket handshake complete.\n");
212 | 
213 | 	WEB_SOCKET_BUFFER sendData = { 0 };
214 | 	sendData.Data.pbBuffer = (BYTE*)"Hello from server!";
215 | 	sendData.Data.ulBufferLength = sizeof "Hello from server!" - 1;
216 | 	checkHr(WebSocketSend(websocket, WEB_SOCKET_UTF8_MESSAGE_BUFFER_TYPE, &sendData, NULL));
217 | 	for (;;) {
218 | 		WEB_SOCKET_BUFFER buffer = { 0 };
219 | 		unsigned long numBuffers = 1;
220 | 		WEB_SOCKET_ACTION action = 0;
221 | 		WEB_SOCKET_BUFFER_TYPE bufferType = 0;
222 | 		void* context = NULL;
223 | 		checkHr(WebSocketGetAction(websocket, WEB_SOCKET_ALL_ACTION_QUEUE, &buffer, &numBuffers, &action, &bufferType, NULL, &context));
224 | 
225 | 		unsigned long bytesTransferred = 0;
226 | 		if (action == WEB_SOCKET_SEND_TO_NETWORK_ACTION) {
227 | 			assert(numBuffers == 1);
228 | 			HTTP_DATA_CHUNK chunk = { 0 };
229 | 			chunk.DataChunkType = HttpDataChunkFromMemory;
230 | 			chunk.FromMemory.pBuffer = buffer.Data.pbBuffer;
231 | 			chunk.FromMemory.BufferLength = buffer.Data.ulBufferLength;
232 | 			check(HttpSendResponseEntityBody(requestQueue, requestId, HTTP_SEND_RESPONSE_FLAG_MORE_DATA, 1, &chunk, &bytesTransferred, NULL, 0, NULL, NULL));
233 | 			printf("Sent %d bytes.\n", bytesTransferred);
234 | 		}
235 | 		else {
236 | 			assert(action == WEB_SOCKET_INDICATE_SEND_COMPLETE_ACTION);
237 | 			assert(numBuffers == 0);
238 | 			printf("Send completed.\n");
239 | 		}
240 | 
241 | 		WebSocketCompleteAction(websocket, context, bytesTransferred);
242 | 		if (action == WEB_SOCKET_INDICATE_SEND_COMPLETE_ACTION) break;
243 | 	}
244 | 
245 | 	checkHr(WebSocketReceive(websocket, NULL, NULL));
246 | 	for (;;) {
247 | 		WEB_SOCKET_BUFFER buffer = { 0 };
248 | 		unsigned long numBuffers = 1;
249 | 		WEB_SOCKET_ACTION action = 0;
250 | 		WEB_SOCKET_BUFFER_TYPE bufferType = 0;
251 | 		void* context = NULL;
252 | 		checkHr(WebSocketGetAction(websocket, WEB_SOCKET_ALL_ACTION_QUEUE, &buffer, &numBuffers, &action, &bufferType, NULL, &context));
253 | 
254 | 		unsigned long bytesTransferred = 0;
255 | 		if (action == WEB_SOCKET_RECEIVE_FROM_NETWORK_ACTION) {
256 | 			assert(numBuffers == 1);
257 | 			check(HttpReceiveRequestEntityBody(requestQueue, requestId, 0, buffer.Data.pbBuffer, buffer.Data.ulBufferLength, &bytesTransferred, NULL));
258 | 			printf("Received %d bytes.\n", bytesTransferred);
259 | 		}
260 | 		else {
261 | 			assert(action == WEB_SOCKET_INDICATE_RECEIVE_COMPLETE_ACTION);
262 | 			assert(numBuffers == 1);
263 | 			printf("Receive completed: \"%.*s\"\n", buffer.Data.ulBufferLength, buffer.Data.pbBuffer);
264 | 		}
265 | 
266 | 		WebSocketCompleteAction(websocket, context, bytesTransferred);
267 | 		if (action == WEB_SOCKET_INDICATE_RECEIVE_COMPLETE_ACTION) break;
268 | 	}
269 | 
270 | 	printf("Done.\n");
271 | }
272 | 


--------------------------------------------------------------------------------
/x86_cpuid.c:
--------------------------------------------------------------------------------
  1 | #ifdef _MSC_VER
  2 | #	include <intrin.h>
  3 | 	void cpuid(int leaf, int subleaf, int *eax, int *ebx, int *ecx, int *edx) {
  4 | 		int registers[4];
  5 | 		if (subleaf == 0)
  6 | 			__cpuid(registers, leaf);
  7 | 		else
  8 | 			__cpuidex(registers, leaf, subleaf);
  9 | 		*eax = registers[0];
 10 | 		*ebx = registers[1];
 11 | 		*ecx = registers[2];
 12 | 		*edx = registers[3];
 13 | 	}
	// Returns 1 if the CPUID instruction can be used, 0 otherwise.
	// Support is detected by checking that bit 21 of the flags register can be both set and cleared.
	int cpuid_is_supported(void) {
		unsigned bit21 = 1u << 21;

		// Try to set the bit. If it doesn't stick, bail out immediately.
		__writeeflags(__readeflags() | bit21);
		if ((__readeflags() & bit21) == 0)
			return 0;

		// Try to clear the bit again. The masked value is either 0 or (1 << 21), never 1,
		// so the check has to be "still set" (!= 0) rather than "== 1".
		__writeeflags(__readeflags() & ~bit21);
		if ((__readeflags() & bit21) != 0)
			return 0;

		return 1;
	}
 29 | #else
 30 | #	include <cpuid.h>
	// Runs the CPUID instruction for the given leaf/subleaf and stores the resulting
	// register values through the eax/ebx/ecx/edx out pointers.
	void cpuid(int leaf, int subleaf, int *eax, int *ebx, int *ecx, int *edx) {
		int a, b, c, d;
		__cpuid_count(leaf, subleaf, a, b, c, d);
		*eax = a;
		*ebx = b;
		*ecx = c;
		*edx = d;
	}
	// Returns 1 if __get_cpuid_max reports a nonzero highest basic CPUID leaf, 0 otherwise.
	int cpuid_is_supported(void) {
		unsigned highest_basic_leaf = __get_cpuid_max(0, 0);
		return highest_basic_leaf != 0;
	}
 37 | #endif
 38 | 
 39 | #include <stdio.h>
 40 | #include <string.h>
 41 | int extract_bits(int x, int highest, int lowest) {
 42 | 	unsigned u = (int)x;
 43 | 	u <<=  31 - highest;
 44 | 	u >>= (31 - highest) + lowest;
 45 | 	return (int)u;
 46 | }
 47 | int extract_bit(int x, int index) {
 48 | 	return (int)(((unsigned)x >> index) & 1);
 49 | }
 50 | int main(void) {
 51 | 	if (!cpuid_is_supported()) {
 52 | 		printf("CPUID is not supported on this CPU!\n");
 53 | 		return 0;
 54 | 	}
 55 | 
 56 | 	int eax, ebx, ecx, edx;
 57 | 	cpuid(0, 0, &eax, &ebx, &ecx, &edx);
 58 | 
 59 | 	int max_cpuid = eax;
 60 | 	printf("Max CPUID leaf: %d.\n", max_cpuid);
 61 | 
 62 | 	char vendor[13];
 63 | 	memcpy(vendor + 0, &ebx, 4);
 64 | 	memcpy(vendor + 4, &edx, 4); // Note that the string is in ebx:eDx:eCx.
 65 | 	memcpy(vendor + 8, &ecx, 4);
 66 | 	vendor[12] = 0;
 67 | 	printf("Vendor: '%s'.\n", vendor);
 68 | 
 69 | 	if (max_cpuid < 1)
 70 | 		return 0;
 71 | 
 72 | 	cpuid(1, 0, &eax, &ebx, &ecx, &edx);
 73 | 	int stepping_id   = extract_bits(eax,  3, 0);
 74 | 	int model_id      = extract_bits(eax,  7, 4);
 75 | 	int family_id     = extract_bits(eax, 11, 8);
 76 | 	int ext_model_id  = extract_bits(eax, 19, 16);
 77 | 	int ext_family_id = extract_bits(eax, 27, 20);
 78 | 	
 79 | 	int actual_family_id = family_id;
 80 | 	if (family_id == 15)
 81 | 		actual_family_id += ext_family_id;
 82 | 
 83 | 	int actual_model_id = model_id;
 84 | 	if (family_id == 6 || family_id == 15)
 85 | 		actual_model_id += (ext_model_id << 4);
 86 | 
 87 | 	printf("Family: %d.\n", actual_family_id);
 88 | 	printf("Model: %d.\n", actual_model_id);
 89 | 	printf("Stepping: %d.\n", stepping_id);
 90 | 
 91 | 	int supports_hyperthreading_in_theory = extract_bit(edx, 28); // Doesn't mean the CPU is actually hyperthreaded..
 92 | 	int has_clflush = extract_bit(edx, 19);
 93 | 	int cache_line_size = 0;
 94 | 	if (has_clflush)
 95 | 		cache_line_size = 8 * extract_bits(ebx, 15, 8);
 96 | 
 97 | 	cpuid(0x80000000, 0, &eax, &ebx, &ecx, &edx);
 98 | 	int max_cpuid_ex = eax;
 99 | 	char name[48] = "Unknown";
100 | 	if (max_cpuid_ex >= 0x80000004) {
101 | 		cpuid(0x80000002, 0, (int *)name + 0, (int *)name + 1, (int *)name +  2, (int *)name +  3);
102 | 		cpuid(0x80000003, 0, (int *)name + 4, (int *)name + 5, (int *)name +  6, (int *)name +  7);
103 | 		cpuid(0x80000004, 0, (int *)name + 8, (int *)name + 9, (int *)name + 10, (int *)name + 11);
104 | 	}
105 | 	printf("Name: %s\n", name);
106 | 	printf("Cache line size: %d bytes.\n", cache_line_size);
107 | 
108 | 	int num_logical_cores = 1;
109 | 	int num_physical_cores = 1;
110 | 	int l1d_cache_size = 0;
111 | 	int l1i_cache_size = 0;
112 | 	int l2_cache_size = 0;
113 | 	int l3_cache_size = 0;
114 | 	if (strstr(vendor, "AMD")) {
115 | 		if (max_cpuid_ex >= 0x80000008) {
116 | 			cpuid(0x80000008, 0, &eax, &ebx, &ecx, &edx);
117 | 			num_logical_cores = 1 + extract_bits(ecx, 7, 0);
118 | 		} else {
119 | 			cpuid(1, 0, &eax, &ebx, &ecx, &edx);
120 | 			num_logical_cores = extract_bits(ebx, 23, 16);
121 | 		}
122 | 
123 | 		// This really isn't a great indication. Many sources say that CPUID reports hyperthreading even when the processor 
124 | 		// doesn't actually support it. But I can't test this right now since I don't have a non-hyperthreaded AMD chip.
125 | 		if (supports_hyperthreading_in_theory)
126 | 			num_physical_cores = num_logical_cores / 2;
127 | 		else
128 | 			num_physical_cores = num_logical_cores;
129 | 
130 | 		if (max_cpuid_ex >= 0x80000005) {
131 | 			cpuid(0x80000005, 0, &eax, &ebx, &ecx, &edx);
132 | 			l1d_cache_size = extract_bits(ecx, 31, 24);
133 | 			l1i_cache_size = extract_bits(edx, 31, 24);
134 | 		}
135 | 
136 | 		if (max_cpuid_ex >= 0x80000006) {
137 | 			cpuid(0x80000006, 0, &eax, &ebx, &ecx, &edx);
138 | 			l2_cache_size = extract_bits(ecx, 31, 16);
139 | 			l3_cache_size = 512 * extract_bits(edx, 31, 18); // This is reported in units of 512kB.
140 | 		}
141 | 	} else if (strstr(vendor, "Intel")) {
142 | 		if (max_cpuid >= 4) {
143 | 			cpuid(4, 0, &eax, &ebx, &ecx, &edx);
144 | 
145 | 			// The value reported here is not accurate (I'm not sure if that's always the case).
146 | 			// On an i5-7300HQ it reports 8 logical cores with hyperthreading, even though that CPU
147 | 			// doesn't have hyperthreading.. Still this is a decent approximation at least.
148 | 			num_logical_cores  = 1 + extract_bits(eax, 31, 26);
149 | 			num_physical_cores = num_logical_cores;
150 | 			if (supports_hyperthreading_in_theory)
151 | 				num_physical_cores /= 2;
152 | 
153 | 			// Enumerate all caches to find out sizes.
154 | 			for (int index = 0;; ++index) {
155 | 				cpuid(4, index, &eax, &ebx, &ecx, &edx);
156 | 				int type = extract_bits(eax, 4, 0); // 0 - invalid, 1 - data cache, 2 - instruction cache, 3 - unified cache.
157 | 				if (type == 0)
158 | 					break;
159 | 
160 | 				int level = extract_bits(eax, 7, 5);
161 | 				int ways       = 1 + extract_bits(ebx, 31, 22);
162 | 				int partitions = 1 + extract_bits(ebx, 21, 12);
163 | 				int line_size  = 1 + extract_bits(ebx, 11, 0);
164 | 				int sets       = 1 + extract_bits(ecx, 31, 0);
165 | 				int cache_size = ways * partitions * line_size * sets / 1024;
166 | 
167 | 				if (level == 1) {
168 | 					if (type == 1)
169 | 						l1d_cache_size = cache_size;
170 | 					else if (type == 2)
171 | 						l1i_cache_size = cache_size;
172 | 					else if (type == 3) {
173 | 						// For unified L1 caches, set instruction cache size to 0 and set data cache size to the actual cache size.
174 | 						l1i_cache_size = 0;
175 | 						l1d_cache_size = cache_size;
176 | 					}
177 | 				}
178 | 				else if (level == 2) 
179 | 					l2_cache_size = cache_size;
180 | 				else if (level == 3)
181 | 					l3_cache_size = cache_size;
182 | 			}
183 | 		}
184 | 
185 | 		if (max_cpuid >= 0xB) {
186 | 			// This is a much better way of checking the number of cores than with cpuid(4) above.
187 | 			// At least this one is accurate on a i5-7300HQ and i7-8550U.
188 | 			cpuid(0xB, 0, &eax, &ebx, &ecx, &edx);
189 | 			int num_logical_processors_per_physical_core = extract_bits(ebx, 15, 0);
190 | 			cpuid(0xB, 1, &eax, &ebx, &ecx, &edx);
191 | 			num_logical_cores  = extract_bits(ebx, 15, 0);
192 | 			num_physical_cores = num_logical_cores / num_logical_processors_per_physical_core;
193 | 		}
194 | 	}
195 | 	printf("Logical cores: %d.\n", num_logical_cores);
196 | 	printf("Physical cores: %d.\n", num_physical_cores);
197 | 	printf("L1i cache size: %d kB.\n", l1i_cache_size);
198 | 	printf("L1d cache size: %d kB.\n", l1d_cache_size);
199 | 	printf("L2 cahce size: %d kB.\n", l2_cache_size);
200 | 	printf("L3 cahce size: %d kB.\n", l3_cache_size);
201 | 
202 | 	printf("Feature flags: ");
203 | 	cpuid(1, 0, &eax, &ebx, &ecx, &edx);
204 | 	if (extract_bit(edx,  8)) printf("cx8 ");
205 | 	if (extract_bit(ecx, 13)) printf("cx16 ");
206 | 	if (extract_bit(edx,  4)) printf("tsc ");
207 | 	if (extract_bit(edx, 15)) printf("cmov ");
208 | 	if (extract_bit(edx, 23)) printf("mmx ");
209 | 	if (extract_bit(edx, 25)) printf("sse ");
210 | 	if (extract_bit(edx, 26)) printf("sse2 ");
211 | 	if (extract_bit(ecx,  0)) printf("sse3 ");
212 | 	if (extract_bit(ecx,  9)) printf("ssse3 ");
213 | 	if (extract_bit(ecx, 19)) printf("sse41 ");
214 | 	if (extract_bit(ecx, 20)) printf("sse42 ");
215 | 	if (extract_bit(ecx, 28)) printf("avx ");
216 | 	if (extract_bit(ecx, 12)) printf("fma ");
217 | 	if (extract_bit(ecx, 29)) printf("f16c ");
218 | 	if (extract_bit(ecx,  1)) printf("pclmulqdq ");
219 | 	if (extract_bit(ecx, 22)) printf("movbe ");
220 | 	if (extract_bit(ecx, 23)) printf("popcnt ");
221 | 	if (extract_bit(ecx, 25)) printf("aes ");
222 | 	if (extract_bit(ecx, 30)) printf("rdrnd ");
223 | 
224 | 	eax = ebx = ecx = edx = 0;
225 | 	if (max_cpuid >= 7)
226 | 		cpuid(7, 0, &eax, &ebx, &ecx, &edx);
227 | 	int max_cpuid_7 = eax;
228 | 
229 | 	if (extract_bit(ebx,  5)) printf("avx2 ");
230 | 	if (extract_bit(ebx, 16)) printf("avx512_f ");
231 | 	if (extract_bit(ebx, 17)) printf("avx512_dq ");
232 | 	if (extract_bit(ebx, 21)) printf("avx512_ifma ");
233 | 	if (extract_bit(ebx, 26)) printf("avx512_pf ");
234 | 	if (extract_bit(ebx, 27)) printf("avx512_er ");
235 | 	if (extract_bit(ebx, 28)) printf("avx512_cd ");
236 | 	if (extract_bit(ebx, 30)) printf("avx512_bw ");
237 | 	if (extract_bit(ebx, 31)) printf("avx512_vl ");
238 | 	if (extract_bit(ecx,  1)) printf("avx512_vbmi ");
239 | 	if (extract_bit(ecx,  6)) printf("avx512_vbmi2 ");
240 | 	if (extract_bit(ecx, 11)) printf("avx512_vnni ");
241 | 	if (extract_bit(ecx, 12)) printf("avx512_bitalg ");
242 | 	if (extract_bit(ecx, 14)) printf("avx512_vpopcntdq ");
243 | 	if (extract_bit(edx,  2)) printf("avx512_4vnniw ");
244 | 	if (extract_bit(edx,  3)) printf("avx512_4fmaps ");
245 | 	if (extract_bit(edx,  8)) printf("avx512_vp2intersect ");
246 | 	if (extract_bit(edx, 23)) printf("avx512_fp16 ");
247 | 	if (extract_bit(ebx,  3)) printf("bmi1 ");
248 | 	if (extract_bit(ebx,  8)) printf("bmi2 ");
249 | 	if (extract_bit(ebx, 29)) printf("sha ");
250 | 	if (extract_bit(ebx, 18)) printf("rdseed ");
251 | }


--------------------------------------------------------------------------------
/x86_rdtsc_seconds.c:
--------------------------------------------------------------------------------
  1 | // Source: https://gist.github.com/Mic92/12063527bb6d6c5a636502300d2de446
  2 | 
  3 | #ifdef _MSC_VER
  4 | #	include <intrin.h>
  5 | 	void cpuid(int leaf, int subleaf, int *eax, int *ebx, int *ecx, int *edx) {
  6 | 		int registers[4];
  7 | 		if (subleaf == 0)
  8 | 			__cpuid(registers, leaf);
  9 | 		else
 10 | 			__cpuidex(registers, leaf, subleaf);
 11 | 		*eax = registers[0];
 12 | 		*ebx = registers[1];
 13 | 		*ecx = registers[2];
 14 | 		*edx = registers[3];
 15 | 	}
	// Returns 1 if the CPUID instruction can be used, 0 otherwise.
	// Support is detected by checking that bit 21 of the flags register can be both set and cleared.
	int cpuid_is_supported(void) {
		unsigned bit21 = 1u << 21;

		// Try to set the bit. If it doesn't stick, bail out immediately.
		__writeeflags(__readeflags() | bit21);
		if ((__readeflags() & bit21) == 0)
			return 0;

		// Try to clear the bit again. The masked value is either 0 or (1 << 21), never 1,
		// so the check has to be "still set" (!= 0) rather than "== 1".
		__writeeflags(__readeflags() & ~bit21);
		if ((__readeflags() & bit21) != 0)
			return 0;

		return 1;
	}
	// Returns the current time stamp counter value as reported by the __rdtsc intrinsic.
	unsigned long long rdtsc(void) {
		unsigned long long ticks = __rdtsc();
		return ticks;
	}
 34 | #else
 35 | #	include <cpuid.h>
	// Runs the CPUID instruction for the given leaf/subleaf and stores the resulting
	// register values through the eax/ebx/ecx/edx out pointers.
	void cpuid(int leaf, int subleaf, int *eax, int *ebx, int *ecx, int *edx) {
		int a, b, c, d;
		__cpuid_count(leaf, subleaf, a, b, c, d);
		*eax = a;
		*ebx = b;
		*ecx = c;
		*edx = d;
	}
	// Returns 1 if __get_cpuid_max reports a nonzero highest basic CPUID leaf, 0 otherwise.
	int cpuid_is_supported(void) {
		unsigned highest_basic_leaf = __get_cpuid_max(0, 0);
		return highest_basic_leaf != 0;
	}
	// Reads the CPU's 64-bit time stamp counter and returns it.
	unsigned long long rdtsc(void) {
		unsigned lo, hi;
		// RDTSC copies contents of 64-bit TSC into EDX:EAX.
		// Spelled __asm__ __volatile__ because plain `asm` is a GNU extension keyword that is
		// not recognized when compiling in strict ISO mode (-std=c99, -std=c11, ...).
		__asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi));
		return (unsigned long long)lo | ((unsigned long long)hi << 32);
	}
 47 | #endif
 48 | 
 49 | unsigned long long get_tsc_increments_per_second() {
 50 | 	if (!cpuid_is_supported())
 51 | 		return 0;
 52 | 
 53 | 	// extracted from https://github.com/torvalds/linux/blob/b95fffb9b4afa8b9aa4a389ec7a0c578811eaf42/tools/power/x86/turbostat/turbostat.c
 54 | 	int eax_crystal = 0;
 55 | 	int ebx_tsc = 0;
 56 | 	int crystal_hz = 0;
 57 | 	int edx = 0;
 58 | 	cpuid(0x15, 0, &eax_crystal, &ebx_tsc, &crystal_hz, &edx);
 59 | 	if (!ebx_tsc) // This will not work on old Intel processors, or any AMD processor. You really need a fallback..
 60 | 		return 0;
 61 | 
 62 | 	int fms, family, model, ebx, ecx;
 63 | 	cpuid(1, 0, &fms, &ebx, &ecx, &edx);
 64 | 	family = (fms >> 8) & 0xf;
 65 | 	model  = (fms >> 4) & 0xf;
 66 | 	if (family == 0xf)
 67 | 		family += (fms >> 20) & 0xff;
 68 | 	if (family >= 6)
 69 | 		model += ((fms >> 16) & 0xf) << 4;
 70 | 
 71 | 	enum {
 72 | 		INTEL_FAM6_SKYLAKE_L          = 0x4E,
 73 | 		INTEL_FAM6_SKYLAKE            = 0x5E,
 74 | 		INTEL_FAM6_KABYLAKE_L         = 0x8E,
 75 | 		INTEL_FAM6_KABYLAKE           = 0x9E,
 76 | 		INTEL_FAM6_COMETLAKE          = 0xA5,
 77 | 		INTEL_FAM6_COMETLAKE_L        = 0xA6,
 78 | 		INTEL_FAM6_ATOM_GOLDMONT      = 0x5C,
 79 | 		INTEL_FAM6_ATOM_GOLDMONT_D    = 0x5F,
 80 | 		INTEL_FAM6_ATOM_GOLDMONT_PLUS = 0x7A,
 81 | 		INTEL_FAM6_ATOM_TREMONT_D     = 0x86,
 82 | 	};
 83 | 
 84 | 	if (!crystal_hz) {
 85 | 		switch(model) {
 86 | 			case INTEL_FAM6_SKYLAKE_L:
 87 | 			case INTEL_FAM6_SKYLAKE:
 88 | 			case INTEL_FAM6_KABYLAKE_L:
 89 | 			case INTEL_FAM6_KABYLAKE:
 90 | 			case INTEL_FAM6_COMETLAKE_L:
 91 | 			case INTEL_FAM6_COMETLAKE:
 92 | 				crystal_hz = 24000000;
 93 | 				break;
 94 | 			case INTEL_FAM6_ATOM_GOLDMONT_D:
 95 | 			case INTEL_FAM6_ATOM_TREMONT_D:
 96 | 				crystal_hz = 25000000;
 97 | 				break;
 98 | 			case INTEL_FAM6_ATOM_GOLDMONT:
 99 | 			case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
100 | 				crystal_hz = 19200000;
101 | 				break;
102 | 		}
103 | 	}
104 | 
105 | 	return (unsigned long long)crystal_hz * ebx_tsc / eax_crystal;
106 | }
107 | 
108 | #include <stdio.h>
109 | #include <time.h>
// Demo: endlessly prints the elapsed wall-clock time (from timespec_get) next to the
// elapsed time computed from the TSC, both measured from program start.
int main(void) {
	unsigned long long ticks_per_second = get_tsc_increments_per_second();
	if (ticks_per_second == 0) {
		printf("Couldn't get TSC frequency on this CPU.\n");
		return 0;
	}

	double seconds_per_tick = 1.0 / ticks_per_second;

	// Take the reference readings of both clocks.
	struct timespec now;
	timespec_get(&now, TIME_UTC);
	unsigned long long start_ns = (unsigned long long)now.tv_sec * 1000000000 + now.tv_nsec;
	unsigned long long start_ticks = rdtsc();

	// Never terminates on its own.
	while (1) {
		timespec_get(&now, TIME_UTC);
		unsigned long long current_ns = (unsigned long long)now.tv_sec * 1000000000 + now.tv_nsec;
		unsigned long long current_ticks = rdtsc();

		double wall_seconds = (current_ns - start_ns) * 1e-9;
		double tsc_seconds = (current_ticks - start_ticks) * seconds_per_tick;
		printf("TS %.9f - TSC %.9f\n", wall_seconds, tsc_seconds);
	}
}


--------------------------------------------------------------------------------