├── .gitignore ├── LICENSE ├── README.md ├── bistack_allocator.c ├── buddy_allocator.c ├── bytes_to_cstring.py ├── capsule_triangle_sweep.glsl ├── ccd.c ├── dxbc_input_layout_bytecode_generator.c ├── escape_string.c ├── ffmpeg.sh ├── freelist.c ├── generic_list.c ├── generic_set.c ├── generic_table.c ├── handle_manager.c ├── hash_set.c ├── hash_table.c ├── mpmc_queue.c ├── mpmc_queue.cpp ├── mpsc_queue.c ├── mpsc_queue.cpp ├── normalize_path.c ├── platform_detection.c ├── precise_sleep.c ├── priority_queue.c ├── slab_allocator.c ├── snprintf.c ├── stack_allocator.c ├── string_buffer.c ├── string_set.c ├── string_slab.c ├── string_table.c ├── strtod.c ├── tlsf_allocator.c ├── win32_list_directory.c ├── win32_stacktrace.c ├── win32_thread_queue.c ├── win32_websocket_client.c ├── win32_websocket_server.c ├── x86_cpuid.c └── x86_rdtsc_seconds.c /.gitignore: -------------------------------------------------------------------------------- 1 | .vs 2 | bin 3 | *.sln 4 | *.vcxproj* 5 | *.natvis -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 
#include <stdint.h> // uintptr_t
#include <string.h> // memcpy, memmove - only needed for realloc

// Double-ended ("bistack") bump allocator over a caller-provided buffer.
// One stack grows upward from the start of the buffer (left), the other grows
// downward from the end of the buffer (right). Both stacks share the same
// free space in the middle.
//
// NOTE(review): every capacity check uses >=, so the two cursors are never
// allowed to meet; one byte of the buffer always stays unused. The existing
// test buffers (17, 23, 3 bytes) are sized around this, so it is kept as-is.
struct allocator {
	void *buffer;  // Start of the managed memory.
	int capacity;  // Size of the buffer in bytes.
	int lcursor;   // Bytes consumed from the left (low address) end.
	int rcursor;   // Bytes consumed from the right (high address) end.
};

// Allocate size bytes from the left stack. Alignment must be a power of 2.
// Returns 0 if the request does not fit.
void *allocate_left(struct allocator *allocator, int size, int alignment) {
	uintptr_t mask = (uintptr_t)alignment - 1; // Alignment must be a power of 2.
	uintptr_t unaligned = (uintptr_t)allocator->buffer + allocator->lcursor;
	uintptr_t aligned = (unaligned + mask) & ~mask; // Round up to alignment.
	int new_lcursor = allocator->lcursor + size + (int)(aligned - unaligned);
	if (new_lcursor >= allocator->capacity - allocator->rcursor)
		return 0;

	allocator->lcursor = new_lcursor;
	return (void *)aligned;
}

// Allocate size bytes from the right stack. Alignment must be a power of 2.
// Returns 0 if the request does not fit.
void *allocate_right(struct allocator *allocator, int size, int alignment) {
	uintptr_t mask = (uintptr_t)alignment - 1; // Alignment must be a power of 2.
	// Start one alignment unit too low so that adding alignment and masking
	// rounds the block start DOWN to the requested alignment.
	uintptr_t unaligned = (uintptr_t)allocator->buffer + allocator->capacity - allocator->rcursor - size - alignment;
	uintptr_t aligned = (unaligned + alignment) & ~mask;
	int new_rcursor = allocator->rcursor + size + (int)(unaligned + alignment - aligned);
	if (allocator->lcursor >= allocator->capacity - new_rcursor)
		return 0;

	allocator->rcursor = new_rcursor;
	return (void *)aligned;
}

// Release block if it is the most recent left allocation; otherwise do nothing.
// Alignment padding in front of the block is not reclaimed.
void deallocate_left(struct allocator *allocator, void *block, int size) {
	if ((char *)block + size == (char *)allocator->buffer + allocator->lcursor)
		allocator->lcursor -= size;
}

// Release block if it is the most recent right allocation; otherwise do nothing.
// Alignment padding behind the block is not reclaimed.
void deallocate_right(struct allocator *allocator, void *block, int size) {
	if (block == (char *)allocator->buffer + allocator->capacity - allocator->rcursor)
		allocator->rcursor -= size;
}

// Resize a left allocation. The block is resized in place when it is the most
// recent left allocation and already satisfies alignment; otherwise a new
// block is allocated and the contents are copied. A null block behaves like
// allocate_left. Returns 0 (leaving the allocator untouched) on failure.
void *reallocate_left(struct allocator *allocator, void *block, int old_size, int new_size, int alignment) {
	// Handle "no previous block" up front: doing pointer arithmetic on, or
	// memcpy-ing from, a null pointer is undefined even for 0 bytes.
	if (!block)
		return allocate_left(allocator, new_size, alignment);

	uintptr_t mask = (uintptr_t)alignment - 1;
	if ((char *)block + old_size == (char *)allocator->buffer + allocator->lcursor && ((uintptr_t)block & mask) == 0) {
		// Block is on top of the left stack: just move the cursor.
		int new_lcursor = allocator->lcursor + new_size - old_size;
		if (new_lcursor >= allocator->capacity - allocator->rcursor)
			return 0;
		allocator->lcursor = new_lcursor;
		return block;
	}

	void *result = allocate_left(allocator, new_size, alignment);
	if (result) {
		int to_copy = new_size < old_size ? new_size : old_size;
		if (to_copy > 0)
			memcpy(result, block, (size_t)to_copy);
	}
	return result;
}

// Resize a right allocation. A null block behaves like allocate_right.
// Returns 0 (leaving the allocator untouched) on failure.
//
// The right stack grows toward lower addresses, so a block on top of the stack
// cannot keep its start address when its size changes: the block is popped and
// re-allocated at its new (aligned) start, and the contents are moved there.
// The returned pointer therefore usually differs from block.
void *reallocate_right(struct allocator *allocator, void *block, int old_size, int new_size, int alignment) {
	if (!block)
		return allocate_right(allocator, new_size, alignment);

	int saved_rcursor = allocator->rcursor;
	if ((char *)block == (char *)allocator->buffer + allocator->capacity - allocator->rcursor)
		allocator->rcursor -= old_size; // Pop so the space can be reused by the new block.

	void *result = allocate_right(allocator, new_size, alignment);
	if (!result) {
		allocator->rcursor = saved_rcursor; // Undo the pop; old block stays valid.
		return 0;
	}

	int to_copy = new_size < old_size ? new_size : old_size;
	if (to_copy > 0)
		memmove(result, block, (size_t)to_copy); // Old and new ranges may overlap.
	return result;
}
assert(!null); 107 | } 108 | 109 | { 110 | _Alignas(16) char buffer[23]; 111 | struct allocator allocator = { .buffer = buffer, .capacity = sizeof buffer }; 112 | 113 | char *c = allocate_right(&allocator, sizeof(char), _Alignof(char)); 114 | short *s = allocate_right(&allocator, sizeof(short), _Alignof(short)); 115 | int *i = allocate_right(&allocator, sizeof(int), _Alignof(int)); 116 | long long *l = allocate_right(&allocator, sizeof(long long), _Alignof(long long)); 117 | long long *null = allocate_right(&allocator, sizeof(long long), _Alignof(long long)); 118 | assert(c && (uintptr_t)c % _Alignof(char) == 0); 119 | assert(s && (uintptr_t)s % _Alignof(short) == 0); 120 | assert(i && (uintptr_t)i % _Alignof(int) == 0); 121 | assert(l && (uintptr_t)l % _Alignof(long long) == 0); 122 | assert(!null); 123 | } 124 | 125 | { 126 | _Alignas(16) char buffer[40]; 127 | struct allocator allocator = { .buffer = buffer, .capacity = sizeof buffer }; 128 | 129 | char *lc = allocate_left(&allocator, sizeof(char), _Alignof(char)); 130 | char *rc = allocate_right(&allocator, sizeof(char), _Alignof(char)); 131 | short *ls = allocate_left(&allocator, sizeof(short), _Alignof(short)); 132 | short *rs = allocate_right(&allocator, sizeof(short), _Alignof(short)); 133 | int *li = allocate_left(&allocator, sizeof(int), _Alignof(int)); 134 | int *ri = allocate_right(&allocator, sizeof(int), _Alignof(int)); 135 | long long *ll = allocate_left(&allocator, sizeof(long long), _Alignof(long long)); 136 | long long *rl = allocate_right(&allocator, sizeof(long long), _Alignof(long long)); 137 | long long *lnull = allocate_left(&allocator, sizeof(long long), _Alignof(long long)); 138 | long long *rnull = allocate_right(&allocator, sizeof(long long), _Alignof(long long)); 139 | assert(lc && (uintptr_t)lc % _Alignof(char) == 0); 140 | assert(rc && (uintptr_t)rc % _Alignof(char) == 0); 141 | assert(ls && (uintptr_t)ls % _Alignof(short) == 0); 142 | assert(rs && (uintptr_t)rs % _Alignof(short) == 
0); 143 | assert(li && (uintptr_t)li % _Alignof(int) == 0); 144 | assert(ri && (uintptr_t)ri % _Alignof(int) == 0); 145 | assert(ll && (uintptr_t)ll % _Alignof(long long) == 0); 146 | assert(rl && (uintptr_t)rl % _Alignof(long long) == 0); 147 | assert(!lnull); 148 | assert(!rnull); 149 | } 150 | 151 | { 152 | char buffer[3]; 153 | struct allocator allocator = { .buffer = buffer, .capacity = sizeof buffer }; 154 | char *l = allocate_left(&allocator, 1, 1); 155 | char *r = allocate_right(&allocator, 1, 1); 156 | assert(l && r && l != r); 157 | } 158 | 159 | { 160 | _Alignas(8) char buffer[17]; 161 | struct allocator allocator = { .buffer = buffer + 1, .capacity = sizeof buffer - 1 }; 162 | char *c = reallocate_left(&allocator, NULL, 0, sizeof(char), _Alignof(char)); 163 | assert(c && (uintptr_t)c % _Alignof(char) == 0); 164 | short *s = reallocate_left(&allocator, c, sizeof(char), sizeof(short), _Alignof(short)); 165 | assert(s && (uintptr_t)s % _Alignof(short) == 0); 166 | int *i = reallocate_left(&allocator, s, sizeof(short), sizeof(int), _Alignof(int)); 167 | assert(i && (uintptr_t)i % _Alignof(int) == 0); 168 | long *l = reallocate_left(&allocator, i, sizeof(int), sizeof(long long), _Alignof(long long)); 169 | assert(l && (uintptr_t)l % _Alignof(long long) == 0); 170 | int mark = allocator.lcursor; 171 | deallocate_left(&allocator, l, sizeof(long long)); 172 | assert(mark - allocator.lcursor >= sizeof(long long)); 173 | } 174 | 175 | { 176 | _Alignas(8) char buffer[32]; 177 | struct allocator allocator = { .buffer = buffer, .capacity = sizeof buffer }; 178 | char *c = reallocate_right(&allocator, NULL, 0, sizeof(char), _Alignof(char)); 179 | assert(c && (uintptr_t)c % _Alignof(char) == 0); 180 | short *s = reallocate_right(&allocator, c, sizeof(char), sizeof(short), _Alignof(short)); 181 | assert(s && (uintptr_t)s % _Alignof(short) == 0); 182 | int *i = reallocate_right(&allocator, s, sizeof(short), sizeof(int), _Alignof(int)); 183 | assert(i && (uintptr_t)i % 
// Buddy allocator over a fixed, power-of-two sized memory region.
// O(log N) allocation; deallocation additionally walks one freelist per merge
//   level to confirm that the buddy is free *at the same size* before merging
// 1/4 memory wasted on average, best fit
// 2 pointer header, 16/8 byte on 64/32-bit
// 16/8 byte min allocation on 64/32-bit
// cannot be expanded at runtime

#include <stdint.h> // intptr_t
#include <string.h> // memcpy
#include <assert.h>

// Block header. The two views overlap on purpose:
//  - a block in use stores free == 0 and its size in bytes (header included);
//  - a free block stores its freelist links, so `free` aliases the non-null
//    `next` pointer and reads as non-zero.
// A free block does not remember its own size; that is implied by the freelist
// it is linked into.
union node {
	struct {
		intptr_t free;
		intptr_t size;
	};
	struct {
		union node *next;
		union node *prev;
	};
};

struct heap {
	void *memory;             // Start of the managed region.
	int capacity;             // Size of the region in bytes, a power of 2.
	union node freelists[32]; // freelists[k]: circular list of free 2^k byte blocks.
};

// Push node to the front of the circular list headed by list.
static void list_insert(union node *list, union node *node) {
	node->next = list->next;
	node->prev = list;
	list->next->prev = node;
	list->next = node;
}

// Unlink node from whichever circular list it is in.
static void list_remove(union node *node) {
	node->next->prev = node->prev;
	node->prev->next = node->next;
}

// Return the smallest k such that 2^k >= x (0 for x <= 1).
int ceillog2(int x) {
	if (x <= 1)
		return 0;
	int log2 = 0;
	// Unsigned arithmetic so that probing 2^31 is well defined.
	while (log2 < 31 && (1u << log2) < (unsigned)x)
		++log2;
	return log2;
}

// Return whether the block starting at buddy is free AND exactly size bytes.
// A non-zero `free` word alone is not enough: a buddy that was split keeps a
// free header at the same address whenever its first sub-block is free, while
// other parts of it may still be allocated.
static int buddy_is_free(const struct heap *heap, const union node *buddy, intptr_t size) {
	if (!buddy->free)
		return 0;
	const union node *list = &heap->freelists[ceillog2((int)size)];
	for (const union node *it = list->next; it != list; it = it->next)
		if (it == buddy)
			return 1;
	return 0;
}

// Split the 2^log2 byte block at node into halves for as long as the lower
// half still holds `needed` bytes. Upper halves go onto the freelists.
// Returns the log2 of the size node ends up with.
static int split_block(struct heap *heap, union node *node, int log2, int needed) {
	while (log2 > 0 && (1 << (log2 - 1)) >= needed) {
		--log2;
		void *memory = (char *)node + ((intptr_t)1 << log2);
		list_insert(&heap->freelists[log2], memory);
	}
	return log2;
}

// Set up heap to manage capacity bytes starting at memory.
void initialize(struct heap *heap, void *memory, int capacity) {
	// capacity must be a power of 2 and big enough to hold one header
	assert(capacity > 0 && (capacity & (capacity - 1)) == 0);
	assert((size_t)capacity >= sizeof(union node));

	heap->memory = memory;
	heap->capacity = capacity;
	for (int i = 0; i < 32; ++i) {
		union node *list = &heap->freelists[i];
		list->next = list;
		list->prev = list;
	}

	// The whole region starts out as a single free block of `capacity` bytes.
	list_insert(&heap->freelists[ceillog2(capacity)], memory);
}

// Allocate size bytes. Returns 0 when no block is large enough.
void *allocate(struct heap *heap, int size) {
	// you could clamp to 0, or return NULL
	assert(size >= 0);
	if (size > heap->capacity)
		return 0; // can never fit; also keeps `needed` from overflowing

	int needed = size + (int)sizeof(union node);
	for (int log2 = ceillog2(needed); log2 < 32; ++log2) {
		union node *list = &heap->freelists[log2];
		if (list->next == list)
			continue;

		union node *node = list->next;
		list_remove(node);

		// split node to smallest size that fits
		log2 = split_block(heap, node, log2, needed);

		node->free = 0;
		node->size = (intptr_t)1 << log2;
		return (char *)node + sizeof(union node);
	}
	return 0;
}

// Return block to the heap, merging it with free buddies. Null is ignored.
void deallocate(struct heap *heap, void *block) {
	if (!block)
		return;

	assert(block >= heap->memory); // block isn't from this heap

	void *header = (char *)block - sizeof(union node);
	union node *node = header;

	assert(!node->free); // double free
	assert((char *)node + node->size <= (char *)heap->memory + heap->capacity); // block isn't from this heap.

	// combine neighboring free nodes
	uintptr_t base = (uintptr_t)heap->memory;
	while (node->size < heap->capacity) {
		// the buddy node is always just a bitflip away
		uintptr_t nodep = (uintptr_t)node - base;
		uintptr_t buddyp = nodep ^ (uintptr_t)node->size;
		union node *buddy = (union node *)(buddyp + base);
		if (!buddy_is_free(heap, buddy, node->size))
			break;

		list_remove(buddy);

		intptr_t size = node->size;
		node = node < buddy ? node : buddy;
		node->size = 2 * size;
	}

	list_insert(&heap->freelists[ceillog2((int)node->size)], node);
}

// Resize block to size bytes, in place when possible. A null block allocates;
// size 0 frees and returns 0. Returns 0 (block stays valid) when out of memory.
void *reallocate(struct heap *heap, void *block, int size) {
	// you could clamp to 0, or return NULL
	assert(size >= 0);

	if (!block)
		return allocate(heap, size);
	if (!size) {
		deallocate(heap, block);
		return 0;
	}

	assert(block >= heap->memory); // block isn't from this heap

	void *header = (char *)block - sizeof(union node);
	union node *node = header;

	assert(!node->free); // double free
	assert((char *)node + node->size <= (char *)heap->memory + heap->capacity); // block isn't from this heap.

	if (size > heap->capacity)
		return 0; // allocation doesn't fit in the heap

	int needed = size + (int)sizeof(union node);
	if (needed > node->size) {
		if (needed > heap->capacity)
			return 0; // allocation doesn't fit in the heap

		// try to merge with neighboring free buddies
		intptr_t oldsize = node->size;
		uintptr_t base = (uintptr_t)heap->memory;
		for (;;) {
			// we can only merge with the buddy if we are the "left" buddy
			uintptr_t nodep = (uintptr_t)node - base;
			if (nodep & (uintptr_t)node->size)
				break; // we are the "right" buddy so we can't merge

			uintptr_t buddyp = nodep ^ (uintptr_t)node->size;
			union node *buddy = (union node *)(buddyp + base);
			if (!buddy_is_free(heap, buddy, node->size))
				break; // buddy isn't free at this size so we can't merge

			// ok we can merge with this buddy
			list_remove(buddy);
			node->size *= 2;

			if (node->size >= needed)
				return block;
		}

		// we couldn't reallocate in-place so undo any growth we've done
		while (node->size > oldsize) {
			node->size /= 2;

			// add buddy back to the freelist
			void *memory = (char *)node + node->size;
			list_insert(&heap->freelists[ceillog2((int)node->size)], memory);
		}

		// make a new allocation and copy the old one
		void *copy = allocate(heap, size);
		if (!copy)
			return 0; // out of memory
		// Only the payload belongs to the caller; node->size includes the header.
		memcpy(copy, block, (size_t)oldsize - sizeof(union node));
		deallocate(heap, block);
		return copy;
	}
	else {
		// split off as many buddies from the node as we can, and record the
		// new size so a later deallocate pairs the block with the right buddy
		int log2 = split_block(heap, node, ceillog2((int)node->size), needed);
		node->size = (intptr_t)1 << log2;
		return block;
	}
}
# Convert python byte array into a C string literal.
# Useful for baking data directly into C executable.
#
# The generated C string is NOT 0 terminated by default.
# If you want 0 termination, append b'\0' at the end of the input.
#
# The generated string literal is close to optimal in terms of
# source code length. It's possible to get it slightly shorter,
# but not in a way that's portable or doesn't produce compiler warnings.

def bytes_to_cstring(name: str, data: bytes, maxwidth: int) -> str:
    """Return C source defining `name` as an unsigned char array holding `data`.

    name     -- identifier of the generated array.
    data     -- raw bytes to embed.
    maxwidth -- a new literal line is started once the current one has grown
                past this many characters (lines can exceed it slightly).

    The array is declared with exactly len(data) elements, so no terminating
    0 is stored unless `data` itself ends in one.
    """
    ESCAPE = {
        ord('\a'): '\\a',
        ord('\b'): '\\b',
        ord('\f'): '\\f',
        ord('\n'): '\\n',
        ord('\r'): '\\r',
        ord('\t'): '\\t',
        ord('\v'): '\\v',
        ord('\\'): '\\\\',
        ord('\"'): '\\"',
    }
    lines = []
    line = ''
    prevoct = False    # Previous output was an octal escape (a digit would extend it).
    prevquery = False  # Previous output ended in '?' (another '?' could start a trigraph).
    for byte in data:
        if len(line) > maxwidth:
            lines.append('\t"'+line+'"')
            line = ''
            prevoct = False
            prevquery = False
        if byte == ord('?'):
            # "??x" can be a trigraph, which C compilers in strict ISO mode
            # replace before parsing the literal. Escaping every '?' that
            # directly follows another keeps "??" out of the generated source.
            line += '\\?' if prevquery else '?'
            prevoct = False
            prevquery = True
            continue
        prevquery = False
        if byte in ESCAPE:
            line += ESCAPE[byte]
            prevoct = False
        elif ord('0') <= byte <= ord('9'):
            if prevoct:
                # A literal digit here would be read as part of the preceding
                # octal escape, so emit the digit as an octal escape as well.
                line += '\\%o' % byte
            else:
                line += chr(byte)
        elif ord(' ') <= byte <= ord('~'):
            line += chr(byte)
            prevoct = False
        else:
            line += '\\%o' % byte
            prevoct = True
    if len(line) > 0:
        lines.append('\t"'+line+'"')
    result = ''
    result += 'static const unsigned char '+name+'['+str(len(data))+'] =\n'
    result += '\n'.join(lines)+';\n'
    return result

TEST = bytes([55, 138, 87, 147, 13, 123, 230, 172, 237, 133])
print(bytes_to_cstring('TEST', TEST, 80))
#version 460

#define EPSILON 1e-5 // Used to test if float is close to 0. Tweak this if you get problems.

struct Sweep {
    float time;   // Non-negative time of first contact.
    float depth;  // Non-negative penetration depth if objects start initially colliding.
    vec3  point;  // Point of first-contact. Only updated when contact occurs.
    vec3  normal; // Unit-length collision normal. Only updated when contact occurs.
};

// Return whether point P is contained inside 3D region delimited by triangle T0,T1,T2 edges.
bool pointInsideTriangle(vec3 p, vec3 t0, vec3 t1, vec3 t2) {
    // Real-Time Collision Detection: 3.4: Barycentric Coordinates (pages 46-52).
    //
    // The book also has a subsection dedicated to point inside triangle tests:
    // Real-Time Collision Detection: 5.4.2: Testing Point in Triangle (pages 203-206).
    // But those tests only work for CCW triangles. This seems to work for either orientation.
    vec3 t01 = t1 - t0;
    vec3 t02 = t2 - t0;
    vec3 t0p = p - t0;
    float t01t01 = dot(t01,t01);
    float t01t02 = dot(t01,t02);
    float t02t02 = dot(t02,t02);
    float t0pt01 = dot(t0p,t01);
    float t0pt02 = dot(t0p,t02);
    float denom = t01t01*t02t02 - t01t02*t01t02;

    // Normally I would have to divide vd,wd by denom to get v,w. But divisions are
    // expensive and cause troubles around 0. If denom isn't negative then we don't
    // ever need to divide. If in the future it does turn out denom can be negative
    // then we can always multiply by denom instead of dividing to keep sign the same.
    float vd = t02t02*t0pt01 - t01t02*t0pt02;
    float wd = t01t01*t0pt02 - t01t02*t0pt01;
    return vd >= 0 && wd >= 0 && vd + wd <= denom;
}
// Return whether point P is contained inside 3D region delimited by parallelogram P0,P1,P2 edges.
bool pointInsideParallelogram(vec3 p, vec3 p0, vec3 p1, vec3 p2) {
    // There may be a better way.
    // https://math.stackexchange.com/questions/4381852/point-in-parallelogram-in-3d-space
    vec3 p3 = p2 + (p1 - p0); // Fourth corner, opposite to p0.
    return pointInsideTriangle(p,p0,p1,p2) || pointInsideTriangle(p,p1,p3,p2);
}
// Return whether point P is contained inside a triangular prism A0,A1,A2-B0,B1,B2.
bool pointInsideTriangularPrism(vec3 p, vec3 a0, vec3 a1, vec3 a2, vec3 b0, vec3 b1, vec3 b2) {
    vec3 faces[5][3] = { { a0,a1,a2 }, { b0,b2,b1 }, { a0,b0,a1 }, { a1,b1,a2 }, { a2,b2,a0 } };
    float sgn = 0;
    for (int i = 0; i < faces.length(); i++) {
        vec3 p0 = faces[i][0];
        vec3 p1 = faces[i][1];
        vec3 p2 = faces[i][2];

        // Check which side of plane point is in. If it's always on the same side, it's colliding.
        vec3 p01 = p1 - p0;
        vec3 p02 = p2 - p0;
        vec3 n = cross(p01,p02);
        float d = dot(n,p - p0);
        if (i == 0) sgn = d; // First face fixes the reference side.
        if (sgn*d <= 0)
            return false;
    }
    return true;
}
// Sweep sphere C,r with velocity V against plane N of triangle T0,T1,T2, ignoring edges.
// N is expected to be unit length and oriented towards the sphere.
// Updates sweep and returns true only if the contact is earlier than sweep.time.
bool sweepSphereTrianglePlane(inout Sweep sweep, vec3 c, float r, vec3 v, vec3 t0, vec3 t1, vec3 t2, vec3 n) {
    // Real-Time Collision Detection 5.5.3: Intersecting Moving Sphere Against Plane (pages 219-223).
    float t;
    float d = dot(n,c - t0); // Signed distance from sphere center to plane.
    float pen = r - d;
    if (pen > 0)
        t = 0; // Sphere already starts colliding with triangle plane.
    else {
        // Sphere isn't immediately colliding with the plane. Check if it's moving away.
        float denom = dot(n,v);
        if (denom >= 0)
            return false; // Sphere is moving away from plane.

        // Sphere will collide with plane at some point.
        t = (r - d)/denom;
        pen = 0;
    }

    // If sphere misses entire triangle plane, then it definitely misses the triangle too.
    if (t >= sweep.time)
        return false;

    // Is the plane collision point inside the triangle?
    // Real-Time Collision Detection: 5.4.2: Testing Point in Triangle (pg 203-206).
    vec3 collision = c + t*v - r*n;
    if (!pointInsideTriangle(collision,t0,t1,t2))
        return false;

    // Plane collision point is inside the triangle. So the sphere collides with the triangle.
    sweep.time = t;
    sweep.depth = pen;
    sweep.point = collision;
    sweep.normal = n;
    return true;
}
// Sweep sphere C,r with velocity V against plane N of parallelogram P0,P1,P2 ignoring edges.
// N is expected to be unit length and oriented towards the sphere.
// Updates sweep and returns true only if the contact is earlier than sweep.time.
bool sweepSphereParallelogramPlane(inout Sweep sweep, vec3 c, float r, vec3 v, vec3 p0, vec3 p1, vec3 p2, vec3 n) {
    // Real-Time Collision Detection 5.5.3: Intersecting Moving Sphere Against Plane (pages 219-223).
    float t;
    float d = dot(n,c - p0); // Signed distance from sphere center to plane (same form as the triangle sweep).
    float pen = r - d;
    if (pen > 0)
        t = 0; // Sphere already starts colliding with the quad plane.
    else {
        // Sphere isn't immediately colliding with the plane. Check if it's moving away.
        float denom = dot(n,v);
        if (denom >= 0)
            return false; // Sphere is moving away from plane.

        // Sphere will collide with plane at some point.
        t = (r - d)/denom;
        pen = 0;
    }

    // If sphere misses entire quad plane, then it definitely misses the quad too.
    if (t >= sweep.time)
        return false;

    // Is the plane collision point inside the quad?
    // Real-Time Collision Detection: 5.4.2: Testing Point in Triangle (pages 203-206).
    vec3 collision = c + t*v - r*n;
    if (!pointInsideParallelogram(collision,p0,p1,p2))
        return false;

    // Plane collision point is inside the quad. So the sphere collides with the quad.
    sweep.time = t;
    sweep.depth = pen;
    sweep.point = collision;
    sweep.normal = n;
    return true;
}
// Sweep point P with velocity V against sphere S,r.
// fallbackNormal is used when the contact point coincides with the sphere center.
bool sweepPointSphere(inout Sweep sweep, vec3 p, vec3 v, vec3 s, float r, vec3 fallbackNormal) {
    // Real-Time Collision Detection 5.3.2: Intersecting Ray or Segment Against Sphere (pages 177-179).

    // Set up quadratic equation.
    vec3 d = p - s;
    float b = dot(d,v);
    float c = dot(d,d) - r*r;
    if (c > 0 && b > 0)
        return false; // Point starts outside (c > 0) and moves away from sphere (b > 0).
    float a = dot(v,v);

    float t;
    if (a <= 0) {
        // Point isn't moving, so the quadratic degenerates (division by 0).
        if (c > 0)
            return false; // Stationary point outside the sphere never hits it.
        t = 0;
    } else {
        float discr = b*b - a*c;
        if (discr < 0)
            return false; // Point misses sphere.

        // Point hits sphere. Compute time of first impact.
        t = (-b - sqrt(discr))/a;
    }
    if (t >= sweep.time)
        return false;

    // The sphere is the first thing the point hits so far.
    t = max(t, 0);
    vec3 collision = p + t*v;
    vec3 vec = collision - s;
    float len = length(vec);
    sweep.time = t;
    sweep.depth = t > 0 ? 0 : r - len;
    sweep.point = collision;
    sweep.normal = len >= EPSILON ? vec/len : fallbackNormal;
    return true;
}
// Sweep point P with velocity V against cylinder C0,C1,r, ignoring the endcaps.
// fallbackNormal is used when the contact point lies on the cylinder axis.
// NOTE(review): the contact point is not re-checked against the C0..C1 extent after
// solving for t, and a start inside the cylinder (t < 0) is reported as a miss in
// the non-parallel case - confirm both are intended.
bool sweepPointUncappedCylinder(inout Sweep sweep, vec3 p, vec3 v, vec3 c0, vec3 c1, float r, vec3 fallbackNormal) {
    // Real-Time Collision Detection 5.3.7: Intersecting Ray or Segment Against Cylinder (pages 194-198).

    // Test if swept point is fully outside of either endcap.
    vec3 n = c1 - c0;
    vec3 d = p - c0;
    float dn = dot(d,n);
    float vn = dot(v,n);
    float nn = dot(n,n);
    if (dn < 0 && dn + vn < 0)
        return false; // Fully outside c0 end of cylinder.
    if (dn > nn && dn + vn > nn)
        return false; // Fully outside c1 end of cylinder.

    // Set up quadratic equations and check if sweep direction is parallel to cylinder.
    float t;
    float vv = dot(v,v);
    float dv = dot(d,v);
    float dd = dot(d,d);
    float a = nn*vv - vn*vn;
    float c = nn*(dd - r*r) - dn*dn;
    if (a < EPSILON) {
        // Sweep direction is parallel to cylinder.
        if (c > 0)
            return false; // Point starts outside of cylinder, so it never collides.
        if (dn < 0)
            return false; // Point starts outside of c0 endcap.
        if (dn > nn)
            return false; // Point starts outside of c1 endcap.
        t = 0;
    } else {
        // Sweep direction is not parallel to cylinder. Solve for time of first contact.
        float b = nn*dv - vn*dn;
        float discr = b*b - a*c;
        if (discr < 0)
            return false; // Sweep misses cylinder.
        t = (-b - sqrt(discr))/a;
    }

    // Check if the sweep missed, or if it hits but another collision happens sooner.
    if (t < 0 || t >= sweep.time)
        return false;

    // This is the first collision. Find the closest point on the center of the cylinder.
    vec3 collision = p + t*v;
    vec3 center;
    if (nn < EPSILON)
        center = c0; // The cylinder is actually a circle.
    else
        center = c0 + (dot(collision - c0,n)/nn)*n;

    // Update collision time, depth, and normal.
    vec3 vec = collision - center;
    float len = length(vec);
    float depth = r - len;
    sweep.time = t;
    sweep.depth = t > 0 ? 0 : depth;
    sweep.point = collision;
    sweep.normal = len >= EPSILON ? vec/len : fallbackNormal;
    return true;
}

// Sweep a capsule C0,C1,Cr with velocity Cv against the triangle T0,T1,T2.
//   s         sweep result; s.time must hold the current earliest contact time on entry
//   c0,c1     capsule line segment endpoints
//   r         capsule radius
//   v         capsule velocity
//   t0,t1,t2  3 triangle vertices
//   returns   whether the capsule and triangle intersect
bool sweepCapsuleTriangle(inout Sweep s, vec3 c0, vec3 c1, float r, vec3 v, vec3 t0, vec3 t1, vec3 t2) {
    // Compute triangle plane equation.
    vec3 t01 = t1 - t0;
    vec3 t02 = t2 - t0;
    vec3 normal = normalize(cross(t01,t02));

    // Extrude triangle along capsule direction.
    vec3 c01 = c1 - c0;
    vec3 a0 = t0;
    vec3 a1 = t1;
    vec3 a2 = t2;
    vec3 b0 = t0 - c01;
    vec3 b1 = t1 - c01;
    vec3 b2 = t2 - c01;

    // Test for initial collision with the extruded triangle prism.
    if (pointInsideTriangularPrism(c0,a0,a1,a2,b0,b1,b2)) {
        // Capsule starts off penetrating triangle. Push it out from the triangle plane.
        float d0 = dot(normal,c0 - t0);
        float d1 = dot(normal,c1 - t0);
        float d = abs(d0) <= abs(d1) ? d0 : d1;
        vec3 n = d >= 0 ? normal : -normal;
        s.time = 0;
        s.depth = abs(d) + r;
        s.normal = n;
        s.point = c0 + d0*normal;
        return true;
    }

    // Decompose capsule triangle sweep into: 2 sphere-triangle + 3 sphere-parallelogram + 9 point-cylinder + 6 point-sphere sweeps.
    //
    // Every sub-sweep must run even after an earlier one has hit, because a later
    // one can still find an earlier contact time. `hit = hit || sweep(...)` would
    // short-circuit and skip the call, so the result is folded in with an if.
    bool hit = false;
    vec3 triangles[2][3] = {{a0,a1,a2}, {b0,b1,b2}};
    vec3 parallelograms[3][3] = {{a0,a1,b0}, {a1,a2,b1}, {a2,a0,b2}};
    vec3 cylinders[9][2] = {{a0,a1}, {a1,a2}, {a2,a0}, {b0,b1}, {b1,b2}, {b2,b0}, {a0,b0}, {a1,b1}, {a2,b2}};
    vec3 spheres[6] = {a0, a1, a2, b0, b1, b2};

    // Do sphere-triangle sweeps.
    vec3 triangleNormals[2];
    for (int i = 0; i < triangles.length(); i++) {
        vec3 p0 = triangles[i][0];
        vec3 p1 = triangles[i][1];
        vec3 p2 = triangles[i][2];

        // Compute triangle plane normal.
        vec3 n = normal;
        if (dot(n,c0 - p0) < 0) n = -n; // Orient towards sphere.
        triangleNormals[i] = n;

        // Test for triangle-plane sphere intersection.
        if (sweepSphereTrianglePlane(s,c0,r,v,p0,p1,p2,n))
            hit = true;
    }

    // Do sphere-parallelogram sweeps.
    vec3 parallelogramNormals[3];
    for (int i = 0; i < parallelograms.length(); i++) {
        vec3 p0 = parallelograms[i][0];
        vec3 p1 = parallelograms[i][1];
        vec3 p2 = parallelograms[i][2];

        // Check if quad is degenerate. Happens when triangle edge completely parallel to capsule.
        vec3 p01 = p1 - p0;
        vec3 p02 = p2 - p0;
        vec3 c = cross(p01,p02);
        float len = length(c);
        if (len > EPSILON) {
            // Compute quad plane equation.
            vec3 n = c/len;
            if (dot(n,c0 - p0) < 0) n = -n; // Orient towards sphere.
            parallelogramNormals[i] = n;

            // Do the sweep test.
            if (sweepSphereParallelogramPlane(s,c0,r,v,p0,p1,p2,n))
                hit = true;
        }
        else parallelogramNormals[i] = triangleNormals[0];
    }

    // Do point-cylinder sweeps.
    for (int i = 0; i < cylinders.length(); i++) {
        vec3 p0 = cylinders[i][0];
        vec3 p1 = cylinders[i][1];
        vec3 n;
        if (i < 6)
            n = triangleNormals[i/3];       // Edges 0-2 belong to triangle A, 3-5 to triangle B.
        else
            n = parallelogramNormals[i - 6]; // Edges 6-8 connect A to B.
        if (sweepPointUncappedCylinder(s,c0,v,p0,p1,r,n))
            hit = true;
    }

    // Do point-sphere sweeps.
    for (int i = 0; i < spheres.length(); i++) {
        vec3 c = spheres[i];
        vec3 n = triangleNormals[i/3];
        if (sweepPointSphere(s,c0,v,c,r,n))
            hit = true;
    }

    return hit;
}

// Move a capsule and resolve any triangle collisions encountered along the way.
// p - capsule base position
// v - capsule velocity
// h - capsule height
// r - capsule radius
// dt - time-step length
// triangles - list of triangles to collide with
void resolveCollisions(inout vec3 p, inout vec3 v, float h, float r, float dt, vec3 triangles[999][3]) {
	// Store the leftover movement in this vector.
	vec3 u = dt*v;

	// Move and resolve collisions while there is still motion. But cap max iterations to ensure simulation terminates.
	const int MAX_ITER = 16;
	for (int iter = 0; iter < MAX_ITER && dot(u,u) > 0; iter++) {
		// Compute capsule endpoints.
		vec3 c0 = p;
		vec3 c1 = p;
		c0.y += r;
		c1.y += h - r;

		// Perform the sweep test against all triangles.
		// Initialize every field that is read below: when nothing is hit the sweep
		// routines leave s untouched, and uninitialized locals are undefined in GLSL.
		Sweep s;
		s.time = 1;
		s.depth = 0;
		s.point = vec3(0);
		s.normal = vec3(0);
		for (int i = 0; i < triangles.length(); i++) {
			vec3 t0 = triangles[i][0];
			vec3 t1 = triangles[i][1];
			vec3 t2 = triangles[i][2];
			sweepCapsuleTriangle(s, c0, c1, r, u, t0, t1, t2);
		}

		// Stop objects from intersecting.
		if (s.depth > 0)
			p += (s.depth + EPSILON)*s.normal;

		// Advance the capsule until the first contact time.
		vec3 dp = s.time*u;
		p += dp;

		// If there were no collisions, entire motion is complete and we can terminate early.
		if (s.time >= 1)
			break;

		// Cancel out motion parallel to the normal. This causes capsule to slide along surface.
		// The component along the normal has to be subtracted; adding it would double it.
		u -= dp;
		u -= dot(u,s.normal)*s.normal;
		v -= dot(v,s.normal)*s.normal;

		// Nudge the position and velocity slightly away from surface to avoid another collision.
		vec3 offset = EPSILON*s.normal;
		p += offset;
		v += offset;
		u += offset;
	}
}

--------------------------------------------------------------------------------
/ccd.c:
--------------------------------------------------------------------------------
// 2D continuous collision detection tests.
//
// These are very similar to ray tracing routines, but they're used for
// collision detection in 2D. Instead of moving the object and then testing
// and correcting for collisions after the fact, you can use these routines
// to get the exact time point when the collision will occur and stop right
// before.
//
// Here is an example loop you can use for moving a circular player in a
// world full of rectangle colliders:
//
// for (int iter = 0; iter < MAX_ITER && (player.vx != 0 || player.vy != 0); iter++) {
//     Hit nearest = { 1 };
//     for (int i = 0; i < numColliders; i++) {
//         Hit hit = circleRect(player.x, player.y, player.radius, player.vx, player.vy, collider[i].x, collider[i].y, collider[i].rx, collider[i].ry);
//         if (hit.t < nearest.t) nearest = hit;
//     }
//     player.x += player.vx * nearest.t;
//     player.y += player.vy * nearest.t;
//     player.vx *= (1 - nearest.t);
//     player.vy *= (1 - nearest.t);
//     if (nearest.t < 1) {
//         float dot = player.vx * nearest.nx + player.vy * nearest.ny;
//         player.vx -= nearest.nx * dot;
//         player.vy -= nearest.ny * dot;
//         player.x += nearest.nx * EPSILON;
//         player.y += nearest.ny * EPSILON;
//     }
// }

#include <math.h> // sqrtf, INFINITY

typedef struct Hit {
	float t; // Time of collision. 0 <= t < 1. If no collision: t >= 1.
	float nx; // Collision normal. 0 if no collision.
	float ny;
} Hit;

// Moving point vs stationary circle.
40 | // x,y = point starting position 41 | // vx,vy = point velocity 42 | // cx,cy = circle center position 43 | // r = circle radius 44 | Hit pointCircle(float x, float y, float vx, float vy, float cx, float cy, float r) { 45 | Hit hit = { 1 }; 46 | 47 | // First, check if the ray starts inside of the circle already. 48 | float dx = x - cx; 49 | float dy = y - cy; 50 | float d2 = dx * dx + dy * dy; 51 | float r2 = r * r; 52 | if (d2 < r2) { // Ray already starts inside of circle and collides immediately. 53 | hit.t = 0; 54 | float d = sqrtf(d2); 55 | if (d > 0) { 56 | hit.nx = dx / d; 57 | hit.ny = dy / d; 58 | } 59 | else { // Ray is directly at circle center. Normal is arbitrary. 60 | hit.nx = 1; 61 | hit.ny = 0; 62 | } 63 | return hit; 64 | } 65 | 66 | // Now solve quadratic to find the intersection points and get the closest one. 67 | float a = vx * vx + vy * vy; 68 | float b = vx * dx + vy * dy; 69 | float c = d2 - r2; 70 | float disc = b * b - a * c; 71 | float root = sqrtf(b * b - a * c); 72 | float t0 = (-b - root) / a; 73 | float t1 = (-b + root) / a; 74 | float t = t0 >= 0 ? t0 : t1; 75 | if (!(0 <= t && t < 1)) return hit; // No hit. Relies on IEEE NaN behavior. 76 | 77 | hit.t = t; 78 | hit.nx = (dx + vx * t) / r; 79 | hit.ny = (dy + vy * t) / r; 80 | return hit; 81 | } 82 | 83 | // Moving point vs stationary rectangle. 84 | // x,y = point starting position 85 | // vx,vy = point velocity 86 | // cx,cy = rectangle center position 87 | // rx,ry = rectangle radius (width/2,height/2) 88 | Hit pointRect(float x, float y, float vx, float vy, float cx, float cy, float rx, float ry) { 89 | Hit hit = { 1 }; 90 | 91 | // First, check if the point starts inside of the rectangle already. 92 | float dx = x - cx; 93 | float dy = y - cy; 94 | float absx = dx < 0 ? -dx : +dx; 95 | float absy = dy < 0 ? -dy : +dy; 96 | if (absx < rx && absy < ry) { 97 | hit.t = 0; 98 | float penx = rx - absx; 99 | float peny = ry - absy; 100 | if (penx <= peny) 101 | hit.nx = dx < 0 ? 
-1.0f : +1.0f; 102 | else 103 | hit.ny = dy < 0 ? -1.0f : +1.0f; 104 | return hit; 105 | } 106 | 107 | // Find when collisions with 4 rectangle edges happen. 108 | float sx = vx < 0 ? -1.0f : +1.0f; 109 | float sy = vy < 0 ? -1.0f : +1.0f; 110 | float tx0 = (-sx * rx - dx) / vx; 111 | float tx1 = (+sx * rx - dx) / vx; 112 | float ty0 = (-sy * ry - dy) / vy; 113 | float ty1 = (+sy * ry - dy) / vy; 114 | 115 | // Find time of entry and exit. 116 | float tmin = 0; 117 | float tmax = INFINITY; 118 | tmin = tx0 > tmin ? tx0 : tmin; 119 | tmin = ty0 > tmin ? ty0 : tmin; 120 | tmax = tx1 < tmax ? tx0 : tmax; 121 | tmax = ty1 < tmax ? tx1 : tmax; 122 | if (!(tmin < tmax && tmin < 1)) return hit; // No hit. 123 | 124 | hit.t = tmin; 125 | if (tx0 >= ty0) 126 | hit.nx = -sx; 127 | else 128 | hit.ny = -sy; 129 | return hit; 130 | } 131 | 132 | // Moving point vs stationary rectangle with rounded corners. 133 | // x,y = point starting position 134 | // vx,vy = point velocity 135 | // cx,cy = rectangle center position 136 | // rx,ry = rectangle radius (width/2, height/2) 137 | // r = rectangle corner radius 138 | Hit pointRoundRect(float x, float y, float vx, float vy, float cx, float cy, float rx, float ry, float r) { 139 | // First test against the bounding rect. 140 | Hit hit = pointRect(x, y, vx, vy, cx, cy, rx, ry); 141 | if (hit.t >= 1) return hit; // No hit. 142 | 143 | // Find where the ray hits the bounding rect. 144 | float dx = x - cx; 145 | float dy = y - cy; 146 | float hx = dx + vx * hit.t; 147 | float hy = dy + vy * hit.t; 148 | 149 | // Quadrant correction. 150 | float qx = hx < 0 ? -1.0f : +1.0f; 151 | float qy = hy < 0 ? -1.0f : +1.0f; 152 | hx *= qx; 153 | hy *= qy; 154 | 155 | // If ray hits the non-circular part, then we're already done. 156 | float circx = rx - r; 157 | float circy = ry - r; 158 | if (hx <= circx || hy <= circy) return hit; 159 | 160 | // Test against the circular corner. Quadrant correct the hit normal. 
161 | dx *= qx; 162 | dy *= qy; 163 | vx *= qx; 164 | vy *= qy; 165 | hit = pointCircle(dx, dy, vx, vy, circx, circy, r); 166 | hit.nx *= qx; 167 | hit.ny *= qy; 168 | return hit; 169 | } 170 | 171 | // By taking the Minkowski sum, these other tests can all be implemented using the routines above. 172 | 173 | Hit rectCircle(float x, float y, float rx, float ry, float vx, float vy, float cx, float cy, float r) { 174 | return pointRoundRect(x, y, vx, vy, cx, cy, rx + r, ry + r, r); 175 | } 176 | Hit rectRect(float ax, float ay, float arx, float ary, float vx, float vy, float bx, float by, float brx, float bry) { 177 | return pointRect(ax, ay, vx, vy, bx, by, brx + arx, bry + ary); 178 | } 179 | Hit rectRoundRect(float ax, float ay, float arx, float ary, float vx, float vy, float bx, float by, float brx, float bry, float br) { 180 | return pointRoundRect(ax, ay, vx, vy, bx, by, brx + arx, bry + ary, br); 181 | } 182 | Hit circleCircle(float ax, float ay, float ar, float vx, float vy, float bx, float by, float br) { 183 | return pointCircle(ax, ay, vx, vy, bx, by, br + ar); 184 | } 185 | Hit circleRect(float x, float y, float r, float vx, float vy, float cx, float cy, float rx, float ry) { 186 | return pointRoundRect(x, y, vx, vy, cx + r, cy + r, rx, ry, r); 187 | } 188 | Hit circleRoundRect(float x, float y, float r, float vx, float vy, float cx, float cy, float rx, float ry, float br) { 189 | return pointRoundRect(x, y, vx, vy, cx, cy, rx + r, ry + r, r + br); 190 | } 191 | 192 | // A similar strategy can be used to test against 2 moving shapes. 
193 | 194 | Hit pointMovingCircle(float ax, float ay, float avx, float avy, float cx, float cy, float r, float bvx, float bvy) { 195 | return pointCircle(ax, ay, avx - bvx, avy - bvy, cx, cy, r); 196 | } 197 | 198 | int main(void) { 199 | 200 | } 201 | -------------------------------------------------------------------------------- /escape_string.c: -------------------------------------------------------------------------------- 1 | // escape C string, source and destination cannot overlap, does NOT zero terminate. 2 | int escape(char* restrict dst, const char* restrict src, int len) { 3 | int cursor = 0; 4 | for (int i = 0; i < len; i++) { 5 | unsigned char c = src[i]; 6 | if (c >= ' ' && c <= '~' && c != '"' && c != '\'' && c != '\\') 7 | dst[cursor++] = c; 8 | else { 9 | dst[cursor++] = '\\'; 10 | switch (c) { 11 | case '\a': dst[cursor++] = 'a'; break; 12 | case '\b': dst[cursor++] = 'b'; break; 13 | case '\t': dst[cursor++] = 't'; break; 14 | case '\n': dst[cursor++] = 'n'; break; 15 | case '\v': dst[cursor++] = 'v'; break; 16 | case '\f': dst[cursor++] = 'f'; break; 17 | case '\r': dst[cursor++] = 'r'; break; 18 | case '\"': dst[cursor++] = '"'; break; 19 | case '\'': dst[cursor++] = '\''; break; 20 | case '\\': dst[cursor++] = '\\'; break; 21 | default: 22 | dst[cursor++] = 'x'; 23 | dst[cursor++] = "0123456789ABCDEF"[(c >> 4) & 0xF]; 24 | dst[cursor++] = "0123456789ABCDEF"[(c >> 0) & 0xF]; 25 | break; 26 | } 27 | } 28 | } 29 | return cursor; 30 | } 31 | 32 | // unescape C string, source and destination can overlap, does NOT zero terminate. 
33 | int unescape(char* dst, const char* src, int len) { 34 | int cursor = 0; 35 | for (int i = 0; i < len; i++) { 36 | if (src[i] == '\\' && i < len - 1) { 37 | switch (src[++i]) { 38 | case 'a': dst[cursor++] = '\a'; break; 39 | case 'b': dst[cursor++] = '\b'; break; 40 | case 'e': dst[cursor++] = '\x1B'; break; 41 | case 'f': dst[cursor++] = '\f'; break; 42 | case 'n': dst[cursor++] = '\n'; break; 43 | case 'r': dst[cursor++] = '\r'; break; 44 | case 't': dst[cursor++] = '\t'; break; 45 | case 'v': dst[cursor++] = '\v'; break; 46 | case 'x': { 47 | int one = 0; // track if we have at least one valid hex char. 48 | int hex = 0; 49 | for (; i < len - 1; i++) { 50 | char c = src[i + 1]; 51 | int dig; 52 | if (c >= '0' && c <= '9') 53 | dig = c - '0'; 54 | else if (c >= 'A' && c <= 'F') 55 | dig = c - 'A' + 10; 56 | else if (c >= 'a' && c <= 'f') 57 | dig = c - 'a' + 10; 58 | else 59 | break; 60 | hex = (hex << 4) | dig; 61 | if (hex > 0xFF) 62 | hex = 0xFF; 63 | one = 1; 64 | } 65 | dst[cursor++] = one ? (char)hex : 'x'; // "\x" without any following hex chars unescapes to "x". 66 | } break; 67 | case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': { 68 | int oct = src[i] - '0'; 69 | int max = i + 2; 70 | if (max > len - 1) 71 | max = len - 1; 72 | for (; i < max; i++) { 73 | unsigned dig = src[i + 1] - '0'; 74 | if (dig >= 8) 75 | break; 76 | oct = (oct << 3) | dig; 77 | } 78 | if (oct > 0xFF) 79 | oct = 0xFF; 80 | dst[cursor++] = (char)oct; 81 | } break; 82 | default: // handles \" \' \? 
\\ and invalid escapes 83 | dst[cursor++] = src[i]; 84 | break; 85 | } 86 | } 87 | else dst[cursor++] = src[i]; 88 | } 89 | return cursor; 90 | } 91 | 92 | // === TESTS === 93 | 94 | #include 95 | #include 96 | #include 97 | 98 | bool escape_equal(const char* string, int string_length, const char* expected, int expected_length) { 99 | char escaped[9999]; 100 | int escaped_length = escape(escaped, string, string_length); 101 | return escaped_length == expected_length && !memcmp(escaped, expected, expected_length); 102 | } 103 | bool unescape_equal(const char* string, int string_length, const char* expected, int expected_length) { 104 | char unescaped[9999]; 105 | int unescaped_length = unescape(unescaped, string, string_length); 106 | return unescaped_length == expected_length && !memcmp(unescaped, expected, expected_length); 107 | } 108 | 109 | int main(void) { 110 | // exhaustively test all possible bytes 111 | char ascii[256]; 112 | for (int i = 0; i <= 255; i++) 113 | ascii[i] = (char)i; 114 | char expected[] = 115 | "\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\a\\b\\t\\n\\v\\f\\r\\x0E\\x0F" 116 | "\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1A\\x1B\\x1C\\x1D\\x1E\\x1F" 117 | " !\\\"#$%&\\'()*+,-./" 118 | "0123456789:;<=>?" 
119 | "@ABCDEFGHIJKLMNO" 120 | "PQRSTUVWXYZ[\\\\]^_" 121 | "`abcdefghijklmno" 122 | "pqrstuvwxyz{|}~\\x7F" 123 | "\\x80\\x81\\x82\\x83\\x84\\x85\\x86\\x87\\x88\\x89\\x8A\\x8B\\x8C\\x8D\\x8E\\x8F" 124 | "\\x90\\x91\\x92\\x93\\x94\\x95\\x96\\x97\\x98\\x99\\x9A\\x9B\\x9C\\x9D\\x9E\\x9F" 125 | "\\xA0\\xA1\\xA2\\xA3\\xA4\\xA5\\xA6\\xA7\\xA8\\xA9\\xAA\\xAB\\xAC\\xAD\\xAE\\xAF" 126 | "\\xB0\\xB1\\xB2\\xB3\\xB4\\xB5\\xB6\\xB7\\xB8\\xB9\\xBA\\xBB\\xBC\\xBD\\xBE\\xBF" 127 | "\\xC0\\xC1\\xC2\\xC3\\xC4\\xC5\\xC6\\xC7\\xC8\\xC9\\xCA\\xCB\\xCC\\xCD\\xCE\\xCF" 128 | "\\xD0\\xD1\\xD2\\xD3\\xD4\\xD5\\xD6\\xD7\\xD8\\xD9\\xDA\\xDB\\xDC\\xDD\\xDE\\xDF" 129 | "\\xE0\\xE1\\xE2\\xE3\\xE4\\xE5\\xE6\\xE7\\xE8\\xE9\\xEA\\xEB\\xEC\\xED\\xEE\\xEF" 130 | "\\xF0\\xF1\\xF2\\xF3\\xF4\\xF5\\xF6\\xF7\\xF8\\xF9\\xFA\\xFB\\xFC\\xFD\\xFE\\xFF"; 131 | assert(escape_equal(ascii, sizeof ascii, expected, sizeof expected - 1)); 132 | 133 | #define test_unescape(string, expected) assert(unescape_equal(string, sizeof(string) - 1, expected, sizeof(expected) - 1)) 134 | test_unescape("\\a", "\a"); 135 | test_unescape("\\b", "\b"); 136 | test_unescape("\\e", "\x1B"); 137 | test_unescape("\\f", "\f"); 138 | test_unescape("\\n", "\n"); 139 | test_unescape("\\r", "\r"); 140 | test_unescape("\\t", "\t"); 141 | test_unescape("\\v", "\v"); 142 | test_unescape("\\\\", "\\"); 143 | test_unescape("\\\'", "\'"); 144 | test_unescape("\\\"", "\""); 145 | test_unescape("\\?", "?"); 146 | test_unescape("\\", "\\"); 147 | test_unescape("\\%", "%"); 148 | test_unescape("\\0", "\0"); 149 | test_unescape("\\00", "\0"); 150 | test_unescape("\\000", "\0"); 151 | test_unescape("\\0000", "\x00\x30"); 152 | test_unescape("\\123", "\123"); 153 | test_unescape("\\777", "\xFF"); 154 | test_unescape("\\8", "8"); 155 | test_unescape("\\78", "\7\x38"); 156 | test_unescape("\\x", "x"); 157 | test_unescape("\\X", "X"); 158 | test_unescape("\\x0", "\x0"); 159 | test_unescape("\\x00", "\x00"); 160 | test_unescape("\\x000", "\x00"); 161 | 
test_unescape("\\x1", "\x1"); 162 | test_unescape("\\x11", "\x11"); 163 | test_unescape("\\x111", "\xFF"); 164 | test_unescape("\\xF", "\xF"); 165 | test_unescape("\\xFF", "\xFF"); 166 | test_unescape("\\xFFF", "\xFF"); 167 | test_unescape("\\x01\\x23\\x45\\x67\\x89", "\x01\x23\x45\x67\x89"); 168 | test_unescape("\\xAB\\xCD\\xEF", "\xAB\xCD\xEF"); 169 | test_unescape("\\xab\\xcd\\xef", "\xab\xcd\xef"); 170 | test_unescape("\\xFG", "\xFG"); 171 | test_unescape("\\xfg", "\xfg"); 172 | test_unescape("abcABC123+-( ~{}", "abcABC123+-( ~{}"); 173 | test_unescape("abc\\", "abc\\"); 174 | test_unescape("abc\\r\\n\\a\\\\\\123\\xF\\xfa", "abc\r\n\a\\\123\xF\xfa"); 175 | } -------------------------------------------------------------------------------- /ffmpeg.sh: -------------------------------------------------------------------------------- 1 | # Convert video to GIF. The "filter_complex" part improves GIF quality. 2 | ffmpeg -y -filter_complex "[0:v] split [a][b];[a] palettegen [p];[b][p] paletteuse" -i input.mp4 output.gif -------------------------------------------------------------------------------- /freelist.c: -------------------------------------------------------------------------------- 1 | void *allocate(void **freelist) { 2 | void *result = *freelist; 3 | if (*freelist) 4 | *freelist = **(void ***)freelist; 5 | return result; 6 | } 7 | 8 | void deallocate(void **freelist, void *item) { 9 | *(void **)item = *freelist; 10 | *freelist = item; 11 | } 12 | 13 | #include 14 | int main(void) { 15 | void *items[10]; 16 | void *list = 0; 17 | assert(!allocate(&list)); 18 | 19 | for (int i = 0; i < 10; ++i) 20 | deallocate(&list, &items[i]); 21 | for (int i = 9; i >= 0; --i) { 22 | void **item = allocate(&list); 23 | int index = (int)(item - items); 24 | assert(index == i); 25 | } 26 | assert(!allocate(&list)); 27 | assert(!allocate(&list)); 28 | 29 | for (int i = 0; i < 10; ++i) { 30 | deallocate(&list, &items[i]); 31 | void **item = allocate(&list); 32 | int index = 
(int)(item - items); 33 | assert(index == i); 34 | assert(!allocate(&list)); 35 | } 36 | } -------------------------------------------------------------------------------- /generic_list.c: -------------------------------------------------------------------------------- 1 | #include // realloc, free 2 | 3 | #define list(T) T* 4 | 5 | #define reserve(plist, num_items)\ 6 | private__reserve((plist), (num_items), sizeof *(*plist)) 7 | 8 | #define add(plist, item) do{\ 9 | int private__index = count(*(plist));\ 10 | reserve((plist), private__index + 1);\ 11 | (*plist)[private__index] = (item);\ 12 | (plist)[++((int*)(*(plist)))[-1]];\ 13 | }while(0) 14 | 15 | #define pop(plist)\ 16 | (((int*)(*(plist)))[-1]--, (*(plist))[((int*)(*(plist)))[-1]]) 17 | 18 | #define swap_delete(list, index)do{\ 19 | if(index < count(list))\ 20 | (list)[index] = (list)[--((int*)(list))[-1]];\ 21 | }while(0) 22 | 23 | int count(const list(void) list) { 24 | return list ? ((int *)list)[-1] : 0; 25 | } 26 | 27 | int capacity(const list(void) list) { 28 | return list ? ((int *)list)[-2] : 0; 29 | } 30 | 31 | void destroy(list(void) *plist) { 32 | free((int *)(*plist) - 4); 33 | *plist = NULL; 34 | } 35 | 36 | static void private__reserve(list(void) *plist, int min_capacity, int item_size) { 37 | int cap = capacity(*plist); 38 | if (cap < min_capacity) { 39 | cap *= 2; 40 | if (cap < 64) 41 | cap = 64; 42 | while (cap < min_capacity) 43 | cap *= 2; 44 | // Overallocate by 4 ints to keep overall alignment to 16 bytes. 45 | int cnt = count(*plist); 46 | int *newlist = (int *)realloc(*plist ? 
(int *)(*plist) - 4 : NULL, cap * item_size + 4 * sizeof(int)) + 4; 47 | newlist[-2] = cap; 48 | newlist[-1] = cnt; 49 | *plist = newlist; 50 | } 51 | } 52 | 53 | #include 54 | int main(void) { 55 | { 56 | list(int) *ints = NULL; 57 | assert(count(ints) == 0); 58 | assert(capacity(ints) == 0); 59 | 60 | for (int i = 0; i < 1024; ++i) 61 | add(&ints, i); 62 | assert(count(ints) == 1024); 63 | 64 | for (int i = 0; i < 1024; ++i) 65 | assert(ints[i] == i); 66 | 67 | for (int i = 1023; i >= 0; --i) 68 | assert(pop(&ints) == i); 69 | assert(count(ints) == 0); 70 | 71 | destroy(&ints); 72 | assert(!ints); 73 | } 74 | 75 | { 76 | // This shouldn't leak. 77 | for (int i = 0; i < 100000; ++i) { 78 | list(int) *ints = NULL; 79 | for (int j = 0; j < 10000; ++j) 80 | add(&ints, j); 81 | destroy(&ints); 82 | } 83 | } 84 | } -------------------------------------------------------------------------------- /handle_manager.c: -------------------------------------------------------------------------------- 1 | struct manager { 2 | void *items; // items[0] is a reserved sentinel. 
3 | struct metadata *metadata; 4 | unsigned short freelist; 5 | unsigned short num_items; 6 | unsigned short item_size; 7 | }; 8 | 9 | struct metadata { 10 | unsigned short generation; 11 | unsigned short prev; 12 | unsigned short next; 13 | }; 14 | 15 | union handle { 16 | unsigned value; 17 | struct { unsigned short index, generation; } fields; 18 | }; 19 | 20 | struct manager create(void *items, struct metadata *metadata, unsigned num_items, unsigned item_size) { 21 | metadata[0].prev = 0; 22 | metadata[0].next = 0; 23 | metadata[0].generation = 0; 24 | for (unsigned i = 1; i < num_items - 1; ++i) { 25 | metadata[i].prev = 0; 26 | metadata[i].next = (unsigned short)(i + 1); 27 | metadata[i].generation = 0; 28 | } 29 | metadata[num_items - 1].prev = 0; 30 | metadata[num_items - 1].next = 0; 31 | metadata[num_items - 1].generation = 0; 32 | 33 | return (struct manager) { 34 | .items = items, 35 | .metadata = metadata, 36 | .freelist = 1, 37 | .num_items = (unsigned short)num_items, 38 | .item_size = (unsigned short)item_size, 39 | }; 40 | } 41 | 42 | union handle allocate(struct manager *manager) { 43 | unsigned short index = manager->freelist; 44 | if (index) 45 | manager->freelist = manager->metadata[index].next; 46 | 47 | manager->metadata[index].prev = manager->metadata[0].prev; 48 | manager->metadata[index].next = 0; 49 | manager->metadata[manager->metadata[0].prev].next = index; 50 | manager->metadata[0].prev = index; 51 | 52 | return (union handle) { .fields = { 53 | .index = (unsigned short)index, 54 | .generation = manager->metadata[index].generation 55 | }}; 56 | } 57 | 58 | void deallocate(struct manager *manager, union handle handle) { 59 | unsigned short index = handle.fields.index; 60 | if (!handle.value) 61 | return; 62 | if (index >= manager->num_items || handle.fields.generation != manager->metadata[index].generation) 63 | return; // Handle is invalid. 
64 | 65 | unsigned short next = manager->metadata[index].next; 66 | unsigned short prev = manager->metadata[index].prev; 67 | manager->metadata[prev].next = next; 68 | manager->metadata[next].prev = prev; 69 | ++manager->metadata[index].generation; 70 | manager->metadata[index].next = (unsigned short)manager->freelist; 71 | manager->freelist = index; 72 | } 73 | 74 | int is_valid(struct manager manager, union handle handle) { 75 | unsigned index = handle.fields.index; 76 | return index < manager.num_items && handle.fields.generation == manager.metadata[index].generation; 77 | } 78 | 79 | void *get_item_from_handle(struct manager manager, union handle handle) { 80 | unsigned index = handle.fields.index; 81 | if (index >= manager.num_items || handle.fields.generation != manager.metadata[index].generation) 82 | index = 0; // Handle is invalid. 83 | 84 | return (char *)manager.items + index * manager.item_size; 85 | } 86 | 87 | #include 88 | int main(void) { 89 | int items[10] = { -999, 1, 2, 3, 4, 5, 6, 7, 8, 9 }; 90 | struct metadata metadata[10]; 91 | struct manager manager = create(items, metadata, 10, sizeof items[0]); 92 | union handle handles[10]; 93 | 94 | for (int i = 1; i < 10; ++i) { 95 | handles[i] = allocate(&manager); 96 | assert(is_valid(manager, handles[i])); 97 | int *item = get_item_from_handle(manager, handles[i]); 98 | assert(*item == items[i]); 99 | } 100 | 101 | for (int i = 1; i < 10; ++i) { 102 | assert(is_valid(manager, handles[i])); 103 | deallocate(&manager, handles[i]); 104 | assert(!is_valid(manager, handles[i])); 105 | } 106 | 107 | for (int i = 0; i < 10; ++i) { 108 | union handle handle = allocate(&manager); 109 | assert(is_valid(manager, handle)); 110 | int item = *(int *)get_item_from_handle(manager, handle); 111 | deallocate(&manager, handle); 112 | assert(!is_valid(manager, handle)); 113 | union handle new_handle = allocate(&manager); 114 | assert(!is_valid(manager, handle)); 115 | assert(item == *(int *)get_item_from_handle(manager, 
new_handle)); 116 | deallocate(&manager, new_handle); 117 | } 118 | 119 | for (int i = 1; i < 10; ++i) 120 | handles[i] = allocate(&manager); 121 | assert(*(int *)get_item_from_handle(manager, allocate(&manager)) == items[0]); 122 | assert(*(int *)get_item_from_handle(manager, allocate(&manager)) == items[0]); 123 | assert(*(int *)get_item_from_handle(manager, allocate(&manager)) == items[0]); 124 | for (int i = 1; i < 10; ++i) 125 | deallocate(&manager, handles[i]); 126 | 127 | for (int i = 1; i < 5; ++i) 128 | handles[i] = allocate(&manager); 129 | 130 | for (unsigned index = metadata[0].next, i = 1; index; index = metadata[index].next, ++i) { 131 | assert(i < 5); 132 | assert(items[index] == (int)i); 133 | } 134 | 135 | for (int i = 5; i < 10; ++i) 136 | handles[i] = allocate(&manager); 137 | for (unsigned index = metadata[0].next, i = 1; index; index = metadata[index].next, ++i) { 138 | assert(i < 10); 139 | assert(items[index] == (int)i); 140 | } 141 | for (int i = 1; i < 10; ++i) 142 | deallocate(&manager, handles[i]); 143 | 144 | for (unsigned index = metadata[0].next; index; index = metadata[index].next) 145 | assert(0); 146 | } -------------------------------------------------------------------------------- /hash_set.c: -------------------------------------------------------------------------------- 1 | #include // calloc, free 2 | 3 | // For simplicity and efficiency, this set doesn't actually store the items. 4 | // It only stores the item hashes. You'd better have a good hash function, because 5 | // if two items happen to hash to the same value you're in big trouble. They will 6 | // overwrite each other. In practice, if you have a decent hash function the 7 | // likelyhood of this happening is really small with 64-bits hashes. Note that 8 | // this also means that you cannot iterate over all of the items in the set, since 9 | // we only store the hashes. 10 | struct set { 11 | unsigned long long *hashes; 12 | int capacity; // Always a power of 2 or 0. 
13 | int count; 14 | int num_tombstones; 15 | }; 16 | 17 | #define TOMBSTONE 1 18 | 19 | void resize(struct set *set, int capacity) { 20 | if (capacity <= set->count) 21 | capacity = set->count + 1; 22 | 23 | int pow2; // Round up capacity to a power of 2. 24 | for (pow2 = 1; (1 << pow2) < capacity; ++pow2); 25 | capacity = (1 << pow2); 26 | 27 | unsigned long long *new_hashes = calloc((size_t)capacity, sizeof new_hashes[0]); 28 | unsigned mask = (unsigned)capacity - 1; 29 | for (int i = 0; i < set->capacity; ++i) { 30 | unsigned long long hash = set->hashes[i]; 31 | if (hash > TOMBSTONE) { 32 | for (unsigned j = (unsigned)hash & mask;; j = (j + 1) & mask) { 33 | if (!new_hashes[j]) { 34 | new_hashes[j] = hash; 35 | break; 36 | } 37 | } 38 | } 39 | } 40 | 41 | free(set->hashes); 42 | set->hashes = new_hashes; 43 | set->capacity = capacity; 44 | set->num_tombstones = 0; 45 | } 46 | 47 | void reserve(struct set *set, int min_capacity) { 48 | if (3 * set->capacity < 4 * min_capacity) { 49 | int capacity = 4 * min_capacity / 3; 50 | if (capacity < 64) 51 | capacity = 64; 52 | resize(set, capacity); 53 | } 54 | } 55 | 56 | void add(struct set *set, unsigned long long hash) { 57 | hash += (hash <= TOMBSTONE) ? 2 : 0; 58 | reserve(set, set->count + 1); 59 | unsigned mask = (unsigned)set->capacity - 1; 60 | unsigned index = (unsigned)-1; 61 | for (unsigned i = (unsigned)hash & mask;; i = (i + 1) & mask) { 62 | if (set->hashes[i] == hash) 63 | return; 64 | if (!set->hashes[i]) { 65 | index = min(index, i); 66 | break; 67 | } 68 | if (set->hashes[i] == TOMBSTONE) 69 | index = min(index, i); 70 | } 71 | if (set->hashes[index] == TOMBSTONE) 72 | --set->num_tombstones; 73 | set->hashes[index] = hash; 74 | set->count++; 75 | } 76 | 77 | void remove(struct set *set, unsigned long long hash) { 78 | if (!set->count) 79 | return; 80 | 81 | hash += (hash <= TOMBSTONE) ? 
2 : 0; 82 | unsigned mask = (unsigned)set->capacity - 1; 83 | for (unsigned i = (unsigned)hash & mask; set->hashes[i]; i = (i + 1) & mask) { 84 | if (set->hashes[i] == hash) { 85 | set->hashes[i] = TOMBSTONE; 86 | set->num_tombstones++; 87 | set->count--; 88 | if (8 * set->num_tombstones > set->capacity) 89 | resize(set, set->capacity); // Get rid of tombstones. 90 | return; 91 | } 92 | } 93 | } 94 | 95 | int contains(struct set set, unsigned long long hash) { 96 | if (!set.count) 97 | return 0; 98 | 99 | hash += (hash <= TOMBSTONE) ? 2 : 0; 100 | unsigned mask = (unsigned)set.capacity - 1; 101 | for (unsigned i = (unsigned)hash & mask; set.hashes[i]; i = (i + 1) & mask) 102 | if (set.hashes[i] == hash) 103 | return 1; 104 | 105 | return 0; 106 | } 107 | 108 | void destroy(struct set *set) { 109 | free(set->hashes); 110 | set->capacity = 0; 111 | set->count = 0; 112 | set->hashes = NULL; 113 | } 114 | 115 | #include 116 | unsigned long long hash(const char *string) { 117 | // FNV-1a https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function#FNV-1a_hash 118 | unsigned long long hash = 14695981039346656037u; 119 | for (int i = 0; string[i]; ++i) 120 | hash = (hash ^ string[i]) * 1099511628211u; 121 | return hash; 122 | } 123 | int main(void) { 124 | { 125 | struct set set = { 0 }; 126 | assert(!contains(set, hash("Hi"))); 127 | remove(&set, hash("Hi")); 128 | destroy(&set); 129 | } 130 | 131 | { 132 | struct set set = { 0 }; 133 | 134 | add(&set, hash("abcd")); 135 | add(&set, hash("efgh")); 136 | add(&set, hash("ijkl")); 137 | add(&set, hash("mnop")); 138 | assert(contains(set, hash("abcd"))); 139 | assert(contains(set, hash("efgh"))); 140 | assert(contains(set, hash("ijkl"))); 141 | assert(contains(set, hash("mnop"))); 142 | assert(!contains(set, hash("qrst"))); 143 | 144 | remove(&set, hash("abcd")); 145 | assert(!contains(set, hash("abcd"))); 146 | assert(contains(set, hash("efgh"))); 147 | assert(contains(set, hash("ijkl"))); 148 | 
assert(contains(set, hash("mnop"))); 149 | 150 | remove(&set, hash("abcd")); 151 | assert(!contains(set, hash("abcd"))); 152 | assert(contains(set, hash("efgh"))); 153 | assert(contains(set, hash("ijkl"))); 154 | assert(contains(set, hash("mnop"))); 155 | 156 | remove(&set, hash("efgh")); 157 | remove(&set, hash("ijkl")); 158 | remove(&set, hash("mnop")); 159 | assert(!contains(set, hash("abcd"))); 160 | assert(!contains(set, hash("efgh"))); 161 | assert(!contains(set, hash("ijkl"))); 162 | assert(!contains(set, hash("mnop"))); 163 | 164 | destroy(&set); 165 | } 166 | 167 | { 168 | static unsigned long long items[1048576]; 169 | int n = sizeof items / sizeof items[0]; 170 | for (int i = 0; i < n; ++i) { 171 | int x = i; 172 | char key[8] = { 0 }; 173 | for (int j = 0; j < 7; ++j) { 174 | key[6 - j] = '0' + x % 10; 175 | x /= 10; 176 | } 177 | items[i] = hash(key); 178 | } 179 | 180 | struct set set = { 0 }; 181 | for (int i = 0; i < n; ++i) 182 | assert(!contains(set, items[i])); 183 | for (int i = 0; i < n; ++i) 184 | add(&set, items[i]); 185 | for (int i = 0; i < n; ++i) 186 | assert(contains(set, items[i])); 187 | for (int i = 0; i < n; ++i) 188 | add(&set, items[i]); 189 | for (int i = 0; i < n; ++i) 190 | remove(&set, items[i]); 191 | for (int i = 0; i < n; ++i) 192 | assert(!contains(set, items[i])); 193 | for (int i = 0; i < n; ++i) 194 | add(&set, items[i]); 195 | for (int i = 0; i < n; ++i) 196 | assert(contains(set, items[i])); 197 | 198 | destroy(&set); 199 | for (int i = 0; i < n / 2; ++i) 200 | add(&set, items[i]); 201 | for (int i = n / 2; i < n; ++i) 202 | assert(!contains(set, items[i])); 203 | for (int i = 0; i < n / 2; ++i) 204 | assert(contains(set, items[i])); 205 | for (int i = 0; i < n / 4; ++i) 206 | remove(&set, items[i]); 207 | for (int i = 0; i < n; ++i) 208 | assert(contains(set, items[i]) == (i >= n / 4 && i < n / 2)); 209 | 210 | for (int i = 0; i < n; ++i) 211 | remove(&set, items[i]); 212 | assert(set.count == 0); 213 | for (int i = 
0; i < n; ++i) 214 | add(&set, items[i]); 215 | 216 | destroy(&set); 217 | } 218 | 219 | { 220 | // Potential pathological case: create a bunch of items and then delete them 221 | // to leave tombstones, then lookup each item. If we don't clean tombstones this is O(n^2). 222 | struct set set = { 0 }; 223 | for (unsigned long long i = 2; i <= 1048577; ++i) 224 | add(&set, i); 225 | //resize(&set, set.count + 1); 226 | for (unsigned long long i = 3; i <= 1048577; ++i) 227 | remove(&set, i); 228 | assert(set.count == 1); 229 | for (unsigned long long i = 3; i <= 1048577; ++i) 230 | assert(!contains(set, i)); 231 | } 232 | 233 | { 234 | // This shouldn't leak. 235 | for (int i = 0; i < 10000; ++i) { 236 | struct set set = { 0 }; 237 | for (int j = 0; j < 10000; ++j) { 238 | char item[5] = { 0 }; 239 | int x = j; 240 | item[3] = x % 10; x /= 10; 241 | item[2] = x % 10; x /= 10; 242 | item[1] = x % 10; x /= 10; 243 | item[0] = x % 10; x /= 10; 244 | add(&set, hash(item)); 245 | } 246 | destroy(&set); 247 | } 248 | } 249 | } -------------------------------------------------------------------------------- /hash_table.c: -------------------------------------------------------------------------------- 1 | #include // malloc, free 2 | 3 | // For simplicity and efficiency, this table doesn't actually store the keys. 4 | // It only stores the key hashes. You'd better have a good hash function, because 5 | // if two keys happen to hash to the same value you're in big trouble. They will 6 | // overwrite each other. In practice, if you have a decent hash function the 7 | // likelyhood of this happening is really small with 64-bits hashes. 8 | struct table { 9 | unsigned long long *hashes; 10 | unsigned long long *values; 11 | int capacity; // Always a power of 2 or 0. 
12 | int count; 13 | int num_tombstones; 14 | }; 15 | 16 | #define TOMBSTONE 1 17 | 18 | void resize(struct table *table, int capacity) { 19 | if (capacity <= table->count) 20 | return; 21 | 22 | int pow2; // Round up capacity to a power of 2. 23 | for (pow2 = 1; (1 << pow2) < capacity; ++pow2); 24 | capacity = (1 << pow2); 25 | 26 | unsigned long long *new_memory = malloc((size_t)capacity * 2 * sizeof new_memory[0]); 27 | unsigned long long *new_hashes = new_memory; 28 | unsigned long long *new_values = new_hashes + capacity; 29 | for (int i = 0; i < capacity; ++i) 30 | new_hashes[i] = 0; 31 | 32 | unsigned mask = (unsigned)capacity - 1; 33 | for (int i = 0; i < table->capacity; ++i) { 34 | unsigned long long hash = table->hashes[i]; 35 | if (hash > TOMBSTONE) { 36 | for (unsigned j = (unsigned)hash & mask;; j = (j + 1) & mask) { 37 | if (!new_hashes[j]) { 38 | new_hashes[j] = hash; 39 | new_values[j] = table->values[i]; 40 | break; 41 | } 42 | } 43 | } 44 | } 45 | 46 | free(table->hashes); // This also frees the values. 47 | table->hashes = new_hashes; 48 | table->values = new_values; 49 | table->capacity = capacity; 50 | table->num_tombstones = 0; 51 | } 52 | 53 | void reserve(struct table *table, int min_capacity) { 54 | if (3 * table->capacity < 4 * min_capacity) { 55 | int capacity = 4 * min_capacity / 3; 56 | if (capacity < 64) 57 | capacity = 64; 58 | resize(table, capacity); 59 | } 60 | } 61 | 62 | void add(struct table *table, unsigned long long hash, unsigned long long value) { 63 | hash += (hash <= TOMBSTONE) ? 
2 : 0; 64 | reserve(table, table->count + 1); 65 | unsigned mask = (unsigned)table->capacity - 1; 66 | unsigned index = (unsigned)-1; 67 | for (unsigned i = (unsigned)hash & mask;; i = (i + 1) & mask) { 68 | if (table->hashes[i] == hash) { 69 | table->values[i] = value; 70 | return; 71 | } 72 | if (!table->hashes[i]) { 73 | index = min(index, i); 74 | break; 75 | } 76 | if (table->hashes[i] == TOMBSTONE) 77 | index = min(index, i); 78 | } 79 | 80 | if (table->hashes[index] == TOMBSTONE) 81 | table->num_tombstones--; 82 | table->hashes[index] = hash; 83 | table->values[index] = value; 84 | table->count++; 85 | } 86 | 87 | void remove(struct table *table, unsigned long long hash) { 88 | if (!table->count) 89 | return; 90 | 91 | hash += (hash <= TOMBSTONE) ? 2 : 0; 92 | unsigned mask = (unsigned)table->capacity - 1; 93 | for (unsigned i = (unsigned)hash & mask; table->hashes[i]; i = (i + 1) & mask) { 94 | if (table->hashes[i] == hash) { 95 | table->hashes[i] = TOMBSTONE; 96 | table->count--; 97 | table->num_tombstones++; 98 | if (8 * table->num_tombstones > table->capacity) 99 | resize(table, table->capacity); // Get rid of tombstones. 100 | return; 101 | } 102 | } 103 | } 104 | 105 | unsigned long long *get(struct table table, unsigned long long hash) { 106 | if (!table.count) 107 | return NULL; 108 | 109 | hash += (hash <= TOMBSTONE) ? 
2 : 0; 110 | unsigned mask = (unsigned)table.capacity - 1; 111 | for (unsigned i = (unsigned)hash & mask; table.hashes[i]; i = (i + 1) & mask) 112 | if (table.hashes[i] == hash) 113 | return &table.values[i]; 114 | 115 | return NULL; 116 | } 117 | 118 | int first_index(struct table table) { 119 | for (int i = 0; i < table.capacity; ++i) 120 | if (table.hashes[i] > TOMBSTONE) 121 | return i; 122 | return -1; 123 | } 124 | 125 | int next_index(struct table table, int index) { 126 | for (int i = index + 1; i < table.capacity; ++i) 127 | if (table.hashes[i] > TOMBSTONE) 128 | return i; 129 | return -1; 130 | } 131 | 132 | void destroy(struct table *table) { 133 | free(table->hashes); // This also frees the values. 134 | table->capacity = 0; 135 | table->count = 0; 136 | table->hashes = NULL; 137 | table->values = NULL; 138 | } 139 | 140 | #include 141 | unsigned long long hash(const char *string) { 142 | // FNV-1a https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function#FNV-1a_hash 143 | unsigned long long hash = 14695981039346656037u; 144 | for (int i = 0; string[i]; ++i) 145 | hash = (hash ^ string[i]) * 1099511628211u; 146 | return hash; 147 | } 148 | int main(void) { 149 | { 150 | struct table table = { 0 }; 151 | assert(!get(table, 123)); 152 | assert(first_index(table) == -1); 153 | destroy(&table); 154 | } 155 | 156 | { 157 | const char *strings[4] = { 158 | "Hello, sailor!", 159 | "Three jumping wizards box quickly", 160 | "Third", 161 | "Eyyo", 162 | }; 163 | 164 | struct table table = { 0 }; 165 | for (int i = 0; i < 4; ++i) 166 | add(&table, hash(strings[i]), (unsigned)i); 167 | 168 | assert(table.count == 4); 169 | for (int i = 0; i < 4; ++i) 170 | assert(*get(table, hash(strings[i])) == (unsigned)i); 171 | 172 | int remaining[4] = { 0, 1, 2, 3 }; 173 | int num_remaining = 4; 174 | for (int i = first_index(table); i >= 0; i = next_index(table, i)) { 175 | int value = (int)table.values[i]; 176 | for (int i = 0; i < num_remaining; ++i) { 177 
| if (remaining[i] == value) { 178 | remaining[i] = remaining[--num_remaining]; 179 | break; 180 | } 181 | } 182 | } 183 | assert(num_remaining == 0); 184 | 185 | destroy(&table); 186 | assert(!table.capacity && !table.count && !table.hashes && !table.values); 187 | } 188 | 189 | { 190 | static unsigned long long hashes[1048576]; 191 | int n = sizeof hashes / sizeof hashes[0]; 192 | unsigned long long seed = 42; 193 | for (int i = 0; i < n; ++i) { 194 | seed ^= seed >> 12; 195 | seed ^= seed << 25; 196 | seed ^= seed >> 27; 197 | hashes[i] = seed * 0x2545F4914F6CDD1Du; 198 | } 199 | 200 | struct table table = { 0 }; 201 | for (int i = 0; i < n; ++i) 202 | add(&table, hashes[i], (unsigned)i); 203 | 204 | assert(table.count == n); 205 | for (int i = 0; i < n; ++i) 206 | assert(*get(table, hashes[i]) == (unsigned)i); 207 | 208 | static int remaining[sizeof hashes / sizeof hashes[0]]; 209 | for (int i = 0; i < n; ++i) 210 | remaining[i] = 1; 211 | for (int i = first_index(table); i >= 0; i = next_index(table, i)) { 212 | int value = (int)table.values[i]; 213 | remaining[value] -= 1; 214 | } 215 | int num_remaining = 0; 216 | for (int i = 0; i < n; ++i) 217 | num_remaining += remaining[i]; 218 | assert(num_remaining == 0); 219 | 220 | for (int i = 0; i < n / 2; ++i) 221 | remove(&table, hashes[i]); 222 | assert(table.count == n / 2); 223 | for (int i = n / 2; i < n; ++i) 224 | assert(*get(table, hashes[i]) == (unsigned)i); 225 | 226 | for (int i = 0; i < n; ++i) 227 | remaining[i] = 1; 228 | for (int i = first_index(table); i >= 0; i = next_index(table, i)) { 229 | int value = (int)table.values[i]; 230 | remaining[value] -= 1; 231 | } 232 | int num_remaining1 = 0; 233 | int num_remaining2 = 0; 234 | for (int i = 0; i < n / 2; ++i) 235 | num_remaining1 += remaining[i]; 236 | for (int i = n / 2; i < n; ++i) 237 | num_remaining2 += remaining[i]; 238 | assert(num_remaining1 == n / 2); 239 | assert(num_remaining2 == 0); 240 | 241 | for (int i = 0; i < n / 2; ++i) 242 | 
add(&table, hashes[i], (unsigned)i); 243 | for (int i = 0; i < n; ++i) 244 | remaining[i] = 1; 245 | for (int i = first_index(table); i >= 0; i = next_index(table, i)) { 246 | int value = (int)table.values[i]; 247 | remaining[value] -= 1; 248 | } 249 | num_remaining = 0; 250 | for (int i = 0; i < n; ++i) 251 | num_remaining += remaining[i]; 252 | assert(num_remaining == 0); 253 | 254 | destroy(&table); 255 | } 256 | 257 | { 258 | // Potential pathological case: create a bunch of items and then delete them 259 | // to leave tombstones, then lookup each item. If we don't clean tombstones this is O(n^2). 260 | struct table table = { 0 }; 261 | for (unsigned i = 2; i <= 1048577; ++i) 262 | add(&table, i, i); 263 | for (unsigned i = 2; i <= 1048577; ++i) 264 | remove(&table, i, i); 265 | assert(table.count == 0); 266 | for (unsigned i = 2; i <= 1048577; ++i) 267 | assert(!get(table, i)); 268 | } 269 | 270 | { 271 | // This shouldn't leak. 272 | for (int i = 0; i < 10000; ++i) { 273 | struct table table = { 0 }; 274 | for (int j = 0; j < 10000; ++j) 275 | add(&table, (unsigned)j, (unsigned)j); 276 | destroy(&table); 277 | } 278 | } 279 | } -------------------------------------------------------------------------------- /mpmc_queue.c: -------------------------------------------------------------------------------- 1 | // Concurrent multi-producer-multi-consumer wait-free-ish ring buffer queue (what a mouthful!). 2 | // 3 | // - Wait-free unless the queue is full on write or empty on read. 4 | // - If full on write or empty on read, caller yields to the OS scheduler. Increases latency but conserves power. 5 | // - Only 1 atomic increment and 2 serialization points per call in the fast case. 6 | // - Only 2 bytes overhead per queue slot. 7 | // - Polling versions of calls are possible. 8 | // - Queue is initialized to all 0. 9 | // - No memory allocations or thread local storage. 
10 | // - Slightly modified version of https://github.com/rigtorp/MPMCQueue, which is battle tested. 11 | 12 | #include 13 | #pragma comment(lib, "Synchronization.lib") 14 | 15 | #define CAPACITY 16384 // Must be a power of 2. 16 | 17 | struct Queue 18 | { 19 | __declspec(align(64)) UINT32 WriteTicket; 20 | __declspec(align(64)) UINT32 ReadTicket; 21 | __declspec(align(64)) struct 22 | { 23 | UINT8 WriteTurn; 24 | UINT8 ReadTurn; 25 | int Item; // You can put anything you want here. 26 | } Slots[CAPACITY]; 27 | }; 28 | 29 | // Blocking API 30 | 31 | void Enqueue(volatile struct Queue *queue, int item) 32 | { 33 | UINT32 ticket = InterlockedIncrementNoFence((volatile LONG *)&queue->WriteTicket) - 1; // Serialization with all writers 34 | UINT32 slot = ticket % CAPACITY; 35 | UINT8 turn = (UINT8)(ticket / CAPACITY); // Write turns start at 0. 36 | 37 | UINT8 currentTurn; 38 | while ((currentTurn = queue->Slots[slot].WriteTurn) != turn) // Acquire, Serialization with 1 reader. 39 | WaitOnAddress(&queue->Slots[slot].WriteTurn, ¤tTurn, sizeof currentTurn, INFINITE); // Block while queue is full. 40 | 41 | queue->Slots[slot].Item = item; 42 | queue->Slots[slot].ReadTurn = turn + 1; // Release, serialization with 1 reader. 43 | WakeByAddressAll((void *)&queue->Slots[slot].ReadTurn); // Hash table crawl. 44 | } 45 | int Dequeue(volatile struct Queue *queue) 46 | { 47 | UINT32 ticket = InterlockedIncrementNoFence((volatile LONG *)&queue->ReadTicket) - 1; // Acquire, serialization with all readers. 48 | UINT32 slot = ticket % CAPACITY; 49 | UINT8 turn = (UINT8)(ticket / CAPACITY + 1); // Read turns start at 1. 50 | 51 | UINT8 currentTurn; 52 | while ((currentTurn = queue->Slots[slot].ReadTurn) != turn) // Acquire, serialization with 1 writer. 53 | WaitOnAddress(&queue->Slots[slot].ReadTurn, ¤tTurn, sizeof currentTurn, INFINITE); // Block while queue is empty. 
54 | 55 | int item = queue->Slots[slot].Item; 56 | queue->Slots[slot].WriteTurn = turn; // Release, serialization with 1 writer. 57 | WakeByAddressAll((void *)&queue->Slots[slot].WriteTurn); // Hash table crawl. 58 | return item; 59 | } 60 | 61 | // Polling API 62 | 63 | BOOL TryEnqueue(volatile struct Queue *queue, int item) 64 | { 65 | UINT32 tryTicket = queue->WriteTicket; // Atomic load relaxed. Serialization with all writers. 66 | for (;;) 67 | { 68 | UINT32 slot = tryTicket % CAPACITY; 69 | UINT8 turn = (UINT8)(tryTicket / CAPACITY); // Write turns start at 0. 70 | UINT8 currentTurn = queue->Slots[slot].WriteTurn; // Acquire, serialization with 1 reader. 71 | 72 | int turnsRemaining = (int)(turn - currentTurn); 73 | if (turnsRemaining > 0) 74 | return FALSE; // Queue is full. 75 | if (turnsRemaining == 0) 76 | { 77 | UINT32 ticket = InterlockedCompareExchangeNoFence((volatile LONG *)&queue->WriteTicket, tryTicket + 1, tryTicket); // Serialization with all readers. 78 | if (ticket == tryTicket) 79 | { 80 | queue->Slots[slot].Item = item; 81 | queue->Slots[slot].ReadTurn = turn + 1; // Release, serialization with 1 reader. 82 | WakeByAddressAll((void *)&queue->Slots[slot].ReadTurn); // Hash table crawl. Remove this if you only use Polling and not Blocking. 83 | return TRUE; 84 | } 85 | tryTicket = ticket; 86 | } 87 | else tryTicket = queue->WriteTicket; // Another writer beat us to it, try again. 88 | } 89 | } 90 | BOOL TryDequeue(volatile struct Queue *queue, int *outItem) 91 | { 92 | UINT32 tryTicket = queue->ReadTicket; // Atomic load relaxed. Serialization with all readers. 93 | for (;;) 94 | { 95 | UINT32 slot = tryTicket % CAPACITY; 96 | UINT8 turn = (UINT8)(tryTicket / CAPACITY + 1); // Read turns start at 1. 97 | UINT8 currentTurn = queue->Slots[slot].ReadTurn; // Acquire, serialization with 1 writer. 98 | 99 | int turnsRemaining = (int)(turn - currentTurn); 100 | if (turnsRemaining > 0) 101 | return FALSE; // Queue is empty. 
102 | if (turnsRemaining == 0) 103 | { 104 | UINT32 ticket = InterlockedCompareExchangeNoFence((volatile LONG *)&queue->ReadTicket, tryTicket + 1, tryTicket); // Serialization with all readers. 105 | if (ticket == tryTicket) 106 | { 107 | (*outItem) = queue->Slots[slot].Item; 108 | queue->Slots[slot].WriteTurn = turn; // Release, serialization with 1 writer. 109 | WakeByAddressAll((void *)&queue->Slots[slot].WriteTurn); // Hash table crawl. Remove this if you only use Polling and not Blocking. 110 | return TRUE; 111 | } 112 | tryTicket = ticket; 113 | } 114 | else tryTicket = queue->ReadTicket; // Another reader beat us to it, try again. 115 | } 116 | } 117 | 118 | // Test 119 | 120 | #include 121 | 122 | DWORD __stdcall ReaderThread(void *parameter) 123 | { 124 | struct Queue *queue = parameter; 125 | static volatile LONG counters[3][1000000]; 126 | int lastWriterData[3] = { -1, -1, -1 }; 127 | for (int i = 0; i < 1000000; ++i) 128 | { 129 | int item; 130 | if (i < 500000) 131 | item = Dequeue(queue); 132 | else 133 | while (!TryDequeue(queue, &item)); 134 | int writer = item / 1000000; 135 | int data = item % 1000000; 136 | assert(writer < 3); // Ensure no data corruption. 137 | InterlockedIncrement(&counters[writer][data]); 138 | assert(lastWriterData[writer] < data); // Ensure data is correctly sequenced FIFO. 139 | lastWriterData[writer] = data; 140 | } 141 | 142 | // Wait for all readers to finish. 143 | static volatile LONG doneCounter; 144 | InterlockedIncrement(&doneCounter); 145 | WakeByAddressAll((void *)&doneCounter); 146 | LONG numDone; 147 | while ((numDone = doneCounter) != 3) 148 | WaitOnAddress(&doneCounter, &numDone, sizeof numDone, INFINITE); 149 | 150 | for (int writer = 0; writer < 3; ++writer) 151 | for (int i = 0; i < 1000000; ++i) 152 | assert(counters[writer][i] == 1); // Ensure all items have been properly received. 
153 | 154 | return EXIT_SUCCESS; 155 | } 156 | DWORD __stdcall WriterThread(void *parameter) 157 | { 158 | struct Queue *queue = parameter; 159 | static volatile LONG idDispenser; 160 | LONG id = InterlockedIncrement(&idDispenser) - 1; 161 | for (int i = 0; i < 500000; ++i) 162 | Enqueue(queue, id * 1000000 + i); 163 | for (int i = 500000; i < 1000000; ++i) 164 | while (!TryEnqueue(queue, id * 1000000 + i)); 165 | return EXIT_SUCCESS; 166 | } 167 | int main(void) 168 | { 169 | static struct Queue queue; 170 | HANDLE threads[6]; 171 | threads[0] = CreateThread(NULL, 0, ReaderThread, &queue, 0, NULL); 172 | threads[1] = CreateThread(NULL, 0, ReaderThread, &queue, 0, NULL); 173 | threads[2] = CreateThread(NULL, 0, ReaderThread, &queue, 0, NULL); 174 | threads[3] = CreateThread(NULL, 0, WriterThread, &queue, 0, NULL); 175 | threads[4] = CreateThread(NULL, 0, WriterThread, &queue, 0, NULL); 176 | threads[5] = CreateThread(NULL, 0, WriterThread, &queue, 0, NULL); 177 | WaitForMultipleObjects(6, threads, TRUE, INFINITE); 178 | __debugbreak(); 179 | } 180 | -------------------------------------------------------------------------------- /mpmc_queue.cpp: -------------------------------------------------------------------------------- 1 | // Concurrent multi-producer-multi-consumer wait-free-ish ring buffer queue (what a mouthful!). 2 | // 3 | // - Wait-free unless the queue is full on write or empty on read. 4 | // - If full on write or empty on read, caller yields to the OS scheduler. Increases latency but conserves power. 5 | // - Only 1 atomic increment and 2 serialization points per call in the fast case. 6 | // - Only 2 bytes overhead per queue slot. 7 | // - Polling versions of calls are possible. 8 | // - Queue is initialized to all 0. 9 | // - No memory allocations or thread local storage. 10 | // - Slightly modified version of https://github.com/rigtorp/MPMCQueue, which is battle tested. 
11 | 12 | #include 13 | #include 14 | using namespace std; 15 | using enum std::memory_order; 16 | 17 | #define CAPACITY 16384 // Must be a power of 2. 18 | 19 | struct Queue { 20 | alignas(64) atomic write_ticket = 0; 21 | alignas(64) atomic read_ticket = 0; 22 | struct { 23 | alignas(64) 24 | atomic write_turn = 0; 25 | atomic read_turn = 0; 26 | int item = 0; 27 | } slots[CAPACITY]; 28 | }; 29 | 30 | // Blocking API 31 | 32 | void enqueue(Queue *queue, int item) { 33 | uint32_t ticket = queue->write_ticket.fetch_add(1, relaxed); // Serialization with all writers. 34 | uint32_t slot = ticket % CAPACITY; 35 | uint8_t turn = (uint8_t)(ticket / CAPACITY); // Write turns start at 0. 36 | 37 | uint8_t current_turn; 38 | while ((current_turn = queue->slots[slot].write_turn.load(acquire)) != turn) // Serialization with 1 reader. 39 | queue->slots[slot].write_turn.wait(current_turn, acquire); // Block while queue is full. 40 | 41 | queue->slots[slot].item = item; 42 | queue->slots[slot].read_turn.store(turn + 1, release); // Serialization with 1 reader. 43 | queue->slots[slot].read_turn.notify_all(); // Hash table crawl. 44 | } 45 | int dequeue(Queue *queue) { 46 | uint32_t ticket = queue->read_ticket.fetch_add(1, relaxed); // Serialization with all readers. 47 | uint32_t slot = ticket % CAPACITY; 48 | uint8_t turn = (uint8_t)(ticket / CAPACITY + 1); // Read turns start at 1. 49 | 50 | uint8_t current_turn; 51 | while ((current_turn = queue->slots[slot].read_turn.load(acquire)) != turn) // Serialization with 1 writer. 52 | queue->slots[slot].read_turn.wait(current_turn, acquire); // Block while queue is empty. 53 | 54 | int item = queue->slots[slot].item; 55 | queue->slots[slot].write_turn.store(turn, release); // Serialization with 1 writer. 56 | queue->slots[slot].write_turn.notify_all(); // Hash table crawl. 
57 | return item; 58 | } 59 | 60 | // Polling API 61 | 62 | bool try_enqueue(Queue *queue, int item) { 63 | uint32_t try_ticket = queue->write_ticket.load(relaxed); // Serialization with all writers. 64 | for (;;) { 65 | uint32_t slot = try_ticket % CAPACITY; 66 | uint8_t turn = (uint8_t)(try_ticket / CAPACITY); // Write turns start at 0. 67 | uint8_t current_turn = queue->slots[slot].write_turn.load(acquire); // Serialization with 1 reader. 68 | 69 | int turns_remaining = (int)(turn - current_turn); 70 | if (turns_remaining > 0) 71 | return false; // Queue is full. 72 | else if (turns_remaining < 0) 73 | try_ticket = queue->write_ticket.load(relaxed); // Another writer lapped us, try again. 74 | else if (queue->write_ticket.compare_exchange_weak(try_ticket, try_ticket + 1, relaxed)) { 75 | queue->slots[slot].item = item; 76 | queue->slots[slot].read_turn.store(turn + 1, release); // Serialization with 1 reader. 77 | queue->slots[slot].read_turn.notify_all(); // Hash table crawl. 78 | return true; 79 | } 80 | } 81 | } 82 | bool try_dequeue(Queue *queue, int *out_item) { 83 | uint32_t try_ticket = queue->read_ticket.load(relaxed); // Serialization with all readers. 84 | for (;;) { 85 | uint32_t slot = try_ticket % CAPACITY; 86 | uint8_t turn = (uint8_t)(try_ticket / CAPACITY + 1); // Read turns start at 1. 87 | uint8_t current_turn = queue->slots[slot].read_turn.load(acquire); // Serialization with 1 writer. 88 | 89 | int turns_remaining = (int)(turn - current_turn); 90 | if (turns_remaining > 0) 91 | return false; // Queue is empty. 92 | else if (turns_remaining < 0) 93 | try_ticket = queue->read_ticket.load(relaxed); // Another reader lapped us, try again. 94 | else if (queue->read_ticket.compare_exchange_weak(try_ticket, try_ticket + 1, relaxed)) { 95 | (*out_item) = queue->slots[slot].item; 96 | queue->slots[slot].write_turn.store(turn, release); // Serialization with 1 writer. 97 | queue->slots[slot].write_turn.notify_all(); // Hash table crawl. 
98 | return true; 99 | } 100 | } 101 | } 102 | 103 | // Test 104 | 105 | #include 106 | #include 107 | 108 | void reader_thread(Queue *queue) { 109 | static atomic counters[3][1000000]; 110 | int last_writer_data[3] = { -1, -1, -1 }; 111 | for (int i = 0; i < 1000000; ++i) { 112 | int item; 113 | if (i < 500000) 114 | item = dequeue(queue); 115 | else 116 | while (!try_dequeue(queue, &item)); 117 | int writer_id = item / 1000000; 118 | int data = item % 1000000; 119 | assert(writer_id < 3); // Ensure no data corruption. 120 | counters[writer_id][data].fetch_add(1); 121 | assert(last_writer_data[writer_id] < data); // Ensure data is correctly sequenced FIFO. 122 | last_writer_data[writer_id] = data; 123 | } 124 | 125 | // Wait for all readers to finish. 126 | static atomic done_counter; 127 | done_counter.fetch_add(1); 128 | done_counter.notify_all(); 129 | int num_done; 130 | while ((num_done = done_counter.load()) != 3) 131 | done_counter.wait(num_done); 132 | 133 | for (int writer_id = 0; writer_id < 3; ++writer_id) 134 | for (int i = 0; i < 1000000; ++i) 135 | assert(counters[writer_id][i] == 1); // Ensure all items have been properly received. 
136 | } 137 | void writer_thread(Queue *queue) { 138 | static atomic id_dispenser; 139 | int id = id_dispenser.fetch_add(1); 140 | for (int i = 0; i < 500000; ++i) 141 | enqueue(queue, id * 1000000 + i); 142 | for (int i = 500000; i < 1000000; ++i) 143 | while (!try_enqueue(queue, id * 1000000 + i)); 144 | } 145 | int main() { 146 | static Queue queue; 147 | thread reader0(reader_thread, &queue); 148 | thread reader1(reader_thread, &queue); 149 | thread reader2(reader_thread, &queue); 150 | thread writer0(writer_thread, &queue); 151 | thread writer1(writer_thread, &queue); 152 | thread writer2(writer_thread, &queue); 153 | reader0.join(); 154 | reader1.join(); 155 | reader2.join(); 156 | writer0.join(); 157 | writer1.join(); 158 | writer2.join(); 159 | } -------------------------------------------------------------------------------- /mpsc_queue.c: -------------------------------------------------------------------------------- 1 | #include 2 | #pragma comment(lib, "Synchronization.lib") 3 | 4 | #define CAPACITY 16384 // Must be a power of 2. 5 | 6 | struct Queue 7 | { 8 | __declspec(align(64)) UINT32 WriteTicket; 9 | __declspec(align(64)) UINT32 ReadTicket; 10 | __declspec(align(64)) struct { UINT8 Turn, Full; int Item; } Slots[CAPACITY]; 11 | }; 12 | 13 | // Blocking API 14 | 15 | void Enqueue(volatile struct Queue *queue, int item) 16 | { 17 | UINT32 ticket = InterlockedIncrementNoFence((volatile long *)&queue->WriteTicket) - 1; // Serialization with writers. 18 | UINT32 slot = ticket % CAPACITY; 19 | UINT8 turn = (UINT8)(ticket / CAPACITY); 20 | 21 | UINT8 currentTurn; 22 | while ((currentTurn = queue->Slots[slot].Turn) != turn) // Acquire, serialization with reader. 23 | WaitOnAddress(&queue->Slots[slot].Turn, ¤tTurn, sizeof currentTurn, INFINITE); // Block while queue is full. 24 | 25 | queue->Slots[slot].Item = item; 26 | queue->Slots[slot].Full = TRUE; // Release, serialization with reader. 
27 | WakeByAddressSingle((void *)&queue->Slots[slot].Full); // Hash table lookup. 28 | } 29 | int Dequeue(volatile struct Queue *queue) 30 | { 31 | UINT32 ticket = queue->ReadTicket++; 32 | UINT32 slot = ticket % CAPACITY; 33 | UINT8 turn = (UINT8)(ticket / CAPACITY); 34 | 35 | UINT8 notFull = FALSE; 36 | while (!queue->Slots[slot].Full) // Acquire, serialization with 1 writer. 37 | WaitOnAddress(&queue->Slots[slot].Full, ¬Full, sizeof notFull, INFINITE); // Block while queue is empty. 38 | 39 | int item = queue->Slots[slot].Item; 40 | queue->Slots[slot].Full = FALSE; 41 | queue->Slots[slot].Turn = turn + 1; // Release, serialization with 1 writer. 42 | WakeByAddressAll((void *)&queue->Slots[slot].Turn); // Hash table crawl. 43 | return item; 44 | } 45 | 46 | // Polling API 47 | 48 | BOOL TryEnqueue(volatile struct Queue *queue, int item) 49 | { 50 | UINT32 tryTicket = queue->WriteTicket; // Atomic load relaxed. Serialization with writers. 51 | for (;;) 52 | { 53 | UINT32 slot = tryTicket % CAPACITY; 54 | UINT8 turn = (UINT8)(tryTicket / CAPACITY); 55 | UINT8 currentTurn = queue->Slots[slot].Turn; // Acquire, serialization with reader. 56 | 57 | int turnsRemaining = (int)turn - (int)currentTurn; 58 | if (turnsRemaining > 0) 59 | return FALSE; // Queue is full. 60 | else if (turnsRemaining < 0) 61 | tryTicket = queue->WriteTicket; // Another writer lapped us, try again. 62 | else 63 | { 64 | UINT32 ticket = InterlockedCompareExchangeNoFence((volatile LONG *)&queue->WriteTicket, tryTicket + 1, tryTicket); // Serialization with writers. 65 | if (ticket == tryTicket) 66 | { 67 | queue->Slots[slot].Item = item; 68 | queue->Slots[slot].Full = TRUE; // Release, serialization with reader. 69 | WakeByAddressSingle((void *)&queue->Slots[slot].Full); // Hash table lookup. Remove this if you only use Polling and not Blocking. 
70 | return TRUE; 71 | } 72 | tryTicket = ticket; 73 | } 74 | } 75 | } 76 | BOOL TryDequeue(volatile struct Queue *queue, int *outItem) 77 | { 78 | UINT32 ticket = queue->ReadTicket; 79 | UINT32 slot = ticket % CAPACITY; 80 | if (!queue->Slots[slot].Full) // Acquire, serialization with 1 writer. 81 | return FALSE; // Queue is empty. 82 | 83 | UINT8 turn = (UINT8)(ticket / CAPACITY); 84 | (*outItem) = queue->Slots[slot].Item; 85 | queue->Slots[slot].Full = FALSE; 86 | queue->Slots[slot].Turn = turn + 1; // Release, serialization with 1 writer. 87 | WakeByAddressAll((void *)&queue->Slots[slot].Turn); // Hash table crawl. 88 | ++(queue->ReadTicket); 89 | return TRUE; 90 | } 91 | 92 | // Test 93 | 94 | #include 95 | 96 | DWORD __stdcall ReaderThread(void *parameter) 97 | { 98 | struct Queue *queue = parameter; 99 | static LONG counters[5][1000000]; 100 | int lastWriterData[5] = { -1, -1, -1, -1, -1 }; 101 | for (int i = 0; i < 5000000; ++i) 102 | { 103 | int item; 104 | if (i < 2500000) 105 | item = Dequeue(queue); 106 | else 107 | while (!TryDequeue(queue, &item)); 108 | int writer = item / 1000000; 109 | int data = item % 1000000; 110 | assert(writer < 5); // Ensure no data corruption corruption. 111 | ++(counters[writer][data]); 112 | assert(lastWriterData[writer] < data); // Ensure data is correctly sequenced FIFO. 113 | lastWriterData[writer] = data; 114 | } 115 | for (int writerId = 0; writerId < 5; ++writerId) 116 | for (int i = 0; i < 1000000; ++i) 117 | assert(counters[writerId][i] == 1); // Ensure all items have been properly received. 
118 | 119 | return EXIT_SUCCESS; 120 | } 121 | DWORD __stdcall WriterThread(void *parameter) 122 | { 123 | struct Queue *queue = parameter; 124 | static volatile LONG idDispenser; 125 | LONG id = InterlockedIncrement(&idDispenser) - 1; 126 | for (int i = 0; i < 500000; ++i) 127 | Enqueue(queue, id * 1000000 + i); 128 | for (int i = 500000; i < 1000000; ++i) 129 | while (!TryEnqueue(queue, id * 1000000 + i)); 130 | return EXIT_SUCCESS; 131 | } 132 | int main(void) 133 | { 134 | static struct Queue queue; 135 | HANDLE threads[6]; 136 | threads[0] = CreateThread(NULL, 0, ReaderThread, &queue, 0, NULL); 137 | threads[1] = CreateThread(NULL, 0, WriterThread, &queue, 0, NULL); 138 | threads[2] = CreateThread(NULL, 0, WriterThread, &queue, 0, NULL); 139 | threads[3] = CreateThread(NULL, 0, WriterThread, &queue, 0, NULL); 140 | threads[4] = CreateThread(NULL, 0, WriterThread, &queue, 0, NULL); 141 | threads[5] = CreateThread(NULL, 0, WriterThread, &queue, 0, NULL); 142 | WaitForMultipleObjects(6, threads, TRUE, INFINITE); 143 | __debugbreak(); 144 | } 145 | -------------------------------------------------------------------------------- /mpsc_queue.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | using namespace std; 4 | using enum std::memory_order; 5 | 6 | #define CAPACITY 16384 // Must be a power of 2. 7 | 8 | struct Queue { 9 | alignas(64) atomic write_ticket = 0; 10 | alignas(64) uint32_t read_ticket = 0; 11 | struct { 12 | alignas(64) 13 | atomic turn = 0; 14 | atomic full = false; 15 | int item = 0; 16 | } slots[CAPACITY]; 17 | }; 18 | 19 | // Blocking API 20 | 21 | void enqueue(Queue *queue, int item) { 22 | uint32_t ticket = queue->write_ticket.fetch_add(1, relaxed); // Serialization with writers. 
23 | uint32_t slot = ticket % CAPACITY; 24 | uint8_t turn = (uint8_t)(ticket / CAPACITY); 25 | 26 | uint8_t current_turn; 27 | while ((current_turn = queue->slots[slot].turn.load(acquire)) != turn) // Serialization with reader. 28 | queue->slots[slot].turn.wait(current_turn, acquire); // Block while queue is full. 29 | 30 | queue->slots[slot].item = item; 31 | queue->slots[slot].full.store(true, release); // Serialization with reader. 32 | queue->slots[slot].full.notify_one(); 33 | } 34 | int dequeue(Queue *queue) { 35 | uint32_t ticket = queue->read_ticket++; 36 | uint32_t slot = ticket % CAPACITY; 37 | uint8_t turn = (uint8_t)(ticket / CAPACITY); 38 | queue->slots[slot].full.wait(false, acquire); // Block while queue is empty. 39 | int item = queue->slots[slot].item; 40 | queue->slots[slot].full.store(false, relaxed); 41 | queue->slots[slot].turn.store(turn + 1, release); // Serialization with 1 writer. 42 | queue->slots[slot].turn.notify_all(); 43 | return item; 44 | } 45 | 46 | // Polling API 47 | 48 | bool try_enqueue(Queue *queue, int item) { 49 | uint32_t try_ticket = queue->write_ticket.load(relaxed); // Serialization with writers. 50 | for (;;) { 51 | uint32_t slot = try_ticket % CAPACITY; 52 | uint8_t turn = (uint8_t)(try_ticket / CAPACITY); 53 | uint8_t current_turn = queue->slots[slot].turn.load(acquire); // Serialization with reader. 54 | int turns_remaining = (int)turn - (int)current_turn; 55 | if (turns_remaining > 0) 56 | return false; // Queue is full. 57 | else if (turns_remaining < 0) 58 | try_ticket = queue->write_ticket; // Another writer lapped us, try again. 59 | else if (queue->write_ticket.compare_exchange_weak(try_ticket, try_ticket + 1, relaxed)) { 60 | queue->slots[slot].item = item; 61 | queue->slots[slot].full.store(true, release); // Serialization with reader. 62 | queue->slots[slot].full.notify_one(); // Hash table lookup. Remove this if you only use Polling and not Blocking. 
63 | return true; 64 | } 65 | } 66 | } 67 | bool try_dequeue(Queue *queue, int *out_item) { 68 | uint32_t ticket = queue->read_ticket; 69 | uint32_t slot = ticket % CAPACITY; 70 | if (!queue->slots[slot].full.load(acquire)) // Serialization with 1 writer. 71 | return false; // Queue is empty. 72 | 73 | uint8_t turn = (uint8_t)(ticket / CAPACITY); 74 | (*out_item) = queue->slots[slot].item; 75 | queue->slots[slot].full.store(false, relaxed); 76 | queue->slots[slot].turn.store(turn + 1, release); // Serialization with 1 writer. 77 | queue->slots[slot].turn.notify_all(); // Hash table crawl. Remove this if you only use Polling and not Blocking. 78 | ++(queue->read_ticket); 79 | return true; 80 | } 81 | 82 | // Test 83 | 84 | #include 85 | #include 86 | 87 | void reader_thread(Queue *queue) { 88 | static int counters[5][1000000]; 89 | int last_writer_data[5] = { -1, -1, -1, -1, -1 }; 90 | for (int i = 0; i < 5000000; ++i) { 91 | int item; 92 | if (i < 2500000) 93 | item = dequeue(queue); 94 | else 95 | while (!try_dequeue(queue, &item)); 96 | int writer = item / 1000000; 97 | int data = item % 1000000; 98 | assert(writer < 5); // Ensure no data corruption. 99 | ++(counters[writer][data]); 100 | assert(last_writer_data[writer] < data); // Ensure data is correctly sequenced FIFO. 101 | last_writer_data[writer] = data; 102 | } 103 | for (int writer = 0; writer < 5; ++writer) 104 | for (int i = 0; i < 1000000; ++i) 105 | assert(counters[writer][i] == 1); // Ensure all items have been properly received. 
106 | } 107 | void writer_thread(Queue *queue) { 108 | static atomic id_dispenser; 109 | int id = id_dispenser.fetch_add(1); 110 | for (int i = 0; i < 500000; ++i) 111 | enqueue(queue, id * 1000000 + i); 112 | for (int i = 500000; i < 1000000; ++i) 113 | while (!try_enqueue(queue, id * 1000000 + i)); 114 | } 115 | int main(void) { 116 | static struct Queue queue; 117 | thread reader(reader_thread, &queue); 118 | thread writer0(writer_thread, &queue); 119 | thread writer1(writer_thread, &queue); 120 | thread writer2(writer_thread, &queue); 121 | thread writer3(writer_thread, &queue); 122 | thread writer4(writer_thread, &queue); 123 | reader.join(); 124 | writer0.join(); 125 | writer1.join(); 126 | writer2.join(); 127 | writer3.join(); 128 | writer4.join(); 129 | } 130 | -------------------------------------------------------------------------------- /normalize_path.c: -------------------------------------------------------------------------------- 1 | // Normalizes file paths into a canonical form by 2 | // - removing '.' components 3 | // - resolving '..' components 4 | // - replacing backslashes with forward slashes 5 | // - merging consecutive path separators 6 | // - removing trailing slashes 7 | // Modifies the input string in place; the resulting string is always the same length or shorter. 8 | void normalize(char path[]) { 9 | char* src = path; 10 | char* dst = path; 11 | char* start = dst; 12 | for (;;) { 13 | if (*src == '/' || *src == '\\' || !*src) { 14 | int slash = *src == '/' || *src == '\\'; 15 | while (*src == '/' || *src == '\\') src++; // merge consecutive path separators 16 | int exit = *src == '\0'; // we might temporarily replace this with '/'. 17 | if (start + 1 == dst && start[0] == '.') { // remove '.' component 18 | dst = start; 19 | } 20 | else if (start + 2 == dst && start[0] == '.' && start[1] == '.' && start > path) { // resolve '..' 
component 21 | start--; // skip over last separator 22 | if (start > path && start[-1] != ':') while (start > path && start[-1] != '/') start--; // find the separator before that, and continue from there 23 | if (start[0] == '.' && start[1] == '.' && start[2] == '/') { // don't remove leading '..' 24 | start += 5; 25 | *start++ = '/'; 26 | } 27 | else if ((start == path && *start == '/') || (start > path && start[-1] == ':')) start++; // don't remove absolute path 28 | dst = start; 29 | } 30 | else if (slash) { // replace windows '\' with unix '/' separators 31 | *dst++ = '/'; 32 | } 33 | if (exit) break; 34 | start = dst; 35 | } 36 | else *dst++ = *src++; 37 | } 38 | if (dst > path + 1 && dst[-1] == '/' && dst[-2] != ':') dst--; // remove trailing separator 39 | *dst = '\0'; 40 | } 41 | 42 | // === TESTS === 43 | 44 | #include 45 | #include 46 | #include 47 | 48 | char* normalize_alloc(const char* path) { 49 | char* copy = malloc(strlen(path) + 1); 50 | memcpy(copy, path, strlen(path) + 1); 51 | normalize(copy); 52 | return copy; 53 | } 54 | 55 | int main(void) { 56 | // already canonicized 57 | assert(!strcmp(normalize_alloc("file"), "file")); 58 | assert(!strcmp(normalize_alloc("dir/subdir/file"), "dir/subdir/file")); 59 | 60 | // basic usage 61 | assert(!strcmp(normalize_alloc("dir\\subdir\\file"), "dir/subdir/file")); 62 | assert(!strcmp(normalize_alloc("dir/subdir/../file"), "dir/file")); 63 | assert(!strcmp(normalize_alloc("dir/subdir/../../file"), "file")); 64 | assert(!strcmp(normalize_alloc("dir/subdir/./file"), "dir/subdir/file")); 65 | assert(!strcmp(normalize_alloc("dir/subdir///file"), "dir/subdir/file")); 66 | assert(!strcmp(normalize_alloc("dir/subdir/file/"), "dir/subdir/file")); 67 | 68 | // unix absolute paths 69 | assert(!strcmp(normalize_alloc("/file"), "/file")); 70 | assert(!strcmp(normalize_alloc("/dir/subdir/file"), "/dir/subdir/file")); 71 | assert(!strcmp(normalize_alloc("/"), "/")); 72 | assert(!strcmp(normalize_alloc("/.."), "/")); 73 | 
assert(!strcmp(normalize_alloc("/../.."), "/")); 74 | 75 | // windows absolute paths 76 | assert(!strcmp(normalize_alloc("C:/file"), "C:/file")); 77 | assert(!strcmp(normalize_alloc("C:/"), "C:/")); 78 | assert(!strcmp(normalize_alloc("C:/.."), "C:/")); 79 | assert(!strcmp(normalize_alloc("C:/../.."), "C:/")); 80 | 81 | // edge cases 82 | assert(!strcmp(normalize_alloc(""), "")); 83 | assert(!strcmp(normalize_alloc("."), "")); 84 | assert(!strcmp(normalize_alloc(".."), "..")); 85 | assert(!strcmp(normalize_alloc("./"), "")); 86 | assert(!strcmp(normalize_alloc("../"), "..")); 87 | assert(!strcmp(normalize_alloc("/."), "/")); 88 | assert(!strcmp(normalize_alloc("/.."), "/")); 89 | assert(!strcmp(normalize_alloc(".a"), ".a")); 90 | assert(!strcmp(normalize_alloc("a."), "a.")); 91 | assert(!strcmp(normalize_alloc("..a"), "..a")); 92 | assert(!strcmp(normalize_alloc("a.."), "a..")); 93 | assert(!strcmp(normalize_alloc("../.."), "../..")); 94 | assert(!strcmp(normalize_alloc("../../.."), "../../..")); 95 | assert(!strcmp(normalize_alloc("a/b/c/../../../../../"), "../..")); 96 | assert(!strcmp(normalize_alloc("C:"), "C:")); // adding a trailing slash would make the string longer 97 | } 98 | -------------------------------------------------------------------------------- /platform_detection.c: -------------------------------------------------------------------------------- 1 | // Source: https://sourceforge.net/p/predef/wiki/Home/ 2 | // https://abseil.io/docs/cpp/platforms/macros 3 | 4 | #ifdef _MSC_VER 5 | # define COMPILER_MSVC 6 | #elif defined __EMSCRIPTEN__ 7 | # define COMPILER_EMSCRIPTEN 8 | #elif defined __INTEL_COMPILER 9 | # define COMPILER_INTEL 10 | #elif defined __clang__ 11 | # define COMPILER_CLANG 12 | #elif defined __GNUC__ 13 | # define COMPILER_GCC 14 | #elif defined __TINYC__ 15 | # define COMPILER_TINYC 16 | #else 17 | # error Unknown compiler. 
18 | #endif 19 | 20 | #if defined _WIN32 21 | # define PLATFORM_WINDOWS 22 | #elif defined __EMSCRIPTEN__ 23 | # define PLATFORM_WEB 24 | #elif defined __ANDROID__ 25 | # define PLATFORM_ANDROID 26 | #elif defined __APPLE__ 27 | # include 28 | # if TARGET_OS_IPHONE 29 | # define PLATFORM_IPHONE 30 | # else 31 | # define PLATFORM_MAC 32 | # endif 33 | #elif defined __FreeBSD__ || defined __NetBSD__ || defined __OpenBSD__ || defined __bsdi__ || defined __DragonFly__ 34 | # define PLATFORM_BSD 35 | #elif defined __linux__ 36 | # define PLATFORM_LINUX 37 | #else 38 | # error Unknown platform. 39 | #endif 40 | 41 | #if defined _M_X64 || defined __x86_64__ 42 | # define ARCH_X64 43 | #elif defined _M_IX86 || defined __i386__ 44 | # define ARCH_X86 45 | #elif defined _M_ARM64 || defined __aarch64__ 46 | # define ARCH_ARM64 47 | #elif defined __arm__ || defined _M_ARM 48 | # define ARCH_ARM32 49 | #elif defined __EMSCRIPTEN__ 50 | # define ARCH_WASM32 51 | #else 52 | # error Unknown CPU architecture. 53 | #endif -------------------------------------------------------------------------------- /precise_sleep.c: -------------------------------------------------------------------------------- 1 | // The PERFECT sleeping function for Windows. 2 | // - Sleep times accurate to 1 microsecond 3 | // - Low CPU usage 4 | // - Runs on Windows Vista and up 5 | 6 | #include 7 | #include 8 | #pragma comment(lib, "Winmm.lib") // timeGetDevCaps, timeBeginPeriod 9 | 10 | HANDLE Timer; 11 | int SchedulerPeriodMs; 12 | INT64 QpcPerSecond; 13 | 14 | void PreciseSleep(double seconds) 15 | { 16 | LARGE_INTEGER qpc; 17 | QueryPerformanceCounter(&qpc); 18 | INT64 targetQpc = (INT64)(qpc.QuadPart + seconds * QpcPerSecond); 19 | 20 | if (Timer) // Try using a high resolution timer first. 21 | { 22 | const double TOLERANCE = 0.001'02; 23 | INT64 maxTicks = (INT64)SchedulerPeriodMs * 9'500; 24 | for (;;) // Break sleep up into parts that are lower than scheduler period. 
25 | { 26 | double remainingSeconds = (targetQpc - qpc.QuadPart) / (double)QpcPerSecond; 27 | INT64 sleepTicks = (INT64)((remainingSeconds - TOLERANCE) * 10'000'000); 28 | if (sleepTicks <= 0) 29 | break; 30 | 31 | LARGE_INTEGER due; 32 | due.QuadPart = -(sleepTicks > maxTicks ? maxTicks : sleepTicks); 33 | SetWaitableTimerEx(Timer, &due, 0, NULL, NULL, NULL, 0); 34 | WaitForSingleObject(Timer, INFINITE); 35 | QueryPerformanceCounter(&qpc); 36 | } 37 | } 38 | else // Fallback to Sleep. 39 | { 40 | const double TOLERANCE = 0.000'02; 41 | double sleepMs = (seconds - TOLERANCE) * 1000 - SchedulerPeriodMs; // Sleep for 1 scheduler period less than requested. 42 | int sleepSlices = (int)(sleepMs / SchedulerPeriodMs); 43 | if (sleepSlices > 0) 44 | Sleep((DWORD)sleepSlices * SchedulerPeriodMs); 45 | QueryPerformanceCounter(&qpc); 46 | } 47 | 48 | while (qpc.QuadPart < targetQpc) // Spin for any remaining time. 49 | { 50 | YieldProcessor(); 51 | QueryPerformanceCounter(&qpc); 52 | } 53 | } 54 | 55 | int main(void) 56 | { 57 | // Initialization 58 | Timer = CreateWaitableTimerExW(NULL, NULL, CREATE_WAITABLE_TIMER_HIGH_RESOLUTION, TIMER_ALL_ACCESS); 59 | TIMECAPS caps; 60 | timeGetDevCaps(&caps, sizeof caps); 61 | timeBeginPeriod(caps.wPeriodMin); 62 | SchedulerPeriodMs = (int)caps.wPeriodMin; 63 | LARGE_INTEGER qpf; 64 | QueryPerformanceFrequency(&qpf); 65 | QpcPerSecond = qpf.QuadPart; 66 | 67 | // Game loop 68 | for (int i = 0; i < 100; ++i) 69 | { 70 | LARGE_INTEGER qpc0, qpc1; 71 | QueryPerformanceCounter(&qpc0); 72 | PreciseSleep(1 / 60.0); 73 | QueryPerformanceCounter(&qpc1); 74 | double dt = (qpc1.QuadPart - qpc0.QuadPart) / (double)QpcPerSecond; 75 | printf("Slept for %.2f ms\n", 1000 * dt); 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /priority_queue.c: -------------------------------------------------------------------------------- 1 | #include // realloc, free 2 | 3 | struct queue { // max heap 4 | struct 
item *items; 5 | int capacity; 6 | int count; 7 | }; 8 | 9 | struct item { 10 | int priority; 11 | int value; 12 | }; 13 | 14 | #define LEFT_CHILD(index) (2*(index)+1) 15 | #define RIGHT_CHILD(index) (2*(index)+2) 16 | #define PARENT(index) ((index-1)/2) 17 | 18 | void upheap(struct item *items, int index) { 19 | for (; index > 0 && items[index].priority > items[PARENT(index)].priority; index = PARENT(index)) { 20 | struct item temp = items[index]; 21 | items[index] = items[PARENT(index)]; 22 | items[PARENT(index)] = temp; 23 | } 24 | } 25 | 26 | void downheap(struct item *items, int index, int count) { 27 | while (LEFT_CHILD(index) < count) { 28 | int l = LEFT_CHILD(index); 29 | int r = RIGHT_CHILD(index); 30 | int max_child = l; 31 | if (r < count && items[r].priority >= items[l].priority) 32 | max_child = r; 33 | 34 | if (items[index].priority >= items[max_child].priority) 35 | break; 36 | 37 | struct item temp = items[max_child]; 38 | items[max_child] = items[index]; 39 | items[index] = temp; 40 | index = max_child; 41 | } 42 | } 43 | 44 | void reserve(struct queue *queue, int min_capacity) { 45 | if (queue->capacity < min_capacity) { 46 | int new_capacity = 2 * queue->capacity; 47 | if (new_capacity < 128) 48 | new_capacity = 128; 49 | while (new_capacity < min_capacity) 50 | new_capacity *= 2; 51 | 52 | queue->items = realloc(queue->items, (int)new_capacity * sizeof queue->items[0]); 53 | queue->capacity = new_capacity; 54 | } 55 | } 56 | 57 | void push(struct queue *queue, int item, int priority) { 58 | reserve(queue, queue->count + 1); 59 | int index = queue->count++; 60 | queue->items[index].priority = priority; 61 | queue->items[index].value = item; 62 | upheap(queue->items, index); 63 | } 64 | 65 | int pop(struct queue *queue) { 66 | if (!queue->count) 67 | return 0; // Tried to pop from an empty queue. 
68 | 69 | int result = queue->items[0].value; 70 | queue->items[0] = queue->items[--queue->count]; 71 | downheap(queue->items, 0, queue->count); 72 | return result; 73 | } 74 | 75 | int push_pop(struct queue *queue, int item, int priority) { 76 | if (!queue->count || priority >= queue->items[0].priority) 77 | return item; 78 | 79 | int result = queue->items[0].value; 80 | queue->items[0].priority = priority; 81 | queue->items[0].value = item; 82 | downheap(queue->items, 0, queue->count); 83 | return result; 84 | } 85 | 86 | int pop_push(struct queue *queue, int item, int priority) { 87 | if (!queue->count) { 88 | push(queue, item, priority); 89 | return 0; // Tried to pop from an empty queue. 90 | } 91 | 92 | int result = queue->items[0].value; 93 | queue->items[0].value = item; 94 | queue->items[0].priority = priority; 95 | downheap(queue->items, 0, queue->count); 96 | return result; 97 | } 98 | 99 | void change_priority(struct queue *queue, int index, int new_priority) { 100 | if (index < queue->count) { 101 | int old_priority = queue->items[index].priority; 102 | queue->items[index].priority = new_priority; 103 | if (new_priority > old_priority) 104 | upheap(queue->items, index); 105 | else if (new_priority < old_priority) 106 | downheap(queue->items, index, queue->count); 107 | } 108 | } 109 | 110 | void destroy(struct queue *queue) { 111 | free(queue->items); 112 | queue->items = NULL; 113 | queue->capacity = 0; 114 | queue->count = 0; 115 | } 116 | 117 | #include 118 | int main(void) { 119 | { 120 | struct queue queue = { 0 }; 121 | for (int i = 0; i < 10; ++i) 122 | push(&queue, i, i); 123 | assert(queue.count == 10); 124 | 125 | for (int i = 9; i >= 0; --i) 126 | assert(pop(&queue) == i); 127 | assert(queue.count == 0); 128 | 129 | destroy(&queue); 130 | } 131 | 132 | { 133 | static int priorities[10000]; 134 | for (int i = 0; i < 10000; ++i) 135 | priorities[i] = rand(); 136 | 137 | struct queue queue = { 0 }; 138 | for (int i = 0; i < 10000; ++i) 139 | 
push(&queue, i, priorities[i]); 140 | assert(queue.count == 10000); 141 | 142 | int prev = -1; 143 | while (queue.count > 0) { 144 | int index = pop(&queue); 145 | assert(prev == -1 || priorities[prev] >= priorities[index]); 146 | } 147 | 148 | destroy(&queue); 149 | } 150 | 151 | { 152 | struct queue queue = { 0 }; 153 | push(&queue, 0, 0); 154 | push(&queue, 1, 1); 155 | push(&queue, 2, 2); 156 | change_priority(&queue, 0, -99); 157 | change_priority(&queue, 1, 99); 158 | assert(pop(&queue) == 0); 159 | assert(pop(&queue) == 1); 160 | assert(pop(&queue) == 2); 161 | destroy(&queue); 162 | } 163 | } -------------------------------------------------------------------------------- /slab_allocator.c: -------------------------------------------------------------------------------- 1 | #include // malloc, free, size_t 2 | #include // memcpy 3 | 4 | #define SLAB_SIZE (64*1024) 5 | 6 | struct allocator { 7 | struct slab *slab; 8 | int cursor; 9 | }; 10 | 11 | struct slab { 12 | struct slab *prev; 13 | struct slab *next; 14 | void *memory; 15 | int capacity; 16 | int cursor; 17 | }; 18 | 19 | void *allocate(struct allocator *allocator, int size, int alignment) { 20 | size_t mask = (size_t)alignment - 1; 21 | for (;;) { 22 | size_t unaligned = (size_t)allocator->slab->memory + allocator->slab->cursor; 23 | size_t aligned = (unaligned + mask) & ~mask; 24 | int needed = size + (int)(aligned - unaligned); 25 | int remaining = allocator->slab->capacity - allocator->slab->cursor; 26 | if (needed <= remaining) { 27 | allocator->slab->cursor += needed; 28 | allocator->cursor += needed; 29 | return (void *)aligned; 30 | } 31 | 32 | struct slab *next = allocator->slab->next; 33 | if (!next) { 34 | int worst_case = size + alignment - 1; 35 | int consecutive_slabs = (worst_case + SLAB_SIZE - 1) / SLAB_SIZE; 36 | int capacity = consecutive_slabs * SLAB_SIZE; 37 | next = malloc(sizeof next[0] + capacity); 38 | next->prev = allocator->slab; 39 | next->next = NULL; 40 | next->memory = 
next + 1; 41 | next->capacity = capacity; 42 | next->cursor = 0; 43 | allocator->slab->next = next; 44 | } 45 | 46 | allocator->cursor += remaining; 47 | allocator->slab->cursor += remaining; 48 | allocator->slab = next; 49 | } 50 | } 51 | 52 | void deallocate(struct allocator *allocator, void *block, int size) { 53 | char *end = (char *)block + size; 54 | char *top = (char *)allocator->slab->memory + allocator->slab->cursor; 55 | if (end == top) { 56 | allocator->slab->cursor -= size; 57 | allocator->cursor -= size; 58 | } 59 | } 60 | 61 | void *reallocate(struct allocator *allocator, void *block, int old_size, int new_size, int alignment) { 62 | size_t mask = (size_t)alignment - 1; 63 | if (!((size_t)block & mask)) { 64 | char *end = (char *)block + old_size; 65 | char *top = (char *)allocator->slab->memory + allocator->slab->cursor; 66 | int delta = new_size - old_size; 67 | if (end == top && allocator->slab->cursor + delta <= allocator->slab->capacity) { 68 | allocator->slab->cursor += delta; 69 | allocator->cursor += delta; 70 | return block; 71 | } 72 | if (new_size < old_size) 73 | return block; 74 | } 75 | 76 | void *copy = allocate(allocator, new_size, alignment); 77 | int to_copy = new_size; 78 | if (to_copy > old_size) 79 | to_copy = old_size; 80 | memcpy(copy, block, (size_t)to_copy); 81 | return copy; 82 | } 83 | 84 | void reset(struct allocator *allocator, int cursor) { 85 | for (;;) { 86 | int remaining = allocator->cursor - cursor; 87 | if (remaining <= allocator->slab->cursor) { 88 | allocator->slab->cursor -= remaining; 89 | allocator->cursor = cursor; 90 | return; 91 | } 92 | 93 | allocator->cursor -= allocator->slab->cursor; 94 | allocator->slab->cursor = 0; 95 | if (allocator->slab->prev) 96 | allocator->slab = allocator->slab->prev; 97 | } 98 | } 99 | 100 | void trim(struct allocator *allocator) { 101 | struct slab *slab = allocator->slab->next; 102 | allocator->slab->next = NULL; 103 | while (slab) { 104 | struct slab *next = slab->next; 105 
| free(slab); 106 | slab = next; 107 | } 108 | } 109 | 110 | void destroy(struct allocator *allocator) { 111 | struct slab *slab = allocator->slab; 112 | while (slab->prev) 113 | slab = slab->prev; 114 | while (slab) { 115 | struct slab *next = slab->next; 116 | if (slab->capacity) 117 | free(slab); 118 | slab = next; 119 | } 120 | } 121 | 122 | #include 123 | int main(void) { 124 | { 125 | struct allocator allocator = { .slab = &(struct slab) { 0 } }; 126 | // None of these should crash. 127 | for (int i = 0; i < 2; ++i) { 128 | allocate(&allocator, 0, 1); 129 | assert(allocator.cursor == 0); 130 | reallocate(&allocator, NULL, 0, 0, 1); 131 | assert(allocator.cursor == 0); 132 | deallocate(&allocator, NULL, 0); 133 | assert(allocator.cursor == 0); 134 | trim(&allocator); 135 | destroy(&allocator); 136 | } 137 | } 138 | 139 | { 140 | struct allocator allocator = { .slab = &(struct slab) { 0 } }; 141 | 142 | assert(allocator.cursor == 0); 143 | int ne = 999; 144 | int nb = ne * sizeof(int); 145 | int *a = allocate(&allocator, nb, _Alignof(int)); 146 | int marka = allocator.cursor; 147 | assert(marka >= nb && marka < nb + _Alignof(int) && marka == allocator.cursor); 148 | for (int i = 0; i < ne; ++i) 149 | a[i] = i; 150 | 151 | int *b = allocate(&allocator, nb, _Alignof(int)); 152 | int markb = allocator.cursor; 153 | assert(markb >= 2 * nb && markb < 2 * (nb + _Alignof(int))); 154 | for (int i = 0; i < ne; ++i) { 155 | assert(a[i] == i); 156 | b[i] = 2 * i; 157 | } 158 | 159 | int *c = allocate(&allocator, nb, _Alignof(int)); 160 | int markc = allocator.cursor; 161 | assert(markc >= 3 * nb && markc < 3 * (nb + _Alignof(int))); 162 | for (int i = 0; i < ne; ++i) { 163 | assert(a[i] == i); 164 | assert(b[i] == 2 * i); 165 | c[i] = 3 * i; 166 | } 167 | 168 | int mark = allocator.cursor; 169 | int *d = reallocate(&allocator, a, nb, 2 * nb, _Alignof(int)); 170 | int markd = allocator.cursor; 171 | assert(markd >= 5 * nb && markd < 5 * nb + 4 * _Alignof(int)); 172 | for 
(int i = 0; i < ne; ++i) { 173 | assert(a[i] == i); 174 | assert(b[i] == 2 * i); 175 | assert(c[i] == 3 * i); 176 | assert(d[i] == i); 177 | } 178 | for (int i = ne; i < 2 * ne; ++i) 179 | d[i] = i; 180 | 181 | int *e = reallocate(&allocator, d, 2 * nb, 3 * nb, _Alignof(int)); 182 | int marke = allocator.cursor; 183 | assert(marke >= 6 * nb && marke < 6 * nb + 4 * _Alignof(int)); 184 | assert(e == d); 185 | for (int i = 0; i < 2 * ne; ++i) 186 | assert(e[i] == i); 187 | for (int i = 2 * ne; i < 3 * ne; ++i) 188 | e[i] = i; 189 | 190 | int mark1 = allocator.cursor; 191 | deallocate(&allocator, c, nb); 192 | assert(allocator.cursor == mark1); 193 | for (int i = 0; i < ne; ++i) { 194 | assert(a[i] == i); 195 | assert(b[i] == 2 * i); 196 | } 197 | for (int i = 0; i < 3 * ne; ++i) 198 | assert(e[i] == i); 199 | 200 | reset(&allocator, mark); 201 | assert(allocator.cursor == mark); 202 | for (int i = 0; i < ne; ++i) 203 | assert(b[i] == 2 * i); 204 | 205 | reset(&allocator, 0); 206 | assert(allocator.cursor == 0); 207 | 208 | char *f = allocate(&allocator, SLAB_SIZE + 1024, 1); 209 | int fmark = allocator.cursor; 210 | assert(fmark >= 2 * SLAB_SIZE + 1024 && fmark <= 2 * SLAB_SIZE + 1025); 211 | memset(f, 'f', SLAB_SIZE + 1024); 212 | 213 | char *g = allocate(&allocator, 2 * SLAB_SIZE + 1024, 1); 214 | int gmark = allocator.cursor; 215 | assert(gmark >= 5 * SLAB_SIZE + 1024 && gmark < 5 * SLAB_SIZE + 1024 + 64); 216 | memset(g, 'g', 2 * SLAB_SIZE + 1024); 217 | 218 | char *h = reallocate(&allocator, f, SLAB_SIZE + 1024, 3 * SLAB_SIZE + 1024, 1); 219 | int hmark = allocator.cursor; 220 | for (int i = 0; i < SLAB_SIZE + 1024; ++i) 221 | assert(f[i] == 'f'); 222 | for (int i = 0; i < 2 * SLAB_SIZE + 1024; ++i) 223 | assert(g[i] == 'g'); 224 | for (int i = 0; i < SLAB_SIZE + 1024; ++i) 225 | assert(h[i] == 'f'); 226 | memset(h, 'h', 3 * SLAB_SIZE + 1024); 227 | 228 | deallocate(&allocator, h, 3 * SLAB_SIZE + 1024); 229 | assert(allocator.cursor >= gmark && allocator.cursor < 
hmark); 230 | 231 | char *k = allocate(&allocator, SLAB_SIZE, 1); 232 | assert(k == h); 233 | memset(k, 'k', SLAB_SIZE); 234 | 235 | reset(&allocator, gmark); 236 | assert(allocator.cursor == gmark); 237 | 238 | allocate(&allocator, 2 * SLAB_SIZE, 2); 239 | reset(&allocator, 0); 240 | assert(allocator.cursor == 0); 241 | 242 | for (int i = 0; i < 1000; ++i) { 243 | for (int align = 2048; align >= 1; align /= 2) { 244 | void *ptr = allocate(&allocator, 1, align); 245 | assert(!((size_t)ptr & (size_t)(align - 1))); 246 | } 247 | } 248 | 249 | reset(&allocator, 0); 250 | assert(allocator.cursor == 0); 251 | trim(&allocator); 252 | destroy(&allocator); 253 | } 254 | } -------------------------------------------------------------------------------- /stack_allocator.c: -------------------------------------------------------------------------------- 1 | #include // uintptr_t 2 | #include // memcpy - only needed for realloc 3 | 4 | struct allocator { 5 | void *buffer; 6 | int capacity; 7 | int cursor; 8 | }; 9 | 10 | void *allocate(struct allocator *allocator, int size, int alignment) { 11 | uintptr_t mask = (uintptr_t)alignment - 1; // Alignment must be a power of 2. 
12 | uintptr_t unaligned = (uintptr_t)allocator->buffer + allocator->cursor; 13 | uintptr_t aligned = (unaligned + mask) & ~mask; 14 | int new_cursor = allocator->cursor + size + (int)(aligned - unaligned); 15 | if (new_cursor > allocator->capacity) 16 | return 0; 17 | 18 | allocator->cursor = new_cursor; 19 | return (void *)aligned; 20 | } 21 | 22 | void deallocate(struct allocator *allocator, void *block, int size) { 23 | if ((char *)block + size == (char *)allocator->buffer + allocator->cursor) 24 | allocator->cursor -= size; 25 | } 26 | 27 | void *reallocate(struct allocator *allocator, void *block, int old_size, int new_size, int alignment) { 28 | uintptr_t mask = (uintptr_t)alignment - 1; 29 | if ((char *)block + old_size == (char *)allocator->buffer + allocator->cursor && ((uintptr_t)block & mask) == 0) { 30 | int new_cursor = allocator->cursor + new_size - old_size; 31 | if (new_cursor > allocator->capacity) 32 | return 0; 33 | allocator->cursor = new_cursor; 34 | return block; 35 | } 36 | 37 | void *result = allocate(allocator, new_size, alignment); 38 | if (result) { 39 | int to_copy = new_size < old_size ? 
new_size : old_size; 40 | memcpy(result, block, (size_t)to_copy); 41 | } 42 | return result; 43 | } 44 | 45 | #include 46 | int main(void) { 47 | struct allocator allocator = { 0 }; 48 | assert(!allocate(&allocator, 1, 1)); 49 | assert(!allocate(&allocator, 1, 1)); 50 | deallocate(&allocator, 0, 0); 51 | assert(!reallocate(&allocator, 0, 0, 1, 1)); 52 | 53 | _Alignas(16) char buffer[16]; 54 | allocator = (struct allocator){ .buffer = buffer, .capacity = sizeof buffer }; 55 | char *c = allocate(&allocator, sizeof(char), _Alignof(char)); 56 | short *s = allocate(&allocator, sizeof(short), _Alignof(short)); 57 | int *i = allocate(&allocator, sizeof(int), _Alignof(int)); 58 | long long *l = allocate(&allocator, sizeof(long long), _Alignof(long long)); 59 | long long *null = allocate(&allocator, sizeof(long long), _Alignof(long long)); 60 | assert(c && (uintptr_t)c % _Alignof(char) == 0); 61 | assert(s && (uintptr_t)s % _Alignof(short) == 0); 62 | assert(i && (uintptr_t)i % _Alignof(int) == 0); 63 | assert(l && (uintptr_t)l % _Alignof(long long) == 0); 64 | assert(!null); 65 | 66 | deallocate(&allocator, l, sizeof(long long)); 67 | l = allocate(&allocator, sizeof(long long), _Alignof(long long)); 68 | assert(l); 69 | 70 | deallocate(&allocator, l, sizeof(long long)); 71 | deallocate(&allocator, i, sizeof(int)); 72 | int *ints = allocate(&allocator, 3 * sizeof(int), _Alignof(int)); 73 | assert(ints); 74 | ints[0] = ints[1] = ints[2] = 42; 75 | 76 | long long big_buffer[1024]; 77 | allocator = (struct allocator){ .buffer = big_buffer, .capacity = sizeof big_buffer }; 78 | assert(!allocate(&allocator, 1024 * sizeof(long long) + 1, 1)); 79 | l = allocate(&allocator, 1024 * sizeof(long long), _Alignof(long long)); 80 | assert(l); 81 | deallocate(&allocator, l, 1024 * sizeof(long long)); 82 | l = allocate(&allocator, 1024 * sizeof(long long), _Alignof(long long)); 83 | assert(l); 84 | 85 | i = reallocate(&allocator, l, 1024 * sizeof(long long), 0, 1); 86 | 
assert(allocator.cursor == 0); 87 | i = reallocate(&allocator, i, 0, sizeof(int), _Alignof(int)); 88 | *i = 42; 89 | assert(allocator.cursor == sizeof(int)); 90 | i = reallocate(&allocator, i, sizeof(int), 10 * sizeof(int), _Alignof(int)); 91 | assert(allocator.cursor == 10 * sizeof(int)); 92 | i = reallocate(&allocator, i, 10 * sizeof(int), 2048 * sizeof(int), _Alignof(int)); 93 | assert(allocator.cursor == 2048 * sizeof(int)); 94 | i = reallocate(&allocator, i, 2048 * sizeof(int), 11 * sizeof(int), _Alignof(int)); 95 | assert(allocator.cursor == 11 * sizeof(int)); 96 | for (int j = 0; j < 11; ++j) 97 | i[j] = j; 98 | l = reallocate(&allocator, NULL, 0, 1, _Alignof(long long)); 99 | int *i1 = reallocate(&allocator, i, 11 * sizeof(int), 12 * sizeof(int), _Alignof(int)); 100 | assert(i1 != i); 101 | for (int j = 0; j < 11; ++j) 102 | assert(i1[j] == j); 103 | allocate(&allocator, 2, _Alignof(char)); 104 | int *i2 = reallocate(&allocator, i1, 12 * sizeof(int), 3 * sizeof(int), _Alignof(int)); 105 | assert(i2 != i1); 106 | for (int j = 0; j < 3; ++j) 107 | assert(i2[j] == j); 108 | int *i3 = reallocate(&allocator, i2, 3 * sizeof(int), 3 * sizeof(int), 64); 109 | assert(i3 != i2); 110 | for (int j = 0; j < 3; ++j) 111 | assert(i2[j] == j); 112 | } -------------------------------------------------------------------------------- /string_buffer.c: -------------------------------------------------------------------------------- 1 | #include // snprintf 2 | #include // memcpy 3 | #include // va_list, va_start, va_end 4 | 5 | struct buffer { 6 | char *buffer; // Always kept null terminated. 7 | int cursor; 8 | int capacity; 9 | int bytes_needed; // Includes null terminator. 
10 | }; 11 | 12 | struct buffer create(char *buffer, int capacity) { 13 | if (capacity > 0) 14 | buffer[0] = 0; 15 | return (struct buffer) { 16 | .buffer = buffer, 17 | .capacity = capacity, 18 | .bytes_needed = 1, 19 | }; 20 | } 21 | 22 | void append_char(struct buffer *buffer, char c) { 23 | ++buffer->bytes_needed; 24 | if (buffer->cursor + 1 < buffer->capacity) { 25 | buffer->buffer[buffer->cursor++] = c; 26 | buffer->buffer[buffer->cursor] = 0; 27 | } 28 | } 29 | 30 | void append_char_repeated(struct buffer *buffer, char c, int count) { 31 | int remaining_chars = buffer->capacity - buffer->cursor - 1; 32 | if (remaining_chars < 0) 33 | remaining_chars = 0; 34 | int bytes_to_set = count; 35 | if (bytes_to_set > remaining_chars) 36 | bytes_to_set = remaining_chars; 37 | memset(buffer->buffer + buffer->cursor, c, (size_t)bytes_to_set); 38 | buffer->bytes_needed += count; 39 | buffer->cursor += bytes_to_set; 40 | if (bytes_to_set > 0) 41 | buffer->buffer[buffer->cursor] = 0; 42 | } 43 | 44 | void append_bytes(struct buffer *buffer, const void *bytes, int size) { 45 | int remaining_chars = buffer->capacity - buffer->cursor - 1; 46 | if (remaining_chars < 0) 47 | remaining_chars = 0; 48 | int bytes_to_copy = size; 49 | if (bytes_to_copy > remaining_chars) 50 | bytes_to_copy = remaining_chars; 51 | memcpy(buffer->buffer + buffer->cursor, bytes, (size_t)bytes_to_copy); 52 | buffer->bytes_needed += size; 53 | buffer->cursor += bytes_to_copy; 54 | if (bytes_to_copy > 0) 55 | buffer->buffer[buffer->cursor] = 0; 56 | } 57 | 58 | void append_string(struct buffer *buffer, const char *string) { 59 | int remaining_chars = buffer->capacity - buffer->cursor - 1; 60 | if (remaining_chars < 0) 61 | remaining_chars = 0; 62 | int length; 63 | for (length = 0; length < remaining_chars && string[length]; ++length) 64 | buffer->buffer[buffer->cursor++] = string[length]; 65 | buffer->bytes_needed += length + (int)strlen(string + length); 66 | if (remaining_chars > 0) 67 | 
buffer->buffer[buffer->cursor] = 0; 68 | } 69 | 70 | void append_format_va(struct buffer *buffer, const char *format, va_list args) { 71 | int remaining_bytes = buffer->capacity - buffer->cursor; 72 | int chars_needed = vsnprintf(buffer->buffer + buffer->cursor, (size_t)remaining_bytes, format, args); 73 | int chars_written = chars_needed; 74 | if (chars_written > remaining_bytes - 1) 75 | chars_written = remaining_bytes - 1; 76 | if (chars_written > 0) 77 | buffer->cursor += chars_written; 78 | buffer->bytes_needed += chars_needed; 79 | } 80 | 81 | void append_format(struct buffer *buffer, const char *format, ...) { 82 | va_list args; 83 | va_start(args, format); 84 | append_format_va(buffer, format, args); 85 | va_end(args); 86 | } 87 | 88 | #include 89 | #define BUFFER_ON_STACK(capacity) create((char[capacity]){0},(capacity)) 90 | int main(void) { 91 | // create 92 | { 93 | struct buffer sb; 94 | 95 | sb = create(NULL, 0); 96 | assert(!sb.buffer && sb.capacity == 0 && sb.cursor == 0 && sb.bytes_needed == 1); 97 | 98 | char a[3] = { 1, 2, 3 }; 99 | sb = create(a + 1, 1); 100 | assert(sb.buffer == a + 1 && sb.capacity == 1 && sb.cursor == 0 && sb.bytes_needed == 1); 101 | assert(a[1] == 0); 102 | 103 | sb = BUFFER_ON_STACK(1); 104 | assert(sb.buffer && sb.buffer[0] == 0 && sb.capacity == 1 && sb.cursor == 0 && sb.bytes_needed == 1); 105 | 106 | sb = BUFFER_ON_STACK(42); 107 | assert(sb.buffer && sb.buffer[0] == 0 && sb.capacity == 42 && sb.cursor == 0 && sb.bytes_needed == 1); 108 | } 109 | 110 | // append_char 111 | { 112 | struct buffer sb; 113 | 114 | sb = BUFFER_ON_STACK(4); 115 | append_char(&sb, 'a'); 116 | assert(!strcmp(sb.buffer, "a")); 117 | assert(sb.cursor == 1 && sb.bytes_needed == 2); 118 | 119 | append_char(&sb, 'b'); 120 | append_char(&sb, 'c'); 121 | assert(!strcmp(sb.buffer, "abc")); 122 | assert(sb.cursor == 3 && sb.bytes_needed == 4); 123 | 124 | append_char(&sb, 'd'); 125 | append_char(&sb, 'e'); 126 | append_char(&sb, 'f'); 127 | 
assert(!strcmp(sb.buffer, "abc")); 128 | assert(sb.cursor == 3 && sb.bytes_needed == 7); 129 | 130 | sb = create(NULL, 0); 131 | for (int i = 0; i < 100; ++i) 132 | append_char(&sb, (char)i); 133 | assert(sb.cursor == 0 && sb.bytes_needed == 101); 134 | 135 | sb = BUFFER_ON_STACK(8); 136 | append_char(&sb, 'a'); 137 | append_char(&sb, 'b'); 138 | append_char(&sb, 'c'); 139 | append_char(&sb, '\0'); 140 | assert(!strcmp(sb.buffer, "abc")); 141 | assert(sb.cursor == 4 && sb.bytes_needed == 5); 142 | 143 | append_char(&sb, 'd'); 144 | append_char(&sb, 'e'); 145 | assert(!strcmp(sb.buffer, "abc")); 146 | assert(!strcmp(sb.buffer + 4, "de")); 147 | assert(sb.cursor == 6 && sb.bytes_needed == 7); 148 | } 149 | 150 | // append_char_repeated 151 | { 152 | struct buffer sb; 153 | 154 | sb = BUFFER_ON_STACK(8); 155 | append_char_repeated(&sb, 'a', 3); 156 | assert(!strcmp(sb.buffer, "aaa")); 157 | assert(sb.cursor == 3 && sb.bytes_needed == 4); 158 | 159 | append_char_repeated(&sb, 'b', 1); 160 | assert(!strcmp(sb.buffer, "aaab")); 161 | assert(sb.cursor == 4 && sb.bytes_needed == 5); 162 | 163 | append_char_repeated(&sb, 'c', 0); 164 | assert(!strcmp(sb.buffer, "aaab")); 165 | assert(sb.cursor == 4 && sb.bytes_needed == 5); 166 | 167 | append_char_repeated(&sb, 'd', 4); 168 | assert(!strcmp(sb.buffer, "aaabddd")); 169 | assert(sb.cursor == 7 && sb.bytes_needed == 9); 170 | 171 | append_char_repeated(&sb, 'e', 100); 172 | assert(!strcmp(sb.buffer, "aaabddd")); 173 | assert(sb.cursor == 7 && sb.bytes_needed == 109); 174 | 175 | sb = create(NULL, 0); 176 | for (int i = 0; i < 100; ++i) 177 | append_char_repeated(&sb, (char)i, 100); 178 | assert(sb.cursor == 0 && sb.bytes_needed == 100 * 100 + 1); 179 | 180 | sb = BUFFER_ON_STACK(8); 181 | append_char_repeated(&sb, 'a', 3); 182 | append_char_repeated(&sb, '\0', 3); 183 | assert(sb.cursor == 6 && sb.bytes_needed == 7); 184 | assert(!strcmp(sb.buffer, "aaa") && !memcmp(sb.buffer + 3, "\0\0\0\0", 4)); 185 | 
append_char_repeated(&sb, 'b', 3); 186 | assert(sb.cursor == 7 && sb.bytes_needed == 10); 187 | assert(!strcmp(sb.buffer + 6, "b")); 188 | } 189 | 190 | // append_bytes 191 | { 192 | struct buffer sb; 193 | 194 | sb = BUFFER_ON_STACK(8); 195 | append_bytes(&sb, "123", 3); 196 | assert(!strcmp(sb.buffer, "123")); 197 | assert(sb.cursor == 3 && sb.bytes_needed == 4); 198 | 199 | append_bytes(&sb, "", 0); 200 | assert(!strcmp(sb.buffer, "123")); 201 | assert(sb.cursor == 3 && sb.bytes_needed == 4); 202 | 203 | append_bytes(&sb, "4567", 4); 204 | assert(!strcmp(sb.buffer, "1234567")); 205 | assert(sb.cursor == 7 && sb.bytes_needed == 8); 206 | 207 | append_bytes(&sb, "890", 3); 208 | assert(!strcmp(sb.buffer, "1234567")); 209 | assert(sb.cursor == 7 && sb.bytes_needed == 11); 210 | 211 | sb = create(NULL, 0); 212 | for (int i = 0; i < 100; ++i) 213 | append_bytes(&sb, "1234", 4); 214 | assert(sb.cursor == 0 && sb.bytes_needed == 401); 215 | 216 | sb = BUFFER_ON_STACK(12); 217 | append_bytes(&sb, "12345", 5); 218 | append_bytes(&sb, "\0\0\0\0\0", 5); 219 | assert(!strcmp(sb.buffer, "12345")); 220 | assert(!memcmp(sb.buffer + 5, "\0\0\0\0\0\0", 6)); 221 | assert(sb.cursor == 10 && sb.bytes_needed == 11); 222 | 223 | append_bytes(&sb, "6789", 4); 224 | assert(!strcmp(sb.buffer + 10, "6")); 225 | assert(sb.cursor == 11 && sb.bytes_needed == 15); 226 | } 227 | 228 | // append_string 229 | { 230 | struct buffer sb; 231 | 232 | sb = BUFFER_ON_STACK(8); 233 | append_string(&sb, "123"); 234 | assert(!strcmp(sb.buffer, "123")); 235 | assert(sb.cursor == 3 && sb.bytes_needed == 4); 236 | 237 | append_string(&sb, ""); 238 | assert(!strcmp(sb.buffer, "123")); 239 | assert(sb.cursor == 3 && sb.bytes_needed == 4); 240 | 241 | append_string(&sb, "4567"); 242 | assert(!strcmp(sb.buffer, "1234567")); 243 | assert(sb.cursor == 7 && sb.bytes_needed == 8); 244 | 245 | append_string(&sb, "890"); 246 | assert(!strcmp(sb.buffer, "1234567")); 247 | assert(sb.cursor == 7 && sb.bytes_needed == 
11); 248 | 249 | sb = create(NULL, 0); 250 | for (int i = 0; i < 100; ++i) 251 | append_string(&sb, "1234"); 252 | assert(sb.cursor == 0 && sb.bytes_needed == 401); 253 | 254 | sb = BUFFER_ON_STACK(7); 255 | append_string(&sb, "123456789"); 256 | assert(!strcmp(sb.buffer, "123456")); 257 | assert(sb.cursor == 6 && sb.bytes_needed == 10); 258 | } 259 | 260 | // append_format 261 | { 262 | struct buffer sb; 263 | 264 | sb = BUFFER_ON_STACK(8); 265 | append_format(&sb, "123"); 266 | assert(!strcmp(sb.buffer, "123")); 267 | assert(sb.cursor == 3 && sb.bytes_needed == 4); 268 | 269 | append_format(&sb, ""); 270 | assert(!strcmp(sb.buffer, "123")); 271 | assert(sb.cursor == 3 && sb.bytes_needed == 4); 272 | 273 | append_format(&sb, "%d", 4567); 274 | assert(!strcmp(sb.buffer, "1234567")); 275 | assert(sb.cursor == 7 && sb.bytes_needed == 8); 276 | 277 | append_format(&sb, "890"); 278 | assert(!strcmp(sb.buffer, "1234567")); 279 | assert(sb.cursor == 7 && sb.bytes_needed == 11); 280 | 281 | sb = create(NULL, 0); 282 | for (int i = 0; i < 100; ++i) 283 | append_format(&sb, "1234"); 284 | assert(sb.cursor == 0 && sb.bytes_needed == 401); 285 | 286 | sb = BUFFER_ON_STACK(7); 287 | append_format(&sb, "%s", "123456789"); 288 | assert(!strcmp(sb.buffer, "123456")); 289 | assert(sb.cursor == 6 && sb.bytes_needed == 10); 290 | 291 | sb = BUFFER_ON_STACK(256); 292 | append_format(&sb, "Hello%c ", '!'); 293 | append_format(&sb, "You are '%s' number %d.", "sailor", 42); 294 | assert(!strcmp(sb.buffer, "Hello! 
// string_set.c
#include <stdlib.h> // malloc, free, abort
#include <string.h> // strlen, strcmp, memcpy, memset

// Open-addressing (linear probing) set of strings.
// `items` holds `capacity` slots (always a power of two once allocated). A slot
// is NULL (never used), TOMBSTONE (removed), or a pointer to a string stored in
// the slab chain. The slot array and the first slab share one allocation.
struct set {
	char **items;
	struct slab *slab;
	int count;
	int capacity;
	int num_tombstones;
};

// Bump allocator block for string storage; blocks are chained through `prev`.
struct slab {
	struct slab *prev;
	int cursor;
	int capacity;
	// Memory comes right after this.
};

#define TOMBSTONE 1

// 64-bit FNV-1a hash of a NUL-terminated string.
unsigned long long hash_string(const char *string) {
	unsigned long long hash = 14695981039346656037u;
	for (int i = 0; string[i]; ++i) {
		// Hash the byte value: a plain (possibly signed) char would
		// sign-extend and flip the upper bits for bytes >= 0x80.
		hash = (hash ^ (unsigned char)string[i]) * 1099511628211u;
	}
	return hash;
}

// Copy `string` (including its terminator) into the slab chain, pushing a new
// slab onto `*slab` when the current one is too small. Aborts if out of memory.
char *copy_string(struct slab **slab, const char *string) {
	int size = 1 + (int)strlen(string);
	if ((*slab)->capacity - (*slab)->cursor < size) {
		int new_capacity = 1024;
		while (new_capacity < size)
			new_capacity *= 2;
		struct slab *new_slab = malloc(sizeof new_slab[0] + (size_t)new_capacity);
		if (!new_slab)
			abort(); // No way to report failure through this interface.
		new_slab->capacity = new_capacity;
		new_slab->cursor = 0;
		new_slab->prev = *slab;
		*slab = new_slab;
	}
	char *copy = (char *)(*slab + 1) + (*slab)->cursor;
	(*slab)->cursor += size;
	memcpy(copy, string, (size_t)size);
	return copy;
}

// Rebuild the set with at least `capacity` slots (rounded up to a power of two
// and always greater than `count`). All live strings are re-copied into one
// fresh slab, which also discards tombstones and strings of removed items.
void resize(struct set *set, int capacity) {
	if (capacity <= set->count)
		capacity = set->count + 1;

	int pow2;
	for (pow2 = 0; (1 << pow2) < capacity; ++pow2);
	capacity = 1 << pow2;

	// The new first slab is sized to hold everything currently stored, so the
	// copies below never need to chain a second slab.
	int total_string_size = 0;
	for (struct slab *slab = set->slab; slab; slab = slab->prev)
		total_string_size += slab->cursor;

	int first_slab_capacity = 1024;
	while (first_slab_capacity < total_string_size)
		first_slab_capacity *= 2;

	// One allocation: [slots][slab header][slab memory].
	void *new_memory = malloc((size_t)capacity * sizeof set->items[0] + sizeof set->slab[0] + (size_t)first_slab_capacity);
	if (!new_memory)
		abort();
	char **new_items = new_memory;
	memset(new_items, 0, (size_t)capacity * sizeof set->items[0]);
	struct slab *new_slab = (struct slab *)(new_items + capacity);
	new_slab->prev = NULL;
	new_slab->capacity = first_slab_capacity;
	new_slab->cursor = 0;

	unsigned mask = (unsigned)capacity - 1;
	for (int i = 0; i < set->capacity; ++i) {
		if ((size_t)set->items[i] > TOMBSTONE) {
			char *item = copy_string(&new_slab, set->items[i]);
			unsigned long long hash = hash_string(item);
			for (unsigned j = (unsigned)hash & mask;; j = (j + 1) & mask) {
				if (!new_items[j]) {
					new_items[j] = item;
					break;
				}
			}
		}
	}

	// Free the chained slabs; the last one in the chain lives inside the
	// `items` allocation and is released with it.
	for (struct slab *slab = set->slab; slab && slab->prev;) {
		struct slab *prev = slab->prev;
		free(slab);
		slab = prev;
	}
	free(set->items); // This also frees the metadata, and slab.
	set->items = new_items;
	set->slab = new_slab;
	set->capacity = capacity;
	set->num_tombstones = 0;
}

// Grow the set so that `min_capacity` items fit at a load factor of at most 2/3.
void reserve(struct set *set, int min_capacity) {
	if (3 * min_capacity > 2 * set->capacity) {
		int new_capacity = 3 * min_capacity / 2;
		if (new_capacity < 64)
			new_capacity = 64;
		resize(set, new_capacity);
	}
}

// Insert a copy of `item`; does nothing if an equal string is already present.
void add(struct set *set, const char *item) {
	reserve(set, set->count + 1);
	unsigned long long hash = hash_string(item);
	unsigned mask = (unsigned)set->capacity - 1;
	unsigned index = (unsigned)-1;
	for (unsigned i = (unsigned)hash & mask;; i = (i + 1) & mask) {
		char *slot = set->items[i];
		if (!slot) {
			// End of the probe chain: the item is not present.
			if (i < index)
				index = i;
			break;
		}
		if (slot == (void *)TOMBSTONE) {
			// Remember a reusable slot, but keep probing for a duplicate.
			if (i < index)
				index = i;
		} else if (strcmp(slot, item) == 0)
			return;
	}
	if (set->items[index] == (void *)TOMBSTONE)
		--set->num_tombstones;
	set->count++;
	set->items[index] = copy_string(&set->slab, item);
}

// Remove `item` if present. The slot becomes a tombstone; once tombstones
// exceed 1/8 of the capacity the table is rebuilt to get rid of them.
// NOTE(review): this name collides with remove() from <stdio.h>; this file
// cannot be combined with code that includes that header.
void remove(struct set *set, const char *item) {
	if (!set->count)
		return;

	unsigned long long hash = hash_string(item);
	unsigned mask = (unsigned)set->capacity - 1;
	for (unsigned i = (unsigned)hash & mask; set->items[i]; i = (i + 1) & mask) {
		if (set->items[i] != (void *)TOMBSTONE && strcmp(set->items[i], item) == 0) {
			set->items[i] = (void *)TOMBSTONE;
			set->count--;
			set->num_tombstones++;
			if (8 * set->num_tombstones > set->capacity)
				resize(set, set->capacity); // Get rid of tombstones.
			return;
		}
	}
}

// Returns 1 if an equal string is stored in the set, 0 otherwise.
int contains(struct set set, const char *item) {
	if (!set.count)
		return 0;

	unsigned long long hash = hash_string(item);
	unsigned mask = (unsigned)set.capacity - 1;
	for (unsigned i = (unsigned)hash & mask; set.items[i]; i = (i + 1) & mask)
		if (set.items[i] != (void *)TOMBSTONE && strcmp(set.items[i], item) == 0)
			return 1;
	return 0;
}

// Index of the first occupied slot, or -1 if the set is empty.
// Iterate with: for (i = first_index(set); i >= 0; i = next_index(set, i)).
int first_index(struct set set) {
	for (int i = 0; i < set.capacity; ++i)
		if ((size_t)set.items[i] > TOMBSTONE)
			return i;
	return -1;
}

// Index of the next occupied slot after `index`, or -1 when there is none.
int next_index(struct set set, int index) {
	for (int i = index + 1; i < set.capacity; ++i)
		if ((size_t)set.items[i] > TOMBSTONE)
			return i;
	return -1;
}

// Release all memory and reset the set to its zero-initialized (empty) state.
void destroy(struct set *set) {
	for (struct slab *slab = set->slab; slab && slab->prev;) {
		struct slab *prev = slab->prev;
		free(slab);
		slab = prev;
	}
	free(set->items); // This also frees the slab.
	memset(set, 0, sizeof set[0]);
}
252 | for (int i = 0; i < n; ++i) 253 | assert(contains(set, items[i])); 254 | 255 | destroy(&set); 256 | for (int i = 0; i < n / 2; ++i) 257 | add(&set, items[i]); 258 | for (int i = n / 2; i < n; ++i) 259 | assert(!contains(set, items[i])); 260 | for (int i = 0; i < n / 2; ++i) 261 | assert(contains(set, items[i])); 262 | for (int i = 0; i < n / 4; ++i) 263 | remove(&set, items[i]); 264 | for (int i = 0; i < n; ++i) 265 | assert(contains(set, items[i]) == (i >= n / 4 && i < n / 2)); 266 | 267 | for (int i = 0; i < n; ++i) 268 | remove(&set, items[i]); 269 | assert(set.count == 0); 270 | for (int i = 0; i < n; ++i) 271 | add(&set, items[i]); 272 | 273 | static int total[1048576] = { 0 }; 274 | for (int i = first_index(set); i >= 0; i = next_index(set, i)) { 275 | char *item = set.items[i]; 276 | int x = 0; 277 | for (int j = 0; j < 7; ++j) { 278 | x *= 10; 279 | x += item[j] - '0'; 280 | } 281 | total[x]++; 282 | } 283 | for (int i = 0; i < n; ++i) 284 | assert(total[i] == 1); 285 | 286 | destroy(&set); 287 | } 288 | 289 | { 290 | // Potential pathological case: create a bunch of items and then delete them 291 | // to leave tombstones, then lookup each item. If we don't clean tombstones this is O(n^2). 292 | struct set set = { 0 }; 293 | for (int i = 0; i < n - 1; ++i) 294 | add(&set, items[i]); 295 | resize(&set, set.count + 1); 296 | for (int i = 1; i < n - 1; ++i) 297 | remove(&set, items[i]); 298 | assert(set.count == 1); 299 | for (int i = 1; i < n - 1; ++i) 300 | assert(!contains(set, items[i])); 301 | destroy(&set); 302 | } 303 | 304 | { 305 | // This shouldn't leak. 
// string_slab.c
#include <string.h> // strlen, memcpy
#include <stdlib.h> // malloc, free

#define SLAB_SIZE (64*1024)

// One block of a bump allocator. Blocks are chained through `prev`; the chain
// is expected to end in a caller-owned sentinel whose `capacity` is 0
// (e.g. `&(struct slab) { 0 }`), which is never freed.
struct slab {
	struct slab *prev;
	char *buffer;
	int capacity;
	int cursor;
};

// Reserve `size` bytes from the current slab, pushing a new slab onto `*slab`
// when the current one does not have enough room left.
// Returns NULL (leaving `*slab` unchanged) if a new slab cannot be allocated.
char *allocate(struct slab **slab, int size) {
	int remaining = (*slab)->capacity - (*slab)->cursor;
	if (remaining < size) {
		// Round the request up to a whole number of SLAB_SIZE units.
		int capacity = SLAB_SIZE * ((size + SLAB_SIZE - 1) / SLAB_SIZE);
		struct slab *next = malloc(sizeof next[0] + (size_t)capacity);
		if (!next)
			return NULL;
		next->prev = *slab;
		next->buffer = (char *)(next + 1); // Storage follows the header.
		next->capacity = capacity;
		next->cursor = 0;
		*slab = next;
	}
	char *result = (*slab)->buffer + (*slab)->cursor;
	(*slab)->cursor += size;
	return result;
}

// Copy `string` (including its terminator) into the slab chain.
// Returns NULL if memory for the copy cannot be allocated.
char *copy_string(struct slab **slab, const char *string) {
	int size = 1 + (int)strlen(string);
	char *copy = allocate(slab, size);
	if (!copy)
		return NULL;
	memcpy(copy, string, (size_t)size);
	return copy;
}

// Free every heap-allocated slab in the chain and leave `*slab` pointing at
// the last element of the chain (the zero-capacity sentinel).
void deallocate_all(struct slab **slab) {
	for (;;) {
		struct slab *prev = (*slab)->prev;
		// Only slabs created by allocate() have a non-zero capacity.
		if ((*slab)->capacity)
			free(*slab);
		if (!prev)
			return;
		*slab = prev;
	}
}
// string_table.c
#include <stdlib.h> // malloc, free, abort
#include <string.h> // strlen, strcmp, memcpy, memset

// Open-addressing (linear probing) string -> string table.
// `keys` and `vals` are parallel arrays of `capacity` slots (a power of two
// once allocated). A key slot is NULL (never used), TOMBSTONE (removed), or a
// pointer to a string in the slab chain. Keys, values and the first slab share
// one allocation.
struct table {
	char **keys;
	char **vals;
	struct slab *slab;
	int count;
	int capacity;
	int num_tombstones;
};

// Bump allocator block for string storage; blocks are chained through `prev`.
struct slab {
	struct slab *prev;
	int cursor;
	int capacity;
	// Memory comes right after this.
};

#define TOMBSTONE 1

// 64-bit FNV-1a hash of a NUL-terminated string.
unsigned long long hash_string(const char *string) {
	unsigned long long hash = 14695981039346656037u;
	for (int i = 0; string[i]; ++i) {
		// Hash the byte value: a plain (possibly signed) char would
		// sign-extend and flip the upper bits for bytes >= 0x80.
		hash = (hash ^ (unsigned char)string[i]) * 1099511628211u;
	}
	return hash;
}

// Copy `string` (including its terminator) into the slab chain, pushing a new
// slab onto `*slab` when the current one is too small. Aborts if out of memory.
char *copy_string(struct slab **slab, const char *string) {
	int size = 1 + (int)strlen(string);
	if ((*slab)->capacity - (*slab)->cursor < size) {
		int new_capacity = 1024;
		while (new_capacity < size)
			new_capacity *= 2;
		struct slab *new_slab = malloc(sizeof new_slab[0] + (size_t)new_capacity);
		if (!new_slab)
			abort(); // No way to report failure through this interface.
		new_slab->capacity = new_capacity;
		new_slab->cursor = 0;
		new_slab->prev = *slab;
		*slab = new_slab;
	}
	char *copy = (char *)(*slab + 1) + (*slab)->cursor;
	(*slab)->cursor += size;
	memcpy(copy, string, (size_t)size);
	return copy;
}

// Rebuild the table with at least `capacity` slots (rounded up to a power of
// two and always greater than `count`). All live keys and values are re-copied
// into one fresh slab, which also discards tombstones and stale strings.
void resize(struct table *table, int capacity) {
	if (capacity <= table->count)
		capacity = table->count + 1;

	int pow2;
	for (pow2 = 0; (1 << pow2) < capacity; ++pow2);
	capacity = 1 << pow2;

	// The new first slab is sized to hold everything currently stored, so the
	// copies below never need to chain a second slab.
	int total_string_size = 0;
	for (struct slab *slab = table->slab; slab; slab = slab->prev)
		total_string_size += slab->cursor;

	int first_slab_capacity = 1024;
	while (first_slab_capacity < total_string_size)
		first_slab_capacity *= 2;

	// One allocation: [keys][vals][slab header][slab memory].
	void *new_memory = malloc((size_t)capacity * (sizeof table->keys[0] + sizeof table->vals[0]) + sizeof table->slab[0] + (size_t)first_slab_capacity);
	if (!new_memory)
		abort();
	char **new_keys = new_memory;
	char **new_vals = new_keys + capacity;
	memset(new_keys, 0, (size_t)capacity * sizeof new_keys[0]);
	struct slab *new_slab = (struct slab *)(new_vals + capacity);
	new_slab->prev = NULL;
	new_slab->capacity = first_slab_capacity;
	new_slab->cursor = 0;

	unsigned mask = (unsigned)capacity - 1;
	for (int i = 0; i < table->capacity; ++i) {
		if ((size_t)table->keys[i] > TOMBSTONE) {
			char *key = copy_string(&new_slab, table->keys[i]);
			char *val = copy_string(&new_slab, table->vals[i]);
			unsigned long long hash = hash_string(key);
			for (unsigned j = (unsigned)hash & mask;; j = (j + 1) & mask) {
				if (!new_keys[j]) {
					new_keys[j] = key;
					new_vals[j] = val;
					break;
				}
			}
		}
	}

	// Free the chained slabs; the last one in the chain lives inside the
	// `keys` allocation and is released with it.
	for (struct slab *slab = table->slab; slab && slab->prev;) {
		struct slab *prev = slab->prev;
		free(slab);
		slab = prev;
	}
	free(table->keys); // This also frees the values, metadata, and slab.
	table->keys = new_keys;
	table->vals = new_vals;
	table->slab = new_slab;
	table->capacity = capacity;
	table->num_tombstones = 0;
}

// Grow the table so that `min_capacity` entries fit at a load factor of at most 2/3.
void reserve(struct table *table, int min_capacity) {
	if (2 * table->capacity < 3 * min_capacity) {
		int new_capacity = 3 * min_capacity / 2;
		if (new_capacity < 64)
			new_capacity = 64;
		resize(table, new_capacity);
	}
}

// Insert a copy of `key` -> `val`, or replace the value if `key` already exists.
void add(struct table *table, const char *key, const char *val) {
	reserve(table, table->count + 1);
	unsigned long long hash = hash_string(key);
	unsigned mask = (unsigned)table->capacity - 1;
	unsigned index = (unsigned)-1;
	for (unsigned i = (unsigned)hash & mask;; i = (i + 1) & mask) {
		char *slot = table->keys[i];
		if (!slot) {
			// End of the probe chain: the key is not present.
			if (i < index)
				index = i;
			break;
		}
		if (slot == (void *)TOMBSTONE) {
			// Remember a reusable slot, but keep probing for the key.
			if (i < index)
				index = i;
		} else if (strcmp(slot, key) == 0) {
			// The old value stays in the slab until the next resize.
			table->vals[i] = copy_string(&table->slab, val);
			return;
		}
	}
	// Keep the tombstone count accurate when a tombstone slot is reused;
	// otherwise remove() triggers rebuilds more often than needed.
	if (table->keys[index] == (void *)TOMBSTONE)
		--table->num_tombstones;
	table->count++;
	table->keys[index] = copy_string(&table->slab, key);
	table->vals[index] = copy_string(&table->slab, val);
}

// Remove `key` if present. The slot becomes a tombstone; once tombstones
// exceed 1/8 of the capacity the table is rebuilt to get rid of them.
// NOTE(review): this name collides with remove() from <stdio.h>; this file
// cannot be combined with code that includes that header.
void remove(struct table *table, const char *key) {
	if (!table->count)
		return;

	unsigned long long hash = hash_string(key);
	unsigned mask = (unsigned)table->capacity - 1;
	for (unsigned i = (unsigned)hash & mask; table->keys[i]; i = (i + 1) & mask) {
		if (table->keys[i] != (void *)TOMBSTONE && strcmp(table->keys[i], key) == 0) {
			table->keys[i] = (void *)TOMBSTONE;
			table->count--;
			table->num_tombstones++;
			if (8 * table->num_tombstones > table->capacity)
				resize(table, table->capacity); // Get rid of tombstones.
			// Keys are unique, so stop here instead of probing further
			// (possibly into the freshly rebuilt table).
			return;
		}
	}
}

// Returns the value stored for `key`, or NULL if the key is not present.
// The pointer is invalidated by the next resize of the table.
const char *get(struct table table, const char *key) {
	if (!table.count)
		return NULL;

	unsigned long long hash = hash_string(key);
	unsigned mask = (unsigned)table.capacity - 1;
	for (unsigned i = (unsigned)hash & mask; table.keys[i]; i = (i + 1) & mask)
		if (table.keys[i] != (void *)TOMBSTONE && strcmp(table.keys[i], key) == 0)
			return table.vals[i];

	return NULL;
}

// Index of the first occupied slot, or -1 if the table is empty.
// Iterate with: for (i = first_index(t); i >= 0; i = next_index(t, i)).
int first_index(struct table table) {
	for (int i = 0; i < table.capacity; ++i)
		if ((size_t)table.keys[i] > TOMBSTONE)
			return i;
	return -1;
}

// Index of the next occupied slot after `index`, or -1 when there is none.
int next_index(struct table table, int index) {
	for (int i = index + 1; i < table.capacity; ++i)
		if ((size_t)table.keys[i] > TOMBSTONE)
			return i;
	return -1;
}

// Release all memory and reset the table to its zero-initialized (empty) state.
void destroy(struct table *table) {
	for (struct slab *slab = table->slab; slab && slab->prev;) {
		struct slab *prev = slab->prev;
		free(slab);
		slab = prev;
	}
	free(table->keys); // This also frees the values, metadata, and slab.
	memset(table, 0, sizeof table[0]);
}
| int x = 0; 250 | for (int j = 0; j < 7; ++j) { 251 | x *= 10; 252 | x += key[j] - '0'; 253 | } 254 | remaining[x] -= 1; 255 | } 256 | for (int i = 0; i < n; ++i) 257 | assert(!remaining[i]); 258 | 259 | for (int i = 0; i < n / 2; ++i) 260 | remove(&table, keys[i]); 261 | assert(table.count == n / 2); 262 | for (int i = 0; i < n; ++i) 263 | remaining[i] = 1; 264 | for (int i = first_index(table); i >= 0; i = next_index(table, i)) { 265 | char *key = table.keys[i]; 266 | char *val = table.vals[i]; 267 | assert(key[0] == 'k' && val[0] == 'v'); 268 | ++key; 269 | ++val; 270 | assert(strcmp(key, val) == 0); 271 | int x = 0; 272 | for (int j = 0; j < 7; ++j) 273 | { 274 | x *= 10; 275 | x += key[j] - '0'; 276 | } 277 | remaining[x] -= 1; 278 | } 279 | for (int i = 0; i < n / 2; ++i) 280 | assert(remaining[i] == 1); 281 | for (int i = n / 2; i < n; ++i) 282 | assert(!remaining[i]); 283 | 284 | for (int i = 0; i < n / 2; ++i) 285 | add(&table, keys[i], vals[i]); 286 | assert(table.count == n); 287 | for (int i = 0; i < n; ++i) 288 | remaining[i] = 1; 289 | for (int i = first_index(table); i >= 0; i = next_index(table, i)) { 290 | char *key = table.keys[i]; 291 | char *val = table.vals[i]; 292 | assert(key[0] == 'k' && val[0] == 'v'); 293 | ++key; 294 | ++val; 295 | assert(strcmp(key, val) == 0); 296 | int x = 0; 297 | for (int j = 0; j < 7; ++j) { 298 | x *= 10; 299 | x += key[j] - '0'; 300 | } 301 | remaining[x] -= 1; 302 | } 303 | for (int i = 0; i < n; ++i) 304 | assert(!remaining[i]); 305 | 306 | destroy(&table); 307 | } 308 | 309 | { 310 | // Potential pathological case: create a bunch of items and then delete them 311 | // to leave tombstones, then lookup each item. If we don't clean tombstones this is O(n^2). 
// strtod.c
// 180 line strtod replacement.
// - no dependencies at all.
// - supports fixed-point, scientific, and hex-float notation.
// - hex-floats can round-trip (including subnormals).
// - fixed-point and scientific are accurate to ~15 decimal places.
// - largest issue is that DBL_MAX parses as INFINITY.

#include <math.h> // Only for INFINITY && NAN.
#include <stdbool.h>

// Value of `c` as a digit in `base` (10 or 16), or -1 if it is not a digit.
static int bb_digit_value(char c, unsigned base) {
	if (c >= '0' && c <= '9') return c - '0';
	if (base == 16 && c >= 'A' && c <= 'F') return c - 'A' + 10;
	if (base == 16 && c >= 'a' && c <= 'f') return c - 'a' + 10;
	return -1;
}

// Parse a floating point number from the start of `str`.
// - Accepts leading whitespace, an optional sign, "nan" / "nan(chars)",
//   "inf" / "infinity" (case-insensitive), decimal numbers with an optional
//   'e' exponent, and "0x" hex floats with an optional 'p' exponent.
// - If `end` is not NULL it receives a pointer to the first character after
//   the parsed number, or `str` itself if nothing could be parsed.
// - Returns 0 if nothing could be parsed.
static double bb_strtod(const char* str, char** end) {
	if (end) *end = (char*)str;

	// Skip leading whitespace.
	while (*str == ' ' || (*str >= '\t' && *str <= '\r'))
		str++;

	// Parse optional sign.
	bool negative = *str == '-';
	str += *str == '-' || *str == '+';

	// Determine if this is a NaN, infinity, or normal number.
	double result = 0;
	if ((str[0] == 'n' || str[0] == 'N') && (str[1] == 'a' || str[1] == 'A') && (str[2] == 'n' || str[2] == 'N')) {
		str += 3;
		if (*str == '(') {
			// Parse optional NaN character sequence.
			const char* backup = str++;
			while ((*str >= '0' && *str <= '9') || (*str >= 'A' && *str <= 'Z') || (*str >= 'a' && *str <= 'z') || *str == '_')
				str++;
			if (*str == ')')
				str++;
			else
				str = backup;
		}
		result = NAN;
	}
	else if ((str[0] == 'i' || str[0] == 'I') && (str[1] == 'n' || str[1] == 'N') && (str[2] == 'f' || str[2] == 'F')) {
		if ((str[3] == 'i' || str[3] == 'I') &&
			(str[4] == 'n' || str[4] == 'N') &&
			(str[5] == 'i' || str[5] == 'I') &&
			(str[6] == 't' || str[6] == 'T') &&
			(str[7] == 'y' || str[7] == 'Y')) {
			str += 8;
		}
		else str += 3;
		result = INFINITY;
	} else {
		// This is a normal float, not a NaN or infinity.
		// Parse the base. We support decimal and hex floats.
		unsigned base = 10;
		int max_digits = 19; // 10^19 still fits in 64 bits.
		char exponent_separator = 'e';
		// "0x" only starts a hex float when a hex mantissa follows; otherwise
		// the leading "0" is parsed as a decimal number and parsing stops at 'x'.
		if (str[0] == '0' && (str[1] == 'x' || str[1] == 'X') &&
			(bb_digit_value(str[2], 16) >= 0 || (str[2] == '.' && bb_digit_value(str[3], 16) >= 0))) {
			base = 16;
			max_digits = 14; // 14 hex digits = 56 bits.
			exponent_separator = 'p';
			str += 2;
		}

		// Check if there's at least 1 digit, either directly or after a leading dot.
		if (bb_digit_value(str[0], base) < 0 && !(str[0] == '.' && bb_digit_value(str[1], base) >= 0))
			return 0;

		// Skip leading zeros.
		while (*str == '0') str++;
		bool dot = *str == '.';
		str += dot;
		int num_leading_zeros_after_dot = 0;
		while (*str == '0') {
			str++;
			num_leading_zeros_after_dot++;
		}

		// Parse digits before exponent.
		unsigned long long digits = 0;
		int num_digits = 0;
		int num_digits_after_dot = 0;
		int num_truncated_digits_before_dot = 0;
		for (;;) {
			if (*str == '.') {
				if (dot) break; // Second dot.
				dot = true;
				str++;
			} else {
				int digit = bb_digit_value(*str, base);
				if (digit < 0) break;
				if (num_digits < max_digits) {
					digits = digits * base + (unsigned)digit;
					num_digits++;
					num_digits_after_dot += dot;
				} else num_truncated_digits_before_dot += !dot;
				str++;
			}
		}

		// Parse optional exponent.
		int exponent = 0;
		if (*str == exponent_separator || *str == exponent_separator - 'a' + 'A') {
			// Backup in case exponent parsing fails.
			const char* backup = str++;

			// Parse optional exponent sign.
			bool negative_exp = *str == '-';
			str += *str == '-' || *str == '+';

			// Check if we actually have a valid exponent.
			if (*str >= '0' && *str <= '9') {
				// Parse the exponent.
				do {
					exponent = exponent * 10 + (*str++ - '0');
					if (exponent > 9999) exponent = 9999; // Prevent overflow.
				} while (*str >= '0' && *str <= '9');
				if (negative_exp) exponent = -exponent;
			} else str = backup;
		}

		// Now assemble the result!
		if (digits != 0) {
			if (base == 16) {
				// Move dot after the first digit.
				int shift = (num_digits + num_truncated_digits_before_dot) - num_digits_after_dot - 1;
				if (num_leading_zeros_after_dot > 0)
					shift -= num_leading_zeros_after_dot;
				exponent += shift * 4;

				// Move first hex digit before floating point. The exponent was already adjusted for this.
				while (!(digits & 0xF0000000000000))
					digits <<= 4;

				// Truncate to 53 bit double mantissa (leading 1 ends up in bit 52).
				while (digits & 0xE0000000000000) {
					digits >>= 1;
					exponent++;
				}

				// Pick the biased exponent field.
				unsigned long long biased_exponent;
				if (exponent > 1023) {
					// Overflow to infinity.
					biased_exponent = 2047;
					digits = 0;
				} else if (exponent < -1022) {
					// Subnormal: the exponent field is 0 and the mantissa is
					// scaled by 2^-1074, so the leading 1 has to be shifted
					// into the stored mantissa bits.
					int deficit = -1022 - exponent;
					digits = deficit < 64 ? digits >> deficit : 0;
					biased_exponent = 0;
				} else {
					biased_exponent = (unsigned long long)(exponent + 1023);
				}

				// Assemble the float. Fall through so the sign and `end` are applied.
				union { unsigned long long u; double f; } fu = { (biased_exponent << 52) | (digits & 0xFFFFFFFFFFFFF) };
				result = fu.f;
			} else {
				// Adjust exponent to account for leading zeros and truncated digits.
				exponent += num_truncated_digits_before_dot;
				exponent -= num_leading_zeros_after_dot;

				// Right shift digits to correct decimal place.
				unsigned long long shift = 1;
				for (int i = 0; i < num_digits_after_dot; i++)
					shift *= 10;
				result = (double)digits / (double)shift;

				if (exponent) {
					// Compute 10^abs(exponent) using binary exponentiation.
					int exp = exponent;
					if (exp < 0) exp = -exp;
					double scale = 1;
					static const double BINARY_POWERS_OF_10[9] = { 1e256, 1e128, 1e64, 1e32, 1e16, 1e8, 1e4, 1e2, 1e1 };
					for (int i = 0, decrement = 256; i < 9; i++, decrement >>= 1) {
						if (exp >= decrement) {
							exp -= decrement;
							scale *= BINARY_POWERS_OF_10[i];
						}
					}

					// Scale by the exponent.
					if (exponent >= 0)
						result *= scale;
					else
						result /= scale;
				}
			}
		}
	}

	if (end) *end = (char*)str;
	return negative ? -result : +result;
}
bb_strtod("0.00000000001", NULL)); 241 | printf("%e\n", bb_strtod("0.000000000001", NULL)); 242 | printf("%e\n", bb_strtod("0.0000000000001", NULL)); 243 | printf("%e\n", bb_strtod("0.00000000000001", NULL)); 244 | printf("%e\n", bb_strtod("0.000000000000001", NULL)); 245 | printf("%e\n", bb_strtod("0.0000000000000001", NULL)); 246 | printf("%e\n", bb_strtod("0.00000000000000001", NULL)); 247 | printf("%e\n", bb_strtod("0.000000000000000001", NULL)); 248 | printf("%e\n", bb_strtod("0.0000000000000000001", NULL)); 249 | printf("%e\n", bb_strtod("0.00000000000000000001", NULL)); 250 | printf("%e\n", bb_strtod("0.000000000000000000001", NULL)); 251 | printf("%e\n", bb_strtod("0.0000000000000000000001", NULL)); 252 | printf("%e\n", bb_strtod("0.00000000000000000000001", NULL)); 253 | printf("%e\n", bb_strtod("0.000000000000000000000001", NULL)); 254 | printf("%e\n", bb_strtod("0.0000000000000000000000001", NULL)); 255 | printf("%e\n", bb_strtod("0.00000000000000000000000001", NULL)); 256 | printf("%e\n", bb_strtod("0.000000000000000000000000001", NULL)); 257 | printf("%e\n", bb_strtod("0.0000000000000000000000000001", NULL)); 258 | printf("%e\n", bb_strtod("0.00000000000000000000000000001", NULL)); 259 | printf("%e\n", bb_strtod("0.000000000000000000000000000001", NULL)); 260 | printf("%e\n", bb_strtod("0.0000000000000000000000000000001", NULL)); 261 | printf("%f\n", bb_strtod("01", NULL)); 262 | printf("%f\n", bb_strtod("001", NULL)); 263 | printf("%f\n", bb_strtod("0001", NULL)); 264 | printf("%f\n", bb_strtod("00001", NULL)); 265 | printf("%f\n", bb_strtod("000001", NULL)); 266 | printf("%f\n", bb_strtod("000001.000", NULL)); 267 | printf("%f\n", bb_strtod("00001.000", NULL)); 268 | printf("%f\n", bb_strtod("0001.000", NULL)); 269 | printf("%f\n", bb_strtod("001.000", NULL)); 270 | printf("%f\n", bb_strtod("01.000", NULL)); 271 | printf("%f\n", bb_strtod("000.100", NULL)); 272 | printf("%f\n", bb_strtod("000.010", NULL)); 273 | printf("%f\n", bb_strtod("000.001", 
NULL)); 274 | printf("%f\n", bb_strtod("000.101", NULL)); 275 | 276 | // edge cases 277 | printf("%f\n", bb_strtod("-0", NULL)); 278 | printf("%f\n", bb_strtod("nan", NULL)); 279 | printf("%f\n", bb_strtod("-NAN", NULL)); 280 | printf("%f\n", bb_strtod("inf", NULL)); 281 | printf("%f\n", bb_strtod("-INF", NULL)); 282 | printf("%f\n", bb_strtod("infinity", NULL)); 283 | printf("%f\n", bb_strtod("-INFINITY", NULL)); 284 | printf("%f\n", bb_strtod("-INFINITY", NULL)); 285 | printf("%e\n", bb_strtod("1.7976931348623157e+308", NULL)); // Unfortunately DBL_MAX parses as INFINITY. 286 | 287 | // scientific notation 288 | printf("%e\n", bb_strtod("1e0", NULL)); 289 | printf("%e\n", bb_strtod("1e1", NULL)); 290 | printf("%e\n", bb_strtod("1e+1", NULL)); 291 | printf("%e\n", bb_strtod("1e-1", NULL)); 292 | printf("%e\n", bb_strtod("1.23e+45", NULL)); 293 | printf("%e\n", bb_strtod("0e0", NULL)); 294 | printf("%e\n", bb_strtod("1.234567e300", NULL)); 295 | printf("%e\n", bb_strtod("1.234567e-300", NULL)); 296 | printf("%e\n", bb_strtod("1e999", NULL)); 297 | printf("%e\n", bb_strtod("-1e999", NULL)); 298 | printf("%e\n", bb_strtod("1e-999", NULL)); 299 | printf("%e\n", bb_strtod("1.797693e+308", NULL)); 300 | printf("%e\n", bb_strtod("2.225073e-308", NULL)); 301 | printf("%e\n", bb_strtod("1e-309", NULL)); 302 | 303 | // hexfloat 304 | printf("%a\n", bb_strtod("0x1.FFFFFFFFFFFFFp+1023", NULL)); 305 | printf("%a\n", bb_strtod("0x2.0000000000000p+1023", NULL)); 306 | printf("%a\n", bb_strtod("0x1.FFFFFFFFFFFFFFp+1023", NULL)); 307 | printf("%a\n", bb_strtod("0x1.0000000000000p+1024", NULL)); 308 | printf("%a\n", bb_strtod("0x1.0000000000000p-1022", NULL)); 309 | printf("%a\n", bb_strtod("0x0.0000000000001p-1023", NULL)); 310 | printf("%a\n", bb_strtod("0x0.DE00000000000p-1023", NULL)); 311 | printf("%a\n", bb_strtod("0x0.000DE00000000p-1023", NULL)); 312 | printf("%a\n", bb_strtod("0x1.0000000000000p-1075", NULL)); 313 | printf("%a\n", bb_strtod("0x10.0000000000000p-1079", 
NULL)); 314 | printf("%a\n", bb_strtod("0x0.0000000000001p-1024", NULL)); 315 | printf("%a\n", bb_strtod("0x0.00000000000001p-1023", NULL)); 316 | printf("%a\n", bb_strtod("0x0.00000000000000001p-1023", NULL)); 317 | printf("%a\n", bb_strtod("0x1FFFFFFFFFFFFF.0p+971", NULL)); 318 | printf("%a\n", bb_strtod("0x1FFFFFFFFFFFFF0.0p+967", NULL)); 319 | printf("%a\n", bb_strtod("0x1FFFFFFFFFFFFF00.0p+963", NULL)); 320 | printf("%a\n", bb_strtod("0x1FFFFFFFFFFFFF000.0p+959", NULL)); 321 | printf("%a\n", bb_strtod("0x1FFFFFFFFFFFFF0000.0p+955", NULL)); 322 | printf("%a\n", bb_strtod("0x123.456p+78", NULL)); 323 | } 324 | 325 | -------------------------------------------------------------------------------- /tlsf_allocator.c: -------------------------------------------------------------------------------- 1 | // O(1) allocation and deallocation 2 | // 1/32 memory wasted on average, good-fit 3 | // 4 byte header 4 | // 32/16 byte min allocation on 64/32-bit 5 | // can be expanded at runtime 6 | 7 | #include // intptr_t 8 | #include // memcpy 9 | #include 10 | 11 | #define ALIGNMENT 4 // only 4, 8, or 16 allowed 12 | #define FREE_BIT (1 << 0) 13 | #define PREV_FREE_BIT (1 << 1) 14 | #define SIZE_MASK (~(FREE_BIT | PREV_FREE_BIT)) 15 | 16 | struct node { 17 | struct node *prevnode; // this is actually at the end of the *previous* node's block, only valid if previous node is free 18 | int size; // includes size of node, last 2 bits of the size are used as bitfields: FREE_BIT | PREV_FREE_BIT 19 | struct node *next; // only valid if node is free 20 | struct node *prev; // only valid if node is free 21 | }; 22 | 23 | struct heap { 24 | int listmap; 25 | int slotmaps[32]; 26 | struct node freelists[32][4]; 27 | }; 28 | 29 | void *node2block(struct node *n) { 30 | return (char *)n + sizeof(struct node *) + ALIGNMENT; 31 | } 32 | struct node *block2node(void* block) { 33 | return (struct node *)((char *)block - (sizeof(struct node *) + ALIGNMENT)); 34 | } 35 | struct node *nextnode(struct 
node *n) { 36 | return (struct node *)((char *)n + (n->size & SIZE_MASK)); 37 | } 38 | 39 | int findfirstset(int x) { 40 | // _BitScanForward(&i, x) on msvc, __builtin_ffs(x) - 1 on gcc/clang 41 | for (int i = 0; i < 32; ++i) 42 | if (x & (1 << i)) 43 | return i; 44 | return -1; 45 | } 46 | int floorlog2(int x) { 47 | // _BitScanReverse(&i, x) on msvc, 31 - __builtin_clz(x) on gcc/clang (x must be nonzero) 48 | for (int i = 31; i >= 0; --i) 49 | if (x & (1 << i)) 50 | return i; 51 | return -1; 52 | } 53 | 54 | void findslot(int size, int *listid, int *slotid) { 55 | int log2 = floorlog2(size); 56 | int pow2 = 1 << log2; 57 | int left = size - pow2; 58 | (*listid) = log2; 59 | (*slotid) = left >> (log2 - 2); // (4 * left) / pow2 60 | } 61 | void add(struct heap *heap, struct node *node, int size) { 62 | // mark the node as free 63 | assert((size & SIZE_MASK) > 0); 64 | node->size = size | FREE_BIT; 65 | 66 | // write the footer 67 | struct node *next = nextnode(node); 68 | next->prevnode = node; 69 | next->size |= PREV_FREE_BIT; 70 | 71 | // find where the node goes 72 | int listid, slotid; 73 | findslot(size, &listid, &slotid); 74 | struct node *list = &heap->freelists[listid][slotid]; 75 | 76 | // add the node to the list 77 | node->next = list->next; 78 | node->prev = list; 79 | list->next->prev = node; 80 | list->next = node; 81 | 82 | // mark the list and slot as full 83 | heap->listmap |= (1 << listid); 84 | heap->slotmaps[listid] |= (1 << slotid); 85 | } 86 | void remove(struct heap *heap, struct node *node) { 87 | // find where the node goes 88 | int listid, slotid; 89 | findslot(node->size, &listid, &slotid); 90 | struct node *list = &heap->freelists[listid][slotid]; 91 | int *slotmap = &heap->slotmaps[listid]; 92 | 93 | // remove the node from the freelist 94 | assert(node->size & FREE_BIT); 95 | node->size &= ~FREE_BIT; 96 | node->prev->next = node->next; 97 | node->next->prev = node->prev; 98 | 99 | // if the slot becomes empty, clear its bitmap bit 100 | if (list->next == 
list) 101 | (*slotmap) &= ~(1 << slotid); 102 | 103 | // and if the list becomes empty, clear its bitmap bit too 104 | if (!(*slotmap)) 105 | heap->listmap &= ~(1 << listid); 106 | 107 | struct node *next = nextnode(node); 108 | assert(next->size & PREV_FREE_BIT); 109 | next->size &= ~PREV_FREE_BIT; 110 | } 111 | 112 | void grow(struct heap *heap, void *memory, int size) { 113 | assert(size > sizeof(struct node)); 114 | assert(size % sizeof(struct node) == 0); 115 | 116 | // carve out a sentinel node with just the size flags at the end 117 | struct node *sentinel = block2node((char *)memory + size); 118 | sentinel->size = 0; 119 | 120 | // add the root node to the list 121 | void *p = (char *)memory - sizeof(struct node *); 122 | struct node *root = p; 123 | add(heap, root, size - ALIGNMENT); 124 | } 125 | void initialize(struct heap *heap) { 126 | memset(heap, 0, sizeof(struct heap)); 127 | 128 | // clear freelists 129 | for (int i = 0; i < 32; ++i) { 130 | for (int j = 0; j < 4; ++j) { 131 | struct node *list = &heap->freelists[i][j]; 132 | list->next = list; 133 | list->prev = list; 134 | } 135 | } 136 | } 137 | void *allocate(struct heap *heap, int size) { 138 | assert(size >= 0); // you could clamp to 0, or return NULL 139 | 140 | // need extra space for size and to align allocation 141 | int needed = size + ALIGNMENT; 142 | if (needed < sizeof(struct node)) 143 | needed = sizeof(struct node); 144 | 145 | // align up 146 | needed = (needed + ALIGNMENT - 1) & ~(ALIGNMENT - 1); 147 | 148 | // first check the exact size range for the needed amount 149 | // special findslot that rounds up instead of down 150 | int log2 = floorlog2(needed); 151 | int pow2 = 1 << log2; 152 | int left = needed - pow2; 153 | int listid = log2; 154 | int slotid = left >> (log2 - 2); // (4 * left / pow2) 155 | if (left) { 156 | ++slotid; 157 | if (slotid == 4) { 158 | slotid = 0; 159 | ++listid; 160 | } 161 | } 162 | 163 | int slotmask = ~((1 << slotid) - 1); 164 | if 
(!(heap->slotmaps[listid] & slotmask)) { 165 | // the best fitting size range is empty so don't consider it 166 | ++listid; 167 | slotmask = 0xFFFFFFFF; 168 | } 169 | 170 | // find first free node big enough to hold the allocation 171 | int listmask = ~((1 << listid) - 1); 172 | int listmap = heap->listmap & listmask; 173 | listid = findfirstset(listmap); 174 | if (listid < 0) 175 | return 0; // out of memory 176 | 177 | int slotmap = heap->slotmaps[listid] & slotmask; 178 | slotid = findfirstset(slotmap); 179 | 180 | // remove the node from the freelist 181 | struct node *list = &heap->freelists[listid][slotid]; 182 | struct node *node = list->next; 183 | assert(node->size >= needed); 184 | remove(heap, node); 185 | 186 | // trim the excess off 187 | int excess = node->size - needed; 188 | if (excess >= sizeof(struct node)) { 189 | node->size -= excess; 190 | struct node *leftover = nextnode(node); 191 | add(heap, leftover, excess); 192 | } 193 | 194 | return node2block(node); 195 | } 196 | void deallocate(struct heap *heap, void *block) { 197 | if (!block) 198 | return; 199 | 200 | struct node *node = block2node(block); 201 | assert(!(node->size & FREE_BIT)); // double free 202 | 203 | // merge with previous free node 204 | if (node->size & PREV_FREE_BIT) { 205 | struct node *prev = node->prevnode; 206 | assert(prev->size & FREE_BIT); // we think it's free but it disagrees 207 | assert(!(prev->size & PREV_FREE_BIT)); // there shouldn't be 2 consecutive free nodes 208 | remove(heap, prev); 209 | prev->size += (node->size & SIZE_MASK); 210 | node = prev; 211 | } 212 | 213 | // merge with next free node 214 | struct node *next = nextnode(node); 215 | if (next->size & FREE_BIT) { 216 | assert(!(next->size & PREV_FREE_BIT)); // next node thinks we're free but we aren't 217 | remove(heap, next); 218 | node->size += next->size; 219 | next = nextnode(node); 220 | assert(!(next->size & FREE_BIT)); // there shouldn't be 2 consecutive free nodes 221 | } 222 | 223 | // mark 
on the next node that we are free 224 | assert(!(next->size & PREV_FREE_BIT)); // corruption 225 | next->size |= PREV_FREE_BIT; 226 | 227 | add(heap, node, node->size); 228 | } 229 | void *reallocate(struct heap *heap, void *block, int size) { 230 | // you could clamp to 0, or return NULL 231 | assert(size >= 0); 232 | 233 | if (!block) 234 | return allocate(heap, size); 235 | if (!size) { 236 | deallocate(heap, block); 237 | return 0; 238 | } 239 | 240 | struct node *node = block2node(block); 241 | assert(!(node->size & FREE_BIT)); // use after free 242 | 243 | // need extra space for size and to align allocation 244 | int needed = size + ALIGNMENT; 245 | if (needed < sizeof(struct node)) 246 | needed = sizeof(struct node); 247 | 248 | // align up 249 | needed = (needed + ALIGNMENT - 1) & ~(ALIGNMENT - 1); 250 | 251 | if (needed > (node->size & SIZE_MASK)) { 252 | // we need to grow, try expanding into the next block if it's free 253 | struct node *next = nextnode(node); 254 | assert(!(next->size & PREV_FREE_BIT)); // mistake, this node is not really free 255 | 256 | if (!(next->size & FREE_BIT) || (node->size & SIZE_MASK) + (next->size & SIZE_MASK) < needed) { 257 | // bad luck, we can't grow in-place 258 | void *copy = allocate(heap, size); 259 | if (!copy) 260 | return 0; // out of memory 261 | memcpy(copy, block, (size_t)(node->size & SIZE_MASK) - ALIGNMENT); 262 | deallocate(heap, block); 263 | return copy; 264 | } 265 | 266 | // good luck! 
we can grow in place 267 | remove(heap, next); 268 | node->size += next->size; 269 | } 270 | 271 | // trim off any excess 272 | int excess = (node->size & SIZE_MASK) - needed; 273 | if (excess >= sizeof(struct node)) { 274 | node->size -= excess; 275 | struct node *left = nextnode(node); 276 | left->size = excess; 277 | // merge with next free node 278 | struct node *next = nextnode(left); 279 | if (next->size & FREE_BIT) { 280 | remove(heap, next); 281 | left->size += (next->size & SIZE_MASK); 282 | } 283 | add(heap, left, left->size); 284 | } 285 | 286 | return block; 287 | } 288 | 289 | void verify(struct heap *heap) { 290 | // if a slotmap isn't empty the corresponding listmap bit should be set 291 | for (int i = 0; i < 32; ++i) { 292 | int slotmap = heap->slotmaps[i] != 0; 293 | int listmap = (heap->listmap & (1 << i)) != 0; 294 | assert(slotmap == listmap); 295 | } 296 | 297 | // the bitmaps should correspond to which freelists are empty 298 | for (int i = 0; i < 32; ++i) { 299 | int slotmap = heap->slotmaps[i]; 300 | for (int j = 0; j < 4; ++j) { 301 | struct node *list = &heap->freelists[i][j]; 302 | if (slotmap & (1 << j)) { 303 | assert(list->next != list); 304 | assert(list->prev != list); 305 | } 306 | } 307 | } 308 | 309 | for (int i = 0; i < 32; ++i) { 310 | for (int j = 0; j < 4; ++j) { 311 | struct node *list = &heap->freelists[i][j]; 312 | for (struct node *node = list->next; node != list; node = node->next) { 313 | // every node in the freelist should be free 314 | assert(node->size & FREE_BIT); 315 | 316 | // free nodes cannot be empty 317 | assert(node->size & SIZE_MASK); 318 | 319 | // the next node needs to know if we're free 320 | struct node *next = nextnode(node); 321 | assert(next->size & PREV_FREE_BIT); 322 | 323 | // there should never be 2 consecutive free nodes - they should be combined 324 | assert(!(node->size & PREV_FREE_BIT)); 325 | assert(!(next->size & FREE_BIT)); 326 | 327 | // the node should be properly aligned. 
328 | uintptr_t block = (uintptr_t)node2block(node); 329 | uintptr_t nextblock = (uintptr_t)node2block(next); 330 | assert(block % ALIGNMENT == 0); 331 | assert(nextblock % ALIGNMENT == 0); 332 | } 333 | } 334 | } 335 | } 336 | int equal(char *bytes, char value, int count) { 337 | assert(bytes); 338 | for (int i = 0; i < count; ++i) 339 | if (bytes[i] != value) 340 | return 0; 341 | return 1; 342 | } 343 | 344 | int main(void) { 345 | struct heap heap; 346 | initialize(&heap); 347 | 348 | static char memory[1024]; 349 | grow(&heap, memory, sizeof memory); 350 | 351 | char *a = allocate(&heap, 256); verify(&heap); memset(a, 1, 256); 352 | char *b = allocate(&heap, 256); verify(&heap); memset(b, 2, 256); 353 | assert(equal(a, 1, 256)); 354 | deallocate(&heap, a); verify(&heap); 355 | char *c = allocate(&heap, 256); verify(&heap); memset(c, 3, 256); 356 | deallocate(&heap, c); verify(&heap); 357 | assert(equal(b, 2, 256)); 358 | deallocate(&heap, b); verify(&heap); 359 | 360 | char *d = allocate(&heap, 0); verify(&heap); memset(d, 4, 0); 361 | char *e = allocate(&heap, 1); verify(&heap); memset(e, 5, 1); 362 | char *f = allocate(&heap, 2); verify(&heap); memset(f, 6, 2); 363 | char *g = allocate(&heap, 3); verify(&heap); memset(g, 7, 3); 364 | char *h = allocate(&heap, 4); verify(&heap); memset(h, 8, 4); 365 | char *i = allocate(&heap, 5); verify(&heap); memset(i, 9, 5); 366 | char *j = allocate(&heap, 23); verify(&heap); memset(j, 10, 23); 367 | i = reallocate(&heap, i, 100); verify(&heap); memset(i, 11, 100); 368 | d = reallocate(&heap, d, 256); verify(&heap); memset(d, 12, 256); 369 | i = reallocate(&heap, i, 5); verify(&heap); memset(i, 13, 5); 370 | assert(equal(d, 12, 256)); 371 | assert(equal(e, 5, 1)); 372 | assert(equal(f, 6, 2)); 373 | assert(equal(g, 7, 3)); 374 | assert(equal(h, 8, 4)); 375 | assert(equal(i, 13, 5)); 376 | assert(equal(j, 10, 23)); 377 | 378 | deallocate(&heap, d); verify(&heap); 379 | deallocate(&heap, i); verify(&heap); 380 | 
deallocate(&heap, e); verify(&heap); 381 | deallocate(&heap, h); verify(&heap); 382 | deallocate(&heap, f); verify(&heap); 383 | deallocate(&heap, g); verify(&heap); 384 | deallocate(&heap, j); verify(&heap); 385 | 386 | // stress tests 387 | 388 | int maxsize = 500; 389 | char *x = NULL; 390 | 391 | // one up 392 | for (int size = 0; size < maxsize; ++size) { 393 | x = reallocate(&heap, x, size); verify(&heap); 394 | assert(size == 0 || equal(x, size - 1, size - 1)); 395 | memset(x, size, size); 396 | verify(&heap); 397 | } 398 | x = reallocate(&heap, x, 0); 399 | verify(&heap); 400 | 401 | // one down 402 | for (int size = 0; size < maxsize; ++size) { 403 | int ezis = maxsize - size; 404 | x = reallocate(&heap, x, ezis); verify(&heap); 405 | assert(size == 0 || equal(x, size - 1, ezis)); 406 | memset(x, size, ezis); 407 | verify(&heap); 408 | } 409 | x = reallocate(&heap, x, 0); 410 | verify(&heap); 411 | 412 | // grow 413 | 414 | static char extra[1024]; 415 | grow(&heap, extra, sizeof extra); 416 | char *y = NULL; 417 | 418 | // both up 419 | for (int size = 0; size < maxsize; ++size) { 420 | verify(&heap); 421 | x = reallocate(&heap, x, size); verify(&heap); 422 | assert(size == 0 || equal(x, size - 1, size - 1)); 423 | assert(size == 0 || equal(y, size - 1, size - 1)); 424 | y = reallocate(&heap, y, size); verify(&heap); 425 | assert(size == 0 || equal(x, size - 1, size - 1)); 426 | assert(size == 0 || equal(y, size - 1, size - 1)); 427 | memset(x, size, size); 428 | memset(y, size, size); 429 | verify(&heap); 430 | } 431 | x = reallocate(&heap, x, 0); 432 | y = reallocate(&heap, y, 0); 433 | verify(&heap); 434 | 435 | // both down 436 | for (int size = 0; size < maxsize; ++size) { 437 | int ezis = maxsize - size; 438 | x = reallocate(&heap, x, ezis); verify(&heap); 439 | assert(size == 0 || equal(x, size - 1, ezis)); 440 | assert(size == 0 || equal(y, size - 1, ezis + 1)); 441 | y = reallocate(&heap, y, ezis); verify(&heap); 442 | assert(size == 0 || 
equal(x, size - 1, ezis)); 443 | assert(size == 0 || equal(y, size - 1, ezis)); 444 | memset(x, size, ezis); 445 | memset(y, size, ezis); 446 | verify(&heap); 447 | } 448 | x = reallocate(&heap, x, 0); 449 | y = reallocate(&heap, y, 0); 450 | verify(&heap); 451 | 452 | // one up, one down 453 | for (int size = 0; size < maxsize; ++size) { 454 | int ezis = maxsize - size; 455 | x = reallocate(&heap, x, size); verify(&heap); 456 | assert(size == 0 || equal(x, size - 1, size - 1)); 457 | assert(size == 0 || equal(y, size - 1, ezis + 1)); 458 | y = reallocate(&heap, y, ezis); verify(&heap); 459 | assert(size == 0 || equal(x, size - 1, size - 1)); 460 | assert(size == 0 || equal(y, size - 1, ezis)); 461 | memset(x, size, size); 462 | memset(y, size, ezis); 463 | verify(&heap); 464 | } 465 | x = reallocate(&heap, x, 0); 466 | y = reallocate(&heap, y, 0); 467 | verify(&heap); 468 | } 469 | -------------------------------------------------------------------------------- /win32_list_directory.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include // printf 3 | 4 | void list_directory(const char *path) { 5 | char buffer[1024]; 6 | wsprintfA(buffer, "%s/*", path); 7 | 8 | WIN32_FIND_DATAA data; 9 | HANDLE find = FindFirstFileA(buffer, &data); 10 | if (find == INVALID_HANDLE_VALUE) 11 | return; 12 | 13 | do { 14 | wsprintfA(buffer, "%s/%s", path, data.cFileName); 15 | if (data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) { 16 | int is_special = data.cFileName[0] == '.' && (!data.cFileName[1] || (data.cFileName[1] == '.' 
&& !data.cFileName[2])); 17 | if (!is_special) 18 | list_directory(buffer); 19 | } else printf("%s\n", buffer); 20 | } while (FindNextFileA(find, &data)); 21 | } 22 | 23 | int main(void) { 24 | list_directory("."); 25 | } -------------------------------------------------------------------------------- /win32_stacktrace.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include // printf 5 | 6 | typedef BOOL(WINAPI *SymGetSymFromAddr64_Func)(HANDLE hProcess, DWORD64 qwAddr, PDWORD64 pdwDisplacement, PIMAGEHLP_SYMBOL64 Symbol); 7 | typedef BOOL(WINAPI *SymGetSymFromAddr64_Func)(HANDLE hProcess, DWORD64 qwAddr, PDWORD64 pdwDisplacement, PIMAGEHLP_SYMBOL64 Symbol); 8 | typedef DWORD(WINAPI *UnDecorateSymbolName_Func)(PCSTR name, PSTR outputString, DWORD maxStringLength, DWORD flags); 9 | typedef BOOL(WINAPI *SymGetLineFromAddr64_Func)(HANDLE hProcess, DWORD64 qwAddr, PDWORD pdwDisplacement, PIMAGEHLP_LINE64 Line64); 10 | typedef BOOL(WINAPI *SymGetModuleInfo64_Func)(HANDLE hProcess, DWORD64 qwAddr, PIMAGEHLP_MODULE64 ModuleInfo); 11 | typedef BOOL(WINAPI *StackWalk64_Func)( 12 | DWORD MachineType, 13 | HANDLE hProcess, 14 | HANDLE hThread, 15 | LPSTACKFRAME64 StackFrame, 16 | PVOID ContextRecord, 17 | PREAD_PROCESS_MEMORY_ROUTINE64 ReadMemoryRoutine, 18 | PFUNCTION_TABLE_ACCESS_ROUTINE64 FunctionTableAccessRoutine, 19 | PGET_MODULE_BASE_ROUTINE64 GetModuleBaseRoutine, 20 | PTRANSLATE_ADDRESS_ROUTINE64 TranslateAddress); 21 | SymGetSymFromAddr64_Func SymGetSymFromAddr64_func; 22 | UnDecorateSymbolName_Func UnDecorateSymbolName_func; 23 | SymGetLineFromAddr64_Func SymGetLineFromAddr64_func; 24 | SymGetModuleInfo64_Func SymGetModuleInfo64_func; 25 | StackWalk64_Func StackWalk64_func; 26 | #define SymGetSymFromAddr64 SymGetSymFromAddr64_func 27 | #define UnDecorateSymbolName UnDecorateSymbolName_func 28 | #define SymGetLineFromAddr64 SymGetLineFromAddr64_func 29 | #define SymGetModuleInfo64 
SymGetModuleInfo64_func 30 | #define StackWalk64 StackWalk64_func 31 | 32 | void init_dbghelp_dll(void) { 33 | static BOOL already_tried_to_init; 34 | if (already_tried_to_init) 35 | return; 36 | already_tried_to_init = TRUE; 37 | 38 | HMODULE dbghelp_dll = LoadLibraryA("DbgHelp.dll"); 39 | if (!dbghelp_dll) 40 | return; 41 | 42 | typedef BOOL(WINAPI *SymInitialize_Func)(HANDLE hProcess, PCSTR UserSearchPath, BOOL fInvadeProcess); 43 | typedef DWORD(WINAPI *SymGetOptions_Func)(void); 44 | typedef DWORD(WINAPI *SymSetOptions_Func)(DWORD SymOptions); 45 | typedef DWORD64(WINAPI *SymLoadModule64_Func)(HANDLE hProcess, HANDLE hFile, PCSTR ImageName, PCSTR ModuleName, DWORD64 BaseOfDll, DWORD SizeOfDll); 46 | SymInitialize_Func SymInitialize_func = NULL; 47 | SymGetOptions_Func SymGetOptions_func = NULL; 48 | SymSetOptions_Func SymSetOptions_func = NULL; 49 | SymLoadModule64_Func SymLoadModule64_func = NULL; 50 | #define SymInitialize SymInitialize_func 51 | #define SymGetOptions SymGetOptions_func 52 | #define SymSetOptions SymSetOptions_func 53 | #define SymLoadModule64 SymLoadModule64_func 54 | 55 | SymInitialize = (SymInitialize_Func)GetProcAddress(dbghelp_dll, "SymInitialize"); 56 | SymGetOptions = (SymGetOptions_Func)GetProcAddress(dbghelp_dll, "SymGetOptions"); 57 | SymSetOptions = (SymSetOptions_Func)GetProcAddress(dbghelp_dll, "SymSetOptions"); 58 | SymLoadModule64 = (SymLoadModule64_Func)GetProcAddress(dbghelp_dll, "SymLoadModule64"); 59 | SymGetSymFromAddr64 = (SymGetSymFromAddr64_Func)GetProcAddress(dbghelp_dll, "SymGetSymFromAddr64"); 60 | UnDecorateSymbolName = (UnDecorateSymbolName_Func)GetProcAddress(dbghelp_dll, "UnDecorateSymbolName"); 61 | SymGetLineFromAddr64 = (SymGetLineFromAddr64_Func)GetProcAddress(dbghelp_dll, "SymGetLineFromAddr64"); 62 | SymGetModuleInfo64 = (SymGetModuleInfo64_Func)GetProcAddress(dbghelp_dll, "SymGetModuleInfo64"); 63 | StackWalk64 = (StackWalk64_Func)GetProcAddress(dbghelp_dll, "StackWalk64"); 64 | 65 | HANDLE process = 
GetCurrentProcess(); 66 | if (SymInitialize && SymGetOptions && SymSetOptions && SymLoadModule64 && SymInitialize(process, NULL, FALSE)) { 67 | DWORD options = SymGetOptions(); 68 | options |= SYMOPT_LOAD_LINES; 69 | options |= SYMOPT_FAIL_CRITICAL_ERRORS; 70 | options |= SYMOPT_DEFERRED_LOADS; 71 | options = SymSetOptions(options); 72 | 73 | HANDLE snapshot = CreateToolhelp32Snapshot(TH32CS_SNAPMODULE, GetCurrentProcessId()); 74 | if (snapshot != INVALID_HANDLE_VALUE) { 75 | #undef MODULEENTRY32 // Windows.h literally renames these to their wchar counterpart. 76 | #undef Module32First 77 | #undef Module32Next 78 | MODULEENTRY32 entry = { sizeof(entry) }; 79 | for (BOOL keep_going = Module32First(snapshot, &entry); keep_going; keep_going = Module32Next(snapshot, &entry)) 80 | SymLoadModule64(process, NULL, entry.szExePath, entry.szModule, (DWORD64)entry.modBaseAddr, entry.modBaseSize); 81 | CloseHandle(snapshot); 82 | } 83 | } 84 | } 85 | 86 | int generate_stacktrace(void *buffer[], int buffer_count, int frames_to_skip) { 87 | // RtlCaptureStackBackTrace is orders of magnitude faster than StackWalk64, but it's 88 | // undocumented and sometimes unreliable. Therefore we always try RtlCaptureStackBackTrace 89 | // first, and then fall back on StackWalk64. 90 | int num_frames = RtlCaptureStackBackTrace((DWORD)frames_to_skip + 1, buffer_count, buffer, NULL); 91 | if (num_frames <= 1) { 92 | init_dbghelp_dll(); 93 | if (!StackWalk64) 94 | return 0; 95 | 96 | CONTEXT context = { .ContextFlags = CONTEXT_FULL }; // Or CONTEXT_ALL? Or is there even any difference?? 
97 | RtlCaptureContext(&context); 98 | 99 | DWORD machine_type; 100 | STACKFRAME64 frame = { 101 | .AddrPC.Mode = AddrModeFlat, 102 | .AddrFrame.Mode = AddrModeFlat, 103 | .AddrStack.Mode = AddrModeFlat, 104 | }; 105 | #ifdef _M_X64 106 | { 107 | machine_type = IMAGE_FILE_MACHINE_AMD64; 108 | frame.AddrPC.Offset = context.Rip; 109 | frame.AddrFrame.Offset = context.Rbp; 110 | frame.AddrStack.Offset = context.Rsp; 111 | 112 | // Apparently StackWalk64 doesn't capture the frame of the function that calls 113 | // it in 32-bit code and so we only need to do this in x64. I'm not sure why. 114 | frames_to_skip += 1; 115 | } 116 | #elif defined _M_IX86 117 | { 118 | machine_type = IMAGE_FILE_MACHINE_I386; 119 | frame.AddrPC.Offset = context.Eip; 120 | frame.AddrFrame.Offset = context.Ebp; 121 | frame.AddrStack.Offset = context.Esp; 122 | } 123 | #else 124 | { 125 | return 0; // Stacktraces not supported on ARM. 126 | } 127 | #endif 128 | 129 | HANDLE process = GetCurrentProcess(); 130 | HANDLE thread = GetCurrentThread(); 131 | num_frames = 0; 132 | for (int i = 0; StackWalk64(machine_type, process, thread, &frame, &context, NULL, NULL, NULL, NULL); ++i) { 133 | if (frame.AddrPC.Offset == 0) 134 | break; 135 | if (i >= (int)frames_to_skip && num_frames < buffer_count) 136 | buffer[num_frames++] = (void *)(uintptr_t)frame.AddrPC.Offset; 137 | } 138 | } 139 | 140 | // The PC will have advanced by 1 (or more) by the point we get the stack trace - we have to undo that otherwise we get wrong info! 
141 | for (int i = 0; i < num_frames; ++i) 142 | buffer[i] = (char *)buffer[i] + 1; 143 | return num_frames; 144 | } 145 | 146 | void print_stacktrace(void *const stackframes[], int num_frames) { 147 | init_dbghelp_dll(); 148 | HANDLE process = GetCurrentProcess(); 149 | for (int i = 0; i < num_frames; ++i) { 150 | // Print either: 151 | // 1) function() in file, line x 152 | // 2) function() in !module 153 | // 3) 0xaddress in file, line x 154 | // 4) 0xaddress in !module 155 | // 5) 0xaddress 156 | 157 | DWORD64 address = (DWORD64)stackframes[i]; 158 | DWORD64 symbol_buffer[64] = { 0 }; 159 | IMAGEHLP_SYMBOL64 *symbol = (IMAGEHLP_SYMBOL64 *)symbol_buffer; 160 | symbol->SizeOfStruct = sizeof symbol[0]; 161 | symbol->MaxNameLength = sizeof symbol_buffer - sizeof symbol[0]; 162 | 163 | if (SymGetSymFromAddr64 && SymGetSymFromAddr64(process, address, &(DWORD64){0}, symbol)) { 164 | const char *function = symbol->Name; 165 | char undecorated[512]; 166 | if (UnDecorateSymbolName) { 167 | UnDecorateSymbolName(function, undecorated, sizeof undecorated, UNDNAME_NAME_ONLY); 168 | undecorated[sizeof undecorated - 1] = 0; 169 | function = undecorated; 170 | } 171 | printf("%s()", function); 172 | } else printf("0x%p", stackframes[i]); 173 | 174 | IMAGEHLP_LINE64 line_info = { .SizeOfStruct = sizeof line_info }; 175 | IMAGEHLP_MODULE64 module_info = { .SizeOfStruct = sizeof module_info }; 176 | if (SymGetLineFromAddr64 && SymGetLineFromAddr64(process, address, &(DWORD){0}, &line_info)) { 177 | int line = (int)line_info.LineNumber; 178 | const char *file = line_info.FileName; 179 | printf(" in %s, line %d", file, line); 180 | } else if (SymGetModuleInfo64 && SymGetModuleInfo64(process, address, &module_info)) { 181 | const char *module = module_info.ModuleName; 182 | printf(" in !%s", module); 183 | } 184 | 185 | printf("\n"); 186 | } 187 | } 188 | 189 | int main(void) { 190 | void *frames[128]; 191 | int num_frames = generate_stacktrace(frames, 128, 0); 192 | 
print_stacktrace(frames, num_frames); 193 | } -------------------------------------------------------------------------------- /win32_thread_queue.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | volatile int cursor; 5 | BOOL is_prime[1048576]; 6 | 7 | BOOL prime(int x) { 8 | if (x == 2) 9 | return TRUE; 10 | if (x <= 1 || !(x % 2)) 11 | return FALSE; 12 | for (INT64 i = 3; i * i <= x; i += 2) 13 | if (!(x % i)) 14 | return FALSE; 15 | return TRUE; 16 | } 17 | DWORD CALLBACK thread_function(void *param) { 18 | for (;;) { 19 | int index = InterlockedIncrement(&cursor) - 1; 20 | if (index >= _countof(is_prime)) 21 | return 0; 22 | is_prime[index] = prime(index); 23 | } 24 | } 25 | 26 | int main(void) { 27 | SYSTEM_INFO info; 28 | GetSystemInfo(&info); 29 | 30 | HANDLE threads[MAXIMUM_WAIT_OBJECTS]; 31 | int num_extra_threads = (int)info.dwNumberOfProcessors - 1; 32 | if (num_extra_threads > _countof(threads)) 33 | num_extra_threads = _countof(threads); 34 | 35 | printf("Creating %d worker threads.\n", num_extra_threads); 36 | for (int i = 0; i < num_extra_threads; ++i) 37 | threads[i] = CreateThread(NULL, 0, thread_function, NULL, 0, NULL); 38 | 39 | thread_function(NULL); 40 | WaitForMultipleObjects((DWORD)num_extra_threads, threads, TRUE, INFINITE); 41 | 42 | for (int i = 0; i < _countof(is_prime); ++i) 43 | if (is_prime[i]) 44 | printf("%d is prime.\n", i); 45 | } -------------------------------------------------------------------------------- /win32_websocket_client.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #pragma comment(lib, "winhttp.lib") 5 | 6 | void checkHr(HRESULT hr) { 7 | if (FAILED(hr)) { 8 | char message[256] = { 0 }; 9 | FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, hr, MAKELANGID(LANG_ENGLISH, SUBLANG_ENGLISH_US), message, 256, NULL); 10 | printf("HRESULT = 0x%08X: 
%s\n", hr, message); 11 | __debugbreak(); 12 | } 13 | } 14 | void checkCond(BOOL cond) { 15 | if (!cond) { 16 | DWORD error = GetLastError(); 17 | HRESULT hr = HRESULT_FROM_WIN32(error); 18 | checkHr(hr); 19 | } 20 | } 21 | void check(DWORD error) { 22 | HRESULT hr = HRESULT_FROM_WIN32(error); 23 | checkHr(hr); 24 | } 25 | 26 | int main(void) { 27 | HINTERNET session = WinHttpOpen(L"Websocket Client Test User Agent", WINHTTP_ACCESS_TYPE_AUTOMATIC_PROXY, WINHTTP_NO_PROXY_NAME, WINHTTP_NO_PROXY_BYPASS, 0); 28 | checkCond(session != NULL); 29 | 30 | HINTERNET connection = WinHttpConnect(session, L"localhost", 9999, 0); 31 | checkCond(connection != NULL); 32 | printf("Connected to server.\n"); 33 | 34 | HINTERNET request = WinHttpOpenRequest(connection, L"GET", L"", L"HTTP/1.1", WINHTTP_NO_REFERER, WINHTTP_DEFAULT_ACCEPT_TYPES, 0); 35 | checkCond(request != NULL); 36 | 37 | printf("Starting websocket upgrade handshake.\n"); 38 | checkCond(WinHttpSetOption(request, WINHTTP_OPTION_UPGRADE_TO_WEB_SOCKET, NULL, 0)); 39 | checkCond(WinHttpSendRequest(request, WINHTTP_NO_ADDITIONAL_HEADERS, 0, NULL, 0, 0, 0)); 40 | checkCond(WinHttpReceiveResponse(request, NULL)); 41 | 42 | HINTERNET websocket = WinHttpWebSocketCompleteUpgrade(request, NULL); 43 | checkCond(websocket != NULL); 44 | checkCond(WinHttpCloseHandle(request)); 45 | printf("Websocket upgrade completed.\n"); 46 | 47 | const char* message = "Hello, sailor!"; 48 | check(WinHttpWebSocketSend(websocket, WINHTTP_WEB_SOCKET_BINARY_MESSAGE_BUFFER_TYPE, message, strlen(message))); 49 | printf("Sent message to server.\n"); 50 | 51 | char buffer[999] = { 0 }; 52 | int length = 0; 53 | for (;;) { 54 | unsigned long bytesRead = 0; 55 | WINHTTP_WEB_SOCKET_BUFFER_TYPE bufferType = 0; 56 | check(WinHttpWebSocketReceive(websocket, buffer + length, sizeof buffer - length, &bytesRead, &bufferType)); 57 | length += bytesRead; 58 | if (bufferType != WINHTTP_WEB_SOCKET_BINARY_FRAGMENT_BUFFER_TYPE) break; 59 | } 60 | printf("Received 
response from server: \"%.*s\".\n", length, buffer); 61 | 62 | check(WinHttpWebSocketClose(websocket, WINHTTP_WEB_SOCKET_SUCCESS_CLOSE_STATUS, NULL, 0)); 63 | unsigned short status = 0; 64 | char reason[999]; 65 | unsigned long reasonLength = 0; 66 | WinHttpWebSocketQueryCloseStatus(websocket, &status, reason, sizeof reason, &reasonLength); 67 | printf("Closed connection with status %d and reason \"%.*s\".", status, reasonLength, reason); 68 | 69 | printf("Done"); 70 | } 71 | -------------------------------------------------------------------------------- /win32_websocket_server.c: -------------------------------------------------------------------------------- 1 | // Minimal websocket server setup using HTTP.sys and the Windows websocket API. 2 | // This is just a minimal example using synchronous calls and minimal error checking. 3 | // Don't use this in production, it's just for reference. 4 | // 5 | // You can test it with this python program: 6 | // 7 | // $ pip install websockets 8 | // 9 | // import websockets.sync.client 10 | // with websockets.sync.client.connect("ws://localhost:9999/server") as websocket: 11 | // message = websocket.recv() 12 | // print(f"Received: {message}") 13 | // websocket.send("Hello from client!") 14 | 15 | #define WIN32_LEAN_AND_MEAN 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #pragma comment(lib, "httpapi.lib") 22 | #pragma comment(lib, "websocket.lib") 23 | 24 | void checkHr(HRESULT hr) { 25 | if (FAILED(hr)) { 26 | char message[256] = { 0 }; 27 | FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, hr, MAKELANGID(LANG_ENGLISH, SUBLANG_ENGLISH_US), message, 256, NULL); 28 | printf("HRESULT = 0x%08X: %s\n", hr, message); 29 | __debugbreak(); 30 | } 31 | } 32 | void check(unsigned error) { 33 | HRESULT hr = HRESULT_FROM_WIN32(error); 34 | checkHr(hr); 35 | } 36 | 37 | int main(void) { 38 | HTTPAPI_VERSION version = HTTPAPI_VERSION_2; 39 | check(HttpInitialize(version, 
HTTP_INITIALIZE_SERVER, NULL)); 40 | 41 | HTTP_SERVER_SESSION_ID session = 0; 42 | check(HttpCreateServerSession(version, &session, 0)); 43 | 44 | HTTP_URL_GROUP_ID urlGroup = 0; 45 | check(HttpCreateUrlGroup(session, &urlGroup, 0)); 46 | check(HttpAddUrlToUrlGroup(urlGroup, L"http://localhost:9999/server", 0, 0)); 47 | 48 | HANDLE requestQueue = NULL; 49 | check(HttpCreateRequestQueue(version, NULL, NULL, 0, &requestQueue)); 50 | 51 | HTTP_BINDING_INFO binding = { 0 }; 52 | binding.Flags.Present = TRUE; 53 | binding.RequestQueueHandle = requestQueue; 54 | check(HttpSetUrlGroupProperty(urlGroup, HttpServerBindingProperty, &binding, sizeof binding)); 55 | 56 | printf("Waiting for connection.\n"); 57 | __declspec(align(8)) char requestBuffer[4096] = { 0 }; 58 | HTTP_REQUEST_V2* request = (HTTP_REQUEST_V2*)requestBuffer; 59 | unsigned long requestSize = 0; 60 | check(HttpReceiveHttpRequest(requestQueue, HTTP_NULL_ID, 0, request, sizeof requestBuffer, &requestSize, NULL)); 61 | printf("Received HTTP request.\n"); 62 | 63 | WEB_SOCKET_HTTP_HEADER wsRequestHeaders[99] = { 0 }; 64 | unsigned long numRequestHeaders = 0; 65 | for (unsigned i = 0; i < HttpHeaderRequestMaximum; i++) { 66 | HTTP_KNOWN_HEADER* src = &request->Headers.KnownHeaders[i]; 67 | if (src->RawValueLength) { 68 | static const char* const REQUEST_HEADER_NAMES[HttpHeaderRequestMaximum] = { 69 | "CacheControl", 70 | "Connection", 71 | "Date", 72 | "KeepAlive", 73 | "Pragma", 74 | "Trailer", 75 | "TransferEncoding", 76 | "Upgrade", 77 | "Via", 78 | "Warning", 79 | "Allow", 80 | "ContentLength", 81 | "ContentType", 82 | "ContentEncoding", 83 | "ContentLanguage", 84 | "ContentLocation", 85 | "ContentMd5", 86 | "ContentRange", 87 | "Expires", 88 | "LastModified", 89 | "Accept", 90 | "AcceptCharset", 91 | "AcceptEncoding", 92 | "AcceptLanguage", 93 | "Authorization", 94 | "Cookie", 95 | "Expect", 96 | "From", 97 | "Host", 98 | "IfMatch", 99 | "IfModifiedSince", 100 | "IfNoneMatch", 101 | "IfRange", 102 | 
"IfUnmodifiedSince", 103 | "MaxForwards", 104 | "ProxyAuthorization", 105 | "Referer", 106 | "Range", 107 | "Te", 108 | "Translate", 109 | "UserAgent", 110 | }; 111 | 112 | WEB_SOCKET_HTTP_HEADER* dst = &wsRequestHeaders[numRequestHeaders++]; 113 | dst->pcName = (char*)REQUEST_HEADER_NAMES[i]; 114 | dst->ulNameLength = (unsigned)strlen(dst->pcName); 115 | dst->pcValue = (char*)src->pRawValue; 116 | dst->ulValueLength = src->RawValueLength; 117 | } 118 | } 119 | for (unsigned i = 0; i < request->Headers.UnknownHeaderCount && numRequestHeaders < sizeof wsRequestHeaders / sizeof wsRequestHeaders[0]; i++) { // The unknown-header count comes from the client's request, so stop at the array's capacity instead of writing past wsRequestHeaders. 120 | HTTP_UNKNOWN_HEADER* src = &request->Headers.pUnknownHeaders[i]; 121 | WEB_SOCKET_HTTP_HEADER* dst = &wsRequestHeaders[numRequestHeaders++]; 122 | dst->pcName = (char*)src->pName; 123 | dst->ulNameLength = src->NameLength; 124 | dst->pcValue = (char*)src->pRawValue; 125 | dst->ulValueLength = src->RawValueLength; 126 | } 127 | 128 | WEB_SOCKET_HANDLE websocket = NULL; 129 | checkHr(WebSocketCreateServerHandle(NULL, 0, &websocket)); 130 | 131 | printf("Performing websocket handshake.\n"); 132 | WEB_SOCKET_HTTP_HEADER* wsResponseHeaders = NULL; 133 | unsigned long numResponseHeaders = 0; 134 | checkHr(WebSocketBeginServerHandshake(websocket, NULL, NULL, 0, wsRequestHeaders, numRequestHeaders, &wsResponseHeaders, &numResponseHeaders)); 135 | 136 | HTTP_UNKNOWN_HEADER responseHeaders[99] = { 0 }; 137 | HTTP_RESPONSE_V2 response = { 0 }; 138 | response.StatusCode = 101; 139 | response.pReason = "Switching Protocols"; 140 | response.ReasonLength = sizeof "Switching Protocols" - 1; 141 | response.Headers.pUnknownHeaders = responseHeaders; 142 | for (unsigned i = 0; i < numResponseHeaders; i++) { 143 | WEB_SOCKET_HTTP_HEADER* src = &wsResponseHeaders[i]; 144 | BOOL isKnownHeader = FALSE; 145 | for (int j = 0; j < HttpHeaderResponseMaximum; j++) { 146 | static const char* const RESPONSE_HEADER_NAMES[HttpHeaderResponseMaximum] = { 147 | "CacheControl", 148 | "Connection", 149 | "Date", 150 | "KeepAlive", 151 | "Pragma", 152 | "Trailer", 153 | 
"TransferEncoding", 154 | "Upgrade", 155 | "Via", 156 | "Warning", 157 | "Allow", 158 | "ContentLength", 159 | "ContentType", 160 | "ContentEncoding", 161 | "ContentLanguage", 162 | "ContentLocation", 163 | "ContentMd5", 164 | "ContentRange", 165 | "Expires", 166 | "LastModified", 167 | "AcceptRanges", 168 | "Age", 169 | "Etag", 170 | "Location", 171 | "ProxyAuthenticate", 172 | "RetryAfter", 173 | "Server", 174 | "SetCookie", 175 | "Vary", 176 | "WwwAuthenticate", 177 | }; 178 | const char* name = RESPONSE_HEADER_NAMES[j]; 179 | size_t length = strlen(name); 180 | if (src->ulNameLength == length && memcmp(src->pcName, name, length) == 0) { 181 | isKnownHeader = TRUE; 182 | HTTP_KNOWN_HEADER* dst = &response.Headers.KnownHeaders[j]; 183 | dst->pRawValue = src->pcValue; 184 | dst->RawValueLength = (unsigned short)src->ulValueLength; 185 | break; 186 | } 187 | } 188 | if (!isKnownHeader) { 189 | HTTP_UNKNOWN_HEADER* dst = &response.Headers.pUnknownHeaders[response.Headers.UnknownHeaderCount++]; 190 | dst->pName = src->pcName; 191 | dst->NameLength = (unsigned short)src->ulNameLength; 192 | dst->pRawValue = src->pcValue; 193 | dst->RawValueLength = (unsigned short)src->ulValueLength; 194 | } 195 | } 196 | 197 | //@HACK For some reason HttpSendResponse doesn't seem to send the Connection: Upgrade header unless it's set 198 | // as both an HTTP_KNOWN_HEADER and an HTTP_UNKNOWN_HEADER. No idea why, but it just ignores it. 199 | // We already set it as a known header in the loop above, so now just add it as an unknown header. 
200 | HTTP_UNKNOWN_HEADER* connectionHeader = &response.Headers.pUnknownHeaders[response.Headers.UnknownHeaderCount++]; 201 | connectionHeader->pName = "Connection"; 202 | connectionHeader->NameLength = sizeof "Connection" - 1; 203 | connectionHeader->pRawValue = "Upgrade"; 204 | connectionHeader->RawValueLength = sizeof "Upgrade" - 1; 205 | 206 | HTTP_REQUEST_ID requestId = request->RequestId; 207 | unsigned long responseBytesSent = 0; 208 | check(HttpSendHttpResponse(requestQueue, requestId, HTTP_SEND_RESPONSE_FLAG_OPAQUE | HTTP_SEND_RESPONSE_FLAG_MORE_DATA, &response, NULL, &responseBytesSent, NULL, 0, NULL, NULL)); 209 | 210 | checkHr(WebSocketEndServerHandshake(websocket)); 211 | printf("Websocket handshake complete.\n"); 212 | 213 | WEB_SOCKET_BUFFER sendData = { 0 }; 214 | sendData.Data.pbBuffer = (BYTE*)"Hello from server!"; 215 | sendData.Data.ulBufferLength = sizeof "Hello from server!" - 1; 216 | checkHr(WebSocketSend(websocket, WEB_SOCKET_UTF8_MESSAGE_BUFFER_TYPE, &sendData, NULL)); 217 | for (;;) { 218 | WEB_SOCKET_BUFFER buffer = { 0 }; 219 | unsigned long numBuffers = 1; 220 | WEB_SOCKET_ACTION action = 0; 221 | WEB_SOCKET_BUFFER_TYPE bufferType = 0; 222 | void* context = NULL; 223 | checkHr(WebSocketGetAction(websocket, WEB_SOCKET_ALL_ACTION_QUEUE, &buffer, &numBuffers, &action, &bufferType, NULL, &context)); 224 | 225 | unsigned long bytesTransferred = 0; 226 | if (action == WEB_SOCKET_SEND_TO_NETWORK_ACTION) { 227 | assert(numBuffers == 1); 228 | HTTP_DATA_CHUNK chunk = { 0 }; 229 | chunk.DataChunkType = HttpDataChunkFromMemory; 230 | chunk.FromMemory.pBuffer = buffer.Data.pbBuffer; 231 | chunk.FromMemory.BufferLength = buffer.Data.ulBufferLength; 232 | check(HttpSendResponseEntityBody(requestQueue, requestId, HTTP_SEND_RESPONSE_FLAG_MORE_DATA, 1, &chunk, &bytesTransferred, NULL, 0, NULL, NULL)); 233 | printf("Sent %d bytes.\n", bytesTransferred); 234 | } 235 | else { 236 | assert(action == WEB_SOCKET_INDICATE_SEND_COMPLETE_ACTION); 237 | 
assert(numBuffers == 0); 238 | printf("Send completed.\n"); 239 | } 240 | 241 | WebSocketCompleteAction(websocket, context, bytesTransferred); 242 | if (action == WEB_SOCKET_INDICATE_SEND_COMPLETE_ACTION) break; 243 | } 244 | 245 | checkHr(WebSocketReceive(websocket, NULL, NULL)); 246 | for (;;) { 247 | WEB_SOCKET_BUFFER buffer = { 0 }; 248 | unsigned long numBuffers = 1; 249 | WEB_SOCKET_ACTION action = 0; 250 | WEB_SOCKET_BUFFER_TYPE bufferType = 0; 251 | void* context = NULL; 252 | checkHr(WebSocketGetAction(websocket, WEB_SOCKET_ALL_ACTION_QUEUE, &buffer, &numBuffers, &action, &bufferType, NULL, &context)); 253 | 254 | unsigned long bytesTransferred = 0; 255 | if (action == WEB_SOCKET_RECEIVE_FROM_NETWORK_ACTION) { 256 | assert(numBuffers == 1); 257 | check(HttpReceiveRequestEntityBody(requestQueue, requestId, 0, buffer.Data.pbBuffer, buffer.Data.ulBufferLength, &bytesTransferred, NULL)); 258 | printf("Received %d bytes.\n", bytesTransferred); 259 | } 260 | else { 261 | assert(action == WEB_SOCKET_INDICATE_RECEIVE_COMPLETE_ACTION); 262 | assert(numBuffers == 1); 263 | printf("Receive completed: \"%.*s\"\n", buffer.Data.ulBufferLength, buffer.Data.pbBuffer); 264 | } 265 | 266 | WebSocketCompleteAction(websocket, context, bytesTransferred); 267 | if (action == WEB_SOCKET_INDICATE_RECEIVE_COMPLETE_ACTION) break; 268 | } 269 | 270 | printf("Done.\n"); 271 | } 272 | -------------------------------------------------------------------------------- /x86_cpuid.c: -------------------------------------------------------------------------------- 1 | #ifdef _MSC_VER 2 | # include 3 | void cpuid(int leaf, int subleaf, int *eax, int *ebx, int *ecx, int *edx) { 4 | int registers[4]; 5 | if (subleaf == 0) 6 | __cpuid(registers, leaf); 7 | else 8 | __cpuidex(registers, leaf, subleaf); 9 | *eax = registers[0]; 10 | *ebx = registers[1]; 11 | *ecx = registers[2]; 12 | *edx = registers[3]; 13 | } 14 | int cpuid_is_supported(void) { 15 | // Try to set and clear bit 21 in the flags 
register. This indicates support for the CPUID instruction. 16 | // Bail out immediately if it's not supported. 17 | unsigned bit21 = 1u << 21; 18 | 19 | __writeeflags(__readeflags() | bit21); 20 | if ((__readeflags() & bit21) == 0) 21 | return 0; 22 | // The masked value is either 0 or bit21 itself, never 1, so the "still set" test must compare against 0. 23 | __writeeflags(__readeflags() & ~bit21); 24 | if ((__readeflags() & bit21) != 0) 25 | return 0; 26 | 27 | return 1; 28 | } 29 | #else 30 | # include 31 | void cpuid(int leaf, int subleaf, int *eax, int *ebx, int *ecx, int *edx) { 32 | __cpuid_count(leaf, subleaf, *eax, *ebx, *ecx, *edx); 33 | } 34 | int cpuid_is_supported(void) { 35 | return __get_cpuid_max(0, 0) != 0; 36 | } 37 | #endif 38 | 39 | #include 40 | #include 41 | int extract_bits(int x, int highest, int lowest) { // Returns bits highest..lowest (inclusive) of x, shifted down so that bit 'lowest' lands in bit 0. 42 | unsigned u = (unsigned)x; 43 | u <<= 31 - highest; 44 | u >>= (31 - highest) + lowest; 45 | return (int)u; 46 | } 47 | int extract_bit(int x, int index) { // Returns bit 'index' of x as 0 or 1. 48 | return (int)(((unsigned)x >> index) & 1); 49 | } 50 | int main(void) { 51 | if (!cpuid_is_supported()) { 52 | printf("CPUID is not supported on this CPU!\n"); 53 | return 0; 54 | } 55 | 56 | int eax, ebx, ecx, edx; 57 | cpuid(0, 0, &eax, &ebx, &ecx, &edx); 58 | 59 | int max_cpuid = eax; 60 | printf("Max CPUID leaf: %d.\n", max_cpuid); 61 | 62 | char vendor[13]; 63 | memcpy(vendor + 0, &ebx, 4); 64 | memcpy(vendor + 4, &edx, 4); // Note that the string is in ebx:eDx:eCx. 
65 | memcpy(vendor + 8, &ecx, 4); 66 | vendor[12] = 0; 67 | printf("Vendor: '%s'.\n", vendor); 68 | 69 | if (max_cpuid < 1) 70 | return 0; 71 | 72 | cpuid(1, 0, &eax, &ebx, &ecx, &edx); 73 | int stepping_id = extract_bits(eax, 3, 0); 74 | int model_id = extract_bits(eax, 7, 4); 75 | int family_id = extract_bits(eax, 11, 8); 76 | int ext_model_id = extract_bits(eax, 19, 16); 77 | int ext_family_id = extract_bits(eax, 27, 20); 78 | 79 | int actual_family_id = family_id; 80 | if (family_id == 15) 81 | actual_family_id += ext_family_id; 82 | 83 | int actual_model_id = model_id; 84 | if (family_id == 6 || family_id == 15) 85 | actual_model_id += (ext_model_id << 4); 86 | 87 | printf("Family: %d.\n", actual_family_id); 88 | printf("Model: %d.\n", actual_model_id); 89 | printf("Stepping: %d.\n", stepping_id); 90 | 91 | int supports_hyperthreading_in_theory = extract_bit(edx, 28); // Doesn't mean the CPU is actually hyperthreaded.. 92 | int has_clflush = extract_bit(edx, 19); 93 | int cache_line_size = 0; 94 | if (has_clflush) 95 | cache_line_size = 8 * extract_bits(ebx, 15, 8); 96 | 97 | cpuid(0x80000000, 0, &eax, &ebx, &ecx, &edx); 98 | int max_cpuid_ex = eax; 99 | char name[48] = "Unknown"; 100 | if (max_cpuid_ex >= 0x80000004) { 101 | cpuid(0x80000002, 0, (int *)name + 0, (int *)name + 1, (int *)name + 2, (int *)name + 3); 102 | cpuid(0x80000003, 0, (int *)name + 4, (int *)name + 5, (int *)name + 6, (int *)name + 7); 103 | cpuid(0x80000004, 0, (int *)name + 8, (int *)name + 9, (int *)name + 10, (int *)name + 11); 104 | } 105 | printf("Name: %s\n", name); 106 | printf("Cache line size: %d bytes.\n", cache_line_size); 107 | 108 | int num_logical_cores = 1; 109 | int num_physical_cores = 1; 110 | int l1d_cache_size = 0; 111 | int l1i_cache_size = 0; 112 | int l2_cache_size = 0; 113 | int l3_cache_size = 0; 114 | if (strstr(vendor, "AMD")) { 115 | if (max_cpuid_ex >= 0x80000008) { 116 | cpuid(0x80000008, 0, &eax, &ebx, &ecx, &edx); 117 | num_logical_cores = 1 + 
extract_bits(ecx, 7, 0); 118 | } else { 119 | cpuid(1, 0, &eax, &ebx, &ecx, &edx); 120 | num_logical_cores = extract_bits(ebx, 23, 16); 121 | } 122 | 123 | // This really isn't a great indication. Many sources say that CPUID reports hyperthreading even when the processor 124 | // doesn't actually support it. But I can't test this right now since I don't have a non-hyperthreaded AMD chip. 125 | if (supports_hyperthreading_in_theory) 126 | num_physical_cores = num_logical_cores / 2; 127 | else 128 | num_physical_cores = num_logical_cores; 129 | 130 | if (max_cpuid_ex >= 0x80000005) { 131 | cpuid(0x80000005, 0, &eax, &ebx, &ecx, &edx); 132 | l1d_cache_size = extract_bits(ecx, 31, 24); 133 | l1i_cache_size = extract_bits(edx, 31, 24); 134 | } 135 | 136 | if (max_cpuid_ex >= 0x80000006) { 137 | cpuid(0x80000006, 0, &eax, &ebx, &ecx, &edx); 138 | l2_cache_size = extract_bits(ecx, 31, 16); 139 | l3_cache_size = 512 * extract_bits(edx, 31, 18); // This is reported in units of 512kB. 140 | } 141 | } else if (strstr(vendor, "Intel")) { 142 | if (max_cpuid >= 4) { 143 | cpuid(4, 0, &eax, &ebx, &ecx, &edx); 144 | 145 | // The value reported here is not accurate (I'm not sure if that's always the case). 146 | // On an i5-7300HQ it reports 8 logical cores with hyperthreading, even though that CPU 147 | // doesn't have hyperthreading.. Still this is a decent approximation at least. 148 | num_logical_cores = 1 + extract_bits(eax, 31, 26); 149 | num_physical_cores = num_logical_cores; 150 | if (supports_hyperthreading_in_theory) 151 | num_physical_cores /= 2; 152 | 153 | // Enumerate all caches to find out sizes. 154 | for (int index = 0;; ++index) { 155 | cpuid(4, index, &eax, &ebx, &ecx, &edx); 156 | int type = extract_bits(eax, 4, 0); // 0 - invalid, 1 - data cache, 2 - instruction cache, 3 - unified cache. 
157 | if (type == 0) 158 | break; 159 | 160 | int level = extract_bits(eax, 7, 5); 161 | int ways = 1 + extract_bits(ebx, 31, 22); 162 | int partitions = 1 + extract_bits(ebx, 21, 12); 163 | int line_size = 1 + extract_bits(ebx, 11, 0); 164 | int sets = 1 + extract_bits(ecx, 31, 0); 165 | int cache_size = ways * partitions * line_size * sets / 1024; 166 | 167 | if (level == 1) { 168 | if (type == 1) 169 | l1d_cache_size = cache_size; 170 | else if (type == 2) 171 | l1i_cache_size = cache_size; 172 | else if (type == 3) { 173 | // For unified L1 caches, set instruction cache size to 0 and set data cache size to the actual cache size. 174 | l1i_cache_size = 0; 175 | l1d_cache_size = cache_size; 176 | } 177 | } 178 | else if (level == 2) 179 | l2_cache_size = cache_size; 180 | else if (level == 3) 181 | l3_cache_size = cache_size; 182 | } 183 | } 184 | 185 | if (max_cpuid >= 0xB) { 186 | // This is a much better way of checking the number of cores than with cpuid(4) above. 187 | // At least this one is accurate on a i5-7300HQ and i7-8550U. 
188 | cpuid(0xB, 0, &eax, &ebx, &ecx, &edx); 189 | int num_logical_processors_per_physical_core = extract_bits(ebx, 15, 0); 190 | cpuid(0xB, 1, &eax, &ebx, &ecx, &edx); 191 | int num_logical_cores_0xb = extract_bits(ebx, 15, 0); 192 | if (num_logical_processors_per_physical_core > 0 && num_logical_cores_0xb > 0) { num_logical_cores = num_logical_cores_0xb; num_physical_cores = num_logical_cores / num_logical_processors_per_physical_core; } // Leaf 0xB reports 0 in EBX[15:0] when it is not actually implemented; keep the earlier estimate instead of dividing by zero. 193 | } 194 | } 195 | printf("Logical cores: %d.\n", num_logical_cores); 196 | printf("Physical cores: %d.\n", num_physical_cores); 197 | printf("L1i cache size: %d kB.\n", l1i_cache_size); 198 | printf("L1d cache size: %d kB.\n", l1d_cache_size); 199 | printf("L2 cache size: %d kB.\n", l2_cache_size); 200 | printf("L3 cache size: %d kB.\n", l3_cache_size); 201 | 202 | printf("Feature flags: "); 203 | cpuid(1, 0, &eax, &ebx, &ecx, &edx); 204 | if (extract_bit(edx, 8)) printf("cx8 "); 205 | if (extract_bit(ecx, 13)) printf("cx16 "); 206 | if (extract_bit(edx, 4)) printf("tsc "); 207 | if (extract_bit(edx, 15)) printf("cmov "); 208 | if (extract_bit(edx, 23)) printf("mmx "); 209 | if (extract_bit(edx, 25)) printf("sse "); 210 | if (extract_bit(edx, 26)) printf("sse2 "); 211 | if (extract_bit(ecx, 0)) printf("sse3 "); 212 | if (extract_bit(ecx, 9)) printf("ssse3 "); 213 | if (extract_bit(ecx, 19)) printf("sse41 "); 214 | if (extract_bit(ecx, 20)) printf("sse42 "); 215 | if (extract_bit(ecx, 28)) printf("avx "); 216 | if (extract_bit(ecx, 12)) printf("fma "); 217 | if (extract_bit(ecx, 29)) printf("f16c "); 218 | if (extract_bit(ecx, 1)) printf("pclmulqdq "); 219 | if (extract_bit(ecx, 22)) printf("movbe "); 220 | if (extract_bit(ecx, 23)) printf("popcnt "); 221 | if (extract_bit(ecx, 25)) printf("aes "); 222 | if (extract_bit(ecx, 30)) printf("rdrnd "); 223 | 224 | eax = ebx = ecx = edx = 0; 225 | if (max_cpuid >= 7) 226 | cpuid(7, 0, &eax, &ebx, &ecx, &edx); 227 | int max_cpuid_7 = eax; 228 | 229 | if (extract_bit(ebx, 5)) printf("avx2 "); 230 | if (extract_bit(ebx, 16)) printf("avx512_f "); 231 | if (extract_bit(ebx, 17)) printf("avx512_dq "); 232 | if 
(extract_bit(ebx, 21)) printf("avx512_ifma "); 233 | if (extract_bit(ebx, 26)) printf("avx512_pf "); 234 | if (extract_bit(ebx, 27)) printf("avx512_er "); 235 | if (extract_bit(ebx, 28)) printf("avx512_cd "); 236 | if (extract_bit(ebx, 30)) printf("avx512_bw "); 237 | if (extract_bit(ebx, 31)) printf("avx512_vl "); 238 | if (extract_bit(ecx, 1)) printf("avx512_vbmi "); 239 | if (extract_bit(ecx, 6)) printf("avx512_vbmi2 "); 240 | if (extract_bit(ecx, 11)) printf("avx512_vnni "); 241 | if (extract_bit(ecx, 12)) printf("avx512_bitalg "); 242 | if (extract_bit(ecx, 14)) printf("avx512_vpopcntdq "); 243 | if (extract_bit(edx, 2)) printf("avx512_4vnniw "); 244 | if (extract_bit(edx, 3)) printf("avx512_4fmaps "); 245 | if (extract_bit(edx, 8)) printf("avx512_vp2intersect "); 246 | if (extract_bit(edx, 23)) printf("avx512_fp16 "); 247 | if (extract_bit(ebx, 3)) printf("bmi1 "); 248 | if (extract_bit(ebx, 8)) printf("bmi2 "); 249 | if (extract_bit(ebx, 29)) printf("sha "); 250 | if (extract_bit(ebx, 18)) printf("rdseed "); 251 | } -------------------------------------------------------------------------------- /x86_rdtsc_seconds.c: -------------------------------------------------------------------------------- 1 | // Source: https://gist.github.com/Mic92/12063527bb6d6c5a636502300d2de446 2 | 3 | #ifdef _MSC_VER 4 | # include 5 | void cpuid(int leaf, int subleaf, int *eax, int *ebx, int *ecx, int *edx) { 6 | int registers[4]; 7 | if (subleaf == 0) 8 | __cpuid(registers, leaf); 9 | else 10 | __cpuidex(registers, leaf, subleaf); 11 | *eax = registers[0]; 12 | *ebx = registers[1]; 13 | *ecx = registers[2]; 14 | *edx = registers[3]; 15 | } 16 | int cpuid_is_supported(void) { 17 | // Try to set and clear bit 21 in the flags register. This indicates support for the CPUID instruction. 18 | // Bail out immediately if it's not supported. 
19 | unsigned bit21 = 1u << 21; 20 | 21 | __writeeflags(__readeflags() | bit21); 22 | if ((__readeflags() & bit21) == 0) 23 | return 0; 24 | // The masked value is either 0 or bit21 itself, never 1, so the "still set" test must compare against 0. 25 | __writeeflags(__readeflags() & ~bit21); 26 | if ((__readeflags() & bit21) != 0) 27 | return 0; 28 | 29 | return 1; 30 | } 31 | unsigned long long rdtsc(void) { 32 | return __rdtsc(); 33 | } 34 | #else 35 | # include 36 | void cpuid(int leaf, int subleaf, int *eax, int *ebx, int *ecx, int *edx) { 37 | __cpuid_count(leaf, subleaf, *eax, *ebx, *ecx, *edx); 38 | } 39 | int cpuid_is_supported(void) { 40 | return __get_cpuid_max(0, 0) != 0; 41 | } 42 | unsigned long long rdtsc(void) { 43 | unsigned lo, hi; 44 | asm volatile("rdtsc" : "=a" (lo), "=d" (hi)); // RDTSC copies contents of 64-bit TSC into EDX:EAX 45 | return lo | ((unsigned long long)hi << 32); 46 | } 47 | #endif 48 | 49 | unsigned long long get_tsc_increments_per_second() { 50 | if (!cpuid_is_supported()) 51 | return 0; 52 | 53 | // extracted from https://github.com/torvalds/linux/blob/b95fffb9b4afa8b9aa4a389ec7a0c578811eaf42/tools/power/x86/turbostat/turbostat.c 54 | int eax_crystal = 0; 55 | int ebx_tsc = 0; 56 | int crystal_hz = 0; 57 | int edx = 0; 58 | cpuid(0x15, 0, &eax_crystal, &ebx_tsc, &crystal_hz, &edx); 59 | if (!ebx_tsc || !eax_crystal) // This will not work on old Intel processors, or any AMD processor. You really need a fallback.. A zero denominator (eax_crystal) is rejected too, since the result is divided by it. 
60 | return 0; 61 | 62 | int fms, family, model, ebx, ecx; 63 | cpuid(1, 0, &fms, &ebx, &ecx, &edx); 64 | family = (fms >> 8) & 0xf; 65 | model = (fms >> 4) & 0xf; 66 | if (family == 0xf) 67 | family += (fms >> 20) & 0xff; 68 | if (family >= 6) 69 | model += ((fms >> 16) & 0xf) << 4; 70 | 71 | enum { 72 | INTEL_FAM6_SKYLAKE_L = 0x4E, 73 | INTEL_FAM6_SKYLAKE = 0x5E, 74 | INTEL_FAM6_KABYLAKE_L = 0x8E, 75 | INTEL_FAM6_KABYLAKE = 0x9E, 76 | INTEL_FAM6_COMETLAKE = 0xA5, 77 | INTEL_FAM6_COMETLAKE_L = 0xA6, 78 | INTEL_FAM6_ATOM_GOLDMONT = 0x5C, 79 | INTEL_FAM6_ATOM_GOLDMONT_D = 0x5F, 80 | INTEL_FAM6_ATOM_GOLDMONT_PLUS = 0x7A, 81 | INTEL_FAM6_ATOM_TREMONT_D = 0x86, 82 | }; 83 | 84 | if (!crystal_hz) { 85 | switch(model) { 86 | case INTEL_FAM6_SKYLAKE_L: 87 | case INTEL_FAM6_SKYLAKE: 88 | case INTEL_FAM6_KABYLAKE_L: 89 | case INTEL_FAM6_KABYLAKE: 90 | case INTEL_FAM6_COMETLAKE_L: 91 | case INTEL_FAM6_COMETLAKE: 92 | crystal_hz = 24000000; 93 | break; 94 | case INTEL_FAM6_ATOM_GOLDMONT_D: 95 | case INTEL_FAM6_ATOM_TREMONT_D: 96 | crystal_hz = 25000000; 97 | break; 98 | case INTEL_FAM6_ATOM_GOLDMONT: 99 | case INTEL_FAM6_ATOM_GOLDMONT_PLUS: 100 | crystal_hz = 19200000; 101 | break; 102 | } 103 | } 104 | 105 | return (unsigned long long)crystal_hz * ebx_tsc / eax_crystal; 106 | } 107 | 108 | #include 109 | #include 110 | int main(void) { 111 | unsigned long long tsc_hz = get_tsc_increments_per_second(); 112 | if (!tsc_hz) { 113 | printf("Couldn't get TSC frequency on this CPU.\n"); 114 | return 0; 115 | } 116 | 117 | double tsc_to_seconds = 1.0 / tsc_hz; 118 | struct timespec ts; 119 | timespec_get(&ts, TIME_UTC); 120 | unsigned long long ts0 = (unsigned long long)ts.tv_sec * 1000000000 + ts.tv_nsec; 121 | unsigned long long tsc0 = rdtsc(); 122 | for (;;) { 123 | timespec_get(&ts, TIME_UTC); 124 | unsigned long long ts1 = (unsigned long long)ts.tv_sec * 1000000000 + ts.tv_nsec; 125 | unsigned long long tsc1 = rdtsc(); 126 | double tsdt = (ts1 - ts0) * 1e-9; 127 | double tscdt 
= (tsc1 - tsc0) * tsc_to_seconds; 128 | printf("TS %.9f - TSC %.9f\n", tsdt, tscdt); 129 | } 130 | } --------------------------------------------------------------------------------