├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── alloc.c ├── arena.h ├── bump.c ├── bump.h ├── chunk.c ├── chunk.h ├── extent.c ├── extent.h ├── huge.c ├── huge.h ├── memory.c ├── memory.h ├── mutex.c ├── mutex.h ├── purge.c ├── purge.h ├── rb.h ├── test_huge.c ├── test_large.c ├── test_small.c └── util.h /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (C) 2015 Daniel Micay . All rights reserved. 2 | Copyright (C) 2002-2014 Jason Evans . 3 | All rights reserved. 4 | Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved. 5 | Copyright (C) 2009-2014 Facebook, Inc. All rights reserved. 6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions are met: 9 | 1. Redistributions of source code must retain the above copyright notice(s), 10 | this list of conditions and the following disclaimer. 11 | 2. Redistributions in binary form must reproduce the above copyright notice(s), 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS 16 | OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 17 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO 18 | EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 19 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 20 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 21 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 22 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 23 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 24 | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
25 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CFLAGS = -std=c11 -fPIC -D_GNU_SOURCE -fvisibility=hidden -Wall -Wextra 2 | LDFLAGS = -Wl,--as-needed 3 | LDLIBS = -lpthread 4 | OBJECTS = alloc.o bump.o chunk.o extent.o huge.o memory.o mutex.o purge.o 5 | BINARIES = alloc.so test_small test_large test_huge 6 | 7 | DEBUG ?= 0 8 | ifeq ($(DEBUG), 1) 9 | CFLAGS += -Og -g 10 | else 11 | CFLAGS += -flto -O2 -DNDEBUG 12 | LDFLAGS += -flto -O2 13 | endif 14 | 15 | all: $(BINARIES) 16 | 17 | alloc.so: $(OBJECTS) 18 | $(CC) $(CFLAGS) $(LDFLAGS) -shared $^ $(LDLIBS) -o $@ 19 | 20 | test_small: test_small.c $(OBJECTS) 21 | test_large: test_large.c $(OBJECTS) 22 | test_huge: test_huge.c $(OBJECTS) 23 | 24 | alloc.o: alloc.c arena.h bump.h chunk.h huge.h memory.h mutex.h purge.h util.h 25 | bump.o: bump.c bump.h chunk.h memory.h mutex.h 26 | chunk.o: chunk.c chunk.h extent.h memory.h mutex.h 27 | extent.o: extent.c bump.h extent.h mutex.h 28 | huge.o: huge.c arena.h chunk.h huge.h memory.h purge.h mutex.h util.h 29 | memory.o: memory.c memory.h 30 | mutex.o: mutex.c mutex.h util.h 31 | purge.o: purge.c purge.h 32 | 33 | clean: 34 | rm -f $(OBJECTS) $(BINARIES) 35 | 36 | .PHONY: all clean 37 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Current implementation 2 | 3 | ### Low-level memory management 4 | 5 | Chunks are the fundamental building block for all memory allocations. In the 6 | current implementation, the chunks are 4MiB blocks with 4MiB alignment. 7 | 8 | Rather than unmapping chunks, extents of free chunks are managed in userspace 9 | to reduce fragmentation, system call overhead and synchronization. 10 | 11 | Extents of free chunks are managed via address-ordered best-fit, which greatly 12 | reduces overall fragmentation and provides logarithmic time complexity for 13 | every case where the peak virtual memory doesn't need to be increased. A node 14 | is used to represent each extent, with one intrusive tree keyed by (size, 15 | address) for allocation and another keyed by address for coalescing. 16 | 17 | If there is no address space resource limit, a large portion of the address 18 | space is reserved up-front for a stronger time complexity guarantee and more 19 | compaction from the address ordering. The reserved memory is partitioned 20 | between the cores for parallel chunk allocation, falling back to the global 21 | data structure only when it runs out. On 64-bit, this means that there are no 22 | global resources preventing linear scaling as the reserved mapping is enormous. 23 | 24 | The system calls for managing mappings (mmap, mprotect, munmap) require taking 25 | the global mmap_sem lock as writers, while page faults and madvise purging use 26 | concurrent reader access. Doing the work in userspace avoids getting in the way 27 | of page faults and is significantly cheaper. 28 | 29 | The Linux kernel also lacks an ordering by size, so it has to use an ugly 30 | heuristic for allocation rather than best-fit. It allocates below the lowest 31 | mapping so far if there is room and then falls back to an O(n) scan. This 32 | leaves behind gaps when anything but the lowest mapping is freed, increasing 33 | the rate of TLB misses.
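A minimal standalone sketch of the two orderings described above (the project's own comparators are in extent.c further down; `struct extent` here is an illustrative stand-in for `struct extent_node`). Searching the (size, address) tree for the first node that is not smaller than the requested size yields the smallest sufficient extent, and the lowest-addressed one among equally sized extents:

```c
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct extent {
    void *addr;
    size_t size;
};

// Address-only ordering, used by the coalescing tree.
static int extent_ad_comp(const struct extent *a, const struct extent *b) {
    uintptr_t a_addr = (uintptr_t)a->addr;
    uintptr_t b_addr = (uintptr_t)b->addr;
    return (a_addr > b_addr) - (a_addr < b_addr);
}

// (size, address) ordering, used by the allocation tree: best-fit first,
// address-ordered among extents of equal size.
static int extent_szad_comp(const struct extent *a, const struct extent *b) {
    int ret = (a->size > b->size) - (a->size < b->size);
    return ret ? ret : extent_ad_comp(a, b);
}

int main(void) {
    struct extent low = {(void *)0x1000, 4};
    struct extent high = {(void *)0x9000, 4};
    // Equal sizes: the lower-addressed extent sorts first, so best-fit also
    // compacts allocations toward low addresses. Prints -1.
    printf("%d\n", extent_szad_comp(&low, &high));
    return 0;
}
```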
34 | 35 | Natural alignment for chunks makes it possible to distinguish allocations 36 | smaller and larger than the chunk size from their addresses alone, which 37 | allows the metadata to be found in O(1) time. Note that it is currently 38 | O(log n) for huge allocations, but it doesn't have to be. As long as the chunk 39 | size remains a multiple of the transparent huge page size (2MiB), there is also 40 | the benefit of huge pages being able to back every allocation. 41 | 42 | ### Decommit / purging 43 | 44 | When overcommit is enabled, memory is released back to the operating system 45 | with MADV_FREE, or MADV_DONTNEED if the superior lazy MADV_FREE is unavailable. 46 | 47 | When overcommit is disabled, commit charge is dropped by setting PROT_NONE on 48 | the mappings in addition to purging. 49 | 50 | The ability to opt in to lightweight purging even without overcommit enabled, or 51 | to disable purging completely, will be exposed in the future. 52 | 53 | On Windows, the usage of PROT_NONE maps directly to MEM_COMMIT and MEM_DECOMMIT, 54 | while MADV_FREE is the same as MEM_RESET. 55 | 56 | Purging is currently only implemented at a chunk level and does not perform the 57 | work lazily (beyond MADV_FREE lazily dropping pages). The intention is to track 58 | dirty prefixes in free spans of memory, with lazy purging in FIFO order. The 59 | same purging strategy can be used for small, large and chunk allocation. 60 | 61 | There will be a minimum permitted amount of dirty memory per arena before 62 | purging is used, along with a ratio of active:dirty pages. 63 | 64 | Coalescing a freed span with a free span succeeding it will be painless, while 65 | coalescing with a preceding span that is not entirely dirty will need to use a 66 | heuristic to choose between considering the whole span dirty or purging the new 67 | space and leaving it clean. The address-ordered best-fit algorithm plays well 68 | with dirty prefixes because spans with lower addresses are preferred. Using 69 | first-fit would likely synergize even more, but at the expense of increasing 70 | fragmentation, which is what this design tries to avoid in the first place. 71 | 72 | The alternative would be segregating clean and dirty memory entirely, but this 73 | would create a new form of fragmentation. It may be tested in the future, but 74 | it is expected that the chosen design will be faster overall without the need 75 | to pay the cost of fragmenting the memory. Tracking the dirty spans could be 76 | done precisely without segregating the memory, but it would be more complex and 77 | more time would be spent managing metadata.
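The purging split described in this section can be condensed into a short sketch, assuming Linux. It mirrors memory_decommit() in memory.c below, with MADV_FREE added as the preferred advice where the kernel provides it (the current code uses MADV_DONTNEED); the mmap flags and the `reduce_commit_charge` switch follow memory.c:

```c
#define _GNU_SOURCE
#include <stdbool.h>
#include <stddef.h>
#include <sys/mman.h>

#ifndef MADV_FREE
#define MADV_FREE 8 // added in Linux 4.5; absent from older headers
#endif

// Set at startup when overcommit is disabled (/proc/sys/vm/overcommit_memory
// is 2), so commit charge has to be dropped explicitly.
static bool reduce_commit_charge;

void purge(void *addr, size_t size) {
    if (reduce_commit_charge) {
        // Replace the range with a fresh PROT_NONE mapping: drops the pages
        // and the commit charge in a single call.
        mmap(addr, size, PROT_NONE,
             MAP_ANONYMOUS|MAP_PRIVATE|MAP_NORESERVE|MAP_FIXED, -1, 0);
    } else if (madvise(addr, size, MADV_FREE)) {
        // Lazy MADV_FREE is unavailable: fall back to eager MADV_DONTNEED.
        madvise(addr, size, MADV_DONTNEED);
    }
}
```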
78 | 79 | ### Rest of the implementation 80 | 81 | - major allocation classes: 82 | - huge: spans of chunks, always chunk-aligned 83 | - small/large: managed within chunks, never chunk-aligned 84 | 85 | - arenas: 86 | - assign chunks to per-core arenas 87 | - pick a preferred arena with sched_getcpu and update it on contention 88 | - separate chunks for small/large, distinguished via chunk header flag 89 | - per-arena cache of the most recently freed chunk(s) 90 | 91 | - large allocations: 92 | - allocation headers for freeing allocations and coalescing: 93 | - find the next span with `addr + size` for forward coalescing 94 | - maintain a pointer to the previous span for backward coalescing 95 | - spans must be a multiple of the header size, which is 4x pointer-size 96 | - headers act as spacers and prevent false sharing with 64-bit pointers 97 | and 64 byte cachelines 98 | - intrusive tree keyed by (size, addr) for address-ordered best-fit 99 | - the span headers are the tree nodes 100 | - chunks are released when a free span covers the entire usable area 101 | 102 | - small allocations: 103 | - per-arena slab LIFO free lists: 104 | - empty slabs 105 | - partially filled slabs: doubly-linked list per size class 106 | - empty slabs are returned to the empty slab list 107 | - per-slab LIFO free list 108 | - per-thread LIFO free list 109 | 110 | ## Future improvements 111 | 112 | See the issue tracker. 113 | -------------------------------------------------------------------------------- /alloc.c: -------------------------------------------------------------------------------- 1 | #define RB_COMPACT 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include "arena.h" 19 | #include "bump.h" 20 | #include "chunk.h" 21 | #include "huge.h" 22 | #include "memory.h" 23 | #include "mutex.h" 24 | #include "purge.h" 25 | #include "util.h" 26 | #include "rb.h" 27 | 28 | #ifndef thread_local 29 | #define thread_local _Thread_local 30 | #endif 31 | 32 | #define LARGE_ALIGN (sizeof(struct large)) 33 | #define LARGE_MASK (sizeof(struct large) - 1) 34 | #define MIN_ALIGN 16 35 | #define SLAB_SIZE (64 * 1024) 36 | #define CACHE_SIZE (16 * 1024) 37 | #define MAX_SMALL 512 38 | #define LARGE_CHUNK_HEADER ((sizeof(struct chunk) + LARGE_MASK) & ~LARGE_MASK) 39 | #define MAX_LARGE (CHUNK_SIZE - (LARGE_CHUNK_HEADER + sizeof(struct large))) 40 | 41 | #if INTPTR_MAX == INT32_MAX 42 | #define INITIAL_VA ((size_t)256 * 1024 * 1024) 43 | #else 44 | #define INITIAL_VA ((size_t)1024 * 1024 * 1024 * 1024) 45 | #endif 46 | 47 | static_assert(INITIAL_VA % CHUNK_SIZE == 0, "INITIAL_VA not a multiple of CHUNK_SIZE"); 48 | 49 | static int large_addr_comp(struct large *a, struct large *b) { 50 | uintptr_t a_addr = (uintptr_t)a; 51 | uintptr_t b_addr = (uintptr_t)b; 52 | return (a_addr > b_addr) - (a_addr < b_addr); 53 | } 54 | 55 | static int large_size_addr_comp(struct large *a, struct large *b) { 56 | size_t a_size = a->size; 57 | size_t b_size = b->size; 58 | 59 | int ret = (a_size > b_size) - (a_size < b_size); 60 | if (ret) { 61 | return ret; 62 | } 63 | 64 | return large_addr_comp(a, b); 65 | } 66 | 67 | rb_gen(, large_tree_size_addr_, large_tree, struct large, link_size_addr, large_size_addr_comp) 68 | 69 | static bool init_failed = false; 70 | static atomic_bool initialized = ATOMIC_VAR_INIT(false); 71 | static mutex init_mutex = MUTEX_INITIALIZER; 72 | 73 | static 
struct arena *arenas; 74 | static int n_arenas = 0; 75 | 76 | static void *reserved_start; 77 | static void *reserved_end; 78 | static size_t arena_initial_va_log2; 79 | 80 | static pthread_key_t tcache_key; 81 | 82 | __attribute__((tls_model("initial-exec"))) 83 | static thread_local struct thread_cache tcache = {{NULL}, {0}, -1, true}; 84 | 85 | struct arena *get_huge_arena(void *ptr) { 86 | if (ptr >= reserved_start && ptr < reserved_end) { 87 | size_t diff = (char *)ptr - (char *)reserved_start; 88 | return arenas + (diff >> arena_initial_va_log2); 89 | } 90 | return NULL; 91 | } 92 | 93 | static inline struct slab *to_slab(void *ptr) { 94 | return ALIGNMENT_ADDR2BASE(ptr, SLAB_SIZE); 95 | } 96 | 97 | static void slab_deallocate(struct arena *arena, struct slab *slab, struct slot *ptr, size_t bin); 98 | 99 | static void tcache_destroy(void *key) { 100 | struct thread_cache *cache = key; 101 | for (int a = 0; a < n_arenas; a++) { 102 | struct arena *arena = &arenas[a]; 103 | bool locked = false; 104 | for (size_t bin = 0; bin < N_CLASS; bin++) { 105 | struct slot **last_next = &cache->bin[bin]; 106 | struct slot *slot = cache->bin[bin]; 107 | 108 | while (slot) { 109 | struct slot *next = slot->next; 110 | struct chunk *chunk = CHUNK_ADDR2BASE(slot); 111 | assert(chunk->small); 112 | if (chunk->arena == a) { 113 | if (!locked) { 114 | mutex_lock(&arena->mutex); 115 | locked = true; 116 | } 117 | slab_deallocate(arena, to_slab(slot), slot, bin); 118 | *last_next = next; 119 | } else { 120 | last_next = &slot->next; 121 | } 122 | slot = next; 123 | } 124 | } 125 | if (locked) { 126 | mutex_unlock(&arena->mutex); 127 | } 128 | } 129 | cache->dead = true; 130 | } 131 | 132 | static void pick_arena(struct thread_cache *cache) { 133 | cache->arena_index = sched_getcpu(); 134 | if (unlikely(cache->arena_index == -1 || cache->arena_index > n_arenas)) { 135 | cache->arena_index = 0; 136 | } 137 | } 138 | 139 | static void thread_init(struct thread_cache *cache) { 140 | pick_arena(cache); 141 | if (likely(!pthread_setspecific(tcache_key, cache))) { 142 | cache->dead = false; 143 | } 144 | } 145 | 146 | static bool malloc_init_slow(struct thread_cache *cache) { 147 | if (likely(atomic_load_explicit(&initialized, memory_order_consume))) { 148 | thread_init(cache); 149 | return false; 150 | } 151 | 152 | mutex_lock(&init_mutex); 153 | 154 | if (atomic_load_explicit(&initialized, memory_order_consume)) { 155 | mutex_unlock(&init_mutex); 156 | thread_init(cache); 157 | return false; 158 | } 159 | 160 | if (unlikely(init_failed)) { 161 | return true; 162 | } 163 | 164 | n_arenas = get_nprocs(); 165 | arenas = bump_alloc(sizeof(struct arena) * n_arenas, alignof(struct arena)); 166 | if (!arenas) { 167 | init_failed = true; 168 | mutex_unlock(&init_mutex); 169 | return true; 170 | } 171 | 172 | if (pthread_key_create(&tcache_key, tcache_destroy)) { 173 | init_failed = true; 174 | mutex_unlock(&init_mutex); 175 | return true; 176 | } 177 | 178 | memory_init(); 179 | chunk_init(); 180 | huge_init(); 181 | purge_init(); 182 | 183 | struct rlimit limit; 184 | void *reserved = NULL; 185 | arena_initial_va_log2 = size_log2(INITIAL_VA / n_arenas); 186 | size_t arena_initial_va = (size_t)1 << arena_initial_va_log2; 187 | size_t total_initial_va = arena_initial_va * n_arenas; 188 | if (arena_initial_va >= CHUNK_SIZE 189 | && !getrlimit(RLIMIT_AS, &limit) && limit.rlim_cur == RLIM_INFINITY) { 190 | reserved = memory_map_aligned(NULL, total_initial_va, CHUNK_SIZE, false); 191 | if (reserved) { 192 | reserved_start 
= reserved; 193 | reserved_end = (char *)reserved + total_initial_va; 194 | } 195 | } 196 | 197 | for (int i = 0; i < n_arenas; i++) { 198 | struct arena *arena = &arenas[i]; 199 | if (mutex_init(&arena->mutex)) { 200 | init_failed = true; 201 | mutex_unlock(&init_mutex); 202 | return true; 203 | } 204 | for (size_t bin = 0; bin < N_CLASS; bin++) { 205 | #ifndef NDEBUG 206 | arena->partial_slab[bin].prev = (struct slab *)0xdeadbeef; 207 | #endif 208 | arena->partial_slab[bin].next = &arena->partial_slab[bin]; 209 | } 210 | large_tree_size_addr_new(&arena->large_size_addr); 211 | extent_tree_ad_new(&arena->huge); 212 | 213 | chunk_recycler_init(&arena->chunks); 214 | if (reserved) { 215 | chunk_free(&arena->chunks, reserved, arena_initial_va); 216 | arena->chunks_start = reserved; 217 | reserved = arena->chunks_end = (char *)reserved + arena_initial_va; 218 | } 219 | } 220 | 221 | atomic_store_explicit(&initialized, true, memory_order_release); 222 | 223 | mutex_unlock(&init_mutex); 224 | thread_init(cache); 225 | return false; 226 | } 227 | 228 | static bool malloc_init(struct thread_cache *cache) { 229 | if (likely(cache->arena_index != -1)) { 230 | return false; 231 | } 232 | return malloc_init_slow(cache); 233 | } 234 | 235 | inline struct arena *get_arena(struct thread_cache *cache) { 236 | if (unlikely(mutex_trylock(&arenas[cache->arena_index].mutex))) { 237 | pick_arena(cache); 238 | mutex_lock(&arenas[cache->arena_index].mutex); 239 | } 240 | return &arenas[cache->arena_index]; 241 | } 242 | 243 | static void *arena_chunk_alloc(struct arena *arena) { 244 | if (arena->free_chunk) { 245 | struct chunk *chunk = arena->free_chunk; 246 | arena->free_chunk = NULL; 247 | return chunk; 248 | } 249 | void *chunk = chunk_recycle(&arena->chunks, NULL, CHUNK_SIZE, CHUNK_SIZE); 250 | if (chunk) { 251 | if (unlikely(memory_commit(chunk, CHUNK_SIZE))) { 252 | chunk_free(&arena->chunks, chunk, CHUNK_SIZE); 253 | return NULL; 254 | } 255 | } else { 256 | chunk = chunk_alloc(NULL, CHUNK_SIZE, CHUNK_SIZE); 257 | if (unlikely(!chunk)) { 258 | return NULL; 259 | } 260 | } 261 | ((struct chunk *)chunk)->arena = arena - arenas; 262 | return chunk; 263 | } 264 | 265 | static void arena_chunk_free(struct arena *arena, void *chunk) { 266 | if (arena->free_chunk) { 267 | if (purge_ratio >= 0) { 268 | memory_decommit(arena->free_chunk, CHUNK_SIZE); 269 | } 270 | if (chunk >= arena->chunks_start && chunk < arena->chunks_end) { 271 | chunk_free(&arena->chunks, arena->free_chunk, CHUNK_SIZE); 272 | } else { 273 | chunk_free(NULL, arena->free_chunk, CHUNK_SIZE); 274 | } 275 | } 276 | arena->free_chunk = chunk; 277 | } 278 | 279 | static void *slab_first_alloc(struct arena *arena, struct slab *slab, size_t size, size_t bin) { 280 | slab->prev = &arena->partial_slab[bin]; 281 | slab->size = size; 282 | slab->count = 1; 283 | void *first = (void *)ALIGNMENT_CEILING((uintptr_t)slab->data, MIN_ALIGN); 284 | slab->next_slot = (struct slot *)((char *)first + size); 285 | slab->next_slot->next = NULL; 286 | slab->end = (struct slot *)((char *)slab->next_slot + size); 287 | return first; 288 | } 289 | 290 | static void *slab_allocate(struct arena *arena, size_t size, size_t bin) { 291 | // check for the sentinel node terminating the list 292 | if (!arena->partial_slab[bin].next->next_slot) { 293 | if (arena->free_slab) { 294 | struct slab *slab = arena->free_slab; 295 | arena->free_slab = arena->free_slab->next; 296 | 297 | slab->next = arena->partial_slab[bin].next; 298 | arena->partial_slab[bin].next = slab; 299 | 300 | 
return slab_first_alloc(arena, slab, size, bin); 301 | } 302 | 303 | struct chunk *chunk = arena_chunk_alloc(arena); 304 | if (unlikely(!chunk)) { 305 | return NULL; 306 | } 307 | chunk->small = true; 308 | 309 | struct slab *slab = (struct slab *)ALIGNMENT_CEILING((uintptr_t)chunk->data, SLAB_SIZE); 310 | slab->next = arena->partial_slab[bin].next; 311 | arena->partial_slab[bin].next = slab; 312 | 313 | void *chunk_end = (char *)chunk + CHUNK_SIZE; 314 | while ((uintptr_t)slab + SLAB_SIZE < (uintptr_t)chunk_end) { 315 | slab = (struct slab *)((char *)slab + SLAB_SIZE); 316 | slab->next = arena->free_slab; 317 | arena->free_slab = slab; 318 | } 319 | 320 | return slab_first_alloc(arena, arena->partial_slab[bin].next, size, bin); 321 | } 322 | 323 | struct slab *slab = arena->partial_slab[bin].next; 324 | struct slot *slot = slab->next_slot; 325 | slab->next_slot = slot->next; 326 | slab->count++; 327 | if (!slab->next_slot) { 328 | uintptr_t new_end = (uintptr_t)slab->end + size; 329 | if (new_end > (uintptr_t)slab + SLAB_SIZE) { 330 | struct slab *next = slab->next; 331 | next->prev = &arena->partial_slab[bin]; 332 | arena->partial_slab[bin].next = next; 333 | } else { 334 | slab->next_slot = slab->end; 335 | slab->next_slot->next = NULL; 336 | slab->end = (struct slot *)new_end; 337 | } 338 | } 339 | 340 | return slot; 341 | } 342 | 343 | static size_t size2bin(size_t size) { 344 | return (size >> 4) - 1; 345 | } 346 | 347 | static void slab_deallocate(struct arena *arena, struct slab *slab, struct slot *slot, size_t bin) { 348 | slot->next = slab->next_slot; 349 | slab->next_slot = slot; 350 | slab->count--; 351 | 352 | if (!slot->next) { 353 | struct slab *next = arena->partial_slab[bin].next; 354 | slab->next = next; 355 | slab->prev = &arena->partial_slab[bin]; 356 | next->prev = slab; 357 | arena->partial_slab[bin].next = slab; 358 | } else if (!slab->count) { 359 | slab->prev->next = slab->next; 360 | slab->next->prev = slab->prev; 361 | 362 | slab->next = arena->free_slab; 363 | arena->free_slab = slab; 364 | } 365 | } 366 | 367 | static inline void *allocate_small(struct thread_cache *cache, size_t size) { 368 | size_t bin = size2bin(size); 369 | 370 | if (unlikely(cache->dead)) { 371 | if (cache->arena_index == -1 && unlikely(malloc_init(cache))) { 372 | return NULL; 373 | } 374 | if (unlikely(cache->dead)) { 375 | struct arena *arena = get_arena(cache); 376 | void *ptr = slab_allocate(arena, size, bin); 377 | mutex_unlock(&arena->mutex); 378 | return ptr; 379 | } 380 | } 381 | 382 | struct slot *slot = cache->bin[bin]; 383 | if (likely(slot)) { 384 | cache->bin[bin] = slot->next; 385 | cache->bin_size[bin] -= size; 386 | return slot; 387 | } 388 | 389 | struct arena *arena = get_arena(cache); 390 | void *ptr = slab_allocate(arena, size, bin); 391 | 392 | while (cache->bin_size[bin] + size < CACHE_SIZE / 2) { 393 | struct slot *slot = slab_allocate(arena, size, bin); 394 | if (!slot) { 395 | mutex_unlock(&arena->mutex); 396 | return ptr; 397 | } 398 | slot->next = cache->bin[bin]; 399 | cache->bin[bin] = slot; 400 | cache->bin_size[bin] += size; 401 | } 402 | 403 | mutex_unlock(&arena->mutex); 404 | return ptr; 405 | } 406 | 407 | static const struct large *const used_sentinel = (void *)0x1; 408 | 409 | static bool is_used(const struct large *large) { 410 | return large->link_size_addr.rbn_left == used_sentinel; 411 | } 412 | 413 | static void mark_used(struct large *large) { 414 | large->link_size_addr.rbn_left = (struct large *)used_sentinel; 415 | } 416 | 417 | static struct 
large *to_head(void *ptr) { 418 | return (struct large *)((char *)ptr - sizeof(struct large)); 419 | } 420 | 421 | static void update_next_span(void *ptr, size_t size) { 422 | struct large *next = (struct large *)((char *)ptr + size); 423 | if (next <= to_head((void *)CHUNK_CEILING((uintptr_t)next))) { 424 | next->prev = ptr; 425 | } 426 | } 427 | 428 | static void large_free(struct arena *arena, void *span, size_t size) { 429 | struct large *self = span; 430 | self->size = size; 431 | 432 | struct large *next = (void *)((char *)span + size); 433 | 434 | // Try to coalesce forward. 435 | if (next <= to_head((void *)CHUNK_CEILING((uintptr_t)next)) && !is_used(next)) { 436 | // Coalesce span with the following address range. 437 | large_tree_size_addr_remove(&arena->large_size_addr, next); 438 | self->size += next->size; 439 | } 440 | 441 | // Try to coalesce backward. 442 | struct large *prev = ((struct large *)span)->prev; 443 | if (prev && !is_used(prev)) { 444 | // Coalesce span with the previous address range. 445 | assert((char *)prev + prev->size == (char *)self); 446 | large_tree_size_addr_remove(&arena->large_size_addr, prev); 447 | size_t new_size = self->size + prev->size; 448 | self = prev; 449 | self->size = new_size; 450 | } 451 | 452 | if (self->size == CHUNK_SIZE - LARGE_CHUNK_HEADER) { 453 | arena_chunk_free(arena, (struct chunk *)((char *)self - LARGE_CHUNK_HEADER)); 454 | } else { 455 | large_tree_size_addr_insert(&arena->large_size_addr, self); 456 | update_next_span(self, self->size); 457 | } 458 | } 459 | 460 | static struct large *large_recycle(struct arena *arena, size_t size, size_t alignment) { 461 | size_t full_size = size + sizeof(struct large); 462 | size_t alloc_size = full_size + alignment - LARGE_ALIGN; 463 | assert(alloc_size >= full_size); 464 | struct large key; 465 | key.size = alloc_size; 466 | struct large *span = large_tree_size_addr_nsearch(&arena->large_size_addr, &key); 467 | if (!span) { 468 | return NULL; 469 | } 470 | 471 | void *data = (void *)ALIGNMENT_CEILING((uintptr_t)span + sizeof(struct large), alignment); 472 | struct large *head = to_head(data); 473 | 474 | size_t leadsize = (char *)head - (char *)span; 475 | assert(span->size >= leadsize + full_size); 476 | size_t trailsize = span->size - leadsize - full_size; 477 | 478 | // Remove free span from the tree. 479 | large_tree_size_addr_remove(&arena->large_size_addr, span); 480 | if (leadsize) { 481 | // Insert the leading space as a smaller span. 482 | span->size = leadsize; 483 | large_tree_size_addr_insert(&arena->large_size_addr, span); 484 | update_next_span(span, span->size); 485 | } 486 | if (trailsize) { 487 | // Insert the trailing space as a smaller span. 
488 | struct large *trail = (struct large *)((char *)head + full_size); 489 | trail->size = trailsize; 490 | large_tree_size_addr_insert(&arena->large_size_addr, trail); 491 | update_next_span(trail, trail->size); 492 | } 493 | 494 | update_next_span(head, full_size); 495 | head->size = size; 496 | mark_used(head); 497 | return head; 498 | } 499 | 500 | static void *allocate_large(struct thread_cache *cache, size_t size, size_t alignment) { 501 | assert(alignment >= LARGE_ALIGN); 502 | 503 | struct arena *arena = get_arena(cache); 504 | 505 | struct large *head = large_recycle(arena, size, alignment); 506 | if (head) { 507 | mutex_unlock(&arena->mutex); 508 | return head->data; 509 | } 510 | 511 | struct chunk *chunk = arena_chunk_alloc(arena); 512 | if (unlikely(!chunk)) { 513 | mutex_unlock(&arena->mutex); 514 | return NULL; 515 | } 516 | chunk->small = false; 517 | 518 | void *base = (char *)chunk + LARGE_CHUNK_HEADER; 519 | void *data = (void *)ALIGNMENT_CEILING((uintptr_t)base + sizeof(struct large), alignment); 520 | head = to_head(data); 521 | head->size = size; 522 | head->prev = NULL; 523 | 524 | update_next_span(head, size + sizeof(struct large)); 525 | mark_used(head); 526 | 527 | if (head != base) { 528 | assert(alignment > MIN_ALIGN); 529 | size_t lead = (char *)head - (char *)base; 530 | head = (struct large *)((char *)base); 531 | head->size = lead; 532 | head->prev = NULL; 533 | large_free(arena, base, lead); 534 | } 535 | 536 | void *end = (char *)head->data + size; 537 | void *chunk_end = (char *)chunk + CHUNK_SIZE; 538 | if (end != chunk_end) { 539 | large_free(arena, end, (char *)chunk_end - (char *)end); 540 | } 541 | 542 | mutex_unlock(&arena->mutex); 543 | 544 | return head->data; 545 | } 546 | 547 | static bool large_expand_recycle(struct arena *arena, void *new_addr, size_t size) { 548 | assert(new_addr); 549 | assert(ALIGNMENT_ADDR2BASE(new_addr, MIN_ALIGN) == new_addr); 550 | 551 | if (new_addr > (void *)to_head((void *)CHUNK_CEILING((uintptr_t)new_addr))) { 552 | return true; 553 | } 554 | 555 | struct large *next = new_addr; 556 | if (is_used(next) || next->size < size) { 557 | return true; 558 | } 559 | 560 | // Remove node from the tree. 561 | large_tree_size_addr_remove(&arena->large_size_addr, next); 562 | 563 | size_t trailsize = next->size - size; 564 | if (trailsize) { 565 | // Insert the trailing space as a smaller span. 
566 | struct large *trail = (struct large *)((char *)next + size); 567 | trail->size = trailsize; 568 | large_tree_size_addr_insert(&arena->large_size_addr, trail); 569 | update_next_span(trail, trail->size); 570 | } 571 | 572 | return false; 573 | } 574 | 575 | static bool large_realloc_no_move(void *ptr, size_t old_size, size_t new_size) { 576 | struct chunk *chunk = CHUNK_ADDR2BASE(ptr); 577 | assert(!chunk->small); 578 | struct arena *arena = &arenas[chunk->arena]; 579 | struct large *head = to_head(ptr); 580 | 581 | if (old_size < new_size) { 582 | void *expand_addr = (char *)ptr + old_size; 583 | size_t expand_size = new_size - old_size; 584 | 585 | mutex_lock(&arena->mutex); 586 | if (large_expand_recycle(arena, expand_addr, expand_size)) { 587 | mutex_unlock(&arena->mutex); 588 | return true; 589 | } 590 | head->size = new_size; 591 | update_next_span(head, new_size + sizeof(struct large)); 592 | mutex_unlock(&arena->mutex); 593 | } else if (new_size < old_size) { 594 | void *excess_addr = (char *)ptr + new_size; 595 | size_t excess_size = old_size - new_size; 596 | 597 | mutex_lock(&arena->mutex); 598 | update_next_span(head, new_size + sizeof(struct large)); 599 | head->size = new_size; 600 | large_free(arena, excess_addr, excess_size); 601 | mutex_unlock(&arena->mutex); 602 | } 603 | 604 | return false; 605 | } 606 | 607 | static inline void *allocate(struct thread_cache *cache, size_t size) { 608 | if (size <= MAX_SMALL) { 609 | size_t non_zero_size = size | (!size); 610 | size_t real_size = (non_zero_size + 15) & ~15; 611 | return allocate_small(cache, real_size); 612 | } 613 | 614 | if (unlikely(malloc_init(cache))) { 615 | return NULL; 616 | } 617 | 618 | if (size <= MAX_LARGE) { 619 | size_t real_size = (size + LARGE_MASK) & ~LARGE_MASK; 620 | return allocate_large(cache, real_size, LARGE_ALIGN); 621 | } 622 | return huge_alloc(cache, size, CHUNK_SIZE); 623 | } 624 | 625 | static inline void deallocate_small(struct thread_cache *cache, void *ptr) { 626 | struct slot *slot = ptr; 627 | struct slab *slab = to_slab(slot); 628 | size_t size = slab->size; 629 | size_t bin = size2bin(size); 630 | 631 | if (unlikely(cache->dead)) { 632 | if (cache->arena_index == -1) { 633 | thread_init(cache); 634 | } 635 | if (unlikely(cache->dead)) { 636 | struct chunk *chunk = CHUNK_ADDR2BASE(slot); 637 | struct arena *arena = &arenas[chunk->arena]; 638 | mutex_lock(&arena->mutex); 639 | slab_deallocate(arena, slab, slot, bin); 640 | mutex_unlock(&arena->mutex); 641 | return; 642 | } 643 | } 644 | 645 | slot->next = cache->bin[bin]; 646 | cache->bin[bin] = slot; 647 | cache->bin_size[bin] += size; 648 | 649 | if (unlikely(cache->bin_size[bin] > CACHE_SIZE)) { 650 | cache->bin_size[bin] = size; 651 | while (cache->bin_size[bin] < CACHE_SIZE / 2) { 652 | slot = slot->next; 653 | assert(slot); 654 | cache->bin_size[bin] += size; 655 | } 656 | 657 | struct slot *flush = slot->next; 658 | slot->next = NULL; 659 | 660 | do { 661 | struct slot *slot = flush; 662 | flush = NULL; 663 | 664 | int arena_index = ((struct chunk *)CHUNK_ADDR2BASE(slot))->arena; 665 | struct arena *arena = &arenas[arena_index]; 666 | mutex_lock(&arena->mutex); 667 | do { 668 | struct slot *next = slot->next; 669 | 670 | struct chunk *chunk = CHUNK_ADDR2BASE(slot); 671 | assert(chunk->small); 672 | if (chunk->arena == arena_index) { 673 | slab_deallocate(arena, to_slab(slot), slot, bin); 674 | slot = slot->next; 675 | } else { 676 | slot->next = flush; 677 | flush = slot; 678 | } 679 | 680 | slot = next; 681 | } while (slot); 
682 | mutex_unlock(&arena->mutex); 683 | } while (flush); 684 | } 685 | } 686 | 687 | static inline void deallocate(struct thread_cache *cache, void *ptr) { 688 | // malloc_init has been called if the pointer is non-NULL 689 | assert(!ptr || atomic_load(&initialized)); 690 | 691 | struct chunk *chunk = CHUNK_ADDR2BASE(ptr); 692 | if (ptr == chunk) { 693 | if (!ptr) { 694 | return; 695 | } 696 | huge_free(ptr); 697 | return; 698 | } 699 | if (chunk->small) { 700 | deallocate_small(cache, ptr); 701 | } else { 702 | struct arena *arena = &arenas[chunk->arena]; 703 | mutex_lock(&arena->mutex); 704 | struct large *head = to_head(ptr); 705 | large_free(arena, head, head->size + sizeof(struct large)); 706 | mutex_unlock(&arena->mutex); 707 | } 708 | } 709 | 710 | static size_t alloc_size(void *ptr) { 711 | // malloc_init has been called if the pointer is non-NULL 712 | assert(!ptr || atomic_load(&initialized)); 713 | 714 | struct chunk *chunk = CHUNK_ADDR2BASE(ptr); 715 | if (ptr == chunk) { 716 | if (!ptr) { 717 | return 0; 718 | } 719 | return huge_alloc_size(ptr); 720 | } 721 | if (chunk->small) { 722 | return to_slab(ptr)->size; 723 | } 724 | return to_head(ptr)->size; 725 | } 726 | 727 | static int alloc_aligned_result(void **memptr, void *ptr) { 728 | if (unlikely(!ptr)) { 729 | return ENOMEM; 730 | } 731 | *memptr = ptr; 732 | return 0; 733 | } 734 | 735 | static int alloc_aligned(void **memptr, size_t alignment, size_t size, size_t min_alignment) { 736 | assert(min_alignment != 0); 737 | 738 | if (unlikely((alignment - 1) & alignment || alignment < min_alignment)) { 739 | return EINVAL; 740 | } 741 | 742 | struct thread_cache *cache = &tcache; 743 | 744 | if (alignment <= MIN_ALIGN) { 745 | return alloc_aligned_result(memptr, allocate(cache, size)); 746 | } 747 | 748 | size_t non_zero_size = size | (!size); 749 | size_t large_size = (non_zero_size + LARGE_MASK) & ~LARGE_MASK; 750 | size_t large_alignment = (alignment + LARGE_MASK) & ~LARGE_MASK; 751 | size_t worst_large_size = large_size + large_alignment - LARGE_ALIGN; 752 | if (unlikely(worst_large_size < size)) { 753 | return ENOMEM; 754 | } 755 | 756 | if (unlikely(malloc_init(cache))) { 757 | return ENOMEM; 758 | } 759 | 760 | if (worst_large_size <= MAX_LARGE) { 761 | return alloc_aligned_result(memptr, allocate_large(cache, large_size, large_alignment)); 762 | } 763 | return alloc_aligned_result(memptr, huge_alloc(cache, size, CHUNK_CEILING(alignment))); 764 | } 765 | 766 | static void *alloc_aligned_simple(size_t alignment, size_t size) { 767 | void *ptr; 768 | int ret = alloc_aligned(&ptr, alignment, size, 1); 769 | if (unlikely(ret)) { 770 | errno = ret; 771 | return NULL; 772 | } 773 | return ptr; 774 | } 775 | 776 | EXPORT void *malloc(size_t size) { 777 | void *ptr = allocate(&tcache, size); 778 | if (unlikely(!ptr)) { 779 | errno = ENOMEM; 780 | return NULL; 781 | } 782 | return ptr; 783 | } 784 | 785 | EXPORT void *calloc(size_t nmemb, size_t size) { 786 | size_t total; 787 | if (unlikely(size_mul_overflow(nmemb, size, &total))) { 788 | errno = ENOMEM; 789 | return NULL; 790 | } 791 | void *new_ptr = allocate(&tcache, total); 792 | if (unlikely(!new_ptr)) { 793 | errno = ENOMEM; 794 | return NULL; 795 | } 796 | memset(new_ptr, 0, total); 797 | return new_ptr; 798 | } 799 | 800 | EXPORT void *realloc(void *ptr, size_t size) { 801 | if (!ptr) { 802 | return malloc(size); 803 | } 804 | 805 | // malloc_init has been called 806 | assert(atomic_load(&initialized)); 807 | 808 | struct thread_cache *cache = &tcache; 809 | 810 | // 
Marked obsolete in DR400 811 | if (unlikely(!size)) { 812 | deallocate(cache, ptr); 813 | return NULL; 814 | } 815 | 816 | size_t old_size = alloc_size(ptr); 817 | 818 | size_t real_size = (size + 15) & ~15; 819 | if (old_size == real_size) { 820 | return ptr; 821 | } 822 | 823 | if (old_size <= MAX_LARGE && real_size <= MAX_LARGE && 824 | old_size > MAX_SMALL && real_size > MAX_SMALL) { 825 | size_t real_size = (size + LARGE_MASK) & ~LARGE_MASK; 826 | if (!large_realloc_no_move(ptr, old_size, real_size)) { 827 | return ptr; 828 | } 829 | } 830 | 831 | if (old_size > MAX_LARGE && size > MAX_LARGE) { 832 | return huge_realloc(cache, ptr, old_size, CHUNK_CEILING(size)); 833 | } 834 | 835 | void *new_ptr = allocate(cache, size); 836 | if (unlikely(!new_ptr)) { 837 | errno = ENOMEM; 838 | return NULL; 839 | } 840 | size_t copy_size = size < old_size ? size : old_size; 841 | memcpy(new_ptr, ptr, copy_size); 842 | deallocate(cache, ptr); 843 | return new_ptr; 844 | } 845 | 846 | EXPORT void free(void *ptr) { 847 | deallocate(&tcache, ptr); 848 | } 849 | 850 | EXPORT void cfree(void *ptr) __attribute__((alias("free"))); 851 | 852 | EXPORT int posix_memalign(void **memptr, size_t alignment, size_t size) { 853 | return alloc_aligned(memptr, alignment, size, sizeof(void *)); 854 | } 855 | 856 | EXPORT void *aligned_alloc(size_t alignment, size_t size) { 857 | // Comply with the semantics specified in DR460 858 | if (unlikely(size % alignment)) { 859 | errno = EINVAL; 860 | return NULL; 861 | } 862 | return alloc_aligned_simple(alignment, size); 863 | } 864 | 865 | EXPORT void *memalign(size_t alignment, size_t size) { 866 | return alloc_aligned_simple(alignment, size); 867 | } 868 | 869 | EXPORT void *valloc(size_t size) { 870 | return alloc_aligned_simple(PAGE_SIZE, size); 871 | } 872 | 873 | EXPORT void *pvalloc(size_t size) { 874 | size_t rounded = PAGE_CEILING(size); 875 | if (unlikely(!rounded)) { 876 | errno = ENOMEM; 877 | return NULL; 878 | } 879 | return alloc_aligned_simple(PAGE_SIZE, rounded); 880 | } 881 | 882 | EXPORT size_t malloc_usable_size(void *ptr) { 883 | return alloc_size(ptr); 884 | } 885 | 886 | COLD EXPORT int malloc_trim(UNUSED size_t pad) { 887 | return 0; 888 | } 889 | 890 | COLD EXPORT void malloc_stats(void) {} 891 | 892 | COLD EXPORT struct mallinfo mallinfo(void) { 893 | return (struct mallinfo){0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; 894 | } 895 | 896 | COLD EXPORT int mallopt(UNUSED int param, UNUSED int value) { 897 | return 1; 898 | } 899 | 900 | COLD EXPORT int malloc_info(UNUSED int options, UNUSED FILE *fp) { 901 | return ENOSYS; 902 | } 903 | 904 | COLD EXPORT void *malloc_get_state(void) { 905 | return NULL; 906 | } 907 | 908 | COLD EXPORT int malloc_set_state(UNUSED void *state) { 909 | return -2; 910 | } 911 | -------------------------------------------------------------------------------- /arena.h: -------------------------------------------------------------------------------- 1 | #ifndef ARENA_H 2 | #define ARENA_H 3 | 4 | #define RB_COMPACT 5 | 6 | #include 7 | 8 | #include "chunk.h" 9 | #include "memory.h" 10 | #include "mutex.h" 11 | #include "rb.h" 12 | 13 | #define N_CLASS 32 14 | 15 | struct large { 16 | size_t size; 17 | void *prev; 18 | rb_node(struct large) link_size_addr; 19 | max_align_t data[]; 20 | }; 21 | 22 | typedef rb_tree(struct large) large_tree; 23 | rb_proto(, large_tree_size_addr_, large_tree, struct large) 24 | 25 | struct slot { 26 | struct slot *next; 27 | uint8_t data[]; 28 | }; 29 | 30 | struct slab { 31 | struct slab *next; 32 | struct 
slab *prev; 33 | 34 | size_t size; 35 | struct slot *next_slot; 36 | struct slot *end; 37 | 38 | uint16_t count; 39 | uint8_t data[]; 40 | }; 41 | 42 | struct chunk { 43 | int arena; 44 | bool small; 45 | max_align_t data[]; 46 | }; 47 | 48 | struct arena { 49 | alignas(CACHELINE) mutex mutex; 50 | 51 | // intrusive singly-linked list 52 | struct slab *free_slab; 53 | 54 | // intrusive circular doubly-linked list, with this sentinel node at both ends 55 | struct slab partial_slab[N_CLASS]; 56 | 57 | large_tree large_size_addr; 58 | struct chunk *free_chunk; 59 | 60 | struct chunk_recycler chunks; 61 | void *chunks_start; 62 | void *chunks_end; 63 | 64 | struct extent_node *huge_nodes; 65 | extent_tree huge; 66 | }; 67 | 68 | struct thread_cache { 69 | struct slot *bin[N_CLASS]; 70 | size_t bin_size[N_CLASS]; 71 | int arena_index; // -1 if uninitialized 72 | bool dead; // true if destroyed or uninitialized 73 | }; 74 | 75 | struct arena *get_huge_arena(void *ptr); 76 | struct arena *get_arena(struct thread_cache *cache); 77 | 78 | #endif 79 | -------------------------------------------------------------------------------- /bump.c: -------------------------------------------------------------------------------- 1 | #include "bump.h" 2 | #include "chunk.h" 3 | #include "memory.h" 4 | #include "mutex.h" 5 | #include "util.h" 6 | 7 | static mutex bump_mutex = MUTEX_INITIALIZER; 8 | static void *bump; 9 | static void *bump_end; 10 | 11 | void *bump_alloc(size_t size, size_t align) { 12 | assert(align <= PAGE_SIZE); 13 | 14 | mutex_lock(&bump_mutex); 15 | 16 | uintptr_t ret = ALIGNMENT_CEILING((uintptr_t)bump, align); 17 | if (ret + size > (uintptr_t)bump_end) { 18 | size_t chunk_size = CHUNK_CEILING(size); 19 | void *ptr = memory_map(NULL, chunk_size, true); 20 | if (!ptr) { 21 | mutex_unlock(&bump_mutex); 22 | return NULL; 23 | } 24 | bump = ptr; 25 | bump_end = (char *)ptr + chunk_size; 26 | ret = (uintptr_t)ptr; 27 | } 28 | 29 | bump = (void *)(ret + size); 30 | mutex_unlock(&bump_mutex); 31 | return (void *)ret; 32 | } 33 | -------------------------------------------------------------------------------- /bump.h: -------------------------------------------------------------------------------- 1 | #ifndef BUMP_H 2 | #define BUMP_H 3 | 4 | #include 5 | 6 | void *bump_alloc(size_t size, size_t align); 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /chunk.c: -------------------------------------------------------------------------------- 1 | #include "chunk.h" 2 | #include "memory.h" 3 | #include "mutex.h" 4 | #include "util.h" 5 | 6 | static struct chunk_recycler chunks_global; 7 | static mutex chunks_mutex = MUTEX_INITIALIZER; 8 | 9 | COLD void chunk_init(void) { 10 | chunk_recycler_init(&chunks_global); 11 | } 12 | 13 | COLD void chunk_recycler_init(struct chunk_recycler *chunks) { 14 | extent_tree_ad_new(&chunks->addr); 15 | extent_tree_szad_new(&chunks->size_addr); 16 | } 17 | 18 | static void maybe_get_chunks(struct chunk_recycler **chunks) { 19 | if (!*chunks) { 20 | *chunks = &chunks_global; 21 | mutex_lock(&chunks_mutex); 22 | } 23 | } 24 | 25 | static void maybe_unlock(struct chunk_recycler *chunks) { 26 | if (chunks == &chunks_global) { 27 | mutex_unlock(&chunks_mutex); 28 | } 29 | } 30 | 31 | void chunk_free(struct chunk_recycler *chunks, void *chunk, size_t size) { 32 | maybe_get_chunks(&chunks); 33 | struct extent_node key; 34 | key.addr = (void *)((uintptr_t)chunk + size); 35 | struct extent_node *node = 
extent_tree_ad_nsearch(&chunks->addr, &key); 36 | /* Try to coalesce forward. */ 37 | if (node && node->addr == key.addr) { 38 | /* 39 | * Coalesce chunk with the following address range. This does 40 | * not change the position within chunks_ad, so only 41 | * remove/insert from/into chunks_szad. 42 | */ 43 | extent_tree_szad_remove(&chunks->size_addr, node); 44 | node->addr = chunk; 45 | node->size += size; 46 | extent_tree_szad_insert(&chunks->size_addr, node); 47 | } else { 48 | node = node_alloc(&chunks->nodes); 49 | /* Coalescing forward failed, so insert a new node. */ 50 | if (!node) { 51 | // Failed to allocate an extent node, so just unmap the chunk(s). 52 | memory_unmap(chunk, size); 53 | goto label_return; 54 | } 55 | node->addr = chunk; 56 | node->size = size; 57 | extent_tree_ad_insert(&chunks->addr, node); 58 | extent_tree_szad_insert(&chunks->size_addr, node); 59 | } 60 | 61 | /* Try to coalesce backward. */ 62 | struct extent_node *prev = extent_tree_ad_prev(&chunks->addr, node); 63 | if (prev && (void *)((uintptr_t)prev->addr + prev->size) == chunk) { 64 | /* 65 | * Coalesce chunk with the previous address range. This does 66 | * not change the position within chunks_ad, so only 67 | * remove/insert node from/into chunks_szad. 68 | */ 69 | extent_tree_szad_remove(&chunks->size_addr, prev); 70 | extent_tree_ad_remove(&chunks->addr, prev); 71 | 72 | extent_tree_szad_remove(&chunks->size_addr, node); 73 | node->addr = prev->addr; 74 | node->size += prev->size; 75 | extent_tree_szad_insert(&chunks->size_addr, node); 76 | 77 | node_free(&chunks->nodes, prev); 78 | } 79 | 80 | label_return: 81 | maybe_unlock(chunks); 82 | } 83 | 84 | void *chunk_recycle(struct chunk_recycler *chunks, void *new_addr, size_t size, size_t alignment) { 85 | size_t alloc_size = size + alignment - CHUNK_SIZE; 86 | 87 | assert(!new_addr || alignment == CHUNK_SIZE); 88 | 89 | /* Beware size_t wrap-around. */ 90 | if (alloc_size < size) 91 | return NULL; 92 | struct extent_node key; 93 | key.addr = new_addr; 94 | key.size = alloc_size; 95 | maybe_get_chunks(&chunks); 96 | struct extent_node *node = new_addr ? extent_tree_ad_search(&chunks->addr, &key) : 97 | extent_tree_szad_nsearch(&chunks->size_addr, &key); 98 | if (!node || (new_addr && node->size < size)) { 99 | return NULL; 100 | } 101 | size_t leadsize = ALIGNMENT_CEILING((uintptr_t)node->addr, alignment) - (uintptr_t)node->addr; 102 | assert(node->size >= leadsize + size); 103 | assert(!new_addr || leadsize == 0); 104 | size_t trailsize = node->size - leadsize - size; 105 | void *ret = (void *)((uintptr_t)node->addr + leadsize); 106 | 107 | /* Remove node from the tree. */ 108 | extent_tree_szad_remove(&chunks->size_addr, node); 109 | extent_tree_ad_remove(&chunks->addr, node); 110 | if (leadsize) { 111 | /* Insert the leading space as a smaller chunk. */ 112 | node->size = leadsize; 113 | extent_tree_szad_insert(&chunks->size_addr, node); 114 | extent_tree_ad_insert(&chunks->addr, node); 115 | node = NULL; 116 | } 117 | if (trailsize) { 118 | /* Insert the trailing space as a smaller chunk. 
*/ 119 | if (!node) { 120 | node = node_alloc(&chunks->nodes); 121 | if (!node) { 122 | maybe_unlock(chunks); 123 | chunk_free(chunks, ret, size); 124 | return NULL; 125 | } 126 | } 127 | node->addr = (void *)((uintptr_t)(ret) + size); 128 | node->size = trailsize; 129 | extent_tree_szad_insert(&chunks->size_addr, node); 130 | extent_tree_ad_insert(&chunks->addr, node); 131 | node = NULL; 132 | } 133 | 134 | if (node) { 135 | node_free(&chunks->nodes, node); 136 | } 137 | 138 | maybe_unlock(chunks); 139 | return ret; 140 | } 141 | 142 | void *chunk_alloc(void *new_addr, size_t size, size_t alignment) { 143 | void *ptr; 144 | if ((ptr = chunk_recycle(&chunks_global, new_addr, size, alignment))) { 145 | if (unlikely(memory_commit(ptr, size))) { 146 | chunk_free(&chunks_global, ptr, size); 147 | return NULL; 148 | } 149 | return ptr; 150 | } 151 | if (new_addr) { 152 | return NULL; 153 | } 154 | if (!(ptr = memory_map(NULL, size, true))) { 155 | return NULL; 156 | } 157 | if (ALIGNMENT_ADDR2OFFSET(ptr, alignment)) { 158 | memory_unmap(ptr, size); 159 | return memory_map_aligned(NULL, size, alignment, true); 160 | } 161 | return ptr; 162 | } 163 | -------------------------------------------------------------------------------- /chunk.h: -------------------------------------------------------------------------------- 1 | #ifndef CHUNK_H 2 | #define CHUNK_H 3 | 4 | #include 5 | #include 6 | 7 | #include "extent.h" 8 | 9 | #define CHUNK_SIZE ((size_t)4096 * 1024) 10 | #define CHUNK_MASK ((size_t)(CHUNK_SIZE - 1)) 11 | // Return the smallest chunk size multiple that is >= s. 12 | #define CHUNK_CEILING(s) (((s) + CHUNK_MASK) & ~CHUNK_MASK) 13 | 14 | /* Return the chunk address for allocation address a. */ 15 | #define CHUNK_ADDR2BASE(a) ((void *)((uintptr_t)(a) & ~CHUNK_MASK)) 16 | 17 | struct chunk_recycler { 18 | extent_tree addr; 19 | extent_tree size_addr; 20 | struct extent_node *nodes; 21 | }; 22 | 23 | void chunk_init(void); 24 | void chunk_recycler_init(struct chunk_recycler *chunks); 25 | void chunk_free(struct chunk_recycler *chunks, void *chunk, size_t size); 26 | void *chunk_recycle(struct chunk_recycler *chunks, void *new_addr, size_t size, size_t alignment); 27 | void *chunk_alloc(void *new_addr, size_t size, size_t alignment); 28 | 29 | #endif 30 | -------------------------------------------------------------------------------- /extent.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "bump.h" 4 | #include "extent.h" 5 | #include "mutex.h" 6 | 7 | static int extent_ad_comp(struct extent_node *a, struct extent_node *b) { 8 | uintptr_t a_addr = (uintptr_t)a->addr; 9 | uintptr_t b_addr = (uintptr_t)b->addr; 10 | return (a_addr > b_addr) - (a_addr < b_addr); 11 | } 12 | 13 | /* Generate red-black tree functions. */ 14 | rb_gen(, extent_tree_ad_, extent_tree, struct extent_node, link_addr, extent_ad_comp) 15 | 16 | static int extent_szad_comp(struct extent_node *a, struct extent_node *b) { 17 | size_t a_size = a->size; 18 | size_t b_size = b->size; 19 | 20 | int ret = (a_size > b_size) - (a_size < b_size); 21 | if (ret) { 22 | return ret; 23 | } 24 | 25 | return extent_ad_comp(a, b); 26 | } 27 | 28 | /* Generate red-black tree functions. 
*/ 29 | rb_gen(, extent_tree_szad_, extent_tree, struct extent_node, link_size_addr, extent_szad_comp) 30 | 31 | struct extent_node *node_alloc(struct extent_node **free_nodes) { 32 | if (*free_nodes) { 33 | struct extent_node *node = *free_nodes; 34 | *free_nodes = node->next; 35 | return node; 36 | } 37 | return bump_alloc(sizeof(struct extent_node), alignof(struct extent_node)); 38 | } 39 | 40 | void node_free(struct extent_node **free_nodes, struct extent_node *node) { 41 | node->next = *free_nodes; 42 | *free_nodes = node; 43 | } 44 | -------------------------------------------------------------------------------- /extent.h: -------------------------------------------------------------------------------- 1 | #ifndef EXTENT_H 2 | #define EXTENT_H 3 | 4 | #define RB_COMPACT 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "rb.h" 12 | #include "util.h" 13 | 14 | struct extent_node { 15 | union { 16 | struct { 17 | void *addr; 18 | size_t size; 19 | rb_node(struct extent_node) link_size_addr; 20 | rb_node(struct extent_node) link_addr; 21 | }; 22 | struct extent_node *next; 23 | }; 24 | }; 25 | 26 | typedef rb_tree(struct extent_node) extent_tree; 27 | rb_proto(, extent_tree_szad_, extent_tree, struct extent_node) 28 | rb_proto(, extent_tree_ad_, extent_tree, struct extent_node) 29 | 30 | struct extent_node *node_alloc(struct extent_node **free_nodes); 31 | void node_free(struct extent_node **free_nodes, struct extent_node *node); 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /huge.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "chunk.h" 4 | #include "extent.h" 5 | #include "huge.h" 6 | #include "memory.h" 7 | #include "mutex.h" 8 | #include "purge.h" 9 | #include "util.h" 10 | 11 | static extent_tree huge_global; 12 | static mutex huge_global_mutex = MUTEX_INITIALIZER; 13 | static struct extent_node *huge_nodes; 14 | 15 | COLD void huge_init(void) { 16 | extent_tree_ad_new(&huge_global); 17 | } 18 | 19 | static struct chunk_recycler *get_recycler(struct arena *arena) { 20 | return arena ? &arena->chunks : NULL; 21 | } 22 | 23 | static struct extent_node **get_huge_nodes(struct arena *arena) { 24 | return arena ? &arena->huge_nodes : &huge_nodes; 25 | } 26 | 27 | static void maybe_lock_arena(struct arena *arena) { 28 | if (arena) { 29 | mutex_lock(&arena->mutex); 30 | } 31 | } 32 | 33 | static void maybe_unlock_arena(struct arena *arena) { 34 | if (arena) { 35 | mutex_unlock(&arena->mutex); 36 | } 37 | } 38 | 39 | static extent_tree *acquire_huge(struct arena *arena) { 40 | if (!arena) { 41 | mutex_lock(&huge_global_mutex); 42 | return &huge_global; 43 | } 44 | return &arena->huge; 45 | } 46 | 47 | static void release_huge(struct arena *arena) { 48 | if (!arena) { 49 | mutex_unlock(&huge_global_mutex); 50 | } 51 | } 52 | 53 | static void *huge_chunk_alloc(struct thread_cache *cache, size_t size, size_t alignment, 54 | struct arena **out_arena) { 55 | struct arena *arena = get_arena(cache); 56 | void *chunk = chunk_recycle(&arena->chunks, NULL, size, alignment); 57 | if (chunk) { 58 | if (unlikely(memory_commit(chunk, size))) { 59 | chunk_free(&arena->chunks, chunk, size); 60 | return NULL; 61 | } 62 | } else { 63 | if (unlikely(!(chunk = chunk_alloc(NULL, size, alignment)))) { 64 | return NULL; 65 | } 66 | 67 | // Work around the possibility of holes created by huge_move_expand (see below). 
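        // A mapping obtained from chunk_alloc can land inside another arena's
        // reserved range, in a hole left by huge_move_expand when the old
        // mapping could not be refilled. The metadata for such a chunk belongs
        // to the arena covering that address range, so re-derive the arena
        // from the address and switch the locks before using it.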
68 | struct arena *chunk_arena = get_huge_arena(chunk); 69 | if (chunk_arena != arena) { 70 | mutex_unlock(&arena->mutex); 71 | if (chunk_arena) { 72 | mutex_lock(&chunk_arena->mutex); 73 | } 74 | arena = chunk_arena; 75 | } 76 | } 77 | 78 | *out_arena = arena; 79 | return chunk; 80 | } 81 | 82 | void *huge_alloc(struct thread_cache *cache, size_t size, size_t alignment) { 83 | size_t real_size = CHUNK_CEILING(size); 84 | struct arena *arena; 85 | void *chunk = huge_chunk_alloc(cache, real_size, alignment, &arena); 86 | if (unlikely(!chunk)) { 87 | return NULL; 88 | } 89 | 90 | extent_tree *huge = acquire_huge(arena); 91 | 92 | struct extent_node *node = node_alloc(get_huge_nodes(arena)); 93 | if (unlikely(!node)) { 94 | chunk_free(get_recycler(arena), chunk, real_size); 95 | chunk = NULL; 96 | } else { 97 | node->size = real_size; 98 | node->addr = chunk; 99 | extent_tree_ad_insert(huge, node); 100 | } 101 | 102 | release_huge(arena); 103 | maybe_unlock_arena(arena); 104 | return chunk; 105 | } 106 | 107 | static void huge_update_size(struct arena *arena, void *ptr, size_t new_size) { 108 | struct extent_node key; 109 | key.addr = ptr; 110 | 111 | extent_tree *huge = acquire_huge(arena); 112 | struct extent_node *node = extent_tree_ad_search(huge, &key); 113 | assert(node); 114 | node->size = new_size; 115 | release_huge(arena); 116 | } 117 | 118 | static void huge_no_move_shrink(void *ptr, size_t old_size, size_t new_size) { 119 | void *excess_addr = (char *)ptr + new_size; 120 | size_t excess_size = old_size - new_size; 121 | 122 | if (purge_ratio >= 0) { 123 | memory_decommit(excess_addr, excess_size); 124 | } 125 | 126 | struct arena *arena = get_huge_arena(ptr); 127 | maybe_lock_arena(arena); 128 | chunk_free(get_recycler(arena), excess_addr, excess_size); 129 | huge_update_size(arena, ptr, new_size); 130 | maybe_unlock_arena(arena); 131 | } 132 | 133 | static bool huge_no_move_expand(void *ptr, size_t old_size, size_t new_size) { 134 | bool failure = true; 135 | void *expand_addr = (char *)ptr + old_size; 136 | size_t expand_size = new_size - old_size; 137 | 138 | struct arena *arena = get_huge_arena(ptr); 139 | struct chunk_recycler *chunks = get_recycler(arena); 140 | maybe_lock_arena(arena); 141 | if (chunk_recycle(chunks, expand_addr, expand_size, CHUNK_SIZE)) { 142 | if (unlikely(memory_commit(expand_addr, expand_size))) { 143 | chunk_free(chunks, expand_addr, expand_size); 144 | } else { 145 | huge_update_size(arena, ptr, new_size); 146 | failure = false; 147 | } 148 | } 149 | maybe_unlock_arena(arena); 150 | return failure; 151 | } 152 | 153 | static void *huge_move_expand(struct thread_cache *cache, void *old_addr, size_t old_size, size_t new_size) { 154 | struct arena *arena; 155 | void *new_addr = huge_chunk_alloc(cache, new_size, CHUNK_SIZE, &arena); 156 | if (unlikely(!new_addr)) { 157 | return NULL; 158 | } 159 | 160 | bool gap = true; 161 | if (unlikely(memory_remap_fixed(old_addr, old_size, new_addr, new_size))) { 162 | memcpy(new_addr, old_addr, old_size); 163 | if (purge_ratio >= 0) { 164 | memory_decommit(old_addr, old_size); 165 | } 166 | gap = false; 167 | } else { 168 | // Attempt to fill the virtual memory hole. The kernel should provide a flag for preserving 169 | // the old mapping to avoid the possibility of this failing and creating fragmentation. 
170 | // 171 | // https://lkml.org/lkml/2014/10/2/624 172 | void *extra = memory_map(old_addr, old_size, false); 173 | if (likely(extra)) { 174 | if (unlikely(extra != old_addr)) { 175 | memory_unmap(extra, old_size); 176 | } else { 177 | gap = false; 178 | } 179 | } 180 | } 181 | 182 | struct extent_node key; 183 | key.addr = old_addr; 184 | 185 | struct arena *old_arena = get_huge_arena(old_addr); 186 | 187 | extent_tree *huge = acquire_huge(old_arena); 188 | struct extent_node *node = extent_tree_ad_search(huge, &key); 189 | assert(node); 190 | extent_tree_ad_remove(huge, node); 191 | node->addr = new_addr; 192 | node->size = new_size; 193 | 194 | if (arena != old_arena) { 195 | release_huge(old_arena); 196 | huge = acquire_huge(arena); 197 | } 198 | 199 | extent_tree_ad_insert(huge, node); 200 | release_huge(arena); 201 | 202 | if (!gap) { 203 | if (arena != old_arena && old_arena) { 204 | mutex_lock(&old_arena->mutex); 205 | } 206 | chunk_free(get_recycler(old_arena), old_addr, old_size); 207 | if (arena != old_arena && old_arena) { 208 | mutex_unlock(&old_arena->mutex); 209 | } 210 | } 211 | 212 | maybe_unlock_arena(arena); 213 | return new_addr; 214 | } 215 | 216 | void *huge_realloc(struct thread_cache *cache, void *ptr, size_t old_size, size_t new_real_size) { 217 | if (new_real_size > old_size) { 218 | if (!huge_no_move_expand(ptr, old_size, new_real_size)) { 219 | return ptr; 220 | } 221 | return huge_move_expand(cache, ptr, old_size, new_real_size); 222 | } else if (new_real_size < old_size) { 223 | huge_no_move_shrink(ptr, old_size, new_real_size); 224 | } 225 | return ptr; 226 | } 227 | 228 | void huge_free(void *ptr) { 229 | struct extent_node *node, key; 230 | key.addr = ptr; 231 | struct arena *arena = get_huge_arena(ptr); 232 | 233 | maybe_lock_arena(arena); 234 | extent_tree *huge = acquire_huge(arena); 235 | 236 | node = extent_tree_ad_search(huge, &key); 237 | assert(node); 238 | size_t size = node->size; 239 | extent_tree_ad_remove(huge, node); 240 | node_free(get_huge_nodes(arena), node); 241 | release_huge(arena); 242 | 243 | if (purge_ratio >= 0) { 244 | memory_decommit(ptr, size); 245 | } 246 | chunk_free(get_recycler(arena), ptr, size); 247 | maybe_unlock_arena(arena); 248 | } 249 | 250 | size_t huge_alloc_size(void *ptr) { 251 | struct extent_node key; 252 | key.addr = ptr; 253 | struct arena *arena = get_huge_arena(ptr); 254 | 255 | maybe_lock_arena(arena); 256 | extent_tree *huge = acquire_huge(arena); 257 | 258 | struct extent_node *node = extent_tree_ad_search(huge, &key); 259 | assert(node); 260 | size_t size = node->size; 261 | 262 | release_huge(arena); 263 | maybe_unlock_arena(arena); 264 | 265 | return size; 266 | } 267 | -------------------------------------------------------------------------------- /huge.h: -------------------------------------------------------------------------------- 1 | #ifndef HUGE_H 2 | #define HUGE_H 3 | 4 | #include 5 | 6 | #include "arena.h" 7 | 8 | void huge_init(void); 9 | void *huge_alloc(struct thread_cache *cache, size_t size, size_t alignment); 10 | void huge_free(void *ptr); 11 | size_t huge_alloc_size(void *ptr); 12 | void *huge_realloc(struct thread_cache *cache, void *ptr, size_t old_size, size_t new_real_size); 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /memory.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "memory.h" 8 | #include 
"util.h" 9 | 10 | // use MAP_NORESERVE to get either proper memory accounting or full overcommit 11 | static const int map_flags = MAP_ANONYMOUS|MAP_PRIVATE|MAP_NORESERVE; 12 | static bool reduce_commit_charge = true; 13 | 14 | COLD void memory_init(void) { 15 | int overcommit = open("/proc/sys/vm/overcommit_memory", O_RDONLY|O_CLOEXEC); 16 | if (overcommit != -1) { 17 | char digit; 18 | int rc = TEMP_FAILURE_RETRY(read(overcommit, &digit, 1)); 19 | if (rc == 1 && digit != '2') { 20 | reduce_commit_charge = false; 21 | } 22 | close(overcommit); 23 | } 24 | } 25 | 26 | void memory_decommit(void *addr, size_t size) { 27 | if (reduce_commit_charge) { 28 | mmap(addr, size, PROT_NONE, map_flags|MAP_FIXED, -1, 0); 29 | } else { 30 | madvise(addr, size, MADV_DONTNEED); 31 | } 32 | } 33 | 34 | bool memory_commit(void *addr, size_t size) { 35 | if (reduce_commit_charge) { 36 | return mprotect(addr, size, PROT_READ|PROT_WRITE); 37 | } 38 | return false; 39 | } 40 | 41 | void *memory_map(void *hint, size_t size, bool commit) { 42 | int prot = !commit && reduce_commit_charge ? PROT_NONE : PROT_READ|PROT_WRITE; 43 | void *addr = mmap(hint, size, prot, map_flags, -1, 0); 44 | if (unlikely(addr == MAP_FAILED)) { 45 | return NULL; 46 | } 47 | return addr; 48 | } 49 | 50 | void *memory_map_aligned(void *hint, size_t size, size_t alignment, bool commit) { 51 | size_t alloc_size = size + alignment - PAGE_SIZE; 52 | if (unlikely(alloc_size < size)) { 53 | return NULL; 54 | } 55 | void *addr = memory_map(hint, alloc_size, commit); 56 | if (unlikely(!addr)) { 57 | return NULL; 58 | } 59 | size_t lead_size = ALIGNMENT_CEILING((uintptr_t)addr, alignment) - (uintptr_t)addr; 60 | size_t trail_size = alloc_size - lead_size - size; 61 | void *base = (char *)addr + lead_size; 62 | if (lead_size) { 63 | munmap(addr, lead_size); 64 | } 65 | if (trail_size) { 66 | munmap((char *)base + size, trail_size); 67 | } 68 | return base; 69 | } 70 | 71 | void memory_unmap(void *addr, size_t size) { 72 | munmap(addr, size); 73 | } 74 | 75 | bool memory_remap_fixed(void *addr, size_t old_size, void *new_addr, size_t new_size) { 76 | return mremap(addr, old_size, new_size, MREMAP_MAYMOVE|MREMAP_FIXED, new_addr) == MAP_FAILED; 77 | } 78 | -------------------------------------------------------------------------------- /memory.h: -------------------------------------------------------------------------------- 1 | #ifndef MEMORY_H 2 | #define MEMORY_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #define CACHELINE 64 9 | 10 | // Return the smallest alignment multiple that is >= s. 11 | #define ALIGNMENT_CEILING(s, alignment) (((s) + (alignment - 1)) & (-(alignment))) 12 | 13 | // Return the nearest aligned address at or below a. 14 | #define ALIGNMENT_ADDR2BASE(a, alignment) ((void *)((uintptr_t)(a) & (-(alignment)))) 15 | 16 | // Return the offset between a and the nearest aligned address at or below a. 17 | #define ALIGNMENT_ADDR2OFFSET(a, alignment) ((size_t)((uintptr_t)(a) & (alignment - 1))) 18 | 19 | #define PAGE_SIZE ((size_t)4096) 20 | #define PAGE_MASK ((size_t)(PAGE_SIZE - 1)) 21 | 22 | // Return the smallest page size multiple that is >= s. 
23 | #define PAGE_CEILING(s) (((s) + PAGE_MASK) & ~PAGE_MASK) 24 | 25 | void memory_init(void); 26 | void memory_decommit(void *ptr, size_t size); 27 | bool memory_commit(void *ptr, size_t size); 28 | void *memory_map(void *hint, size_t size, bool commit); 29 | void *memory_map_aligned(void *hint, size_t size, size_t alignment, bool commit); 30 | void memory_unmap(void *ptr, size_t size); 31 | bool memory_remap_fixed(void *addr, size_t old_size, void *new_addr, size_t new_size); 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /mutex.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "mutex.h" 4 | #include "util.h" 5 | 6 | #ifdef __linux__ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | static int sys_futex(void *uaddr, int op, int val1, struct timespec *timeout, void *uaddr2, 14 | int val3) { 15 | return syscall(SYS_futex, uaddr, op, val1, timeout, uaddr2, val3); 16 | } 17 | 18 | bool mutex_init(mutex *m) { 19 | *m = 0; 20 | return false; 21 | } 22 | 23 | bool mutex_trylock(mutex *m) { 24 | int expected = 0; 25 | return !atomic_compare_exchange_strong_explicit(m, &expected, 1, memory_order_acquire, 26 | memory_order_relaxed); 27 | } 28 | 29 | void mutex_lock(mutex *m) { 30 | int expected = 0; 31 | if (unlikely(!atomic_compare_exchange_strong_explicit(m, &expected, 1, memory_order_acquire, 32 | memory_order_relaxed))) { 33 | if (expected != 2) { 34 | expected = atomic_exchange_explicit(m, 2, memory_order_acquire); 35 | } 36 | while (expected) { 37 | sys_futex(m, FUTEX_WAIT_PRIVATE, 2, NULL, NULL, 0); 38 | expected = atomic_exchange_explicit(m, 2, memory_order_acquire); 39 | } 40 | } 41 | } 42 | 43 | void mutex_unlock(mutex *m) { 44 | if (unlikely(atomic_fetch_sub_explicit(m, 1, memory_order_release) != 1)) { 45 | atomic_store_explicit(m, 0, memory_order_release); 46 | sys_futex(m, FUTEX_WAKE_PRIVATE, 1, NULL, NULL, 0); 47 | } 48 | } 49 | 50 | #else 51 | 52 | bool mutex_init(mutex *m) { 53 | return pthread_mutex_init(m, NULL); 54 | } 55 | 56 | bool mutex_trylock(mutex *m) { 57 | int ret = pthread_mutex_trylock(m); 58 | assert(!ret || ret == EBUSY); 59 | return ret; 60 | } 61 | 62 | void mutex_lock(mutex *m) { 63 | UNUSED int ret = pthread_mutex_lock(m); 64 | assert(!ret); 65 | } 66 | 67 | void mutex_unlock(mutex *m) { 68 | UNUSED int ret = pthread_mutex_unlock(m); 69 | assert(!ret); 70 | } 71 | 72 | #endif 73 | -------------------------------------------------------------------------------- /mutex.h: -------------------------------------------------------------------------------- 1 | #ifndef MUTEX_H 2 | #define MUTEX_H 3 | 4 | #include 5 | 6 | #ifdef __linux__ 7 | 8 | #include 9 | 10 | #define MUTEX_INITIALIZER 0 11 | typedef atomic_int mutex; 12 | 13 | #else 14 | 15 | #include 16 | 17 | #define MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER; 18 | typedef pthread_mutex_t mutex; 19 | 20 | #endif 21 | 22 | bool mutex_init(mutex *m); 23 | bool mutex_trylock(mutex *m); 24 | void mutex_lock(mutex *m); 25 | void mutex_unlock(mutex *m); 26 | 27 | #endif 28 | -------------------------------------------------------------------------------- /purge.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "errno.h" 4 | #include "purge.h" 5 | #include "util.h" 6 | 7 | long int purge_ratio = -1; 8 | 9 | COLD void purge_init(void) { 10 | char *ratio = secure_getenv("MALLOC_PURGE_RATIO"); 11 | if (ratio) { 12 | 
purge_ratio = strtol(ratio, NULL, 10); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /purge.h: -------------------------------------------------------------------------------- 1 | #ifndef PURGE_H 2 | #define PURGE_H 3 | 4 | long int purge_ratio; 5 | void purge_init(void); 6 | 7 | #endif 8 | -------------------------------------------------------------------------------- /rb.h: -------------------------------------------------------------------------------- 1 | /*- 2 | ******************************************************************************* 3 | * 4 | * cpp macro implementation of left-leaning 2-3 red-black trees. Parent 5 | * pointers are not used, and color bits are stored in the least significant 6 | * bit of right-child pointers (if RB_COMPACT is defined), thus making node 7 | * linkage as compact as is possible for red-black trees. 8 | * 9 | * Usage: 10 | * 11 | * #include 12 | * #include 13 | * #define NDEBUG // (Optional, see assert(3).) 14 | * #include 15 | * #define RB_COMPACT // (Optional, embed color bits in right-child pointers.) 16 | * #include 17 | * ... 18 | * 19 | ******************************************************************************* 20 | */ 21 | 22 | #ifndef RB_H_ 23 | #define RB_H_ 24 | 25 | #ifdef RB_COMPACT 26 | /* Node structure. */ 27 | #define rb_node(a_type) \ 28 | struct { \ 29 | a_type *rbn_left; \ 30 | a_type *rbn_right_red; \ 31 | } 32 | #else 33 | #define rb_node(a_type) \ 34 | struct { \ 35 | a_type *rbn_left; \ 36 | a_type *rbn_right; \ 37 | bool rbn_red; \ 38 | } 39 | #endif 40 | 41 | /* Root structure. */ 42 | #define rb_tree(a_type) \ 43 | struct { \ 44 | a_type *rbt_root; \ 45 | a_type rbt_nil; \ 46 | } 47 | 48 | /* Left accessors. */ 49 | #define rbtn_left_get(a_type, a_field, a_node) \ 50 | ((a_node)->a_field.rbn_left) 51 | #define rbtn_left_set(a_type, a_field, a_node, a_left) do { \ 52 | (a_node)->a_field.rbn_left = a_left; \ 53 | } while (0) 54 | 55 | #ifdef RB_COMPACT 56 | /* Right accessors. */ 57 | #define rbtn_right_get(a_type, a_field, a_node) \ 58 | ((a_type *) (((intptr_t) (a_node)->a_field.rbn_right_red) \ 59 | & ((ssize_t)-2))) 60 | #define rbtn_right_set(a_type, a_field, a_node, a_right) do { \ 61 | (a_node)->a_field.rbn_right_red = (a_type *) (((uintptr_t) a_right) \ 62 | | (((uintptr_t) (a_node)->a_field.rbn_right_red) & ((size_t)1))); \ 63 | } while (0) 64 | 65 | /* Color accessors. */ 66 | #define rbtn_red_get(a_type, a_field, a_node) \ 67 | ((bool) (((uintptr_t) (a_node)->a_field.rbn_right_red) \ 68 | & ((size_t)1))) 69 | #define rbtn_color_set(a_type, a_field, a_node, a_red) do { \ 70 | (a_node)->a_field.rbn_right_red = (a_type *) ((((intptr_t) \ 71 | (a_node)->a_field.rbn_right_red) & ((ssize_t)-2)) \ 72 | | ((ssize_t)a_red)); \ 73 | } while (0) 74 | #define rbtn_red_set(a_type, a_field, a_node) do { \ 75 | (a_node)->a_field.rbn_right_red = (a_type *) (((uintptr_t) \ 76 | (a_node)->a_field.rbn_right_red) | ((size_t)1)); \ 77 | } while (0) 78 | #define rbtn_black_set(a_type, a_field, a_node) do { \ 79 | (a_node)->a_field.rbn_right_red = (a_type *) (((intptr_t) \ 80 | (a_node)->a_field.rbn_right_red) & ((ssize_t)-2)); \ 81 | } while (0) 82 | #else 83 | /* Right accessors. */ 84 | #define rbtn_right_get(a_type, a_field, a_node) \ 85 | ((a_node)->a_field.rbn_right) 86 | #define rbtn_right_set(a_type, a_field, a_node, a_right) do { \ 87 | (a_node)->a_field.rbn_right = a_right; \ 88 | } while (0) 89 | 90 | /* Color accessors. 
*/ 91 | #define rbtn_red_get(a_type, a_field, a_node) \ 92 | ((a_node)->a_field.rbn_red) 93 | #define rbtn_color_set(a_type, a_field, a_node, a_red) do { \ 94 | (a_node)->a_field.rbn_red = (a_red); \ 95 | } while (0) 96 | #define rbtn_red_set(a_type, a_field, a_node) do { \ 97 | (a_node)->a_field.rbn_red = true; \ 98 | } while (0) 99 | #define rbtn_black_set(a_type, a_field, a_node) do { \ 100 | (a_node)->a_field.rbn_red = false; \ 101 | } while (0) 102 | #endif 103 | 104 | /* Node initializer. */ 105 | #define rbt_node_new(a_type, a_field, a_rbt, a_node) do { \ 106 | rbtn_left_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \ 107 | rbtn_right_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \ 108 | rbtn_red_set(a_type, a_field, (a_node)); \ 109 | } while (0) 110 | 111 | /* Tree initializer. */ 112 | #define rb_new(a_type, a_field, a_rbt) do { \ 113 | (a_rbt)->rbt_root = &(a_rbt)->rbt_nil; \ 114 | rbt_node_new(a_type, a_field, a_rbt, &(a_rbt)->rbt_nil); \ 115 | rbtn_black_set(a_type, a_field, &(a_rbt)->rbt_nil); \ 116 | } while (0) 117 | 118 | /* Internal utility macros. */ 119 | #define rbtn_first(a_type, a_field, a_rbt, a_root, r_node) do { \ 120 | (r_node) = (a_root); \ 121 | if ((r_node) != &(a_rbt)->rbt_nil) { \ 122 | for (; \ 123 | rbtn_left_get(a_type, a_field, (r_node)) != &(a_rbt)->rbt_nil;\ 124 | (r_node) = rbtn_left_get(a_type, a_field, (r_node))) { \ 125 | } \ 126 | } \ 127 | } while (0) 128 | 129 | #define rbtn_last(a_type, a_field, a_rbt, a_root, r_node) do { \ 130 | (r_node) = (a_root); \ 131 | if ((r_node) != &(a_rbt)->rbt_nil) { \ 132 | for (; rbtn_right_get(a_type, a_field, (r_node)) != \ 133 | &(a_rbt)->rbt_nil; (r_node) = rbtn_right_get(a_type, a_field, \ 134 | (r_node))) { \ 135 | } \ 136 | } \ 137 | } while (0) 138 | 139 | #define rbtn_rotate_left(a_type, a_field, a_node, r_node) do { \ 140 | (r_node) = rbtn_right_get(a_type, a_field, (a_node)); \ 141 | rbtn_right_set(a_type, a_field, (a_node), \ 142 | rbtn_left_get(a_type, a_field, (r_node))); \ 143 | rbtn_left_set(a_type, a_field, (r_node), (a_node)); \ 144 | } while (0) 145 | 146 | #define rbtn_rotate_right(a_type, a_field, a_node, r_node) do { \ 147 | (r_node) = rbtn_left_get(a_type, a_field, (a_node)); \ 148 | rbtn_left_set(a_type, a_field, (a_node), \ 149 | rbtn_right_get(a_type, a_field, (r_node))); \ 150 | rbtn_right_set(a_type, a_field, (r_node), (a_node)); \ 151 | } while (0) 152 | 153 | /* 154 | * The rb_proto() macro generates function prototypes that correspond to the 155 | * functions generated by an equivalently parameterized call to rb_gen(). 
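 * For example, rb_proto(static, ex_, ex_t, ex_node_t) emits prototypes for the
 * ex_*() functions described in the rb_gen() documentation below.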
156 | */ 157 | 158 | #define rb_proto(a_attr, a_prefix, a_rbt_type, a_type) \ 159 | a_attr void \ 160 | a_prefix##new(a_rbt_type *rbtree); \ 161 | a_attr bool \ 162 | a_prefix##empty(a_rbt_type *rbtree); \ 163 | a_attr a_type * \ 164 | a_prefix##first(a_rbt_type *rbtree); \ 165 | a_attr a_type * \ 166 | a_prefix##last(a_rbt_type *rbtree); \ 167 | a_attr a_type * \ 168 | a_prefix##next(a_rbt_type *rbtree, a_type *node); \ 169 | a_attr a_type * \ 170 | a_prefix##prev(a_rbt_type *rbtree, a_type *node); \ 171 | a_attr a_type * \ 172 | a_prefix##search(a_rbt_type *rbtree, a_type *key); \ 173 | a_attr a_type * \ 174 | a_prefix##nsearch(a_rbt_type *rbtree, a_type *key); \ 175 | a_attr a_type * \ 176 | a_prefix##psearch(a_rbt_type *rbtree, a_type *key); \ 177 | a_attr void \ 178 | a_prefix##insert(a_rbt_type *rbtree, a_type *node); \ 179 | a_attr void \ 180 | a_prefix##remove(a_rbt_type *rbtree, a_type *node); \ 181 | a_attr a_type * \ 182 | a_prefix##iter(a_rbt_type *rbtree, a_type *start, a_type *(*cb)( \ 183 | a_rbt_type *, a_type *, void *), void *arg); \ 184 | a_attr a_type * \ 185 | a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ 186 | a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg); 187 | 188 | /* 189 | * The rb_gen() macro generates a type-specific red-black tree implementation, 190 | * based on the above cpp macros. 191 | * 192 | * Arguments: 193 | * 194 | * a_attr : Function attribute for generated functions (ex: static). 195 | * a_prefix : Prefix for generated functions (ex: ex_). 196 | * a_rb_type : Type for red-black tree data structure (ex: ex_t). 197 | * a_type : Type for red-black tree node data structure (ex: ex_node_t). 198 | * a_field : Name of red-black tree node linkage (ex: ex_link). 199 | * a_cmp : Node comparison function name, with the following prototype: 200 | * int (a_cmp *)(a_type *a_node, a_type *a_other); 201 | * ^^^^^^ 202 | * or a_key 203 | * Interpretation of comparison function return values: 204 | * -1 : a_node < a_other 205 | * 0 : a_node == a_other 206 | * 1 : a_node > a_other 207 | * In all cases, the a_node or a_key macro argument is the first 208 | * argument to the comparison function, which makes it possible 209 | * to write comparison functions that treat the first argument 210 | * specially. 211 | * 212 | * Assuming the following setup: 213 | * 214 | * typedef struct ex_node_s ex_node_t; 215 | * struct ex_node_s { 216 | * rb_node(ex_node_t) ex_link; 217 | * }; 218 | * typedef rb_tree(ex_node_t) ex_t; 219 | * rb_gen(static, ex_, ex_t, ex_node_t, ex_link, ex_cmp) 220 | * 221 | * The following API is generated: 222 | * 223 | * static void 224 | * ex_new(ex_t *tree); 225 | * Description: Initialize a red-black tree structure. 226 | * Args: 227 | * tree: Pointer to an uninitialized red-black tree object. 228 | * 229 | * static bool 230 | * ex_empty(ex_t *tree); 231 | * Description: Determine whether tree is empty. 232 | * Args: 233 | * tree: Pointer to an initialized red-black tree object. 234 | * Ret: True if tree is empty, false otherwise. 235 | * 236 | * static ex_node_t * 237 | * ex_first(ex_t *tree); 238 | * static ex_node_t * 239 | * ex_last(ex_t *tree); 240 | * Description: Get the first/last node in tree. 241 | * Args: 242 | * tree: Pointer to an initialized red-black tree object. 243 | * Ret: First/last node in tree, or NULL if tree is empty. 
244 | * 245 | * static ex_node_t * 246 | * ex_next(ex_t *tree, ex_node_t *node); 247 | * static ex_node_t * 248 | * ex_prev(ex_t *tree, ex_node_t *node); 249 | * Description: Get node's successor/predecessor. 250 | * Args: 251 | * tree: Pointer to an initialized red-black tree object. 252 | * node: A node in tree. 253 | * Ret: node's successor/predecessor in tree, or NULL if node is 254 | * last/first. 255 | * 256 | * static ex_node_t * 257 | * ex_search(ex_t *tree, ex_node_t *key); 258 | * Description: Search for node that matches key. 259 | * Args: 260 | * tree: Pointer to an initialized red-black tree object. 261 | * key : Search key. 262 | * Ret: Node in tree that matches key, or NULL if no match. 263 | * 264 | * static ex_node_t * 265 | * ex_nsearch(ex_t *tree, ex_node_t *key); 266 | * static ex_node_t * 267 | * ex_psearch(ex_t *tree, ex_node_t *key); 268 | * Description: Search for node that matches key. If no match is found, 269 | * return what would be key's successor/predecessor, were 270 | * key in tree. 271 | * Args: 272 | * tree: Pointer to an initialized red-black tree object. 273 | * key : Search key. 274 | * Ret: Node in tree that matches key, or if no match, hypothetical node's 275 | * successor/predecessor (NULL if no successor/predecessor). 276 | * 277 | * static void 278 | * ex_insert(ex_t *tree, ex_node_t *node); 279 | * Description: Insert node into tree. 280 | * Args: 281 | * tree: Pointer to an initialized red-black tree object. 282 | * node: Node to be inserted into tree. 283 | * 284 | * static void 285 | * ex_remove(ex_t *tree, ex_node_t *node); 286 | * Description: Remove node from tree. 287 | * Args: 288 | * tree: Pointer to an initialized red-black tree object. 289 | * node: Node in tree to be removed. 290 | * 291 | * static ex_node_t * 292 | * ex_iter(ex_t *tree, ex_node_t *start, ex_node_t *(*cb)(ex_t *, 293 | * ex_node_t *, void *), void *arg); 294 | * static ex_node_t * 295 | * ex_reverse_iter(ex_t *tree, ex_node_t *start, ex_node *(*cb)(ex_t *, 296 | * ex_node_t *, void *), void *arg); 297 | * Description: Iterate forward/backward over tree, starting at node. If 298 | * tree is modified, iteration must be immediately 299 | * terminated by the callback function that causes the 300 | * modification. 301 | * Args: 302 | * tree : Pointer to an initialized red-black tree object. 303 | * start: Node at which to start iteration, or NULL to start at 304 | * first/last node. 305 | * cb : Callback function, which is called for each node during 306 | * iteration. Under normal circumstances the callback function 307 | * should return NULL, which causes iteration to continue. If a 308 | * callback function returns non-NULL, iteration is immediately 309 | * terminated and the non-NULL return value is returned by the 310 | * iterator. This is useful for re-starting iteration after 311 | * modifying tree. 312 | * arg : Opaque pointer passed to cb(). 313 | * Ret: NULL if iteration completed, or the non-NULL callback return value 314 | * that caused termination of the iteration. 
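 *
 * A comparison function for the example setup above might look like the
 * following sketch; it assumes ex_node_s additionally carries a numeric key
 * field (not shown in the minimal struct above):
 *
 *   static int
 *   ex_cmp(ex_node_t *a, ex_node_t *b) {
 *       return (a->key > b->key) - (a->key < b->key);
 *   }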
315 | */ 316 | #define rb_gen(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp) \ 317 | a_attr void \ 318 | a_prefix##new(a_rbt_type *rbtree) { \ 319 | rb_new(a_type, a_field, rbtree); \ 320 | } \ 321 | a_attr bool \ 322 | a_prefix##empty(a_rbt_type *rbtree) { \ 323 | return (rbtree->rbt_root == &rbtree->rbt_nil); \ 324 | } \ 325 | a_attr a_type * \ 326 | a_prefix##first(a_rbt_type *rbtree) { \ 327 | a_type *ret; \ 328 | rbtn_first(a_type, a_field, rbtree, rbtree->rbt_root, ret); \ 329 | if (ret == &rbtree->rbt_nil) { \ 330 | ret = NULL; \ 331 | } \ 332 | return (ret); \ 333 | } \ 334 | a_attr a_type * \ 335 | a_prefix##last(a_rbt_type *rbtree) { \ 336 | a_type *ret; \ 337 | rbtn_last(a_type, a_field, rbtree, rbtree->rbt_root, ret); \ 338 | if (ret == &rbtree->rbt_nil) { \ 339 | ret = NULL; \ 340 | } \ 341 | return (ret); \ 342 | } \ 343 | a_attr a_type * \ 344 | a_prefix##next(a_rbt_type *rbtree, a_type *node) { \ 345 | a_type *ret; \ 346 | if (rbtn_right_get(a_type, a_field, node) != &rbtree->rbt_nil) { \ 347 | rbtn_first(a_type, a_field, rbtree, rbtn_right_get(a_type, \ 348 | a_field, node), ret); \ 349 | } else { \ 350 | a_type *tnode = rbtree->rbt_root; \ 351 | assert(tnode != &rbtree->rbt_nil); \ 352 | ret = &rbtree->rbt_nil; \ 353 | while (true) { \ 354 | int cmp = (a_cmp)(node, tnode); \ 355 | if (cmp < 0) { \ 356 | ret = tnode; \ 357 | tnode = rbtn_left_get(a_type, a_field, tnode); \ 358 | } else if (cmp > 0) { \ 359 | tnode = rbtn_right_get(a_type, a_field, tnode); \ 360 | } else { \ 361 | break; \ 362 | } \ 363 | assert(tnode != &rbtree->rbt_nil); \ 364 | } \ 365 | } \ 366 | if (ret == &rbtree->rbt_nil) { \ 367 | ret = (NULL); \ 368 | } \ 369 | return (ret); \ 370 | } \ 371 | a_attr a_type * \ 372 | a_prefix##prev(a_rbt_type *rbtree, a_type *node) { \ 373 | a_type *ret; \ 374 | if (rbtn_left_get(a_type, a_field, node) != &rbtree->rbt_nil) { \ 375 | rbtn_last(a_type, a_field, rbtree, rbtn_left_get(a_type, \ 376 | a_field, node), ret); \ 377 | } else { \ 378 | a_type *tnode = rbtree->rbt_root; \ 379 | assert(tnode != &rbtree->rbt_nil); \ 380 | ret = &rbtree->rbt_nil; \ 381 | while (true) { \ 382 | int cmp = (a_cmp)(node, tnode); \ 383 | if (cmp < 0) { \ 384 | tnode = rbtn_left_get(a_type, a_field, tnode); \ 385 | } else if (cmp > 0) { \ 386 | ret = tnode; \ 387 | tnode = rbtn_right_get(a_type, a_field, tnode); \ 388 | } else { \ 389 | break; \ 390 | } \ 391 | assert(tnode != &rbtree->rbt_nil); \ 392 | } \ 393 | } \ 394 | if (ret == &rbtree->rbt_nil) { \ 395 | ret = (NULL); \ 396 | } \ 397 | return (ret); \ 398 | } \ 399 | a_attr a_type * \ 400 | a_prefix##search(a_rbt_type *rbtree, a_type *key) { \ 401 | a_type *ret; \ 402 | int cmp; \ 403 | ret = rbtree->rbt_root; \ 404 | while (ret != &rbtree->rbt_nil \ 405 | && (cmp = (a_cmp)(key, ret)) != 0) { \ 406 | if (cmp < 0) { \ 407 | ret = rbtn_left_get(a_type, a_field, ret); \ 408 | } else { \ 409 | ret = rbtn_right_get(a_type, a_field, ret); \ 410 | } \ 411 | } \ 412 | if (ret == &rbtree->rbt_nil) { \ 413 | ret = (NULL); \ 414 | } \ 415 | return (ret); \ 416 | } \ 417 | a_attr a_type * \ 418 | a_prefix##nsearch(a_rbt_type *rbtree, a_type *key) { \ 419 | a_type *ret; \ 420 | a_type *tnode = rbtree->rbt_root; \ 421 | ret = &rbtree->rbt_nil; \ 422 | while (tnode != &rbtree->rbt_nil) { \ 423 | int cmp = (a_cmp)(key, tnode); \ 424 | if (cmp < 0) { \ 425 | ret = tnode; \ 426 | tnode = rbtn_left_get(a_type, a_field, tnode); \ 427 | } else if (cmp > 0) { \ 428 | tnode = rbtn_right_get(a_type, a_field, tnode); \ 429 | } else { \ 430 | ret 
= tnode; \ 431 | break; \ 432 | } \ 433 | } \ 434 | if (ret == &rbtree->rbt_nil) { \ 435 | ret = (NULL); \ 436 | } \ 437 | return (ret); \ 438 | } \ 439 | a_attr a_type * \ 440 | a_prefix##psearch(a_rbt_type *rbtree, a_type *key) { \ 441 | a_type *ret; \ 442 | a_type *tnode = rbtree->rbt_root; \ 443 | ret = &rbtree->rbt_nil; \ 444 | while (tnode != &rbtree->rbt_nil) { \ 445 | int cmp = (a_cmp)(key, tnode); \ 446 | if (cmp < 0) { \ 447 | tnode = rbtn_left_get(a_type, a_field, tnode); \ 448 | } else if (cmp > 0) { \ 449 | ret = tnode; \ 450 | tnode = rbtn_right_get(a_type, a_field, tnode); \ 451 | } else { \ 452 | ret = tnode; \ 453 | break; \ 454 | } \ 455 | } \ 456 | if (ret == &rbtree->rbt_nil) { \ 457 | ret = (NULL); \ 458 | } \ 459 | return (ret); \ 460 | } \ 461 | a_attr void \ 462 | a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \ 463 | struct { \ 464 | a_type *node; \ 465 | int cmp; \ 466 | } path[sizeof(void *) << 4], *pathp; \ 467 | rbt_node_new(a_type, a_field, rbtree, node); \ 468 | /* Wind. */ \ 469 | path->node = rbtree->rbt_root; \ 470 | for (pathp = path; pathp->node != &rbtree->rbt_nil; pathp++) { \ 471 | int cmp = pathp->cmp = a_cmp(node, pathp->node); \ 472 | assert(cmp != 0); \ 473 | if (cmp < 0) { \ 474 | pathp[1].node = rbtn_left_get(a_type, a_field, \ 475 | pathp->node); \ 476 | } else { \ 477 | pathp[1].node = rbtn_right_get(a_type, a_field, \ 478 | pathp->node); \ 479 | } \ 480 | } \ 481 | pathp->node = node; \ 482 | /* Unwind. */ \ 483 | for (pathp--; (uintptr_t)pathp >= (uintptr_t)path; pathp--) { \ 484 | a_type *cnode = pathp->node; \ 485 | if (pathp->cmp < 0) { \ 486 | a_type *left = pathp[1].node; \ 487 | rbtn_left_set(a_type, a_field, cnode, left); \ 488 | if (rbtn_red_get(a_type, a_field, left)) { \ 489 | a_type *leftleft = rbtn_left_get(a_type, a_field, left);\ 490 | if (rbtn_red_get(a_type, a_field, leftleft)) { \ 491 | /* Fix up 4-node. */ \ 492 | a_type *tnode; \ 493 | rbtn_black_set(a_type, a_field, leftleft); \ 494 | rbtn_rotate_right(a_type, a_field, cnode, tnode); \ 495 | cnode = tnode; \ 496 | } \ 497 | } else { \ 498 | return; \ 499 | } \ 500 | } else { \ 501 | a_type *right = pathp[1].node; \ 502 | rbtn_right_set(a_type, a_field, cnode, right); \ 503 | if (rbtn_red_get(a_type, a_field, right)) { \ 504 | a_type *left = rbtn_left_get(a_type, a_field, cnode); \ 505 | if (rbtn_red_get(a_type, a_field, left)) { \ 506 | /* Split 4-node. */ \ 507 | rbtn_black_set(a_type, a_field, left); \ 508 | rbtn_black_set(a_type, a_field, right); \ 509 | rbtn_red_set(a_type, a_field, cnode); \ 510 | } else { \ 511 | /* Lean left. */ \ 512 | a_type *tnode; \ 513 | bool tred = rbtn_red_get(a_type, a_field, cnode); \ 514 | rbtn_rotate_left(a_type, a_field, cnode, tnode); \ 515 | rbtn_color_set(a_type, a_field, tnode, tred); \ 516 | rbtn_red_set(a_type, a_field, cnode); \ 517 | cnode = tnode; \ 518 | } \ 519 | } else { \ 520 | return; \ 521 | } \ 522 | } \ 523 | pathp->node = cnode; \ 524 | } \ 525 | /* Set root, and make it black. */ \ 526 | rbtree->rbt_root = path->node; \ 527 | rbtn_black_set(a_type, a_field, rbtree->rbt_root); \ 528 | } \ 529 | a_attr void \ 530 | a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ 531 | struct { \ 532 | a_type *node; \ 533 | int cmp; \ 534 | } *pathp, *nodep, path[sizeof(void *) << 4]; \ 535 | /* Wind. */ \ 536 | nodep = NULL; /* Silence compiler warning. 
*/ \ 537 | path->node = rbtree->rbt_root; \ 538 | for (pathp = path; pathp->node != &rbtree->rbt_nil; pathp++) { \ 539 | int cmp = pathp->cmp = a_cmp(node, pathp->node); \ 540 | if (cmp < 0) { \ 541 | pathp[1].node = rbtn_left_get(a_type, a_field, \ 542 | pathp->node); \ 543 | } else { \ 544 | pathp[1].node = rbtn_right_get(a_type, a_field, \ 545 | pathp->node); \ 546 | if (cmp == 0) { \ 547 | /* Find node's successor, in preparation for swap. */ \ 548 | pathp->cmp = 1; \ 549 | nodep = pathp; \ 550 | for (pathp++; pathp->node != &rbtree->rbt_nil; \ 551 | pathp++) { \ 552 | pathp->cmp = -1; \ 553 | pathp[1].node = rbtn_left_get(a_type, a_field, \ 554 | pathp->node); \ 555 | } \ 556 | break; \ 557 | } \ 558 | } \ 559 | } \ 560 | assert(nodep->node == node); \ 561 | pathp--; \ 562 | if (pathp->node != node) { \ 563 | /* Swap node with its successor. */ \ 564 | bool tred = rbtn_red_get(a_type, a_field, pathp->node); \ 565 | rbtn_color_set(a_type, a_field, pathp->node, \ 566 | rbtn_red_get(a_type, a_field, node)); \ 567 | rbtn_left_set(a_type, a_field, pathp->node, \ 568 | rbtn_left_get(a_type, a_field, node)); \ 569 | /* If node's successor is its right child, the following code */\ 570 | /* will do the wrong thing for the right child pointer. */\ 571 | /* However, it doesn't matter, because the pointer will be */\ 572 | /* properly set when the successor is pruned. */\ 573 | rbtn_right_set(a_type, a_field, pathp->node, \ 574 | rbtn_right_get(a_type, a_field, node)); \ 575 | rbtn_color_set(a_type, a_field, node, tred); \ 576 | /* The pruned leaf node's child pointers are never accessed */\ 577 | /* again, so don't bother setting them to nil. */\ 578 | nodep->node = pathp->node; \ 579 | pathp->node = node; \ 580 | if (nodep == path) { \ 581 | rbtree->rbt_root = nodep->node; \ 582 | } else { \ 583 | if (nodep[-1].cmp < 0) { \ 584 | rbtn_left_set(a_type, a_field, nodep[-1].node, \ 585 | nodep->node); \ 586 | } else { \ 587 | rbtn_right_set(a_type, a_field, nodep[-1].node, \ 588 | nodep->node); \ 589 | } \ 590 | } \ 591 | } else { \ 592 | a_type *left = rbtn_left_get(a_type, a_field, node); \ 593 | if (left != &rbtree->rbt_nil) { \ 594 | /* node has no successor, but it has a left child. */\ 595 | /* Splice node out, without losing the left child. */\ 596 | assert(!rbtn_red_get(a_type, a_field, node)); \ 597 | assert(rbtn_red_get(a_type, a_field, left)); \ 598 | rbtn_black_set(a_type, a_field, left); \ 599 | if (pathp == path) { \ 600 | rbtree->rbt_root = left; \ 601 | } else { \ 602 | if (pathp[-1].cmp < 0) { \ 603 | rbtn_left_set(a_type, a_field, pathp[-1].node, \ 604 | left); \ 605 | } else { \ 606 | rbtn_right_set(a_type, a_field, pathp[-1].node, \ 607 | left); \ 608 | } \ 609 | } \ 610 | return; \ 611 | } else if (pathp == path) { \ 612 | /* The tree only contained one node. */ \ 613 | rbtree->rbt_root = &rbtree->rbt_nil; \ 614 | return; \ 615 | } \ 616 | } \ 617 | if (rbtn_red_get(a_type, a_field, pathp->node)) { \ 618 | /* Prune red node, which requires no fixup. */ \ 619 | assert(pathp[-1].cmp < 0); \ 620 | rbtn_left_set(a_type, a_field, pathp[-1].node, \ 621 | &rbtree->rbt_nil); \ 622 | return; \ 623 | } \ 624 | /* The node to be pruned is black, so unwind until balance is */\ 625 | /* restored. 
*/\ 626 | pathp->node = &rbtree->rbt_nil; \ 627 | for (pathp--; (uintptr_t)pathp >= (uintptr_t)path; pathp--) { \ 628 | assert(pathp->cmp != 0); \ 629 | if (pathp->cmp < 0) { \ 630 | rbtn_left_set(a_type, a_field, pathp->node, \ 631 | pathp[1].node); \ 632 | assert(!rbtn_red_get(a_type, a_field, pathp[1].node)); \ 633 | if (rbtn_red_get(a_type, a_field, pathp->node)) { \ 634 | a_type *right = rbtn_right_get(a_type, a_field, \ 635 | pathp->node); \ 636 | a_type *rightleft = rbtn_left_get(a_type, a_field, \ 637 | right); \ 638 | a_type *tnode; \ 639 | if (rbtn_red_get(a_type, a_field, rightleft)) { \ 640 | /* In the following diagrams, ||, //, and \\ */\ 641 | /* indicate the path to the removed node. */\ 642 | /* */\ 643 | /* || */\ 644 | /* pathp(r) */\ 645 | /* // \ */\ 646 | /* (b) (b) */\ 647 | /* / */\ 648 | /* (r) */\ 649 | /* */\ 650 | rbtn_black_set(a_type, a_field, pathp->node); \ 651 | rbtn_rotate_right(a_type, a_field, right, tnode); \ 652 | rbtn_right_set(a_type, a_field, pathp->node, tnode);\ 653 | rbtn_rotate_left(a_type, a_field, pathp->node, \ 654 | tnode); \ 655 | } else { \ 656 | /* || */\ 657 | /* pathp(r) */\ 658 | /* // \ */\ 659 | /* (b) (b) */\ 660 | /* / */\ 661 | /* (b) */\ 662 | /* */\ 663 | rbtn_rotate_left(a_type, a_field, pathp->node, \ 664 | tnode); \ 665 | } \ 666 | /* Balance restored, but rotation modified subtree */\ 667 | /* root. */\ 668 | assert((uintptr_t)pathp > (uintptr_t)path); \ 669 | if (pathp[-1].cmp < 0) { \ 670 | rbtn_left_set(a_type, a_field, pathp[-1].node, \ 671 | tnode); \ 672 | } else { \ 673 | rbtn_right_set(a_type, a_field, pathp[-1].node, \ 674 | tnode); \ 675 | } \ 676 | return; \ 677 | } else { \ 678 | a_type *right = rbtn_right_get(a_type, a_field, \ 679 | pathp->node); \ 680 | a_type *rightleft = rbtn_left_get(a_type, a_field, \ 681 | right); \ 682 | if (rbtn_red_get(a_type, a_field, rightleft)) { \ 683 | /* || */\ 684 | /* pathp(b) */\ 685 | /* // \ */\ 686 | /* (b) (b) */\ 687 | /* / */\ 688 | /* (r) */\ 689 | a_type *tnode; \ 690 | rbtn_black_set(a_type, a_field, rightleft); \ 691 | rbtn_rotate_right(a_type, a_field, right, tnode); \ 692 | rbtn_right_set(a_type, a_field, pathp->node, tnode);\ 693 | rbtn_rotate_left(a_type, a_field, pathp->node, \ 694 | tnode); \ 695 | /* Balance restored, but rotation modified */\ 696 | /* subtree root, which may actually be the tree */\ 697 | /* root. */\ 698 | if (pathp == path) { \ 699 | /* Set root. 
*/ \ 700 | rbtree->rbt_root = tnode; \ 701 | } else { \ 702 | if (pathp[-1].cmp < 0) { \ 703 | rbtn_left_set(a_type, a_field, \ 704 | pathp[-1].node, tnode); \ 705 | } else { \ 706 | rbtn_right_set(a_type, a_field, \ 707 | pathp[-1].node, tnode); \ 708 | } \ 709 | } \ 710 | return; \ 711 | } else { \ 712 | /* || */\ 713 | /* pathp(b) */\ 714 | /* // \ */\ 715 | /* (b) (b) */\ 716 | /* / */\ 717 | /* (b) */\ 718 | a_type *tnode; \ 719 | rbtn_red_set(a_type, a_field, pathp->node); \ 720 | rbtn_rotate_left(a_type, a_field, pathp->node, \ 721 | tnode); \ 722 | pathp->node = tnode; \ 723 | } \ 724 | } \ 725 | } else { \ 726 | a_type *left; \ 727 | rbtn_right_set(a_type, a_field, pathp->node, \ 728 | pathp[1].node); \ 729 | left = rbtn_left_get(a_type, a_field, pathp->node); \ 730 | if (rbtn_red_get(a_type, a_field, left)) { \ 731 | a_type *tnode; \ 732 | a_type *leftright = rbtn_right_get(a_type, a_field, \ 733 | left); \ 734 | a_type *leftrightleft = rbtn_left_get(a_type, a_field, \ 735 | leftright); \ 736 | if (rbtn_red_get(a_type, a_field, leftrightleft)) { \ 737 | /* || */\ 738 | /* pathp(b) */\ 739 | /* / \\ */\ 740 | /* (r) (b) */\ 741 | /* \ */\ 742 | /* (b) */\ 743 | /* / */\ 744 | /* (r) */\ 745 | a_type *unode; \ 746 | rbtn_black_set(a_type, a_field, leftrightleft); \ 747 | rbtn_rotate_right(a_type, a_field, pathp->node, \ 748 | unode); \ 749 | rbtn_rotate_right(a_type, a_field, pathp->node, \ 750 | tnode); \ 751 | rbtn_right_set(a_type, a_field, unode, tnode); \ 752 | rbtn_rotate_left(a_type, a_field, unode, tnode); \ 753 | } else { \ 754 | /* || */\ 755 | /* pathp(b) */\ 756 | /* / \\ */\ 757 | /* (r) (b) */\ 758 | /* \ */\ 759 | /* (b) */\ 760 | /* / */\ 761 | /* (b) */\ 762 | assert(leftright != &rbtree->rbt_nil); \ 763 | rbtn_red_set(a_type, a_field, leftright); \ 764 | rbtn_rotate_right(a_type, a_field, pathp->node, \ 765 | tnode); \ 766 | rbtn_black_set(a_type, a_field, tnode); \ 767 | } \ 768 | /* Balance restored, but rotation modified subtree */\ 769 | /* root, which may actually be the tree root. */\ 770 | if (pathp == path) { \ 771 | /* Set root. */ \ 772 | rbtree->rbt_root = tnode; \ 773 | } else { \ 774 | if (pathp[-1].cmp < 0) { \ 775 | rbtn_left_set(a_type, a_field, pathp[-1].node, \ 776 | tnode); \ 777 | } else { \ 778 | rbtn_right_set(a_type, a_field, pathp[-1].node, \ 779 | tnode); \ 780 | } \ 781 | } \ 782 | return; \ 783 | } else if (rbtn_red_get(a_type, a_field, pathp->node)) { \ 784 | a_type *leftleft = rbtn_left_get(a_type, a_field, left);\ 785 | if (rbtn_red_get(a_type, a_field, leftleft)) { \ 786 | /* || */\ 787 | /* pathp(r) */\ 788 | /* / \\ */\ 789 | /* (b) (b) */\ 790 | /* / */\ 791 | /* (r) */\ 792 | a_type *tnode; \ 793 | rbtn_black_set(a_type, a_field, pathp->node); \ 794 | rbtn_red_set(a_type, a_field, left); \ 795 | rbtn_black_set(a_type, a_field, leftleft); \ 796 | rbtn_rotate_right(a_type, a_field, pathp->node, \ 797 | tnode); \ 798 | /* Balance restored, but rotation modified */\ 799 | /* subtree root. */\ 800 | assert((uintptr_t)pathp > (uintptr_t)path); \ 801 | if (pathp[-1].cmp < 0) { \ 802 | rbtn_left_set(a_type, a_field, pathp[-1].node, \ 803 | tnode); \ 804 | } else { \ 805 | rbtn_right_set(a_type, a_field, pathp[-1].node, \ 806 | tnode); \ 807 | } \ 808 | return; \ 809 | } else { \ 810 | /* || */\ 811 | /* pathp(r) */\ 812 | /* / \\ */\ 813 | /* (b) (b) */\ 814 | /* / */\ 815 | /* (b) */\ 816 | rbtn_red_set(a_type, a_field, left); \ 817 | rbtn_black_set(a_type, a_field, pathp->node); \ 818 | /* Balance restored. 
*/ \ 819 | return; \ 820 | } \ 821 | } else { \ 822 | a_type *leftleft = rbtn_left_get(a_type, a_field, left);\ 823 | if (rbtn_red_get(a_type, a_field, leftleft)) { \ 824 | /* || */\ 825 | /* pathp(b) */\ 826 | /* / \\ */\ 827 | /* (b) (b) */\ 828 | /* / */\ 829 | /* (r) */\ 830 | a_type *tnode; \ 831 | rbtn_black_set(a_type, a_field, leftleft); \ 832 | rbtn_rotate_right(a_type, a_field, pathp->node, \ 833 | tnode); \ 834 | /* Balance restored, but rotation modified */\ 835 | /* subtree root, which may actually be the tree */\ 836 | /* root. */\ 837 | if (pathp == path) { \ 838 | /* Set root. */ \ 839 | rbtree->rbt_root = tnode; \ 840 | } else { \ 841 | if (pathp[-1].cmp < 0) { \ 842 | rbtn_left_set(a_type, a_field, \ 843 | pathp[-1].node, tnode); \ 844 | } else { \ 845 | rbtn_right_set(a_type, a_field, \ 846 | pathp[-1].node, tnode); \ 847 | } \ 848 | } \ 849 | return; \ 850 | } else { \ 851 | /* || */\ 852 | /* pathp(b) */\ 853 | /* / \\ */\ 854 | /* (b) (b) */\ 855 | /* / */\ 856 | /* (b) */\ 857 | rbtn_red_set(a_type, a_field, left); \ 858 | } \ 859 | } \ 860 | } \ 861 | } \ 862 | /* Set root. */ \ 863 | rbtree->rbt_root = path->node; \ 864 | assert(!rbtn_red_get(a_type, a_field, rbtree->rbt_root)); \ 865 | } \ 866 | a_attr a_type * \ 867 | a_prefix##iter_recurse(a_rbt_type *rbtree, a_type *node, \ 868 | a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ 869 | if (node == &rbtree->rbt_nil) { \ 870 | return (&rbtree->rbt_nil); \ 871 | } else { \ 872 | a_type *ret; \ 873 | if ((ret = a_prefix##iter_recurse(rbtree, rbtn_left_get(a_type, \ 874 | a_field, node), cb, arg)) != &rbtree->rbt_nil \ 875 | || (ret = cb(rbtree, node, arg)) != NULL) { \ 876 | return (ret); \ 877 | } \ 878 | return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ 879 | a_field, node), cb, arg)); \ 880 | } \ 881 | } \ 882 | a_attr a_type * \ 883 | a_prefix##iter_start(a_rbt_type *rbtree, a_type *start, a_type *node, \ 884 | a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ 885 | int cmp = a_cmp(start, node); \ 886 | if (cmp < 0) { \ 887 | a_type *ret; \ 888 | if ((ret = a_prefix##iter_start(rbtree, start, \ 889 | rbtn_left_get(a_type, a_field, node), cb, arg)) != \ 890 | &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \ 891 | return (ret); \ 892 | } \ 893 | return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ 894 | a_field, node), cb, arg)); \ 895 | } else if (cmp > 0) { \ 896 | return (a_prefix##iter_start(rbtree, start, \ 897 | rbtn_right_get(a_type, a_field, node), cb, arg)); \ 898 | } else { \ 899 | a_type *ret; \ 900 | if ((ret = cb(rbtree, node, arg)) != NULL) { \ 901 | return (ret); \ 902 | } \ 903 | return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ 904 | a_field, node), cb, arg)); \ 905 | } \ 906 | } \ 907 | a_attr a_type * \ 908 | a_prefix##iter(a_rbt_type *rbtree, a_type *start, a_type *(*cb)( \ 909 | a_rbt_type *, a_type *, void *), void *arg) { \ 910 | a_type *ret; \ 911 | if (start != NULL) { \ 912 | ret = a_prefix##iter_start(rbtree, start, rbtree->rbt_root, \ 913 | cb, arg); \ 914 | } else { \ 915 | ret = a_prefix##iter_recurse(rbtree, rbtree->rbt_root, cb, arg);\ 916 | } \ 917 | if (ret == &rbtree->rbt_nil) { \ 918 | ret = NULL; \ 919 | } \ 920 | return (ret); \ 921 | } \ 922 | a_attr a_type * \ 923 | a_prefix##reverse_iter_recurse(a_rbt_type *rbtree, a_type *node, \ 924 | a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ 925 | if (node == &rbtree->rbt_nil) { \ 926 | return (&rbtree->rbt_nil); \ 927 | } else { \ 928 | 
a_type *ret; \ 929 | if ((ret = a_prefix##reverse_iter_recurse(rbtree, \ 930 | rbtn_right_get(a_type, a_field, node), cb, arg)) != \ 931 | &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \ 932 | return (ret); \ 933 | } \ 934 | return (a_prefix##reverse_iter_recurse(rbtree, \ 935 | rbtn_left_get(a_type, a_field, node), cb, arg)); \ 936 | } \ 937 | } \ 938 | a_attr a_type * \ 939 | a_prefix##reverse_iter_start(a_rbt_type *rbtree, a_type *start, \ 940 | a_type *node, a_type *(*cb)(a_rbt_type *, a_type *, void *), \ 941 | void *arg) { \ 942 | int cmp = a_cmp(start, node); \ 943 | if (cmp > 0) { \ 944 | a_type *ret; \ 945 | if ((ret = a_prefix##reverse_iter_start(rbtree, start, \ 946 | rbtn_right_get(a_type, a_field, node), cb, arg)) != \ 947 | &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \ 948 | return (ret); \ 949 | } \ 950 | return (a_prefix##reverse_iter_recurse(rbtree, \ 951 | rbtn_left_get(a_type, a_field, node), cb, arg)); \ 952 | } else if (cmp < 0) { \ 953 | return (a_prefix##reverse_iter_start(rbtree, start, \ 954 | rbtn_left_get(a_type, a_field, node), cb, arg)); \ 955 | } else { \ 956 | a_type *ret; \ 957 | if ((ret = cb(rbtree, node, arg)) != NULL) { \ 958 | return (ret); \ 959 | } \ 960 | return (a_prefix##reverse_iter_recurse(rbtree, \ 961 | rbtn_left_get(a_type, a_field, node), cb, arg)); \ 962 | } \ 963 | } \ 964 | a_attr a_type * \ 965 | a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ 966 | a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ 967 | a_type *ret; \ 968 | if (start != NULL) { \ 969 | ret = a_prefix##reverse_iter_start(rbtree, start, \ 970 | rbtree->rbt_root, cb, arg); \ 971 | } else { \ 972 | ret = a_prefix##reverse_iter_recurse(rbtree, rbtree->rbt_root, \ 973 | cb, arg); \ 974 | } \ 975 | if (ret == &rbtree->rbt_nil) { \ 976 | ret = NULL; \ 977 | } \ 978 | return (ret); \ 979 | } 980 | 981 | #endif /* RB_H_ */ 982 | -------------------------------------------------------------------------------- /test_huge.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "chunk.h" 4 | #include "memory.h" 5 | 6 | int main(void) { 7 | // mmap(NULL, CHUNK_SIZE * 4, ...) 8 | void *p = malloc(CHUNK_SIZE * 4); 9 | if (!p) return 1; 10 | 11 | { 12 | // no change to the allocation 13 | void *q = realloc(p, CHUNK_SIZE * 4); 14 | if (q != p) return 1; 15 | 16 | // no change to the allocation 17 | q = realloc(p, CHUNK_SIZE * 4 - (CHUNK_SIZE / 2)); 18 | if (q != p) return 1; 19 | 20 | // in-place shrink, madvise purge 21 | q = realloc(p, CHUNK_SIZE * 2); 22 | if (q != p) return 1; 23 | 24 | // in-place shrink, madvise purge 25 | q = realloc(p, CHUNK_SIZE); 26 | if (q != p) return 1; 27 | 28 | // in-place expand, no syscall 29 | q = realloc(p, CHUNK_SIZE * 2); 30 | if (q != p) return 1; 31 | 32 | // in-place expand, no syscall 33 | q = realloc(p, CHUNK_SIZE * 4); 34 | if (q != p) return 1; 35 | } 36 | 37 | // extended/moved by mremap(..., CHUNK_SIZE * 8, MREMAP_MAYMOVE) 38 | // 39 | // if it is moved, the source is mapped back in (MREMAP_RETAIN landing would be nicer) 40 | p = realloc(p, CHUNK_SIZE * 8); 41 | if (!p) return 1; 42 | 43 | // mmap(NULL, CHUNK_SIZE * 16, ...) 
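// A second huge region; it is freed below so its chunks end up in the recycler and can
// serve as the fixed destination for the moved reallocation afterwards.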
44 | void *dest = malloc(CHUNK_SIZE * 16); 45 | if (!dest) return 1; 46 | 47 | // madvise purge 48 | free(dest); 49 | 50 | // moved via MREMAP_MAYMOVE|MREMAP_FIXED to dest 51 | // 52 | // the source is mapped back in (MREMAP_RETAIN landing would be nicer) 53 | p = realloc(p, CHUNK_SIZE * 16); 54 | if (p != dest) return 1; 55 | 56 | // madvise purge 57 | free(p); 58 | return 0; 59 | } 60 | -------------------------------------------------------------------------------- /test_large.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "chunk.h" 4 | 5 | int main(void) { 6 | void *p = malloc(4096 * 4); 7 | if (!p) return 1; 8 | 9 | { 10 | // in-place shrink 11 | void *q = realloc(p, 4096 * 2); 12 | if (q != p) return 1; 13 | 14 | // in-place shrink 15 | q = realloc(p, 4096); 16 | if (q != p) return 1; 17 | 18 | // in-place expand 19 | q = realloc(p, 4096 * 2); 20 | if (q != p) return 1; 21 | 22 | // in-place expand 23 | q = realloc(p, 4096 * 4); 24 | if (q != p) return 1; 25 | 26 | // in-place expand 27 | q = realloc(p, 4096 * 8); 28 | if (q != p) return 1; 29 | 30 | // in-place expand 31 | q = realloc(p, 4096 * 64); 32 | if (q != p) return 1; 33 | } 34 | 35 | free(p); 36 | return 0; 37 | } 38 | -------------------------------------------------------------------------------- /test_small.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define N 10000000 5 | 6 | void *do_work(void *ptr) { 7 | void **p = malloc(N * sizeof(void *)); 8 | 9 | for (size_t i = 0; i < N; i++) { 10 | p[i] = malloc(16); 11 | if (!p[i]) { 12 | exit(1); 13 | } 14 | } 15 | 16 | for (size_t i = 0; i < N; i++) { 17 | free(p[i]); 18 | } 19 | return ptr; 20 | } 21 | 22 | int main(void) { 23 | pthread_t thread; 24 | pthread_create(&thread, NULL, do_work, NULL); 25 | pthread_join(thread, NULL); 26 | return 0; 27 | } 28 | -------------------------------------------------------------------------------- /util.h: -------------------------------------------------------------------------------- 1 | #ifndef UTIL_H 2 | #define UTIL_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #undef assert 10 | #ifdef NDEBUG 11 | #define assert(expr) ((void)0) 12 | #else 13 | #define assert(expr) (likely(expr) ? (void)0 : abort()) 14 | #endif 15 | 16 | #define likely(x) __builtin_expect(!!(x), 1) 17 | #define unlikely(x) __builtin_expect(!!(x), 0) 18 | #define UNUSED __attribute__((unused)) 19 | #define EXPORT __attribute__((visibility("default"))) 20 | #define COLD __attribute__((cold)) 21 | 22 | #ifndef __has_builtin 23 | #define __has_builtin(x) 0 24 | #endif 25 | 26 | static inline bool size_mul_overflow(size_t a, size_t b, size_t *result) { 27 | #if __has_builtin(__builtin_umul_overflow) || __GNUC__ >= 5 28 | #if INTPTR_MAX == INT32_MAX 29 | return __builtin_umul_overflow(a, b, result); 30 | #else 31 | return __builtin_umull_overflow(a, b, result); 32 | #endif 33 | #else 34 | *result = a * b; 35 | static const size_t mul_no_overflow = 1UL << (sizeof(size_t) * 4); 36 | return (a >= mul_no_overflow || b >= mul_no_overflow) && a && SIZE_MAX / a < b; 37 | #endif 38 | } 39 | 40 | static inline size_t size_log2(size_t x) { 41 | #if INTPTR_MAX == INT32_MAX 42 | return 31 - __builtin_clz(x); 43 | #else 44 | return 63 - __builtin_clzll(x); 45 | #endif 46 | } 47 | 48 | #endif 49 | --------------------------------------------------------------------------------
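A brief illustration of how the size_mul_overflow() helper in util.h is meant to be
used: guarding a count * size computation before allocating. This is a minimal sketch
under the assumption of a calloc-style wrapper; checked_alloc and its use of the public
malloc() are hypothetical and not part of this repository.

    #include <stdlib.h>
    #include <string.h>

    #include "util.h"

    // Reject count * size products that would overflow size_t, then zero the memory.
    static void *checked_alloc(size_t count, size_t size) {
        size_t total;
        if (size_mul_overflow(count, size, &total)) {
            return NULL; // the product would overflow size_t
        }
        void *p = malloc(total);
        if (p) {
            memset(p, 0, total);
        }
        return p;
    }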