├── .gitignore ├── Makefile ├── .clang-format ├── README.md ├── test-malloc.c ├── LICENSE ├── list.h └── alloc.c
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
alloc.o
test-malloc
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
CFLAGS = -O2 -Wall -fPIC -pg -g

# Objects built by this package.  Declared explicitly so that "clean" removes
# exactly what "all" builds (the original recipe referenced an undefined
# $(OBJS) and therefore never removed alloc.o).
OBJS = alloc.o

.PHONY: all clean

all: test-malloc

alloc.o: alloc.c
	$(CC) $(CFLAGS) -c alloc.c -o $@

# $(CFLAGS) must also be passed at link time: -pg has to be present when
# linking or the gprof instrumentation compiled into alloc.o is unusable, and
# test-malloc.c is compiled as part of this link step so it needs the same
# flags as alloc.c.
test-malloc: alloc.o test-malloc.c
	$(CC) $(CFLAGS) -o $@ $^

clean:
	$(RM) $(OBJS) test-malloc
--------------------------------------------------------------------------------
/.clang-format:
--------------------------------------------------------------------------------
BasedOnStyle: Chromium
Language: Cpp
MaxEmptyLinesToKeep: 3
IndentCaseLabels: false
AllowShortIfStatementsOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: false
AllowShortLoopsOnASingleLine: false
DerivePointerAlignment: false
PointerAlignment: Right
SpaceAfterCStyleCast: true
TabWidth: 4
UseTab: Never
IndentWidth: 4
BreakBeforeBraces: Linux
AccessModifierOffset: -4
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Buddy Memory Allocator

This package implements a buddy memory allocator, which is an allocator that
allocates memory within a fixed linear address range. It spans the address
range with a binary tree that tracks free space.

Both "malloc" and "free" are O(log N) time where N is the maximum possible
number of allocations. All of the metadata about the memory chunk (e.g.,
its size and whether it is currently being used) is stored at the beginning
of that block of memory.
11 | 12 | The "buddy" term comes from how the tree is used. When memory is allocated, 13 | nodes in the tree are split recursively until a node of the appropriate size 14 | is reached. Every split results in two child nodes, each of which is the 15 | buddy of the other. When a node is freed, the node and its buddy can be merged 16 | if the buddy is also free. This makes the memory available for larger 17 | allocations again. 18 | 19 | Reference: [Wikipedia](https://en.wikipedia.org/wiki/Buddy_memory_allocation) 20 | -------------------------------------------------------------------------------- /test-malloc.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define MSG(foo) fprintf(stderr, "Line %d: %s\n", __LINE__, foo) 5 | 6 | int main(int argc, char *argv[]) 7 | { 8 | char *p, *q; 9 | double *d; 10 | 11 | MSG("-- Test malloc() for unusual situations"); 12 | 13 | MSG("Allocate small block of 17 bytes "); 14 | p = malloc(17); 15 | if (p == NULL) 16 | MSG("* ERROR: Can't allocate even 17 bytes."); 17 | MSG("Write on allocated block"); 18 | p[0] = p[16] = 17; 19 | 20 | MSG("Allocate big block of 4711 bytes"); 21 | q = malloc(4711); 22 | if (q == NULL) 23 | MSG("* ERROR: Could not allocate big block"); 24 | MSG("Write on allocated block"); 25 | q[4710] = 47; 26 | MSG("Free big block"); 27 | free(q); 28 | 29 | MSG("Free small block"); 30 | free(p); 31 | 32 | MSG("Free NULL"); 33 | free(NULL); 34 | 35 | MSG("Allocate zero"); 36 | if ((p = malloc(0)) != NULL) 37 | MSG("* ERROR: malloc(0) returned non NULL pointer!"); 38 | 39 | MSG("Free pointer from malloc(0)"); 40 | free(p); 41 | 42 | MSG("Test alignment for double"); 43 | if ((d = malloc(2 * sizeof(double))) == NULL) 44 | MSG("* ERROR: malloc(2 * sizeof(double)) returned NULL"); 45 | d[0] = d[1] = (double) 4711.4711; 46 | free(d); 47 | 48 | return 0; 49 | } 50 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | Copyright (c) 2019 National Cheng Kung University, Taiwan. 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright notice, 9 | this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 19 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 22 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | -------------------------------------------------------------------------------- /list.h: -------------------------------------------------------------------------------- 1 | #ifndef INTERNAL_LIST_H 2 | #define INTERNAL_LIST_H 3 | 4 | /* circular doubly-linked list */ 5 | typedef struct list_t { 6 | struct list_t *prev, *next; 7 | } list_t; 8 | 9 | /* 10 | * Initialize a list to empty. Because these are circular lists, an "empty" 11 | * list is an entry where both links point to itself. 
This makes insertion 12 | * and removal simpler because they do not need any branches. 13 | */ 14 | static void list_init(list_t *list) 15 | { 16 | list->prev = list; 17 | list->next = list; 18 | } 19 | 20 | /* 21 | * Append the provided entry to the end of the list. This assumes the entry 22 | * is not in a list already because it overwrites the linked list pointers. 23 | */ 24 | static void list_push(list_t *list, list_t *entry) 25 | { 26 | list_t *prev = list->prev; 27 | entry->prev = prev; 28 | entry->next = list; 29 | prev->next = entry; 30 | list->prev = entry; 31 | } 32 | 33 | /* 34 | * Remove the provided entry from whichever list it is currently in. This 35 | * assumes that the entry is in a list. You do not need to provide the list 36 | * because the lists are circular, so the list's pointers will automatically 37 | * be updated if the first or last entries are removed. 38 | */ 39 | static void list_remove(list_t *entry) 40 | { 41 | list_t *prev = entry->prev; 42 | list_t *next = entry->next; 43 | prev->next = next; 44 | next->prev = prev; 45 | } 46 | 47 | /* 48 | * Remove and return the first entry in the list or NULL if the list is empty. 49 | */ 50 | static list_t *list_pop(list_t *list) 51 | { 52 | list_t *back = list->prev; 53 | if (back == list) 54 | return NULL; 55 | list_remove(back); 56 | return back; 57 | } 58 | 59 | #endif 60 | -------------------------------------------------------------------------------- /alloc.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "list.h" 7 | 8 | /* 9 | * Every allocation needs an 8-byte header to store the allocation size while 10 | * staying 8-byte aligned. The address returned by "malloc" is the address 11 | * right after this header (i.e. the size occupies the 8 bytes before the 12 | * returned address). 
13 | */ 14 | #define HEADER_SIZE 8 15 | 16 | /* 17 | * The minimum allocation size is 16 bytes because we have an 8-byte header and 18 | * we need to stay 8-byte aligned. 19 | */ 20 | #define MIN_ALLOC_LOG2 4 21 | #define MIN_ALLOC ((size_t) 1 << MIN_ALLOC_LOG2) 22 | 23 | /* 24 | * The maximum allocation size is currently set to 2gb. This is the total size 25 | * of the heap. It's technically also the maximum allocation size because the 26 | * heap could consist of a single allocation of this size. But of course real 27 | * heaps will have multiple allocations, so the real maximum allocation limit 28 | * is at most 1G bytes. 29 | */ 30 | #define MAX_ALLOC_LOG2 31 31 | #define MAX_ALLOC ((size_t) 1 << MAX_ALLOC_LOG2) 32 | 33 | /* 34 | * Allocations are done in powers of two starting from MIN_ALLOC and ending at 35 | * MAX_ALLOC inclusive. Each allocation size has a bucket that stores the free 36 | * list for that allocation size. 37 | * 38 | * Given a bucket index, the size of the allocations in that bucket can be 39 | * found with "(size_t)1 << (MAX_ALLOC_LOG2 - bucket)". 40 | */ 41 | #define BUCKET_COUNT (MAX_ALLOC_LOG2 - MIN_ALLOC_LOG2 + 1) 42 | 43 | /* 44 | * Each bucket corresponds to a certain allocation size and stores a free list 45 | * for that size. The bucket at index 0 corresponds to an allocation size of 46 | * MAX_ALLOC (i.e. the whole address space). 47 | */ 48 | static list_t buckets[BUCKET_COUNT]; 49 | 50 | /* 51 | * Free lists are stored as circular doubly-linked lists. Every possible 52 | * allocation size has an associated free list that is threaded through all 53 | * currently free blocks of that size. 54 | * 55 | * We could initialize the allocator by giving it one free block the size of 56 | * the entire address space. However, this would cause us to instantly reserve 57 | * half of the entire address space on the first allocation, since the first 58 | * split would store a free list entry at the start of the right child of the 59 | * root. 
Instead, we have the tree start out small and grow the size of the 60 | * tree as we use more memory. The size of the tree is tracked by this value. 61 | */ 62 | static size_t bucket_limit; 63 | 64 | /* 65 | * This array represents a linearized binary tree of bits. Every possible 66 | * allocation larger than MIN_ALLOC has a node in this tree (and therefore a 67 | * bit in this array). 68 | * 69 | * Given the index for a node, lineraized binary trees allow you to traverse to 70 | * the parent node or the child nodes just by doing simple arithmetic on the 71 | * index: 72 | * - Move to parent: index = (index - 1) / 2; 73 | * - Move to left child: index = index * 2 + 1; 74 | * - Move to right child: index = index * 2 + 2; 75 | * - Move to sibling: index = ((index - 1) ^ 1) + 1; 76 | * 77 | * Each node in this tree can be in one of several states: 78 | * 79 | * - UNUSED (both children are UNUSED) 80 | * - SPLIT (one child is UNUSED and the other child isn't) 81 | * - USED (neither children are UNUSED) 82 | * 83 | * These states take two bits to store. However, it turns out we have enough 84 | * information to distinguish between UNUSED and USED from context, so we only 85 | * need to store SPLIT or not, which only takes a single bit. 86 | * 87 | * Note that we don't need to store any nodes for allocations of size MIN_ALLOC 88 | * since we only ever care about parent nodes. 89 | */ 90 | static uint8_t node_is_split[(1 << (BUCKET_COUNT - 1)) / 8]; 91 | 92 | /* 93 | * This is the starting address of the address range for this allocator. Every 94 | * returned allocation will be an offset of this pointer from 0 to MAX_ALLOC. 95 | */ 96 | static uint8_t *base_ptr; 97 | 98 | /* 99 | * This is the maximum address that has ever been used by the allocator. It's 100 | * used to know when to call "brk" to request more memory from the kernel. 101 | */ 102 | static uint8_t *max_ptr; 103 | 104 | /* 105 | * Make sure all addresses before "new_value" are valid and can be used. 
Memory 106 | * is allocated in a 2gb address range but that memory is not reserved up 107 | * front. It's only reserved when it's needed by calling this function. This 108 | * will return false if the memory could not be reserved. 109 | */ 110 | static bool update_max_ptr(uint8_t *new_value) 111 | { 112 | if (new_value > max_ptr) { 113 | if (brk(new_value)) 114 | return false; 115 | max_ptr = new_value; 116 | } 117 | return true; 118 | } 119 | 120 | /* 121 | * This maps from the index of a node to the address of memory that node 122 | * represents. The bucket can be derived from the index using a loop but is 123 | * required to be provided here since having them means we can avoid the loop 124 | * and have this function return in constant time. 125 | */ 126 | static uint8_t *ptr_for_node(size_t index, size_t bucket) 127 | { 128 | return base_ptr + 129 | ((index - (1 << bucket) + 1) << (MAX_ALLOC_LOG2 - bucket)); 130 | } 131 | 132 | /* 133 | * This maps from an address of memory to the node that represents that 134 | * address. There are often many nodes that all map to the same address, so 135 | * the bucket is needed to uniquely identify a node. 136 | */ 137 | static size_t node_for_ptr(uint8_t *ptr, size_t bucket) 138 | { 139 | return ((ptr - base_ptr) >> (MAX_ALLOC_LOG2 - bucket)) + (1 << bucket) - 1; 140 | } 141 | 142 | // Given the index of a node, this returns the "is split" flag of the parent. 143 | static bool parent_is_split(size_t index) 144 | { 145 | index = (index - 1) / 2; 146 | return (bool) ((node_is_split[index / 8] >> (index % 8)) & 1); 147 | } 148 | 149 | // Given the index of a node, this flips the "is split" flag of the parent. 150 | static void flip_parent_is_split(size_t index) 151 | { 152 | index = (index - 1) / 2; 153 | node_is_split[index / 8] ^= 1 << (index % 8); 154 | } 155 | 156 | /* 157 | * Given the requested size passed to "malloc", this function returns the index 158 | * of the smallest bucket that can fit that size. 
159 | */ 160 | static size_t bucket_for_request(size_t request) 161 | { 162 | size_t bucket = BUCKET_COUNT - 1; 163 | size_t size = MIN_ALLOC; 164 | 165 | while (size < request) { 166 | bucket--; 167 | size *= 2; 168 | } 169 | 170 | return bucket; 171 | } 172 | 173 | /* 174 | * The tree is always rooted at the current bucket limit. This call grows the 175 | * tree by repeatedly doubling it in size until the root lies at the provided 176 | * bucket index. Each doubling lowers the bucket limit by 1. 177 | */ 178 | static bool lower_bucket_limit(size_t bucket) 179 | { 180 | while (bucket < bucket_limit) { 181 | size_t root = node_for_ptr(base_ptr, bucket_limit); 182 | 183 | /* 184 | * If the parent isn't SPLIT, that means the node at the current bucket 185 | * limit is UNUSED and our address space is entirely free. In that case, 186 | * clear the root free list, increase the bucket limit, and add a single 187 | * block with the newly-expanded address space to the new root free 188 | * list. 189 | */ 190 | if (!parent_is_split(root)) { 191 | list_remove((list_t *) base_ptr); 192 | list_init(&buckets[--bucket_limit]); 193 | list_push(&buckets[bucket_limit], (list_t *) base_ptr); 194 | continue; 195 | } 196 | 197 | /* 198 | * Otherwise, the tree is currently in use. Create a parent node for the 199 | * current root node in the SPLIT state with a right child on the free 200 | * list. Make sure to reserve the memory for the free list entry before 201 | * writing to it. Note that we do not need to flip the "is split" flag 202 | * for our current parent because it's already on (we know because we 203 | * just checked it above). 
204 | */ 205 | uint8_t *right_child = ptr_for_node(root + 1, bucket_limit); 206 | if (!update_max_ptr(right_child + sizeof(list_t))) 207 | return false; 208 | list_push(&buckets[bucket_limit], (list_t *) right_child); 209 | list_init(&buckets[--bucket_limit]); 210 | 211 | /* 212 | * Set the grandparent's SPLIT flag so if we need to lower the bucket 213 | * limit again, we'll know that the new root node we just added is in 214 | * use. 215 | */ 216 | root = (root - 1) / 2; 217 | if (root != 0) 218 | flip_parent_is_split(root); 219 | } 220 | 221 | return true; 222 | } 223 | 224 | void *malloc(size_t request) 225 | { 226 | if (!request || request + HEADER_SIZE > MAX_ALLOC) 227 | return NULL; 228 | 229 | /* 230 | * Initialize our global state if this is the first call to "malloc". At the 231 | * beginning, the tree has a single node that represents the smallest 232 | * possible allocation size. More memory will be reserved later as needed. 233 | */ 234 | if (!base_ptr) { 235 | /* FIXME: sbrk is deprecated on some platforms where mmap is suggested 236 | * as a better replacement (macOS, FreeBSD). sbrk is generally 237 | * onsidered quite archaic. 238 | */ 239 | base_ptr = max_ptr = (uint8_t *) sbrk(0); 240 | bucket_limit = BUCKET_COUNT - 1; 241 | update_max_ptr(base_ptr + sizeof(list_t)); 242 | list_init(&buckets[BUCKET_COUNT - 1]); 243 | list_push(&buckets[BUCKET_COUNT - 1], (list_t *) base_ptr); 244 | } 245 | 246 | /* 247 | * Find the smallest bucket that will fit this request. This doesn't check 248 | * that there's space for the request yet. 249 | */ 250 | size_t bucket = bucket_for_request(request + HEADER_SIZE); 251 | size_t original_bucket = bucket; 252 | 253 | /* 254 | * Search for a bucket with a non-empty free list that's as large or larger 255 | * than what we need. If there isn't an exact match, we'll need to split a 256 | * larger one to get a match. 
257 | */ 258 | while (bucket + 1 != 0) { 259 | /* 260 | * We may need to grow the tree to be able to fit an allocation of this 261 | * size. Try to grow the tree and stop here if we can't. 262 | */ 263 | if (!lower_bucket_limit(bucket)) 264 | return NULL; 265 | 266 | /* 267 | * Try to pop a block off the free list for this bucket. If the free 268 | * list is empty, we're going to have to split a larger block instead. 269 | */ 270 | uint8_t *ptr = (uint8_t *) list_pop(&buckets[bucket]); 271 | if (!ptr) { 272 | /* 273 | * If we're not at the root of the tree or it's impossible to grow 274 | * the tree any more, continue on to the next bucket. 275 | */ 276 | if (bucket != bucket_limit || bucket == 0) { 277 | bucket--; 278 | continue; 279 | } 280 | 281 | /* 282 | * Otherwise, grow the tree one more level and then pop a block off 283 | * the free list again. Since we know the root of the tree is used 284 | * (because the free list was empty), this will add a parent above 285 | * this node in the SPLIT state and then add the new right child 286 | * node to the free list for this bucket. Popping the free list will 287 | * give us this right child. 288 | */ 289 | if (!lower_bucket_limit(bucket - 1)) 290 | return NULL; 291 | ptr = (uint8_t *) list_pop(&buckets[bucket]); 292 | } 293 | 294 | /* 295 | * Try to expand the address space first before going any further. If we 296 | * have run out of space, put this block back on the free list and fail. 297 | */ 298 | size_t size = (size_t) 1 << (MAX_ALLOC_LOG2 - bucket); 299 | size_t bytes_needed = 300 | bucket < original_bucket ? size / 2 + sizeof(list_t) : size; 301 | if (!update_max_ptr(ptr + bytes_needed)) { 302 | list_push(&buckets[bucket], (list_t *) ptr); 303 | return NULL; 304 | } 305 | 306 | /* 307 | * If we got a node off the free list, change the node from UNUSED to 308 | * USED. 
This involves flipping our parent's "is split" bit because that 309 | * bit is the exclusive-or of the UNUSED flags of both children, and our 310 | * UNUSED flag (which isn't ever stored explicitly) has just changed. 311 | * 312 | * Note that we shouldn't ever need to flip the "is split" bit of our 313 | * grandparent because we know our buddy is USED so it's impossible for 314 | * our grandparent to be UNUSED (if our buddy chunk was UNUSED, our 315 | * parent wouldn't ever have been split in the first place). 316 | */ 317 | size_t i = node_for_ptr(ptr, bucket); 318 | if (i != 0) 319 | flip_parent_is_split(i); 320 | 321 | /* 322 | * If the node we got is larger than we need, split it down to the 323 | * correct size and put the new unused child nodes on the free list in 324 | * the corresponding bucket. This is done by repeatedly moving to the 325 | * left child, splitting the parent, and then adding the right child to 326 | * the free list. 327 | */ 328 | while (bucket < original_bucket) { 329 | i = i * 2 + 1; 330 | bucket++; 331 | flip_parent_is_split(i); 332 | list_push(&buckets[bucket], (list_t *) ptr_for_node(i + 1, bucket)); 333 | } 334 | 335 | /* 336 | * Now that we have a memory address, write the block header (just the 337 | * size of the allocation) and return the address immediately after the 338 | * header. 339 | */ 340 | *(size_t *) ptr = request; 341 | return ptr + HEADER_SIZE; 342 | } 343 | 344 | return NULL; 345 | } 346 | 347 | void free(void *ptr) 348 | { 349 | /* Ignore any attempts to free a NULL pointer */ 350 | if (!ptr) 351 | return; 352 | 353 | /* 354 | * We were given the address returned by "malloc" so get back to the actual 355 | * address of the node by subtracting off the size of the block header. Then 356 | * look up the index of the node corresponding to this address. 
357 | */ 358 | ptr = (uint8_t *) ptr - HEADER_SIZE; 359 | size_t bucket = bucket_for_request(*(size_t *) ptr + HEADER_SIZE); 360 | size_t i = node_for_ptr((uint8_t *) ptr, bucket); 361 | 362 | /* 363 | * Traverse up to the root node, flipping USED blocks to UNUSED and merging 364 | * UNUSED buddies together into a single UNUSED parent. 365 | */ 366 | while (i != 0) { 367 | /* 368 | * Change this node from UNUSED to USED. This involves flipping our 369 | * parent's "is split" bit because that bit is the exclusive-or of the 370 | * UNUSED flags of both children, and our UNUSED flag (which isn't ever 371 | * stored explicitly) has just changed. 372 | */ 373 | flip_parent_is_split(i); 374 | 375 | /* 376 | * If the parent is now SPLIT, that means our buddy is USED, so don't 377 | * merge with it. Instead, stop the iteration here and add ourselves to 378 | * the free list for our bucket. 379 | * 380 | * Also stop here if we're at the current root node, even if that root 381 | * node is now UNUSED. Root nodes don't have a buddy so we can't merge 382 | * with one. 383 | */ 384 | if (parent_is_split(i) || bucket == bucket_limit) 385 | break; 386 | 387 | /* 388 | * If we get here, we know our buddy is UNUSED. In this case we should 389 | * merge with that buddy and continue traversing up to the root node. We 390 | * need to remove the buddy from its free list here but we don't need to 391 | * add the merged parent to its free list yet. That will be done once 392 | * after this loop is finished. 393 | */ 394 | list_remove((list_t *) ptr_for_node(((i - 1) ^ 1) + 1, bucket)); 395 | i = (i - 1) / 2; 396 | bucket--; 397 | } 398 | 399 | /* 400 | * Add ourselves to the free list for our bucket. We add to the back of the 401 | * list because "malloc" takes from the back of the list and we want a 402 | * "free" followed by a "malloc" of the same size to ideally use the same 403 | * address for better memory locality. 
404 | */ 405 | list_push(&buckets[bucket], (list_t *) ptr_for_node(i, bucket)); 406 | } 407 | --------------------------------------------------------------------------------