├── .clang-format ├── .gitignore ├── COPYING ├── README.org ├── meson.build └── src ├── main.c ├── test.c ├── test.h ├── vmem.c └── vmem.h /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | BasedOnStyle: LLVM 4 | UseTab: Never 5 | IndentWidth: 4 6 | TabWidth: 4 7 | BreakBeforeBraces: Allman 8 | AllowShortIfStatementsOnASingleLine: false 9 | IndentCaseLabels: false 10 | ColumnLimit: 0 11 | AccessModifierOffset: -4 12 | FixNamespaceComments: true 13 | SpaceBeforeInheritanceColon: true 14 | BreakInheritanceList: AfterColon 15 | IndentPPDirectives: AfterHash 16 | 17 | ... 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Object files 5 | *.o 6 | *.ko 7 | *.obj 8 | *.elf 9 | 10 | # Linker output 11 | *.ilk 12 | *.map 13 | *.exp 14 | 15 | # Precompiled Headers 16 | *.gch 17 | *.pch 18 | 19 | # Libraries 20 | *.lib 21 | *.a 22 | *.la 23 | *.lo 24 | 25 | # Shared objects (inc. Windows DLLs) 26 | *.dll 27 | *.so 28 | *.so.* 29 | *.dylib 30 | 31 | # Executables 32 | *.exe 33 | *.out 34 | *.app 35 | *.i*86 36 | *.x86_64 37 | *.hex 38 | 39 | # Debug files 40 | *.dSYM/ 41 | *.su 42 | *.idb 43 | *.pdb 44 | 45 | # Kernel Module Compile Results 46 | *.mod* 47 | *.cmd 48 | .tmp_versions/ 49 | modules.order 50 | Module.symvers 51 | Mkfile.old 52 | dkms.conf 53 | 54 | build 55 | .cache 56 | 57 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 
23 | 24 | For more information, please refer to <https://unlicense.org> 25 | -------------------------------------------------------------------------------- /README.org: -------------------------------------------------------------------------------- 1 | #+AUTHOR: me 2 | #+TITLE: TinyVMem 3 | 4 | TinyVMem is a public domain implementation of the VMem resource allocator first described in the paper [[https://www.usenix.org/legacy/publications/library/proceedings/usenix01/full_papers/bonwick/bonwick.pdf]["Magazines and Vmem: Extending the Slab Allocator to Many CPUs and Arbitrary Resources"]] 5 | 6 | ** Features 7 | - VMem, despite its name, is not limited to allocating virtual address space; it can manage any kind of integer resource (process IDs, for example). 8 | - Support for multiple allocation strategies such as best-fit and instant-fit (constant time). Next-fit support is planned. 9 | - Reduced fragmentation. 10 | - Allows importing spans from other arenas. 11 | 12 | ** Porting 13 | TinyVMem is written in portable ANSI C, so porting it to a new platform should be straightforward. 14 | If you're targeting a freestanding environment, you need to define the =__KERNEL__= macro and provide the following functions/macros: 15 | #+BEGIN_SRC c 16 | /* Allocates 'n' pages */ 17 | void *vmem_alloc_pages(size_t n); 18 | 19 | /* Locks a global lock (defined by the user) */ 20 | void vmem_lock(void); 21 | 22 | /* Unlocks the same global lock */ 23 | void vmem_unlock(void); 24 | 25 | /* From libc's string.h */ 26 | char *strcpy(char *dst, const char *src); 27 | 28 | /* Assertion macro */ 29 | #define ASSERT(x) assert(x) 30 | 31 | /* Printf-like function; can be a no-op if you're not going to call vmem_dump() */ 32 | #define vmem_printf printf 33 | 34 | #+END_SRC 35 | 36 | You also need to have a complete implementation of =sys/queue.h= available. If not, I suggest you use [[https://github.com/IIJ-NetBSD/netbsd-src/blob/master/sys/sys/queue.h][NetBSD's]].
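37 | 38 | ** Example 39 | A minimal sketch of hosted usage, mirroring =src/test.c= (the arena name, base address, and sizes here are illustrative): 40 | #+BEGIN_SRC c 41 | #include <vmem.h> 42 | 43 | int main(void) 44 | { 45 | Vmem va; 46 | void *p; 47 | 48 | /* Seed the global segment freelist; call once before creating arenas */ 49 | vmem_bootstrap(); 50 | 51 | /* Arena managing the integer range [0x1000, 0x101000) in 0x1000-sized quanta */ 52 | vmem_init(&va, "demo-va", (void *)0x1000, 0x100000, 0x1000, NULL, NULL, NULL, 0, 0); 53 | 54 | p = vmem_alloc(&va, 0x2000, VM_INSTANTFIT); /* constant-time allocation */ 55 | vmem_free(&va, p, 0x2000); 56 | 57 | vmem_destroy(&va); 58 | return 0; 59 | } 60 | #+END_SRC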
61 | 62 | ** Todo 63 | - Implement support for next-fit allocation 64 | - Implement support for VM_NOSLEEP and VM_SLEEP 65 | -------------------------------------------------------------------------------- /meson.build: -------------------------------------------------------------------------------- 1 | project('vmem', 'c', default_options: ['c_std=c89', 'warning_level=3', 'werror=true']) 2 | 3 | cmocka = dependency('cmocka') 4 | 5 | srcs = files('src/vmem.c', 'src/main.c', 'src/test.c') 6 | inc = include_directories('src') 7 | 8 | executable('vmem', srcs, include_directories: inc, dependencies: cmocka) 9 | -------------------------------------------------------------------------------- /src/main.c: -------------------------------------------------------------------------------- 1 | #include <vmem.h> 2 | #include <test.h> 3 | 4 | int main(void) 5 | { 6 | vmem_bootstrap(); 7 | return vmem_run_tests(); 8 | } 9 | -------------------------------------------------------------------------------- /src/test.c: -------------------------------------------------------------------------------- 1 | /* clang-format off */ 2 | #define inline /* cmocka's headers use 'inline', which C89 lacks */ 3 | #include <stdarg.h> 4 | #include <stddef.h> 5 | #include <setjmp.h> 6 | #include <stdint.h> 7 | #include <cmocka.h> 8 | #include <vmem.h> 9 | #include <test.h> 10 | /* clang-format on */ 11 | 12 | #define VMEM_ADDR_MIN (void *)0 13 | #define VMEM_ADDR_MAX (void *)(~(uintptr_t)0) 14 | 15 | /* We cannot use cmocka's test state since that would require C99 */ 16 | static Vmem vmem_va; 17 | static Vmem vmem_wired; 18 | 19 | static void *internal_allocwired(Vmem *vmem, size_t size, int vmflag) 20 | { 21 | return vmem_alloc(vmem, size, vmflag); 22 | } 23 | 24 | static void internal_freewired(Vmem *vmem, void *ptr, size_t size) 25 | { 26 | vmem_free(vmem, ptr, size); 27 | } 28 | 29 | static void test_vmem_alloc(void **state) 30 | { 31 | int prev_in_use = vmem_va.stat.in_use; 32 | void *ret = vmem_alloc(&vmem_va, 0x1000, VM_INSTANTFIT); 33 | void *ret2 = vmem_alloc(&vmem_va, 0x1000, VM_INSTANTFIT); 34 | 35 | (void)state; 36 | 37 | assert_ptr_equal(ret, (void *)0x1000); 38 | assert_ptr_equal(ret2, (void *)0x2000); 39 | assert_int_equal(vmem_va.stat.in_use, prev_in_use + 0x2000); 40 | 41 | vmem_free(&vmem_va, ret, 0x1000); 42 | vmem_free(&vmem_va, ret2, 0x1000); 43 | } 44 | 45 | static void test_vmem_free(void **state) 46 | { 47 | void *ret = vmem_alloc(&vmem_va, 0x1000, VM_INSTANTFIT); 48 | int prev_free = vmem_va.stat.free; 49 | 50 | (void)state; 51 | 52 | assert_ptr_not_equal(ret, NULL); 53 | 54 | vmem_free(&vmem_va, ret, 0x1000); 55 | 56 | assert_int_equal(vmem_va.stat.free, prev_free + 0x1000); 57 | } 58 | 59 | static void test_vmem_free_coalesce(void **state) 60 | { 61 | void *ptr1, *ptr2, *ptr3, *ptr4; 62 | int prev_free; 63 | 64 | (void)state; 65 | 66 | ptr1 = vmem_alloc(&vmem_va, 0x1000, VM_INSTANTFIT); 67 | ptr2 = vmem_alloc(&vmem_va, 0x1000, VM_INSTANTFIT); 68 | ptr3 = vmem_alloc(&vmem_va, 0x1000, VM_INSTANTFIT); 69 | ptr4 = vmem_alloc(&vmem_va, 0x1000, VM_INSTANTFIT); 70 | 71 | prev_free = vmem_va.stat.free; 72 | 73 | vmem_xfree(&vmem_va, ptr2, 0x1000); 74 | vmem_xfree(&vmem_va, ptr1, 0x1000); 75 | vmem_xfree(&vmem_va, ptr4, 0x1000); 76 | vmem_xfree(&vmem_va, ptr3, 0x1000); 77 | 78 | assert_int_equal(vmem_va.stat.free, prev_free + 0x4000); 79 | } 80 | 81 | static void test_vmem_imported(void **state) 82 | { 83 | void *ret = vmem_alloc(&vmem_wired, 0x1000, VM_INSTANTFIT); 84 | void *ret2 = vmem_alloc(&vmem_wired, 0x1000, VM_INSTANTFIT); 85 | 86 | (void)state; 87 | 88 | assert_ptr_equal(ret, (void *)0x1000); 89 | assert_ptr_equal(ret2, (void *)0x2000); 90 | 91 |
vmem_free(&vmem_wired, ret, 0x1000); 92 | vmem_free(&vmem_wired, ret2, 0x1000); 93 | } 94 | 95 | int vmem_run_tests(void) 96 | { 97 | int r; 98 | const struct CMUnitTest tests[] = { 99 | cmocka_unit_test(test_vmem_alloc), 100 | cmocka_unit_test(test_vmem_free), 101 | cmocka_unit_test(test_vmem_free_coalesce), 102 | cmocka_unit_test(test_vmem_imported), 103 | }; 104 | 105 | vmem_init(&vmem_va, "tests-va", (void *)0x1000, 0x100000, 0x1000, NULL, NULL, NULL, 0, 0); 106 | vmem_init(&vmem_wired, "tests-wired", 0, 0, 0x1000, internal_allocwired, internal_freewired, &vmem_va, 0, 0); 107 | 108 | r = cmocka_run_group_tests(tests, NULL, NULL); 109 | 110 | vmem_destroy(&vmem_va); 111 | vmem_destroy(&vmem_wired); 112 | 113 | return r; 114 | } 115 | -------------------------------------------------------------------------------- /src/test.h: -------------------------------------------------------------------------------- 1 | #ifndef _VMEM_TEST_H 2 | #define _VMEM_TEST_H 3 | 4 | int vmem_run_tests(void); 5 | 6 | #endif 7 | -------------------------------------------------------------------------------- /src/vmem.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Public Domain implementation of the VMem Resource Allocator 3 | * 4 | * See: Adams, A. and Bonwick, J. (2001). Magazines and Vmem: Extending the Slab 5 | * Allocator to Many CPUs and Arbitrary Resources. 6 | * More implementation details are available in "vmem.h" 7 | */ 8 | 9 | #include <stdint.h> 10 | #include <string.h> 11 | #include <vmem.h> 12 | 13 | #ifndef __KERNEL__ 14 | # include <assert.h> 15 | # include <stdio.h> 16 | # include <stdlib.h> 17 | # define vmem_printf printf 18 | # define ASSERT assert 19 | # define vmem_alloc_pages(x) malloc((x) * 4096) 20 | #endif 21 | 22 | #define ARR_SIZE(x) (sizeof(x) / sizeof(*x)) 23 | #define VMEM_ADDR_MIN 0 24 | #define VMEM_ADDR_MAX (~(uintptr_t)0) 25 | 26 | /* Assuming FREELISTS_N is 64, 27 | * we can calculate the freelist index by subtracting the leading zero count from 64. 28 | * For example, take size 4096: clzl(4096) is 51, and 64 - 51 is 13. 29 | * We then subtract 1 from 13 because 2^13 equals 8192, whereas 4096 belongs in the list covering [2^12, 2^13). 30 | */ 31 | #define GET_LIST(size) (FREELISTS_N - __builtin_clzl(size) - 1) 32 | 33 | #define VMEM_ALIGNUP(addr, align) \ 34 | (((addr) + (align)-1) & ~((align)-1)) 35 | 36 | #define MIN(a, b) (((a) < (b)) ? (a) : (b)) 37 | #define MAX(a, b) (((a) > (b)) ? (a) : (b)) 38 | 39 | /* We need to keep a global freelist of segments: allocating memory (i.e. a segment) to describe a new span would itself require a segment, a cyclic dependency (kernel only;
40 | in non-kernel builds the backing memory simply comes from the host's `malloc` through vmem_alloc_pages()). */ 41 | static VmemSegment static_segs[128]; 42 | static VmemSegList free_segs = LIST_HEAD_INITIALIZER(free_segs); 43 | static int nfreesegs = 0; 44 | 45 | static const char *seg_type_str[] = { 46 | "allocated", 47 | "free", 48 | "span"}; 49 | 50 | #ifdef __KERNEL__ 51 | 52 | void vmem_lock(void); 53 | void vmem_unlock(void); 54 | 55 | #else 56 | # define vmem_lock() 57 | # define vmem_unlock() 58 | #endif 59 | 60 | static VmemSegment *seg_alloc(void) 61 | { 62 | /* TODO: when bootstrapped, allocate boundary tags dynamically as described in the paper */ 63 | VmemSegment *vsp; 64 | 65 | vmem_lock(); 66 | ASSERT(!LIST_EMPTY(&free_segs)); 67 | vsp = LIST_FIRST(&free_segs); 68 | LIST_REMOVE(vsp, seglist); 69 | nfreesegs--; 70 | vmem_unlock(); 71 | 72 | return vsp; 73 | } 74 | 75 | static void seg_free(VmemSegment *seg) 76 | { 77 | vmem_lock(); 78 | LIST_INSERT_HEAD(&free_segs, seg, seglist); 79 | nfreesegs++; 80 | vmem_unlock(); 81 | } 82 | 83 | static int repopulate_segments(void) 84 | { 85 | struct 86 | { 87 | VmemSegment segs[64]; 88 | } *segblock; 89 | size_t i; 90 | 91 | if (nfreesegs >= 128) 92 | return 0; 93 | 94 | /* Add 64 new segments */ 95 | segblock = vmem_alloc_pages(1); 96 | if (!segblock) return -VMEM_ERR_NO_MEM; 97 | for (i = 0; i < ARR_SIZE(segblock->segs); i++) 98 | { 99 | seg_free(&segblock->segs[i]); 100 | } 101 | 102 | return 0; 103 | } 104 | 105 | static int seg_fit(VmemSegment *segment, size_t size, size_t align, size_t phase, size_t nocross, uintptr_t minaddr, uintptr_t maxaddr, uintptr_t *addrp) 106 | { 107 | uintptr_t start, end; 108 | ASSERT(size > 0); 109 | ASSERT(segment->size >= size); 110 | 111 | start = MAX(segment->base, minaddr); 112 | end = MIN(segment->base + segment->size, maxaddr); 113 | 114 | if (start > end) 115 | return -VMEM_ERR_NO_MEM; 116 | 117 | /* Phase is the offset from the alignment boundary. 118 | * For example, if `start` is 260, `phase` is 8 and align is `64`, we need to do the following calculation: 119 | * ALIGN_UP(260 - 8, 64) = 256. 256 + 8 = 264. (264 % 64) is 8 as requested. 120 | */ 121 | start = VMEM_ALIGNUP(start - phase, align) + phase; 122 | 123 | /* If, for some reason, `start` ended up below the segment base, bump it up by one `align` step so it lands back inside the segment. 124 | * This can happen if, for example, the computed `start` is 0 while segment->base is 0x1000 125 | * and align is 0x1000.
*/ 126 | if (start < segment->base) 127 | { 128 | start += align; 129 | } 130 | 131 | ASSERT(nocross == 0 && "Not implemented (yet)"); 132 | 133 | /* Ensure that `end` is bigger than `start` and that the fitted range is large enough */ 134 | if (start <= end && (end - start) >= size) 135 | { 136 | *addrp = start; 137 | return 0; 138 | } 139 | 140 | return -VMEM_ERR_NO_MEM; 141 | } 142 | /* MurmurHash3 64-bit finalizer: mixes an address into a uniformly distributed hash */ 143 | static uint64_t murmur64(uint64_t h) 144 | { 145 | h ^= h >> 33; 146 | h *= 0xff51afd7ed558ccdL; 147 | h ^= h >> 33; 148 | h *= 0xc4ceb9fe1a85ec53L; 149 | h ^= h >> 33; 150 | return h; 151 | } 152 | 153 | static VmemSegList *hashtable_for_addr(Vmem *vmem, uintptr_t addr) 154 | { 155 | /* Hash the address and reduce it modulo the number of buckets */ 156 | uintptr_t idx = murmur64(addr) % ARR_SIZE(vmem->hashtable); 157 | return &vmem->hashtable[idx]; 158 | } 159 | 160 | static void hashtab_insert(Vmem *vmem, VmemSegment *seg) 161 | { 162 | LIST_INSERT_HEAD(hashtable_for_addr(vmem, seg->base), seg, seglist); 163 | } 164 | 165 | static VmemSegList *freelist_for_size(Vmem *vmem, size_t size) 166 | { 167 | return &vmem->freelist[GET_LIST(size) - 1]; 168 | } 169 | 170 | static int vmem_contains(Vmem *vmp, void *address, size_t size) 171 | { 172 | VmemSegment *seg; 173 | uintptr_t start = (uintptr_t)address; 174 | uintptr_t end = start + size; 175 | 176 | TAILQ_FOREACH(seg, &vmp->segqueue, segqueue) 177 | { 178 | if (start >= seg->base && end <= seg->base + seg->size) 179 | { 180 | return true; 181 | } 182 | } 183 | return false; 184 | } 185 | 186 | static void vmem_add_to_freelist(Vmem *vm, VmemSegment *seg) 187 | { 188 | LIST_INSERT_HEAD(freelist_for_size(vm, seg->size), seg, seglist); 189 | } 190 | 191 | static void vmem_insert_segment(Vmem *vm, VmemSegment *seg, VmemSegment *prev) 192 | { 193 | 194 | TAILQ_INSERT_AFTER(&vm->segqueue, prev, seg, segqueue); 195 | } 196 | 197 | static VmemSegment *vmem_add_internal(Vmem *vmem, void *base, size_t size, bool import) 198 | { 199 | VmemSegment *newspan, *newfree; 200 | 201 | newspan = seg_alloc(); 202 | 203 | ASSERT(newspan); 204 | 205 | newspan->base = (uintptr_t)base; 206 | newspan->size = size; 207 | newspan->type = SEGMENT_SPAN; 208 | newspan->imported = import; 209 | 210 | newfree = seg_alloc(); 211 | 212 | ASSERT(newfree); 213 | 214 | newfree->base = (uintptr_t)base; 215 | newfree->size = size; 216 | newfree->type = SEGMENT_FREE; 217 | 218 | TAILQ_INSERT_TAIL(&vmem->segqueue, newspan, segqueue); 219 | vmem_insert_segment(vmem, newfree, newspan); 220 | vmem_add_to_freelist(vmem, newfree); 221 | 222 | return newfree; 223 | } 224 | 225 | static int vmem_import(Vmem *vmp, size_t size, int vmflag) 226 | { 227 | void *addr; 228 | VmemSegment *new_seg; 229 | if (!vmp->alloc) 230 | return -VMEM_ERR_NO_MEM; 231 | 232 | addr = vmp->alloc(vmp->source, size, vmflag); 233 | 234 | if (!addr) 235 | return -VMEM_ERR_NO_MEM; 236 | 237 | new_seg = vmem_add_internal(vmp, addr, size, true); 238 | if (!new_seg) 239 | { 240 | vmp->free(vmp->source, addr, size); 241 | return -VMEM_ERR_NO_MEM; 242 | } 243 | 244 | vmp->stat.import += size; /* Account for the imported span */ 245 | return 0; 246 | } 247 | int vmem_init(Vmem *ret, char *name, void *base, size_t size, size_t quantum, VmemAlloc *afunc, VmemFree *ffunc, Vmem *source, size_t qcache_max, int vmflag) 248 | { 249 | size_t i; 250 | 251 | strcpy(ret->name, name); 252 | 253 | ret->base = base; 254 | ret->size = size; 255 | ret->quantum = quantum; 256 | ret->alloc = afunc; 257 | ret->free = ffunc; 258 | ret->source = source; 259 | ret->qcache_max = qcache_max; 260 | ret->vmflag = vmflag; 261 | ret->stat.free = 0; /* `free` and `total` are accounted for by vmem_add() below */ 262 |
ret->stat.total = 0; 263 | ret->stat.in_use = 0; 264 | ret->stat.import = 0; 265 | 266 | LIST_INIT(&ret->spanlist); 267 | TAILQ_INIT(&ret->segqueue); 268 | 269 | for (i = 0; i < ARR_SIZE(ret->freelist); i++) 270 | { 271 | LIST_INIT(&ret->freelist[i]); 272 | } 273 | 274 | for (i = 0; i < ARR_SIZE(ret->hashtable); i++) 275 | { 276 | LIST_INIT(&ret->hashtable[i]); 277 | } 278 | 279 | /* Add the initial span */ 280 | if (!source && size) 281 | vmem_add(ret, base, size, vmflag); 282 | 283 | return 0; 284 | } 285 | 286 | void vmem_destroy(Vmem *vmp) 287 | { 288 | VmemSegment *seg; 289 | size_t i; 290 | 291 | for (i = 0; i < ARR_SIZE(vmp->hashtable); i++) 292 | ASSERT(LIST_EMPTY(&vmp->hashtable[i])); 293 | 294 | TAILQ_FOREACH(seg, &vmp->segqueue, segqueue) 295 | { 296 | seg_free(seg); 297 | } 298 | } 299 | 300 | void *vmem_add(Vmem *vmp, void *addr, size_t size, int vmflag) 301 | { 302 | ASSERT(!vmem_contains(vmp, addr, size)); 303 | 304 | vmp->stat.free += size; 305 | vmp->stat.total += size; 306 | (void)vmflag; 307 | return vmem_add_internal(vmp, addr, size, false); 308 | } 309 | 310 | void *vmem_xalloc(Vmem *vmp, size_t size, size_t align, size_t phase, 311 | size_t nocross, void *minaddr, void *maxaddr, int vmflag) 312 | { 313 | VmemSegList *first_list = freelist_for_size(vmp, size), *end = &vmp->freelist[FREELISTS_N], *list = NULL; 314 | VmemSegment *new_seg = NULL, *new_seg2 = NULL, *seg = NULL; 315 | uintptr_t start = 0; 316 | void *ret = NULL; 317 | 318 | ASSERT(nocross == 0 && "Not implemented yet"); 319 | 320 | /* If we don't want a specific alignment, we can just use the quantum */ 321 | /* FIXME: What if `align` is not quantum aligned? Maybe add an ASSERT() ? */ 322 | 323 | if (align == 0) 324 | { 325 | align = vmp->quantum; 326 | } 327 | 328 | if (!(vmflag & VM_BOOTSTRAP)) 329 | ASSERT(repopulate_segments() == 0); 330 | 331 | /* Allocate the new segments */ 332 | /* NOTE: new_seg2 might end up unused; in that case, it is freed */ 333 | new_seg = seg_alloc(); 334 | new_seg2 = seg_alloc(); 335 | 336 | ASSERT(new_seg && new_seg2); 337 | 338 | while (true) 339 | { 340 | if (vmflag & VM_INSTANTFIT) /* VM_INSTANTFIT */ 341 | { 342 | /* If the size is not a power of two, search from freelist[n+1] instead of freelist[n] so every candidate is guaranteed to be large enough. We bump a local cursor: `first_list` itself must stay put in case we come around again after an import */ 343 | list = first_list; 344 | if ((size & (size - 1)) != 0) 345 | list++; 346 | 347 | 348 | /* We just get the first segment from the list. This ensures constant-time allocation. 349 | * Note that we do not need to check the size of the segments because they are guaranteed to be big enough (see freelist_for_size) 350 | */ 351 | for (; list < end; list++) 352 | { 353 | seg = LIST_FIRST(list); 354 | if (seg != NULL) 355 | { 356 | if (seg_fit(seg, size, align, phase, nocross, (uintptr_t)minaddr, (uintptr_t)maxaddr, &start) == 0) 357 | goto found; 358 | } 359 | } 360 | } 361 | 362 | else if (vmflag & VM_BESTFIT) /* VM_BESTFIT */ 363 | { 364 | /* TODO: Should we bother going through the entire list to find the absolute best fit?
*/ 365 | 366 | /* Go through every segment in every eligible list, smallest size class first, and take the first free segment that can satisfy the allocation; this approximates best-fit */ 367 | for (list = first_list; list < end; list++) 368 | LIST_FOREACH(seg, list, seglist) 369 | { 370 | if (seg->size >= size) 371 | { 372 | 373 | /* Try to make the segment fit */ 374 | if (seg_fit(seg, size, align, phase, nocross, (uintptr_t)minaddr, (uintptr_t)maxaddr, &start) == 0) 375 | goto found; 376 | } 377 | } 378 | } 379 | else if (vmflag & VM_NEXTFIT) 380 | { 381 | ASSERT(!"TODO: implement nextfit"); 382 | } 383 | 384 | if (vmem_import(vmp, size, vmflag) == 0) 385 | { 386 | continue; 387 | } 388 | 389 | ASSERT(!"Allocation failed"); 390 | return NULL; 391 | } 392 | 393 | found: 394 | ASSERT(seg != NULL); 395 | ASSERT(seg->type == SEGMENT_FREE); 396 | ASSERT(seg->size >= size); 397 | 398 | /* Remove the segment from the freelist; it may be added back (resized) below */ 399 | LIST_REMOVE(seg, seglist); 400 | 401 | if (seg->base != start) 402 | { 403 | /* If `start` is not the base of the segment, we need to create another segment; 404 | * new_seg2 is a free segment covering [base, start). 405 | * We also need to make `seg` start at `start` and reduce its size accordingly. 406 | * For example, if we allocate a segment [0x100, 0x1000] in a [0, 0x10000] span, we need to split [0, 0x10000] into 407 | * [0x0, 0x100] (free), [0x100, 0x1000] (allocated), [0x1000, 0x10000] (free). In this case, `base` is 0 and `start` is 0x100. 408 | * This creates a free segment of size 0x100 that starts at 0. 409 | */ 410 | new_seg2->type = SEGMENT_FREE; 411 | new_seg2->base = seg->base; 412 | new_seg2->size = start - seg->base; 413 | 414 | /* Make `seg` start at `start`; following the example, this makes `seg->base` 0x100 */ 415 | seg->base = start; 416 | 417 | /* Since we moved the segment's start forward, shrink `seg` by the part we carved off */ 418 | seg->size -= new_seg2->size; 419 | 420 | vmem_add_to_freelist(vmp, new_seg2); 421 | 422 | /* Put this new segment before the allocated segment */ 423 | vmem_insert_segment(vmp, new_seg2, TAILQ_PREV(seg, VmemSegQueue, segqueue)); 424 | 425 | /* Ensure it doesn't get freed */ 426 | new_seg2 = NULL; 427 | } 428 | 429 | ASSERT(seg->base == start); 430 | 431 | if (seg->size != size && (seg->size - size) > vmp->quantum - 1) 432 | { 433 | 434 | /* In the case where the segment's size is bigger than the requested size, we need to split the segment into two: 435 | * one free part of size `seg->size - size` and another allocated one of size `size`. For example, if we want to allocate [0, 0x1000] 436 | * and the segment is [0, 0x10000], we have to create a new segment, [0, 0x1000], and offset the current segment by `size`.
We therefore end up with: 437 | * [0, 0x1000] (allocated) and [0x1000, 0x10000] (free). */ 438 | new_seg->type = SEGMENT_ALLOCATED; 439 | new_seg->base = seg->base; 440 | new_seg->size = size; 441 | 442 | /* Offset the segment */ 443 | seg->base += size; 444 | seg->size -= size; 445 | 446 | /* Add it back to the freelist */ 447 | vmem_add_to_freelist(vmp, seg); 448 | 449 | /* Put this new allocated segment before the remaining free segment */ 450 | vmem_insert_segment(vmp, new_seg, TAILQ_PREV(seg, VmemSegQueue, segqueue)); 451 | 452 | hashtab_insert(vmp, new_seg); 453 | } 454 | else 455 | { 456 | seg->type = SEGMENT_ALLOCATED; 457 | hashtab_insert(vmp, seg); 458 | seg_free(new_seg); 459 | new_seg = seg; 460 | } 461 | 462 | if (new_seg2 != NULL) 463 | seg_free(new_seg2); 464 | 465 | ASSERT(new_seg->size >= size); 466 | 467 | vmp->stat.free -= new_seg->size; 468 | vmp->stat.in_use += new_seg->size; 469 | 470 | new_seg->type = SEGMENT_ALLOCATED; 471 | 472 | ret = (void *)new_seg->base; 473 | 474 | return ret; 475 | } 476 | 477 | void *vmem_alloc(Vmem *vmp, size_t size, int vmflag) 478 | { 479 | return vmem_xalloc(vmp, size, 0, 0, 0, (void *)VMEM_ADDR_MIN, (void *)VMEM_ADDR_MAX, vmflag); 480 | } 481 | 482 | void vmem_xfree(Vmem *vmp, void *addr, size_t size) 483 | { 484 | VmemSegment *seg, *neighbor; 485 | VmemSegList *list; 486 | 487 | list = hashtable_for_addr(vmp, (uintptr_t)addr); 488 | 489 | LIST_FOREACH(seg, list, seglist) 490 | { 491 | if (seg->base == (uintptr_t)addr) 492 | { 493 | break; 494 | } 495 | } 496 | 497 | ASSERT(seg != NULL && seg->size == size); /* `addr` must be the base of a live allocation */ 498 | 499 | /* Remove the segment from the hashtable */ 500 | LIST_REMOVE(seg, seglist); 501 | 502 | /* Coalesce to the right */ 503 | neighbor = TAILQ_NEXT(seg, segqueue); 504 | 505 | if (neighbor && neighbor->type == SEGMENT_FREE) 506 | { 507 | /* Remove our neighbor since we're merging with it */ 508 | LIST_REMOVE(neighbor, seglist); 509 | 510 | TAILQ_REMOVE(&vmp->segqueue, neighbor, segqueue); 511 | 512 | seg->size += neighbor->size; 513 | 514 | seg_free(neighbor); 515 | } 516 | 517 | /* Coalesce to the left (a span marker always precedes `seg`, so TAILQ_PREV cannot be NULL here) */ 518 | neighbor = TAILQ_PREV(seg, VmemSegQueue, segqueue); 519 | 520 | if (neighbor->type == SEGMENT_FREE) 521 | { 522 | LIST_REMOVE(neighbor, seglist); 523 | TAILQ_REMOVE(&vmp->segqueue, neighbor, segqueue); 524 | 525 | seg->size += neighbor->size; 526 | seg->base = neighbor->base; 527 | 528 | seg_free(neighbor); 529 | } 530 | 531 | neighbor = TAILQ_PREV(seg, VmemSegQueue, segqueue); 532 | 533 | ASSERT(neighbor->type == SEGMENT_SPAN || neighbor->type == SEGMENT_ALLOCATED); 534 | 535 | seg->type = SEGMENT_FREE; 536 | 537 | if (vmp->free != NULL && neighbor->type == SEGMENT_SPAN && neighbor->imported == true && neighbor->size == seg->size) 538 | { 539 | uintptr_t span_addr = seg->base; 540 | size_t span_size = seg->size; 541 | 542 | TAILQ_REMOVE(&vmp->segqueue, seg, segqueue); 543 | seg_free(seg); 544 | TAILQ_REMOVE(&vmp->segqueue, neighbor, segqueue); 545 | seg_free(neighbor); 546 | 547 | vmp->free(vmp->source, (void *)span_addr, span_size); 548 | } 549 | else 550 | { 551 | vmem_add_to_freelist(vmp, seg); 552 | } 553 | 554 | vmp->stat.in_use -= size; 555 | vmp->stat.free += size; 556 | } 557 | 558 | void vmem_free(Vmem *vmp, void *addr, size_t size) 559 | { 560 | vmem_xfree(vmp, addr, size); 561 | } 562 | 563 | void vmem_dump(Vmem *vmp) 564 | { 565 | VmemSegment *span; 566 | size_t i; 567 | 568 | vmem_printf("-- VMem arena \"%s\" segments --\n", vmp->name); 569 | 570 | TAILQ_FOREACH(span, &vmp->segqueue, segqueue) 571 | { 572 | vmem_printf("[0x%lx, 0x%lx] (%s)", 573 | span->base, span->base + span->size, seg_type_str[span->type]); 574 | if (span->imported) 575 | vmem_printf("(imported)"); 576 | vmem_printf("\n"); 577 | } 578 | 579 | vmem_printf("Hashtable:\n"); 580 | 581 | for (i = 0; i < ARR_SIZE(vmp->hashtable); i++) 582 | LIST_FOREACH(span, &vmp->hashtable[i], seglist) 583 | { 584 | vmem_printf("%lx: [address: %p, size: %p]\n", murmur64(span->base), (void *)span->base, (void *)span->size); 585 | } 586 | vmem_printf("Stat:\n"); 587 | vmem_printf("- in_use: %lu\n", (unsigned long)vmp->stat.in_use); 588 | vmem_printf("- free: %lu\n", (unsigned long)vmp->stat.free); 589 | vmem_printf("- total: %lu\n", (unsigned long)vmp->stat.total); 590 | } 591 | 592 | void vmem_bootstrap(void) 593 | { 594 | size_t i; 595 | for (i = 0; i < ARR_SIZE(static_segs); i++) 596 | { 597 | seg_free(&static_segs[i]); 598 | } 599 | }
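600 | 601 | /* For freestanding (__KERNEL__) builds the host must supply vmem_lock(), 602 | * vmem_unlock() and vmem_alloc_pages() (see README.org). A hypothetical sketch; 603 | * the kernel primitives named here (pmm_alloc, spinlock_acquire, spinlock_release, 604 | * vmem_spinlock) are illustrative and not part of this project: 605 | * 606 | * void *vmem_alloc_pages(size_t n) { return pmm_alloc(n); } 607 | * void vmem_lock(void) { spinlock_acquire(&vmem_spinlock); } 608 | * void vmem_unlock(void) { spinlock_release(&vmem_spinlock); } 609 | */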
-------------------------------------------------------------------------------- /src/vmem.h: -------------------------------------------------------------------------------- 1 | /* Implementation of the VMem resource allocator 2 | as described in https://www.usenix.org/legacy/event/usenix01/full_papers/bonwick/bonwick.pdf 3 | */ 4 | 5 | #ifndef _VMEM_H 6 | #define _VMEM_H 7 | #include <stddef.h> 8 | #include <stdint.h> 9 | #include <stdbool.h> 10 | #include <limits.h> 11 | #include <sys/queue.h> 12 | 13 | /* Directs vmem to use the smallest 14 | free segment that can satisfy the allocation. This 15 | policy tends to minimize fragmentation of very 16 | small, precious resources (cited from paper) */ 17 | #define VM_BESTFIT (1 << 0) 18 | 19 | /* Directs vmem to provide a 20 | good approximation to best-fit in guaranteed 21 | constant time. This is the default allocation policy. (cited from paper) */ 22 | #define VM_INSTANTFIT (1 << 1) 23 | 24 | /* Directs vmem to use the next free 25 | segment after the one previously allocated. This is 26 | useful for things like process IDs, where we want 27 | to cycle through all the IDs before reusing them. (cited from paper) */ 28 | #define VM_NEXTFIT (1 << 2) 29 | 30 | #define VM_SLEEP (1 << 3) 31 | #define VM_NOSLEEP (1 << 4) 32 | 33 | /* Used to break the cyclic dependency when refilling the segment freelist: 34 | refilling requires allocating new segments, which would itself trigger a refill; this flag suppresses the refill. */ 35 | #define VM_BOOTSTRAP (1 << 5) 36 | 37 | #define VMEM_ERR_NO_MEM 1 38 | 39 | struct vmem; 40 | 41 | /* Vmem allows one arena to import its resources from 42 | another. vmem_create() (vmem_init() in this implementation) specifies the source arena, 43 | and the functions to allocate and free from that source. The arena imports new spans as needed, and gives 44 | them back when all their segments have been freed. (cited from paper) These types describe those functions. 45 | */ 46 | typedef void *VmemAlloc(struct vmem *vmem, size_t size, int flags); 47 | typedef void VmemFree(struct vmem *vmem, void *addr, size_t size); 48 | 49 | /* We can't use ordinary boundary tags because the resource we're managing is not necessarily memory. 50 | To counter this, we use *external boundary tags*: for each segment in the arena 51 | we allocate a boundary tag to manage it.
*/ 52 | 53 | /* sizeof(void *) * CHAR_BIT freelists give us one freelist for every power-of-two size that fits in the host's virtual address space (64 freelists on a 64-bit host) */ 54 | #define FREELISTS_N (sizeof(void *) * CHAR_BIT) 55 | #define HASHTABLES_N 16 56 | 57 | typedef struct vmem_segment 58 | { 59 | enum 60 | { 61 | SEGMENT_ALLOCATED, 62 | SEGMENT_FREE, 63 | SEGMENT_SPAN 64 | } type; 65 | 66 | bool imported; /* True if the span was imported from a source arena */ 67 | 68 | uintptr_t base; /* base address of the segment */ 69 | uintptr_t size; /* size of the segment */ 70 | 71 | /* clang-format off */ 72 | TAILQ_ENTRY(vmem_segment) segqueue; /* Links into Vmem::segqueue */ 73 | LIST_ENTRY(vmem_segment) seglist; /* Links into Vmem::freelist if free, Vmem::hashtable if allocated, Vmem::spanlist otherwise */ 74 | /* clang-format on */ 75 | 76 | } VmemSegment; 77 | 78 | typedef LIST_HEAD(VmemSegList, vmem_segment) VmemSegList; 79 | typedef TAILQ_HEAD(VmemSegQueue, vmem_segment) VmemSegQueue; 80 | 81 | /* Statistics about a Vmem arena. NOTE: this isn't described in the original paper and was added by me, inspired by Illumos' and Solaris' vmem_kstat_t */ 82 | typedef struct 83 | { 84 | size_t in_use; /* Memory in use */ 85 | size_t import; /* Imported memory */ 86 | size_t total; /* Total memory in the arena */ 87 | size_t alloc; /* Number of allocations */ 88 | size_t free; /* Free memory */ 89 | } VmemStat; 90 | 91 | /* Description of an arena, a collection of resources. An arena is simply a set of integers. */ 92 | typedef struct vmem 93 | { 94 | char name[64]; /* Descriptive name for debugging purposes */ 95 | void *base; /* Start of initial span */ 96 | size_t size; /* Size of initial span */ 97 | size_t quantum; /* Unit of currency */ 98 | VmemAlloc *alloc; /* Import alloc function */ 99 | VmemFree *free; /* Import free function */ 100 | struct vmem *source; /* Import arena */ 101 | size_t qcache_max; /* Maximum size to cache */ 102 | int vmflag; /* VM_SLEEP or VM_NOSLEEP */ 103 | 104 | VmemSegQueue segqueue; 105 | VmemSegList freelist[FREELISTS_N]; /* Power-of-two freelists: list n holds the free segments of one power-of-two size band (see freelist_for_size() in vmem.c) */ 106 | VmemSegList hashtable[HASHTABLES_N]; /* Allocated segments */ 107 | VmemSegList spanlist; /* Span marker segments */ 108 | 109 | VmemStat stat; 110 | } Vmem; 111 | 112 | /* Initializes a vmem arena (performs no dynamic allocation) */ 113 | int vmem_init(Vmem *vmem, char *name, void *base, size_t size, size_t quantum, VmemAlloc *afunc, VmemFree *ffunc, Vmem *source, size_t qcache_max, int vmflag); 114 | 115 | /* Destroys arena `vmp` */ 116 | void vmem_destroy(Vmem *vmp); 117 | 118 | /* Allocates size bytes from vmp. Returns the allocated address on success, NULL on failure. 119 | vmem_alloc() fails only if vmflag specifies VM_NOSLEEP and no resources are currently available. 120 | vmflag may also specify an allocation policy (VM_BESTFIT, VM_INSTANTFIT, or VM_NEXTFIT). 121 | If no policy is specified the default is VM_INSTANTFIT, which provides a good 122 | approximation to best-fit in guaranteed constant time. (cited from paper) NOTE: this implementation currently expects an explicit policy flag. */ 123 | void *vmem_alloc(Vmem *vmp, size_t size, int vmflag); 124 | 125 | /* Frees `size` bytes at address `addr` in arena `vmp` */ 126 | void vmem_free(Vmem *vmp, void *addr, size_t size); 127 | 128 | /* 129 | Allocates size bytes at offset phase from an align boundary such that the resulting segment 130 | [addr, addr + size) is a subset of [minaddr, maxaddr) that does not straddle a nocross- 131 | aligned boundary. vmflag is as above. One performance caveat: if either minaddr or maxaddr is 132 | non-NULL, vmem may not be able to satisfy the allocation in constant time. If allocations within a 133 | given [minaddr, maxaddr) range are common it is more efficient to declare that range to be its own 134 | arena and use unconstrained allocations on the new arena (cited from paper). 135 | */ 136 | void *vmem_xalloc(Vmem *vmp, size_t size, size_t align, size_t phase, 137 | size_t nocross, void *minaddr, void *maxaddr, int vmflag);
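138 | 139 | /* Example (illustrative; `arena` stands for any initialized Vmem *): allocate one 0x1000-byte 140 | segment that starts 0x100 bytes past a 0x10000-aligned boundary, anywhere in the arena: 141 | void *p = vmem_xalloc(arena, 0x1000, 0x10000, 0x100, 0, (void *)0, (void *)~(uintptr_t)0, VM_INSTANTFIT); 142 | */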
143 | /* 144 | Frees size bytes at addr, where addr was a constrained allocation. vmem_xfree() must be used if 145 | the original allocation was a vmem_xalloc(), because both routines bypass the quantum caches. (cited from paper) 146 | */ 147 | void vmem_xfree(Vmem *vmp, void *addr, size_t size); 148 | 149 | /* Adds the span [addr, addr + size) to arena vmp. Returns addr on success, NULL on failure. 150 | vmem_add() will fail only if vmflag is VM_NOSLEEP and no resources are currently available. (cited from paper) */ 151 | void *vmem_add(Vmem *vmp, void *addr, size_t size, int vmflag); 152 | 153 | /* Dumps the arena `vmp` using the `vmem_printf` function */ 154 | void vmem_dump(Vmem *vmp); 155 | 156 | /* Seeds the global segment freelist; must be called once before any arena is initialized */ 157 | void vmem_bootstrap(void); 158 | 159 | #endif 160 | --------------------------------------------------------------------------------