├── .clang-format
├── .gitignore
├── COPYING
├── Makefile
├── README.md
├── log.h
├── lrmalloc.cpp
├── lrmalloc.h
├── lrmalloc_internal.h
├── mapcache.cpp
├── mapcache.h
├── pagemap.cpp
├── pagemap.h
├── pages.cpp
├── pages.h
├── size_classes.cpp
├── size_classes.h
├── tcache.cpp
├── tcache.h
├── test
│   ├── basic.cpp
│   └── size_class_data.cpp
└── thread_hooks.cpp

/.clang-format:
--------------------------------------------------------------------------------
1 | ---
2 | BasedOnStyle: WebKit
3 | ...
4 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | sc/
2 | *.so
3 | *.a
4 | *.o
5 | *.swp
6 | *.vscode
7 | *.test
--------------------------------------------------------------------------------
/COPYING:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 Ricardo Leite
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
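The Makefile that follows builds both a shared library (liblrmalloc.so) and a static archive (liblrmalloc.a). As a quick illustration of consuming the installed artifacts (the file name app.cpp and the default /usr/local prefix are assumptions for this example, not part of the repository), a static link could look like:

```console
g++ -O2 -o app app.cpp -L/usr/local/lib -llrmalloc -ldl -pthread
```

The trailing -ldl -pthread mirror the LDFLAGS the library itself is built with.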
/Makefile:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (C) 2019 Ricardo Leite. All rights reserved.
3 | # Licenced under the MIT licence. See COPYING file in the project root for details.
4 | #
5 |
6 | PREFIX?=/usr/local
7 |
8 | CXX=g++
9 | DFLAGS=-ggdb -g -fno-omit-frame-pointer
10 | CXXFLAGS=-shared -fPIC -std=gnu++14 -O2 -Wall $(DFLAGS) \
11 | 	-fno-builtin-malloc -fno-builtin-free -fno-builtin-realloc \
12 | 	-fno-builtin-calloc -fno-builtin-cfree -fno-builtin-memalign \
13 | 	-fno-builtin-posix_memalign -fno-builtin-valloc -fno-builtin-pvalloc \
14 | 	-fno-builtin -fsized-deallocation -fno-exceptions
15 |
16 | LDFLAGS=-ldl -pthread
17 |
18 | OBJFILES=lrmalloc.o size_classes.o pages.o pagemap.o tcache.o thread_hooks.o mapcache.o
19 |
20 | default: liblrmalloc.so liblrmalloc.a
21 |
22 | test: all_tests
23 |
24 | %.o : %.cpp
25 | 	$(CXX) $(CXXFLAGS) -c -o $@ $< $(LDFLAGS)
26 |
27 | liblrmalloc.so: $(OBJFILES)
28 | 	$(CXX) $(CXXFLAGS) -o liblrmalloc.so $(OBJFILES) $(LDFLAGS)
29 |
30 | liblrmalloc.a: $(OBJFILES)
31 | 	ar rcs liblrmalloc.a $(OBJFILES)
32 |
33 | all_tests: default basic.test size_class_data.test
34 |
35 | %.test : test/%.cpp liblrmalloc.a
36 | 	$(CXX) $(DFLAGS) -o $@ $< liblrmalloc.a $(LDFLAGS)
37 |
38 | clean:
39 | 	rm -f *.so *.o *.a *.test
40 |
41 | install: default
42 | 	install -d $(DESTDIR)$(PREFIX)/lib/
43 | 	install -m 644 liblrmalloc.so $(DESTDIR)$(PREFIX)/lib/
44 | 	install -m 644 liblrmalloc.a $(DESTDIR)$(PREFIX)/lib/
45 | 	install -d $(DESTDIR)$(PREFIX)/include/
46 | 	install -m 644 lrmalloc.h $(DESTDIR)$(PREFIX)/include/
47 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | ## Introduction
3 | ----
4 | lrmalloc is an efficient, lock-free malloc(3) implementation.
5 |
6 | It is derived from [Michael's lock-free allocator](https://dl.acm.org/citation.cfm?doid=996841.996848), improved with modern memory allocator features such as thread caches and allocator/user memory segregation.
7 |
8 | lrmalloc's philosophy is to provide fast, synchronization-free allocations as much as possible through the use of thread caches, and to use lock-free operations only to fill and empty thread caches.
9 |
10 | ## Usage
11 | ----
12 | To compile, just download this repository and run
13 | ```console
14 | make
15 | ```
16 |
17 | If successfully compiled, you can link lrmalloc with your application at compile time with
18 | ```console
19 | -llrmalloc
20 | ```
21 | or you can dynamically link it with your application by using LD_PRELOAD (if your application was not statically linked with another memory allocator).
22 | ```console
23 | LD_PRELOAD=./liblrmalloc.so ./your_application
24 | ```
25 | ## Copyright
26 |
27 | License: MIT
28 |
29 | Read file [COPYING](COPYING).
30 |
--------------------------------------------------------------------------------
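A quick way to confirm the preload actually takes effect is to query malloc_usable_size; a size-class allocator rounds requests up to its class sizes. The program below is hypothetical (check_alloc.cpp is not a file in this repository); 1000 bytes should map to lrmalloc's 1024-byte class:

```cpp
// check_alloc.cpp -- hypothetical preload smoke check, not part of this repo
#include <cstdio>
#include <cstdlib>
#include <malloc.h>

int main()
{
    void* p = malloc(1000);
    // the allocator rounds the request up to its size class,
    // so usable size should come back >= 1000 (1024 expected here)
    printf("usable size: %zu\n", malloc_usable_size(p));
    free(p);
    return 0;
}
```

```console
g++ -o check_alloc check_alloc.cpp
LD_PRELOAD=./liblrmalloc.so ./check_alloc
```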
/log.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2019 Ricardo Leite. All rights reserved.
3 | * Licenced under the MIT licence. See COPYING file in the project root for
4 | * details.
5 | */
6 |
7 | #ifndef __LOG_H
8 | #define __LOG_H
9 |
10 | #include <stdio.h>
11 | #include <stdlib.h>
12 |
13 | // if 1, enables assertions and other sanity checks
14 | #define LFMALLOC_SANITY 0
15 | // if 1, enables debug output
16 | #define LFMALLOC_DEBUG 0
17 |
18 | #if LFMALLOC_DEBUG
19 | #define LOG_DEBUG(STR, ...) fprintf(stdout, "%s:%d %s " STR "\n", __FILE__, __LINE__, __func__, ##__VA_ARGS__);
20 |
21 | #else
22 | #define LOG_DEBUG(str, ...)
23 |
24 | #endif
25 |
26 | #define LOG_ERR(STR, ...) fprintf(stderr, "%s:%d %s " STR "\n", __FILE__, __LINE__, __func__, ##__VA_ARGS__)
27 |
28 | #if LFMALLOC_SANITY
29 | #define ASSERT(x) \
30 |     do { \
31 |         if (!(x)) \
32 |             abort(); \
33 |     } while (0)
34 | #else
35 | #define ASSERT(x)
36 | #endif
37 |
38 | #endif // __LOG_H
39 |
--------------------------------------------------------------------------------
/lrmalloc.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2019 Ricardo Leite. All rights reserved.
3 | * Licenced under the MIT licence. See COPYING file in the project root for
4 | * details.
5 | */
6 |
7 | #include <algorithm>
8 | #include <atomic>
9 | #include <cstddef>
10 | #include <cstdint>
11 | #include <cstring>
12 |
13 | // for ENOMEM
14 | #include <errno.h>
15 |
16 | #include "log.h"
17 | #include "lrmalloc.h"
18 | #include "lrmalloc_internal.h"
19 | #include "mapcache.h"
20 | #include "pagemap.h"
21 | #include "pages.h"
22 | #include "size_classes.h"
23 | #include "tcache.h"
24 |
25 | // global variables
26 | // descriptor recycle list
27 | extern std::atomic<DescriptorNode> AvailDesc;
28 |
29 | // helper fns
30 | void HeapPushPartial(Descriptor* desc);
31 | Descriptor* HeapPopPartial(ProcHeap* heap);
32 | void MallocFromPartial(size_t scIdx, TCacheBin* cache, size_t& blockNum);
33 | void MallocFromNewSB(size_t scIdx, TCacheBin* cache, size_t& blockNum);
34 | Descriptor* DescAlloc();
35 | void DescRetire(Descriptor* desc);
36 |
37 | // global variables
38 | // descriptor recycle list
39 | std::atomic<DescriptorNode> sAvailDesc({ nullptr });
40 | // malloc init state
41 | bool sMallocInit = false;
42 | // heaps, one heap per size class
43 | ProcHeap sHeaps[MAX_SZ_IDX];
44 |
45 | // (un)register descriptor pages with pagemap
46 | // all pages used by the descriptor will point to desc in
47 | // the pagemap
48 | // for (unaligned) large allocations, only first page points to desc
49 | // aligned large allocations get the corresponding page pointing to desc
50 | void UpdatePageMap(ProcHeap* heap, char* ptr, Descriptor* desc, size_t scIdx)
51 | {
52 |     ASSERT(ptr);
53 |
54 |     PageInfo info;
55 |     info.Set(desc, scIdx);
56 |
57 |     // large allocation, don't need to (un)register every page
58 |     // just first
59 |     if (!heap) {
60 |         sPageMap.SetPageInfo(ptr, info);
61 |         return;
62 |     }
63 |
64 |     // only need to worry about alignment for large allocations
65 |     // ASSERT(ptr == superblock);
66 |
67 |     // small allocation, (un)register every page
68 |     // could *technically* optimize if blockSize >>> page,
69 |     // but let's not worry about that
70 |     // SB_SIZE is a multiple of page
71 |     ASSERT((SB_SIZE & PAGE_MASK) == 0);
72 |     for (size_t idx = 0; idx < SB_SIZE; idx += PAGE) {
73 |         sPageMap.SetPageInfo(ptr + idx, info);
74 |     }
75 | }
76 |
77 | void RegisterDesc(Descriptor* desc)
78 | {
79 |     ProcHeap* heap = desc->heap;
80 |     char* ptr = desc->superblock;
81 |     size_t scIdx = 0;
82 |     if (LIKELY(heap != nullptr)) {
83 |         scIdx = heap->scIdx;
84 |     }
85 |
86 |     UpdatePageMap(heap, ptr, desc, scIdx);
87 | }
88 |
89 | // unregister descriptor before superblock deletion
90 | // can only be done when superblock is about to be free'd to OS
91 | void UnregisterDesc(ProcHeap* heap, char* superblock)
92 | {
93 |     UpdatePageMap(heap, superblock, nullptr, 0L);
94 | }
95 |
96 | LFMALLOC_INLINE
97 | PageInfo GetPageInfoForPtr(void* ptr)
98 | {
99 |     return sPageMap.GetPageInfo((char*)ptr);
100 | }
101 |
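ComputeIdx below recovers a block's index within a superblock as diff / blockSize. The switch over scIdx exists so that each case divides by a compile-time constant, which GCC/Clang lower to a multiply-and-shift sequence instead of an actual div instruction. A standalone illustration of that effect (my own sketch, not code from this repository; 48 is one of the size classes defined in size_classes.h):

```cpp
#include <cstdint>

// runtime divisor: the compiler has to emit a real div instruction
uint32_t idx_runtime(uint32_t diff, uint32_t blockSize)
{
    return diff / blockSize;
}

// compile-time divisor: the compiler replaces the division with a
// multiply and a shift, which is several times cheaper
uint32_t idx_constant(uint32_t diff)
{
    return diff / 48;
}
```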
102 | // compute block index in superblock
103 | LFMALLOC_INLINE
104 | uint32_t ComputeIdx(char* superblock, char* block, size_t scIdx)
105 | {
106 |     SizeClassData* sc = &SizeClasses[scIdx];
107 |     uint32_t scBlockSize = sc->blockSize;
108 |     (void)scBlockSize; // suppress unused var warning
109 |
110 |     ASSERT(block >= superblock);
111 |     ASSERT(block < superblock + SB_SIZE);
112 |     // optimize integer division by allowing the compiler to create
113 |     // a jump table using size class index
114 |     // compiler can then optimize integer div due to known divisor
115 |     uint32_t diff = uint32_t(block - superblock);
116 |     uint32_t idx = 0;
117 |     switch (scIdx) {
118 | #define SIZE_CLASS_bin_yes(index, blockSize) \
119 |     case index: \
120 |         ASSERT(scBlockSize == blockSize); \
121 |         idx = diff / blockSize; \
122 |         break;
123 | #define SIZE_CLASS_bin_no(index, blockSize)
124 | #define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, lg_delta_lookup) \
125 |     SIZE_CLASS_bin_##bin((index + 1), ((1U << lg_grp) + (ndelta << lg_delta)))
126 |         SIZE_CLASSES
127 |     default:
128 |         ASSERT(false);
129 |         break;
130 |     }
131 | #undef SIZE_CLASS_bin_yes
132 | #undef SIZE_CLASS_bin_no
133 | #undef SC
134 |
135 |     ASSERT(diff / scBlockSize == idx);
136 |     return idx;
137 | }
138 |
139 | SizeClassData* ProcHeap::GetSizeClass() const
140 | {
141 |     return &SizeClasses[scIdx];
142 | }
143 |
144 | Descriptor* ListPopPartial(ProcHeap* heap)
145 | {
146 |     std::atomic<DescriptorNode>& list = heap->partialList;
147 |     DescriptorNode oldHead = list.load();
148 |     DescriptorNode newHead;
149 |     do {
150 |         Descriptor* oldDesc = oldHead.GetDesc();
151 |         if (!oldDesc) {
152 |             return nullptr;
153 |         }
154 |
155 |         newHead = oldDesc->nextPartial.load();
156 |         Descriptor* desc = newHead.GetDesc();
157 |         uint64_t counter = oldHead.GetCounter();
158 |         newHead.Set(desc, counter);
159 |     } while (!list.compare_exchange_weak(oldHead, newHead));
160 |
161 |     return oldHead.GetDesc();
162 | }
163 |
164 | void ListPushPartial(Descriptor* desc)
165 | {
166 |     ProcHeap* heap = desc->heap;
167 |     std::atomic<DescriptorNode>& list = heap->partialList;
168 |
169 |     DescriptorNode oldHead = list.load();
170 |     DescriptorNode newHead;
171 |     do {
172 |         newHead.Set(desc, oldHead.GetCounter() + 1);
173 |         ASSERT(oldHead.GetDesc() != newHead.GetDesc());
174 |         newHead.GetDesc()->nextPartial.store(oldHead);
175 |     } while (!list.compare_exchange_weak(oldHead, newHead));
176 | }
177 |
178 | void HeapPushPartial(Descriptor* desc)
179 | {
180 |     ListPushPartial(desc);
181 | }
182 |
183 | Descriptor* HeapPopPartial(ProcHeap* heap)
184 | {
185 |     return ListPopPartial(heap);
186 | }
187 |
188 | void MallocFromPartial(size_t scIdx, TCacheBin* cache, size_t& blockNum)
189 | {
190 |     ProcHeap* heap = &sHeaps[scIdx];
191 |
192 |     Descriptor* desc = HeapPopPartial(heap);
193 |     if (!desc) {
194 |         return;
195 |     }
196 |
197 |     // reserve block(s)
198 |     Anchor oldAnchor = desc->anchor.load();
199 |     Anchor newAnchor;
200 |     uint32_t maxcount = desc->maxcount;
201 |     uint32_t blockSize = desc->blockSize;
202 |     char* superblock = desc->superblock;
203 |
204 |     // we have "ownership" of block, but anchor can still change
205 |     // due to free()
206 |     do {
207 |         if (oldAnchor.state == SB_EMPTY) {
208 |             DescRetire(desc);
209 |             // retry
210 |             return MallocFromPartial(scIdx, cache, blockNum);
211 |         }
212 |
213 |         // oldAnchor must be SB_PARTIAL
214 |         // can't be SB_FULL because we *own* the block now
215 |         // and it came from HeapPopPartial
216 |         // can't be SB_EMPTY, we already checked
217 |         ASSERT(oldAnchor.state == SB_PARTIAL);
218 |
219 |         newAnchor = oldAnchor;
220 |         newAnchor.count = 0;
221 |         // avail value doesn't actually matter
222 |         newAnchor.avail = maxcount;
223 |         newAnchor.state = SB_FULL;
224 |     } while
(!desc->anchor.compare_exchange_weak(oldAnchor, newAnchor)); 225 | 226 | // will take as many blocks as available from superblock 227 | // *AND* no thread can do malloc() using this superblock, we 228 | // exclusively own it 229 | // if CAS fails, it just means another thread added more available blocks 230 | // through FlushCache, which we can then use 231 | uint32_t blocksTaken = oldAnchor.count; 232 | uint32_t avail = oldAnchor.avail; 233 | 234 | ASSERT(avail < maxcount); 235 | char* block = superblock + avail * blockSize; 236 | 237 | // cache must be empty at this point 238 | // and the blocks are already organized as a list 239 | // so all we need do is "push" that list, a constant time op 240 | ASSERT(cache->GetBlockNum() == 0); 241 | cache->PushList(block, blocksTaken); 242 | 243 | blockNum += blocksTaken; 244 | } 245 | 246 | void MallocFromNewSB(size_t scIdx, TCacheBin* cache, size_t& blockNum) 247 | { 248 | ProcHeap* heap = &sHeaps[scIdx]; 249 | SizeClassData* sc = &SizeClasses[scIdx]; 250 | 251 | Descriptor* desc = DescAlloc(); 252 | ASSERT(desc); 253 | 254 | uint32_t const blockSize = sc->blockSize; 255 | uint32_t const maxcount = sc->GetBlockNum(); 256 | 257 | desc->heap = heap; 258 | desc->blockSize = blockSize; 259 | desc->maxcount = maxcount; 260 | desc->superblock = sMapCache.Alloc(); 261 | 262 | cache->PushList(desc->superblock, maxcount); 263 | 264 | Anchor anchor; 265 | anchor.avail = maxcount; 266 | anchor.count = 0; 267 | anchor.state = SB_FULL; 268 | 269 | desc->anchor.store(anchor); 270 | 271 | ASSERT(anchor.avail < maxcount || anchor.state == SB_FULL); 272 | ASSERT(anchor.count < maxcount); 273 | 274 | // register new descriptor 275 | // must be done before setting superblock as active 276 | // or leaving superblock as available in a partial list 277 | RegisterDesc(desc); 278 | 279 | // if state changes to SB_PARTIAL, desc must be added to partial list 280 | ASSERT(anchor.state == SB_FULL); 281 | 282 | blockNum += maxcount; 283 | } 284 | 285 | Descriptor* DescAlloc() 286 | { 287 | DescriptorNode oldHead = sAvailDesc.load(); 288 | while (true) { 289 | Descriptor* desc = oldHead.GetDesc(); 290 | if (desc) { 291 | DescriptorNode newHead = desc->nextFree.load(); 292 | newHead.Set(newHead.GetDesc(), oldHead.GetCounter()); 293 | if (sAvailDesc.compare_exchange_weak(oldHead, newHead)) { 294 | ASSERT(desc->blockSize == 0); 295 | return desc; 296 | } 297 | } else { 298 | // allocate several pages 299 | // get first descriptor, this is returned to caller 300 | char* ptr = (char*)PageAlloc(DESCRIPTOR_BLOCK_SZ); 301 | Descriptor* ret = (Descriptor*)ptr; 302 | // organize list with the rest of descriptors 303 | // and add to available descriptors 304 | { 305 | Descriptor* first = nullptr; 306 | Descriptor* prev = nullptr; 307 | 308 | char* currPtr = ptr + sizeof(Descriptor); 309 | currPtr = ALIGN_ADDR(currPtr, CACHELINE); 310 | first = (Descriptor*)currPtr; 311 | while (currPtr + sizeof(Descriptor) < ptr + DESCRIPTOR_BLOCK_SZ) { 312 | Descriptor* curr = (Descriptor*)currPtr; 313 | if (prev) { 314 | prev->nextFree.store({ curr }); 315 | } 316 | 317 | prev = curr; 318 | currPtr = currPtr + sizeof(Descriptor); 319 | currPtr = ALIGN_ADDR(currPtr, CACHELINE); 320 | } 321 | 322 | prev->nextFree.store({ nullptr }); 323 | 324 | // add list to available descriptors 325 | DescriptorNode oldHead = sAvailDesc.load(); 326 | DescriptorNode newHead; 327 | do { 328 | prev->nextFree.store(oldHead); 329 | newHead.Set(first, oldHead.GetCounter() + 1); 330 | } while 
(!sAvailDesc.compare_exchange_weak(oldHead, newHead)); 331 | } 332 | 333 | return ret; 334 | } 335 | } 336 | } 337 | 338 | void DescRetire(Descriptor* desc) 339 | { 340 | desc->blockSize = 0; 341 | DescriptorNode oldHead = sAvailDesc.load(); 342 | DescriptorNode newHead; 343 | do { 344 | desc->nextFree.store(oldHead); 345 | 346 | newHead.Set(desc, oldHead.GetCounter() + 1); 347 | } while (!sAvailDesc.compare_exchange_weak(oldHead, newHead)); 348 | } 349 | 350 | void FillCache(size_t scIdx, TCacheBin* cache) 351 | { 352 | // at most cache will be filled with number of blocks equal to superblock 353 | size_t blockNum = 0; 354 | // use a *SINGLE* partial superblock to try to fill cache 355 | MallocFromPartial(scIdx, cache, blockNum); 356 | // if we obtain no blocks from partial superblocks, create a new superblock 357 | if (blockNum == 0) { 358 | MallocFromNewSB(scIdx, cache, blockNum); 359 | } 360 | 361 | SizeClassData* sc = &SizeClasses[scIdx]; 362 | (void)sc; 363 | ASSERT(blockNum > 0); 364 | ASSERT(blockNum <= sc->cacheBlockNum); 365 | } 366 | 367 | void FlushCache(size_t scIdx, TCacheBin* cache) 368 | { 369 | ProcHeap* heap = &sHeaps[scIdx]; 370 | SizeClassData* sc = &SizeClasses[scIdx]; 371 | uint32_t const blockSize = sc->blockSize; 372 | // after CAS, desc might become empty and 373 | // concurrently reused, so store maxcount 374 | uint32_t const maxcount = sc->GetBlockNum(); 375 | (void)maxcount; // suppress unused warning 376 | 377 | // @todo: optimize 378 | // in the normal case, we should be able to return several 379 | // blocks with a single CAS 380 | while (cache->GetBlockNum() > 0) { 381 | char* head = cache->PeekBlock(); 382 | char* tail = head; 383 | PageInfo info = GetPageInfoForPtr(head); 384 | Descriptor* desc = info.GetDesc(); 385 | char* superblock = desc->superblock; 386 | 387 | // cache is a linked list of blocks 388 | // superblock free list is also a linked list of blocks 389 | // can optimize transfers of blocks between these 2 entities 390 | // by exploiting existing structure 391 | uint32_t blockCount = 1; 392 | // check if next cache blocks are in the same superblock 393 | // same superblock, same descriptor 394 | while (cache->GetBlockNum() > blockCount) { 395 | char* ptr = tail + *(ptrdiff_t*)tail + blockSize; 396 | if (ptr < superblock || ptr >= superblock + SB_SIZE) { 397 | break; // ptr not in superblock 398 | } 399 | 400 | // ptr in superblock, add to "list" 401 | ++blockCount; 402 | tail = ptr; 403 | } 404 | 405 | cache->PopList(tail + *(ptrdiff_t*)tail + blockSize, blockCount); 406 | 407 | // add list to desc, update anchor 408 | uint32_t idx = ComputeIdx(superblock, head, scIdx); 409 | 410 | Anchor oldAnchor = desc->anchor.load(); 411 | Anchor newAnchor; 412 | do { 413 | // update anchor.avail 414 | char* next = (char*)(superblock + oldAnchor.avail * blockSize); 415 | *(ptrdiff_t*)tail = next - tail - blockSize; 416 | 417 | newAnchor = oldAnchor; 418 | newAnchor.avail = idx; 419 | // state updates 420 | if (oldAnchor.state == SB_FULL) { 421 | newAnchor.state = SB_PARTIAL; 422 | } 423 | 424 | ASSERT(oldAnchor.count < desc->maxcount); 425 | if (oldAnchor.count + blockCount == desc->maxcount) { 426 | newAnchor.count = desc->maxcount - 1; 427 | newAnchor.state = SB_EMPTY; // can free superblock 428 | } else { 429 | newAnchor.count += blockCount; 430 | } 431 | } while (!desc->anchor.compare_exchange_weak(oldAnchor, newAnchor)); 432 | 433 | // after last CAS, can't reliably read any desc fields 434 | // as desc might have become empty and been concurrently 
reused 435 | ASSERT(oldAnchor.avail < maxcount || oldAnchor.state == SB_FULL); 436 | ASSERT(newAnchor.avail < maxcount); 437 | ASSERT(newAnchor.count < maxcount); 438 | 439 | // CAS success, can free block 440 | if (newAnchor.state == SB_EMPTY) { 441 | // unregister descriptor 442 | UnregisterDesc(heap, superblock); 443 | 444 | // free superblock 445 | sMapCache.Free(superblock); 446 | } else if (oldAnchor.state == SB_FULL) { 447 | HeapPushPartial(desc); 448 | } 449 | } 450 | } 451 | 452 | void InitMalloc() 453 | { 454 | LOG_DEBUG(); 455 | 456 | // hard assumption that this can't be called concurrently 457 | sMallocInit = true; 458 | 459 | // init size classes 460 | InitSizeClass(); 461 | 462 | // init page map 463 | sPageMap.Init(); 464 | 465 | // init heaps 466 | for (size_t idx = 0; idx < MAX_SZ_IDX; ++idx) { 467 | ProcHeap& heap = sHeaps[idx]; 468 | heap.partialList.store({ nullptr }); 469 | heap.scIdx = idx; 470 | } 471 | } 472 | 473 | LFMALLOC_INLINE 474 | void* do_malloc(size_t size) 475 | { 476 | // ensure malloc is initialized 477 | if (UNLIKELY(!sMallocInit)) { 478 | InitMalloc(); 479 | } 480 | 481 | // large block allocation 482 | if (UNLIKELY(size > MAX_SZ)) { 483 | size_t pages = PAGE_CEILING(size); 484 | Descriptor* desc = DescAlloc(); 485 | ASSERT(desc); 486 | 487 | desc->heap = nullptr; 488 | desc->blockSize = pages; 489 | desc->maxcount = 1; 490 | desc->superblock = (char*)PageAlloc(pages); 491 | 492 | Anchor anchor; 493 | anchor.avail = 0; 494 | anchor.count = 0; 495 | anchor.state = SB_FULL; 496 | 497 | desc->anchor.store(anchor); 498 | 499 | RegisterDesc(desc); 500 | 501 | char* ptr = desc->superblock; 502 | LOG_DEBUG("large, ptr: %p", ptr); 503 | return (void*)ptr; 504 | } 505 | 506 | // size class calculation 507 | size_t scIdx = GetSizeClass(size); 508 | 509 | TCacheBin* cache = &TCache[scIdx]; 510 | // fill cache if needed 511 | if (UNLIKELY(cache->GetBlockNum() == 0)) { 512 | FillCache(scIdx, cache); 513 | } 514 | 515 | return cache->PopBlock(scIdx); 516 | } 517 | 518 | LFMALLOC_INLINE 519 | bool isPowerOfTwo(size_t x) 520 | { 521 | // https://stackoverflow.com/questions/3638431/determine-if-an-int-is-a-power-of-2-or-not-in-a-single-line 522 | return x && !(x & (x - 1)); 523 | } 524 | 525 | LFMALLOC_INLINE 526 | void* do_aligned_alloc(size_t alignment, size_t size) 527 | { 528 | if (UNLIKELY(!isPowerOfTwo(alignment))) { 529 | return nullptr; 530 | } 531 | 532 | size = ALIGN_VAL(size, alignment); 533 | 534 | ASSERT(size > 0 && alignment > 0 && size >= alignment); 535 | 536 | // @todo: almost equal logic to do_malloc, DRY 537 | // ensure malloc is initialized 538 | if (UNLIKELY(!sMallocInit)) { 539 | InitMalloc(); 540 | } 541 | 542 | // allocations smaller than PAGE will be correctly aligned 543 | // this is because size >= alignment, and size will map to a small class 544 | // size with the formula 2^X + A*2^(X-1) + C*2^(X-2) 545 | // since size is a multiple of alignment, the lowest size class power of 546 | // two is already >= alignment 547 | // this does not work if allocation > PAGE even if it's a small class size, 548 | // because the superblock for those allocations is only guaranteed 549 | // to be page aligned 550 | // force such allocations to become large block allocs 551 | if (UNLIKELY(size > PAGE)) { 552 | // hotfix solution for this case is to force allocation to be large 553 | size = std::max(size, MAX_SZ + 1); 554 | 555 | // large blocks are page-aligned 556 | // if user asks for a diabolical alignment, need more pages to 557 | // fulfil it 558 | 
bool const needsMorePages = (alignment > PAGE); 559 | if (UNLIKELY(needsMorePages)) { 560 | size += alignment; 561 | } 562 | 563 | size_t pages = PAGE_CEILING(size); 564 | Descriptor* desc = DescAlloc(); 565 | ASSERT(desc); 566 | 567 | char* ptr = (char*)PageAlloc(pages); 568 | 569 | desc->heap = nullptr; 570 | desc->blockSize = pages; 571 | desc->maxcount = 1; 572 | desc->superblock = ptr; 573 | 574 | Anchor anchor; 575 | anchor.avail = 0; 576 | anchor.count = 0; 577 | anchor.state = SB_FULL; 578 | 579 | desc->anchor.store(anchor); 580 | 581 | RegisterDesc(desc); 582 | 583 | if (UNLIKELY(needsMorePages)) { 584 | ptr = ALIGN_ADDR(ptr, alignment); 585 | // aligned block must fit into allocated pages 586 | ASSERT((ptr + size) <= (desc->superblock + desc->blockSize)); 587 | 588 | // need to update page so that descriptors can be found 589 | // for large allocations aligned to "middle" of 590 | // superblocks 591 | UpdatePageMap(nullptr, ptr, desc, 0L); 592 | } 593 | 594 | LOG_DEBUG("large, ptr: %p", ptr); 595 | return (void*)ptr; 596 | } 597 | 598 | ASSERT(size <= PAGE); 599 | 600 | // size class calculation 601 | size_t scIdx = GetSizeClass(size); 602 | 603 | TCacheBin* cache = &TCache[scIdx]; 604 | // fill cache if needed 605 | if (UNLIKELY(cache->GetBlockNum() == 0)) { 606 | FillCache(scIdx, cache); 607 | } 608 | 609 | return cache->PopBlock(scIdx); 610 | } 611 | 612 | LFMALLOC_INLINE 613 | void do_free(void* ptr) 614 | { 615 | PageInfo info = GetPageInfoForPtr(ptr); 616 | Descriptor* desc = info.GetDesc(); 617 | // @todo: this can happen with dynamic loading 618 | // need to print correct message 619 | ASSERT(desc); 620 | 621 | size_t scIdx = info.GetScIdx(); 622 | 623 | LOG_DEBUG("Heap %p, Desc %p, ptr %p", desc->heap, desc, ptr); 624 | 625 | // large allocation case 626 | if (UNLIKELY(!scIdx)) { 627 | char* superblock = desc->superblock; 628 | 629 | // unregister descriptor 630 | UnregisterDesc(nullptr, superblock); 631 | // aligned large allocation case 632 | if (UNLIKELY((char*)ptr != superblock)) { 633 | UnregisterDesc(nullptr, (char*)ptr); 634 | } 635 | 636 | // free superblock 637 | PageFree(superblock, desc->blockSize); 638 | 639 | // desc cannot be in any partial list, so it can be 640 | // immediately reused 641 | DescRetire(desc); 642 | return; 643 | } 644 | 645 | TCacheBin* cache = &TCache[scIdx]; 646 | SizeClassData* sc = &SizeClasses[scIdx]; 647 | 648 | // flush cache if need 649 | if (UNLIKELY(cache->GetBlockNum() >= sc->cacheBlockNum)) { 650 | FlushCache(scIdx, cache); 651 | } 652 | 653 | cache->PushBlock((char*)ptr, scIdx); 654 | } 655 | 656 | extern "C" void* lf_malloc(size_t size) noexcept 657 | { 658 | LOG_DEBUG("size: %lu", size); 659 | 660 | return do_malloc(size); 661 | } 662 | 663 | extern "C" void* lf_calloc(size_t n, size_t size) noexcept 664 | { 665 | LOG_DEBUG(); 666 | size_t allocSize = n * size; 667 | // overflow check 668 | // @todo: expensive, need to optimize 669 | if (UNLIKELY(n == 0 || allocSize / n != size)) { 670 | return nullptr; 671 | } 672 | 673 | void* ptr = do_malloc(allocSize); 674 | 675 | // calloc returns zero-filled memory 676 | // @todo: optimize, memory may be already zero-filled 677 | // if coming directly from OS 678 | if (LIKELY(ptr != nullptr)) { 679 | memset(ptr, 0x0, allocSize); 680 | } 681 | 682 | return ptr; 683 | } 684 | 685 | extern "C" void* lf_realloc(void* ptr, size_t size) noexcept 686 | { 687 | LOG_DEBUG(); 688 | 689 | size_t blockSize = 0; 690 | if (LIKELY(ptr != nullptr)) { 691 | PageInfo info = GetPageInfoForPtr(ptr); 692 | 
Descriptor* desc = info.GetDesc();
693 |         ASSERT(desc);
694 |
695 |         blockSize = desc->blockSize;
696 |
697 |         // realloc with size == 0 is the same as free(ptr)
698 |         if (UNLIKELY(size == 0)) {
699 |             do_free(ptr);
700 |             return nullptr;
701 |         }
702 |
703 |         // nothing to do, block is already large enough
704 |         if (UNLIKELY(size <= blockSize))
705 |             return ptr;
706 |     }
707 |
708 |     void* newPtr = do_malloc(size);
709 |     if (LIKELY(ptr && newPtr)) {
710 |         memcpy(newPtr, ptr, blockSize);
711 |         do_free(ptr);
712 |     }
713 |
714 |     return newPtr;
715 | }
716 |
717 | extern "C" size_t lf_malloc_usable_size(void* ptr) noexcept
718 | {
719 |     LOG_DEBUG();
720 |     if (UNLIKELY(ptr == nullptr))
721 |         return 0;
722 |
723 |     PageInfo info = GetPageInfoForPtr(ptr);
724 |
725 |     size_t scIdx = info.GetScIdx();
726 |     // large allocation case
727 |     if (UNLIKELY(!scIdx)) {
728 |         Descriptor* desc = info.GetDesc();
729 |         ASSERT(desc);
730 |         return desc->blockSize;
731 |     }
732 |
733 |     SizeClassData* sc = &SizeClasses[scIdx];
734 |     return sc->blockSize;
735 | }
736 |
737 | extern "C" int lf_posix_memalign(void** memptr, size_t alignment, size_t size) noexcept
738 | {
739 |     LOG_DEBUG();
740 |
741 |     // "EINVAL - The alignment argument was not a power of two, or
742 |     // was not a multiple of sizeof(void *)"
743 |     if (UNLIKELY(!isPowerOfTwo(alignment) || (alignment & PTR_MASK))) {
744 |         return EINVAL;
745 |     }
746 |
747 |     void* ptr = do_aligned_alloc(alignment, size);
748 |     if (UNLIKELY(ptr == nullptr)) {
749 |         return ENOMEM;
750 |     }
751 |
752 |     ASSERT(memptr != nullptr);
753 |     *memptr = ptr;
754 |     return 0;
755 | }
756 |
757 | extern "C" void* lf_aligned_alloc(size_t alignment, size_t size) noexcept
758 | {
759 |     LOG_DEBUG();
760 |     return do_aligned_alloc(alignment, size);
761 | }
762 |
763 | extern "C" void* lf_valloc(size_t size) noexcept
764 | {
765 |     LOG_DEBUG();
766 |     return do_aligned_alloc(PAGE, size);
767 | }
768 |
769 | extern "C" void* lf_memalign(size_t alignment, size_t size) noexcept
770 | {
771 |     LOG_DEBUG();
772 |     return do_aligned_alloc(alignment, size);
773 | }
774 |
775 | extern "C" void* lf_pvalloc(size_t size) noexcept
776 | {
777 |     LOG_DEBUG();
778 |     return do_aligned_alloc(PAGE, size);
779 | }
780 |
781 | extern "C" void lf_free(void* ptr) noexcept
782 | {
783 |     LOG_DEBUG("ptr: %p", ptr);
784 |     if (UNLIKELY(!ptr)) {
785 |         return;
786 |     }
787 |
788 |     do_free(ptr);
789 | }
790 |
--------------------------------------------------------------------------------
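A note on how these lf_* definitions end up servicing malloc() calls: the header that follows #defines lf_malloc to malloc (and likewise for the other entry points) before declaring them, so the functions above are compiled and exported under the standard libc names, which the dynamic linker then resolves to this library. A minimal sketch of the same rename-by-macro pattern (hypothetical names, not the project's code):

```cpp
// sketch: rename-by-macro symbol interposition
#include <cstddef>

#define my_malloc malloc // every later 'my_malloc' token now reads 'malloc'

// this declares ::malloc itself; a matching definition would *define*
// ::malloc, shadowing the libc version at link/preload time
extern "C" void* my_malloc(size_t size);
```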
/lrmalloc.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2019 Ricardo Leite. All rights reserved.
3 | * Licenced under the MIT licence. See COPYING file in the project root for
4 | * details.
5 | */
6 |
7 | #ifndef __LFMALLOC_H
8 | #define __LFMALLOC_H
9 |
10 | #include <stddef.h>
11 |
12 | // a cache line is 64 bytes
13 | #define LG_CACHELINE 6
14 | // a page is 4KB
15 | #define LG_PAGE 12
16 | // a huge page is 2MB
17 | #define LG_HUGEPAGE 21
18 |
19 | // a ptr is sizeof(void*) bytes
20 | #define PTR_SZ sizeof(void*)
21 | #define CACHELINE ((size_t)(1U << LG_CACHELINE))
22 | #define PAGE ((size_t)(1U << LG_PAGE))
23 | #define HUGEPAGE ((size_t)(1U << LG_HUGEPAGE))
24 |
25 | #define PTR_MASK (PTR_SZ - 1)
26 | #define CACHELINE_MASK (CACHELINE - 1)
27 | #define PAGE_MASK (PAGE - 1)
28 |
29 | // minimum alignment requirement all allocations must meet
30 | // "address returned by malloc will be suitably aligned to store any kind of
31 | // variable"
32 | #define MIN_ALIGN sizeof(void*)
33 |
34 | // returns smallest value >= value with alignment align
35 | #define ALIGN_VAL(val, align) (__typeof__(val))(((size_t)(val) + (align - 1)) & ((~(align)) + 1))
36 |
37 | // returns smallest address >= addr with alignment align
38 | #define ALIGN_ADDR(addr, align) ALIGN_VAL(addr, align)
39 |
40 | // return smallest page size multiple that is >= s
41 | #define PAGE_CEILING(s) (((s) + (PAGE - 1)) & ~(PAGE - 1))
42 |
43 | // https://stackoverflow.com/questions/109710/how-do-the-likely-and-unlikely-macros-in-the-linux-kernel-work-and-what-is-t
44 | #define LIKELY(x) __builtin_expect((x), 1)
45 | #define UNLIKELY(x) __builtin_expect((x), 0)
46 |
47 | #define LFMALLOC_ATTR(s) __attribute__((s))
48 | #define LFMALLOC_ALLOC_SIZE(s) LFMALLOC_ATTR(alloc_size(s))
49 | #define LFMALLOC_ALLOC_SIZE2(s1, s2) LFMALLOC_ATTR(alloc_size(s1, s2))
50 | #define LFMALLOC_EXPORT LFMALLOC_ATTR(visibility("default"))
51 | #define LFMALLOC_NOTHROW LFMALLOC_ATTR(nothrow)
52 |
53 | #if defined(__GNUC__)
54 | #define LFMALLOC_INLINE \
55 |     LFMALLOC_ATTR(always_inline) \
56 |     inline static
57 | #elif defined(_MSC_VER)
58 | #define LFMALLOC_INLINE __forceinline inline static
59 | #else
60 | #define LFMALLOC_INLINE
61 | #endif
62 |
63 | // use initial exec tls model, faster than regular tls
64 | // with the downside that the malloc lib can no longer be dlopen'd
65 | // https://www.ibm.com/support/knowledgecenter/en/SSVUN6_1.1.0/com.ibm.xlcpp11.zlinux.doc/language_ref/attr_tls_model.html
66 | #define LFMALLOC_TLS_INIT_EXEC LFMALLOC_ATTR(tls_model("initial-exec"))
67 |
68 | #define LFMALLOC_CACHE_ALIGNED LFMALLOC_ATTR(aligned(CACHELINE))
69 |
70 | #define LFMALLOC_CACHE_ALIGNED_FN LFMALLOC_ATTR(aligned(CACHELINE))
71 |
72 | #define STATIC_ASSERT(x, m) static_assert(x, m)
73 |
74 |
75 | #define lf_malloc malloc
76 | #define lf_free free
77 | #define lf_calloc calloc
78 | #define lf_realloc realloc
79 | #define lf_malloc_usable_size malloc_usable_size
80 | #define lf_posix_memalign posix_memalign
81 | #define lf_aligned_alloc aligned_alloc
82 | #define lf_valloc valloc
83 | #define lf_memalign memalign
84 | #define lf_pvalloc pvalloc
85 |
86 | // exports
87 | #ifdef __cplusplus
88 | extern "C" {
89 | #endif
90 | // malloc interface
91 | void* lf_malloc(size_t size) LFMALLOC_EXPORT LFMALLOC_NOTHROW
92 |     LFMALLOC_ALLOC_SIZE(1) LFMALLOC_CACHE_ALIGNED_FN;
93 | void lf_free(void* ptr) LFMALLOC_EXPORT LFMALLOC_NOTHROW LFMALLOC_CACHE_ALIGNED_FN;
94 | void* lf_calloc(size_t n, size_t size) LFMALLOC_EXPORT LFMALLOC_NOTHROW
95 |     LFMALLOC_ALLOC_SIZE2(1, 2) LFMALLOC_CACHE_ALIGNED_FN;
96 | void* lf_realloc(void* ptr, size_t size) LFMALLOC_EXPORT LFMALLOC_NOTHROW
97 |     LFMALLOC_ALLOC_SIZE(2) LFMALLOC_CACHE_ALIGNED_FN;
98 | // utilities
99 | size_t lf_malloc_usable_size(void* ptr);
100 | // memory alignment ops
101 | int lf_posix_memalign(void** memptr, size_t alignment, size_t size) LFMALLOC_EXPORT LFMALLOC_NOTHROW
102 |     LFMALLOC_ATTR(nonnull(1));
103 | void* lf_aligned_alloc(size_t alignment, size_t size) LFMALLOC_EXPORT LFMALLOC_NOTHROW
104 |     LFMALLOC_ALLOC_SIZE(2) LFMALLOC_CACHE_ALIGNED_FN;
105 | void* lf_valloc(size_t size) LFMALLOC_EXPORT LFMALLOC_NOTHROW
106 |     LFMALLOC_ALLOC_SIZE(1) LFMALLOC_CACHE_ALIGNED_FN;
107 | // obsolete alignment ops
108 | void* lf_memalign(size_t alignment, size_t size) LFMALLOC_EXPORT LFMALLOC_NOTHROW
109 |     LFMALLOC_ALLOC_SIZE(2) LFMALLOC_CACHE_ALIGNED_FN;
110 | void* lf_pvalloc(size_t size) LFMALLOC_EXPORT LFMALLOC_NOTHROW
111 |     LFMALLOC_ALLOC_SIZE(1) LFMALLOC_CACHE_ALIGNED_FN;
112 | #ifdef __cplusplus
113 | }
114 | #endif
115 |
116 | #endif // __LFMALLOC_H
117 |
--------------------------------------------------------------------------------
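A quick numeric check of the alignment macros above (illustrative only; assumes lrmalloc.h is on the include path):

```cpp
#include <cassert>
#include "lrmalloc.h"

int main()
{
    // round 13 up to a multiple of 8: (13 + 7) & -8 == 16
    assert(ALIGN_VAL((size_t)13, (size_t)8) == 16);
    // PAGE_CEILING rounds to 4 KiB pages; aligned values pass through
    assert(PAGE_CEILING(5000) == 8192);
    assert(PAGE_CEILING(4096) == 4096);
    return 0;
}
```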
/lrmalloc_internal.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2022 Ricardo Leite. All rights reserved.
3 | * Licenced under the MIT licence. See COPYING file in the project root for
4 | * details.
5 | */
6 |
7 | #ifndef __LFMALLOC_INTERNAL_H
8 | #define __LFMALLOC_INTERNAL_H
9 |
10 | #include <atomic>
11 |
12 | #include "lrmalloc.h"
13 | #include "log.h"
14 |
15 | // superblock states
16 | // used in Anchor::state
17 | enum SuperblockState : uint8_t {
18 |     // all blocks allocated or reserved
19 |     SB_FULL = 0,
20 |     // has unreserved available blocks
21 |     SB_PARTIAL = 1,
22 |     // all blocks are free
23 |     SB_EMPTY = 2,
24 | };
25 |
26 | struct Anchor;
27 | struct DescriptorNode;
28 | struct Descriptor;
29 | struct ProcHeap;
30 | struct SizeClassData;
31 | struct TCacheBin;
32 |
33 | #define LG_MAX_BLOCK_NUM 31
34 | #define MAX_BLOCK_NUM (1ul << LG_MAX_BLOCK_NUM)
35 |
36 | struct Anchor {
37 |     SuperblockState state : 2;
38 |     uint32_t avail : LG_MAX_BLOCK_NUM;
39 |     uint32_t count : LG_MAX_BLOCK_NUM;
40 | } LFMALLOC_ATTR(packed);
41 |
42 | STATIC_ASSERT(sizeof(Anchor) == sizeof(uint64_t), "Invalid anchor size");
43 |
44 | struct DescriptorNode {
45 | public:
46 |     // ptr
47 |     Descriptor* _desc;
48 |     // aba counter
49 |     // uint64_t _counter;
50 |
51 | public:
52 |     void Set(Descriptor* desc, uint64_t counter)
53 |     {
54 |         // desc must be cacheline aligned
55 |         ASSERT(((uint64_t)desc & CACHELINE_MASK) == 0);
56 |         // counter may be incremented but will always be stored in
57 |         // LG_CACHELINE bits
58 |         _desc = (Descriptor*)((uint64_t)desc | (counter & CACHELINE_MASK));
59 |     }
60 |
61 |     Descriptor* GetDesc() const
62 |     {
63 |         return (Descriptor*)((uint64_t)_desc & ~CACHELINE_MASK);
64 |     }
65 |
66 |     uint64_t GetCounter() const
67 |     {
68 |         return (uint64_t)((uint64_t)_desc & CACHELINE_MASK);
69 |     }
70 |
71 | } LFMALLOC_ATTR(packed);
72 |
73 | STATIC_ASSERT(sizeof(DescriptorNode) == sizeof(uint64_t), "Invalid descriptor node size");
74 |
75 | // Superblock descriptor
76 | // needs to be cache-line aligned
77 | // descriptors are allocated and *never* freed
78 | struct Descriptor {
79 |     // list node pointers
80 |     // used in free descriptor list
81 |     std::atomic<DescriptorNode> nextFree;
82 |     // used in partial descriptor list
83 |     std::atomic<DescriptorNode> nextPartial;
84 |     // anchor
85 |     std::atomic<Anchor> anchor;
86 |
87 |     char* superblock;
88 |     ProcHeap* heap;
89 |     uint32_t blockSize; // block size
90 |     uint32_t maxcount;
91 | } LFMALLOC_CACHE_ALIGNED;
92 |
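DescriptorNode above packs a version counter into the low 6 bits of a cacheline-aligned Descriptor*, so the whole node stays one 64-bit word and a single compare_exchange covers both pointer and counter. The counter makes a stale head fail its CAS even if the same descriptor address is pushed again, the classic ABA defence. A self-contained sketch of the idea (my own illustration with assumed names, not the allocator's code, and assuming 64-bit pointers):

```cpp
#include <cassert>
#include <cstdint>

struct alignas(64) Node { int payload; }; // low 6 bits of any Node* are zero

uint64_t pack(Node* p, uint64_t counter) { return (uint64_t)p | (counter & 63); }
Node* unpack_ptr(uint64_t w) { return (Node*)(w & ~uint64_t(63)); }
uint64_t unpack_counter(uint64_t w) { return w & 63; }

int main()
{
    static Node n;
    uint64_t w = pack(&n, 41);
    assert(unpack_ptr(w) == &n && unpack_counter(w) == 41);
    // the counter wraps after 63 (64 & 63 == 0); even a short counter
    // makes accidental A-B-A reuse between a load and a CAS very unlikely
    assert(unpack_counter(pack(&n, 64)) == 0);
    return 0;
}
```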
93 | // at least one ProcHeap instance exists for each sizeclass
94 | struct ProcHeap {
95 | public:
96 |     // ptr to descriptor, head of partial descriptor list
97 |     std::atomic<DescriptorNode> partialList;
98 |     // size class index
99 |     size_t scIdx;
100 |
101 | public:
102 |     size_t GetScIdx() const { return scIdx; }
103 |     SizeClassData* GetSizeClass() const;
104 |
105 | } LFMALLOC_ATTR(aligned(CACHELINE));
106 |
107 | // size of allocated block when allocating descriptors
108 | // block is split into multiple descriptors
109 | // 64k byte blocks
110 | #define DESCRIPTOR_BLOCK_SZ (16 * PAGE)
111 |
112 | #endif // __LFMALLOC_INTERNAL_H
113 |
--------------------------------------------------------------------------------
/mapcache.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2019 Ricardo Leite. All rights reserved.
3 | * Licenced under the MIT licence. See COPYING file in the project root for
4 | * details.
5 | */
6 |
7 | #include "mapcache.h"
8 |
9 | // thread cache, uses tsd/tls
10 | // one cache per thread
11 | __thread MapCacheBin sMapCache;
12 |
--------------------------------------------------------------------------------
/mapcache.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2019 Ricardo Leite. All rights reserved.
3 | * Licenced under the MIT licence. See COPYING file in the project root for
4 | * details.
5 | */
6 |
7 | #ifndef __MAPCACHE_H_
8 | #define __MAPCACHE_H_
9 |
10 | #include "log.h"
11 | #include "pages.h"
12 | #include "size_classes.h"
13 | #include <stdint.h>
14 |
15 | #define MAPCACHE_SIZE 64
16 |
17 | struct MapCacheBin {
18 | private:
19 |     char* _block = nullptr;
20 |     uint32_t _blockNum = 0;
21 |
22 | public:
23 |     // Map MAPCACHE_SIZE superblocks in one go and then consume 1 by 1
24 |     char* Alloc();
25 |     // Unmap superblocks immediately in SB_SIZE chunks
26 |     void Free(char*);
27 |     // Used for thread termination to unmap what remains
28 |     void Flush();
29 | };
30 |
31 | inline char* MapCacheBin::Alloc()
32 | {
33 |     if (_blockNum == 0) {
34 |         _block = (char*)PageAlloc(SB_SIZE * MAPCACHE_SIZE);
35 |         if (_block == nullptr) {
36 |             return nullptr;
37 |         }
38 |         _blockNum = MAPCACHE_SIZE;
39 |     }
40 |     char* ret = _block;
41 |     _block += SB_SIZE;
42 |     _blockNum--;
43 |     return ret;
44 | }
45 |
46 | inline void MapCacheBin::Free(char* block)
47 | {
48 |     PageFree(block, SB_SIZE);
49 | }
50 |
51 | inline void MapCacheBin::Flush()
52 | {
53 |     if (_blockNum > 0) {
54 |         PageFree(_block, SB_SIZE * _blockNum);
55 |     }
56 | }
57 |
58 | // use tls init exec model
59 | extern __thread MapCacheBin sMapCache LFMALLOC_TLS_INIT_EXEC LFMALLOC_CACHE_ALIGNED;
60 |
61 | #endif // __MAPCACHE_H_
62 |
--------------------------------------------------------------------------------
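MapCacheBin::Flush exists so that a terminating thread can return its unconsumed superblocks to the OS. The file thread_hooks.cpp (listed in the tree above but not shown in this dump) presumably wires up that cleanup; the sketch below is purely illustrative of one common mechanism, a pthread TSD destructor, and is not the contents of that file:

```cpp
// hypothetical thread-exit hook sketch (not the repository's thread_hooks.cpp)
#include <pthread.h>
#include "mapcache.h"

static pthread_key_t sCleanupKey;

static void ThreadExit(void*)
{
    // return the superblocks this thread mapped but never handed out;
    // a real hook would also flush the TCache bins via FlushCache()
    sMapCache.Flush();
}

static void InitHooks()
{
    // the destructor only fires for threads that later call
    // pthread_setspecific(sCleanupKey, some_non_null_value)
    pthread_key_create(&sCleanupKey, ThreadExit);
}
```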
/pagemap.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2019 Ricardo Leite. All rights reserved.
3 | * Licenced under the MIT licence. See COPYING file in the project root for
4 | * details.
5 | */
6 |
7 | #include "pagemap.h"
8 |
9 | #include "log.h"
10 | #include "pages.h"
11 |
12 | PageMap sPageMap;
13 |
14 | void PageMap::Init()
15 | {
16 |     // pages will necessarily be given by the OS
17 |     // so they're already initialized and zero'd
18 |     // PM_SZ is necessarily aligned to page size
19 |     _pagemap = (std::atomic<PageInfo>*)PageAllocOvercommit(PM_SZ);
20 |     ASSERT(_pagemap);
21 | }
22 |
--------------------------------------------------------------------------------
/pagemap.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2019 Ricardo Leite. All rights reserved.
3 | * Licenced under the MIT licence. See COPYING file in the project root for
4 | * details.
5 | */
6 |
7 | #ifndef __PAGEMAP_H
8 | #define __PAGEMAP_H
9 |
10 | #include <atomic>
11 |
12 | #include "log.h"
13 | #include "lrmalloc.h"
14 | #include "size_classes.h"
15 |
16 | // assuming x86-64, for now
17 | // which uses 48 bits for addressing (i.e. high 16 bits ignored)
18 | // can ignore the bottom 12 bits (lg of page)
19 | // insignificant high bits
20 | #define PM_NHS 14
21 | // insignificant low bits
22 | #define PM_NLS LG_PAGE
23 | // significant middle bits
24 | #define PM_SB (64 - PM_NHS - PM_NLS)
25 | // to get the key from an address
26 | // 1. shift to remove insignificant low bits
27 | // 2. apply mask of middle significant bits
28 | #define PM_KEY_SHIFT PM_NLS
29 | #define PM_KEY_MASK ((1ULL << PM_SB) - 1)
30 |
31 | struct Descriptor;
32 | // associates metadata to each allocator page
33 | // implemented with a static array, but can also be implemented
34 | // with a multi-level radix tree
35 |
36 | #define SC_MASK ((1ULL << 6) - 1)
37 |
38 | // contains metadata per page
39 | // *has* to be the size of a single word
40 | struct PageInfo {
41 | private:
42 |     // descriptor
43 |     Descriptor* _desc;
44 |     // size class
45 |     // stealing bits from desc to store size class
46 |     // desc is aligned to at least 64 bytes, so 6 bits to steal
47 |     // which is the same as LG_MAX_SIZE_IDX
48 |     // size_t scIdx : LG_MAX_SIZE_IDX;
49 |
50 | public:
51 |     void Set(Descriptor* desc, size_t scIdx);
52 |     Descriptor* GetDesc() const;
53 |     size_t GetScIdx() const;
54 | };
55 |
56 | inline void PageInfo::Set(Descriptor* desc, size_t scIdx)
57 | {
58 |     ASSERT(((size_t)desc & SC_MASK) == 0);
59 |     ASSERT(scIdx < MAX_SZ_IDX);
60 |
61 |     _desc = (Descriptor*)((size_t)desc | scIdx);
62 | }
63 |
64 | inline Descriptor* PageInfo::GetDesc() const
65 | {
66 |     return (Descriptor*)((size_t)_desc & ~SC_MASK);
67 | }
68 |
69 | inline size_t PageInfo::GetScIdx() const
70 | {
71 |     return ((size_t)_desc & SC_MASK);
72 | }
73 |
74 | #define PM_SZ ((1ULL << PM_SB) * sizeof(PageInfo))
75 |
76 | static_assert(sizeof(PageInfo) == sizeof(uint64_t), "Invalid PageInfo size");
77 |
78 | // lock free page map
79 | class PageMap {
80 | public:
81 |     // must be called before any GetPageInfo/SetPageInfo calls
82 |     void Init();
83 |
84 |     PageInfo GetPageInfo(char* ptr);
85 |     void SetPageInfo(char* ptr, PageInfo info);
86 |
87 | private:
88 |     size_t AddrToKey(char* ptr) const;
89 |
90 | private:
91 |     // array based impl
92 |     std::atomic<PageInfo>* _pagemap = { nullptr };
93 | };
94 |
95 | inline size_t PageMap::AddrToKey(char* ptr) const
96 | {
97 |     size_t key = ((size_t)ptr >> PM_KEY_SHIFT) & PM_KEY_MASK;
98 |     return key;
99 | }
100 |
101 | inline PageInfo PageMap::GetPageInfo(char* ptr)
102 | {
103 |     size_t key = AddrToKey(ptr);
104 |     return _pagemap[key].load();
105 | }
106 |
107 | inline void PageMap::SetPageInfo(char* ptr, PageInfo info)
108 | {
109 |     size_t key = AddrToKey(ptr);
110 |     _pagemap[key].store(info);
111 | }
112 |
113 | extern PageMap sPageMap;
114 |
115 | #endif // __PAGEMAP_H
116 |
--------------------------------------------------------------------------------
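Worked numbers for the page map above (using the x86-64 assumptions it states): PM_SB = 64 - 14 - 12 = 38 significant bits, so the flat array has 2^38 eight-byte PageInfo entries, 2 TiB of virtual address space. That is why it is allocated with PageAllocOvercommit (shown next in pages.cpp): only the entries actually touched ever become resident. An illustrative check, my own sketch rather than project code:

```cpp
#include <cstdio>

int main()
{
    const unsigned long long entries = 1ULL << 38; // 2^(64 - PM_NHS - PM_NLS)
    printf("pagemap virtual size: %llu GiB\n", entries * 8 >> 30); // 2048 GiB
    // key extraction: drop the low 12 page bits, mask the middle 38 bits
    char* ptr = (char*)0x7f1234567890ULL; // arbitrary sample address
    unsigned long long key = ((unsigned long long)ptr >> 12) & (entries - 1);
    printf("key for %p: %llu\n", (void*)ptr, key);
    return 0;
}
```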
/pages.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2019 Ricardo Leite. All rights reserved.
3 | * Licenced under the MIT licence. See COPYING file in the project root for
4 | * details.
5 | */
6 |
7 | #include "pages.h"
8 |
9 | #include <sys/mman.h>
10 |
11 | #include "log.h"
12 |
13 | void* PageAlloc(size_t size)
14 | {
15 |     ASSERT((size & PAGE_MASK) == 0);
16 |
17 |     void* ptr = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
18 |     if (ptr == MAP_FAILED) {
19 |         ptr = nullptr;
20 |     }
21 |
22 |     return ptr;
23 | }
24 |
25 | void* PageAllocOvercommit(size_t size)
26 | {
27 |     ASSERT((size & PAGE_MASK) == 0);
28 |
29 |     // use MAP_NORESERVE to skip OS overcommit limits
30 |     void* ptr = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON | MAP_NORESERVE, -1, 0);
31 |     if (ptr == MAP_FAILED) {
32 |         ptr = nullptr;
33 |     } else {
34 |         // exclude such large maps from core dumps as they become unusable otherwise
35 |         madvise(ptr, size, MADV_DONTDUMP);
36 |     }
37 |
38 |     return ptr;
39 | }
40 |
41 | void PageFree(void* ptr, size_t size)
42 | {
43 |     ASSERT((size & PAGE_MASK) == 0);
44 |
45 |     int ret = munmap(ptr, size);
46 |     (void)ret; // suppress warning
47 |     ASSERT(ret == 0);
48 | }
49 |
--------------------------------------------------------------------------------
/pages.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2019 Ricardo Leite. All rights reserved.
3 | * Licenced under the MIT licence. See COPYING file in the project root for
4 | * details.
5 | */
6 |
7 | #ifndef __PAGES_H
8 | #define __PAGES_H
9 |
10 | #include <stddef.h>
11 | #include <stdint.h>
12 |
13 | #include "lrmalloc.h"
14 |
15 | // return page address for page containing a
16 | #define PAGE_ADDR2BASE(a) ((void*)((uintptr_t)(a) & ~PAGE_MASK))
17 |
18 | // returns a set of contiguous pages, totaling size bytes
19 | void* PageAlloc(size_t size);
20 | // explicitly allow overcommitting
21 | // used for array-based page map
22 | void* PageAllocOvercommit(size_t size);
23 | // free a set of contiguous pages, totaling size bytes
24 | void PageFree(void* ptr, size_t size);
25 |
26 | #endif // __PAGES_H
27 |
--------------------------------------------------------------------------------
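A minimal usage sketch for the wrappers above (my own illustration; sizes must be page multiples, as the ASSERTs in pages.cpp enforce):

```cpp
#include <cstdio>
#include "pages.h"

int main()
{
    void* p = PageAlloc(4 * PAGE); // 4 contiguous, zero-filled pages
    if (!p)
        return 1;
    char* inner = (char*)p + 100;
    // PAGE_ADDR2BASE masks the low 12 bits to recover the page start
    printf("same page base: %d\n", PAGE_ADDR2BASE(inner) == p);
    PageFree(p, 4 * PAGE);
    return 0;
}
```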
/size_classes.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2019 Ricardo Leite. All rights reserved.
3 | * Licenced under the MIT licence. See COPYING file in the project root for
4 | * details.
5 | */
6 |
7 | #include "lrmalloc_internal.h"
8 | #include "size_classes.h"
9 |
10 | #include "log.h"
11 |
12 | #define SIZE_CLASS_bin_yes(blockSize, pages) { blockSize, pages * PAGE },
13 | #define SIZE_CLASS_bin_no(blockSize, pages)
14 |
15 | #define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, lg_delta_lookup) \
16 |     SIZE_CLASS_bin_##bin((1U << lg_grp) + (ndelta << lg_delta), pgs)
17 |
18 | SizeClassData SizeClasses[MAX_SZ_IDX] = { { 0, 0 }, SIZE_CLASSES };
19 |
20 | size_t SizeClassLookup[MAX_SZ + 1] = { 0 };
21 |
22 | void InitSizeClass()
23 | {
24 |     // fill blockNum and cacheBlockNum
25 |     for (size_t scIdx = 1; scIdx < MAX_SZ_IDX; ++scIdx) {
26 |         SizeClassData& sc = SizeClasses[scIdx];
27 |         // blockNum calc
28 |         sc.blockNum = SB_SIZE / sc.blockSize;
29 |         // cacheBlockNum calc
30 |         sc.cacheBlockNum = sc.blockNum * 1;
31 |         ASSERT(sc.blockNum > 0);
32 |         ASSERT(sc.blockNum <= MAX_BLOCK_NUM);
33 |         ASSERT(sc.blockNum >= sc.cacheBlockNum);
34 |     }
35 |
36 |     // first size class reserved for large allocations
37 |     size_t lookupIdx = 0;
38 |     for (size_t scIdx = 1; scIdx < MAX_SZ_IDX; ++scIdx) {
39 |         SizeClassData const& sc = SizeClasses[scIdx];
40 |         size_t blockSize = sc.blockSize;
41 |         while (lookupIdx <= blockSize) {
42 |             SizeClassLookup[lookupIdx] = scIdx;
43 |             ++lookupIdx;
44 |         }
45 |     }
46 | }
47 |
--------------------------------------------------------------------------------
/size_classes.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (C) 2019 Ricardo Leite. All rights reserved.
3 | * Licenced under the MIT licence. See COPYING file in the project root for
4 | * details.
5 | */
6 |
7 | #ifndef __SIZE_CLASSES_H
8 | #define __SIZE_CLASSES_H
9 |
10 | #include <stddef.h>
11 | #include <stdint.h>
12 |
13 | #include "lrmalloc.h"
14 |
15 | // number of size classes
16 | // idx 0 reserved for large size classes
17 | #define MAX_SZ_IDX 40
18 | #define LG_MAX_SIZE_IDX 6
19 | // last size covered by a size class
20 | // allocations with size > MAX_SZ are not covered by a size class
21 | #define MAX_SZ ((1 << 13) + (1 << 11) * 3)
22 | #define SB_SIZE (1024 * 256)
23 |
24 | // contains size classes
25 | struct SizeClassData {
26 | public:
27 |     // size of block
28 |     uint32_t blockSize;
29 |     // cached number of blocks, equal to SB_SIZE / blockSize
30 |     uint32_t blockNum;
31 |     // number of blocks held by thread-specific caches
32 |     uint32_t cacheBlockNum;
33 |
34 | public:
35 |     size_t GetBlockNum() const { return blockNum; }
36 | };
37 |
38 | // globals
39 | // initialized at compile time
40 | extern SizeClassData SizeClasses[MAX_SZ_IDX];
41 | // *not* initialized at compile time, needs InitSizeClass() call
42 | extern size_t SizeClassLookup[MAX_SZ + 1];
43 |
44 | // must be called before GetSizeClass
45 | void InitSizeClass();
46 |
47 | inline size_t GetSizeClass(size_t size)
48 | {
49 |     return SizeClassLookup[size];
50 | }
51 |
52 | // size class data, from jemalloc 5.0
53 | #define SIZE_CLASSES \
54 |     /* index, lg_grp, lg_delta, ndelta, psz, bin, pgs, lg_delta_lookup */ \
55 |     SC(0, 3, 3, 0, no, yes, 1, 3) \
56 |     SC(1, 3, 3, 1, no, yes, 1, 3) \
57 |     SC(2, 3, 3, 2, no, yes, 3, 3) \
58 |     SC(3, 3, 3, 3, no, yes, 1, 3) \
59 |     \
60 |     SC(4, 5, 3, 1, no, yes, 5, 3) \
61 |     SC(5, 5, 3, 2, no, yes, 3, 3) \
62 |     SC(6, 5, 3, 3, no, yes, 7, 3) \
63 |     SC(7, 5, 3, 4, no, yes, 1, 3) \
64 |     \
65 |     SC(8, 6, 4, 1, no, yes, 5, 4) \
66 |     SC(9, 6, 4, 2, no, yes, 3, 4) \
67 |     SC(10, 6, 4, 3, no, yes, 7, 4) \
68 |     SC(11, 6, 4, 4, no, yes, 1, 4) \
69 |     \
70 |     SC(12, 7, 5, 1, no, yes,
5, 5) \ 71 | SC(13, 7, 5, 2, no, yes, 3, 5) \ 72 | SC(14, 7, 5, 3, no, yes, 7, 5) \ 73 | SC(15, 7, 5, 4, no, yes, 1, 5) \ 74 | \ 75 | SC(16, 8, 6, 1, no, yes, 5, 6) \ 76 | SC(17, 8, 6, 2, no, yes, 3, 6) \ 77 | SC(18, 8, 6, 3, no, yes, 7, 6) \ 78 | SC(19, 8, 6, 4, no, yes, 1, 6) \ 79 | \ 80 | SC(20, 9, 7, 1, no, yes, 5, 7) \ 81 | SC(21, 9, 7, 2, no, yes, 3, 7) \ 82 | SC(22, 9, 7, 3, no, yes, 7, 7) \ 83 | SC(23, 9, 7, 4, no, yes, 1, 7) \ 84 | \ 85 | SC(24, 10, 8, 1, no, yes, 5, 8) \ 86 | SC(25, 10, 8, 2, no, yes, 3, 8) \ 87 | SC(26, 10, 8, 3, no, yes, 7, 8) \ 88 | SC(27, 10, 8, 4, no, yes, 1, 8) \ 89 | \ 90 | SC(28, 11, 9, 1, no, yes, 5, 9) \ 91 | SC(29, 11, 9, 2, no, yes, 3, 9) \ 92 | SC(30, 11, 9, 3, no, yes, 7, 9) \ 93 | SC(31, 11, 9, 4, yes, yes, 2, 9) \ 94 | \ 95 | SC(32, 12, 10, 1, no, yes, 5, no) \ 96 | SC(33, 12, 10, 2, no, yes, 3, no) \ 97 | SC(34, 12, 10, 3, no, yes, 7, no) \ 98 | SC(35, 12, 10, 4, yes, yes, 4, no) \ 99 | \ 100 | SC(36, 13, 11, 1, no, yes, 5, no) \ 101 | SC(37, 13, 11, 2, yes, yes, 6, no) \ 102 | SC(38, 13, 11, 3, no, yes, 7, no) \ 103 | SC(39, 13, 11, 4, yes, no, 0, no) \ 104 | \ 105 | SC(40, 14, 12, 1, yes, no, 0, no) \ 106 | SC(41, 14, 12, 2, yes, no, 0, no) \ 107 | SC(42, 14, 12, 3, yes, no, 0, no) \ 108 | SC(43, 14, 12, 4, yes, no, 0, no) \ 109 | \ 110 | SC(44, 15, 13, 1, yes, no, 0, no) \ 111 | SC(45, 15, 13, 2, yes, no, 0, no) \ 112 | SC(46, 15, 13, 3, yes, no, 0, no) \ 113 | SC(47, 15, 13, 4, yes, no, 0, no) \ 114 | \ 115 | SC(48, 16, 14, 1, yes, no, 0, no) \ 116 | SC(49, 16, 14, 2, yes, no, 0, no) \ 117 | SC(50, 16, 14, 3, yes, no, 0, no) \ 118 | SC(51, 16, 14, 4, yes, no, 0, no) \ 119 | \ 120 | SC(52, 17, 15, 1, yes, no, 0, no) \ 121 | SC(53, 17, 15, 2, yes, no, 0, no) \ 122 | SC(54, 17, 15, 3, yes, no, 0, no) \ 123 | SC(55, 17, 15, 4, yes, no, 0, no) \ 124 | \ 125 | SC(56, 18, 16, 1, yes, no, 0, no) \ 126 | SC(57, 18, 16, 2, yes, no, 0, no) \ 127 | SC(58, 18, 16, 3, yes, no, 0, no) \ 128 | SC(59, 18, 16, 4, yes, no, 0, no) \ 129 | \ 130 | SC(60, 19, 17, 1, yes, no, 0, no) \ 131 | SC(61, 19, 17, 2, yes, no, 0, no) \ 132 | SC(62, 19, 17, 3, yes, no, 0, no) \ 133 | SC(63, 19, 17, 4, yes, no, 0, no) \ 134 | \ 135 | SC(64, 20, 18, 1, yes, no, 0, no) \ 136 | SC(65, 20, 18, 2, yes, no, 0, no) \ 137 | SC(66, 20, 18, 3, yes, no, 0, no) \ 138 | SC(67, 20, 18, 4, yes, no, 0, no) \ 139 | \ 140 | SC(68, 21, 19, 1, yes, no, 0, no) \ 141 | SC(69, 21, 19, 2, yes, no, 0, no) \ 142 | SC(70, 21, 19, 3, yes, no, 0, no) \ 143 | SC(71, 21, 19, 4, yes, no, 0, no) \ 144 | \ 145 | SC(72, 22, 20, 1, yes, no, 0, no) \ 146 | SC(73, 22, 20, 2, yes, no, 0, no) \ 147 | SC(74, 22, 20, 3, yes, no, 0, no) \ 148 | SC(75, 22, 20, 4, yes, no, 0, no) \ 149 | \ 150 | SC(76, 23, 21, 1, yes, no, 0, no) \ 151 | SC(77, 23, 21, 2, yes, no, 0, no) \ 152 | SC(78, 23, 21, 3, yes, no, 0, no) \ 153 | SC(79, 23, 21, 4, yes, no, 0, no) \ 154 | \ 155 | SC(80, 24, 22, 1, yes, no, 0, no) \ 156 | SC(81, 24, 22, 2, yes, no, 0, no) \ 157 | SC(82, 24, 22, 3, yes, no, 0, no) \ 158 | SC(83, 24, 22, 4, yes, no, 0, no) \ 159 | \ 160 | SC(84, 25, 23, 1, yes, no, 0, no) \ 161 | SC(85, 25, 23, 2, yes, no, 0, no) \ 162 | SC(86, 25, 23, 3, yes, no, 0, no) \ 163 | SC(87, 25, 23, 4, yes, no, 0, no) \ 164 | \ 165 | SC(88, 26, 24, 1, yes, no, 0, no) \ 166 | SC(89, 26, 24, 2, yes, no, 0, no) \ 167 | SC(90, 26, 24, 3, yes, no, 0, no) \ 168 | SC(91, 26, 24, 4, yes, no, 0, no) \ 169 | \ 170 | SC(92, 27, 25, 1, yes, no, 0, no) \ 171 | SC(93, 27, 25, 2, yes, no, 0, no) \ 172 | SC(94, 27, 25, 3, yes, no, 0, no) \ 173 | SC(95, 27, 
25, 4, yes, no, 0, no) \ 174 | \ 175 | SC(96, 28, 26, 1, yes, no, 0, no) \ 176 | SC(97, 28, 26, 2, yes, no, 0, no) \ 177 | SC(98, 28, 26, 3, yes, no, 0, no) \ 178 | SC(99, 28, 26, 4, yes, no, 0, no) \ 179 | \ 180 | SC(100, 29, 27, 1, yes, no, 0, no) \ 181 | SC(101, 29, 27, 2, yes, no, 0, no) \ 182 | SC(102, 29, 27, 3, yes, no, 0, no) \ 183 | SC(103, 29, 27, 4, yes, no, 0, no) \ 184 | \ 185 | SC(104, 30, 28, 1, yes, no, 0, no) \ 186 | SC(105, 30, 28, 2, yes, no, 0, no) \ 187 | SC(106, 30, 28, 3, yes, no, 0, no) \ 188 | SC(107, 30, 28, 4, yes, no, 0, no) \ 189 | \ 190 | SC(108, 31, 29, 1, yes, no, 0, no) \ 191 | SC(109, 31, 29, 2, yes, no, 0, no) \ 192 | SC(110, 31, 29, 3, yes, no, 0, no) \ 193 | SC(111, 31, 29, 4, yes, no, 0, no) \ 194 | \ 195 | SC(112, 32, 30, 1, yes, no, 0, no) \ 196 | SC(113, 32, 30, 2, yes, no, 0, no) \ 197 | SC(114, 32, 30, 3, yes, no, 0, no) \ 198 | SC(115, 32, 30, 4, yes, no, 0, no) \ 199 | \ 200 | SC(116, 33, 31, 1, yes, no, 0, no) \ 201 | SC(117, 33, 31, 2, yes, no, 0, no) \ 202 | SC(118, 33, 31, 3, yes, no, 0, no) \ 203 | SC(119, 33, 31, 4, yes, no, 0, no) \ 204 | \ 205 | SC(120, 34, 32, 1, yes, no, 0, no) \ 206 | SC(121, 34, 32, 2, yes, no, 0, no) \ 207 | SC(122, 34, 32, 3, yes, no, 0, no) \ 208 | SC(123, 34, 32, 4, yes, no, 0, no) \ 209 | \ 210 | SC(124, 35, 33, 1, yes, no, 0, no) \ 211 | SC(125, 35, 33, 2, yes, no, 0, no) \ 212 | SC(126, 35, 33, 3, yes, no, 0, no) \ 213 | SC(127, 35, 33, 4, yes, no, 0, no) \ 214 | \ 215 | SC(128, 36, 34, 1, yes, no, 0, no) \ 216 | SC(129, 36, 34, 2, yes, no, 0, no) \ 217 | SC(130, 36, 34, 3, yes, no, 0, no) \ 218 | SC(131, 36, 34, 4, yes, no, 0, no) \ 219 | \ 220 | SC(132, 37, 35, 1, yes, no, 0, no) \ 221 | SC(133, 37, 35, 2, yes, no, 0, no) \ 222 | SC(134, 37, 35, 3, yes, no, 0, no) \ 223 | SC(135, 37, 35, 4, yes, no, 0, no) \ 224 | \ 225 | SC(136, 38, 36, 1, yes, no, 0, no) \ 226 | SC(137, 38, 36, 2, yes, no, 0, no) \ 227 | SC(138, 38, 36, 3, yes, no, 0, no) \ 228 | SC(139, 38, 36, 4, yes, no, 0, no) \ 229 | \ 230 | SC(140, 39, 37, 1, yes, no, 0, no) \ 231 | SC(141, 39, 37, 2, yes, no, 0, no) \ 232 | SC(142, 39, 37, 3, yes, no, 0, no) \ 233 | SC(143, 39, 37, 4, yes, no, 0, no) \ 234 | \ 235 | SC(144, 40, 38, 1, yes, no, 0, no) \ 236 | SC(145, 40, 38, 2, yes, no, 0, no) \ 237 | SC(146, 40, 38, 3, yes, no, 0, no) \ 238 | SC(147, 40, 38, 4, yes, no, 0, no) \ 239 | \ 240 | SC(148, 41, 39, 1, yes, no, 0, no) \ 241 | SC(149, 41, 39, 2, yes, no, 0, no) \ 242 | SC(150, 41, 39, 3, yes, no, 0, no) \ 243 | SC(151, 41, 39, 4, yes, no, 0, no) \ 244 | \ 245 | SC(152, 42, 40, 1, yes, no, 0, no) \ 246 | SC(153, 42, 40, 2, yes, no, 0, no) \ 247 | SC(154, 42, 40, 3, yes, no, 0, no) \ 248 | SC(155, 42, 40, 4, yes, no, 0, no) \ 249 | \ 250 | SC(156, 43, 41, 1, yes, no, 0, no) \ 251 | SC(157, 43, 41, 2, yes, no, 0, no) \ 252 | SC(158, 43, 41, 3, yes, no, 0, no) \ 253 | SC(159, 43, 41, 4, yes, no, 0, no) \ 254 | \ 255 | SC(160, 44, 42, 1, yes, no, 0, no) \ 256 | SC(161, 44, 42, 2, yes, no, 0, no) \ 257 | SC(162, 44, 42, 3, yes, no, 0, no) \ 258 | SC(163, 44, 42, 4, yes, no, 0, no) \ 259 | \ 260 | SC(164, 45, 43, 1, yes, no, 0, no) \ 261 | SC(165, 45, 43, 2, yes, no, 0, no) \ 262 | SC(166, 45, 43, 3, yes, no, 0, no) \ 263 | SC(167, 45, 43, 4, yes, no, 0, no) \ 264 | \ 265 | SC(168, 46, 44, 1, yes, no, 0, no) \ 266 | SC(169, 46, 44, 2, yes, no, 0, no) \ 267 | SC(170, 46, 44, 3, yes, no, 0, no) \ 268 | SC(171, 46, 44, 4, yes, no, 0, no) \ 269 | \ 270 | SC(172, 47, 45, 1, yes, no, 0, no) \ 271 | SC(173, 47, 45, 2, yes, no, 0, no) \ 272 | SC(174, 47, 
45, 3, yes, no, 0, no) \ 273 | SC(175, 47, 45, 4, yes, no, 0, no) \ 274 | \ 275 | SC(176, 48, 46, 1, yes, no, 0, no) \ 276 | SC(177, 48, 46, 2, yes, no, 0, no) \ 277 | SC(178, 48, 46, 3, yes, no, 0, no) \ 278 | SC(179, 48, 46, 4, yes, no, 0, no) \ 279 | \ 280 | SC(180, 49, 47, 1, yes, no, 0, no) \ 281 | SC(181, 49, 47, 2, yes, no, 0, no) \ 282 | SC(182, 49, 47, 3, yes, no, 0, no) \ 283 | SC(183, 49, 47, 4, yes, no, 0, no) \ 284 | \ 285 | SC(184, 50, 48, 1, yes, no, 0, no) \ 286 | SC(185, 50, 48, 2, yes, no, 0, no) \ 287 | SC(186, 50, 48, 3, yes, no, 0, no) \ 288 | SC(187, 50, 48, 4, yes, no, 0, no) \ 289 | \ 290 | SC(188, 51, 49, 1, yes, no, 0, no) \ 291 | SC(189, 51, 49, 2, yes, no, 0, no) \ 292 | SC(190, 51, 49, 3, yes, no, 0, no) \ 293 | SC(191, 51, 49, 4, yes, no, 0, no) \ 294 | \ 295 | SC(192, 52, 50, 1, yes, no, 0, no) \ 296 | SC(193, 52, 50, 2, yes, no, 0, no) \ 297 | SC(194, 52, 50, 3, yes, no, 0, no) \ 298 | SC(195, 52, 50, 4, yes, no, 0, no) \ 299 | \ 300 | SC(196, 53, 51, 1, yes, no, 0, no) \ 301 | SC(197, 53, 51, 2, yes, no, 0, no) \ 302 | SC(198, 53, 51, 3, yes, no, 0, no) \ 303 | SC(199, 53, 51, 4, yes, no, 0, no) \ 304 | \ 305 | SC(200, 54, 52, 1, yes, no, 0, no) \ 306 | SC(201, 54, 52, 2, yes, no, 0, no) \ 307 | SC(202, 54, 52, 3, yes, no, 0, no) \ 308 | SC(203, 54, 52, 4, yes, no, 0, no) \ 309 | \ 310 | SC(204, 55, 53, 1, yes, no, 0, no) \ 311 | SC(205, 55, 53, 2, yes, no, 0, no) \ 312 | SC(206, 55, 53, 3, yes, no, 0, no) \ 313 | SC(207, 55, 53, 4, yes, no, 0, no) \ 314 | \ 315 | SC(208, 56, 54, 1, yes, no, 0, no) \ 316 | SC(209, 56, 54, 2, yes, no, 0, no) \ 317 | SC(210, 56, 54, 3, yes, no, 0, no) \ 318 | SC(211, 56, 54, 4, yes, no, 0, no) \ 319 | \ 320 | SC(212, 57, 55, 1, yes, no, 0, no) \ 321 | SC(213, 57, 55, 2, yes, no, 0, no) \ 322 | SC(214, 57, 55, 3, yes, no, 0, no) \ 323 | SC(215, 57, 55, 4, yes, no, 0, no) \ 324 | \ 325 | SC(216, 58, 56, 1, yes, no, 0, no) \ 326 | SC(217, 58, 56, 2, yes, no, 0, no) \ 327 | SC(218, 58, 56, 3, yes, no, 0, no) \ 328 | SC(219, 58, 56, 4, yes, no, 0, no) \ 329 | \ 330 | SC(220, 59, 57, 1, yes, no, 0, no) \ 331 | SC(221, 59, 57, 2, yes, no, 0, no) \ 332 | SC(222, 59, 57, 3, yes, no, 0, no) \ 333 | SC(223, 59, 57, 4, yes, no, 0, no) \ 334 | \ 335 | SC(224, 60, 58, 1, yes, no, 0, no) \ 336 | SC(225, 60, 58, 2, yes, no, 0, no) \ 337 | SC(226, 60, 58, 3, yes, no, 0, no) \ 338 | SC(227, 60, 58, 4, yes, no, 0, no) \ 339 | \ 340 | SC(228, 61, 59, 1, yes, no, 0, no) \ 341 | SC(229, 61, 59, 2, yes, no, 0, no) \ 342 | SC(230, 61, 59, 3, yes, no, 0, no) \ 343 | SC(231, 61, 59, 4, yes, no, 0, no) \ 344 | \ 345 | SC(232, 62, 60, 1, yes, no, 0, no) \ 346 | SC(233, 62, 60, 2, yes, no, 0, no) \ 347 | SC(234, 62, 60, 3, yes, no, 0, no) 348 | 349 | #endif // __SIZE_CLASSES_H 350 | -------------------------------------------------------------------------------- /tcache.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2019 Ricardo Leite. All rights reserved. 3 | * Licenced under the MIT licence. See COPYING file in the project root for 4 | * details. 5 | */ 6 | 7 | #include "tcache.h" 8 | 9 | // thread cache, uses tsd/tls 10 | // one cache per thread 11 | __thread TCacheBin TCache[MAX_SZ_IDX]; 12 | -------------------------------------------------------------------------------- /tcache.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2019 Ricardo Leite. All rights reserved. 3 | * Licenced under the MIT licence. 
See COPYING file in the project root for
 4 |  * details.
 5 |  */
 6 | 
 7 | #ifndef __TCACHE_H_
 8 | #define __TCACHE_H_
 9 | 
10 | #include "log.h"
11 | #include "lrmalloc.h"
12 | #include "size_classes.h"
13 | #include <stdint.h>
14 | 
15 | struct TCacheBin {
16 | private:
17 |     char* _block = nullptr;
18 |     uint32_t _blockNum = 0;
19 | 
20 | public:
21 |     // common, fast ops
22 |     void PushBlock(char* block, size_t scIdx);
23 |     // push block list, cache *must* be empty
24 |     void PushList(char* block, uint32_t length);
25 | 
26 |     char* PopBlock(size_t scIdx); // caller must first check GetBlockNum() > 0
27 |     // manually popped list of blocks and now need to update cache
28 |     // `block` is the new head
29 |     void PopList(char* block, uint32_t length);
30 |     char* PeekBlock() const { return _block; }
31 | 
32 |     uint32_t GetBlockNum() const { return _blockNum; }
33 | 
34 |     // slow operations like fill/flush handled in cache user
35 | };
36 | 
37 | inline void TCacheBin::PushBlock(char* block, size_t scIdx)
38 | {
39 |     size_t blockSize = SizeClasses[scIdx].blockSize;
40 |     // block has at least sizeof(ptrdiff_t) of space: store the signed
41 |     // offset from the end of this block to the previous list head
42 |     *(ptrdiff_t*)block = _block - block - blockSize;
43 |     _block = block;
44 |     _blockNum++;
45 | }
46 | 
47 | inline void TCacheBin::PushList(char* block, uint32_t length)
48 | {
49 |     // caller must ensure there's no available block
50 |     // this op is only used to fill an empty cache
51 |     ASSERT(_blockNum == 0);
52 | 
53 |     _block = block;
54 |     _blockNum = length;
55 | }
56 | 
57 | inline char* TCacheBin::PopBlock(size_t scIdx)
58 | {
59 |     // caller must ensure there's an available block
60 |     ASSERT(_blockNum > 0);
61 |     size_t blockSize = SizeClasses[scIdx].blockSize;
62 |     char* ret = _block;
63 |     _block += *(ptrdiff_t*)_block + blockSize;
64 |     _blockNum--;
65 |     return ret;
66 | }
67 | 
68 | inline void TCacheBin::PopList(char* block, uint32_t length)
69 | {
70 |     ASSERT(_blockNum >= length);
71 | 
72 |     _block = block;
73 |     _blockNum -= length;
74 | }
75 | 
76 | // use tls init exec model
77 | extern __thread TCacheBin TCache[MAX_SZ_IDX] LFMALLOC_TLS_INIT_EXEC LFMALLOC_CACHE_ALIGNED;
78 | 
79 | void FillCache(size_t scIdx, TCacheBin* cache);
80 | void FlushCache(size_t scIdx, TCacheBin* cache);
81 | 
82 | #endif // __TCACHE_H_
83 | 
--------------------------------------------------------------------------------
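The relative encoding in `PushBlock`/`PopBlock` above is worth unpacking: instead of an absolute next pointer, each free block stores the signed distance from its own end to the previous list head, so a run of adjacent blocks links up with all-zero offsets. The following standalone sketch demonstrates just that encoding outside the allocator; `Bin` and `kBlockSize` are illustrative names, not repo code, with `kBlockSize` standing in for `SizeClasses[scIdx].blockSize`.

```cpp
#include <cstddef>
#include <cstdio>

constexpr size_t kBlockSize = 64; // stand-in for one size class's blockSize

struct Bin {
    char* head = nullptr;

    void push(char* block)
    {
        // Same encoding as TCacheBin::PushBlock: the free block stores the
        // signed distance from its own end to the previous head. For the
        // very first push this is computed against the null head, which the
        // matching final pop exactly undoes.
        *(ptrdiff_t*)block = head - block - (ptrdiff_t)kBlockSize;
        head = block;
    }

    char* pop()
    {
        char* ret = head;
        // Same decoding as TCacheBin::PopBlock.
        head += *(ptrdiff_t*)head + kBlockSize;
        return ret;
    }
};

int main()
{
    alignas(ptrdiff_t) static char superblock[4 * kBlockSize];
    Bin bin;
    // Push blocks 3,2,1,0 so adjacent blocks all encode offset 0.
    for (int i = 3; i >= 0; --i)
        bin.push(superblock + i * kBlockSize);
    // Pops come back as 0,1,2,3 (offsets 0, 64, 128, 192).
    for (int i = 0; i < 4; ++i)
        printf("pop %d at offset %td\n", i, bin.pop() - superblock);
    return 0;
}
```

After the last `pop()`, `head` lands back on `nullptr`, since each pop exactly undoes the corresponding push; that symmetry is why `PopBlock` can rely on `_blockNum` instead of a separate null check.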
/test/basic.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (C) 2019 Ricardo Leite. All rights reserved.
  3 |  * Licenced under the MIT licence. See COPYING file in the project root for
  4 |  * details.
  5 |  */
  6 | 
  7 | #include <stdio.h>
  8 | #include <stdlib.h>
  9 | 
 10 | #include <algorithm>
 11 | #include <array>
 12 | #include <random>
 13 | #include <thread>
 14 | #include <vector>
 15 | 
 16 | int main()
 17 | {
 18 |     // super trivial test:
 19 |     // - create N threads
 20 |     // - each thread does X allocs
 21 |     // - each thread shuffles their allocs
 22 |     // - each thread frees their allocs
 23 | 
 24 |     printf("Basic tests\n");
 25 | 
 26 |     // parameters
 27 |     constexpr size_t numThreads = 8;
 28 |     constexpr size_t numAllocs = 10000;
 29 |     constexpr size_t minAllocSize = 1;
 30 |     constexpr size_t maxAllocSize = 65536;
 31 | 
 32 |     printf("Parameters: %zu threads x %zu allocs between [%zu,%zu] bytes\n",
 33 |         numThreads, numAllocs, minAllocSize, maxAllocSize);
 34 | 
 35 |     std::random_device dev;
 36 |     std::uniform_int_distribution<size_t> dist(minAllocSize, maxAllocSize);
 37 | 
 38 |     std::array<std::mt19937, numThreads> rngs;
 39 |     std::array<std::thread, numThreads> threads;
 40 | 
 41 |     for (size_t t = 0; t < numThreads; ++t) {
 42 |         rngs[t] = std::mt19937(dev());
 43 | 
 44 |         threads[t] = std::thread([t, &rngs, &dist]() {
 45 |             auto rng = rngs[t];
 46 |             std::vector<std::tuple<uint8_t*, size_t, uint8_t>> allocs(numAllocs);
 47 | 
 48 |             printf("Thread %zu doing %zu allocations\n",
 49 |                 t, numAllocs);
 50 | 
 51 |             // fill allocs
 52 |             for (auto& alloc : allocs) {
 53 |                 size_t size = dist(rng);
 54 |                 uint8_t pattern = static_cast<uint8_t>(dist(rng));
 55 | 
 56 |                 // allocate
 57 |                 uint8_t* buffer = static_cast<uint8_t*>(malloc(size));
 58 | 
 59 |                 // write with random pattern
 60 |                 for (size_t k = 0; k < size; ++k) {
 61 |                     buffer[k] = pattern;
 62 |                 }
 63 | 
 64 |                 // add to allocs
 65 |                 alloc = std::make_tuple(buffer, size, pattern);
 66 |             }
 67 | 
 68 |             printf("Thread %zu shuffling %zu allocations\n",
 69 |                 t, numAllocs);
 70 | 
 71 |             // shuffle 'em
 72 |             std::shuffle(allocs.begin(), allocs.end(), rng);
 73 | 
 74 |             printf("Thread %zu freeing %zu allocations\n",
 75 |                 t, numAllocs);
 76 | 
 77 |             // verify and free them
 78 |             for (auto& alloc : allocs) {
 79 |                 uint8_t* buffer = std::get<0>(alloc);
 80 |                 size_t size = std::get<1>(alloc);
 81 |                 uint8_t pattern = std::get<2>(alloc);
 82 | 
 83 |                 // check random pattern
 84 |                 for (size_t k = 0; k < size; ++k) {
 85 |                     if (buffer[k] != pattern) {
 86 |                         printf("Thread %zu: alloc %p of size %zu is corrupted, expected pattern 0x%02X but got 0x%02X\n",
 87 |                             t, buffer, size, pattern, buffer[k]);
 88 | 
 89 |                         ::exit(1);
 90 |                     }
 91 |                 }
 92 | 
 93 |                 free(buffer);
 94 |             }
 95 |         });
 96 |     }
 97 | 
 98 |     for (size_t t = 0; t < numThreads; ++t) {
 99 |         threads[t].join();
100 |     }
101 | 
102 |     return 0;
103 | }
104 | 
--------------------------------------------------------------------------------
/test/size_class_data.cpp:
--------------------------------------------------------------------------------
 1 | #include <assert.h>
 2 | 
 3 | #include "../size_classes.h"
 4 | 
 5 | #define SIZE_CLASS_bin_yes(blockSize, pages) { blockSize, pages * PAGE },
 6 | #define SIZE_CLASS_bin_no(blockSize, pages)
 7 | 
 8 | #define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, lg_delta_lookup) \
 9 |     SIZE_CLASS_bin_##bin((1U << lg_grp) + (ndelta << lg_delta), pgs)
10 | 
11 | 
12 | int main()
13 | {
14 |     SizeClassData SizeClasses[MAX_SZ_IDX] = { { 0, 0 }, SIZE_CLASSES };
15 |     // each superblock has to contain several blocks,
16 |     // and it has to contain them *perfectly*,
17 |     // e.g. no space left over after the last block
18 |     for (size_t scIdx = 1; scIdx < MAX_SZ_IDX; ++scIdx) {
19 |         SizeClassData& sc = SizeClasses[scIdx];
20 |         // size class large enough to store several elements
21 |         assert(SB_SIZE >= (sc.blockSize * 2));
22 |     }
23 | }
24 | 
--------------------------------------------------------------------------------
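Note what the test above exercises: `SIZE_CLASSES` is an X-macro table, and each consumer defines `SC` to extract the fields it cares about, with a block size computed as `(1 << lg_grp) + (ndelta << lg_delta)` (e.g. class 13 is `(1 << 7) + (2 << 5) = 192` bytes). As a hedged sketch of the same pattern, assuming `size_classes.h` leaves `SC` for the includer to define (as the test does), the table can just as easily be expanded into print statements:

```cpp
#include <stdio.h>

#include "../size_classes.h"

// Expand every SC(...) row of SIZE_CLASSES into a statement that prints
// the computed block size for that class.
#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, lg_delta_lookup) \
    printf("class %3d: %u bytes\n", index, \
        (1U << (lg_grp)) + ((unsigned)(ndelta) << (lg_delta)));

int main()
{
    SIZE_CLASSES // one printf per size class row
    return 0;
}
```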
/thread_hooks.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (C) 2019 Ricardo Leite. All rights reserved.
  3 |  * Licenced under the MIT licence. See COPYING file in the project root for
  4 |  * details.
  5 |  */
  6 | 
  7 | #include <atomic>
  8 | #include <dlfcn.h>
  9 | #include <pthread.h>
 10 | 
 11 | #include "size_classes.h"
 12 | #include "tcache.h"
 13 | #include "mapcache.h"
 14 | 
 15 | // handle process init/exit hooks
 16 | pthread_key_t destructor_key;
 17 | bool is_initialized = false;
 18 | 
 19 | void initializer();
 20 | void finalizer();
 21 | void* thread_initializer(void* argptr);
 22 | void thread_finalizer(void* argptr);
 23 | 
 24 | // called on process init/exit
 25 | void lf_malloc_initialize()
 26 | {
 27 | }
 28 | 
 29 | void lf_malloc_finalize()
 30 | {
 31 | }
 32 | 
 33 | // called on thread enter/exit
 34 | void lf_malloc_thread_initialize()
 35 | {
 36 | }
 37 | 
 38 | void lf_malloc_thread_finalize()
 39 | {
 40 |     // flush caches
 41 |     for (size_t scIdx = 1; scIdx < MAX_SZ_IDX; ++scIdx) {
 42 |         FlushCache(scIdx, &TCache[scIdx]);
 43 |     }
 44 |     sMapCache.Flush();
 45 | }
 46 | 
 47 | LFMALLOC_ATTR(constructor)
 48 | void initializer()
 49 | {
 50 |     if (!is_initialized) {
 51 |         is_initialized = true;
 52 |         pthread_key_create(&destructor_key, thread_finalizer);
 53 |     }
 54 | 
 55 |     lf_malloc_initialize();
 56 |     lf_malloc_thread_initialize();
 57 | }
 58 | 
 59 | LFMALLOC_ATTR(destructor)
 60 | void finalizer()
 61 | {
 62 |     lf_malloc_thread_finalize();
 63 |     lf_malloc_finalize();
 64 | }
 65 | 
 66 | // handle thread init/exit hooks
 67 | typedef struct
 68 | {
 69 |     void* (*real_start)(void*);
 70 |     void* real_arg;
 71 | } thread_starter_arg;
 72 | 
 73 | void* thread_initializer(void* argptr)
 74 | {
 75 |     thread_starter_arg* arg = (thread_starter_arg*)argptr;
 76 |     void* (*real_start)(void*) = arg->real_start;
 77 |     void* real_arg = arg->real_arg;
 78 |     lf_malloc_thread_initialize();
 79 | 
 80 |     // store a non-null value so thread_finalizer runs at thread exit
 81 |     pthread_setspecific(destructor_key, (void*)1);
 82 |     return (*real_start)(real_arg);
 83 | }
 84 | 
 85 | void thread_finalizer(void* value)
 86 | {
 87 |     lf_malloc_thread_finalize();
 88 | }
 89 | 
 90 | int pthread_create(pthread_t* thread, pthread_attr_t const* attr, void* (*start_routine)(void*), void* arg)
 91 | {
 92 |     static int (*pthread_create_fn)(pthread_t*, pthread_attr_t const*, void* (*)(void*), void*) = NULL;
 93 |     if (pthread_create_fn == NULL) {
 94 |         pthread_create_fn = (int (*)(pthread_t*, pthread_attr_t const*, void* (*)(void*), void*))dlsym(RTLD_NEXT, "pthread_create");
 95 |     }
 96 | 
 97 | // @todo: don't want to use malloc here,
 98 | // so instead use a ring buffer, which has limited storage
 99 | #define RING_BUFFER_SIZE 10000
100 |     static std::atomic<uint32_t> ring_buffer_pos(0);
101 |     static thread_starter_arg ring_buffer[RING_BUFFER_SIZE];
102 |     // wrap around so the index stays in bounds; a slot may be reused once
103 |     // more than RING_BUFFER_SIZE threads have been created
104 |     uint32_t buffer_pos = ring_buffer_pos.fetch_add(1, std::memory_order_relaxed) % RING_BUFFER_SIZE;
105 | 
106 |     thread_starter_arg* starter_arg = &ring_buffer[buffer_pos];
107 |     starter_arg->real_start = start_routine;
108 |     starter_arg->real_arg = arg;
109 |     return pthread_create_fn(thread, attr, thread_initializer, starter_arg);
110 | }
--------------------------------------------------------------------------------
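Two POSIX mechanisms do the heavy lifting in thread_hooks.cpp: `dlsym(RTLD_NEXT, "pthread_create")` finds the real `pthread_create` behind the interposed one, and a `pthread_key_t` destructor gives the allocator a callback when a thread exits, which is where the caches get flushed. A minimal sketch of the destructor trick in isolation (illustrative names, independent of the allocator):

```cpp
#include <pthread.h>
#include <stdio.h>

static pthread_key_t key;

// Runs automatically when a thread that stored a value for `key` exits,
// mirroring how thread_finalizer flushes the thread caches.
static void on_thread_exit(void*) { printf("thread exiting\n"); }

static void* worker(void*)
{
    // A non-NULL value is required or the destructor is skipped;
    // thread_hooks.cpp uses (void*)1 the same way.
    pthread_setspecific(key, (void*)1);
    return nullptr;
}

int main()
{
    pthread_key_create(&key, on_thread_exit);
    pthread_t t;
    pthread_create(&t, nullptr, worker, nullptr);
    pthread_join(t, nullptr); // "thread exiting" is printed before join returns
    return 0;
}
```

Compile with `-pthread`. Because the destructor only fires for threads that stored a non-NULL value, `thread_initializer` must call `pthread_setspecific(destructor_key, (void*)1)` before handing control to the real start routine.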