├── LICENSE
├── README.md
└── Source
    ├── Managed
    │   ├── Smmalloc-CSharp.csproj
    │   └── Smmalloc.cs
    └── Native
        ├── CMakeLists.txt
        ├── smmalloc.cpp
        └── smmalloc.h

/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2018 Stanislav Denisov (nxrighthere@gmail.com)

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
This is an improved version of [smmalloc](https://github.com/SergeyMakeev/smmalloc), a [fast and efficient](https://github.com/SergeyMakeev/smmalloc#features) memory allocator designed to handle many small allocations/deallocations in heavy multi-threaded scenarios. The allocator was created for use in applications where performance is critical, such as video games.

Using the smmalloc allocator in the .NET environment helps minimize GC pressure when allocating buffers and avoids lock-based pools in multi-threaded systems. Modern .NET features such as [`Span<T>`](https://docs.microsoft.com/en-us/dotnet/api/system.span-1) work well in tandem with smmalloc and allow convenient management of data in native memory blocks.

Building
--------
To build the native library, the appropriate software is required:

For desktop platforms, [CMake](https://cmake.org/download/) with GNU Make or Visual Studio.

The managed assembly can be built with any compiler that supports C# 3.0 or higher (the `Span<T>` examples below require C# 7.2 or higher).

Usage
--------
##### Create a new smmalloc instance
```c#
// 8 buckets, 16 MB each, 128 bytes maximum allocation size (8 buckets * 16 bytes)
SmmallocInstance smmalloc = new SmmallocInstance(8, 16 * 1024 * 1024);
```

##### Destroy the smmalloc instance and free allocated memory
```c#
smmalloc.Dispose();
```

##### Create thread cache for the current thread
```c#
// 4 KB of thread cache for each bucket, hot warmup
smmalloc.CreateThreadCache(4 * 1024, CacheWarmupOptions.Hot);
```

##### Destroy thread cache for the current thread
```c#
smmalloc.DestroyThreadCache();
```

##### Allocate memory block
```c#
// 64-byte memory block
IntPtr memory = smmalloc.Malloc(64);
```

##### Release memory block
```c#
smmalloc.Free(memory);
```

##### Work with batches of memory blocks
```c#
IntPtr[] batch = new IntPtr[32];

// Allocate a batch of memory blocks
for (int i = 0; i < batch.Length; i++) {
    batch[i] = smmalloc.Malloc(64);
}

// Release the whole batch
smmalloc.Free(batch);
```

##### Write data to memory block
```c#
// Using Marshal
byte data = 0;

for (int i = 0; i < smmalloc.Size(memory); i++) {
    Marshal.WriteByte(memory, i, data++);
}

// Using Span<byte>
Span<byte> buffer;

unsafe {
    buffer = new Span<byte>((byte*)memory, smmalloc.Size(memory));
}

data = 0;

for (int i = 0; i < buffer.Length; i++) {
    buffer[i] = data++;
}
```

##### Read data from memory block
```c#
// Using Marshal
int sum = 0;

for (int i = 0; i < smmalloc.Size(memory); i++) {
    sum += Marshal.ReadByte(memory, i);
}

// Using Span<byte>
sum = 0;

foreach (byte value in buffer) {
    sum += value;
}
```

##### Hardware accelerated operations
```c#
// XOR using Vector<byte> and Span<byte>; prepare a second buffer of the same length
Span<byte> xor = new byte[buffer.Length];

if (Vector.IsHardwareAccelerated) {
    Span<Vector<byte>> bufferVector = MemoryMarshal.Cast<byte, Vector<byte>>(buffer);
    Span<Vector<byte>> xorVector = MemoryMarshal.Cast<byte, Vector<byte>>(xor);

    for (int i = 0; i < bufferVector.Length; i++) {
        bufferVector[i] ^= xorVector[i];
    }
}
```

##### Copy data using memory block
```c#
// Using Marshal
byte[] data = new byte[64];

// Copy from native memory
Marshal.Copy(memory, data, 0, 64);

// Copy to native memory
Marshal.Copy(data, 0, memory, 64);

// Using Buffer
unsafe {
    // Copy from native memory
    fixed (byte* destination = &data[0]) {
        Buffer.MemoryCopy((byte*)memory, destination, 64, 64);
    }

    // Copy to native memory
    fixed (byte* source = &data[0]) {
        Buffer.MemoryCopy(source, (byte*)memory, 64, 64);
    }
}
```

##### Custom data structures
```c#
// Define a custom structure
struct Entity {
    public uint id;
    public byte health;
    public byte state;
}

int entitySize = Marshal.SizeOf(typeof(Entity));
int entityCount = 10;

// Allocate memory block
IntPtr memory = smmalloc.Malloc(entitySize * entityCount);

// Create a Span<Entity> over the native memory block
Span<Entity> entities;

unsafe {
    entities = new Span<Entity>((void*)memory, entityCount);
}

// Do some stuff
Random random = new Random();
uint id = 1;

for (int i = 0; i < entities.Length; i++) {
    entities[i].id = id++;
    entities[i].health = (byte)random.Next(1, 100);
    entities[i].state = (byte)random.Next(1, 255);
}

// Release memory block
smmalloc.Free(memory);
```
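##### Thread cache lifecycle on a worker thread
The thread cache belongs to the thread that creates it, so the create/destroy calls must run on the worker thread itself. A minimal sketch, assuming an existing `smmalloc` instance shared across threads (the thread body, block size, and cache size here are illustrative):
```c#
Thread worker = new Thread(() => {
    // The cache is tied to this thread
    smmalloc.CreateThreadCache(4 * 1024, CacheWarmupOptions.Hot);

    try {
        IntPtr block = smmalloc.Malloc(64);

        // Do some work with the block here

        smmalloc.Free(block);
    } finally {
        // Destroy the cache before the thread exits
        smmalloc.DestroyThreadCache();
    }
});

worker.Start();
worker.Join();
```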
API reference
--------
### Enumerations
#### CacheWarmupOptions
Definitions of warmup options for the `CreateThreadCache()` method:

`CacheWarmupOptions.Cold` no warmup is performed for cache elements.

`CacheWarmupOptions.Warm` warmup is performed for half of the cache elements.

`CacheWarmupOptions.Hot` warmup is performed for all cache elements.

### Classes
A single low-level disposable class is used to work with smmalloc.

#### SmmallocInstance
Contains a pointer to the native smmalloc instance.

##### Constructors
`SmmallocInstance(uint bucketsCount, int bucketSize)` creates an allocator instance with a memory pool. The size of the memory blocks served by each bucket grows in 16-byte steps with the bucket index. The bucket size parameter sets the initial size of the pooled memory, in bytes.

##### Methods
`SmmallocInstance.Dispose()` destroys the smmalloc instance and frees the allocated memory.

`SmmallocInstance.CreateThreadCache(int cacheSize, CacheWarmupOptions warmupOption)` creates a thread cache for fast memory allocations within a thread. The warmup option sets the pre-allocation degree of cache elements.

`SmmallocInstance.DestroyThreadCache()` destroys the thread cache. Should be called before the end of the thread's life cycle.

`SmmallocInstance.Malloc(int bytesCount, int alignment)` allocates an aligned memory block. The maximum allocation size is the bucket count multiplied by 16, and the minimum allocation size is 16 bytes: with two buckets in a smmalloc instance the maximum allocation size is 32 bytes, with three buckets 48 bytes, with four 64 bytes, and so on. The alignment parameter is optional. Returns a pointer to the allocated memory block.

`SmmallocInstance.Free(IntPtr memory)` frees a memory block. A managed array, or a pointer to pointers together with a length, can be used instead of a pointer to a memory block to free a batch of memory, as shown in the sketch below.
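A minimal sketch of the pointer-based batch overload, assuming unsafe code is enabled; the batch size and block size are arbitrary:
```c#
unsafe {
    const int count = 4;
    IntPtr* blocks = stackalloc IntPtr[count];

    for (int i = 0; i < count; i++) {
        blocks[i] = smmalloc.Malloc(32);
    }

    // Pass a pointer to the first element and the number of pointers
    smmalloc.Free((IntPtr)blocks, count);
}
```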
`SmmallocInstance.Realloc(IntPtr memory, int bytesCount, int alignment)` reallocates a memory block. The alignment parameter is optional. Returns a pointer to the reallocated memory block.

`SmmallocInstance.Size(IntPtr memory)` gets the usable size of a memory block. Returns the size in bytes.

`SmmallocInstance.Bucket(IntPtr memory)` gets the bucket index of a memory block. Returns the placement index.
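For reference, a short sketch of how the sizing rules above play out in practice:
```c#
// 4 buckets, so the maximum allocation size is 4 * 16 = 64 bytes
SmmallocInstance smmalloc = new SmmallocInstance(4, 16 * 1024 * 1024);

IntPtr memory = smmalloc.Malloc(20);

// A 20-byte request is served by the second bucket (index 1), whose
// elements are 32 bytes, so the usable size is rounded up accordingly
int size = smmalloc.Size(memory);     // 32
int bucket = smmalloc.Bucket(memory); // 1

smmalloc.Free(memory);
smmalloc.Dispose();
```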
--------------------------------------------------------------------------------
/Source/Managed/Smmalloc-CSharp.csproj:
--------------------------------------------------------------------------------
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Library</OutputType>
    <TargetFramework>netstandard2.0</TargetFramework>
    <AssemblyName>Smmalloc</AssemblyName>
    <DefineConstants>SMMALLOC_INLINING;</DefineConstants>
  </PropertyGroup>

  <PropertyGroup Condition="'$(Configuration)'=='Debug'">
    <Optimize>false</Optimize>
    <DebugSymbols>True</DebugSymbols>
    <WarningLevel>3</WarningLevel>
  </PropertyGroup>

  <PropertyGroup Condition="'$(Configuration)'=='Release'">
    <Optimize>true</Optimize>
    <DebugSymbols>True</DebugSymbols>
    <WarningLevel>3</WarningLevel>
  </PropertyGroup>

</Project>
--------------------------------------------------------------------------------
/Source/Managed/Smmalloc.cs:
--------------------------------------------------------------------------------
/*
 *  Managed C# wrapper for Smmalloc, blazing fast memory allocator designed for video games
 *  Copyright (c) 2018 Stanislav Denisov
 *
 *  Permission is hereby granted, free of charge, to any person obtaining a copy
 *  of this software and associated documentation files (the "Software"), to deal
 *  in the Software without restriction, including without limitation the rights
 *  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 *  copies of the Software, and to permit persons to whom the Software is
 *  furnished to do so, subject to the following conditions:
 *
 *  The above copyright notice and this permission notice shall be included in all
 *  copies or substantial portions of the Software.
 *
 *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 *  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 *  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 *  SOFTWARE.
 */

using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Security;

namespace Smmalloc {
    public enum CacheWarmupOptions {
        Cold = 0,
        Warm = 1,
        Hot = 2
    }

    public class SmmallocInstance : IDisposable {
        private IntPtr nativeAllocator;
        // Largest request the buckets can satisfy: bucketsCount * 16 bytes
        private readonly uint allocationLimit;

        public SmmallocInstance(uint bucketsCount, int bucketSize) {
            if (bucketsCount > 64)
                throw new ArgumentOutOfRangeException(nameof(bucketsCount));

            nativeAllocator = Native.sm_allocator_create(bucketsCount, (IntPtr)bucketSize);

            if (nativeAllocator == IntPtr.Zero)
                throw new InvalidOperationException("Native memory allocator not created");

            allocationLimit = bucketsCount * 16;
        }

        public void Dispose() {
            Dispose(true);
            GC.SuppressFinalize(this);
        }

        protected virtual void Dispose(bool disposing) {
            if (nativeAllocator != IntPtr.Zero) {
                Native.sm_allocator_destroy(nativeAllocator);
                nativeAllocator = IntPtr.Zero;
            }
        }

        ~SmmallocInstance() {
            Dispose(false);
        }

        public void CreateThreadCache(int cacheSize, CacheWarmupOptions warmupOption) {
            if (cacheSize <= 0)
                throw new ArgumentOutOfRangeException(nameof(cacheSize));

            Native.sm_allocator_thread_cache_create(nativeAllocator, warmupOption, (IntPtr)cacheSize);
        }

        public void DestroyThreadCache() {
            Native.sm_allocator_thread_cache_destroy(nativeAllocator);
        }
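        // Allocation entry points below; request sizes are validated against
        // the bucketsCount * 16 byte limit computed in the constructor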
#if SMMALLOC_INLINING
        [MethodImpl(256)] // 256 == MethodImplOptions.AggressiveInlining
#endif
        public IntPtr Malloc(int bytesCount) {
            return Malloc(bytesCount, 0);
        }

#if SMMALLOC_INLINING
        [MethodImpl(256)]
#endif
        public IntPtr Malloc(int bytesCount, int alignment) {
            if (bytesCount <= 0 || bytesCount > allocationLimit)
                throw new ArgumentOutOfRangeException(nameof(bytesCount));

            return Native.sm_malloc(nativeAllocator, (IntPtr)bytesCount, (IntPtr)alignment);
        }

#if SMMALLOC_INLINING
        [MethodImpl(256)]
#endif
        public void Free(IntPtr memory) {
            if (memory == IntPtr.Zero)
                throw new ArgumentNullException(nameof(memory));

            Native.sm_free(nativeAllocator, memory);
        }

#if SMMALLOC_INLINING
        [MethodImpl(256)]
#endif
        public void Free(IntPtr[] batch) {
            if (batch == null)
                throw new ArgumentNullException(nameof(batch));

            Native.sm_free_batch(nativeAllocator, batch, (IntPtr)batch.Length);
        }

#if SMMALLOC_INLINING
        [MethodImpl(256)]
#endif
        public void Free(IntPtr batch, int length) {
            if (batch == IntPtr.Zero)
                throw new ArgumentNullException(nameof(batch));

            Native.sm_free_batch(nativeAllocator, batch, (IntPtr)length);
        }

#if SMMALLOC_INLINING
        [MethodImpl(256)]
#endif
        public IntPtr Realloc(IntPtr memory, int bytesCount) {
            return Realloc(memory, bytesCount, 0);
        }

#if SMMALLOC_INLINING
        [MethodImpl(256)]
#endif
        public IntPtr Realloc(IntPtr memory, int bytesCount, int alignment) {
            if (memory == IntPtr.Zero)
                throw new ArgumentNullException(nameof(memory));

            if (bytesCount <= 0 || bytesCount > allocationLimit)
                throw new ArgumentOutOfRangeException(nameof(bytesCount));

            return Native.sm_realloc(nativeAllocator, memory, (IntPtr)bytesCount, (IntPtr)alignment);
        }

#if SMMALLOC_INLINING
        [MethodImpl(256)]
#endif
        public int Size(IntPtr memory) {
            if (memory == IntPtr.Zero)
                throw new ArgumentNullException(nameof(memory));

            return (int)Native.sm_msize(nativeAllocator, memory);
        }

#if SMMALLOC_INLINING
        [MethodImpl(256)]
#endif
        public int Bucket(IntPtr memory) {
            if (memory == IntPtr.Zero)
                throw new ArgumentNullException(nameof(memory));

            return Native.sm_mbucket(nativeAllocator, memory);
        }
    }

    [SuppressUnmanagedCodeSecurity]
    internal static class Native {
        private const string nativeLibrary = "smmalloc";

        [DllImport(nativeLibrary, CallingConvention = CallingConvention.Cdecl)]
        internal static extern IntPtr sm_allocator_create(uint bucketsCount, IntPtr bucketSizeInBytes);

        [DllImport(nativeLibrary, CallingConvention = CallingConvention.Cdecl)]
        internal static extern void sm_allocator_destroy(IntPtr allocator);

        [DllImport(nativeLibrary, CallingConvention = CallingConvention.Cdecl)]
        internal static extern void sm_allocator_thread_cache_create(IntPtr allocator, CacheWarmupOptions warmupOption, IntPtr cacheSize);

        [DllImport(nativeLibrary, CallingConvention = CallingConvention.Cdecl)]
        internal static extern void sm_allocator_thread_cache_destroy(IntPtr allocator);

        [DllImport(nativeLibrary, CallingConvention = CallingConvention.Cdecl)]
        internal static extern IntPtr sm_malloc(IntPtr allocator, IntPtr bytesCount, IntPtr alignment);

        [DllImport(nativeLibrary, CallingConvention = CallingConvention.Cdecl)]
        internal static extern void sm_free(IntPtr allocator, IntPtr memory);

        [DllImport(nativeLibrary, CallingConvention = CallingConvention.Cdecl)]
        internal static extern void sm_free_batch(IntPtr allocator, IntPtr batch, IntPtr length);

        [DllImport(nativeLibrary, CallingConvention = CallingConvention.Cdecl)]
        internal static extern void sm_free_batch(IntPtr allocator, IntPtr[] batch, IntPtr length);

        [DllImport(nativeLibrary, CallingConvention = CallingConvention.Cdecl)]
        internal static extern IntPtr sm_realloc(IntPtr allocator, IntPtr memory, IntPtr bytesCount, IntPtr alignment);

        [DllImport(nativeLibrary, CallingConvention = CallingConvention.Cdecl)]
        internal static extern IntPtr sm_msize(IntPtr allocator, IntPtr memory);

        [DllImport(nativeLibrary, CallingConvention = CallingConvention.Cdecl)]
        internal static extern int sm_mbucket(IntPtr allocator, IntPtr memory);
    }
}
--------------------------------------------------------------------------------
/Source/Native/CMakeLists.txt:
--------------------------------------------------------------------------------
cmake_minimum_required(VERSION 2.6)
project(smmalloc CXX)

set(SMMALLOC_STATIC "0" CACHE BOOL "Create a static library")
set(SMMALLOC_SHARED "0" CACHE BOOL "Create a shared library")
set(SMMALLOC_STATS "0" CACHE BOOL "Add support for stats gathering")

# A typical invocation for building the shared library used by the managed
# wrapper (assuming an out-of-source build directory):
#
#   cmake -DSMMALLOC_SHARED=1 .. && cmake --build .

if (SMMALLOC_STATS)
    add_definitions(-DSMMALLOC_STATS_SUPPORT)
endif()

if (SMMALLOC_STATIC)
    add_library(smmalloc_static STATIC smmalloc.cpp)

    if (NOT LINUX)
        SET_TARGET_PROPERTIES(smmalloc_static PROPERTIES PREFIX "")
    endif()
endif()

if (SMMALLOC_SHARED)
    if (CMAKE_CXX_COMPILER_ID MATCHES GNU OR CMAKE_CXX_COMPILER_ID MATCHES Clang)
        set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -static")
    elseif (CMAKE_CXX_COMPILER_ID MATCHES MSVC)
        set(CMAKE_CXX_FLAGS_RELEASE "/MT")
    endif()

    add_library(smmalloc SHARED smmalloc.cpp)

    if (WIN32)
        SET_TARGET_PROPERTIES(smmalloc PROPERTIES PREFIX "")
    endif()
endif()
--------------------------------------------------------------------------------
/Source/Native/smmalloc.cpp:
--------------------------------------------------------------------------------
/*
 *  Smmalloc blazing fast memory allocator designed for video games
 *  Copyright (c) 2018 Sergey Makeev, Stanislav Denisov
 *
 *  Permission is hereby granted, free of charge, to any person obtaining a copy
 *  of this software and associated documentation files (the "Software"), to deal
 *  in the Software without restriction, including without limitation the rights
 *  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 *  copies of the Software, and to permit persons to whom the Software is
 *  furnished to do so, subject to the following conditions:
 *
 *  The above copyright notice and this permission notice shall be included in all
 *  copies or substantial portions of the Software.
 *
 *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 *  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 *  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 *  SOFTWARE.
 */

// Assumed include: provides _aligned_malloc / _aligned_realloc / _aligned_msize
// used by the generic fallback allocator at the bottom of this file
#include <malloc.h>
#include "smmalloc.h"
thread_local sm::internal::TlsPoolBucket tlsCacheBuckets[SMM_MAX_BUCKET_COUNT];

namespace sm {
    struct CacheWarmupLink {
        CacheWarmupLink* pNext;
    };

    sm::internal::TlsPoolBucket* __restrict GetTlsBucket(size_t index) {
        return &tlsCacheBuckets[index];
    }

    namespace internal {
        void TlsPoolBucket::Init(uint32_t* pCacheStack, uint32_t maxElementsNum, CacheWarmupOptions warmupOptions, Allocator* alloc, size_t bucketIndex) {
            SM_ASSERT(numElementsL0 == 0);
            SM_ASSERT(numElementsL1 == 0);
            SM_ASSERT(pBucket == nullptr);
            SM_ASSERT(pBucketData == nullptr);
            SM_ASSERT(pStorageL1 == nullptr);
            SM_ASSERT(maxElementsCount == 0);

            Allocator::PoolBucket* poolBucket = alloc->GetBucketByIndex(bucketIndex);

            SM_ASSERT(maxElementsNum >= SMM_MAX_CACHE_ITEMS_COUNT + 2);

            pStorageL1 = pCacheStack;
            numElementsL1 = 0;
            numElementsL0 = 0;
            maxElementsCount = (maxElementsNum - SMM_MAX_CACHE_ITEMS_COUNT);
            pBucket = poolBucket;

            SM_ASSERT(pBucket);

            pBucketData = pBucket->pData;

            if (warmupOptions == CACHE_COLD)
                return;

            uint32_t elementSize = alloc->GetBucketElementSize(bucketIndex);
            uint32_t num = (warmupOptions == CACHE_WARM) ? (maxElementsCount / 2) : (maxElementsCount);

            // Warmup: allocate 'num' elements from the bucket, chain them into a
            // temporary list, then release them all into this thread cache
            CacheWarmupLink* pRoot = nullptr;

            for (uint32_t j = 0; j < num; j++) {
                void* p = alloc->Allocate(elementSize, 16);

                if (p == nullptr)
                    break;

                if (alloc->GetBucketIndex(p) != (int32_t)bucketIndex) {
                    alloc->Free(p);

                    break;
                }

                CacheWarmupLink* pItem = (CacheWarmupLink*)p;
                pItem->pNext = pRoot;
                pRoot = pItem;
            }

            CacheWarmupLink* pCurrent = pRoot;

            while (pCurrent) {
                CacheWarmupLink* pNext = pCurrent->pNext;

                bool r = alloc->ReleaseToCache(this, pCurrent);

                SMMALLOC_USED_IN_ASSERT(r);
                SM_ASSERT(r);

                pCurrent = pNext;
            }

            SM_ASSERT(GetElementsCount() == num);
        }

        uint32_t* TlsPoolBucket::Destroy() {
            // Move any remaining L0 entries into L1, then return everything to
            // the master bucket before releasing the L1 storage to the caller
            for (uint32_t i = 0; i < numElementsL0; i++) {
                pStorageL1[numElementsL1] = storageL0[i];
                numElementsL1++;
            }

            if (numElementsL1 > 0)
                ReturnL1CacheToMaster(numElementsL1);

            uint32_t* r = pStorageL1;

            pStorageL1 = nullptr;
            numElementsL0 = 0;
            numElementsL1 = 0;
            maxElementsCount = 0;
            pBucket = nullptr;
            pBucketData = nullptr;

            return r;
        }
    }

    void Allocator::CreateThreadCache(CacheWarmupOptions warmupOptions, size_t cacheSize) {
        for (size_t i = 0; i < bucketsCount; i++) {
            uint32_t elementsNum = (uint32_t)cacheSize + SMM_MAX_CACHE_ITEMS_COUNT;
            uint32_t* localStack = (uint32_t*)GenericAllocator::Alloc(gAllocator, elementsNum * sizeof(uint32_t), 64);
            GetTlsBucket(i)->Init(localStack, elementsNum, warmupOptions, this, i);
        }
    }

    void Allocator::DestroyThreadCache() {
        for (size_t i = 0; i < SMM_MAX_BUCKET_COUNT; i++) {
            uint32_t* p = GetTlsBucket(i)->Destroy();
            GenericAllocator::Free(gAllocator, p);
        }
    }
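    // Builds the initial free list of a bucket in place: every free element
    // stores a TaggedIndex (offset of the next free element plus a generation
    // tag) in its first bytes, forming an intrusive linked list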
    void Allocator::PoolBucket::Create(size_t elementSize) {
        SM_ASSERT(elementSize >= 16 && "Invalid element size");

        globalTag.store(0, std::memory_order_relaxed);

        uint8_t* node = pData;

        TaggedIndex headVal;
        headVal.p.tag = globalTag.load(std::memory_order_relaxed);
        headVal.p.offset = (uint32_t)(node - pData);
        head.store(headVal.u);

        while (true) {
            uint8_t* next = node + elementSize;

            if ((next + elementSize) <= pBufferEnd) {
                TaggedIndex nextVal;
                nextVal.p.tag = globalTag.load(std::memory_order_relaxed);
                nextVal.p.offset = (uint32_t)(next - pData);
                *((TaggedIndex*)(node)) = nextVal;
            } else {
                ((TaggedIndex*)(node))->u = TaggedIndex::Invalid;

                break;
            }

            node = next;
            globalTag.fetch_add(1, std::memory_order_relaxed);
        }
    }

    Allocator::Allocator(GenericAllocator::TInstance allocator) : bucketsCount(0), bucketSizeInBytes(0), pBufferEnd(nullptr), pBuffer(nullptr, GenericAllocator::Deleter(allocator)), gAllocator(allocator) {
#ifdef SMMALLOC_STATS_SUPPORT
        globalMissCount.store(0);
#endif
    }

    inline int GetNextPow2(uint32_t n) {
        n -= 1;
        n |= n >> 16;
        n |= n >> 8;
        n |= n >> 4;
        n |= n >> 2;
        n |= n >> 1;

        return n + 1;
    }

    void Allocator::Init(uint32_t _bucketsCount, size_t _bucketSizeInBytes) {
        if (bucketsCount > 0)
            return;

        SM_ASSERT(_bucketsCount > 0 && _bucketsCount <= 64);

        if (_bucketsCount == 0)
            return;

        bucketsCount = _bucketsCount;

        size_t alignmentMax = GetNextPow2((uint32_t)(16 * bucketsCount));

        bucketSizeInBytes = Align(_bucketSizeInBytes, alignmentMax);

        size_t i = 0;

        for (i = 0; i < bucketsDataBegin.size(); i++) {
            bucketsDataBegin[i] = nullptr;
        }

        size_t totalBytesCount = bucketSizeInBytes * bucketsCount;

        pBuffer.reset((uint8_t*)GenericAllocator::Alloc(gAllocator, totalBytesCount, alignmentMax));
        pBufferEnd = pBuffer.get() + totalBytesCount + 1;

        size_t elementSize = 16;

        for (i = 0; i < bucketsCount; i++) {
            PoolBucket& bucket = buckets[i];
            bucket.pData = pBuffer.get() + i * bucketSizeInBytes;

            SM_ASSERT(IsAligned((size_t)bucket.pData, GetNextPow2(elementSize)) && "Alignment failed");

            bucket.pBufferEnd = bucket.pData + bucketSizeInBytes;
            bucket.Create(elementSize);
            elementSize += 16;
            bucketsDataBegin[i] = bucket.pData;
        }
    }
}

sm::GenericAllocator::TInstance sm::GenericAllocator::Invalid() {
    return nullptr;
}

bool sm::GenericAllocator::IsValid(TInstance instance) {
    SMMALLOC_UNUSED(instance);

    return true;
}

sm::GenericAllocator::TInstance sm::GenericAllocator::Create() {
    return nullptr;
}

void sm::GenericAllocator::Destroy(sm::GenericAllocator::TInstance instance) {
    SMMALLOC_UNUSED(instance);
}

void* sm::GenericAllocator::Alloc(sm::GenericAllocator::TInstance instance, size_t bytesCount, size_t alignment) {
    SMMALLOC_UNUSED(instance);

    if (alignment < 16)
        alignment = 16;

    return _aligned_malloc(bytesCount, alignment);
}

void sm::GenericAllocator::Free(sm::GenericAllocator::TInstance instance, void* p) {
    SMMALLOC_UNUSED(instance);

    _aligned_free(p);
}

void* sm::GenericAllocator::Realloc(sm::GenericAllocator::TInstance instance, void* p, size_t bytesCount, size_t alignment) {
    SMMALLOC_UNUSED(instance);

    return _aligned_realloc(p, bytesCount, alignment);
}

size_t sm::GenericAllocator::GetUsableSpace(sm::GenericAllocator::TInstance instance, void* p) {
    SMMALLOC_UNUSED(instance);

    size_t alignment = DetectAlignment(p);

#ifdef __GNUC__
    if (alignment < sizeof(void*))
        alignment = sizeof(void*);

    return _msize(p) - alignment - sizeof(void*);
#else
    return _aligned_msize(p, alignment, 0);
#endif
}
--------------------------------------------------------------------------------
/Source/Native/smmalloc.h:
--------------------------------------------------------------------------------
/*
 *  Smmalloc blazing fast memory allocator designed for video games
 *  Copyright (c) 2018 Sergey Makeev, Stanislav Denisov
 *
 *  Permission is hereby granted, free of charge, to any person obtaining a copy
 *  of this software and associated documentation files (the "Software"), to deal
 *  in the Software without restriction, including without limitation the rights
 *  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 *  copies of the Software, and to permit persons to whom the Software is
 *  furnished to do so, subject to the following conditions:
 *
 *  The above copyright notice and this permission notice shall be included in all
 *  copies or substantial portions of the Software.
 *
 *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 *  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 *  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 *  SOFTWARE.
 */

#pragma once

// Usage-driven include set (the original include names were lost)
#include <stdint.h>
#include <cstring>
#include <new>
#include <atomic>
#include <array>
#include <memory>
#include <algorithm>
#include <type_traits>

#if __GNUC__ || __INTEL_COMPILER
    #define SM_UNLIKELY(expr) __builtin_expect(!!(expr), (0))
    #define SM_LIKELY(expr) __builtin_expect(!!(expr), (1))
#else
    #define SM_UNLIKELY(expr) (expr)
    #define SM_LIKELY(expr) (expr)
#endif

#ifdef _DEBUG
    #define SMMALLOC_ENABLE_ASSERTS
#endif

#ifdef _M_X64
    #define SMMALLOC_X64
    #define SMM_MAX_CACHE_ITEMS_COUNT (7)
#else
    #define SMMALLOC_X86
    #define SMM_MAX_CACHE_ITEMS_COUNT (10)
#endif
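// 7 items on x64 versus 10 on x86: the per-thread cache struct (TlsPoolBucket,
// declared below) packs one uint32_t per L0 item next to three pointers and a
// few counters, and must fit into a single 64-byte cache line, which the
// static_assert at the bottom of this file verifies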
#define SMM_CACHE_LINE_SIZE (64)
#define SMM_MAX_BUCKET_COUNT (64)

#define SMMALLOC_UNUSED(x) (void)(x)
#define SMMALLOC_USED_IN_ASSERT(x) (void)(x)

#ifdef _MSC_VER
    #define INLINE __forceinline
#else
    #define INLINE inline
#endif

#ifdef _MSC_VER
    #define NOINLINE __declspec(noinline)
#else
    #define NOINLINE __attribute__((__noinline__))
#endif

#ifdef SMMALLOC_ENABLE_ASSERTS
    #include <assert.h>

    #define SM_ASSERT(cond) do { if (!(cond)) __debugbreak(); } while (0)
#else
    #define SM_ASSERT(x)
#endif

namespace sm {
#ifdef SMMALLOC_STATS_SUPPORT
    struct AllocatorStats {
        std::atomic<size_t> cacheHitCount;
        std::atomic<size_t> hitCount;
        std::atomic<size_t> missCount;
        std::atomic<size_t> freeCount;

        AllocatorStats() {
            cacheHitCount.store(0);
            hitCount.store(0);
            missCount.store(0);
            freeCount.store(0);
        }
    };
#endif

    enum CacheWarmupOptions {
        CACHE_COLD = 0,
        CACHE_WARM = 1,
        CACHE_HOT = 2
    };

    namespace internal {
        struct TlsPoolBucket;
    }

    internal::TlsPoolBucket* __restrict GetTlsBucket(size_t index);

    INLINE bool IsAligned(size_t v, size_t alignment) {
        size_t lowBits = v & (alignment - 1);

        return (lowBits == 0);
    }

    INLINE size_t Align(size_t val, size_t alignment) {
        SM_ASSERT((alignment & (alignment - 1)) == 0 && "Invalid alignment. Must be power of two.");

        size_t r = (val + (alignment - 1)) & ~(alignment - 1);

        SM_ASSERT(IsAligned(r, alignment) && "Alignment failed.");

        return r;
    }

    INLINE size_t DetectAlignment(void* p) {
        uintptr_t v = (uintptr_t)p;
        size_t ptrBitsCount = sizeof(void*) * 8;
        size_t i;

        for (i = 0; i < ptrBitsCount; i++) {
            if (v & 1)
                break;

            v = v >> 1;
        }

        return (size_t(1) << i);
    }

    struct GenericAllocator {
        typedef void* TInstance;

        static TInstance Invalid();
        static bool IsValid(TInstance instance);
        static TInstance Create();
        static void Destroy(TInstance instance);
        static void* Alloc(TInstance instance, size_t bytesCount, size_t alignment);
        static void Free(TInstance instance, void* p);
        static void* Realloc(TInstance instance, void* p, size_t bytesCount, size_t alignment);
        static size_t GetUsableSpace(TInstance instance, void* p);

        struct Deleter {
            explicit Deleter(GenericAllocator::TInstance _instance) : instance(_instance) { }

            INLINE void operator()(uint8_t* p) {
                GenericAllocator::Free(instance, p);
            }

            GenericAllocator::TInstance instance;
        };
    };

    class Allocator {
    private:

        static const size_t MaxValidAlignment = 16384;

        friend struct internal::TlsPoolBucket;

        INLINE bool IsReadable(void* p) const {
            return (uintptr_t(p) > MaxValidAlignment);
        }

        struct PoolBucket {
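            // Lock-free free list head: 'offset' addresses the next free element
            // inside the bucket and 'tag' is a generation counter bumped on every
            // release, so a compare-and-swap on the packed 64-bit value cannot be
            // fooled by a recycled offset (the classic ABA problem)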
            union TaggedIndex {
                struct {
                    uint32_t tag;
                    uint32_t offset;
                } p;

                uint64_t u;

                static const uint64_t Invalid = UINT64_MAX;
            };

            std::atomic<uint64_t> head;
            std::atomic<uint32_t> globalTag;

            uint8_t* pData;
            uint8_t* pBufferEnd;

#ifdef SMMALLOC_STATS_SUPPORT
            AllocatorStats stats;
#endif

            PoolBucket() : head(TaggedIndex::Invalid), globalTag(0), pData(nullptr), pBufferEnd(nullptr) { }

            void Create(size_t elementSize);

            INLINE void* Alloc() {
                uint8_t* p = nullptr;

                TaggedIndex headValue;
                headValue.u = head.load();

                while (true) {
                    if (headValue.u == TaggedIndex::Invalid)
                        return nullptr;

                    p = (pData + headValue.p.offset);
                    TaggedIndex nextValue = *((TaggedIndex*)(p));

                    if (head.compare_exchange_strong(headValue.u, nextValue.u))
                        break;
                }

                return p;
            }

            INLINE void FreeInterval(void* _pHead, void* _pTail) {
                uint8_t* pHead = (uint8_t*)_pHead;
                uint8_t* pTail = (uint8_t*)_pTail;
                uint32_t tag = globalTag.fetch_add(1, std::memory_order_relaxed);

                TaggedIndex nodeValue;
                nodeValue.p.offset = (uint32_t)(pHead - pData);
                nodeValue.p.tag = tag;
                TaggedIndex headValue;
                headValue.u = head.load();

                while (true) {
                    *((TaggedIndex*)(pTail)) = headValue;

                    if (head.compare_exchange_strong(headValue.u, nodeValue.u))
                        break;
                }
            }

            INLINE bool IsMyAlloc(void* p) const {
                return (p >= pData && p < pBufferEnd);
            }
        };

    public:

        void CreateThreadCache(CacheWarmupOptions warmupOptions, size_t cacheSize);
        void DestroyThreadCache();

    private:

        size_t bucketsCount;
        size_t bucketSizeInBytes;
        uint8_t* pBufferEnd;
        std::array<uint8_t*, SMM_MAX_BUCKET_COUNT> bucketsDataBegin;
        std::array<PoolBucket, SMM_MAX_BUCKET_COUNT> buckets;
        std::unique_ptr<uint8_t, GenericAllocator::Deleter> pBuffer;
        GenericAllocator::TInstance gAllocator;

#ifdef SMMALLOC_STATS_SUPPORT
        std::atomic<size_t> globalMissCount;
#endif

        INLINE void* AllocFromCache(internal::TlsPoolBucket* __restrict _self) const;

        // Default template arguments are assumed; they keep the argument-free
        // call sites in this file and in smmalloc.cpp compiling
        template <bool useCacheL0 = true>
        INLINE bool ReleaseToCache(internal::TlsPoolBucket* __restrict _self, void* _p);

        INLINE size_t FindBucket(const void* p) const {
            uintptr_t index = (uintptr_t)p - (uintptr_t)bucketsDataBegin[0];
            size_t r = (index / bucketSizeInBytes);

            return r;
        }

        INLINE PoolBucket* GetBucketByIndex(size_t bucketIndex) {
            if (bucketIndex >= bucketsCount)
                return nullptr;

            return &buckets[bucketIndex];
        }

        INLINE const PoolBucket* GetBucketByIndex(size_t bucketIndex) const {
            if (bucketIndex >= bucketsCount)
                return nullptr;

            return &buckets[bucketIndex];
        }

        template <bool enableStatistic = true>
        INLINE void* Allocate(size_t _bytesCount, size_t alignment) {
            SM_ASSERT(alignment <= MaxValidAlignment);

            // Zero-size allocations return a dummy pointer that IsReadable()
            // filters out again in Free()
            if (SM_UNLIKELY(_bytesCount == 0))
                return (void*)alignment;

            size_t bytesCount = (_bytesCount < alignment) ? alignment : _bytesCount;

            // 16-byte size classes: 1..16 -> bucket 0, 17..32 -> bucket 1, ...
            size_t bucketIndex = ((bytesCount - 1) >> 4);

            if (bucketIndex < bucketsCount) {
                // Try the thread-local cache first
                void* pRes = AllocFromCache(GetTlsBucket(bucketIndex));

                if (pRes) {
#ifdef SMMALLOC_STATS_SUPPORT
                    if (enableStatistic)
                        buckets[bucketIndex].stats.cacheHitCount.fetch_add(1, std::memory_order_relaxed);
#endif

                    return pRes;
                }
            }

            // Fall back to the shared lock-free lists; on miss, try larger buckets
            while (bucketIndex < bucketsCount) {
                void* pRes = buckets[bucketIndex].Alloc();

                if (pRes) {
#ifdef SMMALLOC_STATS_SUPPORT
                    if (enableStatistic)
                        buckets[bucketIndex].stats.hitCount.fetch_add(1, std::memory_order_relaxed);
#endif

                    return pRes;
                } else {
#ifdef SMMALLOC_STATS_SUPPORT
                    if (enableStatistic)
                        buckets[bucketIndex].stats.missCount.fetch_add(1, std::memory_order_relaxed);
#endif
                }

                bucketIndex++;
            }

#ifdef SMMALLOC_STATS_SUPPORT
            if (enableStatistic)
                globalMissCount.fetch_add(1, std::memory_order_relaxed);
#endif

            // All buckets exhausted or the request is too large for any bucket
            return GenericAllocator::Alloc(gAllocator, _bytesCount, alignment);
        }
    public:

        Allocator(GenericAllocator::TInstance allocator);

        void Init(uint32_t bucketsCount, size_t bucketSizeInBytes);

        INLINE void* Alloc(size_t _bytesCount, size_t alignment) {
            return Allocate(_bytesCount, alignment);
        }

        INLINE void Free(void* p) {
            if (SM_UNLIKELY(!IsReadable(p)))
                return;

            size_t bucketIndex = FindBucket(p);

            if (bucketIndex < bucketsCount) {
#ifdef SMMALLOC_STATS_SUPPORT
                buckets[bucketIndex].stats.freeCount.fetch_add(1, std::memory_order_relaxed);
#endif

                if (ReleaseToCache(GetTlsBucket(bucketIndex), p))
                    return;

                PoolBucket* bucket = &buckets[bucketIndex];
                bucket->FreeInterval(p, p);

                return;
            }

            GenericAllocator::Free(gAllocator, (uint8_t*)p);
        }

        INLINE void* Realloc(void* p, size_t bytesCount, size_t alignment) {
            if (p == nullptr)
                return Alloc(bytesCount, alignment);

            size_t bucketIndex = FindBucket(p);

            if (bucketIndex < bucketsCount) {
                size_t elementSize = GetBucketElementSize(bucketIndex);

                if (bytesCount <= elementSize) {
                    Free(p);

                    return p;
                }

                void* p2 = Alloc(bytesCount, alignment);

                if (IsReadable(p))
                    std::memmove(p2, p, elementSize);

                Free(p);

                return p2;
            }

            if (bytesCount == 0) {
                if (IsReadable(p))
                    GenericAllocator::Free(gAllocator, p);

                return (void*)alignment;
            }

            if (!IsReadable(p))
                return GenericAllocator::Alloc(gAllocator, bytesCount, alignment);

            return GenericAllocator::Realloc(gAllocator, p, bytesCount, alignment);
        }

        INLINE size_t GetUsableSize(void* p) {
            if (!IsReadable(p))
                return 0;

            size_t bucketIndex = FindBucket(p);

            if (bucketIndex < bucketsCount) {
                size_t elementSize = GetBucketElementSize(bucketIndex);

                return elementSize;
            }

            return GenericAllocator::GetUsableSpace(gAllocator, p);
        }

        INLINE int32_t GetBucketIndex(void* _p) {
            if (!IsMyAlloc(_p))
                return -1;

            size_t bucketIndex = FindBucket(_p);

            if (bucketIndex >= bucketsCount)
                return -1;

            return (int32_t)bucketIndex;
        }

        INLINE bool IsMyAlloc(const void* p) const {
            return (p >= pBuffer.get() && p < pBufferEnd);
        }

        INLINE size_t GetBucketsCount() const {
            return bucketsCount;
        }

        INLINE uint32_t GetBucketElementSize(size_t bucketIndex) const {
            return (uint32_t)((bucketIndex + 1) * 16);
        }

        INLINE uint32_t GetBucketElementsCount(size_t bucketIndex) const {
            if (bucketIndex >= bucketsCount)
                return 0;

            size_t oneElementSize = GetBucketElementSize(bucketIndex);

            return (uint32_t)(bucketSizeInBytes / oneElementSize);
        }

#ifdef SMMALLOC_STATS_SUPPORT
        size_t GetGlobalMissCount() const {
            return globalMissCount.load(std::memory_order_relaxed);
        }

        const AllocatorStats* GetBucketStats(size_t bucketIndex) const {
            const PoolBucket* bucket = GetBucketByIndex(bucketIndex);

            if (!bucket)
                return nullptr;

            return &bucket->stats;
        }
#endif

        GenericAllocator::TInstance GetGenericAllocatorInstance() {
            return gAllocator;
        }
    };

    namespace internal {
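        // Per-thread, per-bucket two-level cache: L0 is a small fixed array held
        // directly in this TLS struct, L1 is a larger heap-allocated stack of
        // element offsets; releases fill L0, then L1, and only spill back to the
        // shared PoolBucket when both levels are full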
        struct TlsPoolBucket {
            uint8_t* pBucketData;
            uint32_t* pStorageL1;

            Allocator::PoolBucket* pBucket;
            std::array<uint32_t, SMM_MAX_CACHE_ITEMS_COUNT> storageL0;

            uint32_t maxElementsCount;
            uint32_t numElementsL1;
            uint8_t numElementsL0;

            INLINE uint32_t GetElementsCount() const {
                return numElementsL1 + numElementsL0;
            }

            void Init(uint32_t* pCacheStack, uint32_t maxElementsNum, CacheWarmupOptions warmupOptions, Allocator* alloc, size_t bucketIndex);
            uint32_t* Destroy();

            INLINE void ReturnL1CacheToMaster(uint32_t count) {
                if (count == 0)
                    return;

                SM_ASSERT(pBucket != nullptr);

                if (numElementsL1 == 0)
                    return;

                count = std::min(count, numElementsL1);

                // Re-link 'count' cached elements into an interval and push it
                // back onto the master bucket's lock-free list in one operation
                uint32_t localTag = 0xFFFFFF;
                uint32_t firstElementToReturn = (numElementsL1 - count);
                uint32_t offset = pStorageL1[firstElementToReturn];
                uint8_t* pHead = pBucketData + offset;
                uint8_t* pPrevBlockMemory = pHead;

                for (uint32_t i = (firstElementToReturn + 1); i < numElementsL1; i++, localTag++) {
                    offset = pStorageL1[i];
                    Allocator::PoolBucket::TaggedIndex* pTag = (Allocator::PoolBucket::TaggedIndex*)pPrevBlockMemory;
                    pTag->p.tag = localTag;
                    pTag->p.offset = offset;

                    uint8_t* pBlockMemory = pBucketData + offset;

                    pPrevBlockMemory = pBlockMemory;
                }

                uint8_t* pTail = pPrevBlockMemory;

                pBucket->FreeInterval(pHead, pTail);
                numElementsL1 -= count;
            }
        };

        static_assert(std::is_pod<TlsPoolBucket>::value == true, "TlsPoolBucket must be POD type, stored in TLS");
        static_assert(sizeof(TlsPoolBucket) <= 64, "TlsPoolBucket sizeof must be less than CPU cache line");
    }

    INLINE void* Allocator::AllocFromCache(internal::TlsPoolBucket* __restrict _self) const {
        if (_self->numElementsL0 > 0) {
            SM_ASSERT(_self->pBucketData != nullptr);

            _self->numElementsL0--;

            uint32_t offset = _self->storageL0[_self->numElementsL0];

            return _self->pBucketData + offset;
        }

        if (_self->numElementsL1 > 0) {
            SM_ASSERT(_self->pBucketData != nullptr);
            SM_ASSERT(_self->numElementsL0 == 0);

            _self->numElementsL1--;

            uint32_t offset = _self->pStorageL1[_self->numElementsL1];

            return _self->pBucketData + offset;
        }

        return nullptr;
    }

    template <bool useCacheL0>
    INLINE bool Allocator::ReleaseToCache(internal::TlsPoolBucket* __restrict _self, void* _p) {
        if (_self->maxElementsCount == 0)
            return false;

        SM_ASSERT(_self->pBucket != nullptr);
        SM_ASSERT(_self->pBucketData != nullptr);

        uint8_t* p = (uint8_t*)_p;

        SM_ASSERT(p >= _self->pBucketData && p < _self->pBucket->pBufferEnd);

        uint32_t offset = (uint32_t)(p - _self->pBucketData);

        if (useCacheL0) {
            if (_self->numElementsL0 < SMM_MAX_CACHE_ITEMS_COUNT) {
                _self->storageL0[_self->numElementsL0] = offset;
                _self->numElementsL0++;

                return true;
            }
        }

        if (_self->numElementsL1 < _self->maxElementsCount) {
            _self->pStorageL1[_self->numElementsL1] = offset;
            _self->numElementsL1++;

            return true;
        }

        // Both cache levels are full: return half of L1 to the master bucket
        uint32_t halfOfElements = (_self->numElementsL1 >> 1);

        _self->ReturnL1CacheToMaster(halfOfElements);
        _self->pStorageL1[_self->numElementsL1] = offset;
        _self->numElementsL1++;

        return true;
    }
}

#define SMMALLOC_CSTYLE_FUNCS

#ifdef SMMALLOC_CSTYLE_FUNCS
#define SMMALLOC_DLL

#if defined(_WIN32) && defined(SMMALLOC_DLL)
    #define SMMALLOC_API __declspec(dllexport)
#else
    #define SMMALLOC_API extern
#endif

#ifdef __cplusplus
extern "C" {
#endif

typedef sm::Allocator* sm_allocator;
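// The functions below form the flat C API exported from the shared library;
// each one is bound by the managed wrapper (Smmalloc.cs) through [DllImport]
// as a P/Invoke entry point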
SMMALLOC_API INLINE sm_allocator sm_allocator_create(uint32_t bucketsCount, size_t bucketSizeInBytes) {
    sm::GenericAllocator::TInstance instance = sm::GenericAllocator::Create();

    if (!sm::GenericAllocator::IsValid(instance))
        return nullptr;

    // Align the allocator object itself to the cache line size
    size_t align = __alignof(sm::Allocator);

    align = sm::Align(align, SMM_CACHE_LINE_SIZE);

    void* pBuffer = sm::GenericAllocator::Alloc(instance, sizeof(sm::Allocator), align);

    sm::Allocator* allocator = new(pBuffer) sm::Allocator(instance);
    allocator->Init(bucketsCount, bucketSizeInBytes);

    return allocator;
}

SMMALLOC_API INLINE void sm_allocator_destroy(sm_allocator allocator) {
    if (allocator == nullptr)
        return;

    sm::GenericAllocator::TInstance instance = allocator->GetGenericAllocatorInstance();
    allocator->~Allocator();

    sm::GenericAllocator::Free(instance, allocator);
    sm::GenericAllocator::Destroy(instance);
}

SMMALLOC_API INLINE void sm_allocator_thread_cache_create(sm_allocator allocator, sm::CacheWarmupOptions warmupOptions, size_t cacheSize) {
    if (allocator == nullptr)
        return;

    allocator->CreateThreadCache(warmupOptions, cacheSize);
}

SMMALLOC_API INLINE void sm_allocator_thread_cache_destroy(sm_allocator allocator) {
    if (allocator == nullptr)
        return;

    allocator->DestroyThreadCache();
}

SMMALLOC_API INLINE void* sm_malloc(sm_allocator allocator, size_t bytesCount, size_t alignment) {
    return allocator->Alloc(bytesCount, alignment);
}

SMMALLOC_API INLINE void sm_free(sm_allocator allocator, void* p) {
    allocator->Free(p);
}

SMMALLOC_API INLINE void sm_free_batch(sm_allocator allocator, void** batch, size_t length) {
    void* p;
    size_t i;

    for (i = 0; i < length; i++) {
        p = batch[i];

        if (p == nullptr)
            continue;

        allocator->Free(p);
    }
}

SMMALLOC_API INLINE void* sm_realloc(sm_allocator allocator, void* p, size_t bytesCount, size_t alignment) {
    return allocator->Realloc(p, bytesCount, alignment);
}

SMMALLOC_API INLINE size_t sm_msize(sm_allocator allocator, void* p) {
    return allocator->GetUsableSize(p);
}

SMMALLOC_API INLINE int32_t sm_mbucket(sm_allocator allocator, void* p) {
    return allocator->GetBucketIndex(p);
}

#ifdef __cplusplus
}
#endif
#endif
--------------------------------------------------------------------------------