├── LICENSE
├── README.md
└── Source
    ├── Managed
    │   ├── Smmalloc-CSharp.csproj
    │   └── Smmalloc.cs
    └── Native
        ├── CMakeLists.txt
        ├── smmalloc.cpp
        └── smmalloc.h
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 Stanislav Denisov (nxrighthere@gmail.com)
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
5 | This is an improved version of [smmalloc](https://github.com/SergeyMakeev/smmalloc), a [fast and efficient](https://github.com/SergeyMakeev/smmalloc#features) memory allocator designed to handle many small allocations/deallocations in heavy multi-threaded scenarios. The allocator was created for use in applications where performance is critical, such as video games.
6 |
7 | Using the smmalloc allocator in the .NET environment helps minimize GC pressure when allocating buffers and avoids lock-based pools in multi-threaded systems. Modern .NET features such as [`Span<T>`](https://docs.microsoft.com/en-us/dotnet/api/system.span-1) work in tandem with smmalloc and allow convenient management of data in native memory blocks.
8 |
9 | Building
10 | --------
11 | To build the native library, the appropriate software is required:
12 |
13 | For desktop platforms, [CMake](https://cmake.org/download/) with GNU Make or Visual Studio.
14 |
15 | The managed assembly can be built with any compiler platform that supports C# 3.0 or higher.
16 |
17 | Usage
18 | --------
19 | ##### Create a new smmalloc instance
20 | ```c#
21 | // 8 buckets, 16 MB each, 128 bytes maximum allocation size
22 | SmmallocInstance smmalloc = new SmmallocInstance(8, 16 * 1024 * 1024);
23 | ```
24 |
25 | ##### Destroy the smmalloc instance and free allocated memory
26 | ```c#
27 | smmalloc.Dispose();
28 | ```
29 |
30 | ##### Create a thread cache for the current thread
31 | ```c#
32 | // 4 KB of thread cache for each bucket, hot warmup
33 | smmalloc.CreateThreadCache(4 * 1024, CacheWarmupOptions.Hot);
34 | ```
35 |
36 | ##### Destroy the thread cache for the current thread
37 | ```c#
38 | smmalloc.DestroyThreadCache();
39 | ```
40 |
41 | ##### Allocate a memory block
42 | ```c#
43 | // Allocate a 64-byte memory block
44 | IntPtr memory = smmalloc.Malloc(64);
45 | ```
46 |
47 | ##### Release a memory block
48 | ```c#
49 | smmalloc.Free(memory);
50 | ```
51 |
52 | ##### Work with batches of memory blocks
53 | ```c#
54 | IntPtr[] batch = new IntPtr[32];
55 |
56 | // Allocate a batch of memory
57 | for (int i = 0; i < batch.Length; i++) {
58 | batch[i] = smmalloc.Malloc(64);
59 | }
60 |
61 | // Release the whole batch
62 | smmalloc.Free(batch);
63 | ```
64 |
65 | ##### Write data to a memory block
66 | ```c#
67 | // Using Marshal
68 | byte data = 0;
69 |
70 | for (int i = 0; i < smmalloc.Size(memory); i++) {
71 | Marshal.WriteByte(memory, i, data++);
72 | }
73 |
74 | // Using Span
75 | Span<byte> buffer;
76 |
77 | unsafe {
78 | buffer = new Span<byte>((byte*)memory, smmalloc.Size(memory));
79 | }
80 |
81 | byte data = 0;
82 |
83 | for (int i = 0; i < buffer.Length; i++) {
84 | buffer[i] = data++;
85 | }
86 | ```
87 |
88 | ##### Read data from a memory block
89 | ```c#
90 | // Using Marshal
91 | int sum = 0;
92 |
93 | for (int i = 0; i < smmalloc.Size(memory); i++) {
94 | sum += Marshal.ReadByte(memory, i);
95 | }
96 |
97 | // Using Span
98 | int sum = 0;
99 |
100 | foreach (var value in buffer) {
101 | sum += value;
102 | }
103 | ```
104 |
105 | ##### Hardware accelerated operations
106 | ```c#
107 | // Xor using Vector and Span (assuming xor is another Span<byte> of the same length as buffer)
108 | if (Vector.IsHardwareAccelerated) {
109 | Span<Vector<byte>> bufferVector = MemoryMarshal.Cast<byte, Vector<byte>>(buffer);
110 | Span<Vector<byte>> xorVector = MemoryMarshal.Cast<byte, Vector<byte>>(xor);
111 |
112 | for (int i = 0; i < bufferVector.Length; i++) {
113 | bufferVector[i] ^= xorVector[i];
114 | }
115 | }
116 | ```
117 |
118 | ##### Copy data between managed and native memory
119 | ```c#
120 | // Using Marshal
121 | byte[] data = new byte[64];
122 |
123 | // Copy from native memory
124 | Marshal.Copy(memory, data, 0, 64);
125 |
126 | // Copy to native memory
127 | Marshal.Copy(data, 0, memory, 64);
128 |
129 | // Using Buffer
130 | unsafe {
131 | // Copy from native memory
132 | fixed (byte* destination = &data[0]) {
133 | Buffer.MemoryCopy((byte*)memory, destination, 64, 64);
134 | }
135 |
136 | // Copy to native memory
137 | fixed (byte* source = &data[0]) {
138 | Buffer.MemoryCopy(source, (byte*)memory, 64, 64);
139 | }
140 | }
141 | ```
142 |
143 | ##### Custom data structures
144 | ```c#
145 | // Define a custom structure
146 | struct Entity {
147 | public uint id;
148 | public byte health;
149 | public byte state;
150 | }
151 |
152 | int entitySize = Marshal.SizeOf(typeof(Entity));
153 | int entityCount = 10;
154 |
155 | // Allocate memory block
156 | IntPtr memory = smmalloc.Malloc(entitySize * entityCount);
157 |
158 | // Create Span using native memory block
159 | Span<Entity> entities;
160 |
161 | unsafe {
162 | entities = new Span<Entity>((void*)memory, entityCount);
163 | }
164 |
165 | // Fill the entities with data
166 | uint id = 1;
167 | Random random = new Random();
168 | for (int i = 0; i < entities.Length; i++) {
169 | entities[i].id = id++;
170 | entities[i].health = (byte)random.Next(1, 100);
171 | entities[i].state = (byte)random.Next(1, 255);
172 | }
173 |
174 | // Release memory block
175 | smmalloc.Free(memory);
176 | ```
177 |
178 | API reference
179 | --------
180 | ### Enumerations
181 | #### CacheWarmupOptions
182 | Definitions of warmup options for `CreateThreadCache()` function:
183 |
184 | `CacheWarmupOptions.Cold` no warmup is performed for cache elements.
185 |
186 | `CacheWarmupOptions.Warm` warmup is performed for half of the cache elements.
187 |
188 | `CacheWarmupOptions.Hot` warmup is performed for all cache elements.
189 |
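For example, a short-lived thread can skip the warmup cost, while a long-running worker can pay it upfront (a minimal sketch, assuming `smmalloc` is an existing `SmmallocInstance`):

```c#
// Short-lived thread: no upfront cost, the cache fills up as blocks are freed
smmalloc.CreateThreadCache(2 * 1024, CacheWarmupOptions.Cold);

// Long-running worker thread: all cache elements are pre-allocated upfront
smmalloc.CreateThreadCache(4 * 1024, CacheWarmupOptions.Hot);
```
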
190 | ### Classes
191 | A single low-level disposable class is used to work with smmalloc.
192 |
193 | #### SmmallocInstance
194 | Contains a managed pointer to the smmalloc instance.
195 |
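Since the class implements `IDisposable`, an instance can be wrapped in a `using` statement to guarantee that the native memory pool is released (a minimal sketch):

```c#
using (SmmallocInstance smmalloc = new SmmallocInstance(8, 16 * 1024 * 1024)) {
	IntPtr memory = smmalloc.Malloc(64);

	// Work with the memory block...

	smmalloc.Free(memory);
}
```
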
196 | ##### Constructors
197 | `SmmallocInstance(uint bucketsCount, int bucketSize)` creates an allocator instance with a memory pool. The size of the memory blocks in each bucket increases with the bucket count, in 16-byte steps. The bucket size parameter sets the initial size of the pooled memory for each bucket, in bytes.
198 |
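For illustration, a four-bucket instance serves block sizes of 16, 32, 48, and 64 bytes:

```c#
// 4 buckets with 1 MB of pooled memory each,
// so the maximum allocation size is 4 * 16 = 64 bytes
SmmallocInstance smmalloc = new SmmallocInstance(4, 1024 * 1024);
```
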
199 | ##### Methods
200 | `SmmallocInstance.Dispose()` destroys the smmalloc instance and frees allocated memory.
201 |
202 | `SmmallocInstance.CreateThreadCache(int cacheSize, CacheWarmupOptions warmupOption)` creates a thread cache for fast memory allocations within a thread. The warmup option sets the pre-allocation degree of the cache elements.
203 |
204 | `SmmallocInstance.DestroyThreadCache()` destroys the thread cache. It should be called before the end of the thread's life cycle.
205 |
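A typical pattern is to create the cache when a worker thread starts and destroy it right before the thread exits (a sketch, assuming a shared `smmalloc` instance):

```c#
Thread worker = new Thread(() => {
	smmalloc.CreateThreadCache(4 * 1024, CacheWarmupOptions.Hot);

	try {
		// Allocation-heavy work...
	} finally {
		smmalloc.DestroyThreadCache();
	}
});

worker.Start();
```
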
206 | `SmmallocInstance.Malloc(int bytesCount, int alignment)` allocates an aligned memory block. The maximum allocation size is the bucket count multiplied by 16, and the minimum allocation size is 16 bytes. With two buckets in a smmalloc instance the maximum allocation size is 32 bytes, with three buckets 48 bytes, with four buckets 64 bytes, and so on. The alignment parameter is optional. Returns a pointer to the allocated memory block.
207 |
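A worked example with eight buckets, where the allocation limit is 8 * 16 = 128 bytes:

```c#
// Served from the 32-byte bucket, so Size() reports 32
IntPtr block = smmalloc.Malloc(24);

// 64 bytes aligned to a 32-byte boundary
IntPtr alignedBlock = smmalloc.Malloc(64, 32);
```
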
208 | `SmmallocInstance.Free(IntPtr memory)` frees a memory block. To free a batch of memory blocks, a managed array or a pointer to an array of pointers with a length can be passed instead of a single pointer.
209 |
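The overload that takes a raw pointer can be fed a pinned managed array (a minimal sketch):

```c#
IntPtr[] batch = new IntPtr[32];

for (int i = 0; i < batch.Length; i++) {
	batch[i] = smmalloc.Malloc(64);
}

unsafe {
	fixed (IntPtr* pointers = batch) {
		smmalloc.Free((IntPtr)pointers, batch.Length);
	}
}
```
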
210 | `SmmallocInstance.Realloc(IntPtr memory, int bytesCount, int alignment)` reallocates a memory block. The alignment parameter is optional. Returns a pointer to the reallocated memory block.
211 |
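For example, growing a block while preserving its contents:

```c#
IntPtr memory = smmalloc.Malloc(32);

// Grow the block to 64 bytes, the data is preserved
memory = smmalloc.Realloc(memory, 64);

smmalloc.Free(memory);
```
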
212 | `SmmallocInstance.Size(IntPtr memory)` gets the usable size of a memory block. Returns the size in bytes.
213 |
214 | `SmmallocInstance.Bucket(IntPtr memory)` gets the bucket index of a memory block. Returns the placement index, or -1 if the block was not allocated from a bucket.
215 |
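For instance, since block sizes step by 16 bytes, a 40-byte allocation lands in the third bucket:

```c#
IntPtr memory = smmalloc.Malloc(40);

int bucket = smmalloc.Bucket(memory); // 2, the zero-based index of the 48-byte bucket
int size = smmalloc.Size(memory); // 48
```
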
--------------------------------------------------------------------------------
/Source/Managed/Smmalloc-CSharp.csproj:
--------------------------------------------------------------------------------
1 | <Project Sdk="Microsoft.NET.Sdk">
2 |
3 |   <PropertyGroup>
4 |     <OutputType>Library</OutputType>
5 |     <TargetFramework>netstandard2.0</TargetFramework>
6 |     <AssemblyName>Smmalloc</AssemblyName>
7 |     <DefineConstants>SMMALLOC_INLINING;</DefineConstants>
8 |   </PropertyGroup>
9 |
10 |   <PropertyGroup Condition="'$(Configuration)'=='Debug'">
11 |     <Optimize>false</Optimize>
12 |     <DebugSymbols>True</DebugSymbols>
13 |     <WarningLevel>3</WarningLevel>
14 |   </PropertyGroup>
15 |
16 |   <PropertyGroup Condition="'$(Configuration)'=='Release'">
17 |     <Optimize>true</Optimize>
18 |     <DebugSymbols>True</DebugSymbols>
19 |     <WarningLevel>3</WarningLevel>
20 |   </PropertyGroup>
21 |
22 | </Project>
23 |
--------------------------------------------------------------------------------
/Source/Managed/Smmalloc.cs:
--------------------------------------------------------------------------------
1 | /*
2 | * Managed C# wrapper for Smmalloc, blazing fast memory allocator designed for video games
3 | * Copyright (c) 2018 Stanislav Denisov
4 | *
5 | * Permission is hereby granted, free of charge, to any person obtaining a copy
6 | * of this software and associated documentation files (the "Software"), to deal
7 | * in the Software without restriction, including without limitation the rights
8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | * copies of the Software, and to permit persons to whom the Software is
10 | * furnished to do so, subject to the following conditions:
11 | *
12 | * The above copyright notice and this permission notice shall be included in all
13 | * copies or substantial portions of the Software.
14 | *
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | * SOFTWARE.
22 | */
23 |
24 | using System;
25 | using System.Runtime.CompilerServices;
26 | using System.Runtime.InteropServices;
27 | using System.Security;
28 |
29 | namespace Smmalloc {
30 | public enum CacheWarmupOptions {
31 | Cold = 0,
32 | Warm = 1,
33 | Hot = 2
34 | }
35 |
36 | public class SmmallocInstance : IDisposable {
37 | private IntPtr nativeAllocator;
38 | private readonly uint allocationLimit;
39 |
40 | public SmmallocInstance(uint bucketsCount, int bucketSize) {
41 | if (bucketsCount > 64)
42 | throw new ArgumentOutOfRangeException();
43 |
44 | nativeAllocator = Native.sm_allocator_create(bucketsCount, (IntPtr)bucketSize);
45 |
46 | if (nativeAllocator == IntPtr.Zero)
47 | throw new InvalidOperationException("Native memory allocator not created");
48 |
49 | allocationLimit = bucketsCount * 16;
50 | }
51 |
52 | public void Dispose() {
53 | Dispose(true);
54 | GC.SuppressFinalize(this);
55 | }
56 |
57 | protected virtual void Dispose(bool disposing) {
58 | if (nativeAllocator != IntPtr.Zero) {
59 | Native.sm_allocator_destroy(nativeAllocator);
60 | nativeAllocator = IntPtr.Zero;
61 | }
62 | }
63 |
64 | ~SmmallocInstance() {
65 | Dispose(false);
66 | }
67 |
68 | public void CreateThreadCache(int cacheSize, CacheWarmupOptions warmupOption) {
69 | if (cacheSize <= 0)
70 | throw new ArgumentOutOfRangeException();
71 |
72 | Native.sm_allocator_thread_cache_create(nativeAllocator, warmupOption, (IntPtr)cacheSize);
73 | }
74 |
75 | public void DestroyThreadCache() {
76 | Native.sm_allocator_thread_cache_destroy(nativeAllocator);
77 | }
78 |
79 | #if SMMALLOC_INLINING
80 | [MethodImpl(256)]
81 | #endif
82 | public IntPtr Malloc(int bytesCount) {
83 | return Malloc(bytesCount, 0);
84 | }
85 |
86 | #if SMMALLOC_INLINING
87 | [MethodImpl(256)]
88 | #endif
89 | public IntPtr Malloc(int bytesCount, int alignment) {
90 | if (bytesCount <= 0 || bytesCount > allocationLimit)
91 | throw new ArgumentOutOfRangeException();
92 |
93 | return Native.sm_malloc(nativeAllocator, (IntPtr)bytesCount, (IntPtr)alignment);
94 | }
95 |
96 | #if SMMALLOC_INLINING
97 | [MethodImpl(256)]
98 | #endif
99 | public void Free(IntPtr memory) {
100 | if (memory == IntPtr.Zero)
101 | throw new ArgumentNullException("memory");
102 |
103 | Native.sm_free(nativeAllocator, memory);
104 | }
105 |
106 | #if SMMALLOC_INLINING
107 | [MethodImpl(256)]
108 | #endif
109 | public void Free(IntPtr[] batch) {
110 | if (batch == null)
111 | throw new ArgumentNullException("batch");
112 |
113 | Native.sm_free_batch(nativeAllocator, batch, (IntPtr)batch.Length);
114 | }
115 |
116 | #if SMMALLOC_INLINING
117 | [MethodImpl(256)]
118 | #endif
119 | public void Free(IntPtr batch, int length) {
120 | if (batch == IntPtr.Zero)
121 | throw new ArgumentNullException("batch");
122 |
123 | Native.sm_free_batch(nativeAllocator, batch, (IntPtr)length);
124 | }
125 |
126 | #if SMMALLOC_INLINING
127 | [MethodImpl(256)]
128 | #endif
129 | public IntPtr Realloc(IntPtr memory, int bytesCount) {
130 | return Realloc(memory, bytesCount, 0);
131 | }
132 |
133 | #if SMMALLOC_INLINING
134 | [MethodImpl(256)]
135 | #endif
136 | public IntPtr Realloc(IntPtr memory, int bytesCount, int alignment) {
137 | if (memory == IntPtr.Zero)
138 | throw new ArgumentNullException("memory");
139 |
140 | if (bytesCount <= 0 || bytesCount > allocationLimit)
141 | throw new ArgumentOutOfRangeException();
142 |
143 | return Native.sm_realloc(nativeAllocator, memory, (IntPtr)bytesCount, (IntPtr)alignment);
144 | }
145 |
146 | #if SMMALLOC_INLINING
147 | [MethodImpl(256)]
148 | #endif
149 | public int Size(IntPtr memory) {
150 | if (memory == IntPtr.Zero)
151 | throw new ArgumentNullException("memory");
152 |
153 | return (int)Native.sm_msize(nativeAllocator, memory);
154 | }
155 |
156 | #if SMMALLOC_INLINING
157 | [MethodImpl(256)]
158 | #endif
159 | public int Bucket(IntPtr memory) {
160 | if (memory == IntPtr.Zero)
161 | throw new ArgumentNullException("memory");
162 |
163 | return Native.sm_mbucket(nativeAllocator, memory);
164 | }
165 | }
166 |
167 | [SuppressUnmanagedCodeSecurity]
168 | internal static class Native {
169 | private const string nativeLibrary = "smmalloc";
170 |
171 | [DllImport(nativeLibrary, CallingConvention = CallingConvention.Cdecl)]
172 | internal static extern IntPtr sm_allocator_create(uint bucketsCount, IntPtr bucketSizeInBytes);
173 |
174 | [DllImport(nativeLibrary, CallingConvention = CallingConvention.Cdecl)]
175 | internal static extern void sm_allocator_destroy(IntPtr allocator);
176 |
177 | [DllImport(nativeLibrary, CallingConvention = CallingConvention.Cdecl)]
178 | internal static extern void sm_allocator_thread_cache_create(IntPtr allocator, CacheWarmupOptions warmupOption, IntPtr cacheSize);
179 |
180 | [DllImport(nativeLibrary, CallingConvention = CallingConvention.Cdecl)]
181 | internal static extern void sm_allocator_thread_cache_destroy(IntPtr allocator);
182 |
183 | [DllImport(nativeLibrary, CallingConvention = CallingConvention.Cdecl)]
184 | internal static extern IntPtr sm_malloc(IntPtr allocator, IntPtr bytesCount, IntPtr alignment);
185 |
186 | [DllImport(nativeLibrary, CallingConvention = CallingConvention.Cdecl)]
187 | internal static extern void sm_free(IntPtr allocator, IntPtr memory);
188 |
189 | [DllImport(nativeLibrary, CallingConvention = CallingConvention.Cdecl)]
190 | internal static extern void sm_free_batch(IntPtr allocator, IntPtr batch, IntPtr length);
191 |
192 | [DllImport(nativeLibrary, CallingConvention = CallingConvention.Cdecl)]
193 | internal static extern void sm_free_batch(IntPtr allocator, IntPtr[] batch, IntPtr length);
194 |
195 | [DllImport(nativeLibrary, CallingConvention = CallingConvention.Cdecl)]
196 | internal static extern IntPtr sm_realloc(IntPtr allocator, IntPtr memory, IntPtr bytesCount, IntPtr alignment);
197 |
198 | [DllImport(nativeLibrary, CallingConvention = CallingConvention.Cdecl)]
199 | internal static extern IntPtr sm_msize(IntPtr allocator, IntPtr memory);
200 |
201 | [DllImport(nativeLibrary, CallingConvention = CallingConvention.Cdecl)]
202 | internal static extern int sm_mbucket(IntPtr allocator, IntPtr memory);
203 | }
204 | }
--------------------------------------------------------------------------------
/Source/Native/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 2.6)
2 | project(smmalloc CXX)
3 |
4 | set(SMMALLOC_STATIC "0" CACHE BOOL "Create a static library")
5 | set(SMMALLOC_SHARED "0" CACHE BOOL "Create a shared library")
6 | set(SMMALLOC_STATS "0" CACHE BOOL "Add support for stats gathering")
7 |
8 | if (SMMALLOC_STATS)
9 | add_definitions(-DSMMALLOC_STATS_SUPPORT)
10 | endif()
11 |
12 | if (SMMALLOC_STATIC)
13 | add_library(smmalloc_static STATIC smmalloc.cpp)
14 |
15 | if (NOT LINUX)
16 | SET_TARGET_PROPERTIES(smmalloc_static PROPERTIES PREFIX "")
17 | endif()
18 | endif()
19 |
20 | if (SMMALLOC_SHARED)
21 | if (CMAKE_CXX_COMPILER_ID MATCHES GNU OR CMAKE_CXX_COMPILER_ID MATCHES Clang)
22 | set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -static")
23 | elseif (CMAKE_CXX_COMPILER_ID MATCHES MSVC)
24 | set(CMAKE_CXX_FLAGS_RELEASE "/MT")
25 | endif()
26 |
27 | add_library(smmalloc SHARED smmalloc.cpp)
28 |
29 | if (WIN32)
30 | SET_TARGET_PROPERTIES(smmalloc PROPERTIES PREFIX "")
31 | endif()
32 | endif()
33 |
--------------------------------------------------------------------------------
/Source/Native/smmalloc.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Smmalloc blazing fast memory allocator designed for video games
3 | * Copyright (c) 2018 Sergey Makeev, Stanislav Denisov
4 | *
5 | * Permission is hereby granted, free of charge, to any person obtaining a copy
6 | * of this software and associated documentation files (the "Software"), to deal
7 | * in the Software without restriction, including without limitation the rights
8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | * copies of the Software, and to permit persons to whom the Software is
10 | * furnished to do so, subject to the following conditions:
11 | *
12 | * The above copyright notice and this permission notice shall be included in all
13 | * copies or substantial portions of the Software.
14 | *
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | * SOFTWARE.
22 | */
23 |
24 | #include <malloc.h>
25 | #include "smmalloc.h"
26 |
27 | thread_local sm::internal::TlsPoolBucket tlsCacheBuckets[SMM_MAX_BUCKET_COUNT];
28 |
29 | namespace sm {
30 | struct CacheWarmupLink {
31 | CacheWarmupLink* pNext;
32 | };
33 |
34 | sm::internal::TlsPoolBucket* __restrict GetTlsBucket(size_t index) {
35 | return &tlsCacheBuckets[index];
36 | }
37 |
38 | namespace internal {
39 | void TlsPoolBucket::Init(uint32_t* pCacheStack, uint32_t maxElementsNum, CacheWarmupOptions warmupOptions, Allocator* alloc, size_t bucketIndex) {
40 | SM_ASSERT(numElementsL0 == 0);
41 | SM_ASSERT(numElementsL1 == 0);
42 | SM_ASSERT(pBucket == nullptr);
43 | SM_ASSERT(pBucketData == nullptr);
44 | SM_ASSERT(pStorageL1 == nullptr);
45 | SM_ASSERT(maxElementsCount == 0);
46 |
47 | Allocator::PoolBucket* poolBucket = alloc->GetBucketByIndex(bucketIndex);
48 |
49 | SM_ASSERT(maxElementsNum >= SMM_MAX_CACHE_ITEMS_COUNT + 2);
50 |
51 | pStorageL1 = pCacheStack;
52 | numElementsL1 = 0;
53 | numElementsL0 = 0;
54 | maxElementsCount = (maxElementsNum - SMM_MAX_CACHE_ITEMS_COUNT);
55 | pBucket = poolBucket;
56 |
57 | SM_ASSERT(pBucket);
58 |
59 | pBucketData = pBucket->pData;
60 |
61 | if (warmupOptions == CACHE_COLD)
62 | return;
63 |
64 | uint32_t elementSize = alloc->GetBucketElementSize(bucketIndex);
65 | uint32_t num = (warmupOptions == CACHE_WARM) ? (maxElementsCount / 2) : (maxElementsCount);
66 |
67 | CacheWarmupLink* pRoot = nullptr;
68 |
69 | for (uint32_t j = 0; j < num; j++) {
70 | void* p = alloc->Allocate<false>(elementSize, 16);
71 |
72 | if (p == nullptr)
73 | break;
74 |
75 | if (alloc->GetBucketIndex(p) != (int32_t)bucketIndex) {
76 | alloc->Free(p);
77 |
78 | break;
79 | }
80 |
81 | CacheWarmupLink* pItem = (CacheWarmupLink*)p;
82 | pItem->pNext = pRoot;
83 | pRoot = pItem;
84 | }
85 |
86 | CacheWarmupLink* pCurrent = pRoot;
87 |
88 | while (pCurrent) {
89 | CacheWarmupLink* pNext = pCurrent->pNext;
90 |
91 | bool r = alloc->ReleaseToCache<false>(this, pCurrent);
92 |
93 | SMMALLOC_USED_IN_ASSERT(r);
94 | SM_ASSERT(r);
95 |
96 | pCurrent = pNext;
97 | }
98 |
99 | SM_ASSERT(GetElementsCount() == num);
100 | }
101 |
102 | uint32_t* TlsPoolBucket::Destroy() {
103 | for (uint32_t i = 0; i < numElementsL0; i++) {
104 | pStorageL1[numElementsL1] = storageL0[i];
105 | numElementsL1++;
106 | }
107 |
108 | if (numElementsL1 > 0)
109 | ReturnL1CacheToMaster(numElementsL1);
110 |
111 | uint32_t* r = pStorageL1;
112 |
113 | pStorageL1 = nullptr;
114 | numElementsL0 = 0;
115 | numElementsL1 = 0;
116 | maxElementsCount = 0;
117 | pBucket = nullptr;
118 | pBucketData = nullptr;
119 |
120 | return r;
121 | }
122 | }
123 |
124 | void Allocator::CreateThreadCache(CacheWarmupOptions warmupOptions, size_t cacheSize) {
125 | for (size_t i = 0; i < bucketsCount; i++) {
126 | // Each bucket gets its own thread-local cache stack
127 | uint32_t elementsNum = (uint32_t)cacheSize + SMM_MAX_CACHE_ITEMS_COUNT;
128 | uint32_t* localStack = (uint32_t*)GenericAllocator::Alloc(gAllocator, elementsNum * sizeof(uint32_t), 64);
129 |
130 | GetTlsBucket(i)->Init(localStack, elementsNum, warmupOptions, this, i);
131 | }
132 | }
133 |
134 | void Allocator::DestroyThreadCache() {
135 | for (size_t i = 0; i < SMM_MAX_BUCKET_COUNT; i++) {
136 | uint32_t* p = GetTlsBucket(i)->Destroy();
137 | GenericAllocator::Free(gAllocator, p);
138 | }
139 | }
140 |
141 | void Allocator::PoolBucket::Create(size_t elementSize) {
142 | SM_ASSERT(elementSize >= 16 && "Invalid element size");
143 |
144 | globalTag.store(0, std::memory_order_relaxed);
145 |
146 | uint8_t* node = pData;
147 |
148 | TaggedIndex headVal;
149 | headVal.p.tag = globalTag.load(std::memory_order_relaxed);
150 | headVal.p.offset = (uint32_t)(node - pData);
151 | head.store(headVal.u);
152 |
153 | while (true) {
154 | uint8_t* next = node + elementSize;
155 |
156 | if ((next + elementSize) <= pBufferEnd) {
157 | TaggedIndex nextVal;
158 | nextVal.p.tag = globalTag.load(std::memory_order_relaxed);
159 | nextVal.p.offset = (uint32_t)(next - pData);
160 | *((TaggedIndex*)(node)) = nextVal;
161 | } else {
162 | ((TaggedIndex*)(node))->u = TaggedIndex::Invalid;
163 |
164 | break;
165 | }
166 |
167 | node = next;
168 | globalTag.fetch_add(1, std::memory_order_relaxed);
169 | }
170 | }
171 |
172 | Allocator::Allocator(GenericAllocator::TInstance allocator) : bucketsCount(0), bucketSizeInBytes(0), pBufferEnd(nullptr), pBuffer(nullptr, GenericAllocator::Deleter(allocator)), gAllocator(allocator) {
173 | #ifdef SMMALLOC_STATS_SUPPORT
174 | globalMissCount.store(0);
175 | #endif
176 | }
177 |
178 | inline int GetNextPow2(uint32_t n) {
179 | n -= 1;
180 | n |= n >> 16;
181 | n |= n >> 8;
182 | n |= n >> 4;
183 | n |= n >> 2;
184 | n |= n >> 1;
185 |
186 | return n + 1;
187 | }
188 |
189 | void Allocator::Init(uint32_t _bucketsCount, size_t _bucketSizeInBytes) {
190 | if (bucketsCount > 0)
191 | return;
192 |
193 | SM_ASSERT(_bucketsCount > 0 && _bucketsCount <= 64);
194 |
195 | if (_bucketsCount == 0)
196 | return;
197 |
198 | bucketsCount = _bucketsCount;
199 |
200 | size_t alignmentMax = GetNextPow2((uint32_t)(16 * bucketsCount));
201 |
202 | bucketSizeInBytes = Align(_bucketSizeInBytes, alignmentMax);
203 |
204 | size_t i = 0;
205 |
206 | for (i = 0; i < bucketsDataBegin.size(); i++) {
207 | bucketsDataBegin[i] = nullptr;
208 | }
209 |
210 | size_t totalBytesCount = bucketSizeInBytes * bucketsCount;
211 |
212 | pBuffer.reset((uint8_t*)GenericAllocator::Alloc(gAllocator, totalBytesCount, alignmentMax));
213 | pBufferEnd = pBuffer.get() + totalBytesCount + 1;
214 |
215 | size_t elementSize = 16;
216 |
217 | for (i = 0; i < bucketsCount; i++) {
218 | PoolBucket& bucket = buckets[i];
219 | bucket.pData = pBuffer.get() + i * bucketSizeInBytes;
220 |
221 | SM_ASSERT(IsAligned((size_t)bucket.pData, GetNextPow2(elementSize)) && "Alignment failed");
222 |
223 | bucket.pBufferEnd = bucket.pData + bucketSizeInBytes;
224 | bucket.Create(elementSize);
225 | elementSize += 16;
226 | bucketsDataBegin[i] = bucket.pData;
227 | }
228 | }
229 | }
230 |
231 | sm::GenericAllocator::TInstance sm::GenericAllocator::Invalid() {
232 | return nullptr;
233 | }
234 |
235 | bool sm::GenericAllocator::IsValid(TInstance instance) {
236 | SMMALLOC_UNUSED(instance);
237 |
238 | return true;
239 | }
240 |
241 | sm::GenericAllocator::TInstance sm::GenericAllocator::Create() {
242 | return nullptr;
243 | }
244 |
245 | void sm::GenericAllocator::Destroy(sm::GenericAllocator::TInstance instance) {
246 | SMMALLOC_UNUSED(instance);
247 | }
248 |
249 | void* sm::GenericAllocator::Alloc(sm::GenericAllocator::TInstance instance, size_t bytesCount, size_t alignment) {
250 | SMMALLOC_UNUSED(instance);
251 |
252 | if (alignment < 16)
253 | alignment = 16;
254 |
255 | return _aligned_malloc(bytesCount, alignment);
256 | }
257 |
258 | void sm::GenericAllocator::Free(sm::GenericAllocator::TInstance instance, void* p) {
259 | SMMALLOC_UNUSED(instance);
260 |
261 | _aligned_free(p);
262 | }
263 |
264 | void* sm::GenericAllocator::Realloc(sm::GenericAllocator::TInstance instance, void* p, size_t bytesCount, size_t alignment) {
265 | SMMALLOC_UNUSED(instance);
266 |
267 | return _aligned_realloc(p, bytesCount, alignment);
268 | }
269 |
270 | size_t sm::GenericAllocator::GetUsableSpace(sm::GenericAllocator::TInstance instance, void* p) {
271 | SMMALLOC_UNUSED(instance);
272 |
273 | size_t alignment = DetectAlignment(p);
274 |
275 | #ifdef __GNUC__
276 | if (alignment < sizeof(void*))
277 | alignment = sizeof(void*);
278 |
279 | return _msize(p) - alignment - sizeof(void*);
280 | #else
281 | return _aligned_msize(p, alignment, 0);
282 | #endif
283 | }
284 |
--------------------------------------------------------------------------------
/Source/Native/smmalloc.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Smmalloc blazing fast memory allocator designed for video games
3 | * Copyright (c) 2018 Sergey Makeev, Stanislav Denisov
4 | *
5 | * Permission is hereby granted, free of charge, to any person obtaining a copy
6 | * of this software and associated documentation files (the "Software"), to deal
7 | * in the Software without restriction, including without limitation the rights
8 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | * copies of the Software, and to permit persons to whom the Software is
10 | * furnished to do so, subject to the following conditions:
11 | *
12 | * The above copyright notice and this permission notice shall be included in all
13 | * copies or substantial portions of the Software.
14 | *
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | * SOFTWARE.
22 | */
23 |
24 | #pragma once
25 |
26 | #include <atomic>
27 | #include <array>
28 | #include <algorithm>
29 | #include <cstring>
30 | #include <memory>
31 | #include <type_traits>
32 |
33 | #if __GNUC__ || __INTEL_COMPILER
34 | #define SM_UNLIKELY(expr) __builtin_expect(!!(expr), (0))
35 | #define SM_LIKELY(expr) __builtin_expect(!!(expr), (1))
36 | #else
37 | #define SM_UNLIKELY(expr) (expr)
38 | #define SM_LIKELY(expr) (expr)
39 | #endif
40 |
41 | #ifdef _DEBUG
42 | #define SMMALLOC_ENABLE_ASSERTS
43 | #endif
44 |
45 | #ifdef _M_X64
46 | #define SMMMALLOC_X64
47 | #define SMM_MAX_CACHE_ITEMS_COUNT (7)
48 | #else
49 | #define SMMMALLOC_X86
50 | #define SMM_MAX_CACHE_ITEMS_COUNT (10)
51 | #endif
52 |
53 | #define SMM_CACHE_LINE_SIZE (64)
54 | #define SMM_MAX_BUCKET_COUNT (64)
55 |
56 | #define SMMALLOC_UNUSED(x) (void)(x)
57 | #define SMMALLOC_USED_IN_ASSERT(x) (void)(x)
58 |
59 | #ifdef _MSC_VER
60 | #define INLINE __forceinline
61 | #else
62 | #define INLINE inline
63 | #endif
64 |
65 | #ifdef _MSC_VER
66 | #define NOINLINE __declspec(noinline)
67 | #else
68 | #define NOINLINE __attribute__((__noinline__))
69 | #endif
70 |
71 | #ifdef SMMALLOC_ENABLE_ASSERTS
72 | #include <intrin.h>
73 |
74 | #define SM_ASSERT(cond) do { if (!(cond)) __debugbreak(); } while (0)
75 | #else
76 | #define SM_ASSERT(x)
77 | #endif
78 |
79 | namespace sm {
80 | #ifdef SMMALLOC_STATS_SUPPORT
81 | struct AllocatorStats {
82 | std::atomic<size_t> cacheHitCount;
83 | std::atomic<size_t> hitCount;
84 | std::atomic<size_t> missCount;
85 | std::atomic<size_t> freeCount;
86 |
87 | AllocatorStats() {
88 | cacheHitCount.store(0);
89 | hitCount.store(0);
90 | missCount.store(0);
91 | freeCount.store(0);
92 | }
93 | };
94 | #endif
95 |
96 | enum CacheWarmupOptions {
97 | CACHE_COLD = 0,
98 | CACHE_WARM = 1,
99 | CACHE_HOT = 2
100 | };
101 |
102 | namespace internal {
103 | struct TlsPoolBucket;
104 | }
105 |
106 | internal::TlsPoolBucket* __restrict GetTlsBucket(size_t index);
107 |
108 | INLINE bool IsAligned(size_t v, size_t alignment) {
109 | size_t lowBits = v & (alignment - 1);
110 |
111 | return (lowBits == 0);
112 | }
113 |
114 | INLINE size_t Align(size_t val, size_t alignment) {
115 | SM_ASSERT((alignment & (alignment - 1)) == 0 && "Invalid alignment. Must be power of two.");
116 |
117 | size_t r = (val + (alignment - 1)) & ~(alignment - 1);
118 |
119 | SM_ASSERT(IsAligned(r, alignment) && "Alignment failed.");
120 |
121 | return r;
122 | }
123 |
124 | INLINE size_t DetectAlignment(void* p) {
125 | uintptr_t v = (uintptr_t)p;
126 | size_t ptrBitsCount = sizeof(void*) * 8;
127 | size_t i;
128 |
129 | for (i = 0; i < ptrBitsCount; i++) {
130 | if (v & 1)
131 | break;
132 |
133 | v = v >> 1;
134 | }
135 |
136 | return (size_t(1) << i);
137 | }
138 |
139 | struct GenericAllocator {
140 | typedef void* TInstance;
141 |
142 | static TInstance Invalid();
143 | static bool IsValid(TInstance instance);
144 | static TInstance Create();
145 | static void Destroy(TInstance instance);
146 | static void* Alloc(TInstance instance, size_t bytesCount, size_t alignment);
147 | static void Free(TInstance instance, void* p);
148 | static void* Realloc(TInstance instance, void* p, size_t bytesCount, size_t alignment);
149 | static size_t GetUsableSpace(TInstance instance, void* p);
150 |
151 | struct Deleter {
152 | explicit Deleter(GenericAllocator::TInstance _instance) : instance(_instance) { }
153 |
154 | INLINE void operator()(uint8_t* p) {
155 | GenericAllocator::Free(instance, p);
156 | }
157 |
158 | GenericAllocator::TInstance instance;
159 | };
160 | };
161 |
162 | class Allocator {
163 | private:
164 |
165 | static const size_t MaxValidAlignment = 16384;
166 |
167 | friend struct internal::TlsPoolBucket;
168 |
169 | INLINE bool IsReadable(void* p) const {
170 | return (uintptr_t(p) > MaxValidAlignment);
171 | }
172 |
173 | struct PoolBucket {
174 | union TaggedIndex {
175 | struct {
176 | uint32_t tag;
177 | uint32_t offset;
178 | } p;
179 |
180 | uint64_t u;
181 |
182 | static const uint64_t Invalid = UINT64_MAX;
183 | };
184 |
185 | std::atomic<uint64_t> head;
186 | std::atomic<uint32_t> globalTag;
187 |
188 | uint8_t* pData;
189 | uint8_t* pBufferEnd;
190 |
191 | #ifdef SMMALLOC_STATS_SUPPORT
192 | AllocatorStats stats;
193 | #endif
194 |
195 | PoolBucket() : head(TaggedIndex::Invalid), globalTag(0), pData(nullptr), pBufferEnd(nullptr) { }
196 |
197 | void Create(size_t elementSize);
198 |
199 | INLINE void* Alloc() {
200 | uint8_t* p = nullptr;
201 |
202 | TaggedIndex headValue;
203 | headValue.u = head.load();
204 |
205 | while (true) {
206 | if (headValue.u == TaggedIndex::Invalid)
207 | return nullptr;
208 |
209 | p = (pData + headValue.p.offset);
210 | TaggedIndex nextValue = *((TaggedIndex*)(p));
211 |
212 | if (head.compare_exchange_strong(headValue.u, nextValue.u))
213 | break;
214 | }
215 |
216 | return p;
217 | }
218 |
219 | INLINE void FreeInterval(void* _pHead, void* _pTail) {
220 | uint8_t* pHead = (uint8_t*)_pHead;
221 | uint8_t* pTail = (uint8_t*)_pTail;
222 | uint32_t tag = globalTag.fetch_add(1, std::memory_order_relaxed);
223 |
224 | TaggedIndex nodeValue;
225 | nodeValue.p.offset = (uint32_t)(pHead - pData);
226 | nodeValue.p.tag = tag;
227 | TaggedIndex headValue;
228 | headValue.u = head.load();
229 |
230 | while (true) {
231 | *((TaggedIndex*)(pTail)) = headValue;
232 |
233 | if (head.compare_exchange_strong(headValue.u, nodeValue.u))
234 | break;
235 | }
236 | }
237 |
238 | INLINE bool IsMyAlloc(void* p) const {
239 | return (p >= pData && p < pBufferEnd);
240 | }
241 | };
242 |
243 | public:
244 |
245 | void CreateThreadCache(CacheWarmupOptions warmupOptions, size_t cacheSize);
246 | void DestroyThreadCache();
247 |
248 | private:
249 |
250 | size_t bucketsCount;
251 | size_t bucketSizeInBytes;
252 | uint8_t* pBufferEnd;
253 |
254 | std::array<uint8_t*, SMM_MAX_BUCKET_COUNT> bucketsDataBegin;
255 | std::array<PoolBucket, SMM_MAX_BUCKET_COUNT> buckets;
256 | std::unique_ptr<uint8_t, GenericAllocator::Deleter> pBuffer;
257 | GenericAllocator::TInstance gAllocator;
258 |
259 | #ifdef SMMALLOC_STATS_SUPPORT
260 | std::atomic<size_t> globalMissCount;
261 | #endif
262 |
263 | INLINE void* AllocFromCache(internal::TlsPoolBucket* __restrict _self) const;
264 |
265 | template <bool useCacheL0>
266 | INLINE bool ReleaseToCache(internal::TlsPoolBucket* __restrict _self, void* _p);
267 |
268 | INLINE size_t FindBucket(const void* p) const {
269 | uintptr_t index = (uintptr_t)p - (uintptr_t)bucketsDataBegin[0];
270 | size_t r = (index / bucketSizeInBytes);
271 |
272 | return r;
273 | }
274 |
275 | INLINE PoolBucket* GetBucketByIndex(size_t bucketIndex) {
276 | if (bucketIndex >= bucketsCount)
277 | return nullptr;
278 |
279 | return &buckets[bucketIndex];
280 | }
281 |
282 | INLINE const PoolBucket* GetBucketByIndex(size_t bucketIndex) const {
283 | if (bucketIndex >= bucketsCount)
284 | return nullptr;
285 |
286 | return &buckets[bucketIndex];
287 | }
288 |
289 | template <bool enableStatistic>
290 | INLINE void* Allocate(size_t _bytesCount, size_t alignment) {
291 | SM_ASSERT(alignment <= MaxValidAlignment);
292 |
293 | if (SM_UNLIKELY(_bytesCount == 0))
294 | return (void*)alignment;
295 |
296 | size_t bytesCount = (_bytesCount < alignment) ? alignment : _bytesCount;
297 | size_t bucketIndex = ((bytesCount - 1) >> 4);
298 |
299 | if (bucketIndex < bucketsCount) {
300 | void* pRes = AllocFromCache(GetTlsBucket(bucketIndex));
301 |
302 | if (pRes) {
303 | #ifdef SMMALLOC_STATS_SUPPORT
304 | if (enableStatistic)
305 | buckets[bucketIndex].stats.cacheHitCount.fetch_add(1, std::memory_order_relaxed);
306 | #endif
307 |
308 | return pRes;
309 | }
310 | }
311 |
312 | while (bucketIndex < bucketsCount) {
313 | void* pRes = buckets[bucketIndex].Alloc();
314 |
315 | if (pRes) {
316 | #ifdef SMMALLOC_STATS_SUPPORT
317 | if (enableStatistic)
318 | buckets[bucketIndex].stats.hitCount.fetch_add(1, std::memory_order_relaxed);
319 | #endif
320 |
321 | return pRes;
322 | } else {
323 | #ifdef SMMALLOC_STATS_SUPPORT
324 | if (enableStatistic)
325 | buckets[bucketIndex].stats.missCount.fetch_add(1, std::memory_order_relaxed);
326 | #endif
327 | }
328 |
329 | bucketIndex++;
330 | }
331 |
332 | #ifdef SMMALLOC_STATS_SUPPORT
333 | if (enableStatistic)
334 | globalMissCount.fetch_add(1, std::memory_order_relaxed);
335 | #endif
336 |
337 | return GenericAllocator::Alloc(gAllocator, _bytesCount, alignment);
338 | }
339 |
340 | public:
341 |
342 | Allocator(GenericAllocator::TInstance allocator);
343 |
344 | void Init(uint32_t bucketsCount, size_t bucketSizeInBytes);
345 |
346 | INLINE void* Alloc(size_t _bytesCount, size_t alignment) {
347 | return Allocate<true>(_bytesCount, alignment);
348 | }
349 |
350 | INLINE void Free(void* p) {
351 | if (SM_UNLIKELY(!IsReadable(p)))
352 | return;
353 |
354 | size_t bucketIndex = FindBucket(p);
355 |
356 | if (bucketIndex < bucketsCount) {
357 | #ifdef SMMALLOC_STATS_SUPPORT
358 | buckets[bucketIndex].stats.freeCount.fetch_add(1, std::memory_order_relaxed);
359 | #endif
360 |
361 | if (ReleaseToCache<true>(GetTlsBucket(bucketIndex), p))
362 | return;
363 |
364 | PoolBucket* bucket = &buckets[bucketIndex];
365 | bucket->FreeInterval(p, p);
366 |
367 | return;
368 | }
369 |
370 | GenericAllocator::Free(gAllocator, (uint8_t*)p);
371 | }
372 |
373 | INLINE void* Realloc(void* p, size_t bytesCount, size_t alignment) {
374 | if (p == nullptr)
375 | return Alloc(bytesCount, alignment);
376 |
377 | size_t bucketIndex = FindBucket(p);
378 |
379 | if (bucketIndex < bucketsCount) {
380 | size_t elementSize = GetBucketElementSize(bucketIndex);
381 |
382 | if (bytesCount <= elementSize) {
383 | Free(p);
384 |
385 | return p;
386 | }
387 |
388 | void* p2 = Alloc(bytesCount, alignment);
389 |
390 | if (IsReadable(p))
391 | std::memmove(p2, p, elementSize);
392 |
393 | Free(p);
394 |
395 | return p2;
396 | }
397 |
398 | if (bytesCount == 0) {
399 | if (IsReadable(p))
400 | GenericAllocator::Free(gAllocator, p);
401 |
402 | return (void*)alignment;
403 | }
404 |
405 | if (!IsReadable(p))
406 | return GenericAllocator::Alloc(gAllocator, bytesCount, alignment);
407 |
408 | return GenericAllocator::Realloc(gAllocator, p, bytesCount, alignment);
409 | }
410 |
411 | INLINE size_t GetUsableSize(void* p) {
412 | if (!IsReadable(p))
413 | return 0;
414 |
415 | size_t bucketIndex = FindBucket(p);
416 |
417 | if (bucketIndex < bucketsCount) {
418 | size_t elementSize = GetBucketElementSize(bucketIndex);
419 |
420 | return elementSize;
421 | }
422 |
423 | return GenericAllocator::GetUsableSpace(gAllocator, p);
424 | }
425 |
426 | INLINE int32_t GetBucketIndex(void* _p) {
427 | if (!IsMyAlloc(_p))
428 | return -1;
429 |
430 | size_t bucketIndex = FindBucket(_p);
431 |
432 | if (bucketIndex >= bucketsCount)
433 | return -1;
434 |
435 | return (int32_t)bucketIndex;
436 | }
437 |
438 | INLINE bool IsMyAlloc(const void* p) const {
439 | return (p >= pBuffer.get() && p < pBufferEnd);
440 | }
441 |
442 | INLINE size_t GetBucketsCount() const {
443 | return bucketsCount;
444 | }
445 |
446 | INLINE uint32_t GetBucketElementSize(size_t bucketIndex) const {
447 | return (uint32_t)((bucketIndex + 1) * 16);
448 | }
449 |
450 | INLINE uint32_t GetBucketElementsCount(size_t bucketIndex) const {
451 | if (bucketIndex >= bucketsCount)
452 | return 0;
453 |
454 | size_t oneElementSize = GetBucketElementSize(bucketIndex);
455 |
456 | return (uint32_t)(bucketSizeInBytes / oneElementSize);
457 | }
458 |
459 | #ifdef SMMALLOC_STATS_SUPPORT
460 | size_t GetGlobalMissCount() const {
461 | return globalMissCount.load(std::memory_order_relaxed);
462 | }
463 |
464 | const AllocatorStats* GetBucketStats(size_t bucketIndex) const {
465 | const PoolBucket* bucket = GetBucketByIndex(bucketIndex);
466 |
467 | if (!bucket)
468 | return nullptr;
469 |
470 | return &bucket->stats;
471 | }
472 | #endif
473 |
474 | GenericAllocator::TInstance GetGenericAllocatorInstance() {
475 | return gAllocator;
476 | }
477 | };
478 |
479 | namespace internal {
480 | struct TlsPoolBucket {
481 | uint8_t* pBucketData;
482 | uint32_t* pStorageL1;
483 |
484 | Allocator::PoolBucket* pBucket;
485 | std::array storageL0;
486 |
487 | uint32_t maxElementsCount;
488 | uint32_t numElementsL1;
489 | uint8_t numElementsL0;
490 |
491 | INLINE uint32_t GetElementsCount() const {
492 | return numElementsL1 + numElementsL0;
493 | }
494 |
495 | void Init(uint32_t* pCacheStack, uint32_t maxElementsNum, CacheWarmupOptions warmupOptions, Allocator* alloc, size_t bucketIndex);
496 | uint32_t* Destroy();
497 |
498 | INLINE void ReturnL1CacheToMaster(uint32_t count) {
499 | if (count == 0)
500 | return;
501 |
502 | SM_ASSERT(pBucket != nullptr);
503 |
504 | if (numElementsL1 == 0)
505 | return;
506 |
507 | count = std::min(count, numElementsL1);
508 |
509 | uint32_t localTag = 0xFFFFFF;
510 | uint32_t firstElementToReturn = (numElementsL1 - count);
511 | uint32_t offset = pStorageL1[firstElementToReturn];
512 | uint8_t* pHead = pBucketData + offset;
513 | uint8_t* pPrevBlockMemory = pHead;
514 |
515 | for (uint32_t i = (firstElementToReturn + 1); i < numElementsL1; i++, localTag++) {
516 | offset = pStorageL1[i];
517 | Allocator::PoolBucket::TaggedIndex* pTag = (Allocator::PoolBucket::TaggedIndex*)pPrevBlockMemory;
518 | pTag->p.tag = localTag;
519 | pTag->p.offset = offset;
520 |
521 | uint8_t* pBlockMemory = pBucketData + offset;
522 |
523 | pPrevBlockMemory = pBlockMemory;
524 | }
525 |
526 | uint8_t* pTail = pPrevBlockMemory;
527 |
528 | pBucket->FreeInterval(pHead, pTail);
529 | numElementsL1 -= count;
530 | }
531 | };
532 |
533 | static_assert(std::is_pod<TlsPoolBucket>::value == true, "TlsPoolBucket must be POD type, stored in TLS");
534 | static_assert(sizeof(TlsPoolBucket) <= 64, "TlsPoolBucket sizeof must be less than CPU cache line");
535 | }
536 |
537 | INLINE void* Allocator::AllocFromCache(internal::TlsPoolBucket* __restrict _self) const {
538 | if (_self->numElementsL0 > 0) {
539 | SM_ASSERT(_self->pBucketData != nullptr);
540 |
541 | _self->numElementsL0--;
542 |
543 | uint32_t offset = _self->storageL0[_self->numElementsL0];
544 |
545 | return _self->pBucketData + offset;
546 | }
547 |
548 | if (_self->numElementsL1 > 0) {
549 | SM_ASSERT(_self->pBucketData != nullptr);
550 | SM_ASSERT(_self->numElementsL0 == 0);
551 |
552 | _self->numElementsL1--;
553 |
554 | uint32_t offset = _self->pStorageL1[_self->numElementsL1];
555 |
556 | return _self->pBucketData + offset;
557 | }
558 |
559 | return nullptr;
560 | }
561 |
562 | template <bool useCacheL0>
563 | INLINE bool Allocator::ReleaseToCache(internal::TlsPoolBucket* __restrict _self, void* _p) {
564 | if (_self->maxElementsCount == 0)
565 | return false;
566 |
567 | SM_ASSERT(_self->pBucket != nullptr);
568 | SM_ASSERT(_self->pBucketData != nullptr);
569 |
570 | uint8_t* p = (uint8_t*)_p;
571 |
572 | SM_ASSERT(p >= _self->pBucketData && p < _self->pBucket->pBufferEnd);
573 |
574 | uint32_t offset = (uint32_t)(p - _self->pBucketData);
575 |
576 | if (useCacheL0) {
577 | if (_self->numElementsL0 < SMM_MAX_CACHE_ITEMS_COUNT) {
578 | _self->storageL0[_self->numElementsL0] = offset;
579 | _self->numElementsL0++;
580 |
581 | return true;
582 | }
583 | }
584 |
585 | if (_self->numElementsL1 < _self->maxElementsCount) {
586 | _self->pStorageL1[_self->numElementsL1] = offset;
587 | _self->numElementsL1++;
588 |
589 | return true;
590 | }
591 |
592 | uint32_t halfOfElements = (_self->numElementsL1 >> 1);
593 |
594 | _self->ReturnL1CacheToMaster(halfOfElements);
595 | _self->pStorageL1[_self->numElementsL1] = offset;
596 | _self->numElementsL1++;
597 |
598 | return true;
599 | }
600 | }
601 |
602 | #define SMMALLOC_CSTYLE_FUNCS
603 |
604 | #ifdef SMMALLOC_CSTYLE_FUNCS
605 | #define SMMALLOC_DLL
606 |
607 | #if defined(_WIN32) && defined(SMMALLOC_DLL)
608 | #define SMMALLOC_API __declspec(dllexport)
609 | #else
610 | #define SMMALLOC_API extern
611 | #endif
612 |
613 | #ifdef __cplusplus
614 | extern "C" {
615 | #endif
616 |
617 | typedef sm::Allocator* sm_allocator;
618 |
619 | SMMALLOC_API INLINE sm_allocator sm_allocator_create(uint32_t bucketsCount, size_t bucketSizeInBytes) {
620 | sm::GenericAllocator::TInstance instance = sm::GenericAllocator::Create();
621 |
622 | if (!sm::GenericAllocator::IsValid(instance))
623 | return nullptr;
624 |
625 | size_t align = __alignof(sm::Allocator);
626 |
627 | align = sm::Align(align, SMM_CACHE_LINE_SIZE);
628 |
629 | void* pBuffer = sm::GenericAllocator::Alloc(instance, sizeof(sm::Allocator), align);
630 |
631 | sm::Allocator* allocator = new(pBuffer) sm::Allocator(instance);
632 | allocator->Init(bucketsCount, bucketSizeInBytes);
633 |
634 | return allocator;
635 | }
636 |
637 | SMMALLOC_API INLINE void sm_allocator_destroy(sm_allocator allocator) {
638 | if (allocator == nullptr)
639 | return;
640 |
641 | sm::GenericAllocator::TInstance instance = allocator->GetGenericAllocatorInstance();
642 | allocator->~Allocator();
643 |
644 | sm::GenericAllocator::Free(instance, allocator);
645 | sm::GenericAllocator::Destroy(instance);
646 | }
647 |
648 | SMMALLOC_API INLINE void sm_allocator_thread_cache_create(sm_allocator allocator, sm::CacheWarmupOptions warmupOptions, size_t cacheSize) {
649 | if (allocator == nullptr)
650 | return;
651 |
652 | allocator->CreateThreadCache(warmupOptions, cacheSize);
653 | }
654 |
655 | SMMALLOC_API INLINE void sm_allocator_thread_cache_destroy(sm_allocator allocator) {
656 | if (allocator == nullptr)
657 | return;
658 |
659 | allocator->DestroyThreadCache();
660 | }
661 |
662 | SMMALLOC_API INLINE void* sm_malloc(sm_allocator allocator, size_t bytesCount, size_t alignment) {
663 | return allocator->Alloc(bytesCount, alignment);
664 | }
665 |
666 | SMMALLOC_API INLINE void sm_free(sm_allocator allocator, void* p) {
667 | allocator->Free(p);
668 | }
669 |
670 | SMMALLOC_API INLINE void sm_free_batch(sm_allocator allocator, void** batch, size_t length) {
671 | void* p;
672 | size_t i;
673 |
674 | for (i = 0; i < length; i++) {
675 | p = batch[i];
676 |
677 | if (p == nullptr)
678 | continue;
679 |
680 | allocator->Free(p);
681 | }
682 | }
683 |
684 | SMMALLOC_API INLINE void* sm_realloc(sm_allocator allocator, void* p, size_t bytesCount, size_t alignment) {
685 | return allocator->Realloc(p, bytesCount, alignment);
686 | }
687 |
688 | SMMALLOC_API INLINE size_t sm_msize(sm_allocator allocator, void* p) {
689 | return allocator->GetUsableSize(p);
690 | }
691 |
692 | SMMALLOC_API INLINE int32_t sm_mbucket(sm_allocator allocator, void* p) {
693 | return allocator->GetBucketIndex(p);
694 | }
695 |
696 | #ifdef __cplusplus
697 | }
698 | #endif
699 | #endif
--------------------------------------------------------------------------------