├── .dir-locals.el
├── .gitignore
├── .gitmodules
├── LICENSE.md
├── README.md
├── features
    ├── aimbot.cpp
    ├── aimbot.h
    └── aimbot_types.h
├── g_defines.h
├── interfaces
    └── tracing.h
├── math
    ├── clang_avx512.h
    ├── matrix.h
    ├── mm_funcs.h
    ├── mmath.h
    ├── soa_accessor.h
    ├── vec_funcs.h
    ├── vecsoa_funcs.h
    ├── vector.h
    └── vector_operators.h
├── meson.build
├── meson_options.txt
├── players.h
├── tests
    ├── allocator.cpp
    ├── crc.cpp
    ├── intersect.cpp
    ├── kd_tree.cpp
    ├── mutex.cpp
    ├── settings.cpp
    ├── shared_mutex.cpp
    ├── shmemalloc.cpp
    ├── thread_pool.cpp
    └── vector.cpp
├── utils
    ├── allocwraps.h
    ├── atomic_lock.cpp
    ├── atomic_lock.h
    ├── crc32.h
    ├── freelistallocator.h
    ├── handles.cpp
    ├── handles.h
    ├── history_list.h
    ├── intersect.cpp
    ├── intersect.h
    ├── intersect_box.cpp
    ├── intersect_box.h
    ├── intersect_box_impl.h
    ├── intersect_impl.h
    ├── kd_tree.h
    ├── md5.cpp
    ├── md5.h
    ├── memutils.h
    ├── mutex.cpp
    ├── mutex.h
    ├── named_semaphores.cpp
    ├── named_semaphores.h
    ├── packed_heap.cpp
    ├── packed_heap.h
    ├── pattern_scan.cpp
    ├── pattern_scan.h
    ├── rstring.h
    ├── scheduler.h
    ├── semaphores.cpp
    ├── semaphores.h
    ├── settings.h
    ├── shared_mutex.cpp
    ├── shared_mutex.h
    ├── shared_utils.h
    ├── stackstring.h
    ├── threading.cpp
    ├── threading.h
    ├── utils.h
    ├── vfhook.cpp
    └── vfhook.h
├── wincludes.h
└── windows_meson.txt


/.dir-locals.el:
--------------------------------------------------------------------------------
1 | ((nil . ((indent-tabs-mode . t))))
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.o
2 | *.exe
3 | *.out
4 | *.pdb
5 | *.gch
6 | build


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "submodules/minitrace"]
2 | 	path = submodules/minitrace
3 | 	url = https://github.com/hrydgard/minitrace.git
4 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | Copyright (c) 2018 A. B. (Heep042)
2 | 
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4 | 
5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6 | 
7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
8 | 
9 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # A modular video game modding library
 2 | 
 3 | This is a set of utilities commonly used in video game modifications, such as a pattern scanner and a high-performance vector math library. Some game-independent features, such as an aimbot, are also part of this library (physics-based bullets are not supported yet).
 4 | 
 5 | This library is intended to be used as the core module of any project, wrapped in a layer of game-engine-specific functions and data structures, which is in turn wrapped in a game-specific implementation (providing initialization, shutdown, data preparation and execution of features). This way as little code as possible is duplicated and as much of it as possible is reused. The library is primarily focused on internal hacks, with direct access to pointer dereferencing, but some tools can be used externally (provided the correct memory access functions are implemented by the other layers).
 6 | 
 7 | Another major focus of the library is efficient data layout (data-oriented design), hence the SoA-style vector structs, the SoA-style player data structure, etc. This allows for good cache performance and more efficient vectorization (not having to lose the 4th value of an SSE register when dealing with 3D vectors, and being able to scale up even to AVX512). Of the 3 compilers tested (LLVM, including Apple's LLVM, GCC and MSVC), LLVM seems to do the best job at auto-vectorization and is therefore the recommended compiler.
 8 | 
 9 | ##### TODO
10 | - Make the HistoryList structure use standard naming
11 | 


--------------------------------------------------------------------------------
/features/aimbot.cpp:
--------------------------------------------------------------------------------
  1 | #include "aimbot.h"
  2 | #include "../interfaces/tracing.h"
  3 | #ifdef AIMBOT_THREADING
  4 | #include "../utils/threading.h"
  5 | auto& numThreads = Threading::numThreads; //Threaded build: alias of the thread count exposed by Threading
  6 | constexpr int threadQueueMultiplier = 2; //FindBestTarget handles numThreads * threadQueueMultiplier ticks per batch
  7 | #else
  8 | constexpr int numThreads = 1; //Non-threaded build: one tick per batch, LoopPlayers is called inline
  9 | constexpr int threadQueueMultiplier = 1;
 10 | #endif
 11 | 
 12 | vec3_t Aimbot::shootAngles; //Set by RunAimbot to localPlayer->angles + localPlayer->aimOffset
 13 | float* pointScaleVal = nullptr; //Set by RunAimbot to its pointScale argument; read per hitbox in PrepareHitboxList
 14 | 
 15 | #ifdef AIMBOT_THREADING
 16 | static Semaphore threadSem; //Posted once by every LoopPlayers call, waited on by FindBestTarget
 17 | #endif
 18 | 
 19 | AimbotLoopData data[NUM_THREADS * 2]; //Per-job scratch slots (data + o in FindBestTarget); NOTE(review): sized by NUM_THREADS while the loops are bounded by numThreads * threadQueueMultiplier - confirm they agree
 20 | 
 21 | bool doMultipoint = true; //NOTE(review): not referenced anywhere else in this file - confirm it is still used elsewhere
 22 | 
 23 | static int ProcessAimPointsSIMD(AimbotLoopData* d)
 24 | {
 25 | 	//Traces the queued multipoint batches and scores every point; yields d->entID if any point was accepted, -1 otherwise
 26 | 	int acceptedEnt = -1;
 27 | 
 28 | #ifdef AIMBOT_SIMD_TRACE_DATA
 29 | 	Tracing::TracePointListSIMD<MULTIPOINT_COUNT>(d->localPlayer, d->players, d->hitboxIDsSOA.size(), d->traceEndSOA.data(), d->entID, d->traceOutputsSOA.data(), 1);
 30 | 
 31 | 	for (size_t box = 0; box < d->hitboxIDsSOA.size(); box++) {
 32 | 		bool anyAccepted = false;
 33 | 		//No early exit: CompareData is invoked for every point of the batch, accepted or not
 34 | 		for (size_t pt = 0; pt < MULTIPOINT_COUNT; pt++) {
 35 | 			const size_t flat = box * MULTIPOINT_COUNT + pt;
 36 | 			if (Aimbot::CompareData(d, d->traceOutputsSOA[flat], (vec3_t)d->traceEndSOA[box].acc[pt], d->hitboxIDsSOA[box], d->fovListSOA[flat]))
 37 | 				anyAccepted = true;
 38 | 		}
 39 | 		//TODO: add an option to return early
 40 | 		acceptedEnt = anyAccepted ? d->entID : acceptedEnt;
 41 | 	}
 42 | #endif
 43 | 	return acceptedEnt;
 44 | }
 45 | 
 46 | static int ProcessAimPoints(AimbotLoopData* d)
 47 | {
 48 | 	//Traces every queued scalar point, then lets CompareData score each one; yields d->entID on any accepted point, else -1
 49 | 	Tracing::TracePointList(d->localPlayer, d->players, d->hitboxIDs.size(), d->traceEnd.data(), d->entID, d->traceOutputs.data(), 1);
 50 | 	int acceptedEnt = -1;
 51 | 
 52 | 	for (size_t pt = 0; pt < d->hitboxIDs.size(); pt++) {
 53 | 		const bool accepted = Aimbot::CompareData(d, d->traceOutputs[pt], d->traceEnd[pt], d->hitboxIDs[pt], d->fovList[pt]);
 54 | 		acceptedEnt = accepted ? d->entID : acceptedEnt;
 55 | 	}
 56 | 	return acceptedEnt;
 57 | }
 58 | 
 59 | static int PrepareHitboxList(AimbotLoopData* d, size_t id)
 60 | { //Rebuilds the trace point lists of d for player index id from the hitboxes enabled in d->hitboxList; always returns 0
 61 | 	d->fovs[id] = 1000.f;
 62 | 
 63 | 	d->entID = id; //ProcessAimPoints/ProcessAimPointsSIMD hand this id back when a point is accepted
 64 | 
 65 | 	d->traceEnd.clear(); //The four scalar lists are filled in lockstep below: one entry per trace point
 66 | 	d->hitboxIDs.clear();
 67 | 	d->fovList.clear();
 68 | 	d->traceOutputs.clear();
 69 | 
 70 | #ifdef AIMBOT_SIMD_TRACE_DATA
 71 | 	d->traceEndSOA.clear(); //SIMD lists: one traceEndSOA/hitboxIDsSOA entry per hitbox, MULTIPOINT_COUNT fov/output entries per hitbox
 72 | 	d->hitboxIDsSOA.clear();
 73 | 	d->fovListSOA.clear();
 74 | 	d->traceOutputsSOA.clear();
 75 | #endif
 76 | 
 77 | 	HitboxList& hitboxes = d->players->hitboxes[id];
 78 | 
 79 | 	for (size_t i = 0; i < MAX_HITBOXES; i++) {
 80 | 
 81 | 		if (!d->hitboxList[i]) //Zero (SCAN_NONE): this hitbox is not scanned
 82 | 			continue;
 83 | 
 84 | 		float fov = 0.f; //Filled in by PreCompareData through its outFOV argument
 85 | 
 86 | 		vec3_t average = (hitboxes.start[i] + hitboxes.end[i]) * 0.5f; //Midpoint of the hitbox start/end
 87 | 		average = hitboxes.wm[i].Vector3Transform(average); //Transformed by the hitbox matrix wm[i]
 88 | 
 89 | 		if (!Aimbot::PreCompareData(&d->target, d->localPlayer, average, i, &fov)) //User-implemented pre-filter; false skips the hitbox
 90 | 			continue;
 91 | 
 92 | 		if (d->hitboxList[i] & HitboxScanMode_t::SCAN_MULTIPOINT) {
 93 | 			mvec3 mpVec = d->players->hitboxes[id].mpOffset[i] + d->players->hitboxes[id].mpDir[i] * d->players->hitboxes[id].radius[i] * pointScaleVal[i]; //offset + direction * radius * per-hitbox point scale
 94 | 			mpVec = d->players->hitboxes[id].wm[i].VecSoaTransform(mpVec);
 95 | 
 96 | #ifdef AIMBOT_SIMD_TRACE_DATA
 97 | 			d->traceEndSOA.push_back(mpVec); //SIMD build: the whole batch is queued as one SoA entry
 98 | 			d->hitboxIDsSOA.push_back(i);
 99 | 			//TODO: Convert this to more vectorizable way
100 | 			for (int o = 0; o < MULTIPOINT_COUNT; o++) {
101 | 				vec3_t angle = ((vec3_t)mpVec.acc[o] - d->localPlayer->eyePos).GetAngles(true);
102 | 				vec3_t angleDiff = (Aimbot::shootAngles - angle).NormalizeAngles<2>(-180.f, 180.f);
103 | 				float fovSOA = angleDiff.Length<2>(); //Length of the first two components of the normalized angle difference
104 | 				d->fovListSOA.push_back(fovSOA);
105 | 			}
106 | 			d->traceOutputsSOA.resize(d->traceOutputsSOA.size() + MULTIPOINT_COUNT); //Room for one trace result per point of this batch
107 | #else
108 | 			auto rvec = mpVec.Rotate();
109 | 
110 | 			//TODO: Convert this to more vectorizable way
111 | 			for (int o = 0; o < MULTIPOINT_COUNT; o++) {
112 | 				d->traceEnd.push_back(rvec.GetColAsVecp(o)); //Scalar build: every multipoint is queued as its own scalar point
113 | 				d->hitboxIDs.push_back(i);
114 | 				vec3_t angle = ((vec3_t)rvec.GetColAsVecp(o) - d->localPlayer->eyePos).GetAngles(true);
115 | 				vec3_t angleDiff = (Aimbot::shootAngles - angle).NormalizeAngles<2>(-180.f, 180.f);
116 | 				float fov = angleDiff.Length<2>(); //Shadows the outer fov on purpose or by accident; only this per-point value is stored
117 | 				d->fovList.push_back(fov);
118 | 				d->traceOutputs.push_back(0);
119 | 			}
120 | 
121 | #endif
122 | 		} else { //Single-point scan: only the transformed midpoint, with the FOV reported by PreCompareData
123 | 			d->traceEnd.push_back(average);
124 | 			d->hitboxIDs.push_back(i);
125 | 			d->fovList.push_back(fov);
126 | 			d->traceOutputs.push_back(0);
127 | 		}
128 | 	}
129 | 
130 | 	return 0;
131 | }
132 | 
133 | static int LoopPlayers(AimbotLoopData* d)
134 | {
135 | 	//Job body: scans every eligible player in d->players; reports 1 if any aim point was accepted, otherwise 0
136 | 	int foundAny = 0;
137 | 	for (int slot = 0; slot < d->players->count; slot++) {
138 | 		const auto rawID = d->players->unsortIDs[slot];
139 | 		//Players whose bit is set in the ignore bitmask (64 entries per word) are skipped
140 | 		if (!(~d->ignoreList[rawID / 64] & (1ull << (rawID % 64))))
141 | 			continue;
142 | 		//Must be flagged HITBOXES_UPDATED and must not be flagged FRIENDLY
143 | 		if (!(d->players->flags[slot] & Flags::HITBOXES_UPDATED) || !(~d->players->flags[slot] & Flags::FRIENDLY))
144 | 			continue;
145 | 		//The following check is just a rough way to clear the unrelated players from view. A better check would be to intersect AABB with previous target to see if they overlap. If they do not, then simply quit the loop since the players should be sorted by FOV
146 | 		if (!(d->players->fov[slot] - 30.f < d->target.fov))
147 | 			continue;
148 | 
149 | 		PrepareHitboxList(d, slot);
150 | 		//Scalar pass runs before the SIMD pass (braced lists evaluate left to right); a later hit overrides the id
151 | 		const int hits[] = {ProcessAimPoints(d), ProcessAimPointsSIMD(d)};
152 | 		for (const int hit : hits) {
153 | 			if (hit == -1)
154 | 				continue;
155 | 			d->target.id = hit;
156 | 			foundAny = 1;
157 | 		}
158 | 	}
159 | #ifdef AIMBOT_THREADING
160 | 	threadSem.Post(); //FindBestTarget waits once per queued job
161 | #endif
162 | 	return foundAny;
163 | }
164 | 
165 | static void FindBestTarget(AimbotTarget* target, HistoryList<Players, BACKTRACK_TICKS>* track, HistoryList<Players, BACKTRACK_TICKS>* futureTrack, LocalPlayer* localPlayer, unsigned char hitboxList[MAX_HITBOXES], uint64_t ignoreList[NumOf<64>(MAX_PLAYERS)])
166 | { //Scans futureTrack (when non-null) and then track in batches of numThreads * threadQueueMultiplier ticks; the best candidate is written to *target
167 | 
168 | 	char backtrackMask[MAX_PLAYERS]; //Scratch mask handed to Tracing::BacktrackPlayers and Tracing::VerifyTarget
169 | 	float lowestFov = 1000.f; //FOV of the candidate currently stored in *target
170 | 	Players* prevPlayers = nullptr; //Last tick that passed BacktrackPlayers
171 | 	Players* targetPlayers = nullptr; //Tick the current target was taken from
172 | 
173 | 	memset(backtrackMask, 0, MAX_PLAYERS);
174 | 
175 | 	//First check the future, but this will be overwritten by the normal track if any of the ticks are valid
176 | 	if (futureTrack) {
177 | 		for (size_t i = 0; i < futureTrack->Count(); i += numThreads * threadQueueMultiplier) {
178 | 
179 | #ifdef AIMBOT_THREADING
180 | 			int pushedCount = 0; //Jobs queued in this batch, i.e. how many semaphore posts to wait for
181 | #endif
182 | 
183 | 			for (int o = 0; o < (int)numThreads * threadQueueMultiplier && i + o < futureTrack->Count(); o++) {
184 | 				Players& players = futureTrack->GetLastItem(i + o);
185 | 				AimbotLoopData* d = data + o;
186 | 
187 | 				d->target.fov = 9000; //Above the initial lowestFov, so a slot skipped by the continue below cannot win the selection pass
188 | 
189 | 				//We do not want to just exit out the loop if we predicted too far into future
190 | 				if (!Tracing::BacktrackPlayers(&players, prevPlayers, backtrackMask))
191 | 					continue;
192 | 
193 | 				d->target = *target; //Every job starts from the best target found so far
194 | 
195 | 				d->players = &players;
196 | 				d->localPlayer = localPlayer;
197 | 				d->hitboxList = hitboxList;
198 | 				d->ignoreList = ignoreList;
199 | 
200 | #ifdef AIMBOT_THREADING
201 | 				Threading::QueueJobRef(LoopPlayers, d);
202 | 				pushedCount++;
203 | #else
204 | 				LoopPlayers(d);
205 | #endif
206 | 
207 | 				prevPlayers = &players;
208 | 			}
209 | 
210 | #ifdef AIMBOT_THREADING
211 | 			for (int i = 0; i < pushedCount; i++) //One Wait per queued job; this i shadows the outer size_t i
212 | 				threadSem.Wait();
213 | #endif
214 | 
215 | 			for (int o = 0; o < (int)numThreads * threadQueueMultiplier && i + o < futureTrack->Count(); o++) { //Selection pass over every slot of the batch
216 | 				Players& players = futureTrack->GetLastItem(i + o);
217 | 				AimbotLoopData* d = data + o;
218 | 
219 | 				if (d->target.id >= 0 && d->target.fov < lowestFov) {
220 | 					lowestFov = d->target.fov;
221 | 					d->target.backTick = i + o;
222 | 					d->target.future = true;
223 | 					*target = d->target;
224 | 					targetPlayers = &players;
225 | 				}
226 | 			}
227 | 		}
228 | 	}
229 | 
230 | 	bool b = false; //Set once BacktrackPlayers rejects a tick; no further batches are started after that
231 | 
232 | 	for (size_t i = 0; i < track->Count() && !b; i += numThreads * threadQueueMultiplier) {
233 | 		int o = 0; //Declared outside the fill loop: the selection pass below needs the number of filled slots
234 | 
235 | #ifdef AIMBOT_THREADING
236 | 		int pushedCount = 0;
237 | #endif
238 | 
239 | 		for (o = 0; o < (int)numThreads * threadQueueMultiplier && i + o < track->Count(); o++) {
240 | 			Players& players = track->GetLastItem(i + o);
241 | 			AimbotLoopData* d = data + o;
242 | 
243 | 			d->target.fov = 9000;
244 | 
245 | 			if (!Tracing::BacktrackPlayers(&players, prevPlayers, backtrackMask)) { //Unlike the future pass, a rejected tick ends the scan
246 | 				b = true;
247 | 				break;
248 | 			}
249 | 
250 | 			d->target = *target;
251 | 
252 | 			d->players = &players;
253 | 			d->localPlayer = localPlayer;
254 | 			d->hitboxList = hitboxList;
255 | 			d->ignoreList = ignoreList;
256 | 
257 | #ifdef AIMBOT_THREADING
258 | 			Threading::QueueJobRef(LoopPlayers, d);
259 | 			pushedCount++;
260 | #else
261 | 			LoopPlayers(d);
262 | #endif
263 | 
264 | 			prevPlayers = &players;
265 | 		}
266 | 
267 | #ifdef AIMBOT_THREADING
268 | 		for (int i = 0; i < pushedCount; i++) //One Wait per queued job; this i shadows the outer size_t i
269 | 			threadSem.Wait();
270 | #endif
271 | 
272 | 		for (int u = 0; u < o; u++) { //Only the first o slots were filled in this batch
273 | 			Players& players = track->GetLastItem(i + u);
274 | 			AimbotLoopData* d = data + u;
275 | 			//Take over when closer in FOV or when the stored target fails VerifyTarget; NOTE(review): targetPlayers is still nullptr until a first target is stored - confirm VerifyTarget tolerates that
276 | 			if (d->target.id >= 0 && (d->target.fov < lowestFov || !Tracing::VerifyTarget(targetPlayers, target->id, backtrackMask))) {
277 | 				lowestFov = d->target.fov;
278 | 				d->target.backTick = i + u;
279 | 				d->target.future = false;
280 | 				*target = d->target;
281 | 				targetPlayers = &players;
282 | 			}
283 | 		}
284 | 	}
285 | }
286 | 
287 | AimbotTarget Aimbot::RunAimbot(HistoryList<Players, BACKTRACK_TICKS>* track, HistoryList<Players, BACKTRACK_TICKS>* futureTrack, LocalPlayer* localPlayer, unsigned char hitboxList[MAX_HITBOXES], uint64_t ignoreList[NumOf<64>(MAX_PLAYERS)], float pointScale[MAX_HITBOXES])
288 | {
289 | 	//Publish the per-call state that the static helpers of this file read through globals
290 | 	pointScaleVal = pointScale;
291 | 	Aimbot::shootAngles = localPlayer->angles + localPlayer->aimOffset;
292 | 	//The result starts as a default AimbotTarget; its id stays -1 unless FindBestTarget stores a candidate
293 | 	AimbotTarget best;
294 | 	FindBestTarget(&best, track, futureTrack, localPlayer, hitboxList, ignoreList);
295 | 	return best;
296 | }
297 | 


--------------------------------------------------------------------------------
/features/aimbot.h:
--------------------------------------------------------------------------------
 1 | #ifndef AIMBOT_H
 2 | #define AIMBOT_H
 3 | 
 4 | #include "../players.h"
 5 | #include "../utils/history_list.h"
 6 | 
 7 | #include "aimbot_types.h"
 8 | 
 9 | namespace Aimbot
10 | { //Public interface of the aimbot feature; the two Compare hooks are supplied by the game-specific layer
11 | 	extern vec3_t shootAngles; //Set by RunAimbot to localPlayer->angles + localPlayer->aimOffset
12 | 	//Returns the chosen target (id stays -1 when none is found). futureTrack may be null. pointScale holds one multipoint scale per hitbox, hence the MAX_HITBOXES bound (matches the definition in aimbot.cpp)
13 | 	AimbotTarget RunAimbot(HistoryList<Players, BACKTRACK_TICKS>* track, HistoryList<Players, BACKTRACK_TICKS>* futureTrack, LocalPlayer* localPlayer, unsigned char hitboxList[MAX_HITBOXES], uint64_t ignoreList[NumOf<64>(MAX_PLAYERS)], float pointScale[MAX_HITBOXES]);
14 | 
15 | 	//These need to be implemented manually
16 | 	bool PreCompareData(AimbotTarget* target, LocalPlayer* localPlayer, vec3_t targetVec, int bone, float* outFOV); //Called once per enabled hitbox before tracing; false skips the hitbox, outFOV receives the FOV stored for single-point scans
17 | 	bool CompareData(AimbotLoopData* d, int out, vec3_t targetVec, int bone, float fov); //Called once per traced point with its trace output; true marks the current entity as a hit
18 | }
19 | 
20 | #endif
21 | 


--------------------------------------------------------------------------------
/features/aimbot_types.h:
--------------------------------------------------------------------------------
 1 | #ifndef AIMBOT_TYPES_H
 2 | #define AIMBOT_TYPES_H
 3 | 
 4 | struct LocalPlayer;
 5 | struct Players;
 6 | 
 7 | struct AimbotTarget //Result record of a target search; also carried per job inside AimbotLoopData
 8 | {
 9 | 	vec3_t targetVec; //Not written in aimbot.cpp; presumably filled by the user-implemented CompareData - TODO confirm
10 | 	int id = -1; //Player index of the target; FindBestTarget only accepts candidates with id >= 0
11 | 	int backTick = 0; //Offset of the history tick the target was found in (set by FindBestTarget)
12 | 	int boneID = 0; //Not written in aimbot.cpp; presumably filled by CompareData - TODO confirm
13 | 	float fov = 420.f; //Candidates are ranked by this value, lower wins
14 | 	int dmg = 0; //Not written in aimbot.cpp; presumably filled by CompareData - TODO confirm
15 | 	bool future = false; //True when the target was taken from futureTrack, false when from track
16 | };
17 | 
18 | enum HitboxScanMode_t : unsigned char //Per-hitbox scan setting, stored in the hitboxList arrays
19 | {
20 | 	SCAN_NONE = 0, //Zero entries are skipped by PrepareHitboxList
21 | 	SCAN_SIMPLE = 1, //Any non-zero value without the multipoint bit: only the hitbox midpoint is traced
22 | 	SCAN_MULTIPOINT = 2 //Tested as a bit (hitboxList[i] & SCAN_MULTIPOINT): MULTIPOINT_COUNT points are traced
23 | };
24 | 
25 | struct AimbotLoopData { //Working set of one LoopPlayers job; NOTE(review): this header includes nothing itself, so vec3_t, std::vector, uint64_t and MAX_PLAYERS must come from the including file
26 | 	AimbotTarget target; //Seeded from the best target so far, updated while the job runs
27 | 	LocalPlayer* localPlayer;
28 | 	Players* players; //The history tick this job scans
29 | 
30 | 	unsigned char* hitboxList; //Per-hitbox HitboxScanMode_t values
31 | 	uint64_t* ignoreList; //Bitmask, 64 players per word; a set bit excludes that player
32 | 
33 | 	float fovs[MAX_PLAYERS]; //Reset to 1000.f per player by PrepareHitboxList
34 | 
35 | 	int entID; //Player index currently being processed
36 | 
37 | 	std::vector<vec3_t> traceEnd; //Scalar trace lists: the four vectors hold one entry per trace point
38 | 	std::vector<int> hitboxIDs;
39 | 	std::vector<float> fovList;
40 | 	std::vector<int> traceOutputs;
41 | 
42 | #ifdef AIMBOT_SIMD_TRACE_DATA
43 | 	std::vector<mvec3> traceEndSOA; //SIMD trace lists: one batch per multipoint hitbox
44 | 	std::vector<int> hitboxIDsSOA; //One entry per batch
45 | 	std::vector<float> fovListSOA; //MULTIPOINT_COUNT entries per batch
46 | 	std::vector<int> traceOutputsSOA; //MULTIPOINT_COUNT entries per batch
47 | #endif
48 | };
49 | 
50 | #endif
51 | 


--------------------------------------------------------------------------------
/g_defines.h:
--------------------------------------------------------------------------------
 1 | #ifndef G_DEFINES_H
 2 | #define G_DEFINES_H
 3 | 
 4 | //Picks a value by pointer width: x64 when pointers are 8 bytes wide, x32 otherwise
 5 | template<typename T>
 6 | inline constexpr T x64x32(T x64, T x32)
 7 | {
 8 | 	if (sizeof(void*) != 0x8)
 9 | 		return x32;
10 | 	return x64;
11 | }
11 | 
12 | #define COMMA , //A comma that can be produced by another macro (see LC/PC/WC below)
13 | 
14 | #if defined(_WIN32)
15 | #define SECTION(sec) __declspec(allocate(sec))
16 | #define WSECTION(sec) SECTION(sec) //Section placement that is only active on Windows
17 | #include "wincludes.h"
18 | #include <string.h>
19 | //#define CLZ(x) //__lzcnt(x)
20 | #define CLZ(x) __builtin_clz(x) //NOTE(review): GCC/Clang builtin, and this branch defines no CTZ unlike the two branches below - confirm both are intended
21 | #define OLin(Linux)
22 | #define OWin(Windows) Windows
23 | #define PosixWin(Posix, Windows) Windows
24 | #define LWM(Linux, Windows, Mac) Windows
25 | #define OMac(Mac)
26 | #define OPosix(Posix)
27 | #define paddr(handle, name) GetProcAddress(handle, name)
28 | #define FASTARGS [[maybe_unused]] void* thisptr, [[maybe_unused]] void* edx
29 | #define CFASTARGS thisptr, edx
30 | #define STDARGS
31 | #define THISARGS [[maybe_unused]] void* thisptr
32 | #define LC
33 | #define PC
34 | #define WC COMMA
35 | #define _noinline __declspec(noinline)
36 | #elif defined(__linux__)
37 | #define __posix__
38 | #define SECTION(sec) __attribute__((section(sec)))
39 | #define WSECTION(sec)
40 | #define CLZ(x) __builtin_clz(x)
41 | #define CTZ(x) __builtin_ctz(x)
42 | #define OLin(Linux) Linux
43 | #define OWin(Windows)
44 | #define OMac(Mac)
45 | #define OPosix(Posix) Posix
46 | #define PosixWin(Posix, Windows) Posix
47 | #define LWM(Linux, Windows, Mac) Linux
48 | #define paddr(handle, name) dlsym(handle, name)
49 | #define FASTARGS [[maybe_unused]] void* thisptr
50 | #define CFASTARGS thisptr
51 | #define STDARGS [[maybe_unused]] void* thisptr
52 | #define THISARGS [[maybe_unused]] void* thisptr
53 | #define LC COMMA
54 | #define PC COMMA
55 | #define WC
56 | #define _ReturnAddress() __builtin_return_address(0)
57 | #define _noinline __attribute__((noinline))
58 | #else //Everything that is neither Windows nor Linux takes the Mac expansions (OMac/LWM select Mac)
59 | #define __posix__
60 | #define SECTION(sec) __attribute__((section(sec)))
61 | #define WSECTION(sec)
62 | #define CLZ(x) __builtin_clz(x)
63 | #define CTZ(x) __builtin_ctz(x)
64 | #define OLin(Linux)
65 | #define OWin(Windows)
66 | #define OMac(Mac) Mac
67 | #define OPosix(Posix) Posix
68 | #define PosixWin(Posix, Windows) Posix
69 | #define LWM(Linux, Windows, Mac) Mac
70 | #define paddr(handle, name) dlsym(handle, name)
71 | #define FASTARGS [[maybe_unused]] void* thisptr
72 | #define CFASTARGS thisptr
73 | #define STDARGS [[maybe_unused]] void* thisptr
74 | #define THISARGS [[maybe_unused]] void* thisptr
75 | #define LC COMMA
76 | #define PC COMMA
77 | #define WC
78 | #define _ReturnAddress() __builtin_return_address(0)
79 | #define _noinline __attribute__((noinline))
80 | #endif
81 | 
82 | #ifdef __posix__ //Non-Windows builds: make the MSVC-only keywords used by shared code expand to nothing
83 | #define __thiscall
84 | #define __fastcall
85 | #define __stdcall
86 | #define __cdecl
87 | #define __declspec(a) //Function-like on purpose: with a space before the parenthesis it would be an object-like macro expanding to "(a)"
88 | #define _stricmp(a, b) strcasecmp(a, b)
89 | #endif
90 | 
91 | #ifdef _MSC_VER //FRAME_POINTER(): address of the current frame, derived per compiler
92 | #define FRAME_POINTER() (void*)((void**)_AddressOfReturnAddress() - 1) //One pointer slot below the return address slot
93 | #else
94 | #define FRAME_POINTER() __builtin_frame_address(0) //GCC/Clang builtin, level 0 = the calling function's own frame
95 | #endif
96 | 
97 | #endif
98 | 


--------------------------------------------------------------------------------
/interfaces/tracing.h:
--------------------------------------------------------------------------------
 1 | #ifndef TRACING_H
 2 | #define TRACING_H
 3 | 
 4 | #include "../players.h"
 5 | 
 6 | namespace Tracing
 7 | { //Declarations only: no definition is visible in this header; the README describes a game-specific layer wrapped around the library
 8 | 
 9 | 	struct Trace //NOTE(review): not referenced by any declaration in this header - presumably used by implementations
10 | 	{
11 | 		vec3_t start;
12 | 		vec3_t end;
13 | 		int entID;
14 | 	};
15 | 
16 | 	/*
17 | 	  Depth specifies the complexity of the trace to be performed.
18 | 	  For example, depth 1 in CSGO would make the trace run through wall penetrating code path,
19 | 	  while depth 0 would be a regular traceray.
20 | 	*/
21 | 	int TracePlayer(LocalPlayer* localPlayer, Players* players, vec3_t point, int eID, int depth = 0, bool skipLocal = true);
22 | 	//SIMD flavour of TracePlayer: N points at once, N results written to out
23 | 	template<size_t N>
24 | 	void TracePlayerSIMD(LocalPlayer* localPlayer, Players* players, vec3soa<float, N> point, int eID, int out[N], int depth = 0, bool skipLocal = true);
25 | 	//List flavour: aimbot.cpp passes n points and an out array with one slot per point
26 | 	void TracePointList(LocalPlayer* localPlayer, Players* players, size_t n, const vec3_t* points, int eIDs, int* __restrict out, int depth = 0, bool skipLocal = true);
27 | 	//SIMD list flavour: aimbot.cpp passes n batches and reserves MULTIPOINT_COUNT out slots per batch
28 | 	template<size_t N>
29 | 	void TracePointListSIMD(LocalPlayer* localPlayer, Players* players, size_t n, const vec3soa<float, N>* points, int eID, int* __restrict out, int depth = 0, bool skipLocal = true);
30 | 	/*
31 | 	  For games supporting moving players back in time.
32 | 	  The mask is to be used for anything the implementation needs it to use (for example marking a Source Engine player as non-backtrackable,
33 | 	  due to the breaking of lag compensation)
34 | 	*/
35 | 	bool BacktrackPlayers(Players* curPlayers, Players* prevPlayers, char backtrackMask[MAX_PLAYERS]); //aimbot.cpp treats false as "this tick cannot be used"; prevPlayers is null for the first tick
36 | 	bool VerifyTarget(Players* players, int id, char backtrackMask[MAX_PLAYERS]); //aimbot.cpp replaces its stored target when this returns false
37 | }
38 | 
39 | #endif
40 | 


--------------------------------------------------------------------------------
/math/clang_avx512.h:
--------------------------------------------------------------------------------
 1 | 
 2 | //A hack to improve compile times on clang
 3 | #if defined(__clang__) && !defined(__AVX512F__) && !defined(__AVX512CD__) && !defined(__AVX512ER__)
 4 | #define __AVX512BITALGINTRIN_H //The macros below look like the include guards of clang's AVX512 intrinsic headers; pre-defining them presumably turns those headers into no-ops - TODO confirm against the clang version in use
 5 | #define __AVX512BWINTRIN_H
 6 | #define __AVX512CDINTRIN_H
 7 | #define __AVX512DQINTRIN_H
 8 | #define __AVX512ERINTRIN_H
 9 | #define __AVX512FINTRIN_H
10 | #define __AVX512PFINTRIN_H
11 | #define __AVX512VBMI2INTRIN_H
12 | #define __AVX512VLBITALGINTRIN_H
13 | #define __AVX512VLBWINTRIN_H
14 | #define __AVX512VLCDINTRIN_H
15 | #define __AVX512VLDQINTRIN_H
16 | #define __AVX512VLINTRIN_H
17 | #define __AVX512VLVBMI2INTRIN_H
18 | #define __AVX512VLVNNIINTRIN_H
19 | #define __AVX512VNNIINTRIN_H
20 | #define __AVX512VPOPCNTDQINTRIN_H
21 | #define __AVX512VPOPCNTDQVLINTRIN_H
22 | 
23 | #define __IFMAINTRIN_H
24 | #define __IFMAVLINTRIN_H
25 | #define __VBMIINTRIN_H
26 | #define __VBMIVLINTRIN_H
27 | //Hand-written replacements for the 512-bit vector types that the skipped headers would have declared
28 | typedef char __v64qi __attribute__((__vector_size__(64)));
29 | typedef short __v32hi __attribute__((__vector_size__(64)));
30 | typedef double __v8df __attribute__((__vector_size__(64)));
31 | typedef float __v16sf __attribute__((__vector_size__(64)));
32 | typedef long long __v8di __attribute__((__vector_size__(64)));
33 | typedef int __v16si __attribute__((__vector_size__(64)));
34 | 
35 | /* Unsigned types */
36 | typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
37 | typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
38 | typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
39 | typedef unsigned int __v16su __attribute__((__vector_size__(64)));
40 | 
41 | typedef float __m512 __attribute__((__vector_size__(64)));
42 | typedef double __m512d __attribute__((__vector_size__(64)));
43 | typedef long long __m512i __attribute__((__vector_size__(64)));
44 | 
45 | typedef unsigned char __mmask8;
46 | typedef unsigned short __mmask16;
47 | 
48 | #define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(512)))
49 | //The only intrinsic re-created here: an all-zero 512-bit integer vector
50 | static  __inline __m512i __DEFAULT_FN_ATTRS512
51 | _mm512_setzero_si512(void)
52 | {
53 | 	return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
54 | }
55 | 
56 | typedef unsigned int __mmask32;
57 | typedef unsigned long long __mmask64;
58 | typedef unsigned short __v64hu __attribute__((__vector_size__(128)));
59 | typedef unsigned int __v32su __attribute__((__vector_size__(128)));
60 | 
61 | typedef __v8di __v8di_aligned __attribute__((aligned(64)));
62 | typedef __v8di __v8di_aligned __attribute__((aligned(64))); //NOTE(review): exact repeat of the previous line (a legal redeclaration) - confirm whether a different alias was intended
63 | typedef __v8df __v8df_aligned __attribute__((aligned(64)));
64 | typedef __v16sf __v16sf_aligned __attribute__((aligned(64)));
65 | typedef short __v2hi __attribute__((__vector_size__(4)));
66 | typedef char __v4qi __attribute__((__vector_size__(4)));
67 | typedef char __v2qi __attribute__((__vector_size__(2)));
68 | 
69 | 
70 | #endif
71 | 


--------------------------------------------------------------------------------
/math/matrix.h:
--------------------------------------------------------------------------------
  1 | #ifndef MATRIX_H
  2 | #define MATRIX_H
  3 | 
  4 | #include "vector.h"
  5 | 
  6 | template<size_t X, size_t Y>
  7 | struct matrix
  8 | {
  9 | 	vecSoa<float, X, Y> vec;
 10 | 
 11 | 	template <size_t X2, size_t Y2>
 12 | 	inline auto& operator=(const matrix<X2, Y2>& ov)
 13 | 	{
 14 | 		constexpr size_t MX = X2 < X ? X2 : X;
 15 | 		constexpr size_t MY = Y2 < Y ? Y2 : Y;
 16 | 		for (size_t i = 0; i < MX; i++)
 17 | 			for (size_t o = 0; o < MY; o++)
 18 | 				vec[i][o] = ov.vec[i][o];
 19 | 		return *this;
 20 | 	}
 21 | 
 22 | 	template <size_t X2, size_t Y2>
 23 | 	inline auto operator*(const matrix<X2, Y2>& ov)
 24 | 	{
 25 | 		constexpr size_t MinX = Y2 < X ? Y2 : X;
 26 | 		constexpr size_t MinY = X2 < Y ? X2 : Y;
 27 | 		constexpr size_t CompS = MinX < MinY ? MinX : MinY;
 28 | 
 29 | 		constexpr size_t MX = X2 < X ? X2 : X;
 30 | 		constexpr size_t MY = Y2 < Y ? Y2 : Y;
 31 | 
 32 | 		constexpr size_t SX = MX < MinX ? MinX : MX;
 33 | 		constexpr size_t SY = MY < MinY ? MinY : MY;
 34 | 
 35 | 		matrix<SX, SY> result;
 36 | 
 37 | 		for (size_t i = 0; i < MinX; i++)
 38 | 			for (size_t o = 0; o < MinY; o++)
 39 | 				result[i][o] = 0;
 40 | 
 41 | 		for (size_t i = 0; i < MinX; i++)
 42 | 			for (size_t o = 0; o < MinY; o++) {
 43 | 				for (size_t u = 0; u < CompS; u++)
 44 | 					result[i][o] += vec[i][u] * ov[u][o];
 45 | 			}
 46 | 			//vec.template Dot<vecb<float, CompS>, CompS, CompS>(ov.vec.template ColumnVec<CompS>(i), result[i]);
 47 | 
 48 | 		//Copy over the remainding data that was not be multiplied
 49 | 		for (size_t i = CompS; i < SX; i++)
 50 | 			for (size_t o = 0; o < SY; o++)
 51 | 				result[i][o] = vec[i][o];
 52 | 
 53 | 		for (size_t i = 0; i < SX; i++)
 54 | 			for (size_t o = CompS; o < SY; o++)
 55 | 				result[i][o] = vec[i][o];
 56 | 
 57 | 		return result;
 58 | 	}
 59 | 
 60 | 	template <size_t X2, size_t Y2>
 61 | 	inline auto& operator*=(const matrix<X2, Y2>& ov)
 62 | 	{
 63 | 		*this = *this * ov;
 64 | 		return *this;
 65 | 	}
 66 | 
 67 | 	template<typename T>
 68 | 	static constexpr auto GetMatrix(const T& angles, bool fromDegrees = false)
 69 | 	{
 70 | 		matrix<X, Y> vec = {vecSoa<float, X, Y>()};
 71 | 
 72 | 		const int VP = 0;
 73 | 		const int VY = 1;
 74 | 		const int VR = 2;
 75 | 
 76 | 		float s[3] = {0}, c[3] = {0};
 77 | 
 78 | 		auto it = angles;
 79 | 		if (fromDegrees)
 80 | 			it *= DEG2RAD;
 81 | 
 82 | 		for (size_t i = 0; i < 3; i++)
 83 | 			s[i] = ConstSin(it[i]);
 84 | 
 85 | 		for (size_t i = 0; i < 3; i++)
 86 | 			c[i] = ConstCos(it[i]);
 87 | 
 88 | 		vec[0][0] = c[VP] * c[VY];
 89 | 		vec[1][0] = c[VP] * s[VY];
 90 | 		vec[2][0] = -s[VP];
 91 | 
 92 | 		vec[0][1] = s[VR] * s[VP] * c[VY] + c[VR] * s[VY];
 93 | 		vec[1][1] = s[VR] * s[VP] * s[VY] - c[VR] * c[VY];
 94 | 		vec[2][1] = s[VR] * c[VP];
 95 | 
 96 | 		vec[0][2] = c[VR] * s[VP] * c[VY] + s[VR] * s[VY];
 97 | 		vec[1][2] = c[VR] * s[VP] * s[VY] - s[VR] * c[VY];
 98 | 		vec[2][2] = c[VR] * c[VP];
 99 | 
100 | 		return vec;
101 | 	}
102 | 
103 | 	inline vec3_t GetAngles(bool toDegrees = false)
104 | 	{
105 | 		vec3_t fwd = (vec3_t)vec.acc[0];
106 | 		vec3_t left = (vec3_t)vec.acc[1];
107 | 		vec3_t up = (vec3_t)vec.acc[2];
108 | 		vec3_t ret(0);
109 | 
110 | 		float xyLen = fwd.Length<2>();
111 | 
112 | 		if (xyLen > 0.001f) {
113 | 			ret[0] = atan2f(-fwd[2], xyLen);
114 | 			ret[1] = atan2f(fwd[1], fwd[0]);
115 | 			ret[2] = atan2f(left[2], up[2]);
116 | 		} else {
117 | 			ret[0] = atan2f(-fwd[2], xyLen);
118 | 			ret[1] = atan2f(-left[0], left[1]);
119 | 			ret[2] = 0;
120 | 		}
121 | 
122 | 		return toDegrees ? ret * RAD2DEG : ret;
123 | 	}
124 | 
125 | 	inline auto Inverse() const
126 | 	{
127 | 		auto ret = *this;
128 | 
129 | 		float det = vec[0][0] * (vec[1][1] * vec[2][2] - vec[2][1] * vec[1][2]) -
130 | 								vec[0][1] * (vec[1][0] * vec[2][2] - vec[1][2] * vec[2][0]) +
131 | 								vec[0][2] * (vec[1][0] * vec[2][1] - vec[1][1] * vec[2][0]);
132 | 
133 | 		float invDet = 1.f / det;
134 | 
135 | 		ret[0][0] = (vec[1][1] * vec[2][2] - vec[2][1] * vec[1][2]) * invDet;
136 | 		ret[0][1] = (vec[0][2] * vec[2][1] - vec[0][1] * vec[2][2]) * invDet;
137 | 		ret[0][2] = (vec[0][1] * vec[1][2] - vec[0][2] * vec[1][1]) * invDet;
138 | 		ret[1][0] = (vec[1][2] * vec[2][0] - vec[1][0] * vec[2][2]) * invDet;
139 | 		ret[1][1] = (vec[0][0] * vec[2][2] - vec[0][2] * vec[2][0]) * invDet;
140 | 		ret[1][2] = (vec[1][0] * vec[0][2] - vec[0][0] * vec[1][2]) * invDet;
141 | 		ret[2][0] = (vec[1][0] * vec[2][1] - vec[2][0] * vec[1][1]) * invDet;
142 | 		ret[2][1] = (vec[2][0] * vec[0][1] - vec[0][0] * vec[2][1]) * invDet;
143 | 		ret[2][2] = (vec[0][0] * vec[1][1] - vec[1][0] * vec[0][1]) * invDet;
144 | 
145 | 		return ret;
146 | 	}
147 | 
148 | 	inline auto InverseTranspose() const
149 | 	{
150 | 		auto ret = *this;
151 | 
152 | 		float det = vec[0][0] * (vec[1][1] * vec[2][2] - vec[2][1] * vec[1][2]) -
153 | 								vec[0][1] * (vec[1][0] * vec[2][2] - vec[1][2] * vec[2][0]) +
154 | 								vec[0][2] * (vec[1][0] * vec[2][1] - vec[1][1] * vec[2][0]);
155 | 
156 | 		float invDet = 1.f / det;
157 | 
158 | 		ret[0][0] = (vec[1][1] * vec[2][2] - vec[2][1] * vec[1][2]) * invDet;
159 | 		ret[0][1] = (vec[1][2] * vec[2][0] - vec[1][0] * vec[2][2]) * invDet;
160 | 		ret[0][2] = (vec[1][0] * vec[2][1] - vec[2][0] * vec[1][1]) * invDet;
161 | 		ret[1][0] = (vec[0][2] * vec[2][1] - vec[0][1] * vec[2][2]) * invDet;
162 | 		ret[1][1] = (vec[0][0] * vec[2][2] - vec[0][2] * vec[2][0]) * invDet;
163 | 		ret[1][2] = (vec[2][0] * vec[0][1] - vec[0][0] * vec[2][1]) * invDet;
164 | 		ret[2][0] = (vec[0][1] * vec[1][2] - vec[0][2] * vec[1][1]) * invDet;
165 | 		ret[2][1] = (vec[1][0] * vec[0][2] - vec[0][0] * vec[1][2]) * invDet;
166 | 		ret[2][2] = (vec[0][0] * vec[1][1] - vec[1][0] * vec[0][1]) * invDet;
167 | 
168 | 		return ret;
169 | 	}
170 | 
171 | 	template<typename T, size_t Xt = X>
172 | 	constexpr typename std::enable_if<!comp_if<Xt, 4>::value, T>::type Vector3Transform(const T& inp) const
173 | 	{
174 | 		T out(0);
175 | 
176 | 		for (size_t i = 0; i < 3; i++)
177 | 			out[i] = inp.Dot(vec[i]) + vec[i][3];
178 | 
179 | 		return out;
180 | 	}
181 | 
182 | 	template<typename T, size_t Xt = X>
183 | 	constexpr typename std::enable_if<comp_if<Xt, 4>::value, T>::type Vector3Transform(const T& inp) const
184 | 	{
185 | 		T out(0);
186 | 
187 | 		for (size_t i = 0; i < 3; i++)
188 | 			out[i] = inp.Dot(vec[i]) + vec[i][3];
189 | 
190 | 		float w = inp.Dot(vec[3]) + vec[3][3];
191 | 		w = (w <= 0 ? std::numeric_limits<float>::infinity() : 1.f / w);
192 | 		return out * w;
193 | 	}
194 | 
195 | 	template<typename T>
196 | 	constexpr auto Vector3ITransform(T inp) const
197 | 	{
198 | 		T out(0);
199 | 
200 | 		auto vecRot = vec.Rotate();
201 | 		inp -= vecRot[3];
202 | 
203 | 		for (size_t i = 0; i < 3; i++)
204 | 			out[i] = inp.Dot(vecRot[i]);
205 | 
206 | 		return out;
207 | 	}
208 | 
209 | 	template<typename T>
210 | 	constexpr T Vector3Rotate(const T& inp) const
211 | 	{
212 | 		T out(0);
213 | 
214 | 		for (size_t i = 0; i < 3; i++)
215 | 			out[i] = inp.Dot(vec[i]);
216 | 
217 | 		return out;
218 | 	}
219 | 
220 | 	template<typename T>
221 | 	constexpr T Vector3IRotate(const T& inp) const
222 | 	{
223 | 		T out(0);
224 | 
225 | 		auto vecRot = vec.Rotate();
226 | 
227 | 		for (size_t i = 0; i < 3; i++)
228 | 			out[i] = inp.Dot(vecRot[i]);
229 | 
230 | 		return out;
231 | 	}
232 | 
233 | 	template<typename T, size_t Xt = X>
234 | 	constexpr typename std::enable_if<!comp_if<Xt, 4>::value, T>::type VecSoaTransform(const T& inp) const
235 | 	{
236 | 		T out(0);
237 | 
238 | 		for (size_t i = 0; i < 3; i++)
239 | 			for (size_t o = 0; o < inp.Yt; o++)
240 | 				out[i][o] = ((vecp<float, 3>)inp.acc[o]).Dot(vec[i]) + vec[i][3];
241 | 
242 | 		return out;
243 | 	}
244 | 
245 | 	// Projective SoA transform: rows 0-2 give each point's coordinates and
246 | 	// the result is scaled by 1/w from row 3 (infinity when w <= 0).
247 | 	template<typename T, size_t Xt = X>
248 | 	constexpr typename std::enable_if<comp_if<Xt, 4>::value, T>::type VecSoaTransform(const T& inp) const
249 | 	{
250 | 		T out(0);
251 | 		float w[T::Yt]; // inp.Yt via a reference is not a constant expression before C++23 (made w a VLA)
252 | 		for (size_t i = 0; i < 3; i++)
253 | 			for (size_t o = 0; o < T::Yt; o++)
254 | 				out[i][o] = ((vecp<float, 3>)inp.acc[o]).Dot(vec[i]) + vec[i][3];
255 | 
256 | 		for (size_t i = 0; i < T::Yt; i++) {
257 | 			w[i] = ((vecp<float, 3>)inp.acc[i]).Dot(vec[3]) + vec[3][3];
258 | 			w[i] = (w[i] <= 0 ? std::numeric_limits<float>::infinity() : 1.f / w[i]);
259 | 		}
260 | 
261 | 		for (size_t i = 0; i < T::Xt; i++)
262 | 			for (size_t o = 0; o < T::Yt; o++)
263 | 				out[i][o] *= w[o];
264 | 		return out;
265 | 	}
266 | 
267 | 	template<typename T> // presumably the SoA counterpart of Vector3ITransform — TODO confirm
268 | 	constexpr auto VectorSoaITransform(const T& inp) const
269 | 	{
270 | 		T out(0);
271 | 		T temp = inp - (vecp<float, 3>)vec.acc[3]; // subtract vec.acc[3] from the input first
272 | 		// NOTE(review): Vector3ITransform dots with vec.Rotate() rows and adds no vec[i][3]; this loop uses vec[i] and adds vec[i][3] — confirm intended
273 | 		for (size_t i = 0; i < 3; i++)
274 | 			for (size_t o = 0; o < inp.Yt; o++)
275 | 				out[i][o] = ((vecp<float, 3>)temp.acc[o]).Dot(vec[i]) + vec[i][3];
276 | 
277 | 		return out;
278 | 	}
279 | 
280 | 	template<typename T, size_t N = T::Yt>
281 | 	constexpr auto WorldToScreen(const T& vec, const vecb<float, 2>& screen, bool* flags) const
282 | 	{
283 | 		auto out = VecSoaTransform(vec);
284 | 
285 | 		constexpr size_t MX = 2 < T::Xt ? 2 : T::Xt;
286 | 
287 | 		for (size_t o = 0; o < T::Yt; o++)
288 | 			flags[o] = true;
289 | 
290 | 		for (size_t i = 0; i < MX; i++)
291 | 			for (size_t o = 0; o < T::Yt; o++)
292 | 				if (out[i][o] > screen[i])
293 | 					flags[o] = false;
294 | 
295 | 		for (size_t o = 0; o < T::Yt; o++)
296 | 			if (flags[o]) {
297 | 				out[0][o] = screen[0] * 0.5f + out[0][o] * (screen[0] * 0.5f);
298 | 				out[1][o] = screen[1] * 0.5f - out[1][o] * (screen[1] * 0.5f);
299 | 			}
300 | 
301 | 		return out;
302 | 	}
303 | 
304 | 	// Runs vec through Vector3Transform. status reports whether both resulting
305 | 	// coordinates are <= screen; only then are they remapped with screen * 0.5.
306 | 	template<typename T>
307 | 	constexpr auto WorldToScreen(const T& vec, const vecb<float, 2>& screen, bool& status) const
308 | 	{
309 | 		auto projected = Vector3Transform(vec);
310 | 		const bool inBounds = projected[0] <= screen[0] && projected[1] <= screen[1];
311 | 
312 | 		if (inBounds) {
313 | 			// the x term is added to the half extent, the y term is subtracted
314 | 			projected[0] = screen[0] * 0.5f + projected[0] * screen[0] * 0.5f;
315 | 			projected[1] = screen[1] * 0.5f - projected[1] * screen[1] * 0.5f;
316 | 		}
317 | 
318 | 		status = inBounds;
319 | 		return projected;
320 | 	}
321 | 
322 | 	constexpr float* operator[](int idx)
323 | 	{
324 | 		return vec.v[idx];
325 | 	}
326 | 
327 | 	constexpr const float* operator[](int idx) const
328 | 	{
329 | 		return vec.v[idx];
330 | 	}
331 | };
332 | 
333 | typedef matrix<4,4> matrix4x4;
334 | #endif
335 | 


--------------------------------------------------------------------------------
/math/mm_funcs.h:
--------------------------------------------------------------------------------
 1 | #ifndef MM_FUNCS_H
 2 | #define MM_FUNCS_H
 3 | 
 4 | //To template the various SIMD operations we need templatable types and functions. Writing those by hand is tedious, so the macros below generate them and make it easy to add more later on
 5 | 
 6 | template<size_t sz, typename F>
 7 | struct m_
 8 | {
 9 | 	struct type {
10 | 	};
11 | };
12 | 
13 | [[noreturn]]
14 | static inline void throwfunc()
15 | {
16 | 	throw;
17 | }
18 | 
19 | template<size_t sz, typename F>
20 | using _m = typename m_<sz, F>::type;
21 | 
22 | template<auto val>
23 | using EnableIf = typename std::enable_if<val, std::true_type>::type;
24 | 
25 | #define GEN_FUNCC(RFUNC, FUNC, SIZE, ...)								\
26 | 	template<size_t sz, typename F, EnableIf<sz == SIZE && __VA_ARGS__>* = nullptr, typename... Args> constexpr auto RFUNC(Args... args) \
27 | 	{																	\
28 | 		return FUNC(args...);											\
29 | 	}
30 | 
31 | #define GEN_FUNC(RFUNC, type, FUNC, SIZE) GEN_FUNCC(RFUNC, FUNC, SIZE, std::is_same<type, F>::value)
32 | 
33 | #define PASTETWO(A, B) A##B
34 | 
35 | #define GEN_FUNC_SI2(RFUNC, FUNC, SIZE)							\
36 | 	GEN_FUNC(RFUNC, float, PASTETWO(FUNC, _ps), SIZE);			\
37 | 	GEN_FUNC(RFUNC, double, PASTETWO(FUNC, _pd), SIZE);			\
38 | 	GEN_FUNCC(RFUNC, PASTETWO(FUNC, _pd), SIZE, std::is_integral<F>::value); \
39 | 
40 | #define GEN_FUNC_EPI2(RFUNC, FUNC, SIZE)								\
41 | 	GEN_FUNC(RFUNC, float, PASTETWO(FUNC, _ps), SIZE);					\
42 | 	GEN_FUNC(RFUNC, double, PASTETWO(FUNC, _pd), SIZE);					\
43 | 	GEN_FUNCC(RFUNC, PASTETWO(FUNC, _epi8), SIZE, std::is_integral<F>::value && sizeof(F) == 1); \
44 | 	GEN_FUNCC(RFUNC, PASTETWO(FUNC, _epi16), SIZE, std::is_integral<F>::value && sizeof(F) == 2); \
45 | 	GEN_FUNCC(RFUNC, PASTETWO(FUNC, _epi32), SIZE, std::is_integral<F>::value && sizeof(F) == 4); \
46 | 	GEN_FUNCC(RFUNC, PASTETWO(FUNC, _epi64), SIZE, std::is_integral<F>::value && sizeof(F) == 8); \
47 | 
48 | #define GEN_FNAME_128(NAME) PASTETWO(_mm_,NAME)
49 | #define GEN_FNAME_256(NAME) PASTETWO(_mm256_,NAME)
50 | #define GEN_FNAME_512(NAME) PASTETWO(_mm512_,NAME)
51 | 
52 | #define CALL(F, ...)F(__VA_ARGS__)
53 | 
54 | #define MM_NAME(NAME, SIZE) CALL(GEN_FNAME_##SIZE, NAME)
55 | 
56 | #define GEN_FUNC_SI(FUNC, SIZE) GEN_FUNC_SI2(_mm_##FUNC, MM_NAME(FUNC, SIZE), SIZE)
57 | #define GEN_FUNC_EPI(FUNC, SIZE) GEN_FUNC_EPI2(_mm_##FUNC, MM_NAME(FUNC, SIZE), SIZE)
58 | 
59 | #define GEN_FUNCS(SIZE)			\
60 | 	GEN_FUNC_SI(loadu, SIZE);	\
61 | 	GEN_FUNC_SI(storeu, SIZE);	\
62 | 	GEN_FUNC_EPI(add, SIZE);	\
63 | 	GEN_FUNC_EPI(sub, SIZE);	\
64 | 
65 | #define DEFINE_MM(SIZE)													\
66 | 	template<> struct m_<SIZE, float> { using type = __m##SIZE; };		\
67 | 	template<> struct m_<SIZE, double> { using type = __m##SIZE##d; };	\
68 | 	template<typename F> struct m_<SIZE, std::enable_if<std::is_integral<F>::value, F>> { using type = __m##SIZE##i; }; \
69 | 	GEN_FUNCS(SIZE);													\
70 | 
71 | #endif
72 | 


--------------------------------------------------------------------------------
/math/mmath.h:
--------------------------------------------------------------------------------
  1 | #ifndef MMATH_H
  2 | #define MMATH_H
  3 | 
  4 | #include "../utils/shared_utils.h"
  5 | #include "../wincludes.h"
  6 | #include <stddef.h>
  7 | #include <type_traits>
  8 | #define _USE_MATH_DEFINES
  9 | #include <math.h>
 10 | #include <cmath>
 11 | #include <algorithm>
 12 | 
 13 | #include "mm_funcs.h"
 14 | 
 15 | #ifndef _MSC_VER
 16 | #include <nmmintrin.h>
 17 | #endif
 18 | 
 19 | //This cuts compile times when avx512 is not in use
 20 | //#include "clang_avx512.h"
 21 | 
 22 | #if defined(__clang__) && defined(_MSC_VER)
 23 | #pragma push_macro("_MM_HINT_T0")
 24 | #undef _MM_HINT_T0
 25 | #pragma push_macro("_MM_HINT_T1")
 26 | #undef _MM_HINT_T1
 27 | #pragma push_macro("_MM_HINT_T2")
 28 | #undef _MM_HINT_T2
 29 | #endif
 30 | #include <xmmintrin.h>
 31 | 
 32 | #include <immintrin.h>
 33 | #include <emmintrin.h>
 34 | #include <smmintrin.h>
 35 | #if defined(__clang__) && defined(_MSC_VER)
 36 | #pragma pop_macro("_MM_HINT_T0")
 37 | #pragma pop_macro("_MM_HINT_T1")
 38 | #pragma pop_macro("_MM_HINT_T2")
 39 | #endif
 40 | 
 41 | #if defined(OVERRIDE)
 42 | const int SIMD_COUNT = OVERRIDE;
 43 | #elif defined(__AVX512F__) || defined(__AVX512CD__) || defined(__AVX512ER__)
 44 | #define PSIMD 16
 45 | const int SIMD_COUNT = 16;
 46 | typedef short simdFlags;
 47 | DEFINE_MM(128);
 48 | DEFINE_MM(256);
 49 | DEFINE_MM(512);
 50 | #elif defined(__AVX__) || defined(__AVX2__)
 51 | #define PSIMD 8
 52 | const int SIMD_COUNT = 8;
 53 | typedef char simdFlags;
 54 | DEFINE_MM(128);
 55 | DEFINE_MM(256);
 56 | #elif defined(__SSE__) || defined(__SSE2__) || defined(__SSE2_MATH__) || defined(_M_IX86_FP) || (defined(_M_AMD64) || defined(_M_X64))
 57 | #define PSIMD 4
 58 | const int SIMD_COUNT = 4;
 59 | typedef char simdFlags;
 60 | DEFINE_MM(128);
 61 | #else
 62 | const int SIMD_COUNT = 1;
 63 | typedef char simdFlags;
 64 | #endif
 65 | 
 66 | template<typename W, size_t Q>
 67 | struct max_sse
 68 | {
 69 | 	static const bool value = (Q * sizeof(W) == 16 && SIMD_COUNT >= 4);
 70 | };
 71 | 
 72 | template<typename W, size_t Q>
 73 | struct max_avx
 74 | {
 75 | 	static const bool value = (Q * sizeof(W) == 32 && SIMD_COUNT >= 8);
 76 | };
 77 | 
 78 | template<typename W, size_t Q>
 79 | struct max_avx512
 80 | {
 81 | 	static const bool value = (Q * sizeof(W) == 64 && SIMD_COUNT >= 16);
 82 | };
 83 | 
 84 | template<typename W, size_t Q>
 85 | struct do_avx512
 86 | {
 87 | 	static const bool value = SIMD_COUNT >= 16 && !((Q * 8 * sizeof(W)) % (8 * 4 * 16));
 88 | };
 89 | 
 90 | template<typename W, size_t Q>
 91 | struct do_avx
 92 | {
 93 | 	static const bool value = !do_avx512<W, Q>::value && SIMD_COUNT >= 8 && !((Q * 8 * sizeof(W)) % (8 * 4 * 8));
 94 | };
 95 | 
 96 | template<typename W, size_t Q>
 97 | struct do_sse
 98 | {
 99 | 	static const bool value = !do_avx<W, Q>::value && !do_avx512<W, Q>::value && SIMD_COUNT >= 4 && !((Q * 8 * sizeof(W)) % (8 * 4 * 4));
100 | };
101 | 
102 | template<typename W, size_t Q>
103 | struct do_simd
104 | {
105 | 	static const bool value = do_sse<W, Q>::value || do_avx<W, Q>::value || do_avx512<W, Q>::value;
106 | };
107 | 
108 | template<size_t A, size_t B>
109 | struct comp_if
110 | {
111 | 	static const bool value = (A == B);
112 | };
113 | 
114 | template<size_t N>
115 | constexpr int NumOf(const int val)
116 | {
117 | 	return (val - 1) / N + 1;
118 | }
119 | 
120 | constexpr int NumOfSIMD(const int val)
121 | {
122 | 	return NumOf<SIMD_COUNT>(val);
123 | }
124 | 
125 | #include <type_traits>
126 | 
127 | template<typename T> // number of set bits in inp
128 | constexpr T PopCnt(T inp)
129 | {
130 | #ifdef __GNUC__
131 | 	if (sizeof(inp) == 8)
132 | 		return __builtin_popcountll(inp);
133 | 	return __builtin_popcount(inp);
134 | #else
135 | 	// Count every set bit; the old loop stopped at the first clear bit
136 | 	T bits = 0;
137 | 	for (size_t i = 0; i < sizeof(inp) * 8; i++)
138 | 		bits += T((inp >> i) & 1);
139 | 	return bits;
140 | #endif
141 | }
142 | 
143 | constexpr size_t Clz(size_t inp)
144 | {
145 | #ifdef __GNUC__
146 | 	if (sizeof(inp) == 8)
147 | 		return __builtin_clzll(inp);
148 | 	return __builtin_clz(inp);
149 | #else
150 | 	for (size_t i = 0; (1 << i) < sizeof(size_t) * 8; i++)
151 | 		inp |= inp >> (1 << i);
152 | 	return sizeof(inp) * 8 - PopCnt(inp);
153 | #endif
154 | }
155 | 
156 | constexpr size_t AlignUp(size_t inp) // 1 for inp <= 1, otherwise 1 << (bit width of inp)
157 | {
158 | 	if (inp <= 1)
159 | 		return 1;
160 | 	return size_t(1) << (sizeof(size_t) * 8 - Clz(inp)); // NOTE(review): an exact power of two maps to the next one (4 -> 8) — confirm intended
161 | }
162 | 
163 | 
164 | template<typename T, typename std::enable_if<std::is_floating_point<T>::value>::type* p = nullptr>
165 | constexpr T Modulo(const T x, const T y)
166 | {
167 | 	return (x < T() ? T(-1) : T(1)) * (
168 | 		(x < T() ? -x : x) -
169 | 		(long long)((x / y < T() ? -x / y : x / y)) * (y < T() ? -y : y));
170 | }
171 | 
172 | // For non-floating point types
173 | 
174 | template<typename T>
175 | using TypeToCast = typename std::conditional<std::is_floating_point<T>::value, int, T>::type;
176 | 
177 | template<typename T, typename std::enable_if<!std::is_floating_point<T>::value>::type* p = nullptr>
178 | constexpr T Modulo(const T x, const T y)
179 | {
180 | 	return (TypeToCast<T>)(x) % (TypeToCast<T>)(y);
181 | }
182 | 
183 | template<typename T>
184 | [[deprecated("Duplicate function")]]
185 | inline T TMod(T val, T lim)
186 | {
187 | 	return std::remainder(val, lim);
188 | }
189 | 
190 | constexpr float NormalizeFloat(float result, float start, float end)
191 | {
192 | 	result = Modulo(result - start, end - start);
193 | 
194 | 	if (result < 0.f)
195 | 		result += end - start;
196 | 
197 | 	return result + start;
198 | }
199 | 
200 | template<typename T>
201 | constexpr T NormalizeInRange(T result, T start, T end)
202 | {
203 | 	result = Modulo(result - start, end - start);
204 | 
205 | 	if (result < 0)
206 | 		result += end - start;
207 | 
208 | 	return result + start;
209 | }
210 | 
211 | //This overload should never actually be called, but it must exist for the recursive GetElementAt below to compile
212 | template<typename T>
213 | constexpr T GetElementAt([[maybe_unused]] size_t id)
214 | {
215 | 	return T();
216 | }
217 | 
218 | template<typename F, typename... T>
219 | constexpr F GetElementAt(size_t id, F arg, T... args)
220 | {
221 | 	constexpr size_t sz = sizeof...(args);
222 | 	return (id && sz) ? GetElementAt<F>(id - 1, args...) : arg;
223 | }
224 | 
225 | template<typename T>
226 | constexpr T Max(T a, T b)
227 | {
228 | 	return a > b ? a : b;
229 | }
230 | 
231 | template<typename T>
232 | constexpr T Min(T a, T b)
233 | {
234 | 	return a < b ? a : b;
235 | }
236 | 
237 | template<typename T>
238 | constexpr T Abs(T val)
239 | {
240 | 	return val < 0 ? -val : val;
241 | }
242 | 
243 | template <typename T>
244 | constexpr T TrigSeries(T val, T sum, T n, int i, int s, T exp)
245 | {
246 | 	return Abs(exp * s / n) > std::numeric_limits<T>::epsilon() ? TrigSeries(val, sum + exp * s / n, n * i * (i + 1), i + 2, -s, exp * val * val) : sum;
247 | }
248 | 
249 | template<typename T>
250 | constexpr T ConstSin(T val)
251 | {
252 | 	val = NormalizeInRange(val, T(-M_PI), T(M_PI));
253 | 	return TrigSeries(val, val, T(6), 4, -1, val * val * val);
254 | }
255 | 
256 | template<typename T>
257 | constexpr T ConstCos(T val)
258 | {
259 | 	return ConstSin(val + M_PI / 2);
260 | }
261 | 
262 | constexpr float RAD2DEG = (float)(180.0 / M_PI);
263 | constexpr float DEG2RAD = (float)(M_PI / 180.0);
264 | 
265 | #include "vector.h"
266 | 
267 | #endif
268 | 


--------------------------------------------------------------------------------
/math/soa_accessor.h:
--------------------------------------------------------------------------------
 1 | #ifndef SOA_ACCESSOR_H
 2 | #define SOA_ACCESSOR_H
 3 | 
 4 | #define REF() &&
 5 | #define X(...)
 6 | 
 7 | #define SOA_VECTOR_CAST(type)					\
 8 | 	template<size_t B>							\
 9 | 	explicit inline operator type<T, B>() {		\
10 | 		type<T, B> ret;							\
11 | 		constexpr size_t mv = B < X ? B : X;	\
12 | 		auto& it = *this;						\
13 | 		for (size_t i = 0; i < mv; i++)			\
14 | 			ret[i] = it[i];						\
15 | 		return ret;								\
16 | 	}
17 | 
18 | #define SOA_SCALAR_ASIGNMENT			\
19 | 	inline auto& operator=(T val)		\
20 | 	{									\
21 | 		auto& it = *this;				\
22 | 		for (size_t i = 0; i < X; i++)	\
23 | 			it[i] = val;				\
24 | 		return it;						\
25 | 	}
26 | 
27 | #define SOA_ASIGNMENT(type)						\
28 | 	template<size_t B>							\
29 | 	inline auto& operator=(type<T, B> vec)		\
30 | 	{											\
31 | 		constexpr size_t mv = B < X ? B : X;	\
32 | 		auto& it = *this;						\
33 | 		for (size_t i = 0; i < mv; i++)			\
34 | 			it[i] = vec[i];						\
35 | 		return it;								\
36 | 	}
37 | 
38 | //XYZ might be invalid on vecSoa, depending on how many columns there are
39 | #define DEFINE_SOA_ACCESSOR								\
40 | 	struct {											\
41 | 		struct SoaAccessor {							\
42 | 			T x;										\
43 | 			T px[Y - 1];								\
44 | 			T y;										\
45 | 			T py[Y - 1];								\
46 | 			T z;										\
47 | 														\
48 | 			inline T& operator[](size_t idx)			\
49 | 			{											\
50 | 				return (&x)[(int)idx * (int)Y];			\
51 | 			}											\
52 | 														\
53 | 			inline auto& Set(SoaAccessor& acc)			\
54 | 			{											\
55 | 				for(size_t i = 0; i < X; i++)			\
56 | 					(*this)[i] = acc[i];				\
57 | 				return *this;							\
58 | 			}											\
59 | 														\
60 | 			SOA_ASIGNMENT(vecb);						\
61 | 			SOA_ASIGNMENT(vecp);						\
62 | 			SOA_SCALAR_ASIGNMENT;						\
63 | 			SOA_VECTOR_CAST(vecb);						\
64 | 			SOA_VECTOR_CAST(vecp);						\
65 | 		} acc2;											\
66 | 														\
67 | 		inline auto& operator[](size_t idx) const		\
68 | 		{												\
69 | 			return *(SoaAccessor*)(((T*)&acc2)+idx);	\
70 | 		}												\
71 | 	} acc;
72 | 
73 | #endif
74 | 


--------------------------------------------------------------------------------
/math/vec_funcs.h:
--------------------------------------------------------------------------------
  1 | //Be sure to only include in a vector class and have VEC_TYPE defined (it gets automatically undefined)
  2 | 
  3 | #ifdef VEC_TYPE
  4 | 
  5 | constexpr VEC_TYPE() = default;
  6 | 
  7 | template<typename F, typename = typename std::enable_if<AllArithmetic<F>::value>::type>
  8 | constexpr VEC_TYPE(F arg) : v()
  9 | {
 10 | 	for (size_t i = 0; i < N; i++)
 11 | 		v[i] = (T)arg;
 12 | }
 13 | 
 14 | 
 15 | template<typename F, size_t SZ, typename = typename std::enable_if<AllArithmetic<F>::value>::type>
 16 | constexpr VEC_TYPE(const F (&args)[SZ]) : v()
 17 | {
 18 | 	for (size_t i = 0; i < N; i++)
 19 | 		v[i] = args[i % SZ];
 20 | }
 21 | 
 22 | template<typename... F, typename = typename std::enable_if<AllArithmetic<F...>::value>::type>
 23 | constexpr VEC_TYPE(F... args) : v()
 24 | {
 25 | 	constexpr size_t elementCount = sizeof...(args);
 26 | 	for (size_t i = 0; i < N; i++)
 27 | 		v[i] = GetElementAt<T>(i % elementCount, args...);
 28 | }
 29 | 
 30 | inline auto& Assign(T val)
 31 | {
 32 | 	for (size_t i = 0; i < N; i++)
 33 | 		v[i] = val;
 34 | 	return *this;
 35 | }
 36 | 
 37 | 
 38 | template <size_t D>
 39 | constexpr T Dot(const VEC_TYPE& o) const
 40 | {
 41 | 	T val = 0;
 42 | 	for (size_t i = 0; i < D; i++)
 43 | 		val += v[i] * o.v[i];
 44 | 	return val;
 45 | }
 46 | 
 47 | template <size_t D>
 48 | constexpr T Dot(const T* o) const
 49 | {
 50 | 	T val = 0;
 51 | 	for (size_t i = 0; i < D; i++)
 52 | 		val += v[i] * o[i];
 53 | 	return val;
 54 | }
 55 | 
 56 | template <size_t D>
 57 | constexpr T LengthSqr() const
 58 | {
 59 | 	return Dot<D>(*this);
 60 | }
 61 | 
 62 | template <size_t D>
 63 | constexpr T Length() const
 64 | {
 65 | 	return sqrt(Dot<D>(*this));
 66 | }
 67 | 
 68 | template <size_t D>
 69 | inline auto& Sqrt()
 70 | {
 71 | 	constexpr size_t Md = D > N ? N : D;
 72 | 	VSqrt<T, Md>(v);
 73 | 	return *this;
 74 | }
 75 | 
 76 | template <size_t D>
 77 | inline auto& NormalizeAngles(T start, T end)
 78 | {
 79 | 	for (size_t i = 0; i < D; i++)
 80 | 		v[i] = std::fmod(std::fmod(v[i] - start, end - start) + (end - start), end - start) + start;
 81 | 	return *this;
 82 | }
 83 | 
 84 | constexpr T Dot(const VEC_TYPE& o) const
 85 | {
 86 | 	return Dot<N>(o);
 87 | }
 88 | 
 89 | constexpr T Dot(const T* o) const
 90 | {
 91 | 	return Dot<N>(o);
 92 | }
 93 | 
 94 | constexpr T LengthSqr() const
 95 | {
 96 | 	return LengthSqr<N>();
 97 | }
 98 | 
 99 | constexpr T Length() const
100 | {
101 | 	return Length<N>();
102 | }
103 | 
104 | inline auto& Sqrt()
105 | {
106 | 	return Sqrt<N>();
107 | }
108 | 
109 | inline auto Normalized() const
110 | {
111 | 	auto val = *this;
112 | 	float l = val.Length();
113 | 	val *= l ? 1 / l : 0;
114 | 	return val;
115 | }
116 | 
117 | inline auto& Normalize()
118 | {
119 | 	*this = Normalized();
120 | 	return *this;
121 | }
122 | 
123 | template <size_t D>
124 | inline T DistTo(const VEC_TYPE& o) const
125 | {
126 | 	return (*this - o).template Length<D>();
127 | }
128 | 
129 | inline T DistTo(const VEC_TYPE& o) const
130 | {
131 | 	return DistTo<N>(o);
132 | }
133 | 
134 | template <size_t D>
135 | constexpr T DistToSqr(const VEC_TYPE& o) const
136 | {
137 | 	return (*this - o).template LengthSqr<D>();
138 | }
139 | 
140 | constexpr T DistToSqr(const VEC_TYPE& o) const
141 | {
142 | 	return DistToSqr<N>(o);
143 | }
144 | 
145 | inline auto DirToRay(const VEC_TYPE& a, const VEC_TYPE& b) const
146 | {
147 | 	auto c = *this - a;
148 | 	auto d = b - a;
149 | 
150 | 	T t = c.Dot(d) / d.LengthSqr();
151 | 
152 | 	return a + t * d;
153 | }
154 | 
155 | inline auto DirToLine(const VEC_TYPE& a, const VEC_TYPE& b) const
156 | {
157 | 	auto c = *this - a;
158 | 	auto d = b - a;
159 | 
160 | 	T t = std::clamp(c.Dot(d) / d.LengthSqr(), T(0), T(1));
161 | 
162 | 	return a + t * d;
163 | }
164 | 
165 | constexpr auto GetRight() const // cross product of *this with (0, 0, 1)
166 | {
167 | 	if (v[0] == 0 && v[1] == 0) // was v[0] == v[1] == 0, i.e. (v[0] == v[1]) == 0; that cross product is zero here
168 | 		return VEC_TYPE(0, -1, 0);
169 | 	return this->Cross(VEC_TYPE(0, 0, 1));
170 | }
171 | 
172 | constexpr auto GetUp() const // cross product of GetRight() with *this
173 | {
174 | 	if (v[0] == 0 && v[1] == 0) // was v[0] == v[1] == 0, which parses as (v[0] == v[1]) == 0
175 | 		return VEC_TYPE(-v[2], 0, 0);
176 | 	return GetRight().Cross(*this);
177 | }
178 | 
179 | template<size_t Q = N>
180 | constexpr typename std::enable_if<comp_if<Q, 3>::value, VEC_TYPE<T, 3>>::type
181 | Cross(const VEC_TYPE& o) const
182 | {
183 | 	VEC_TYPE<T, 3> ret(0);
184 | 	ret[0] = v[1] * o[2] - v[2] * o[1];
185 | 	ret[1] = v[2] * o[0] - v[0] * o[2];
186 | 	ret[2] = v[0] * o[1] - v[1] * o[0];
187 | 	return ret;
188 | }
189 | 
190 | 
191 | template<size_t Q = N>
192 | inline typename std::enable_if<comp_if<Q, 3>::value, VEC_TYPE<T, 3>&>::type
193 | ToAngles()
194 | {
195 | 	T y, x, len;
196 | 	y = atan2(v[1], v[0]);
197 | 
198 | 	len = Length<2>();
199 | 
200 | 	x = atan2(-v[2], len);
201 | 
202 | 	v[0] = x;
203 | 	v[1] = y;
204 | 	v[2] = 0;
205 | 
206 | 	return *this;
207 | }
208 | 
209 | template<size_t Q = N>
210 | inline typename std::enable_if<comp_if<Q, 3>::value, VEC_TYPE<T, 3>>::type
211 | GetAngles(bool toDegrees = false) const
212 | {
213 | 	auto ret = *this;
214 | 	ret.ToAngles();
215 | 	if (toDegrees)
216 | 		ret *= RAD2DEG;
217 | 	return ret;
218 | }
219 | 
220 | template<size_t Q = N>
221 | inline typename std::enable_if<comp_if<Q, 3>::value, VEC_TYPE<T, 3>&>::type
222 | GetVectors(VEC_TYPE& __restrict forward, VEC_TYPE& __restrict right, VEC_TYPE& __restrict up, bool fromDegrees = false)
223 | {
224 | 	const int VP = 0;
225 | 	const int VY = 1;
226 | 	const int VR = 2;
227 | 
228 | 	T s[3], c[3];
229 | 
230 | 	auto it = *this;
231 | 	if (fromDegrees)
232 | 		it *= DEG2RAD;
233 | 
234 | 	for (size_t i = 0; i < 3; i++)
235 | 		s[i] = std::sin(it[i]);
236 | 
237 | 	for (size_t i = 0; i < 3; i++)
238 | 		c[i] = std::cos(it[i]);
239 | 
240 | 	forward[0] = c[VP] * c[VY];
241 | 	forward[1] = c[VP] * s[VY];
242 | 	forward[2] = -s[VP];
243 | 
244 | 	right[0] = -s[VR] * s[VP] * c[VY] + c[VR] * s[VY];
245 | 	right[1] = -s[VR] * s[VP] * s[VY] - c[VR] * c[VY];
246 | 	right[2] = -s[VR] * c[VP];
247 | 
248 | 	up[0] = c[VR] * s[VP] * c[VY] + s[VR] * s[VY];
249 | 	up[1] = c[VR] * s[VP] * s[VY] - s[VR] * c[VY];
250 | 	up[2] = c[VR] * c[VP];
251 | 
252 | 	return *this;
253 | }
254 | 
255 | template<size_t dim, size_t Q = N>
256 | inline typename std::enable_if<comp_if<Q, 3>::value, VEC_TYPE<T, 3>&>::type
257 | Rotate(T angle)
258 | {
259 | 	T s, c;
260 | 	s = std::sin(angle);
261 | 	c = std::cos(angle);
262 | 
263 | 	constexpr size_t iX = (dim + 1) % 3;
264 | 	constexpr size_t iY = (dim + 2) % 3;
265 | 
266 | 	T xn = v[iX] * c - v[iY] * s;
267 | 	T yn = v[iX] * s + v[iY] * c;
268 | 
269 | 	v[iX] = xn;
270 | 	v[iY] = yn;
271 | 
272 | 	return *this;
273 | }
274 | 
275 | constexpr auto Min(const VEC_TYPE& ov)
276 | {
277 | 	VEC_TYPE ret(0);
278 | 
279 | 	for (size_t i = 0; i < N; i++)
280 | 		ret[i] = ::Min(v[i], ov[i]);
281 | 
282 | 	return ret;
283 | }
284 | 
285 | constexpr auto Max(const VEC_TYPE& ov)
286 | {
287 | 	VEC_TYPE ret(0);
288 | 
289 | 	for (size_t i = 0; i < N; i++)
290 | 		ret[i] = ::Max(v[i], ov[i]);
291 | 
292 | 	return ret;
293 | }
294 | 
295 | constexpr auto MinUp()
296 | {
297 | 	T ret = std::numeric_limits<T>::max();
298 | 
299 | 	for (size_t i = 0; i < N; i++)
300 | 		ret = ::Min(ret, v[i]);
301 | 
302 | 	return ret;
303 | }
304 | 
305 | // Returns the largest of the N components.
306 | constexpr auto MaxUp()
307 | {
308 | 	// lowest(), not min(): for floating point min() is the smallest positive value
309 | 	T ret = std::numeric_limits<T>::lowest();
310 | 	for (size_t i = 0; i < N; i++)
311 | 		ret = ::Max(ret, v[i]);
312 | 	return ret;
313 | }
314 | 
315 | constexpr auto Lerp(const VEC_TYPE& ov, float time)
316 | {
317 | 	return *this + time * (ov - *this);
318 | }
319 | 
320 | constexpr auto LerpClamped(const VEC_TYPE& ov, float time)
321 | {
322 | 	return *this + ::Min(1.f, ::Max(0.f, time)) * (ov - *this);
323 | }
324 | 
325 | #undef VEC_TYPE
326 | #endif
327 | 


--------------------------------------------------------------------------------
/math/vecsoa_funcs.h:
--------------------------------------------------------------------------------
  1 | /*
  2 | 	These functions should only be included inside SoA vector structures.
  3 | 	Define SOA_TYPE before including, it will be undefined afterwards.
  4 | */
  5 | 
  6 | 
  7 | #ifdef SOA_TYPE
  8 | 
  9 | constexpr SOA_TYPE() = default;
 10 | 
 11 | template<typename F, typename = typename std::enable_if<AllArithmetic<F>::value>::type>
 12 | constexpr SOA_TYPE(F arg) : v()
 13 | {
 14 | 	for (size_t i = 0; i < Xt; i++)
 15 | 		for (size_t o = 0; o < Yt; o++)
 16 | 			v[i][o] = (T)arg;
 17 | }
 18 | 
 19 | template<typename... F, typename = typename std::enable_if<AllArithmetic<F...>::value>::type>
 20 | constexpr SOA_TYPE(F... args) : v()
 21 | {
 22 | 	constexpr size_t elementCount = sizeof...(args);
 23 | 
 24 | 	for (size_t i = 0; i < Xt; i++)
 25 | 		for (size_t o = 0; o < Yt; o++)
 26 | 			v[i][o] = GetElementAt(o % elementCount, args...);
 27 | }
 28 | 
 29 | template<size_t Y2>
 30 | inline auto& ColumnVec(int col) const
 31 | {
 32 | 	return *(vecb<T, Y2>*)v[col];
 33 | }
 34 | 
 35 | inline auto& ColumnVec(int col) const
 36 | {
 37 | 	return ColumnVec<Y>(col);
 38 | }
 39 | 
 40 | //Micro-optimized version for 4 sized vector chunks since
 41 | //Clang did not want to generate SIMD code on a normal loop
 42 | template<size_t SWidth, typename F = T, size_t Q = Y, size_t D = X>
 43 | inline void AddUpDimSIMD(int dim, F vv[D][Q])
 44 | {
 45 | 	static constexpr size_t Elems = SWidth / (8 * sizeof(F));
 46 | 
 47 | 	if (!dim)
 48 | 		return;
 49 | 
 50 | 	for (size_t i = 0; i < Q / Elems; i++) {
 51 | 		_m<SWidth, F> a = _mm_loadu<SWidth, F>(v[dim-1] + i * Elems);
 52 | 		_m<SWidth, F> b = _mm_loadu<SWidth, F>(vv[dim] + i * Elems);
 53 | 		a = _mm_add<SWidth, F>(a, b);
 54 | 		_mm_storeu<SWidth, F>(vv[dim-1] + i * Elems, a);
 55 | 	}
 56 | 
 57 | 	AddUpDimSIMD<SWidth, T, Y, X>(--dim, vv);
 58 | }
 59 | 
 60 | 
 61 | template<typename F = T, size_t Q = Y, size_t D = X>
 62 | inline typename std::enable_if<do_sse<F, Q>::value, void>::type AddUpDim(int dim, F vv[D][Q])
 63 | {
 64 | 	AddUpDimSIMD<128, F, Q, D>(dim, vv);
 65 | }
 66 | 
 67 | template<typename F = T, size_t Q = Y, size_t D = X>
 68 | inline typename std::enable_if<do_avx<F, Q>::value, void>::type AddUpDim(int dim, F vv[D][Q])
 69 | {
 70 | 	AddUpDimSIMD<256, F, Q, D>(dim, vv);
 71 | }
 72 | 
 73 | template<typename F = T, size_t Q = Y, size_t D = X>
 74 | inline typename std::enable_if<do_avx512<F, Q>::value, void>::type AddUpDim(int dim, F vv[D][Q])
 75 | {
 76 | 	AddUpDimSIMD<512, F, Q, D>(dim, vv);
 77 | }
 78 | 
 79 | template<typename F = T, size_t Q = Y, size_t D = X>
 80 | inline typename std::enable_if<!do_simd<F, Q>::value, void>::type AddUpDim(int dim, F vv[X][Q])
 81 | {
 82 | 	if (!dim)
 83 | 		return;
 84 | 
 85 | 	for(; dim > 0; dim--)
 86 | 		for (size_t o = 0; o < Y; o++)
 87 | 			vv[dim-1][o] = v[dim-1][o] + vv[dim][o];
 88 | }
 89 | 
 90 | template <size_t D>
 91 | inline auto& AddUp()
 92 | {
 93 | 	AddUpDim<T, Y, X>(D-1, v);
 94 | 	return *this;
 95 | }
 96 | 
 97 | // Sums the first D rows lane-wise into row 0 (AddUpDim), then folds all Y
 98 | // lanes of row 0 into v[0][0], using the same lane bound as AddedUpTotal.
 99 | template <size_t D>
100 | inline auto& AddUpTotal()
101 | {
102 | 	AddUpDim<T, Y, X>(D-1, v);
103 | 	for (size_t i = Y - 1; i > 0; i--) // lanes are counted by Y, not by the row count D
104 | 		v[0][i - 1] += v[0][i];
105 | 	return *this;
106 | }
107 | 
108 | // Returns the sum of every lane of the first D rows without modifying v.
109 | template <size_t D>
110 | inline T AddedUpTotal()
111 | {
112 | 	T temp[D][Y];
113 | 	// Seed the last row with v[D-1]: AddUpDim only adds the rows below it, so
114 | 	// the previous zero seed dropped that row (and returned 0 for D == 1)
115 | 	for (size_t o = 0; o < Y; o++)
116 | 		temp[D - 1][o] = v[D - 1][o];
117 | 	AddUpDim<T, Y, X>(D - 1, temp);
118 | 	for (size_t i = Y - 1; i > 0; i--)
119 | 		temp[0][i - 1] += temp[0][i];
120 | 
121 | 	return temp[0][0];
122 | }
123 | 
124 | inline T AddedUpTotal()
125 | {
126 | 	return AddedUpTotal<X>();
127 | }
128 | 
129 | //Constant array functions
130 | template <typename F, size_t D, size_t Y2>
131 | inline void Dot(const F& ov, T val[Y2]) const
132 | {
133 | 	SOA_TYPE nv = *this * ov;
134 | 	nv.AddUp<D>();
135 | 
136 | 	for (size_t i = 0; i < Y2; i++)
137 | 		val[i] = nv[0][i];
138 | }
139 | 
140 | template <size_t D>
141 | inline void Dot(const SOA_TYPE& ov, T val[Y]) const
142 | {
143 | 	Dot<SOA_TYPE, D, Y>(ov, val);
144 | }
145 | 
146 | template <size_t D>
147 | inline void LengthSqr(T val[Y]) const
148 | {
149 | 	Dot<D>(*this, val);
150 | }
151 | 
152 | template <size_t D>
153 | inline void Length(T val[Y]) const
154 | {
155 | 	Dot<D>(*this, val);
156 | 	VSqrt<T, Y>(val);
157 | }
158 | 
159 | template <size_t D>
160 | inline auto& Sqrt()
161 | {
162 | 	for (size_t i = 0; i < D; i++)
163 | 		VSqrt<T, Y>(v[i]);
164 | 	return *this;
165 | }
166 | 
167 | template<typename F, size_t Y2>
168 | inline void Dot(const F& o, T val[Y2]) const
169 | {
170 | 	Dot<F, X, Y2>(o, val);
171 | }
172 | 
173 | inline void Dot(const SOA_TYPE& o, T val[Y]) const
174 | {
175 | 	Dot<SOA_TYPE, X, Y>(o, val);
176 | }
177 | 
178 | inline void LengthSqr(T val[Y]) const
179 | {
180 | 	LengthSqr<X>(val);
181 | }
182 | 
183 | inline void Length(T val[Y]) const
184 | {
185 | 	Length<X>(val);
186 | }
187 | 
188 | inline auto& Sqrt()
189 | {
190 | 	return Sqrt<X>();
191 | }
192 | 
193 | template <size_t D>
194 | inline void DistTo(const SOA_TYPE& o, T val[Y]) const
195 | {
196 | 	(*this - o).template Length<D>(val);
197 | }
198 | 
199 | inline void DistTo(const SOA_TYPE& o, T val[Y]) const
200 | {
201 | 	DistTo<X>(o, val);
202 | }
203 | 
204 | //Pointer returning functions
205 | template <size_t D> // per-lane dot product over the first D rows
206 | inline const T* Dot(const SOA_TYPE& ov) const
207 | {
208 | 	static thread_local T val[Y]; // was a local array, so the returned pointer dangled
209 | 	Dot<D>(ov, val);
210 | 	return val; // valid until this thread's next call of this instantiation
211 | }
212 | 
213 | template <size_t D>
214 | inline const T* LengthSqr() const
215 | {
216 | 	return Dot<D>(*this);
217 | }
218 | 
219 | template <size_t D> // per-lane length over the first D rows
220 | inline const T* Length() const
221 | {
222 | 	static thread_local T val[Y]; // own storage: Dot<D>() hands out const T*, which cannot be written to
223 | 	Length<D>(val); // array overload: Dot<D> followed by VSqrt<T, Y>
224 | 	return val; // valid until this thread's next call of this instantiation
225 | }
226 | 
227 | inline const T* Dot(const SOA_TYPE& o) const
228 | {
229 | 	return Dot<X>(o);
230 | }
231 | 
232 | inline const T* LengthSqr() const
233 | {
234 | 	return LengthSqr<X>();
235 | }
236 | 
237 | inline const T* Length() const
238 | {
239 | 	return Length<X>();
240 | }
241 | 
242 | constexpr auto Abs() const // Returns a copy in which every element is replaced by its absolute value.
243 | {
244 | 	auto result = *this;
245 | 
246 | 	for (size_t dim = 0; dim != X; ++dim)
247 | 		for (size_t lane = 0; lane != Y; ++lane)
248 | 			result[dim][lane] = std::abs(v[dim][lane]); // result starts as a copy, so v holds the same input value
249 | 
250 | 	return result;
251 | }
252 | 
253 | template <size_t D> // D is forwarded to Length<D>
254 | inline const T* DistTo(const SOA_TYPE& o) const // Pointer form of the distance to o: length of (*this - o).
255 | {
256 | 	return (*this - o).template Length<D>(); // NOTE(review): the difference is a temporary; the pointer's validity depends on where Length<D>() keeps its result — confirm
257 | }
258 | 
259 | inline const T* DistTo(const SOA_TYPE& o) const // Pointer-returning distance to o; forwards with D = X.
260 | {
261 | 	return DistTo<X>(o);
262 | }
263 | 
264 | inline auto DirToRay(const SOA_TYPE& a, const SOA_TYPE& b) const // Projects *this onto the line through a and b and returns the projected point, lane by lane.
265 | {
266 | 	auto toPoint = *this - a; // offset from a to this point
267 | 	auto along = b - a; // direction from a towards b, not normalised
268 | 
269 | 	T proj[Y], lenSqr[Y];
270 | 	toPoint.Dot(along, proj);
271 | 	along.LengthSqr(lenSqr);
272 | 
273 | 	for (int lane = 0; lane < Y; lane++)
274 | 		proj[lane] /= lenSqr[lane]; // unclamped line parameter; there is no guard for a == b (zero divisor)
275 | 
276 | 	return a + along * proj;
277 | }
278 | 
279 | inline auto DirToLine(const SOA_TYPE& a, const SOA_TYPE& b) const // Projects *this onto the segment from a to b (parameter clamped to [0, 1]) and returns that point, lane by lane.
280 | {
281 | 	auto toPoint = *this - a; // offset from a to this point
282 | 	auto along = b - a; // direction from a towards b, not normalised
283 | 
284 | 	T proj[Y], lenSqr[Y];
285 | 	toPoint.Dot(along, proj);
286 | 	along.LengthSqr(lenSqr);
287 | 
288 | 	for (int lane = 0; lane < Y; lane++)
289 | 		proj[lane] = std::clamp(proj[lane] / lenSqr[lane], T(0), T(1)); // 0 maps to a, 1 maps to b
290 | 
291 | 	return a + along * proj;
292 | }
293 | 
294 | inline auto Normalized() const // Returns a copy scaled by the reciprocal of its per-lane length; lanes of length zero are multiplied by zero.
295 | {
296 | 	auto val = *this;
297 | 	T l[Y]; // was float[Y], which only matches Length(T*) and operator*=(const T*) when T is float
298 | 	val.Length(l);
299 | 	for (size_t i = 0; i < Y; i++)
300 | 		l[i] = l[i] ? T(1) / l[i] : T(0); // reciprocal length; 0 avoids dividing by a zero length
301 | 	val *= l;
302 | 	return val;
303 | }
304 | 
305 | inline void Normalize() // In-place variant: overwrites *this with Normalized().
306 | {
307 | 	*this = Normalized();
308 | }
309 | 
310 | template<typename Q> // Q: element type of inp; the body reads inp[o].vec[i] and inp[o].vec[i][3]
311 | inline void TransformInPlace(const Q* inp) // NOTE(review): looks like a per-lane matrix transform (row dot product plus element 3 as translation) — confirm against the matrix type
312 | {
313 | 	Q dot[Y]; // never read or written below
314 | 	for (size_t o = 0; o < Y; o++)
315 | 		for (size_t i = 0; i < X; i++)
316 | 			v[o][i] = Dot(inp[o].vec[i]) + inp[o].vec[i][3]; // NOTE(review): v is declared [X][Y] but indexed [o < Y][i < X] here, and the one-argument Dot in this file returns const T* — verify before use
317 | }
318 | 
319 | template<typename Q> // Q: element type of inp, passed straight to TransformInPlace
320 | inline auto Transform(const Q* inp) const // Copying variant: applies TransformInPlace to a copy and returns it.
321 | {
322 | 	auto ret = *this;
323 | 	ret.TransformInPlace(inp);
324 | 	return ret;
325 | }
326 | 
327 | constexpr auto& AssignRow(int row, const vecb<T, X>& vec) // Overwrites lane `row` of each of the X components with vec; returns *this.
328 | {
329 | 	for (size_t dim = 0; dim != X; ++dim)
330 | 		v[dim][row] = vec[dim];
331 | 
332 | 	return *this;
333 | }
334 | 
335 | constexpr auto& AddRow(int row, const vecb<T, X>& vec) // Adds vec component-wise to lane `row`; returns *this.
336 | {
337 | 	for (size_t dim = 0; dim != X; ++dim)
338 | 		v[dim][row] += vec[dim];
339 | 
340 | 	return *this;
341 | }
342 | 
343 | constexpr auto& AddRow(int row, const vecp<T, X>& vec) // vecp overload: forwards to the vecb version.
344 | {
345 | 	return AddRow(row, *(vecb<T, X>*)&vec); // reinterprets the vecp object as a vecb (the cast also drops const); no copy is made
346 | }
347 | 
348 | constexpr auto& AddRow(int row, T val) // Adds the scalar val to lane `row` of every component; returns *this.
349 | {
350 | 	for (size_t i = 0; i < X; i++)
351 | 		v[i][row] += val;
352 | 	return *this; // was missing: an auto& return type cannot be deduced without a return statement
353 | }
354 | constexpr auto& AddRow(int row, const T* val) // Adds val[0..X) component-wise to lane `row`; returns *this.
355 | {
356 | 	for (size_t i = 0; i < X; i++)
357 | 		v[i][row] += val[i]; // was *=, which multiplied instead of adding
358 | 	return *this; // was missing: an auto& return type cannot be deduced without a return statement
359 | }
360 | constexpr auto& AssignCol(int col, const vecb<T, Y>& vec) // Overwrites all Y lanes of component `col` with vec; returns *this.
361 | {
362 | 	for (size_t lane = 0; lane != Y; ++lane)
363 | 		v[col][lane] = vec[lane];
364 | 
365 | 	return *this;
366 | }
367 | 
368 | constexpr auto& AssignCol(int col, const vecp<T, Y>& vec) // vecp overload: forwards to the vecb version.
369 | {
370 | 	return AssignCol(col, (const vecb<T, Y>&)vec); // C-style cast of the vecp argument to a const vecb reference
371 | }
372 | 
373 | constexpr auto& AssignCol(int col, T val) // Scalar overload: builds a vecb from val and forwards to the vecb version.
374 | {
375 | 	vecb<T, Y> vec(val); // relies on a vecb constructor taking a single T (not defined in this file's visible part)
376 | 	return AssignCol(col, vec);
377 | }
378 | 
379 | 
380 | constexpr auto& MulCol(int col, const vecb<T, Y>& vec) // Multiplies each lane of component `col` by the matching element of vec; returns *this.
381 | {
382 | 	for (size_t lane = 0; lane != Y; ++lane)
383 | 		v[col][lane] *= vec[lane];
384 | 
385 | 	return *this;
386 | }
387 | 
388 | constexpr auto& MulCol(int col, const vecp<T, Y>& vec) // vecp overload: forwards to the vecb version.
389 | {
390 | 	return MulCol(col, (vecb<T, Y>)vec); // converts to a temporary vecb first
391 | }
392 | 
393 | constexpr auto& MulCol(int col, T val) // Multiplies every lane of component `col` by the scalar val; returns *this.
394 | {
395 | 	for (size_t i = 0; i < Y; i++) // size_t like the vecb overload (was int against an unsigned bound)
396 | 		v[col][i] *= val;
397 | 	return *this; // was missing: an auto& return type cannot be deduced without a return statement
398 | }
399 | constexpr auto& MulCol(int col, const T* val) // Multiplies each lane of component `col` by val[lane]; val must hold Y elements. Returns *this.
400 | {
401 | 	for (size_t i = 0; i < Y; i++) // size_t like the vecb overload (was int against an unsigned bound)
402 | 		v[col][i] *= val[i];
403 | 	return *this; // was missing: an auto& return type cannot be deduced without a return statement
404 | }
405 | 
406 | constexpr auto& AddCol(int col, const vecb<T, Y>& vec) // Adds the matching element of vec to each lane of component `col`; returns *this.
407 | {
408 | 	for (size_t lane = 0; lane != Y; ++lane)
409 | 		v[col][lane] += vec[lane];
410 | 
411 | 	return *this;
412 | }
413 | 
414 | constexpr auto& AddCol(int col, const vecp<T, Y>& vec) // vecp overload: forwards to the vecb version.
415 | {
416 | 	return AddCol(col, (vecb<T, Y>)vec); // converts to a temporary vecb first
417 | }
418 | 
419 | constexpr auto& AddCol(int col, T val) // Adds the scalar val to every lane of component `col`; returns *this.
420 | {
421 | 	for (size_t i = 0; i < Y; i++) // size_t like the vecb overload (was int against an unsigned bound)
422 | 		v[col][i] += val;
423 | 	return *this; // was missing: an auto& return type cannot be deduced without a return statement
424 | }
425 | constexpr auto& AddCol(int col, const T* val) // Adds val[lane] to each lane of component `col`; val must hold Y elements. Returns *this.
426 | {
427 | 	for (size_t i = 0; i < Y; i++) // size_t like the vecb overload (was int against an unsigned bound)
428 | 		v[col][i] += val[i];
429 | 	return *this; // was missing: an auto& return type cannot be deduced without a return statement
430 | }
431 | constexpr auto GetColAsVecb(int col) // Returns component `col` (Y lanes) as a vecb<T, Y> by value.
432 | {
433 | 	return *(vecb<T, Y>*)v[col]; // reinterprets the T[Y] row as a vecb; the auto return type makes this a copy
434 | }
435 | 
436 | constexpr auto GetColAsVecp(int col) // Returns component `col` as a vecp<T, Y> constructed from the row pointer.
437 | {
438 | 	return vecp<T, Y>(v[col]); // relies on a vecp constructor taking a T* (not defined in this file's visible part)
439 | }
440 | 
441 | 
442 | template<size_t Q = Xt>
443 | constexpr typename std::enable_if<comp_if<Q, 3>::value, SOA_TYPE>::type // only available when comp_if<Q, 3>::value holds (presumably Xt == 3 — confirm)
444 | Cross(const SOA_TYPE& o) const // Per-lane 3D cross product of *this with o.
445 | {
446 | 	SOA_TYPE out(0);
447 | 
448 | 	for (size_t lane = 0; lane != Yt; ++lane) {
449 | 		out[0][lane] = v[1][lane] * o[2][lane] - v[2][lane] * o[1][lane];
450 | 		out[1][lane] = v[2][lane] * o[0][lane] - v[0][lane] * o[2][lane];
451 | 		out[2][lane] = v[0][lane] * o[1][lane] - v[1][lane] * o[0][lane];
452 | 	}
453 | 	return out;
454 | }
455 | 
456 | 
457 | 
458 | constexpr auto Rotate() const // Transposes the X-by-Y storage into a vecSoa<T, Y, X>.
459 | {
460 | 	vecSoa<T, Y, X> flipped(0);
461 | 
462 | 	for (size_t dim = 0; dim != X; ++dim)
463 | 		for (size_t lane = 0; lane != Y; ++lane)
464 | 			flipped[lane][dim] = v[dim][lane];
465 | 
466 | 	return flipped;
467 | }
468 | 
469 | constexpr auto Min(const SOA_TYPE& ov) // Element-wise minimum of *this and ov, returned as a new vector.
470 | {
471 | 	SOA_TYPE smaller(0);
472 | 
473 | 	for (size_t dim = 0; dim != Xt; ++dim)
474 | 		for (size_t lane = 0; lane != Yt; ++lane)
475 | 			smaller[dim][lane] = ::Min(v[dim][lane], ov[dim][lane]); // global ::Min, not this member
476 | 
477 | 	return smaller;
478 | }
479 | 
480 | constexpr auto Max(const SOA_TYPE& ov) // Element-wise maximum of *this and ov, returned as a new vector.
481 | {
482 | 	SOA_TYPE ret(0);
483 | 
484 | 	for (size_t i = 0; i < Xt; i++)
485 | 		for (size_t o = 0; o < Yt; o++)
486 | 			ret[i][o] = ::Max(v[i][o], ov[i][o]); // was ::Min, so Max() returned the element-wise minimum
487 | 
488 | 	return ret;
489 | }
490 | 
491 | constexpr auto MinUp() // Per-lane minimum across all Xt components, returned as a vecb of Yt values.
492 | {
493 | 	vecb<T, Yt> smallest(std::numeric_limits<T>::max()); // start above every representable value
494 | 
495 | 	for(size_t dim = 0; dim != Xt; ++dim)
496 | 		for (size_t lane = 0; lane != Yt; ++lane)
497 | 			smallest[lane] = ::Min(smallest[lane], v[dim][lane]);
498 | 
499 | 	return smallest;
500 | }
501 | 
502 | constexpr auto MaxUp() // Per-lane maximum across all Xt components, returned as a vecb of Yt values.
503 | {
504 | 	vecb<T, Yt> ret(std::numeric_limits<T>::lowest()); // was min(), which for floating-point T is the smallest positive value, so all-negative lanes gave a wrong result
505 | 
506 | 	for(size_t o = 0; o < Xt; o++)
507 | 		for (size_t i = 0; i < Yt; i++)
508 | 			ret[i] = ::Max(ret[i], v[o][i]);
509 | 
510 | 	return ret;
511 | }
512 | 
513 | constexpr auto Lerp(const SOA_TYPE& ov, float time) // Linear interpolation: *this at time 0, ov at time 1; time is not clamped.
514 | {
515 | 	return *this + time * (ov - *this);
516 | }
517 | 
518 | constexpr auto LerpClamped(const SOA_TYPE& ov, float time) // Linear interpolation towards ov with time clamped to [0, 1].
519 | {
520 | 	return *this + ::Min(1.f, ::Max(0.f, time)) * (ov - *this); // global ::Min/::Max, not the members of the same name
521 | }
522 | 
523 | #undef SOA_TYPE
524 | #endif
525 | 


--------------------------------------------------------------------------------
/math/vector.h:
--------------------------------------------------------------------------------
  1 | #ifndef VECTOR_H
  2 | #define VECTOR_H
  3 | 
  4 | /*
  5 |  * This vector library is focused on high efficiency
  6 |  * by laying out the data in a very easily vectorizable way.
  7 |  * A compiler that does automatic SIMD code generation should
  8 |  * be able to work more easily on this layout.
  9 |  *
 10 |  * Clang does a really good job at generating SIMD code, while
 11 |  * MSVC does not always work. Some more tricky parts, such as
 12 |  * square root calculation have manual "hand written" SSE/AVX
 13 |  * implementations, while addition, multiplication, etc. usually
 14 |  * have correct code generation.
 15 |  *
 16 |  * All the code logic should focus around the data layout of the
 17 |  * SOA vectors, even though accessors (in vec3soa case) are implemented
 18 |  * in case you need to conveniently access all dimensions in one go.
 19 |  * The data is laid out like this for a reason.
 20 | */
 21 | 
 22 | #include "stddef.h"
 23 | #include "mmath.h"
 24 | #include "math.h"
 25 | 
 26 | #include "vector_operators.h"
 27 | #include "soa_accessor.h"
 28 | #include <stdlib.h>
 29 | #include <functional>
 30 | 
 31 | template<typename T, size_t Q>
 32 | inline void VSqrt(T val[Q]) // Generic fallback: replaces each of the first Q elements of val with its square root.
 33 | {
 34 | 	for (T* cur = val; cur != val + Q; ++cur)
 35 | 		*cur = sqrt(*cur);
 36 | }
 37 | 
 38 | #if PSIMD >= 4
 39 | template<>
 40 | inline void VSqrt<float, 4>(float val[4]) // SSE specialisation: four float square roots computed together.
 41 | {
 42 | 	const __m128 in = _mm_loadu_ps(val); // unaligned load
 43 | 	const __m128 roots = _mm_sqrt_ps(in);
 44 | 	_mm_storeu_ps(val, roots); // write back over the input
 45 | }
 46 | #endif
 47 | 
 48 | #if PSIMD >= 8
 49 | template<>
 50 | inline void VSqrt<float, 8>(float val[8]) // AVX specialisation: eight float square roots computed together.
 51 | {
 52 | 	const __m256 in = _mm256_loadu_ps(val); // unaligned load
 53 | 	const __m256 roots = _mm256_sqrt_ps(in);
 54 | 	_mm256_storeu_ps(val, roots); // write back over the input
 55 | }
 56 | #endif
 57 | 
 58 | #if PSIMD >= 16
 59 | template<>
 60 | inline void VSqrt<float, 16>(float val[16]) // AVX-512 specialisation: sixteen float square roots computed together.
 61 | {
 62 | 	const __m512 in = _mm512_loadu_ps(val); // unaligned load
 63 | 	const __m512 roots = _mm512_sqrt_ps(in);
 64 | 	_mm512_storeu_ps(val, roots); // write back over the input
 65 | }
 66 | #endif
 67 | 
 68 | template<typename T, size_t N>
 69 | struct vecp;
 70 | 
 71 | template<typename T, size_t Y>
 72 | struct vec3soa;
 73 | 
 74 | template<typename T, size_t X, size_t Y>
 75 | struct vecSoa;
 76 | 
 77 | template<size_t X, size_t Y>
 78 | struct matrix;
 79 | 
 80 | template<typename T, size_t N>
 81 | struct vecb_accessor // Named-member view returned by vecb::operator->; this primary template serves every N without a specialisation.
 82 | {
 83 | 	T x, y, z, w;
 84 | };
 85 | 
 86 | template<typename T>
 87 | struct vecb_accessor<T, 1> // One-component view: only x.
 88 | {
 89 | 	T x;
 90 | };
 91 | 
 92 | template<typename T>
 93 | struct vecb_accessor<T, 2> // Two-component view: x and y.
 94 | {
 95 | 	T x, y;
 96 | };
 97 | 
 98 | template<typename T>
 99 | struct vecb_accessor<T, 3> // Three-component view: x, y and z.
100 | {
101 | 	T x, y, z;
102 | };
103 | 
104 | template<typename T, size_t N>
105 | struct vecb // Unpadded N-component vector: its only data member is one T[N] array.
106 | {
107 | 	using value_type = T;
108 | 	static constexpr size_t Yt = N;
109 | 	T v[N];
110 | 
111 | 	DEFINE_VEC_OPS(vecb);
112 | 
113 | #define VEC_TYPE vecb
114 | #include "vec_funcs.h"
115 | 
116 | 	constexpr bool operator==(const vecb& o) const // exact element-wise equality; now const so const vectors can be compared
117 | 	{
118 | 		for (size_t i = 0; i < N; i++)
119 | 			if (v[i] != o.v[i])
120 | 				return false;
121 | 		return true;
122 | 	}
123 | 
124 | 	constexpr bool operator!=(const vecb& o) const // negation of operator==
125 | 	{
126 | 		return !operator==(o);
127 | 	}
128 | 
129 | 	constexpr T& operator[](size_t idx) // unchecked element access
130 | 	{
131 | 		return v[idx];
132 | 	}
133 | 
134 | 	constexpr const T& operator[](size_t idx) const
135 | 	{
136 | 		return v[idx];
137 | 	}
138 | 
139 | 	inline vecb_accessor<T, N>* operator->() // named x/y/z/w view over the same storage
140 | 	{
141 | 		return (vecb_accessor<T, N>*)v;
142 | 	}
143 | 
144 | 	inline const vecb_accessor<T, N>* operator->() const
145 | 	{
146 | 		return (const vecb_accessor<T, N>*)v;
147 | 	}
148 | 
149 | 	template<size_t B>
150 | 	constexpr operator vecp<T, B>() const // Converts to a padded vector; elements that are not copied stay zero.
151 | 	{
152 | 		constexpr size_t mv = B < N ? (B < 4 ? B : 4) : (N < 4 ? N : 4); // min(B, N, 4): the old min(B, 4) read past v when N < B
153 | 		vecp<T, B> vec = {};
154 | 		for (size_t i = 0; i < mv; i++)
155 | 			vec[i] = v[i];
156 | 		return vec;
157 | 	}
158 | 
159 | 	template<size_t B>
160 | 	constexpr operator vec3soa<T, B>() const // Broadcasts this vector into all B lanes of a 3-component SoA vector.
161 | 	{
162 | 		vec3soa<T, B> ret = {};
163 | 		for (size_t i = 0; i < 3 && i < N; i++) // for N < 3 the missing components stay zero instead of reading past v
164 | 			for (size_t o = 0; o < B; o++)
165 | 				ret[i][o] = v[i];
166 | 		return ret;
167 | 	}
168 | 
169 | };
170 | 
171 | template<typename T, size_t N>
172 | struct vecp // Padded vector: always stores four elements; comparisons below look at the first N.
173 | {
174 | 	using value_type = T;
175 | 	static constexpr size_t Yt = N;
176 | 
177 | 	union {
178 | 		struct {
179 | 			T x, y, z, w; // was float: the named view has to alias v element for element for every T
180 | 		};
181 | 		T v[4];
182 | 	};
183 | 
184 | 	DEFINE_VEC_OPS(vecp);
185 | 
186 | #define VEC_TYPE vecp
187 | #include "vec_funcs.h"
188 | 
189 | 
190 | 	constexpr bool operator==(const vecp& o) const // exact equality of the first N elements; now const so const vectors can be compared
191 | 	{
192 | 		for (size_t i = 0; i < N; i++)
193 | 			if (v[i] != o.v[i])
194 | 				return false;
195 | 		return true;
196 | 	}
197 | 
198 | 	constexpr bool operator!=(const vecp& o) const // negation of operator==
199 | 	{
200 | 		return !operator==(o);
201 | 	}
202 | 
203 | 	constexpr T& operator[](size_t idx) // unchecked element access
204 | 	{
205 | 		return v[idx];
206 | 	}
207 | 
208 | 	constexpr const T& operator[](size_t idx) const
209 | 	{
210 | 		return v[idx];
211 | 	}
212 | 
213 | 	template<size_t B>
214 | 	constexpr auto& operator=(const vecb<float, B>& vec) // Copies the first min(B, 4) elements; the source element type is fixed to float.
215 | 	{
216 | 		constexpr size_t mv = B < 4 ? B : 4;
217 | 		for (size_t i = 0; i < mv; i++)
218 | 			v[i] = vec[i];
219 | 		return *this;
220 | 	}
221 | 
222 | 	template<size_t B>
223 | 	constexpr operator vecb<T, B>() const // Converts to an unpadded vector; elements beyond the fourth stay zero.
224 | 	{
225 | 		constexpr size_t mv = B < 4 ? B : 4;
226 | 		vecb<T, B> vec = {};
227 | 		for (size_t i = 0; i < mv; i++)
228 | 			vec[i] = v[i];
229 | 		return vec;
230 | 	}
231 | 
232 | 	template<size_t B>
233 | 	constexpr operator vec3soa<T, B>() const // Broadcasts the first three elements into all B lanes of a 3-component SoA vector.
234 | 	{
235 | 		vec3soa<T, B> ret = {};
236 | 		for (size_t i = 0; i < 3; i++)
237 | 			for (size_t o = 0; o < B; o++)
238 | 				ret[i][o] = v[i];
239 | 		return ret;
240 | 	}
241 | };
242 | 
243 | template<typename T, size_t Y>
244 | struct vec3soa // Structure-of-arrays 3D vector: three rows (x, y, z) of Y lanes each.
245 | {
246 | 	using value_type = T;
247 | 	static constexpr size_t X = 3;
248 | 	static constexpr size_t Xt = X;
249 | 	static constexpr size_t Yt = Y;
250 | 	union {
251 | 		struct {
252 | 			T x[Y];
253 | 			T y[Y];
254 | 			T z[Y];
255 | 		};
256 | 		T v[X][Y]; // same storage as x/y/z: v[0] is x, v[1] is y, v[2] is z
257 | 		DEFINE_SOA_ACCESSOR;
258 | 	};
259 | 
260 | 	DEFINE_SOA_OPS(vec3soa);
261 | 	DEFINE_SOA_VEC_OPS(vec3soa);
262 | 
263 | #define SOA_TYPE vec3soa
264 | #include "vecsoa_funcs.h"
265 | 
266 | 	constexpr bool operator==(const vec3soa& ov) const // exact element-wise equality over all X * Y values
267 | 	{
268 | 		for (size_t i = 0; i < X; i++)
269 | 			for (size_t o = 0; o < Y; o++)
270 | 				if (v[i][o] != ov.v[i][o])
271 | 					return false;
272 | 		return true;
273 | 	}
274 | 
275 | 	constexpr bool operator!=(const vec3soa& o) const // negation of operator==
276 | 	{
277 | 		return !operator==(o);
278 | 	}
279 | 
280 | 	inline void ToAngles() // Replaces each lane's direction vector with (pitch, yaw, 0); atan2 yields radians.
281 | 	{
282 | 		T yaw[Y], pitch[Y], len[Y]; // renamed from y/x, which shadowed the union members
283 | 		for (size_t o = 0; o < Y; o++)
284 | 			yaw[o] = atan2(v[1][o], v[0][o]);
285 | 
286 | 		Length<2>(len); // Length with D = 2, used as the horizontal (x, y) length
287 | 
288 | 		for (size_t o = 0; o < Y; o++)
289 | 			pitch[o] = atan2(-v[2][o], len[o]); // was -v[1][o] (y): pitch is the elevation of z over the horizontal length
290 | 
291 | 		for (size_t o = 0; o < Y; o++)
292 | 			v[0][o] = pitch[o];
293 | 
294 | 		for (size_t o = 0; o < Y; o++)
295 | 			v[1][o] = yaw[o];
296 | 
297 | 		for (size_t o = 0; o < Y; o++)
298 | 			v[2][o] = 0;
299 | 
300 | 	}
301 | 
302 | 	constexpr auto GetAngles() const // Copying variant of ToAngles; const because it only works on the copy
303 | 	{
304 | 		auto ret = *this;
305 | 		ret.ToAngles();
306 | 		return ret;
307 | 	}
308 | 
309 | 	constexpr T* operator[](size_t idx) // returns row idx (Y lanes), unchecked
310 | 	{
311 | 		return v[idx];
312 | 	}
313 | 
314 | 	constexpr const T* operator[](size_t idx) const
315 | 	{
316 | 		return v[idx];
317 | 	}
318 | 
319 | };
320 | 
321 | template<typename T, size_t X, size_t Y>
322 | struct vecSoa // Structure-of-arrays vector with X components of Y lanes each.
323 | {
324 | 	using value_type = T;
325 | 	static constexpr size_t Xt = X;
326 | 	static constexpr size_t Yt = Y;
327 | 	union
328 | 	{
329 | 		T v[X][Y];
330 | 		DEFINE_SOA_ACCESSOR;
331 | 	};
332 | 
333 | 	DEFINE_SOA_OPS(vecSoa);
334 | 	DEFINE_SOA_VEC_OPS(vecSoa);
335 | 
336 | #define SOA_TYPE vecSoa
337 | #include "vecsoa_funcs.h"
338 | 
339 | 	constexpr bool operator==(const vecSoa& ov) const // exact element-wise equality over all X * Y values
340 | 	{
341 | 		for (size_t i = 0; i < X; i++)
342 | 			for (size_t o = 0; o < Y; o++)
343 | 				if (v[i][o] != ov.v[i][o])
344 | 					return false;
345 | 		return true;
346 | 	}
347 | 
348 | 	constexpr bool operator!=(const vecSoa& o) const // negation of operator==
349 | 	{
350 | 		return !operator==(o);
351 | 	}
352 | 
353 | 	constexpr T* operator[](size_t idx) // returns row idx (Y lanes), unchecked
354 | 	{
355 | 		return v[idx];
356 | 	}
357 | 
358 | 	constexpr const T* operator[](size_t idx) const
359 | 	{
360 | 		return v[idx];
361 | 	}
362 | 
363 | 	template<size_t B>
364 | 	constexpr operator vec3soa<T, B>() const // Copies the overlapping min(X, 3) by min(Y, B) region, rest zero; now const like the vecb/vecp conversions
365 | 	{
366 | 		constexpr size_t mv = X < 3 ? X : 3; // size_t (was int) to match the loop counters
367 | 		constexpr size_t mb = Y < B ? Y : B;
368 | 		vec3soa<T, B> ret = {};
369 | 		for (size_t i = 0; i < mv; i++)
370 | 			for (size_t o = 0; o < mb; o++)
371 | 				ret[i][o] = v[i][o];
372 | 		return ret;
373 | 	}
374 | };
375 | 
376 | template<size_t N>
377 | using vec = vecb<float, N>; // unpadded float vector of N components
378 | using vec2 = vec<2>;
379 | using vec3 = vec<3>;
380 | using vec4 = vec<4>;
381 | 
382 | using vec3_t = vecp<float, 3>; // padded variants: storage is always four floats
383 | using vec4_t = vecp<float, 4>;
384 | 
385 | using xvec3 = vec3soa<float, 4>; // 3D SoA vectors with 4, 8 and 16 lanes
386 | using yvec3 = vec3soa<float, 8>;
387 | using zvec3 = vec3soa<float, 16>;
388 | using nvec3 = vec3soa<float, SIMD_COUNT>; // lane count taken from SIMD_COUNT (defined outside this header)
389 | template<size_t Y>
390 | using svec3 = vec3soa<float, Y>; // caller-chosen lane count
391 | 
392 | template<size_t X>
393 | using xvec = vecSoa<float, X, 4>; // X-component SoA vectors with 4, 8 and 16 lanes
394 | template<size_t X>
395 | using yvec = vecSoa<float, X, 8>;
396 | template<size_t X>
397 | using zvec = vecSoa<float, X, 16>;
398 | template<size_t X>
399 | using nvec = vecSoa<float, X, SIMD_COUNT>;
400 | 
401 | static_assert(std::is_pod<xvec<3>>::value); // compile-time check that the SoA vector stays a POD type
402 | 
403 | template <size_t N>
404 | using veci = vecb<int, N>; // integer counterpart of vec
405 | 
406 | #include "matrix.h"
407 | 
408 | #endif
409 | 


--------------------------------------------------------------------------------
/math/vector_operators.h:
--------------------------------------------------------------------------------
  1 | #ifndef VECTOR_OPERATORS_H
  2 | #define VECTOR_OPERATORS_H // was missing, so the #ifndef guard never took effect on a second inclusion
  3 | #define VEC_OP(type, OP) /* Emits binary OP and compound OP= over N elements against a same-sized vector template, a scalar T, and a T* array. Expects T, N and v in the enclosing class. */ \
  4 | 	template<template <typename F, size_t N2> class otherType>	\
  5 | 	friend constexpr auto operator OP(type v, const otherType<T, N>& ov) \
  6 | 	{														\
  7 | 		for (size_t o = 0; o < N; o++)						\
  8 | 			v.v[o] = v.v[o] OP ov.v[o];						\
  9 | 		return v;											\
 10 | 	}														\
 11 | 															\
 12 | 	friend constexpr auto operator OP(type v, const T& ov)	\
 13 | 	{														\
 14 | 		size_t o = 0;										\
 15 | 		for (o = 0; o < N; o++)								\
 16 | 			v.v[o] = v.v[o] OP ov;							\
 17 | 		return v;											\
 18 | 	}														\
 19 | 															\
 20 | 	friend constexpr auto operator OP(type v, const T* ov)	\
 21 | 	{														\
 22 | 		for (size_t o = 0; o < N; o++)						\
 23 | 			v.v[o] = v.v[o] OP ov[o];						\
 24 | 		return v;											\
 25 | 	}														\
 26 | 															\
 27 | 	template<template <typename F, size_t N2> class otherType>	\
 28 | 	constexpr auto& operator OP##=(const otherType<T, N>& ov)	\
 29 | 	{														\
 30 | 		for (size_t o = 0; o < N; o++)						\
 31 | 			v[o] OP##= ov.v[o];								\
 32 | 		return *this;										\
 33 | 	}														\
 34 | 															\
 35 | 	constexpr auto& operator OP##=(const T& ov)				\
 36 | 	{														\
 37 | 		for (size_t o = 0; o < N; o++)						\
 38 | 			v[o] OP##= ov;									\
 39 | 		return *this;										\
 40 | 	}														\
 41 | 															\
 42 | 	constexpr auto& operator OP##=(const T* ov)				\
 43 | 	{														\
 44 | 		for (size_t o = 0; o < N; o++)						\
 45 | 			v[o] OP##= ov[o];								\
 46 | 		return *this;										\
 47 | 	}
 48 | 
 49 | #define SOA_OP(type, OP) /* Emits binary OP and compound OP= over the X-by-Y array v against the same type, a scalar T, and a T* array indexed by lane (ov[o]). Expects T, X, Y and v in the enclosing class. */ \
 50 | 	friend constexpr auto operator OP(type v, const type& ov)	\
 51 | 	{													\
 52 | 		for (size_t i = 0; i < X; i++)					\
 53 | 			for (size_t o = 0; o < Y; o++)				\
 54 | 				v.v[i][o] = v.v[i][o] OP ov.v[i][o];	\
 55 | 		return v;										\
 56 | 	}													\
 57 | 														\
 58 | 	friend constexpr auto operator OP(type v, const T& ov)	\
 59 | 	{													\
 60 | 		for (size_t i = 0; i < X; i++)					\
 61 | 			for (size_t o = 0; o < Y; o++)				\
 62 | 				v.v[i][o] = v.v[i][o] OP ov;			\
 63 | 		return v;										\
 64 | 	}													\
 65 | 														\
 66 | 	friend constexpr auto operator OP(type v, const T* ov)	\
 67 | 	{													\
 68 | 		for (size_t i = 0; i < X; i++)					\
 69 | 			for (size_t o = 0; o < Y; o++)				\
 70 | 				v.v[i][o] = v.v[i][o] OP ov[o];			\
 71 | 		return v;										\
 72 | 	}													\
 73 | 														\
 74 | 	constexpr auto& operator OP##=(const type& ov)		\
 75 | 	{													\
 76 | 		for (size_t i = 0; i < X; i++)					\
 77 | 			for (size_t o = 0; o < Y; o++)				\
 78 | 				v[i][o] OP##= ov.v[i][o];				\
 79 | 		return *this;									\
 80 | 	}													\
 81 | 														\
 82 | 	constexpr auto& operator OP##=(const T& ov)			\
 83 | 	{													\
 84 | 		for (size_t i = 0; i < X; i++)					\
 85 | 			for (size_t o = 0; o < Y; o++)				\
 86 | 				v[i][o] OP##= ov;						\
 87 | 		return *this;									\
 88 | 	}													\
 89 | 														\
 90 | 	constexpr auto& operator OP##=(const T* ov)			\
 91 | 	{													\
 92 | 		for (size_t i = 0; i < X; i++)					\
 93 | 			for (size_t o = 0; o < Y; o++)				\
 94 | 				v[i][o] OP##= ov[o];					\
 95 | 		return *this;									\
 96 | 	}
 97 | 
 98 | 
 99 | 
100 | #define SOA_VEC_OP(mainType, OP) /* Emits binary OP and compound OP= between an X-by-Y SoA vector and any type<T, Y>, whose element ov[o] is applied to lane o of every component. */ \
101 | 	template<template <typename F, size_t Y2> class type>	\
102 | 	friend constexpr auto operator OP(mainType v, const type<T, Y>& ov) \
103 | 	{													\
104 | 		for (size_t i = 0; i < X; i++)					\
105 | 			for (size_t o = 0; o < Y; o++)				\
106 | 				v.v[i][o] = v.v[i][o] OP ov[o];			\
107 | 		return v;										\
108 | 	}													\
109 | 														\
110 | 	template<template <typename F, size_t Y2> class type>	\
111 | 	constexpr auto& operator OP##=(const type<T, Y>& ov)	\
112 | 	{													\
113 | 		for (size_t i = 0; i < X; i++)					\
114 | 			for (size_t o = 0; o < Y; o++)				\
115 | 				v[i][o] OP##= ov[o];					\
116 | 		return *this;									\
117 | 	}
118 | 
119 | 
120 | #define WIDE_OP(type, OP) /* Same operator set as SOA_OP but iterating Y outer and X inner; the compound forms write the member w, the binary forms go through operator[] of the by-value copy. */ \
121 | 	friend constexpr auto operator OP(type v, const type& ov)	\
122 | 	{													\
123 | 		for (size_t i = 0; i < Y; i++)					\
124 | 			for (size_t o = 0; o < X; o++)				\
125 | 				v[i][o] = v[i][o] OP ov[i][o];			\
126 | 		return v;										\
127 | 	}													\
128 | 														\
129 | 	friend constexpr auto operator OP(type v, const T& ov)	\
130 | 	{													\
131 | 		for (size_t i = 0; i < Y; i++)					\
132 | 			for (size_t o = 0; o < X; o++)				\
133 | 				v[i][o] = v[i][o] OP ov;				\
134 | 		return v;										\
135 | 	}													\
136 | 														\
137 | 	friend constexpr auto operator OP(type v, const T* ov)	\
138 | 	{													\
139 | 		for (size_t i = 0; i < Y; i++)					\
140 | 			for (size_t o = 0; o < X; o++)				\
141 | 				v[i][o] = v[i][o] OP ov[o];				\
142 | 		return v;										\
143 | 	}													\
144 | 														\
145 | 	constexpr auto& operator OP##=(const type& ov)		\
146 | 	{													\
147 | 		for (size_t i = 0; i < Y; i++)					\
148 | 			for (size_t o = 0; o < X; o++)				\
149 | 				w[i][o] OP##= ov[i][o];					\
150 | 		return *this;									\
151 | 	}													\
152 | 														\
153 | 	constexpr auto& operator OP##=(const T& ov)			\
154 | 	{													\
155 | 		for (size_t i = 0; i < Y; i++)					\
156 | 			for (size_t o = 0; o < X; o++)				\
157 | 				w[i][o] OP##= ov;						\
158 | 		return *this;									\
159 | 	}													\
160 | 														\
161 | 	constexpr auto& operator OP##=(const T* ov)			\
162 | 	{													\
163 | 		for (size_t i = 0; i < Y; i++)					\
164 | 			for (size_t o = 0; o < X; o++)				\
165 | 				w[i][o] OP##= ov[o];					\
166 | 		return *this;									\
167 | 	}
168 | 
169 | 
170 | #define DEFINE_WIDE_OPS(type) /* Instantiates WIDE_OP for + - * / and adds assignment from a scalar T and from a T* array (ov[o], indexed by the inner X loop). */ \
171 | 	WIDE_OP(type, +);							\
172 | 	WIDE_OP(type, -);							\
173 | 	WIDE_OP(type, *);							\
174 | 	WIDE_OP(type, /);							\
175 | 												\
176 | 	constexpr auto& operator =(const T& ov)		\
177 | 	{											\
178 | 		for (size_t i = 0; i < Y; i++)			\
179 | 			for (size_t o = 0; o < X; o++)		\
180 | 				w[i][o] = ov;					\
181 | 		return *this;							\
182 | 	}											\
183 | 												\
184 | 	constexpr auto& operator =(const T* ov)		\
185 | 	{											\
186 | 		for (size_t i = 0; i < Y; i++)			\
187 | 			for (size_t o = 0; o < X; o++)		\
188 | 				w[i][o] = ov[o];				\
189 | 		return *this;							\
190 | 	}
191 | 
192 | 
193 | #define DEFINE_SOA_OPS(type) /* Instantiates SOA_OP for + - * / and adds assignment from a scalar T (broadcast) and from a T* array (ov[o] copied to lane o of every component). */ \
194 | 	SOA_OP(type, +);							\
195 | 	SOA_OP(type, -);							\
196 | 	SOA_OP(type, *);							\
197 | 	SOA_OP(type, /);							\
198 | 												\
199 | 	constexpr auto& operator =(const T& ov)		\
200 | 	{											\
201 | 		for (size_t i = 0; i < X; i++)			\
202 | 			for (size_t o = 0; o < Y; o++)		\
203 | 				v[i][o] = ov;					\
204 | 		return *this;							\
205 | 	}											\
206 | 												\
207 | 	constexpr auto& operator =(const T* ov)		\
208 | 	{											\
209 | 		for (size_t i = 0; i < X; i++)			\
210 | 			for (size_t o = 0; o < Y; o++)		\
211 | 				v[i][o] = ov[o];				\
212 | 		return *this;							\
213 | 	}
214 | 
215 | #define DEFINE_SOA_VEC_OPS(mainType) /* Instantiates SOA_VEC_OP for + - * / and adds assignment from any type<T, Y> (plain and volatile), copying ov[o] to lane o of every component. */ \
216 | 	SOA_VEC_OP(mainType, +);								\
217 | 	SOA_VEC_OP(mainType, -);								\
218 | 	SOA_VEC_OP(mainType, *);								\
219 | 	SOA_VEC_OP(mainType, /);								\
220 | 												\
221 | 	template<template <typename F, size_t Y2> class type>	\
222 | 	constexpr auto& operator =(const type<T, Y>& ov)	\
223 | 	{											\
224 | 		for (size_t i = 0; i < X; i++)			\
225 | 			for (size_t o = 0; o < Y; o++)		\
226 | 				v[i][o] = ov[o];				\
227 | 		return *this;							\
228 | 	}											\
229 | 															\
230 | 	template<template <typename F, size_t Y2> class type>	\
231 | 	constexpr auto& operator =(const volatile type<T, Y>& ov)	\
232 | 	{														\
233 | 		for (size_t i = 0; i < X; i++)						\
234 | 			for (size_t o = 0; o < Y; o++)					\
235 | 				v[i][o] = ov[o];							\
236 | 		return *this;										\
237 | 	}
238 | 
239 | 
240 | 
241 | #define DEFINE_VEC_OPS(type) /* Instantiates VEC_OP for + - * / and adds assignment from a scalar T (broadcast to all N elements) and from a T* array of N elements. */ \
242 | 	VEC_OP(type, +);							\
243 | 	VEC_OP(type, -);							\
244 | 	VEC_OP(type, *);							\
245 | 	VEC_OP(type, /);							\
246 | 												\
247 | 	constexpr auto& operator =(const T& ov)		\
248 | 	{											\
249 | 		for (size_t o = 0; o < N; o++)			\
250 | 			v[o] = ov;							\
251 | 		return *this;							\
252 | 	}											\
253 | 												\
254 | 	constexpr auto& operator =(const T* ov)		\
255 | 	{											\
256 | 		for (size_t o = 0; o < N; o++)			\
257 | 			v[o] = ov[o];						\
258 | 		return *this;							\
259 | 	}
260 | 
261 | 
262 | 
263 | #endif
264 | 


--------------------------------------------------------------------------------
/meson.build:
--------------------------------------------------------------------------------
  1 | project('m0dular-framework', 'cpp', 'c', default_options : ['c_std=c11', 'cpp_std=c++17', 'b_ndebug=if-release'])
  2 | 
  3 | #compile_args = ['-DNUM_THREADS=2', '-Werror', '-Wno-missing-braces']
  4 | compile_args = ['-D_POSIX_C_SOURCE=200809L', '-DNUM_THREADS=8', '-Werror', '-Wno-missing-braces', '-Wno-format', '-Wno-unused-command-line-argument', '-Wno-comment'] # base flags passed to every target below
  5 | linker_args = compile_args # the link line starts from the same flag list
  6 | 
  7 | if not get_option('buildtype').startswith('debug') # any buildtype whose name does not start with 'debug'
  8 | 	compile_args += ['-O2', '-fno-stack-protector', '-msse4.2']
  9 | 	linker_args += ['-flto', '-O0']
 10 | else
 11 |   #compile_args += ['-g', '-O0', '-fsanitize=address']
 12 |   #linker_args += ['-lasan']
 13 | endif
 14 | 
 15 | includes = [] # include directories, turned into -I flags further down
 16 | libraries = [] # library directories, turned into -L flags further down
 17 | deps = []
 18 | 
 19 | cc = meson.get_compiler('cpp')
 20 | 
 21 | if meson.is_cross_build() # extra flags and search paths that only apply when cross compiling
 22 | 	add_args = []
 23 |   #Cross compile support, assuming all the windows build files have been extracted from a visual studio installation and placed inside a single directory.
 24 | 	if host_machine.system() == 'windows'
 25 | 		cpu_family = host_machine.cpu_family()
 26 | 		if build_machine.system() != 'windows' # targeting Windows from a non-Windows build machine
 27 | 			linker_args += '-fuse-ld=lld'
 28 | 			add_args = ['-target', 'i386-pc-windows-msvc19.14.26428', '-Wno-msvc-not-found', '-D_CRT_SECURE_NO_WARNINGS', '-D_WINSOCK_DEPRECATED_NO_WARNINGS', '-fms-extensions', '-fms-compatibility', '-fms-compatibility-version=19.14.26428', '-Wno-expansion-to-defined', '-Wno-nonportable-include-path', '-Wno-pragma-pack', '-Wno-ignored-attributes', '-Wno-ignored-pragma-intrinsic', '-Wno-int-to-void-pointer-cast', '-fuse-ld=lld', '-Wno-non-virtual-dtor', '-Wno-unused-local-typedef', '-Wno-unknown-pragmas']
 29 | 
 30 | 			bf_dir = get_option('msvc_dir') # root of the extracted headers and libraries (see meson_options.txt)
 31 | 			#includes += join_paths(bf_dir, 'include/clang')
 32 | 			includes += join_paths(bf_dir, 'include/msvc')
 33 | 			includes += join_paths(bf_dir, 'include/ucrt')
 34 | 			includes += join_paths(bf_dir, 'include/um')
 35 | 			includes += join_paths(bf_dir, 'include/shared')
 36 | 			includes += join_paths(bf_dir, 'include/winrt')
 37 | 			includes += join_paths(bf_dir, 'include/boost')
 38 | 			libraries += join_paths(bf_dir, 'lib/clang')
 39 | 			libraries += join_paths(bf_dir, join_paths('lib/msvc', cpu_family)) # library directories are split per host cpu family
 40 | 			libraries += join_paths(bf_dir, join_paths('lib/ucrt', cpu_family))
 41 | 			libraries += join_paths(bf_dir, join_paths('lib/um', cpu_family))
 42 | 			libraries += join_paths(bf_dir, join_paths('lib/shared', cpu_family))
 43 | 			libraries += join_paths(bf_dir, join_paths('lib/winrt', cpu_family))
 44 | 		else
 45 | 			add_args = []
 46 | 			linker_args += '-Wl,-debug:full'
 47 | 		endif
 48 | 		linker_args += '-Bdynamic'
 49 | 		linker_args += '-lkernel32'
 50 | 		linker_args += '-lmsvcrt'
 51 | 	elif host_machine.system() == 'linux'
 52 | 		linker_args += '-fuse-ld=lld'
 53 |     linker_args += '-lrt'
 54 | 		add_args = ['-target', 'x86_64-linux', '-flto']
 55 | 	endif
 56 | 	foreach p :  add_args # the target-specific flags go to both the compile and the link line
 57 | 		compile_args += p
 58 | 		linker_args += p
 59 | 	endforeach
 60 | endif
 61 | 
 62 | if host_machine.system() == 'linux' # applies to native and cross builds alike
 63 |   linker_args += ['-ldl', '-lrt']
 64 | endif
 65 | 
 66 | foreach p : libraries # every collected library directory becomes a -L flag
 67 | 	linker_args += '-L'+p
 68 | endforeach
 69 | 
 70 | foreach p : includes # every collected include directory becomes a -I flag
 71 | 	compile_args += '-I'+p
 72 | endforeach
 73 | 
 74 | #message(compile_args)
 75 | #message(linker_args)
 76 | #message(libraries)
 77 | #message(includes)
 78 | 
 79 | thread_dep = dependency('threads') # every test executable below depends on it
 80 | 
 81 | smtx = executable('shared_mutex_test', files(['tests/shared_mutex.cpp', 'utils/threading.cpp', 'utils/atomic_lock.cpp', 'utils/shared_mutex.cpp', 'utils/semaphores.cpp', 'utils/mutex.cpp']), cpp_args : compile_args, link_args : linker_args, dependencies : thread_dep)
 82 | mtx = executable('mutex_test', files(['tests/mutex.cpp', 'utils/threading.cpp', 'utils/atomic_lock.cpp', 'utils/mutex.cpp', 'utils/semaphores.cpp']), cpp_args : compile_args, link_args : linker_args, dependencies : thread_dep)
 83 | tpool = executable('thread_pool_test', files(['tests/thread_pool.cpp', 'utils/atomic_lock.cpp', 'utils/threading.cpp', 'utils/packed_heap.cpp', 'utils/mutex.cpp', 'utils/semaphores.cpp', 'submodules/minitrace/minitrace.c']), cpp_args : compile_args + ['-DMTR_ENABLED'], c_args : compile_args + ['-DMTR_ENABLED'], link_args : linker_args, dependencies : thread_dep)
 84 | crc = executable('crc_test', files(['tests/crc.cpp']), cpp_args : compile_args, link_args : linker_args, dependencies : thread_dep)
 85 | #code_crypt = executable('code_crypt_test', files(['tests/codecrypt.cpp', 'utils/codecrypt.cpp', 'utils/threading.cpp', 'utils/atomic_lock.cpp', 'utils/mutex.cpp', 'utils/shared_mutex.cpp', 'utils/semaphores.cpp', 'utils/packed_heap.cpp']), cpp_args : compile_args, link_args : linker_args, dependencies : thread_dep)
 86 | settings = executable('settings_test', files(['tests/settings.cpp', 'utils/packed_heap.cpp']), cpp_args : compile_args, link_args : linker_args, dependencies : thread_dep)
 87 | allocator = executable('allocator_test', files(['tests/allocator.cpp', 'utils/packed_heap.cpp']), cpp_args : compile_args + ['-DPACKED_HEAP_DEBUG', '-DPACKED_HEAP_MERGE_SECTIONS=0'], link_args : linker_args, dependencies : thread_dep)
 88 | shmem_allocator = executable('shmem_allocator_test', files(['tests/shmemalloc.cpp', 'utils/named_semaphores.cpp']), cpp_args : compile_args + ['-DOFFSET_POINTER_DEBUG'], link_args : linker_args, dependencies : thread_dep)
 89 | kd_tree = executable('kd_tree_test', files(['tests/kd_tree.cpp', 'utils/packed_heap.cpp']), cpp_args : compile_args + ['-DPACKED_HEAP_DEBUG', '-DPACKED_HEAP_MERGE_SECTIONS=0', '-DOFFSET_POINTER_DEBUG'], link_args : linker_args, dependencies : thread_dep)
 90 | intersect = executable('intersect_test', files(['tests/intersect.cpp', 'utils/intersect.cpp', 'utils/intersect_box.cpp']), cpp_args : compile_args + ['-DPACKED_HEAP_DEBUG', '-DPACKED_HEAP_MERGE_SECTIONS=0', '-DOFFSET_POINTER_DEBUG'], link_args : linker_args, dependencies : thread_dep) # built, but not registered with test() below
 91 | 
 92 | 
 93 | test('shared mutex', smtx)
 94 | test('mutex', mtx)
 95 | test('thread_pool', tpool)
 96 | test('crc', crc)
 97 | #test('code_crypt', code_crypt)
 98 | test('settings', settings)
 99 | test('allocator', allocator)
100 | test('kd_tree', kd_tree)
101 | #currently no automated test available
102 | #test('shmem_allocator', shmem_allocator)
103 | 
104 | foreach p : ['no-sse2', 'sse2', 'sse4.2', 'avx', 'avx2', 'avx512f'] # one vector test binary per instruction-set flag, each built with -m<flag>
105 |         vectors = executable('vector_test_'+p, files(['tests/vector.cpp']), cpp_args : compile_args + ['-m'+p], link_args : linker_args, dependencies : thread_dep)
106 |         test('vectors_'+p, vectors)
107 | endforeach
108 | 


--------------------------------------------------------------------------------
/meson_options.txt:
--------------------------------------------------------------------------------
1 | option('msvc_dir', type: 'string', value : '', description : 'The root directory for all msvc includes and libraries')
2 | 


--------------------------------------------------------------------------------
/players.h:
--------------------------------------------------------------------------------
  1 | #ifndef PLAYERS_H
  2 | #define PLAYERS_H
  3 | 
  4 | /*
  5 |  * Here is the basic data for the players.
  6 |  * It is used by the various features of the framework.
  7 |  * Game-specific functions need to be implemented
  8 |  * to fill in the data.
  9 | */
 10 | 
 11 | #include "math/mmath.h"
 12 | #include "utils/intersect.h"
 13 | #include "utils/shared_utils.h"
 14 | #include <string.h>
 15 | 
 16 | constexpr int PLAYER_CHUNKS = NumOfSIMD(MAX_PLAYERS); //MAX_PLAYERS run through NumOfSIMD (declared elsewhere)
 17 | const int NAME_LEN = 32; //size of each Players::name entry
 18 | const int MAX_HITBOXES = 16; //length of every array inside HitboxList
 19 | constexpr int HITBOX_CHUNKS = NumOfSIMD(MAX_HITBOXES); //same conversion applied to the hitbox count
 20 | 
 21 | #ifndef MULTIPOINT_COUNT //a macro of this name defined earlier takes precedence
 22 | constexpr size_t MULTIPOINT_COUNT = 8; //lane count of mvec3 unless overridden
 23 | #endif
 24 | using mvec3 = vec3soa<float, MULTIPOINT_COUNT>;
 25 | 
 26 | /*
 27 |   UPDATED is set when EXISTS is set and something was updated
 28 |   HITBOXES_UPDATED is set when all the hitbox data was updated (so that aimbot data is correct)
 29 |   Other flags are self-explanatory
 30 | */
 31 | 
 32 | enum Flags //state bits; every value is a distinct power of two
 33 | {
 34 | 	EXISTS = 0x01,
 35 | 	UPDATED = 0x02, //set when EXISTS is set and something was updated
 36 | 	ONGROUND = 0x04,
 37 | 	DUCKING = 0x08,
 38 | 	HITBOXES_UPDATED = 0x10, //all the hitbox data was updated, so aimbot data is correct
 39 | 	FRIENDLY = 0x20
 40 | };
 41 | 
 42 | enum Keys //button bits; every value is a distinct power of two
 43 | {
 44 | 	ATTACK1 = 0x1,
 45 | 	ATTACK2 = 0x2,
 46 | 	JUMP = 0x4
 47 | };
 48 | 
 49 | struct alignas(SIMD_COUNT * 4) //every array below holds MAX_HITBOXES entries
 50 | HitboxList
 51 | {
 52 | 	matrix<3,4> wm[MAX_HITBOXES]; //presumably the hitbox-to-world transform - TODO confirm
 53 | 
 54 | 	vec3_t start[MAX_HITBOXES];
 55 | 	vec3_t end[MAX_HITBOXES];
 56 | 
 57 | 	float damageMul[MAX_HITBOXES]; //presumably a damage multiplier per hitbox - TODO confirm
 58 | 	float radius[MAX_HITBOXES];
 59 | 
 60 | 	mvec3 mpOffset[MAX_HITBOXES]; //MULTIPOINT_COUNT-lane vectors; "mp" presumably stands for multipoint - TODO confirm
 61 | 	mvec3 mpDir[MAX_HITBOXES];
 62 | };
 63 | 
 64 | /*
 65 |   All player data is sorted in some fashion.
 66 |   To access the player by its internal ID, use the sortIDs member
 67 |   Player instance should be kept externally as it may lead to invalid pointers on history worlds
 68 | */
 69 | 
 70 | struct Players
 71 | {
 72 | 	vec3_t* boundsStart; //also the base address of the single block malloc'ed by Allocate
 73 | 	vec3_t* boundsEnd;
 74 | 	vec3_t* origin;
 75 | 	vec3_t* eyePos;
 76 | 	vec3_t* velocity;
 77 | 	CapsuleColliderSOA<SIMD_COUNT> (*colliders)[NumOfSIMD(MAX_HITBOXES)];
 78 | 	HitboxList* hitboxes;
 79 | 	int* flags;
 80 | 	int* health;
 81 | 	int* armor;
 82 | 	float* time;
 83 | 	char (*name)[NAME_LEN];
 84 | 	float* fov;
 85 | 	matrix<3,4> (*bones)[MAX_BONES];
 86 | 	//Used for sorting the player
 87 | 	int sortIDs[MAX_PLAYERS];
 88 | 	int unsortIDs[MAX_PLAYERS];
 89 | 	int count;
 90 | 	float globalTime;
 91 | 	//Payload bytes needed per player, plus the slack reserved for the AlignUp calls in Allocate
 92 | 	static constexpr size_t sizePerPlayer = sizeof(boundsStart[0]) + sizeof(boundsEnd[0]) + sizeof(origin[0]) + sizeof(eyePos[0]) + sizeof(velocity[0]) + sizeof(colliders[0]) + sizeof(hitboxes[0]) + sizeof(flags[0]) + sizeof(health[0]) + sizeof(armor[0]) + sizeof(time[0]) + sizeof(name[0]) + sizeof(fov[0]) + sizeof(bones[0]);
 93 | 	static constexpr size_t extraAlignmentNeeds = alignof(vec3_t) * 5 + alignof(decltype(colliders[0])) + alignof(decltype(hitboxes[0])) + alignof(void*) + alignof(int) * 3 + alignof(float) + alignof(char*) + alignof(float) + alignof(decltype(bones));
 94 | 
 95 | 	const auto& operator=(Players& o) //bitwise copy: afterwards the pointer members of both objects refer to the same block
 96 | 	{
 97 | 		memcpy(this, &o, sizeof(Players));
 98 | 		return *this;
 99 | 	}
100 | 
101 | 	int Resort(const Players& target, int id) //looks id up in unsortIDs, then that result in target.sortIDs; MAX_PLAYERS means no valid match
102 | 	{
103 | 		if (id < 0 || id >= MAX_PLAYERS) //reject ids that would index outside unsortIDs
104 | 			return MAX_PLAYERS;
105 | 		int uid = unsortIDs[id];
106 | 		if (uid < 0 || uid >= MAX_PLAYERS)
107 | 			return MAX_PLAYERS;
108 | 		int sid = target.sortIDs[uid];
109 | 		return (sid >= 0 && sid < MAX_PLAYERS && sid < target.count) ? sid : MAX_PLAYERS;
110 | 	}
111 | 
112 | 	void FreeAll() //frees the block when count and boundsStart are both set, zeroes the object and resets both ID tables to -1
113 | 	{
114 | 		if (count && boundsStart) {
115 | 			free((void*)boundsStart);
116 | 		}
117 | 
118 | 		memset(this, 0, sizeof(*this));
119 | 		memset(sortIDs, -1, sizeof(sortIDs));
120 | 		memset(unsortIDs, -1, sizeof(unsortIDs));
121 | 	}
122 | 
123 | 	void Allocate(int cnt) //re-creates storage for cnt players; the object stays empty when cnt <= 0 or malloc fails
124 | 	{
125 | 		FreeAll();
126 | 		if (cnt <= 0) //FreeAll skips the free while count is 0, so never allocate for an empty set
127 | 			return;
128 | 
129 | 		void* data = malloc(sizePerPlayer * (size_t)cnt + extraAlignmentNeeds);
130 | 		if (!data) //out of memory: keep the empty state FreeAll just established
131 | 			return;
132 | 		count = cnt;
133 | 		//We have to align some of the data
134 | 		boundsStart = (vec3_t*)data;
135 | 		boundsEnd = AlignUp(boundsStart + count);
136 | 		origin = AlignUp(boundsEnd + count);
137 | 		eyePos = AlignUp(origin + count);
138 | 		velocity = AlignUp(eyePos + count);
139 | 		colliders = AlignUp((decltype(colliders))(velocity + count));
140 | 		hitboxes = AlignUp((decltype(hitboxes))(colliders + count));
141 | 		flags = AlignUp((decltype(flags))(hitboxes + count));
142 | 		health = AlignUp(flags + count);
143 | 		armor = AlignUp(health + count);
144 | 		time = AlignUp((decltype(time))(armor + count));
145 | 		name = AlignUp((decltype(name))(time + count));
146 | 		fov = AlignUp((decltype(fov))(name + count));
147 | 		bones = AlignUp((decltype(bones))(fov + count));
148 | 	}
149 | 
150 | 	Players(int cnt) : Players() //start from the empty state so the FreeAll inside Allocate never reads indeterminate members
151 | 	{
152 | 		Allocate(cnt);
153 | 	}
154 | 
155 | 	Players()
156 | 	{
157 | 		memset(this, 0, sizeof(*this));
158 | 		memset(sortIDs, -1, sizeof(sortIDs));
159 | 		memset(unsortIDs, -1, sizeof(unsortIDs));
160 | 	}
161 | 
162 | 	~Players()
163 | 	{
164 | 		FreeAll();
165 | 	}
166 | };
167 | 
168 | struct alignas(SIMD_COUNT * 4) //plain data record; alignment is SIMD_COUNT * 4 bytes
169 | LocalPlayer
170 | {
171 | 	vec3_t eyePos;
172 | 	vec3_t angles;
173 | 	vec3_t aimOffset;
174 | 	vec3_t origin;
175 | 	vec3_t velocity;
176 | 	float time;
177 | 	int weaponAmmo;
178 | 	float weaponDamage;
179 | 	float weaponPenetration;
180 | 	float weaponArmorPenetration;
181 | 	float weaponRange;
182 | 	float weaponRangeModifier;
183 | 	int keys; //presumably a combination of Keys bits - TODO confirm
184 | 	int flags; //presumably a combination of Flags bits - TODO confirm
185 | 	int ID;
186 | };
187 | 
188 | #endif
189 | 


--------------------------------------------------------------------------------
/tests/crc.cpp:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include "../utils/crc32.h"
 3 | #include <string.h>
 4 | 
 5 | int main() //returns 0 when every check passes, otherwise the number of the first failed check
 6 | {
 7 | 	auto h1 = CCRC32("Lorem ipsum"); //short text hashed through CCRC32
 8 | 	auto h2 = "Lorem ipsum"_crc32; //same text through the _crc32 literal suffix
 9 | 
10 | 	if (h1 ^ h2) //both spellings must produce the same value
11 | 		return 1;
12 | 
13 | 	if (h1 ^ 0xF44BFB59) //hard-coded reference value for the short text
14 | 		return 2;
15 | 
16 | 	auto h3 = "Lorem ipsum dolor sit amet, modo probo patrioque eos ne, no porro admodum aliquando pro. Posse pertinax erroribus sed at, sed apeirian ocurreret intellegebat ne, te qui facete quaeque dolorum. Quem dolor sed at, usu nonumes facilisi ne. Postea vocibus luptatum id sed."_crc32;
17 | 
18 | 	if (0xFF9776B0 ^ h3) //hard-coded reference value for the long text
19 | 		return 3;
20 | 
21 | 	const char* lipsum_text = "Lorem ipsum dolor sit amet, modo probo patrioque eos ne, no porro admodum aliquando pro. Posse pertinax erroribus sed at, sed apeirian ocurreret intellegebat ne, te qui facete quaeque dolorum. Quem dolor sed at, usu nonumes facilisi ne. Postea vocibus luptatum id sed.";
22 | 
23 | 	auto h4 = Crc32(lipsum_text, strlen(lipsum_text)); //Crc32 called with an explicit length
24 | 	auto h5 = Crc32(lipsum_text); //Crc32 called with the pointer only
25 | 
26 | 	if (0xFF9776B0 ^ h4)
27 | 		return 4;
28 | 
29 | 	if (h4 ^ h5) //both Crc32 call forms must agree
30 | 		return 5;
31 | 
32 | 	return 0;
33 | }
34 | 


--------------------------------------------------------------------------------
/tests/intersect.cpp:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <time.h>
  3 | #include <stdlib.h>
  4 | #include <chrono>
  5 | #include "../utils/intersect_impl.h"
  6 | #include "../utils/intersect_box_impl.h"
  7 | 
  8 | //This is more of a speedtest, not a unit test
  9 | 
 10 | static constexpr long INTERSECTC = 640000; //number of scalar rays
 11 | static constexpr long SOA_SIZE = SIMD_COUNT; //rays packed into one SOA entry
 12 | static constexpr long SOA_INTERSECTC = INTERSECTC / SOA_SIZE; //number of SOA entries covering the same ray count
 13 | static constexpr long REPEAT_C = 50; //passes over the data per benchmark
 14 | 
 15 | typedef std::chrono::high_resolution_clock Clock;
 16 | 
 17 | vec3_t rays[INTERSECTC][2];
 18 | svec3<SOA_SIZE> raysSoa[SOA_INTERSECTC][2]; //main only ever touches the first SOA_INTERSECTC entries, so nothing larger is reserved
 19 | 
 20 | int main() //speed test: fills the ray arrays with random values, then times each Intersect variant and prints the milliseconds
 21 | {
 22 | 	srand(time(nullptr));
 23 | 
 24 | 	volatile float rad = 1.f;
 25 | 	volatile vec3_t startV(0);
 26 | 	volatile vec3_t endV(10);
 27 | 
 28 | 	vec3_t start = *(vec3_t*)&startV;
 29 | 	vec3_t end = *(vec3_t*)&endV;
 30 | 
 31 | 	volatile svec3<SOA_SIZE> startSoaV(0);
 32 | 	volatile svec3<SOA_SIZE> endSoaV(10);
 33 | 
 34 | 	svec3<SOA_SIZE> startSoa = *(svec3<SOA_SIZE>*)&startSoaV;
 35 | 	svec3<SOA_SIZE> endSoa = *(svec3<SOA_SIZE>*)&endSoaV;
 36 | 
 37 | 	[[maybe_unused]] CapsuleCollider testCollider;
 38 | 	testCollider.start = start;
 39 | 	testCollider.end = end;
 40 | 	testCollider.radius = rad;
 41 | 
 42 | 	[[maybe_unused]] CapsuleColliderSOA<SOA_SIZE> testColliderSOA;
 43 | 	testColliderSOA.start = startSoa;
 44 | 	testColliderSOA.end = endSoa;
 45 | 
 46 | 	AABBCollider testBox(start, end);
 47 | 	AABBColliderSOA testBoxSOA(startSoa, endSoa);
 48 | 
 49 | 	for (long i = 0; i < SOA_SIZE; i++)
 50 | 		testColliderSOA.radius[i] = rad + 0.01f * i;
 51 | 
 52 | 	for (long i = 0; i < INTERSECTC; i++)
 53 | 		for (long o = 0; o < 2; o++)
 54 | 			for (long u = 0; u < 3; u++)
 55 | 				rays[i][o][u] = (rand() % 10000) * (((rand() % 2) * 2) - 1);
 56 | 
 57 | 	for (long i = 0; i < SOA_INTERSECTC; i++)
 58 | 		for (long o = 0; o < 2; o++)
 59 | 			for (long u = 0; u < 3; u++)
 60 | 				for (int p = 0; p < SOA_SIZE; p++)
 61 | 					raysSoa[i][o][u][p] = (rand() % 10000) * (((rand() % 2) * 2) - 1);
 62 | 
 63 | 	//All counters printed below are long, hence %ld
 64 | 	{
 65 | 		printf("Intersecting 1ray - 1box... %ld times\n", INTERSECTC * REPEAT_C);
 66 | 
 67 | 		auto t1 = Clock::now();
 68 | 
 69 | 		for (volatile long r = 0; r < REPEAT_C; r++)
 70 | 			for (volatile long i = 0; i < INTERSECTC; i++)
 71 | 				[[maybe_unused]] volatile bool res = testBox.Intersect(rays[i][0], rays[i][1]);
 72 | 
 73 | 		auto t2 = Clock::now();
 74 | 
 75 | 		printf("Finished intersecting in %lu.\n", (unsigned long)std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count());
 76 | 	}
 77 | 
 78 | 	{
 79 | 		printf("Intersecting %ldrays - 1box... %ld times\n", SOA_SIZE, SOA_INTERSECTC * REPEAT_C); //this run times testBox, not a capsule
 80 | 
 81 | 		auto t1 = Clock::now();
 82 | 
 83 | 		for (volatile long r = 0; r < REPEAT_C; r++)
 84 | 			for (volatile long i = 0; i < SOA_INTERSECTC; i++)
 85 | 				[[maybe_unused]] volatile auto res = testBox.IntersectSOA(raysSoa[i][0], raysSoa[i][1]);
 86 | 
 87 | 		auto t2 = Clock::now();
 88 | 
 89 | 		printf("Finished intersecting in %lu.\n", (unsigned long)std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count());
 90 | 	}
 91 | 
 92 | 	{
 93 | 		printf("Intersecting 1ray - %ldboxes... %ld times\n", SOA_SIZE, SOA_INTERSECTC * REPEAT_C);
 94 | 
 95 | 		auto t1 = Clock::now();
 96 | 
 97 | 		for (volatile long r = 0; r < REPEAT_C; r++)
 98 | 			for (volatile long i = 0; i < SOA_INTERSECTC; i++)
 99 | 				[[maybe_unused]] volatile auto res = testBoxSOA.Intersect(rays[i][0], rays[i][1]);
100 | 
101 | 		auto t2 = Clock::now();
102 | 
103 | 		printf("Finished intersecting in %lu.\n", (unsigned long)std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count());
104 | 	}
105 | 
106 | 	{
107 | 		printf("Intersecting 1rays - 1box (x%ld)... %ld times\n", SOA_SIZE, SOA_INTERSECTC * REPEAT_C);
108 | 
109 | 		auto t1 = Clock::now();
110 | 
111 | 		for (volatile long r = 0; r < REPEAT_C; r++)
112 | 			for (volatile long i = 0; i < SOA_INTERSECTC; i++)
113 | 				[[maybe_unused]] volatile auto res = testBoxSOA.IntersectSSOA(raysSoa[i][0], raysSoa[i][1]);
114 | 
115 | 		auto t2 = Clock::now();
116 | 
117 | 		printf("Finished intersecting in %lu.\n", (unsigned long)std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count());
118 | 	}
119 | 
120 | 	{
121 | 		printf("Intersecting 1ray - 1capsule... %ld times\n", INTERSECTC * REPEAT_C);
122 | 
123 | 		auto t1 = Clock::now();
124 | 
125 | 		for (volatile long r = 0; r < REPEAT_C; r++)
126 | 			for (volatile long i = 0; i < INTERSECTC; i++)
127 | 				[[maybe_unused]] volatile bool res = testCollider.Intersect(rays[i][0], rays[i][1]);
128 | 
129 | 		auto t2 = Clock::now();
130 | 
131 | 		printf("Finished intersecting in %lu.\n", (unsigned long)std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count());
132 | 	}
133 | 
134 | 	{
135 | 		printf("Intersecting %ldrays - 1capsule... %ld times\n", SOA_SIZE, SOA_INTERSECTC * REPEAT_C);
136 | 
137 | 		auto t1 = Clock::now();
138 | 
139 | 		for (volatile long r = 0; r < REPEAT_C; r++)
140 | 			for (volatile long i = 0; i < SOA_INTERSECTC; i++)
141 | 				[[maybe_unused]] volatile auto res = testCollider.IntersectSOA(raysSoa[i][0], raysSoa[i][1]);
142 | 
143 | 		auto t2 = Clock::now();
144 | 
145 | 		printf("Finished intersecting in %lu.\n", (unsigned long)std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count());
146 | 	}
147 | 
148 | 	{
149 | 		printf("Intersecting 1ray - %ldcapsules... %ld times\n", SOA_SIZE, SOA_INTERSECTC * REPEAT_C);
150 | 
151 | 		auto t1 = Clock::now();
152 | 
153 | 		for (volatile long r = 0; r < REPEAT_C; r++)
154 | 			for (volatile long i = 0; i < SOA_INTERSECTC; i++)
155 | 				[[maybe_unused]] volatile auto res = testColliderSOA.Intersect(rays[i][0], rays[i][1]);
156 | 
157 | 		auto t2 = Clock::now();
158 | 
159 | 		printf("Finished intersecting in %lu.\n", (unsigned long)std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count());
160 | 	}
161 | 
162 | 	return 0;
163 | }
164 | 


--------------------------------------------------------------------------------
/tests/kd_tree.cpp:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <math.h>
  3 | #include <vector>
  4 | #include <stdlib.h>
  5 | #include <time.h>
  6 | #include "../utils/kd_tree.h"
  7 | #include "../utils/freelistallocator.h"
  8 | #include "../utils/allocwraps.h"
  9 | #include <algorithm>
 10 | 
 11 | constexpr int DIMS = 150;
 12 | constexpr int ALLOC_COUNT = 700;
 13 | 
 14 | //Two-coordinate point type used as the element of the KD tree under test.
 15 | //depth is carried along but never takes part in comparisons.
 16 | template<typename T>
 17 | struct KDPoint
 18 | {
 19 | 	T pt[2];
 20 | 	T depth;
 21 | 
 22 | 	//Value-initialised coordinates, zero depth
 23 | 	constexpr KDPoint() : pt(), depth(0) {}
 24 | 
 25 | 	//Point at (x, y) with zero depth
 26 | 	constexpr KDPoint(T x, T y) : pt{x, y}, depth(0) {}
 27 | 
 28 | 	//Point at (x, y) carrying an explicit depth
 29 | 	constexpr KDPoint(T x, T y, T d) : pt{x, y}, depth(d) {}
 30 | 
 31 | 	//Read-only access to coordinate idx; no range check is performed
 32 | 	const T& operator[](int idx) const
 33 | 	{
 34 | 		return pt[idx];
 35 | 	}
 36 | 
 37 | 	//Equal when both coordinates match
 38 | 	bool operator==(const KDPoint& rhs) const
 39 | 	{
 40 | 		return pt[0] == rhs.pt[0] && pt[1] == rhs.pt[1];
 41 | 	}
 42 | 
 43 | 	//Exact negation of operator==
 44 | 	bool operator!=(const KDPoint& rhs) const
 45 | 	{
 46 | 		const bool same = (*this == rhs);
 47 | 		return !same;
 48 | 	}
 49 | };
 50 | 
 51 | uintptr_t allocBase = 0; //template argument of the allocator below
 52 | generic_free_list_allocator<allocBase, true> alloc(3200, PlacementPolicy::FIND_FIRST);
 53 | KDTree<KDPoint<int>, 2, stateful_allocator<TreeNode_t<KDPoint<int>>, alloc>> tree; //tree under test; its nodes go through alloc
 54 | std::vector<KDPoint<int>> testData; //copy of every inserted point, used as the reference in main
 55 | 
 56 | int main()
 57 | {
 58 | 	srand(time(nullptr));
 59 | 
 60 | 	int status = 0;
 61 | 
 62 | 	//Three rounds, each starting from an empty tree and an empty reference list
 63 | 	for (size_t round = 0; round < 3; round++) {
 64 | 		printf("Clearing...\n");
 65 | 		tree.Clear();
 66 | 		testData.clear();
 67 | 
 68 | 		//Insert random points and remember a copy of each one
 69 | 		for (int i = 0; i < ALLOC_COUNT; i++) {
 70 | 			auto ref = tree.Insert(KDPoint<int>(rand() % DIMS, rand() % DIMS));
 71 | 			testData.push_back(**ref);
 72 | 		}
 73 | 
 74 | 		//Probe every cell of the DIMS x DIMS grid and compare the tree with the reference list
 75 | 		for (int i = 0; i < DIMS; i++) {
 76 | 			bool dirty = false;
 77 | 
 78 | 			for (int o = 0; o < DIMS; o++) {
 79 | 				KDPoint<int> pt(i, o);
 80 | 				auto ref = tree.Find(pt);
 81 | 				const bool expected = std::find(testData.begin(), testData.end(), pt) != testData.end();
 82 | 
 83 | 				if (ref) {
 84 | 					if (!expected) {
 85 | 						//the tree reports a point that was never inserted
 86 | 						putchar('X');
 87 | 						dirty = true;
 88 | 						status++;
 89 | 					}
 90 | 				} else if (expected) {
 91 | 					//an inserted point cannot be found in the tree
 92 | 					putchar('x');
 93 | 					dirty = true;
 94 | 					status++;
 95 | 				}
 96 | 			}
 97 | 			if (dirty)
 98 | 				putchar('\n');
 99 | 		}
100 | 	}
101 | 
102 | 	//Only the low 8 bits of the exit status reach the parent on POSIX systems,
103 | 	//so an unclamped count of 256 mismatches would be reported as success.
104 | 	return status > 255 ? 255 : status;
105 | 
106 | }
107 | 


--------------------------------------------------------------------------------
/tests/mutex.cpp:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include "../utils/threading.h"
 3 | #include "../utils/mutex.h"
 4 | 
 5 | #include <atomic>
 6 | #include <thread>
 7 | #include <chrono>
 8 | 
 9 | std::atomic_int expected_value(0); //last value the writer finished publishing
10 | int value = 0; //only read or written while mtx is held
11 | std::atomic_bool cont(true); //cleared by main and polled by every worker without a lock, so it must be atomic
12 | Mutex mtx;
13 | std::atomic_int global_return_status(0); //number of inconsistencies seen by the readers
14 | 
15 | void* __stdcall WriteFunc(void*) //writer thread body: keeps publishing random values until cont is cleared
16 | {
17 | 	while (cont) {
18 | 		int newval = rand();
19 | 		mtx.lock();
20 | 		value = newval;
21 | 		std::this_thread::sleep_for(std::chrono::microseconds(100 + rand() % 10000)); //value and expected_value disagree for this long, with the lock held
22 | 		expected_value.store(value);
23 | 		mtx.unlock();
24 | 		std::this_thread::sleep_for(std::chrono::microseconds(100 + rand() % 1000)); //pause outside the lock before the next round
25 | 	}
26 | 	return nullptr;
27 | }
28 | 
29 | void* __stdcall ReadFunc(void*) //reader thread body: counts every time it sees value differ from expected_value under the lock
30 | {
31 | 	int local = -1; //last value this reader has already checked
32 | 	while (cont) {
33 | 		mtx.lock();
34 | 		if (value != local) {
35 | 			if (value != expected_value.load()) //the writer only leaves these unequal while it holds mtx
36 | 				global_return_status++;
37 | 			local = value;
38 | 		}
39 | 		mtx.unlock();
40 | 		std::this_thread::sleep_for(std::chrono::microseconds(rand() % 100));
41 | 	}
42 | 	return nullptr;
43 | }
44 | 
45 | int main() //runs one writer and three readers for half a second; exits with 0 when no reader saw an inconsistency
46 | {
47 | 	srand(time(nullptr));
48 | 	thread_t write_thread = Threading::StartThread(WriteFunc, nullptr, false);
49 | 	thread_t read_thread1 = Threading::StartThread(ReadFunc, nullptr, false);
50 | 	thread_t read_thread2 = Threading::StartThread(ReadFunc, nullptr, false);
51 | 	thread_t read_thread3 = Threading::StartThread(ReadFunc, nullptr, false);
52 | 
53 | 	std::this_thread::sleep_for(std::chrono::microseconds(500000));
54 | 
55 | 	cont = false; //ask every worker loop to stop
56 | 	void* ret = nullptr;
57 | 	Threading::JoinThread(write_thread, &ret);
58 | 	Threading::JoinThread(read_thread1, &ret);
59 | 	Threading::JoinThread(read_thread2, &ret);
60 | 	Threading::JoinThread(read_thread3, &ret);
61 | 	//Report plain pass/fail: a raw (negated) count is cut to 8 bits on POSIX and could wrap around to 0
62 | 	return global_return_status.load() != 0 ? 1 : 0;
63 | }
64 | 


--------------------------------------------------------------------------------
/tests/settings.cpp:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include "../utils/settings.h"
  3 | #include "../utils/shared_utils.h"
  4 | #include "../utils/freelistallocator.h"
  5 | #include "../utils/allocwraps.h"
  6 | 
  7 | #include <atomic>
  8 | #include <thread>
  9 | #include <chrono>
 10 | 
 11 | typedef std::chrono::high_resolution_clock Clock;
 12 | 
 13 | //Pointer proxies prevent the option variables from storing and rechecking the address of the SettingsGroup layer every time it is accessed. Should improve performance
 14 | SettingsGroup* globalSettingsPtr = new typename std::decay<decltype(*globalSettingsPtr)>::type(); //heap-allocated SettingsGroup that is never deleted in this file
 15 | pointer_proxy<globalSettingsPtr> globalSettings;
 16 | 
 17 | SettingsGroup groupASettings;
 18 | SettingsGroup groupBSettings;
 19 | SettingsGroup* groupSettings = &groupASettings; //main re-points this between the A and B groups
 20 | 
 21 | SettingsGroup overrideASettings;
 22 | SettingsGroup overrideBSettings;
 23 | SettingsGroup* overrideSettings = &overrideASettings; //main re-points this between the A and B groups
 24 | 
 25 | Option<int, "option_A"_crc32, globalSettings> option_A(1); //constructed with 1, bound to globalSettings only
 26 | Option<int, "option_B"_crc32, overrideSettings, groupSettings, globalSettings> option_B(23); //constructed with 23, bound to three settings layers
 27 | 
 28 | static int EXIT(int ret) //prints the code and returns it unchanged, so main can write `return EXIT(n);`
 29 | {
 30 | 	printf("Return: %d\n", ret);
 31 | 	return ret;
 32 | }
 33 | 
 34 | int main() //returns 0 on success; a non-zero value is the number of the first failed check
 35 | {
 36 | 	if (option_A != 1)
 37 | 		return EXIT(1);
 38 | 
 39 | 	if (option_B != 23)
 40 | 		return EXIT(2);
 41 | 
 42 | 	groupSettings = &groupBSettings;
 43 | 
 44 | 	if (option_A != 1)
 45 | 		return EXIT(3);
 46 | 
 47 | 	if (option_B != 23) {
 48 | 		printf("%d\n", option_B + 0);
 49 | 		return EXIT(4);
 50 | 	}
 51 | 
 52 | 	option_B = 360;
 53 | 
 54 | 	overrideSettings = &overrideBSettings;
 55 | 
 56 | 	if (option_A != 1)
 57 | 		return EXIT(5);
 58 | 
 59 | 	if (option_B != 23) {
 60 | 		printf("%d\n", option_B + 0);
 61 | 		return EXIT(6);
 62 | 	}
 63 | 
 64 | 	groupASettings.Set<"option_B"_crc32>((int)993.2);
 65 | 
 66 | 	if (option_B == 993)
 67 | 		return EXIT(7);
 68 | 
 69 | 	groupSettings = &groupASettings;
 70 | 
 71 | 	if (option_B != 993)
 72 | 		return EXIT(8);
 73 | 
 74 | 	int optB = option_B;
 75 | 
 76 | 	std::vector<unsigned char> buf;
 77 | 	groupSettings->Serialize(buf);
 78 | 
 79 | 	if (SettingsGroup(buf).SettingsGroup::Get<int, "option_B"_crc32>() != optB) //a group rebuilt from the serialized bytes must return the same value
 80 | 		return EXIT(9);
 81 | 
 82 | 	{
 83 | 		auto t1 = Clock::now();
 84 | 		for (volatile int i = 0; i < 10000000; i++) {
 85 | 			[[maybe_unused]]
 86 | 			volatile int a = option_B;
 87 | 
 88 | 			if (i % 10 == 0 && overrideSettings != &overrideASettings)
 89 | 				overrideSettings = &overrideASettings;
 90 | 			else
 91 | 				overrideSettings = &overrideBSettings;
 92 | 
 93 | 			if (i % 2000 == 0 && groupSettings != &groupASettings)
 94 | 				groupSettings = &groupASettings;
 95 | 			else
 96 | 				groupSettings = &groupBSettings;
 97 | 
 98 | 		}
 99 | 		auto t2 = Clock::now();
100 | 		//count() is a 64-bit integer whose exact type varies by platform, so print it through long long
101 | 		printf("Time: %lld\n", (long long)std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count());
102 | 	}
103 | 
104 | 	{
105 | 		auto t1 = Clock::now();
106 | 		for (volatile int i = 0; i < 10000000; i++) {
107 | 			[[maybe_unused]]
108 | 			volatile int a = option_A;
109 | 		}
110 | 		auto t2 = Clock::now();
111 | 
112 | 		printf("Time: %lld\n", (long long)std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count());
113 | 	}
114 | 
115 | 	return 0;
116 | }
117 | 


--------------------------------------------------------------------------------
/tests/shared_mutex.cpp:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include "../utils/threading.h"
 3 | #include "../utils/shared_mutex.h"
 4 | 
 5 | #include <atomic>
 6 | #include <thread>
 7 | #include <chrono>
 8 | 
 9 | std::atomic_int expected_value(0); //last value the writer finished publishing
10 | int value = 0; //written under the write lock, read under the read lock
11 | std::atomic_bool cont(true); //cleared by main and polled by every worker without a lock, so it must be atomic
12 | SharedMutex mtx;
13 | std::atomic_int global_return_status(0); //number of inconsistencies seen by the readers
14 | 
15 | void* __stdcall WriteFunc(void*) //writer thread body: keeps publishing random values until cont is cleared
16 | {
17 | 	while (cont) {
18 | 		int newval = rand();
19 | 		mtx.wlock();
20 | 		value = newval;
21 | 		std::this_thread::sleep_for(std::chrono::microseconds(100 + rand() % 10000)); //value and expected_value disagree for this long, with the write lock held
22 | 		expected_value.store(value);
23 | 		mtx.wunlock();
24 | 		std::this_thread::sleep_for(std::chrono::microseconds(100 + rand() % 1000)); //pause outside the lock before the next round
25 | 	}
26 | 	return nullptr;
27 | }
28 | 
29 | void* __stdcall ReadFunc(void*) //reader thread body: counts every time it sees value differ from expected_value under the read lock
30 | {
31 | 	int local = -1; //last value this reader has already checked
32 | 	while (cont) {
33 | 		mtx.rlock();
34 | 		if (value != local) {
35 | 			if (value != expected_value.load()) //the writer only leaves these unequal while it holds the write lock
36 | 				global_return_status++;
37 | 			local = value;
38 | 		}
39 | 		mtx.runlock();
40 | 		std::this_thread::sleep_for(std::chrono::microseconds(rand() % 100));
41 | 	}
42 | 	return nullptr;
43 | }
44 | 
45 | int main() //runs one writer and three readers for half a second; exits with 0 when no reader saw an inconsistency
46 | {
47 | 	srand(time(nullptr));
48 | 	thread_t write_thread = Threading::StartThread(WriteFunc, nullptr, false);
49 | 	thread_t read_thread1 = Threading::StartThread(ReadFunc, nullptr, false);
50 | 	thread_t read_thread2 = Threading::StartThread(ReadFunc, nullptr, false);
51 | 	thread_t read_thread3 = Threading::StartThread(ReadFunc, nullptr, false);
52 | 
53 | 	std::this_thread::sleep_for(std::chrono::microseconds(500000));
54 | 
55 | 	cont = false; //ask every worker loop to stop
56 | 	void* ret = nullptr;
57 | 	Threading::JoinThread(write_thread, &ret);
58 | 	Threading::JoinThread(read_thread1, &ret);
59 | 	Threading::JoinThread(read_thread2, &ret);
60 | 	Threading::JoinThread(read_thread3, &ret);
61 | 	//Report plain pass/fail: a raw (negated) count is cut to 8 bits on POSIX and could wrap around to 0
62 | 	return global_return_status.load() != 0 ? 1 : 0;
63 | }
64 | 


--------------------------------------------------------------------------------
/tests/shmemalloc.cpp:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include "../utils/freelistallocator.h"
  3 | #include "../utils/allocwraps.h"
  4 | #include <vector>
  5 | #ifdef _WIN32
  6 | #include <windows.h>
  7 | #else
  8 | #include <sys/mman.h>
  9 | #include <sys/types.h>
 10 | #include <sys/stat.h>
 11 | #include <unistd.h>
 12 | #include <fcntl.h>
 13 | #endif
 14 | #include "../utils/named_semaphores.h"
 15 | #include <atomic>
 16 | 
 17 | uintptr_t allocBase = 0; //set by main to the address of the mapping
 18 | 
 19 | generic_free_list_allocator<allocBase>* alloc = nullptr; //main places or locates this at allocBase
 20 | std::vector<int, stateful_allocator<int, alloc>>* vec = nullptr; //main places or locates this directly behind *alloc
 21 | NamedSemaphore sem("/shm_test_sem"); //posted once per pushed value, waited on by the second process
 22 | 
 23 | constexpr unsigned long msz = 1 << 20; //size of the mapping in bytes
 24 | 
 25 | bool MapSharedMemory(void*& addr);
 26 | void UnmapSharedMemory(void* addr);
 27 | 
 28 | int main() //first process creates the allocator and vector in the mapping and pushes values from stdin; later processes attach and print
 29 | {
 30 | 
 31 | 	std::atomic_bool b(false);
 32 | 	printf("LCK %d\n", (int)b.is_lock_free());
 33 | 	void* addr = nullptr;
 34 | 	bool firstTime = MapSharedMemory(addr);
 35 | 
 36 | 	if (addr) {
 37 | 		allocBase = (uintptr_t)addr;
 38 | 
 39 | 		printf("Alloc base: %llx\n", (unsigned long long)allocBase); //uintptr_t is not unsigned long on every platform
 40 | 
 41 | 		if (firstTime) {
 42 | 			alloc = new((decltype(alloc))allocBase) generic_free_list_allocator<allocBase>((1 << 20) - 500, PlacementPolicy::FIND_FIRST, (void*)(allocBase + 200));
 43 | 			vec = new((decltype(vec))(allocBase + sizeof(*alloc))) std::vector<int, stateful_allocator<int, alloc>>(10);
 44 | 		} else {
 45 | 			alloc = (decltype(alloc))allocBase;
 46 | 			vec = (decltype(vec))(allocBase + sizeof(*alloc));
 47 | 		}
 48 | 		int n = 0;
 49 | 		//Missing or malformed input counts as "no values" instead of leaving n indeterminate
 50 | 		if (scanf("%d", &n) != 1)
 51 | 			n = 0;
 52 | 		if (firstTime) {
 53 | 			for (int i = 0; i < n; i++) {
 54 | 				int m = 0; //pushed as 0 when the read below fails
 55 | 				if (scanf("%d", &m) != 1) m = 0;
 56 | 				printf("PUSHING %d to vec\n", m);
 57 | 				vec->push_back(m);
 58 | 				printf("PUSHED %d to vec (%zu %d)\n", m, vec->size(), (*vec)[i]);
 59 | 				sem.Post();
 60 | 			}
 61 | 
 62 | 			for (size_t i = 0; i < vec->size(); i++) {
 63 | 				printf("%d\n", (*vec)[i]);
 64 | 			}
 65 | 		} else {
 66 | 			for (int i = 0; i < n; i++) {
 67 | 				sem.Wait();
 68 | 				printf("INTERPUSHED to vec (%zu %d)\n", vec->size(), (*vec)[i]);
 69 | 			}
 70 | 		}
 71 | 
 72 | 		UnmapSharedMemory(addr);
 73 | 	}
 74 | 
 75 | 
 76 | 	return 0;
 77 | }
 78 | 
 79 | bool MapSharedMemory(void*& addr) //maps the "shm_test" region into addr (left nullptr on failure); returns true when this call had to create it
 80 | {
 81 | 	bool firstTime = false;
 82 | 
 83 | #ifdef _WIN32
 84 | 	HANDLE mapFile = OpenFileMapping(FILE_MAP_ALL_ACCESS, FALSE, "shm_test");
 85 | 
 86 | 	printf("OPEN FILE %p\n", mapFile);
 87 | 
 88 | 	if (!(void*)mapFile) {
 89 | 		mapFile = CreateFileMapping(INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, msz, "shm_test");
 90 | 		firstTime = true;
 91 | 	}
 92 | 
 93 | 	printf("OPEN FILE %p\n", mapFile);
 94 | 
 95 | 	if (mapFile) {
 96 | 		addr = (void*)MapViewOfFile(mapFile, FILE_MAP_ALL_ACCESS, 0, 0, msz);
 97 | 	}
 98 | 	printf("MAP FILE %p\n", addr);
 99 | 
100 | 	if (!firstTime)
101 | 		CloseHandle(mapFile);
102 | 
103 | #else
104 | 	int fd = shm_open("shm_test", O_RDWR, S_IRUSR | S_IWUSR);
105 | 	if (fd == -1) {
106 | 		firstTime = true;
107 | 		fd = shm_open("shm_test", O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);
108 | 	}
109 | 	//ftruncate signals failure with -1; only map once the object really has msz bytes
110 | 	if (fd != -1 && ftruncate(fd, msz) != -1)
111 | 		addr = mmap(nullptr, msz, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
112 | 
113 | 	if (addr == MAP_FAILED)
114 | 		addr = nullptr;
115 | 	if (fd != -1)
116 | 		close(fd); //an established mapping stays valid after its descriptor is closed
117 | 	if (!firstTime)
118 | 		shm_unlink("shm_test");
119 | #endif
120 | 
121 | 	return firstTime;
122 | }
123 | 
124 | void UnmapSharedMemory(void* addr) //releases the msz-byte view obtained from MapSharedMemory
125 | {
126 | #ifdef _WIN32
127 | 	UnmapViewOfFile(addr);
128 | #else
129 | 	munmap(addr, msz);
130 | #endif
131 | }
132 | 


--------------------------------------------------------------------------------
/tests/thread_pool.cpp:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include "../utils/threading.h"
 3 | #include "../submodules/minitrace/minitrace.h"
 4 | 
 5 | #include <thread>
 6 | #include <chrono>
 7 | 
 8 | int value = 0; //not referenced by TJob or main in this file
 9 | bool cont = true; //not referenced by TJob or main in this file
10 | Mutex mtx; //not referenced by TJob or main in this file
11 | std::atomic_int global_return_status(0); //decremented per queued job, incremented per executed job
12 | 
13 | struct threadjob //argument handed to TJob
14 | {
15 | 	int delay; //sleep time in microseconds
16 | };
17 | 
18 | void TJob(threadjob* job) //job body: sleeps for job->delay microseconds inside a trace scope, then counts itself as done
19 | {
20 | 	MTR_BEGIN("test", "sleep_job");
21 | 	std::this_thread::sleep_for(std::chrono::microseconds(job->delay));
22 | 	MTR_END("test", "sleep_job");
23 | 	global_return_status++;
24 | }
25 | 
26 | typedef std::chrono::high_resolution_clock Clock; //only used by the commented-out timing code in main
27 | long mtime = 0; //only used by the commented-out timing code in main
28 | std::atomic_long mtimef(0);
29 | 
30 | int main() //queues two batches of 10000 sleep jobs and exits with 0 when executed jobs did not outnumber queued ones
31 | {
32 | 	mtr_init("trace.json");
33 | 	MTR_META_PROCESS_NAME("thread_pool_test");
34 | 
35 | 	srand(time(nullptr));
36 | 	Threading::InitThreads();
37 | 
38 | 	MTR_BEGIN("test", "queue_jobs");
39 | 	for (int i = 0; i < 10000; i++) {
40 | 		//auto t1 = Clock::now();
41 | 		Threading::QueueJob(TJob, threadjob{0 * 1 + rand() % 10}); //standard brace-init temporary instead of a GNU compound literal
42 | 		//auto t2 = Clock::now();
43 | 		//mtime = std::max((long long)mtime, (long long)std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count());
44 | 		global_return_status--;
45 | 		/*if (!(i % 100000)) {
46 | 			printf("%d... (%lu)\n", i, mtime);
47 | 			mtime = 0;
48 | 			}*/
49 | 	}
50 | 	MTR_END("test", "queue_jobs");
51 | 
52 | 	threadjob* testdata[1000];
53 | 
54 | 	MTR_BEGIN("test", "generate_test_data");
55 | 	for (int i = 0; i < 1000; i++) {
56 | 		testdata[i] = (threadjob*)malloc(sizeof(threadjob));
57 | 		testdata[i]->delay = rand() % 10;
58 | 	}
59 | 	MTR_END("test", "generate_test_data");
60 | 
61 | 	printf("Queued the first batch of jobs\n");
62 | 
63 | 	Threading::FinishQueue();
64 | 
65 | 	printf("Finished the first batch of jobs (%d)\n", global_return_status.load());
66 | 
67 | 	MTR_BEGIN("test", "queue_jobs_ref");
68 | 	for (int i = 0; i < 10000; i++) {
69 | 		Threading::QueueJobRef(TJob, testdata[i % 1000]);
70 | 		global_return_status--;
71 | 		/*if (!(i % 10000))
72 | 		  printf("%d...\n", i);*/
73 | 	}
74 | 	MTR_END("test", "queue_jobs_ref");
75 | 
76 | 	printf("Queued the second batch of jobs\n");
77 | 
78 | 	Threading::FinishQueue();
79 | 	Threading::EndThreads();
80 | 
81 | 	printf("Finished the second batch of jobs (%d)\n", global_return_status.load());
82 | 
83 | 	for (int i = 0; i < 1000; i++)
84 | 		free(testdata[i]);
85 | 
86 | 	if (global_return_status < 0) //NOTE(review): a negative balance (fewer jobs ran than were queued) is reported as success - confirm this is intended
87 | 		global_return_status.store(0);
88 | 
89 | 	mtr_flush();
90 | 	mtr_shutdown();
91 | 	const int surplus = global_return_status.load(); //kept within 8 bits so a large count cannot wrap around to 0 as an exit status
92 | 	return surplus > 255 ? 255 : surplus;
93 | }
94 | 


--------------------------------------------------------------------------------
/tests/vector.cpp:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include "../math/vector.h"
  3 | #include "../math/matrix.h"
  4 | #include <stdlib.h>
  5 | #include <time.h>
  6 | 
  7 | constexpr vec3_t rotation = vec3_t(0, M_PI / 2, 0);
  8 | constexpr matrix<3, 4> rotationMatrix = matrix<3, 4>::GetMatrix(rotation);
  9 | constexpr vec3_t rotatedVec = rotationMatrix.Vector3Rotate(vec3_t(1, 0, 0));
 10 | constexpr vec3_t expectedVec = vec3_t(0, 1, 0);
 11 | static_assert(rotatedVec.DistToSqr(expectedVec) < 0.1); //compile-time check: (1, 0, 0) rotated by rotationMatrix must land close to (0, 1, 0)
 12 | 
 13 | int exit_error(int i) //writes the code to stderr and returns it unchanged, for use as `return exit_error(n);`
 14 | {
 15 | 	fprintf(stderr, "%d\n", i);
 16 | 	return i;
 17 | }
 18 | 
// Runtime round-trip tests for vec3_t and vec3soa<float, 8>.
//
// Returns 0 when every test passes. On the first failed comparison it returns
// (and prints through exit_error) <base> + <failing index>, where base is
// 1000 / 2000 / 3000 for the three vec3_t tests and 10000 / 20000 / 30000
// for the three vec3soa tests.
int main()
{
	srand(time(nullptr));

	// Exercise construction from fewer than three components and the
	// conversions between the vector flavours; the results are not inspected.
	vec3_t a(1), b(1, 2);
	[[maybe_unused]] vec3soa<float, 8> stype = b;
	[[maybe_unused]] vec3 v3 = b;
	[[maybe_unused]] vec3_t d = {3.f, 3.f, 3.f};

	printf("%f %f %f\n%f %f %f\n", a[0], a[1], a[2], b[0], b[1], b[2]);

	{
		// Random inputs; every argument is an integer in [1, 100], so none is zero
		// (the division test below relies on that).
		vec3_t arr[100];
		for (int i = 0; i < 100; i++)
			arr[i] = vec3_t(rand() % 100 + 1, rand() % 100 + 1, rand() % 100 + 1, rand() % 100 + 1);

		// Test 1: (x + y) - y == x and (x + y) - x == y, compared with operator!=.
		{
			vec3_t combined[50];

			for (int i = 0; i < 50; i++)
				combined[i] = arr[i] + arr[i + 50];

			vec3_t back[100];

			for (int i = 0; i < 50; i++)
				back[i] = combined[i] - arr[i + 50];

			for (int i = 0; i < 50; i++)
				back[i + 50] = combined[i] - arr[i];

			for (int i = 0; i < 100; i++)
				if (arr[i] != back[i])
					return exit_error(1000 + i);
		}

		printf("Vec test 1 passed!\n");

		// Test 2: (x * y) / y ~= x and (x * y) / x ~= y, within a squared length of 0.01.
		{
			vec3_t combined[50];

			for (int i = 0; i < 50; i++)
				combined[i] = arr[i] * arr[i + 50];

			vec3_t back[100];

			for (int i = 0; i < 50; i++)
				back[i] = combined[i] / arr[i + 50];

			for (int i = 0; i < 50; i++)
				back[i + 50] = combined[i] / arr[i];

			for (int i = 0; i < 100; i++)
				if ((arr[i] - back[i]).LengthSqr() > 0.01f) {
					printf("Mismatch! %f %f %f | %f %f %f\n", arr[i][0], arr[i][1], arr[i][2], back[i][0], back[i][1], back[i][2]);
					return exit_error(2000 + i);
				}
		}

		printf("Vec test 2 passed!\n");

		// Test 3: Sqrt(x * x) ~= x, within a squared length of 0.01.
		{
			vec3_t combined[100];

			for (int i = 0; i < 100; i++)
				combined[i] = arr[i] * arr[i];

			vec3_t back[100];

			for (int i = 0; i < 100; i++)
				back[i] = combined[i].Sqrt();

			for (int i = 0; i < 100; i++)
				if ((arr[i] - back[i]).LengthSqr() > 0.01f)
					return exit_error(3000 + i);
		}

		printf("Vec test 3 passed!\n");
	}

	{
		// Same three tests for the SoA type: 3 rows of 8 lanes per element,
		// each lane filled with an integer in [1, 100].
		vec3soa<float, 8> arr[100];
		for (int i = 0; i < 100; i++)
			for (int u = 0; u < 3; u++)
				for (int o = 0; o < 8; o++)
					arr[i][u][o] = rand() % 100 + 1;

		// SoA test 1: add / subtract round trip; the summed absolute error must stay <= 0.01.
		{
			vec3soa<float, 8> combined[50];

			for (int i = 0; i < 50; i++)
				combined[i] = arr[i] + arr[i + 50];

			vec3soa<float, 8> back[100];

			for (int i = 0; i < 50; i++)
				back[i] = combined[i] - arr[i + 50];

			for (int i = 0; i < 50; i++)
				back[i + 50] = combined[i] - arr[i];

			for (int i = 0; i < 100; i++)
				if ((arr[i] - back[i]).Abs().AddedUpTotal() > 0.01f)
					return exit_error(10000 + i);
		}

		printf("VecSoa test 1 passed!\n");

		// SoA test 2: multiply / divide round trip with the same tolerance.
		{
			vec3soa<float, 8> combined[50];

			for (int i = 0; i < 50; i++)
				combined[i] = arr[i] * arr[i + 50];

			vec3soa<float, 8> back[100];

			for (int i = 0; i < 50; i++)
				back[i] = combined[i] / arr[i + 50];

			for (int i = 0; i < 50; i++)
				back[i + 50] = combined[i] / arr[i];

			for (int i = 0; i < 100; i++)
				if ((arr[i] - back[i]).Abs().AddedUpTotal() > 0.01f)
					return exit_error(20000 + i);
		}

		printf("VecSoa test 2 passed!\n");

		// SoA test 3: square / Sqrt round trip; "Part 1" and "Part 2" are progress markers.
		{
			vec3soa<float, 8> combined[100];

			for (int i = 0; i < 100; i++)
				combined[i] = arr[i] * arr[i];

			printf("Part 1\n");

			vec3soa<float, 8> back[100];

			for (int i = 0; i < 100; i++)
				back[i] = combined[i].Sqrt();

			printf("Part 2\n");

			for (int i = 0; i < 100; i++)
				if ((arr[i] - back[i]).Abs().AddedUpTotal() > 0.01f)
					return exit_error(30000 + i);
		}

		printf("VecSoa test 3 passed!\n");
	}

	return 0;
}
172 | 
// Compile-time checks of constexpr construction, indexing and multiplication.
constexpr vec3_t v(1);
constexpr vec3_t v2(2);

static_assert(v[0] == 1);
static_assert((v * v2)[0] == 2);

// vec3soa built from (1, 2): the checks below pin index [r][c] for r in {0, 1}
// to 1, 2, 1 for c = 0, 1, 2 respectively.
constexpr vec3soa<float, 8> vs(1, 2);
static_assert(vs[0][0] == 1 && vs[1][0] == 1);
static_assert(vs[0][1] == 2 && vs[1][1] == 2);
static_assert(vs[0][2] == 1 && vs[1][2] == 1);

// AllArithmetic must be true only when every listed type is arithmetic.
static_assert(!AllArithmetic<vecp<float, 3>, vecp<float, 3>>::value);
static_assert(AllArithmetic<int, float, double, long>::value);
static_assert(!AllArithmetic<int, float, char*>::value);
187 | 


--------------------------------------------------------------------------------
/utils/allocwraps.h:
--------------------------------------------------------------------------------
  1 | #ifndef ALLOCWRAPS_H
  2 | #define ALLOCWRAPS_H
  3 | 
  4 | template<auto& BASE, bool REALLOCATABLE>
  5 | class FreeListAllocator;
  6 | 
  7 | #include "freelistallocator.h"
  8 | 
  9 | //Safely define void reference type
 10 | template<typename T>
 11 | struct GetReference
 12 | {
 13 | 	using type = T&;
 14 | };
 15 | 
 16 | template<>
 17 | struct GetReference<void>
 18 | {
 19 | 	using type = void;
 20 | };
 21 | 
// Pointer-like wrapper that stores an address as an offset relative to BASE
// instead of as an absolute address.
//
// `ptr` holds (absolute address - BASE) reinterpreted as a T*; the all-ones
// bit pattern produced by `~0ul` is the null sentinel. Converting back adds
// BASE again (see GetRawPtr).
// NOTE(review): `~0ul` is an `unsigned long`, so the sentinel is only
// 32 bits wide where `unsigned long` is 32 bits - confirm that is acceptable.
template<typename T, auto& BASE>
struct offset_pointer_t
{
	// Stored offset (not a dereferenceable pointer), or the null sentinel.
	T* ptr;

	using difference_type = ptrdiff_t;
	using value_type = T;
	using pointer = T*;
	using reference = typename GetReference<T>::type;
	using iterator_category = std::random_access_iterator_tag;

	// Default construction yields the null sentinel.
	constexpr offset_pointer_t()
		: ptr((T*)~0ul)
	{
	}

	// Debug-only range check, compiled in when OFFSET_POINTER_DEBUG is defined:
	// a non-null pointer must resolve to an address within [BASE, BASE + 0x2063a0].
	// NOTE(review): `throw;` with no exception in flight calls std::terminate,
	// so a failed check aborts rather than raising a catchable exception.
	constexpr void CheckPtr()
	{
#ifdef OFFSET_POINTER_DEBUG
		if (this->operator bool() && (((uintptr_t)ptr + (uintptr_t)BASE) < (uintptr_t)BASE || ((uintptr_t)ptr + (uintptr_t)BASE) > (uintptr_t)BASE + 0x2063a0))
			throw;
#endif
	}

	constexpr offset_pointer_t(const offset_pointer_t& o)
	{
		*this = o;
	}

	// Copies the stored offset verbatim.
	constexpr offset_pointer_t& operator=(const offset_pointer_t& o)
	{
		ptr = o.ptr;
		CheckPtr();
		return *this;
	}

	// Assigns from an absolute pointer: null maps to the sentinel, anything
	// else is rebased to an offset from BASE.
	constexpr offset_pointer_t& operator=(T* o)
	{
		ptr = o ? (T*)((uintptr_t)o - (uintptr_t)BASE) : (T*)~0ul;
		CheckPtr();
		return *this;
	}


	// Absolute address as an integer, or 0 for the null sentinel.
	// Note that this conversion is not const-qualified.
	constexpr operator size_t()
	{
		if ((uintptr_t)ptr ^ ~0ul)
			return (size_t)((uintptr_t)ptr + (uintptr_t)BASE);
		return 0;
	}

	// Constructs from an absolute pointer (null maps to the sentinel).
	constexpr offset_pointer_t(T* p)
		: ptr(p ? (T*)((uintptr_t)p - (uintptr_t)BASE) : (T*)~0ul)
	{
		CheckPtr();
	}

	// Converts from an offset pointer to another pointee type with the same
	// BASE; the stored offset is reused unchanged.
	template<typename F>
	constexpr offset_pointer_t(offset_pointer_t<F, BASE> o)
	: ptr((T*)o.ptr)
	{
		CheckPtr();
	}

	// Constructs from an arithmetic value interpreted as an absolute address
	// (0 maps to the sentinel).
	template<typename F, typename = typename std::enable_if<AllArithmetic<F>::value>::type>
	constexpr offset_pointer_t(F p)
		: ptr(p ? (T*)((uintptr_t)p - (uintptr_t)BASE) : (T*)~0ul)
	{
		CheckPtr();
	}

	// Constructs from an absolute pointer to an arithmetic type F.
	template<typename F, typename = typename std::enable_if<AllArithmetic<F>::value>::type>
	constexpr offset_pointer_t(F* p)
		: ptr(p ? (T*)((uintptr_t)p - (uintptr_t)BASE) : (T*)~0ul)
	{
		CheckPtr();
	}

	// Resolves to the absolute pointer (BASE + offset), or nullptr for the sentinel.
	constexpr T* GetRawPtr() const
	{
		return ((uintptr_t)ptr ^ ~0ul) ? (T*)((uintptr_t)BASE + (uintptr_t)ptr) : nullptr;
	}


	// Dereference; removed from overload resolution when T is void.
	template<typename H = T>
	constexpr typename std::enable_if<!std::is_same<H, void>::value, H&>::type operator*() const
	{
		return *GetRawPtr();
	}

	constexpr T* operator->() const
	{
		return GetRawPtr();
	}

	// Indexing relative to the resolved pointer; not available for void.
	template<typename H = T>
	constexpr typename std::enable_if<!std::is_same<H, void>::value, H&>::type operator[](int idx) const
	{
		return GetRawPtr()[idx];
	}

	// Equality compares the stored offsets, so two null sentinels compare equal.
	constexpr bool operator==(const offset_pointer_t& o) const
	{
		return ptr == o.ptr;
	}

	constexpr bool operator!=(const offset_pointer_t& o) const
	{
		return ptr != o.ptr;
	}

	// Distance between two offset pointers, in units of T.
	constexpr difference_type operator-(offset_pointer_t i) const
	{
		return ptr - i.ptr;
	}

	// Pointer arithmetic: advances the stored offset by i elements, then
	// rebuilds the result from the corresponding absolute address.
	template<typename F>
	constexpr offset_pointer_t operator+(F i) const
	{
		return offset_pointer_t((uintptr_t)(ptr + i) + (uintptr_t)BASE);
	}

	template<typename F>
	constexpr offset_pointer_t operator-(F i) const
	{
		return offset_pointer_t((uintptr_t)(ptr - i) + (uintptr_t)BASE);
	}

	// In-place arithmetic operates directly on the stored offset and does not
	// run CheckPtr.
	template<typename F>
	constexpr offset_pointer_t& operator+=(F i)
	{
		ptr += i;
		return *this;
	}

	template<typename F>
	constexpr offset_pointer_t& operator-=(F i)
	{
		ptr -= i;
		return *this;
	}

	// Only the prefix forms of ++ and -- are provided.
	constexpr offset_pointer_t& operator++()
	{
		ptr++;
		return *this;
	}

	constexpr offset_pointer_t& operator--()
	{
		ptr--;
		return *this;
	}

	// True unless the stored value is the null sentinel.
	constexpr operator bool() const
	{
		return ((uintptr_t)ptr ^ ~0ul);
	}

	constexpr bool operator!() const
	{
		return !this->operator bool();
	}

	// Implicit conversion to the absolute raw pointer.
	constexpr operator pointer() const
	{
		return GetRawPtr();
	}

	// Builds an offset pointer from the address of `ref`, cast to T*.
	template<typename F>
	static constexpr offset_pointer_t pointer_to(F& ref)
	{
		return offset_pointer_t((T*)&ref);
	}
};
197 | 
198 | template<auto& BASE, bool REALLOCATABLE = false>
199 | class generic_free_list_allocator : FreeListAllocator<BASE, REALLOCATABLE>
200 | {
201 |   public:
202 | 	static constexpr auto& base = BASE;
203 | 
204 | 	template<typename F>
205 | 	struct pointer_t : offset_pointer_t<F, BASE>
206 | 	{
207 | 		typedef offset_pointer_t<F, BASE> base_class;
208 | 		typedef typename base_class::difference_type difference_type;
209 | 		typedef typename base_class::value_type value_type;
210 | 		typedef typename base_class::pointer pointer;
211 | 		typedef typename base_class::reference reference;
212 | 		typedef typename base_class::iterator_category iterator_category;
213 | 
214 | 		constexpr pointer_t()
215 | 			: base_class()
216 | 		{
217 | 		}
218 | 
219 | 		constexpr pointer_t(const pointer_t& o)
220 | 		{
221 | 			*this = o;
222 | 		}
223 | 
224 | 		constexpr pointer_t& operator=(const pointer_t& o)
225 | 		{
226 | 			base_class::operator=(o);
227 | 			return *this;
228 | 		}
229 | 
230 | 		constexpr pointer_t(F* p)
231 | 			: base_class(p) {}
232 | 
233 | 		constexpr pointer_t(const base_class& o)
234 | 			: base_class(o) {}
235 | 
236 | 		template<typename H = F>
237 | 		constexpr typename std::enable_if<!std::is_same<H, void>::value, H&>::type operator*() const
238 | 		{
239 | 			return base_class::operator*();
240 | 		}
241 | 
242 | 		constexpr F* operator->() const
243 | 		{
244 | 			return base_class::operator->();
245 | 		}
246 | 
247 | 		constexpr bool operator==(const pointer_t& o) const
248 | 		{
249 | 			return base_class::operator==(o);
250 | 		}
251 | 
252 | 		constexpr bool operator!=(const pointer_t& o) const
253 | 		{
254 | 			return base_class::operator!=(o);
255 | 		}
256 | 
257 | 		constexpr difference_type operator-(pointer_t i) const
258 | 		{
259 | 			return this->ptr - i.ptr;
260 | 		}
261 | 
262 | 		template<typename G>
263 | 		constexpr pointer_t operator+(G i) const
264 | 		{
265 | 			return base_class::template operator+(i);
266 | 		}
267 | 
268 | 		template<typename G>
269 | 		constexpr pointer_t operator-(G i) const
270 | 		{
271 | 			return base_class::template operator-(i);
272 | 		}
273 | 
274 | 		constexpr pointer_t& operator++()
275 | 		{
276 | 			base_class::operator++();
277 | 			return *this;
278 | 		}
279 | 
280 | 		constexpr pointer_t& operator--()
281 | 		{
282 | 			base_class::operator--();
283 | 			return *this;
284 | 		}
285 | 
286 | 		constexpr operator bool() const
287 | 		{
288 | 			return base_class::operator bool();
289 | 		}
290 | 
291 | 
292 | 		template<typename H>
293 | 		static constexpr pointer_t pointer_to(H& ref)
294 | 		{
295 | 			return pointer_t((F*)&ref);
296 | 		}
297 | 	};
298 | 
299 | 	typedef size_t size_type;
300 | 	typedef ptrdiff_t difference_type;
301 | 	typedef std::false_type is_always_equal;
302 | 	typedef std::true_type propagate_on_container_move_assignment;
303 | 
304 | 	template<typename... Args>
305 | 	generic_free_list_allocator(Args... args)
306 | 		: FreeListAllocator<BASE, REALLOCATABLE>(args...)
307 | 	{
308 | 	}
309 | 
310 | 	~generic_free_list_allocator()
311 | 	{
312 | 	}
313 | 
314 | 	template<typename T>
315 | 	inline auto allocate(size_type size)
316 | 	{
317 | 		auto ptr = this->Allocate(sizeof(T) * size, std::alignment_of<T>::value);
318 | 		return pointer_t<T>((T*)&*ptr);
319 | 	}
320 | 
321 | 	template<typename T>
322 | 	inline void deallocate(pointer_t<T> ptr, size_type size)
323 | 	{
324 | 		this->Free(offset_pointer_t<void*, BASE>((void**)&*ptr));
325 | 	}
326 | 
327 | 	inline bool operator==(const generic_free_list_allocator& o) const
328 | 	{
329 | 		return this->m_start_ptr == o.m_start_ptr;
330 | 	}
331 | };
332 | 
333 | template<typename T, auto& BASE, bool REALLOCATABLE = true, size_t SIZE = 10000, PlacementPolicy PLACEMENT_POLICY = PlacementPolicy::FIND_FIRST>
334 | class free_list_allocator : public generic_free_list_allocator<BASE, REALLOCATABLE>
335 | {
336 | 
337 |   public:
338 | 	static constexpr auto& base = BASE;
339 | 
340 | 	template<typename F>
341 | 	using pointer_t = typename generic_free_list_allocator<BASE, REALLOCATABLE>::template pointer_t<F>;
342 | 	using value_type = T;
343 | 	using pointer = pointer_t<T>;
344 | 	using size_type = size_t;
345 | 	using difference_type = ptrdiff_t;
346 | 	using is_always_equal = std::false_type;
347 | 	using propagate_on_container_move_assignment = std::false_type;
348 | 
349 | 	free_list_allocator(size_t size = SIZE, PlacementPolicy pPolicy = PLACEMENT_POLICY)
350 | 	: generic_free_list_allocator<BASE, REALLOCATABLE>(size * sizeof(T), pPolicy)
351 | 	{
352 | 	}
353 | 
354 | 	template<typename H, size_t SZ2, PlacementPolicy PP2>
355 | 	free_list_allocator(const free_list_allocator<H, BASE, REALLOCATABLE, SZ2, PP2>& o)
356 | 	: generic_free_list_allocator<BASE, REALLOCATABLE>(SZ2 * sizeof(T), PP2)
357 | 	{
358 | 	}
359 | 
360 | 
361 | 	~free_list_allocator()
362 | 	{
363 | 	}
364 | 
365 | 	inline pointer allocate(size_type size)
366 | 	{
367 | 		return generic_free_list_allocator<BASE, REALLOCATABLE>::template allocate<T>(size);
368 | 	}
369 | 
370 | 	inline void deallocate(pointer ptr, size_type size)
371 | 	{
372 | 		generic_free_list_allocator<BASE, REALLOCATABLE>::template deallocate<T>(ptr, size);
373 | 	}
374 | 
375 | 	inline bool operator==(const free_list_allocator& o)
376 | 	{
377 | 		return generic_free_list_allocator<BASE, REALLOCATABLE>::template operator==(o);
378 | 	}
379 | 
380 | 	template<typename Other>
381 | 	struct rebind
382 | 	{
383 | 		using other = free_list_allocator<Other, BASE, REALLOCATABLE, SIZE, PLACEMENT_POLICY>;
384 | 	};
385 | };
386 | 
// Allocator adaptor that owns no state of its own: every operation is
// forwarded to the allocator obtained by dereferencing a default-constructed
// F (`*F()`).
// NOTE(review): this assumes `*F()` always yields the same allocator object -
// confirm against the definition of F (e.g. pointer_proxy).
template<typename T, typename F>
class stateful_pointer_allocator
{
  public:
	using value_type = T;
	using parent_type = typename std::decay<decltype(*F())>::type;
	using pointer = typename parent_type::template pointer_t<T>;
	using size_type = size_t;
	using difference_type = ptrdiff_t;
	using is_always_equal = std::false_type;
	using propagate_on_container_move_assignment = std::true_type;

	// Rebinding copy: there is nothing to copy.
	template<typename H>
	stateful_pointer_allocator(const stateful_pointer_allocator<H, F>& o)
	{
	}

	stateful_pointer_allocator()
	{
	}

	~stateful_pointer_allocator()
	{
	}

	// Allocates storage for `size` objects of T from the target allocator.
	inline pointer allocate(size_type size)
	{
		return (*F()).template allocate<T>(size);
	}

	// Returns storage to the target allocator.
	inline void deallocate(pointer ptr, size_type size)
	{
		(*F()).template deallocate<T>(ptr, size);
	}

	// All instances forward to the same `*F()`, so memory obtained through one
	// can be released through any other: they always compare equal.
	// (The previous body passed `o` - an adaptor, not a parent_type - to the
	// parent's operator==, which could not be instantiated.)
	inline bool operator==(const stateful_pointer_allocator&) const
	{
		return true;
	}

	template<typename Other>
	struct rebind
	{
		using other = stateful_pointer_allocator<Other, F>;
	};
};
433 | 
// Convenience alias: a stateful_pointer_allocator for T whose target is
// reached through pointer_proxy<G>.
// NOTE(review): pointer_proxy is declared elsewhere; presumably it
// dereferences to the object G - confirm.
template<typename T, auto& G>
using stateful_allocator = stateful_pointer_allocator<T, pointer_proxy<G>>;
436 | 
437 | #endif
438 | 


--------------------------------------------------------------------------------
/utils/atomic_lock.cpp:
--------------------------------------------------------------------------------
 1 | #include "atomic_lock.h"
 2 | 
 3 | AtomicLock::AtomicLock()
 4 | {
 5 | 	lck.clear();
 6 | }
 7 | 
 8 | AtomicLock::~AtomicLock()
 9 | {
10 | }
11 | 
12 | void AtomicLock::lock()
13 | {
14 | 	while (lck.test_and_set(std::memory_order_acquire))
15 | 		;
16 | }
17 | 
18 | bool AtomicLock::trylock()
19 | {
20 | 	return !lck.test_and_set(std::memory_order_acquire);
21 | }
22 | 
23 | void AtomicLock::unlock()
24 | {
25 | 	lck.clear(std::memory_order_release);
26 | }
27 | 


--------------------------------------------------------------------------------
/utils/atomic_lock.h:
--------------------------------------------------------------------------------
 1 | #ifndef ATOMIC_LOCK_H
 2 | #define ATOMIC_LOCK_H
 3 | 
 4 | #include <atomic>
 5 | 
// Minimal lock built on a single std::atomic_flag.
class AtomicLock
{
  public:
	AtomicLock();
	~AtomicLock();
	// Acquires the lock.
	void lock();
	// Attempts to acquire the lock and reports whether it succeeded.
	bool trylock();
	// Releases the lock.
	void unlock();
  private:
	// The lock state.
	std::atomic_flag lck;
};
17 | 
18 | #endif
19 | 


--------------------------------------------------------------------------------
/utils/crc32.h:
--------------------------------------------------------------------------------
 1 | #ifndef CRC32_H
 2 | #define CRC32_H
 3 | 
 4 | #include <array>
 5 | #include "shared_utils.h"
 6 | 
// Integer type used for CRC values and for the lookup table entries.
typedef unsigned int crcs_t;
 8 | 
 9 | template<typename T>
10 | constexpr auto GenCRCTable(T polynomial = 0xedb88320) {
11 | 	constexpr int numBytes = 256;
12 | 	constexpr int numIterations = 8;
13 | 
14 | 	std::array<T, numBytes> crc32Table{};
15 | 
16 | 	for (T byte = 0u; byte < numBytes; byte++) {
17 | 		T crc = byte;
18 | 
19 | 		for (int i = 0; i < numIterations; i++) {
20 | 			//Can be done with -(crc & 1), but then MSVC complains about overflows
21 | 			T mask = (crc & 1) ? ~T(0) : T(0);
22 | 			crc = (crc >> 1) ^ (polynomial & mask);
23 | 		}
24 | 
25 | 		crc32Table[byte] = crc;
26 | 	}
27 | 
28 | 	return crc32Table;
29 | }
30 | 
// Lookup table generated at compile time with GenCRCTable's default polynomial.
static constexpr auto crc32Tab = GenCRCTable<crcs_t>();

// CRC of a string literal; `sizeof(x) - 1` drops the terminating NUL.
// NOTE(review): calc_constexpr is declared elsewhere; presumably it forces
// the value to be computed at compile time - confirm.
#define CCRC32(x) calc_constexpr<crcs_t, Crc32(x, sizeof(x) - 1)>::value
34 | 
35 | constexpr crcs_t Crc32(const char* cv, size_t size)
36 | {
37 | 	crcs_t ret(0);
38 | 	ret = ~ret;
39 | 
40 | 	for (size_t i = 0; i < size; i++)
41 | 		ret = crc32Tab[((ret) ^ (cv[i])) & 0xff] ^ ((ret) >> 8);
42 | 
43 | 	return ~ret;
44 | }
45 | 
46 | constexpr crcs_t Crc32(const char* cv)
47 | {
48 | 	crcs_t ret(0);
49 | 	ret = ~ret;
50 | 
51 | 	for (size_t i = 0; cv[i]; i++)
52 | 		ret = crc32Tab[((ret) ^ (cv[i])) & 0xff] ^ ((ret) >> 8);
53 | 
54 | 	return ~ret;
55 | }
56 | 
57 | constexpr crcs_t operator ""_crc32(const char* cv, size_t size)
58 | {
59 | 	return Crc32(cv);
60 | }
61 | 
62 | #endif
63 | 


--------------------------------------------------------------------------------
/utils/freelistallocator.h:
--------------------------------------------------------------------------------
  1 | #ifndef FREELISTALLOCATOR_ENUMS
  2 | #define FREELISTALLOCATOR_ENUMS
// Strategy FreeListAllocator uses to pick a free block for an allocation.
enum PlacementPolicy {
	FIND_FIRST, // dispatches to FindFirst: take the first block that is large enough
	FIND_BEST   // dispatches to FindBest: scan the whole list for the best fit
};
  7 | #endif
  8 | 
  9 | #ifndef FREELISTALLOCATOR_H
 10 | #define FREELISTALLOCATOR_H
 11 | 
 12 | #include <stdint.h>
 13 | #include <stddef.h>
 14 | #include <type_traits>
 15 | #include "shared_utils.h"
 16 | #include <iterator>
 17 | #include "allocwraps.h"
 18 | #include <stdlib.h>
 19 | #include <algorithm>
 20 | #include <string.h>
 21 | 
 22 | /*
 23 |   Credits: mtrebi
 24 |   Added purpose based modifications.
 25 | 
 26 |   This allocator is usable by all stl containers and is usable in shared memory, it requires a global uintptr variable base which provides the necessary offset base for use in shared memory
 27 | */
 28 | 
 29 | static inline size_t CalculatePadding(size_t baseAddress, size_t alignment) {
 30 | 	size_t multiplier = (baseAddress / alignment) + 1;
 31 | 	size_t alignedAddress = multiplier * alignment;
 32 | 	size_t padding = alignedAddress - baseAddress;
 33 | 	return padding;
 34 | }
 35 | 
 36 | static inline size_t CalculatePaddingWithHeader(size_t baseAddress, size_t alignment, size_t headerSize) {
 37 | 	size_t padding = CalculatePadding(baseAddress, alignment);
 38 | 	size_t neededSpace = headerSize;
 39 | 
 40 | 	if (padding < neededSpace){
 41 | 		// Header does not fit - Calculate next aligned address that header fits
 42 | 		neededSpace -= padding;
 43 | 
 44 | 		// How many alignments I need to fit the header
 45 | 		if (neededSpace % alignment > 0)
 46 | 			padding += alignment * (1 + (neededSpace / alignment));
 47 | 		else
 48 | 			padding += alignment * (neededSpace / alignment);
 49 | 	}
 50 | 
 51 | 	return padding;
 52 | }
 53 | 
 54 | template <class T, auto& BASE>
 55 | class SinglyLinkedList {
 56 |   public:
 57 | 	struct Node {
 58 | 		T data;
 59 | 		offset_pointer_t<Node, BASE> next;
 60 | 	};
 61 | 
 62 | 	using NodePtr = offset_pointer_t<Node, BASE>;
 63 | 
 64 | 	offset_pointer_t<Node, BASE> head;
 65 | 
 66 |   public:
 67 | 	SinglyLinkedList()
 68 | 	{
 69 | 
 70 | 	}
 71 | 
 72 | 	void insert(NodePtr previousNode, NodePtr newNode){
 73 | 		if (!previousNode) {
 74 | 			newNode->next = head;
 75 | 			head = newNode;
 76 | 		} else {
 77 | 			newNode->next = previousNode->next;
 78 | 			previousNode->next = newNode;
 79 | 		}
 80 | 	}
 81 | 
 82 | 	void remove(NodePtr previousNode, NodePtr deleteNode){
 83 | 		if (!previousNode)
 84 | 			head = deleteNode->next;
 85 | 		else
 86 | 			previousNode->next = deleteNode->next;
 87 | 	}
 88 | };
 89 | 
 90 | class Allocator {
 91 |   protected:
 92 | 	size_t totalSize;
 93 | 	size_t used;
 94 | 	size_t peak;
 95 |   public:
 96 | 	Allocator(size_t inTotalSize)
 97 | 		: totalSize(inTotalSize), used(), peak() {}
 98 | 
 99 | 	~Allocator()
100 | 	{
101 | 		totalSize = 0;
102 | 	}
103 | };
104 | 
// Payload of a free-list node: the size in bytes of the free block.
struct FreeHeader {
	uint32_t blockSize;
};
108 | 
// Header stored immediately in front of every allocated data block.
struct AllocationHeader {
	uint32_t blockSize;    // block size as recorded by Allocate
	unsigned char padding; // alignment padding inserted before this header
};
113 | 
114 | template<auto& BASE, bool REALLOCATABLE>
115 | class FreeListAllocator : public Allocator {
116 |   protected:
117 | 
118 | 	using Node = typename SinglyLinkedList<FreeHeader, BASE>::Node;
119 | 	using NodePtr = typename SinglyLinkedList<FreeHeader, BASE>::NodePtr;
120 | 	template<typename T>
121 | 	using pointer_t = offset_pointer_t<T, BASE>;
122 | 	using AllocationHeaderPtr = pointer_t<AllocationHeader>;
123 | 
124 | 	uintptr_t baseOffset;
125 | 	offset_pointer_t<void*, BASE> start_ptr = 0;
126 | 	PlacementPolicy pPolicy;
127 | 	SinglyLinkedList<FreeHeader, BASE> freeList;
128 | 	bool freeOnExit;
129 | 
130 |   public:
131 | 	FreeListAllocator(size_t totalSize, const PlacementPolicy inpPolicy, void* startPtr = nullptr)
132 | 		: Allocator(totalSize) {
133 | 		pPolicy = inpPolicy;
134 | 		start_ptr = (void**)startPtr;
135 | 		freeOnExit = false;
136 | 
137 | 		Init(startPtr);
138 | 	}
139 | 
140 | 	~FreeListAllocator()
141 | 	{
142 | 		if (freeOnExit && start_ptr)
143 | 			free((void*)&*start_ptr);
144 | 		start_ptr = nullptr;
145 | 	}
146 | 
147 | 	pointer_t<void*> Allocate(size_t size, size_t alignment = 0)
148 | 	{
149 | 		size_t allocationHeaderSize = sizeof(AllocationHeader);
150 | 		//size_t freeHeaderSize = sizeof(FreeHeader);
151 | 
152 | 		size = std::max(size, sizeof(Node));
153 | 		alignment = std::max(alignment, size_t(8));
154 | 
155 | 		// Search through the free list for a free block that has enough space to allocate our data
156 | 		size_t padding = 0;
157 | 		NodePtr affectedNode, previousNode;
158 | 		this->Find(size, alignment, padding, previousNode, affectedNode);
159 | 
160 | 		if (!affectedNode)
161 | 			if constexpr (REALLOCATABLE) {
162 | 				Reallocate(size);
163 | 				this->Find(size, alignment, padding, previousNode, affectedNode);
164 | 			}
165 | 
166 | 		if (!affectedNode)
167 | #if defined(__cpp_exceptions) || defined(_CPPUNWIND)
168 | 			throw;
169 | #else
170 | 		return nullptr;
171 | #endif
172 | 
173 | 		size_t alignmentPadding = padding - allocationHeaderSize;
174 | 		size_t requiredSize = size + padding;
175 | 
176 | 		size_t rest = affectedNode->data.blockSize >= requiredSize ? affectedNode->data.blockSize - requiredSize : 0;
177 | 
178 | 
179 | 		//There is a bug somewhere causing newFreeNode overlap with affectedNode->next if rest is too small
180 | 		//It probably will not be fixed since it is not of the highest priority
181 | 		if (rest >= sizeof(Node) && (rest > 16 || !affectedNode->next || ((size_t)affectedNode->next - ((size_t)affectedNode + requiredSize)) > sizeof(Node)) && (size_t)affectedNode + requiredSize + sizeof(Node) < (size_t)start_ptr + totalSize) {
182 | 			// We have to split the block into the data block and a free block of size 'rest'
183 | 			NodePtr newFreeNode = NodePtr((size_t) affectedNode + requiredSize);
184 | 			newFreeNode->data.blockSize = (uint32_t)rest;
185 | 			freeList.insert(affectedNode, newFreeNode);
186 | 		}
187 | 		else
188 | 			requiredSize = affectedNode->data.blockSize - alignmentPadding;
189 | 
190 | 
191 | 		freeList.remove(previousNode, affectedNode);
192 | 
193 | 		// Setup data block
194 | 		size_t headerAddress = (size_t) affectedNode + alignmentPadding;
195 | 		size_t dataAddress = headerAddress + allocationHeaderSize;
196 | 		AllocationHeaderPtr(headerAddress)->blockSize = (uint32_t)requiredSize;
197 | 		AllocationHeaderPtr(headerAddress)->padding = (uint8_t)alignmentPadding;
198 | 
199 | 		used += requiredSize + alignmentPadding;
200 | 		peak = std::max(peak, used);
201 | 
202 | 		return (void**)dataAddress;
203 | 	}
204 | 
205 | 	void Free(pointer_t<void*> ptr)
206 | 	{
207 | 		// Insert it in a sorted position by the address number
208 | 		size_t currentAddress = (size_t) ptr;
209 | 		size_t headerAddress = currentAddress - sizeof (AllocationHeader);
210 | 		const AllocationHeaderPtr allocationHeader(headerAddress);
211 | 
212 | 		NodePtr freeNode = NodePtr(headerAddress);
213 | 		freeNode->data.blockSize = allocationHeader->blockSize + allocationHeader->padding;
214 | 		freeNode->next = nullptr;
215 | 
216 | 		NodePtr it = freeList.head;
217 | 		NodePtr itPrev = nullptr;
218 | 		while (it) {
219 | 			if (ptr < it) {
220 | 				freeList.insert(itPrev, freeNode);
221 | 				break;
222 | 			}
223 | 			itPrev = it;
224 | 			it = it->next;
225 | 		}
226 | 
227 | 		used -= freeNode->data.blockSize;
228 | 
229 | 		// Merge contiguous nodes
230 | 		Coalescence(itPrev, freeNode);
231 | 	}
232 | 
233 | 	void Init(void* startPtr = nullptr)
234 | 	{
235 | 		if (freeOnExit && start_ptr) {
236 | 			free((void*)&*start_ptr);
237 | 			start_ptr = nullptr;
238 | 		}
239 | 
240 | 		if (!startPtr) {
241 | 			void** startPtr2 = (void**)malloc(totalSize);
242 | 			if (!startPtr2)
243 | 				throw;
244 | 			if constexpr (REALLOCATABLE) {
245 | 				baseOffset = 0;
246 | 				BASE = (uintptr_t)startPtr2;
247 | 			}
248 | 			start_ptr = startPtr2;
249 | 			freeOnExit = true;
250 | 		} else {
251 | 			start_ptr = (void**)startPtr;
252 | 			baseOffset = (uintptr_t)&*start_ptr - (uintptr_t)BASE;
253 | 		}
254 | 
255 | 		this->Reset();
256 | 	}
257 | 
258 | 	void Reset()
259 | 	{
260 | 		used = 0;
261 | 		peak = 0;
262 | 		NodePtr firstNode = NodePtr(start_ptr);
263 | 		firstNode->data.blockSize = totalSize;
264 | 		firstNode->next = nullptr;
265 | 		freeList.head = nullptr;
266 | 		freeList.insert(nullptr, firstNode);
267 | 	}
268 |   protected:
269 | 	FreeListAllocator(FreeListAllocator &freeListAllocator);
270 | 
271 | 	void Coalescence(NodePtr previousNode, NodePtr freeNode)
272 | 	{
273 | 		if (freeNode->next &&
274 | 			(size_t) freeNode + freeNode->data.blockSize == (size_t) freeNode->next) {
275 | 			freeNode->data.blockSize += freeNode->next->data.blockSize;
276 | 			freeList.remove(freeNode, freeNode->next);
277 | 		}
278 | 
279 | 		if (previousNode &&
280 | 			(size_t) previousNode + previousNode->data.blockSize == (size_t) freeNode) {
281 | 			previousNode->data.blockSize += freeNode->data.blockSize;
282 | 			freeList.remove(previousNode, freeNode);
283 | 		}
284 | 	}
285 | 
286 | 	void Find(size_t size, size_t alignment, size_t& padding, NodePtr& previousNode, NodePtr& foundNode)
287 | 	{
288 | 		switch (pPolicy) {
289 | 		  case FIND_FIRST:
290 | 			  FindFirst(size, alignment, padding, previousNode, foundNode);
291 | 			  break;
292 | 		  case FIND_BEST:
293 | 			  FindBest(size, alignment, padding, previousNode, foundNode);
294 | 			  break;
295 | 		}
296 | 	}
297 | 
298 | 	void FindBest(size_t size, size_t alignment, size_t& padding, NodePtr& previousNode, NodePtr& foundNode)
299 | 	{
300 | 		// Iterate WHOLE list keeping a pointer to the best fit
301 | 		size_t smallestDiff = std::numeric_limits<size_t>::max();
302 | 		NodePtr bestBlock = nullptr;
303 | 		NodePtr it = freeList.head, itPrev = nullptr;
304 | 		while (it) {
305 | 			padding = CalculatePaddingWithHeader((size_t)it, alignment, sizeof (AllocationHeader));
306 | 			size_t requiredSpace = size + padding;
307 | 			if (it->data.blockSize >= requiredSpace && (it->data.blockSize - requiredSpace < smallestDiff)) {
308 | 				bestBlock = it;
309 | 			}
310 | 			itPrev = it;
311 | 			it = it->next;
312 | 		}
313 | 		previousNode = itPrev;
314 | 		foundNode = bestBlock;
315 | 	}
316 | 
317 | 	void FindFirst(size_t size, size_t alignment, size_t& padding, NodePtr& previousNode, NodePtr& foundNode)
318 | 	{
319 | 		//Iterate list and return the first free block with a size >= than given size
320 | 		NodePtr it = freeList.head, itPrev = nullptr;
321 | 
322 | 		while (it) {
323 | 			padding = CalculatePaddingWithHeader((size_t)it, alignment, sizeof (AllocationHeader));
324 | 			size_t requiredSpace = size + padding;
325 | 			if (it->data.blockSize >= requiredSpace) {
326 | 				break;
327 | 			}
328 | 			itPrev = it;
329 | 			it = it->next;
330 | 		}
331 | 		previousNode = itPrev;
332 | 		foundNode = it;
333 | 	}
334 | 
335 | 	void Reallocate(size_t neededSize)
336 | 	{
337 | 		if constexpr (!REALLOCATABLE)
338 | #if defined(__cpp_exceptions) || defined(_CPPUNWIND)
339 | 			throw;
340 | #else
341 | 		return;
342 | #endif
343 | 		else {
344 | 			size_t newSize = (totalSize + neededSize) * 2;
345 | 			size_t oldSize = totalSize;
346 | 			//Possibly change this to realloc
347 | 			void* newPtr = malloc(newSize);
348 | 			void* oldPtr = (void*)&*start_ptr;
349 | 			memcpy(newPtr, oldPtr, totalSize);
350 | 			free(oldPtr);
351 | 			totalSize = newSize;
352 | 
353 | 			BASE = (uintptr_t)newPtr - baseOffset;
354 | 
355 | 			NodePtr it = freeList.head, itPrev = nullptr;
356 | 
357 | 			while (it) {
358 | 				itPrev = it;
359 | 				it = it->next;
360 | 			}
361 | 
362 | 			NodePtr lastNode = NodePtr(BASE + (uintptr_t)oldSize);
363 | 			lastNode->data.blockSize = totalSize - oldSize;
364 | 			lastNode->next = nullptr;
365 | 			freeList.insert(itPrev, lastNode);
366 | 			Coalescence(itPrev, lastNode);
367 | 		}
368 | 	}
369 | };
370 | 
371 | #endif
372 | 


--------------------------------------------------------------------------------
/utils/handles.cpp:
--------------------------------------------------------------------------------
  1 | #include "handles.h"
  2 | #include <mutex>
  3 | #include "string.h"
  4 | 
  5 | #if defined(_WIN32)
  6 | #include <Psapi.h>
  7 | #elif defined(__linux__)
  8 | #include <vector>
  9 | #include <dlfcn.h>
 10 | #include <link.h>
 11 | #include <sys/mman.h>
 12 | #elif defined(__APPLE__)
 13 | #include <vector>
 14 | #include <dlfcn.h>
 15 | #include <sys/mman.h>
 16 | #include <sys/stat.h>
 17 | #include <mach/mach_traps.h>
 18 | #include <mach/mach_init.h>
 19 | #include <mach/mach_error.h>
 20 | #include <mach/mach.h>
 21 | #include <mach-o/dyld_images.h>
 22 | #include <mach-o/loader.h>
 23 | #endif
 24 | 
 25 | #if defined(__linux__) || defined(__APPLE__)
 26 | struct dlinfo_t
 27 | {
 28 | 	const char* library = nullptr;
 29 | 	uintptr_t address = 0;
 30 | 	size_t size = 0;
 31 | };
 32 | 
 33 | std::vector<dlinfo_t> libraries;
 34 | std::mutex lInfoLock;
 35 | 
 36 | #endif
 37 | 
 38 | #ifdef __linux__
 39 | int DlIterateCallback(struct dl_phdr_info* info, size_t, void*)
 40 | {
 41 | 	dlinfo_t libraryInfo;
 42 | 	libraryInfo.library = info->dlpi_name;
 43 | 	libraryInfo.address = info->dlpi_addr + info->dlpi_phdr[0].p_vaddr;
 44 | 	libraryInfo.size = info->dlpi_phdr[0].p_memsz;
 45 | 
 46 | 	libraries.push_back(libraryInfo);
 47 | 
 48 | 	return 0;
 49 | }
 50 | #endif
 51 | 
 52 | #if !defined(_WIN32) && !defined(_WIN64)
 53 | static void InitializeLibraries()
 54 | {
 55 | #if defined(__APPLE__)
 56 | 
 57 | 	struct task_dyld_info dyldInfo;
 58 | 	vm_address_t address = 0;
 59 | 
 60 | 	mach_msg_type_number_t count = TASK_DYLD_INFO_COUNT;
 61 | 	if (task_info(current_task(), TASK_DYLD_INFO, (task_info_t)&dyldInfo, &count) == KERN_SUCCESS)
 62 | 		address = (vm_address_t)dyldInfo.all_image_info_addr;
 63 | 
 64 | 	struct dyld_all_image_infos* dyldaii;
 65 | 	mach_msg_type_number_t size = sizeof(dyld_all_image_infos);
 66 | 	vm_offset_t readMem;
 67 | 	kern_return_t kr = vm_read(current_task(), address, size, &readMem, &size);
 68 | 	if (kr != KERN_SUCCESS)
 69 | 		return;
 70 | 
 71 | 	dyldaii = (dyld_all_image_infos*) readMem;
 72 | 	int imageCount = dyldaii->infoArrayCount;
 73 | 	mach_msg_type_number_t dataCnt = imageCount * 24;
 74 | 	struct dyld_image_info* gdii = nullptr;
 75 | 	gdii = (struct dyld_image_info*) malloc (dataCnt);
 76 | 	// 32bit bs 64bit
 77 | 	kern_return_t kr2 = vm_read(current_task(), (vm_address_t)dyldaii->infoArray, dataCnt, &readMem, &dataCnt);
 78 | 	if (kr2) {
 79 | 		free(gdii);
 80 | 		return;
 81 | 	}
 82 | 	struct dyld_image_info* dii = (struct dyld_image_info*) readMem;
 83 | 	for (int i = 0; i < imageCount; i++) {
 84 | 		dataCnt = 1024;
 85 | 		vm_read(current_task(), (vm_address_t)dii[i].imageFilePath, dataCnt, &readMem, &dataCnt);
 86 | 		char *imageName = (char *)readMem;
 87 | 
 88 | 		if (imageName)
 89 | 			gdii[i].imageFilePath = strdup(imageName);
 90 | 		else
 91 | 			gdii[i].imageFilePath = NULL;
 92 | 		gdii[i].imageLoadAddress = dii[i].imageLoadAddress;
 93 | 
 94 | 		dlinfo_t libraryInfo;
 95 | 		struct stat st;
 96 | 		stat(imageName, &st);
 97 | 
 98 | 		libraryInfo.address = (vm_address_t)dii[i].imageLoadAddress;
 99 | 		libraryInfo.size = st.st_size;
100 | 		libraryInfo.library = gdii[i].imageFilePath;
101 | 
102 | 		libraries.push_back(libraryInfo);
103 | 	}
104 | 
105 | 	free(gdii);
106 | #elif defined(__linux__)
107 | 	dl_iterate_phdr(DlIterateCallback, nullptr);
108 | #endif
109 | }
110 | #endif
111 | 
112 | MHandle Handles::GetModuleHandle(const char* module)
113 | {
114 | #if defined(__APPLE__) || defined(__linux__)
115 | 	lInfoLock.lock();
116 | 	if (!libraries.size())
117 | 		InitializeLibraries();
118 | 	lInfoLock.unlock();
119 | 
120 | 	dlinfo_t* bestMatch = nullptr;
121 | 
122 | 	int target = strlen(module);
123 | 	int lendist = 1000000;
124 | 
125 | 	for (dlinfo_t& i : libraries) {
126 | 		if (strstr(i.library, module)) {
127 | 			int len = strlen(i.library) - target;
128 | 
129 | 			if (len < lendist) {
130 | 				bestMatch = &i;
131 | 				lendist = len;
132 | 			}
133 | 
134 | 			if (lendist == 0) {
135 | 				break;
136 | 			}
137 | 		}
138 | 	}
139 | 
140 | 	return bestMatch != nullptr ? dlopen(bestMatch->library, RTLD_NOLOAD | RTLD_NOW) : nullptr;
141 | #else
142 | 	return ::GetModuleHandleA(module);
143 | #endif
144 | }
145 | 
146 | ModuleInfo Handles::GetModuleInfo(const char* module)
147 | {
148 | 	ModuleInfo ret;
149 | 	ret.handle = nullptr;
150 | 	ret.address = 0;
151 | 	ret.size = 0;
152 | #if defined(__linux__) || defined(__APPLE__)
153 | 	lInfoLock.lock();
154 | 	if (!libraries.size())
155 | 		InitializeLibraries();
156 | 
157 | 	const dlinfo_t* bestMatch = nullptr;
158 | 
159 | 	int target = strlen(module);
160 | 	int lendist = 1000000;
161 | 
162 | 	for (const dlinfo_t& i : libraries) {
163 | 		if (strstr(i.library, module)) {
164 | 			int len = strlen(i.library) - target;
165 | 
166 | 			if (len < lendist) {
167 | 				bestMatch = &i;
168 | 				lendist = len;
169 | 			}
170 | 
171 | 			if (lendist == 0) {
172 | 				break;
173 | 			}
174 | 		}
175 | 	}
176 | 
177 | 	if (bestMatch != nullptr) {
178 | 		ret.handle = dlopen(bestMatch->library, RTLD_NOLOAD | RTLD_NOW);
179 | 		ret.address = bestMatch->address;
180 | 		ret.size = bestMatch->size;
181 | 	}
182 | 
183 | 	lInfoLock.unlock();
184 | #else
185 | 	ret.handle = GetModuleHandle(module);
186 | 	MODULEINFO modInfo;
187 | 	GetModuleInformation(GetCurrentProcess(), ret.handle, &modInfo, sizeof(MODULEINFO));
188 | 	ret.address = (uintptr_t)modInfo.lpBaseOfDll;
189 | 	ret.size = (size_t)modInfo.SizeOfImage;
190 | #endif
191 | 	return ret;
192 | }
193 | 
194 | MHandle Handles::GetPtrModuleHandle(void* ptr)
195 | {
196 | #if defined(__linux__) || defined(__APPLE__)
197 | 	Dl_info info;
198 | 	if (dladdr(ptr, &info) && info.dli_fname)
199 | 		return dlopen(info.dli_fname, RTLD_NOW | RTLD_NOLOAD);
200 | 	return nullptr;
201 | #else
202 | 	return nullptr;
203 | #endif
204 | }
205 | 


--------------------------------------------------------------------------------
/utils/handles.h:
--------------------------------------------------------------------------------
 1 | #ifndef HANDLES_H
 2 | #define HANDLES_H
 3 | 
 4 | #if defined(__linux__) || defined(__APPLE__)
 5 | #include <unistd.h>
 6 | #include <stdint.h>
 7 | typedef void* MHandle;
 8 | #else
 9 | #include "../wincludes.h"
10 | typedef HMODULE MHandle;
11 | #endif
12 | 
//Result of Handles::GetModuleInfo: describes one loaded module.
typedef struct
{
	//Platform module handle (void* on Linux/macOS, HMODULE otherwise)
	MHandle handle;
	//Address recorded for the module
	uintptr_t address;
	//Size in bytes recorded for the module
	size_t size;
} ModuleInfo;
19 | 
//Cross-platform helpers for locating modules that are loaded into the current process.
namespace Handles
{
	//Handle of a loaded module; on Linux/macOS `module` is matched as a substring of the module name
	MHandle GetModuleHandle(const char* module);
	//Handle, address and size of a loaded module; fields are zero/null when it is not found
	ModuleInfo GetModuleInfo(const char* module);
	//Handle of the module containing `ptr` (always nullptr on platforms other than Linux/macOS)
	MHandle GetPtrModuleHandle(void* ptr);
}
26 | 
27 | #endif
28 | 


--------------------------------------------------------------------------------
/utils/history_list.h:
--------------------------------------------------------------------------------
 1 | #ifndef HISTORY_LIST_H
 2 | #define HISTORY_LIST_H
 3 | 
 4 | #ifndef _MSC_VER
 5 | #include <unistd.h>
 6 | #endif
 7 | #include <math.h>
 8 | 
 9 | template <typename T, size_t C>
10 | struct HistoryList
11 | {
12 | 
13 | 	//id 0 is the current item, and higher up go previous items
14 | 	inline T& GetLastItem(size_t id)
15 | 	{
16 | 		return list[(counter - id) % C];
17 | 	}
18 | 
19 | 	inline T& operator[](size_t id)
20 | 	{
21 | 		return GetLastItem(id);
22 | 	}
23 | 
24 | 	HistoryList()
25 | 	{
26 | 		counter = 0;
27 | 	}
28 | 
29 | 	~HistoryList()
30 | 	{
31 | 		for (size_t i = 0; i < Count(); i++)
32 | 			operator[](i).~T();
33 | 	}
34 | 
35 | 	auto& Push()
36 | 	{
37 | 		counter++;
38 | 
39 | 		if (counter >= C)
40 | 			list[counter % C].~T();
41 | 
42 | 		new(&list[counter % C]) T();
43 | 		return list[counter % C];
44 | 	}
45 | 
46 | 	void UndoPush()
47 | 	{
48 | 		counter--;
49 | 	}
50 | 
51 | 	auto& Push(const T& item)
52 | 	{
53 | 		list[++counter % C] = item;
54 | 		return list[counter % C];
55 | 	}
56 | 
57 | 	auto& GetItem(size_t id)
58 | 	{
59 | 		return list[id % C];
60 | 	}
61 | 
62 | 	size_t Count()
63 | 	{
64 | 		return std::min(counter, C);
65 | 	}
66 | 
67 | 	void Reset()
68 | 	{
69 | 		for (size_t i = 0; i < Count(); i++)
70 | 			operator[](i).~T();
71 | 
72 | 		counter = 0;
73 | 	}
74 | 
75 |   private:
76 | 	T list[C];
77 | 	size_t counter;
78 | };
79 | 
80 | #endif
81 | 


--------------------------------------------------------------------------------
/utils/intersect.cpp:
--------------------------------------------------------------------------------
 1 | #include "intersect_impl.h"
 2 | 
 3 | static vec3_t DirBetweenLines(vec3_t a, vec3_t b, vec3_t c, vec3_t d)
 4 | {
 5 | 	vec3_t d1 = (b - a);
 6 | 	vec3_t d2 = (d - c);
 7 | 
 8 | 	vec3_t cross = d1.Cross(d2);
 9 | 
10 | 	vec3_t cross1 = d1.Cross(cross);
11 | 	vec3_t cross2 = d2.Cross(cross);
12 | 
13 | 	vec3_t sp = c + std::clamp((a - c).Dot(cross1) / (d2.Dot(cross1)), 0.f, 1.f) * d2;
14 | 	vec3_t ep = a + std::clamp((c - a).Dot(cross2) / (d1.Dot(cross2)), 0.f, 1.f) * d1;
15 | 
16 | 	return ep - sp;
17 | }
18 | 
19 | bool CapsuleCollider::Intersect(vec3_t a, vec3_t b)
20 | {
21 | 	vec3_t dir = DirBetweenLines(a, b, start, end);
22 | 
23 | 	return dir.LengthSqr() <= radius * radius;
24 | }
25 | 


--------------------------------------------------------------------------------
/utils/intersect.h:
--------------------------------------------------------------------------------
 1 | #ifndef INTERSECT_H
 2 | #define INTERSECT_H
 3 | 
 4 | #include "../math/mmath.h"
 5 | 
//Capsule described by an axis segment (start-end) and a radius.
struct CapsuleCollider
{
	vec3_t start;
	vec3_t end;
	float radius;

	//True when segment a-b comes within `radius` of the axis segment
	bool Intersect(vec3_t a, vec3_t b);

	//Tests Y segments at once. Returns a bit mask with bit i set when lane i intersects;
	//when `out` is given it receives, per lane, the computed closest point on the tested segment
	template<size_t Y>
	unsigned int IntersectSOA(vec3soa<float, Y>& __restrict a, vec3soa<float, Y>& __restrict, svec3<Y>* __restrict out = nullptr);
};
17 | 
//N capsules stored in structure-of-arrays form: lane i is the capsule with axis
//start[i]-end[i] and radius[i].
template <size_t N>
struct CapsuleColliderSOA
{
	svec3<N> start, end;
	float radius[N];

	//Tests one segment a-b against all N capsules. Returns a bit mask with bit i set when
	//capsule i is hit; when `out` is given it receives the per-lane closest point on a-b
	unsigned int Intersect(vec3_t a, vec3_t b, svec3<N>* out = nullptr);
};
26 | 
27 | #endif
28 | 


--------------------------------------------------------------------------------
/utils/intersect_box.cpp:
--------------------------------------------------------------------------------
 1 | #include "intersect_box.h"
 2 | 
 3 | bool AABBCollider::Intersect(vec3_t a, vec3_t b, vec3_t* out)
 4 | {
 5 | 	vec3_t d = b - a;
 6 | 	vec3_t dInv = 1.f / d;
 7 | 
 8 | 	vec3_t t1 = (start - a) * dInv;
 9 | 	vec3_t t2 = (end - a) * dInv;
10 | 
11 | 	vec3_t minv = t1.Min(t2);
12 | 	vec3_t maxv = t1.Max(t2);
13 | 
14 | 	float tmin = minv.MaxUp();
15 | 	float tmax = maxv.MinUp();
16 | 
17 | 	bool ret = tmax >= fmaxf(0.f, tmin) && tmin < 1;
18 | 
19 | 	if (out)
20 | 		*out = a + d * fminf(tmin, 1);
21 | 
22 | 	return ret;
23 | }
24 | 


--------------------------------------------------------------------------------
/utils/intersect_box.h:
--------------------------------------------------------------------------------
 1 | #ifndef INTERSECT_BOX_H
 2 | #define INTERSECT_BOX_H
 3 | 
 4 | #include "../math/mmath.h"
 5 | 
//Axis-aligned box spanned by the two corners start and end.
struct AABBCollider
{
	vec3_t start, end;

	AABBCollider(vec3_t s, vec3_t e)
		: start(s), end(e) {}

	//Tests segment a-b against the box; `out` (optional) receives the entry point estimate
	bool Intersect(vec3_t a, vec3_t b, vec3_t* out = nullptr);
	//Tests Y segments at once; returns a bit mask with bit i set when lane i hits the box
	template<size_t Y>
	uint64_t IntersectSOA(const vec3soa<float, Y>& __restrict a, const vec3soa<float, Y>& __restrict b, vec3soa<float, Y>* __restrict out = nullptr);
};
17 | 
//Box with an attached 3x4 transform: segments are transformed by `w2l` and then tested
//against the inherited axis-aligned box.
//NOTE(review): w2l is presumably a world-to-local matrix - confirm with callers.
struct OBBCollider
	: AABBCollider
{
	matrix<3, 4> w2l;

	OBBCollider(vec3_t s, vec3_t e, matrix<3, 4> m)
		: AABBCollider(s, e), w2l(m) {}

	//Transforms both end points with w2l and runs the AABB test on them.
	//The point written to `out` is computed from the transformed end points and is not
	//transformed back.
	inline bool Intersect(vec3_t a, vec3_t b, vec3_t* out = nullptr)
	{
		return AABBCollider::Intersect(w2l.Vector3Transform(a), w2l.Vector3Transform(b), out);
	}
};
31 | 
//Y axis-aligned boxes stored in structure-of-arrays form (lane i spans start[i]..end[i]).
template<size_t Y>
struct AABBColliderSOA
{
	vec3soa<float, Y> start, end;

	AABBColliderSOA(vec3soa<float, Y> s, vec3soa<float, Y> e)
		: start(s), end(e) {}

	//Tests one segment a-b against all Y boxes; returns a bit mask with bit i set for each box hit
	uint64_t Intersect(vec3_t a, vec3_t b, vec3soa<float, Y>* out = nullptr);
	//Tests segment i against box i for all Y lanes; returns a bit mask with bit i set for each hit
	uint64_t IntersectSSOA(const vec3soa<float, Y>& __restrict a, const vec3soa<float, Y>& __restrict b, vec3soa<float, Y>* out = nullptr);
};
43 | 
44 | #endif
45 | 


--------------------------------------------------------------------------------
/utils/intersect_box_impl.h:
--------------------------------------------------------------------------------
 1 | #ifndef INTERSECT_BOX_IMPL_H
 2 | #define INTERSECT_BOX_IMPL_H
 3 | 
 4 | #include "intersect_box.h"
 5 | 
 6 | template<size_t Y, typename ClassSource, typename Source, typename Dest>
 7 | [[gnu::flatten]] static inline uint64_t PerformIntersect(const ClassSource& __restrict start, const ClassSource& __restrict end, const Source& __restrict a, const Source& __restrict b, Dest* __restrict out)
 8 | {
 9 | 	Dest d = b - a;
10 | 	auto dInv = 1.f / d;
11 | 
12 | 	auto t1 = (start - a) * dInv;
13 | 	auto t2 = (end - a) * dInv;
14 | 
15 | 	auto minv = t1.Min(t2);
16 | 	auto maxv = t1.Max(t2);
17 | 
18 | 	auto tmin = minv.MaxUp();
19 | 	auto tmax = maxv.MinUp();
20 | 
21 | 	uint64_t ret = 0;
22 | 
23 | 	for (size_t i = 0; i < sizeof(tmin) / sizeof(tmin[0]); i++)
24 | 		if (tmax[i] >= fmaxf(0.f, tmin[i]) && tmin[i] < 1)
25 | 			ret |= (1ull << i);
26 | 
27 | 	if (out) {
28 | 		for (size_t i = 0; i < sizeof(tmin) / sizeof(tmin[0]); i++)
29 | 			tmin[i] = fminf(tmin[i], 0);
30 | 		*out = d * tmin + a;
31 | 	}
32 | 
33 | 	return ret;
34 | }
35 | 
//Tests Y segments (a[i]-b[i]) against this single box by forwarding to PerformIntersect.
//Returns the per-lane hit mask; `out` (optional) receives the per-lane entry points.
template<size_t Y>
uint64_t AABBCollider::IntersectSOA(const vec3soa<float, Y>& __restrict a, const vec3soa<float, Y>& __restrict b, vec3soa<float, Y>* __restrict out)
{
	return PerformIntersect<Y>(start, end, a, b, out);
}
41 | 
//Tests the single segment a-b against all Y boxes by forwarding to PerformIntersect.
//Returns the per-box hit mask; `out` (optional) receives the per-box entry points.
template<size_t Y>
uint64_t AABBColliderSOA<Y>::Intersect(vec3_t a, vec3_t b, vec3soa<float, Y>* out)
{
	return PerformIntersect<Y>(start, end, a, b, out);
}
47 | 
//Lane-wise test: segment a[i]-b[i] against box i, forwarded to PerformIntersect.
//Returns the per-lane hit mask; `out` (optional) receives the per-lane entry points.
template<size_t Y>
uint64_t AABBColliderSOA<Y>::IntersectSSOA(const vec3soa<float, Y>& __restrict a, const vec3soa<float, Y>& __restrict b, vec3soa<float, Y>* out)
{
	return PerformIntersect<Y>(start, end, a, b, out);
}
53 | 
54 | #endif
55 | 


--------------------------------------------------------------------------------
/utils/intersect_impl.h:
--------------------------------------------------------------------------------
 1 | #ifndef INTERSECT_IMPL_H
 2 | #define INTERSECT_IMPL_H
 3 | 
/*
  Template implementations of the SOA intersection functions.
  They cannot live in a source file, because the templates would then not be instantiated for the translation units that use them.
*/
 8 | 
 9 | #include "intersect.h"
10 | 
//Limits a to the range [min, max] using fmaxf/fminf.
//Because fmaxf returns the non-NaN operand, a NaN input yields min.
static inline float clampf(float a, float min, float max)
{
	const float raised = fmaxf(min, a);
	return fminf(max, raised);
}
15 | 
16 | template<size_t Y>
17 | [[gnu::flatten]] static vec3soa<float, Y> DirBetweenLines(const vec3soa<float, Y>& __restrict a, const vec3soa<float, Y>& __restrict b, const vec3soa<float, Y>& __restrict c, const vec3soa<float, Y>& __restrict d, vec3soa<float, Y>& sp, vec3soa<float, Y>& ep)
18 | {
19 | 	auto d1 = (b - a);
20 | 	auto d2 = (d - c);
21 | 
22 | 	auto cross = d1.Cross(d2);
23 | 
24 | 	auto cross1 = d1.Cross(cross);
25 | 	auto cross2 = d2.Cross(cross);
26 | 
27 | 	float dirDotC2[Y], d1DotC2[Y], dirDotC1[Y], d2DotC1[Y];
28 | 	(c - a).Dot(cross2, dirDotC2);
29 | 	d1.Dot(cross2, d1DotC2);
30 | 	(a - c).Dot(cross1, dirDotC1);
31 | 	d2.Dot(cross1, d2DotC1);
32 | 
33 | 	float c2Div[Y], c1Div[Y];
34 | 
35 | 	for (size_t i = 0; i < Y; i++) {
36 | 		c2Div[i] = clampf(dirDotC2[i] / d1DotC2[i], 0, 1);
37 | 		c1Div[i] = clampf(dirDotC1[i] / d2DotC1[i], 0, 1);
38 | 	}
39 | 
40 | 	sp = c + d2 * c1Div;
41 | 	ep = a + d1 * c2Div;
42 | 
43 | 	auto diff = ep - sp;
44 | 
45 | 	return diff;
46 | }
47 | 
48 | template<size_t Y>
49 | unsigned int CapsuleCollider::IntersectSOA(vec3soa<float, Y>& __restrict a, vec3soa<float, Y>& __restrict b, vec3soa<float, Y>* __restrict out)
50 | {
51 | 	unsigned int flags = 0;
52 | 	svec3<Y> soaStart, soaEnd;
53 | 	soaStart = start;
54 | 	soaEnd = end;
55 | 	float radiusSqr = radius * radius;
56 | 
57 | 	vec3soa<float, Y> sp, ep;
58 | 	vec3soa<float, Y> dirs = DirBetweenLines(a, b, soaStart, soaEnd, sp, ep);
59 | 
60 | 	if (out)
61 | 		*out = ep;
62 | 
63 | 	float lens[Y];
64 | 	dirs.LengthSqr(lens);
65 | 
66 | 	for (size_t i = 0; i < Y; i++)
67 | 		if (lens[i] <= radiusSqr)
68 | 			flags |= (1 << i);
69 | 
70 | 	return flags;
71 | }
72 | 
73 | 
//Explicit instantiation of IntersectSOA for the nvec3 pack type.
//NOTE(review): nvec3 is declared outside this file - presumably the default-width SOA vector; confirm.
template unsigned int CapsuleCollider::IntersectSOA(nvec3& __restrict a, nvec3& __restrict b, nvec3* __restrict out);
75 | 
76 | template <size_t N>
77 | unsigned int CapsuleColliderSOA<N>::Intersect(vec3_t a, vec3_t b, svec3<N>* out)
78 | {
79 | 	unsigned int flags = 0;
80 | 	svec3<N> va = a, vb = b;
81 | 
82 | 	svec3<N> sp, ep;
83 | 	svec3<N> dirs = DirBetweenLines(va, vb, start, end, sp, ep);
84 | 
85 | 	if (out)
86 | 		*out = ep;
87 | 
88 | 	float lens[N];
89 | 	dirs.LengthSqr(lens);
90 | 
91 | 	for (size_t i = 0; i < N; i++)
92 | 		if (lens[i] <= radius[i] * radius[i])
93 | 			flags |= (1 << i);
94 | 
95 | 	return flags;
96 | }
97 | 
98 | #endif
99 | 


--------------------------------------------------------------------------------
/utils/kd_tree.h:
--------------------------------------------------------------------------------
  1 | #ifndef KD_TREE_H
  2 | #define KD_TREE_H
  3 | 
  4 | #include <stdint.h>
  5 | #include <memory>
  6 | #include <assert.h>
  7 | 
  8 | template<typename T, typename Pointer2 = uintptr_t*>
  9 | struct TreeNode_t
 10 | {
 11 | 	using Pointer = typename std::pointer_traits<Pointer2>::template rebind<TreeNode_t>;
 12 | 	T value;
 13 | 	Pointer left = 0, right = 0;
 14 | 
 15 | 	constexpr const T& operator*() const
 16 | 	{
 17 | 		return value;
 18 | 	}
 19 | };
 20 | 
 21 | template<typename T, unsigned int K, typename Alloc2 = std::allocator<TreeNode_t<T>>>
 22 | struct KDTree
 23 | {
 24 |   private:
 25 | 	//heck
 26 | 	using pointer3 = typename std::allocator_traits<Alloc2>::pointer;
 27 | 	using TreeNode2 = TreeNode_t<T, pointer3>;
 28 | 	using pointer2 = typename std::remove_const<typename TreeNode2::Pointer>::type;
 29 | 	using TreeNode = TreeNode_t<T, pointer2>;
 30 | 	using Alloc = typename Alloc2::template rebind<TreeNode>::other;
 31 | 	using pointer = typename std::remove_const<typename TreeNode::Pointer>::type;
 32 | 
 33 | 	Alloc alloc;
 34 | 	pointer rootNode;
 35 | 	pointer freeNode;
 36 | 	size_t treeSize;
 37 | 	size_t freeSize = 0;
 38 | 
 39 |   public:
 40 | 
 41 | 	~KDTree()
 42 | 	{
 43 | 		Free();
 44 | 	}
 45 | 
 46 | 	constexpr size_t size()
 47 | 	{
 48 | 		return treeSize;
 49 | 	}
 50 | 
 51 | 	constexpr pointer Insert(const T& entry)
 52 | 	{
 53 | 		pointer idx = 0;
 54 | 
 55 | 		Insert(rootNode, entry, 0, &idx);
 56 | 
 57 | 		if (!rootNode)
 58 | 			rootNode = idx;
 59 | 
 60 | 		return idx;
 61 | 	}
 62 | 
 63 | 	constexpr pointer Find(const T& entry)
 64 | 	{
 65 | 		return Find(rootNode, entry, 0);
 66 | 	}
 67 | 
 68 | 	constexpr void DeleteNode(const pointer& ref)
 69 | 	{
 70 | 		DeleteNode(rootNode, ref->value, 0);
 71 | 	}
 72 | 
 73 | 	void WalkDelete(pointer root)
 74 | 	{
 75 | 		if (!root)
 76 | 			return;
 77 | 
 78 | 		WalkDelete(root->left);
 79 | 		WalkDelete(root->right);
 80 | 		Deallocate(root);
 81 | 		treeSize--;
 82 | 	}
 83 | 
 84 | 	void Clear()
 85 | 	{
 86 | 		WalkDelete(rootNode);
 87 | 		assert(!treeSize);
 88 | 		rootNode = 0;
 89 | 		treeSize = 0;
 90 | 	}
 91 | 
 92 | 	void WalkFree(pointer root)
 93 | 	{
 94 | 		if (!root)
 95 | 			return;
 96 | 
 97 | 		WalkFree(root->left);
 98 | 		root->left = nullptr;
 99 | 		alloc.deallocate(root, 1);
100 | 		freeSize--;
101 | 	}
102 | 
103 | 	void Free()
104 | 	{
105 | 		WalkFree(freeNode);
106 | 		assert(!freeSize);
107 | 		freeNode = nullptr;
108 | 	}
109 | 
110 |   private:
111 | 
112 | 	pointer Allocate()
113 | 	{
114 | 		if (freeNode) {
115 | 			pointer ret = freeNode;
116 | 			freeNode = freeNode->left;
117 | 			freeSize--;
118 | 			return ret;
119 | 		}
120 | 		return alloc.allocate(1);
121 | 	}
122 | 
123 | 	void Deallocate(pointer ptr)
124 | 	{
125 | 		ptr->left = freeNode;
126 | 		freeNode = ptr;
127 | 		freeSize++;
128 | 	}
129 | 
130 | 	pointer Insert(pointer root, const T& entry, unsigned int depth, pointer* out)
131 | 	{
132 | 		if (!root) {
133 | 			root = Allocate();
134 | 			*root = TreeNode();
135 | 			treeSize++;
136 | 			root->value = entry;
137 | 			if (out)
138 | 				*out = root;
139 | 			return root;
140 | 		}
141 | 
142 | 		unsigned int d = depth % K;
143 | 
144 | 		if (entry[d] < root->value[d])
145 | 			root->left = Insert((pointer)root->left, entry, depth + 1, out);
146 | 		else
147 | 			root->right = Insert((pointer)root->right, entry, depth + 1, out);
148 | 
149 | 		return root;
150 | 	}
151 | 
152 | 	pointer Find(pointer root, const T& entry, unsigned int depth)
153 | 	{
154 | 		if (!root)
155 | 			return 0;
156 | 
157 | 		if (entry == root->value)
158 | 			return root;
159 | 
160 | 		unsigned int d = depth % K;
161 | 
162 | 		if (entry[d] < root->value[d])
163 | 			return Find((pointer)root->left, entry, depth + 1);
164 | 
165 | 		return Find((pointer)root->right, entry, depth + 1);
166 | 	}
167 | 
168 | 	pointer MinNode(pointer x, pointer y, pointer z, unsigned int d)
169 | 	{
170 | 		pointer res = x;
171 | 		if (y && z->value[d] < res->value[d])
172 | 			res = y;
173 | 		if (z && z->value[d] < res->value[d])
174 | 			res = z;
175 | 		return res;
176 | 	}
177 | 
178 | 	pointer FindMin(pointer root, int dim, unsigned int depth)
179 | 	{
180 | 		if (!root)
181 | 			return 0;
182 | 
183 | 		unsigned int d = depth % K;
184 | 
185 | 		if (d == dim) {
186 | 			if (!root->left)
187 | 				return root;
188 | 			return FindMin(root->left, dim, depth + 1);
189 | 		}
190 | 
191 | 		return MinNode(root, FindMin(root->left, dim, depth + 1), FindMin(root->right, dim, depth + 1));
192 | 	}
193 | 
194 | 	pointer DeleteNode(pointer root, const T& entry, unsigned int depth)
195 | 	{
196 | 		if (!root)
197 | 			return 0;
198 | 
199 | 		unsigned int d = depth % K;
200 | 
201 | 		if (entry == root->value) {
202 | 			if (root->right) {
203 | 				pointer min = FindMin((pointer)root->right, d, 0);
204 | 				root->value = ((pointer)root->right)->value;
205 | 				root->right = DeleteNode((pointer)root->right, min->value, depth + 1);
206 | 			} else if (root->left) {
207 | 				pointer min = FindMin((pointer)root->left, d, 0);
208 | 				root->value = ((pointer)root->left)->value;
209 | 				root->left = DeleteNode((pointer)root->left, min->value, depth + 1);
210 | 			} else {
211 | 				Deallocate(root);
212 | 				treeSize--;
213 | 				return 0;
214 | 			}
215 | 			return root;
216 | 		}
217 | 
218 | 		if (entry[d] < root->value[d])
219 | 			root->left = DeleteNode((pointer)root->left, entry, depth + 1);
220 | 		else
221 | 			root->right = DeleteNode((pointer)root->right, entry, depth + 1);
222 | 
223 | 		return root;
224 | 	}
225 | };
226 | 
227 | 
228 | #endif
229 | 


--------------------------------------------------------------------------------
/utils/md5.cpp:
--------------------------------------------------------------------------------
  1 | #include "md5.h"
  2 | 
  3 | #include <string.h>
  4 | 
  5 | // The four core functions - F1 is optimized somewhat
  6 | // #define F1(x, y, z) (x & y | ~x & z)
  7 | #define F1(x, y, z) (z ^ (x & (y ^ z)))
  8 | #define F2(x, y, z) F1(z, x, y)
  9 | #define F3(x, y, z) (x ^ y ^ z)
 10 | #define F4(x, y, z) (y ^ (x | ~z))
 11 | 
 12 | // This is the central step in the MD5 algorithm.
 13 | #define MD5STEP(f, w, x, y, z, data, s)							\
 14 | 	( w += f(x, y, z) + data,  w = w<<s | w>>(32-s),  w += x )
 15 | 
//Core MD5 compression step: mixes one 64-byte input block (given as 16 words in `in`) into
//the four-word state `buf`.
//The state is copied into a, b, c, d, run through four rounds of 16 MD5STEP operations each
//(one round per auxiliary function F1..F4) and finally added back onto buf.
static void Transform(unsigned int buf[4], unsigned int const in[16])
{
	unsigned int a, b, c, d;

	a = buf[0];
	b = buf[1];
	c = buf[2];
	d = buf[3];

	//Round 1 (F1)
	MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7);
	MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12);
	MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17);
	MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22);
	MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7);
	MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12);
	MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17);
	MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22);
	MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7);
	MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12);
	MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17);
	MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22);
	MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7);
	MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12);
	MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17);
	MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22);

	//Round 2 (F2)
	MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5);
	MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9);
	MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14);
	MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20);
	MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5);
	MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9);
	MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14);
	MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20);
	MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5);
	MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9);
	MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14);
	MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20);
	MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5);
	MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9);
	MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14);
	MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20);

	//Round 3 (F3)
	MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4);
	MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11);
	MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16);
	MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23);
	MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4);
	MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11);
	MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16);
	MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23);
	MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4);
	MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11);
	MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16);
	MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23);
	MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4);
	MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11);
	MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16);
	MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23);

	//Round 4 (F4)
	MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6);
	MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10);
	MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15);
	MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21);
	MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6);
	MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10);
	MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15);
	MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21);
	MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6);
	MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10);
	MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15);
	MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21);
	MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6);
	MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10);
	MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15);
	MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21);

	//Fold the result of this block into the running state
	buf[0] += a;
	buf[1] += b;
	buf[2] += c;
	buf[3] += d;
}
 98 | 
 99 | void MD5::Init(MD5Context_t *ctx)
100 | {
101 | 	ctx->buf[0] = 0x67452301;
102 | 	ctx->buf[1] = 0xefcdab89;
103 | 	ctx->buf[2] = 0x98badcfe;
104 | 	ctx->buf[3] = 0x10325476;
105 | 
106 | 	ctx->bits[0] = 0;
107 | 	ctx->bits[1] = 0;
108 | }
109 | 
//Feeds len bytes from buf into the digest.
//Complete 64-byte blocks are passed to Transform; a trailing partial block is kept in
//ctx->in until more data arrives or Final is called.
void MD5::Update(MD5Context_t* ctx, unsigned char const* buf, unsigned int len)
{
	unsigned int t;

	//Add len * 8 to the 64-bit bit counter held in bits[0] (low) and bits[1] (high)
	t = ctx->bits[0];
	if((ctx->bits[0] = t + ((unsigned int)len << 3)) < t)
		ctx->bits[1]++; //carry out of the low word
	ctx->bits[1] += len >> 29; //bits of len * 8 that do not fit into the low word

	//Number of bytes that were already waiting in ctx->in before this call
	t = (t >> 3) & 0x3f;

	if(t) {
		//Continue filling the partially filled block first
		unsigned char *p = (unsigned char*)ctx->in + t;

		t = 64 - t;
		if(len < t) {
			//Still not a full block: just store the data
			memcpy(p, buf, len);
			return;
		}
		memcpy(p, buf, t);
		Transform(ctx->buf, (unsigned int*)ctx->in);
		buf += t;
		len -= t;
	}
	//Process data in 64-byte chunks

	while(len >= 64) {
		memcpy(ctx->in, buf, 64);
		Transform(ctx->buf, (unsigned int*)ctx->in);
		buf += 64;
		len -= 64;
	}

	//Handle any remaining bytes of data.
	memcpy(ctx->in, buf, len);
}
146 | 
147 | 
//Finishes the digest: appends the 0x80 marker, zero padding and the 64-bit message length,
//processes the last block(s) and copies the 16 state bytes into digest.
//The context is zeroed afterwards and must be re-initialised with Init before reuse.
void MD5::Final(unsigned char digest[MD5_DIGEST_LENGTH], MD5Context_t* ctx)
{
	unsigned count;
	unsigned char* p;

	//Compute number of bytes mod 64
	count = (ctx->bits[0] >> 3) & 0x3F;

	/* Set the first char of padding to 0x80.  This is safe since there is
	   always at least one byte free */
	p = ctx->in + count;
	*p++ = 0x80;

	//Bytes of padding needed to make 64 bytes
	count = 64 - 1 - count;

	//Pad out to 56 mod 64
	if(count < 8) {
		//Two lots of padding:  Pad the first block to 64 bytes
		memset(p, 0, count);
		Transform(ctx->buf, (unsigned int*)ctx->in);

		//Now fill the next block with 56 bytes
		memset(ctx->in, 0, 56);
	} else {
		//Pad block to 56 bytes
		memset(p, 0, count - 8);
	}

	//The last 8 bytes of the block hold the message length in bits (low word first)
	((unsigned int*)ctx->in)[14] = ctx->bits[0];
	((unsigned int*)ctx->in)[15] = ctx->bits[1];

	Transform(ctx->buf, (unsigned int*)ctx->in);
	memcpy(digest, ctx->buf, MD5_DIGEST_LENGTH);
	//Wipe the working state
	memset(ctx, 0, sizeof(MD5Context_t));
}
184 | 
185 | 
186 | unsigned int MD5::PseudoRandom(unsigned int nSeed)
187 | {
188 | 	MD5Context_t ctx;
189 | 	unsigned char digest[MD5_DIGEST_LENGTH];
190 | 
191 | 	memset(&ctx, 0, sizeof(ctx));
192 | 
193 | 	Init(&ctx);
194 | 	Update(&ctx, (unsigned char*)&nSeed, sizeof(nSeed));
195 | 	Final(digest, &ctx);
196 | 
197 | 	return *(unsigned int*)(digest + 6);
198 | }
199 | 


--------------------------------------------------------------------------------
/utils/md5.h:
--------------------------------------------------------------------------------
 1 | #ifndef MD5_H
 2 | #define MD5_H
 3 | 
//Size of an MD5 digest in bytes
constexpr int MD5_DIGEST_LENGTH = 16;

//Working state of one MD5 computation
typedef struct
{
	//Four-word running hash state
	unsigned int buf[4];
	//Message length in bits, as a 64-bit counter: bits[0] is the low word, bits[1] the high word
	unsigned int bits[2];
	//Input block buffer; holds the bytes that do not yet form a complete 64-byte block
	unsigned char in[64];
} MD5Context_t;
12 | 
//Incremental MD5: Init once, Update any number of times, then Final to obtain the digest.
namespace MD5 {
	//Resets the hash state and the length counter of context
	void Init(MD5Context_t* context);
	//Adds len bytes from buf to the digest
	void Update(MD5Context_t* context, unsigned char const* buf, unsigned int len);
	//Writes the finished digest and zeroes context
	void Final(unsigned char digest[MD5_DIGEST_LENGTH], MD5Context_t* context);
	//Deterministic value derived from the MD5 digest of nSeed
	unsigned int PseudoRandom(unsigned int nSeed);
}
19 | 
20 | #endif
21 | 


--------------------------------------------------------------------------------
/utils/memutils.h:
--------------------------------------------------------------------------------
 1 | #ifndef MEMUTILS_H
 2 | #define MEMUTILS_H
 3 | 
 4 | #include "pattern_scan.h"
 5 | #include "handles.h"
 6 | #include <string.h>
 7 | 
 8 | //External programs might want to use custom RPM/WPM functions
 9 | #ifndef MEMUTILS_CUSTOM_RW
//Default read primitive: copies sz bytes from source to dest within the current process.
//Only compiled when MEMUTILS_CUSTOM_RW is not defined.
inline void ReadMem(void* dest, void* source, size_t sz)
{
	memcpy(dest, source, sz);
}
14 | 
//Default write primitive: copies sz bytes from source to dest within the current process.
//Only compiled when MEMUTILS_CUSTOM_RW is not defined.
inline void WriteMem(void* dest, void* source, size_t sz)
{
	memcpy(dest, source, sz);
}
19 | #else
20 | void ReadMem(void* dest, void* source, size_t sz);
21 | void WriteMem(void* dest, void* source, size_t sz);
22 | #endif
23 | 
//Reads one T from address addr (any type castable to void*) through ReadMem and returns it.
//The result starts out default-initialised; it holds whatever ReadMem stored into it.
template<typename T, typename N>
inline T Read(N addr)
{
	T ret;
	ReadMem(&ret, (void*)addr, sizeof(T));
	return ret;
}
31 | 
32 | template<typename T, typename N>
33 | inline void ReadArr(N addr, T* arr, size_t count)
34 | {
35 | 	ReadMem((void*)arr, (void*)addr, sizeof(T) * count);
36 | }
37 | 
// Stores one value of type T at the address addr through WriteMem.
template<typename T, typename N>
inline void Write(N addr, T value)
{
	void* const target = (void*)addr;
	WriteMem(target, &value, sizeof(T));
}
43 | 
44 | template<typename T, typename N>
45 | inline void WriteArr(N addr, T* arr, size_t count)
46 | {
47 | 	WriteMem((void*)addr, (void*)arr, sizeof(T) * count);
48 | }
49 | 
50 | template<typename T = int32_t>
51 | inline uintptr_t GetAbsoluteAddress(uintptr_t addr, intptr_t offset, intptr_t instructionSize)
52 | {
53 | 	return addr + Read<T>(addr + offset) + instructionSize;
54 | }
55 | 
56 | template<typename T, size_t idx, typename N>
57 | inline T GetVFunc(N* inst)
58 | {
59 | 	return Read<T>(Read<T*>(inst) + idx);
60 | }
61 | 
62 | #endif
63 | 


--------------------------------------------------------------------------------
/utils/mutex.cpp:
--------------------------------------------------------------------------------
 1 | #include "mutex.h"
 2 | 
 3 | #if defined(__linux__) || defined(__APPLE__)
 4 | #include <pthread.h>
 5 | 
// Initialises the pthread mutex with default attributes (nullptr attr).
Mutex::Mutex() {
	int ret = pthread_mutex_init(&lck, nullptr);
#if defined(__cpp_exceptions) || defined(_CPPUNWIND)
	if (ret) {
		// NOTE(review): a bare `throw;` with no exception currently being
		// handled calls std::terminate(); nothing catchable is thrown here.
		throw;
	}
#endif
}
14 | 
// Destroys the pthread mutex; the return code is ignored.
Mutex::~Mutex() {
	pthread_mutex_destroy(&lck);
}
18 | 
// Blocks until the mutex is acquired; the return code is ignored.
void Mutex::lock() {
	pthread_mutex_lock(&lck);
}
22 | 
// Attempts to acquire the mutex without blocking.
// Returns true when pthread_mutex_trylock reports success (0), false otherwise.
bool Mutex::trylock() {
	return !pthread_mutex_trylock(&lck);
}
26 | 
// Releases the mutex; the return code is ignored.
void Mutex::unlock() {
	pthread_mutex_unlock(&lck);
}
30 | 
31 | #else
32 | #include <windows.h>
33 | 
// Windows build: initialises the critical section backing this mutex.
Mutex::Mutex() {
	::InitializeCriticalSection(&lck);
}
37 | 
// Windows build: releases the critical section's resources.
Mutex::~Mutex() {
	::DeleteCriticalSection(&lck);
}
41 | 
42 | void Mutex::lock() {
43 | 	::EnterCriticalSection(&lck);
44 | }
45 | 
// Windows build: releases ownership of the critical section.
void Mutex::unlock() {
	::LeaveCriticalSection(&lck);
}
49 | #endif
50 | 


--------------------------------------------------------------------------------
/utils/mutex.h:
--------------------------------------------------------------------------------
 1 | #ifndef MUTEX_H
 2 | #define MUTEX_H
 3 | 
 4 | #if defined(__linux__) || defined(__APPLE__)
 5 | #include <pthread.h>
 6 | #else
 7 | #define WIN32_LEAN_AND_MEAN
 8 | #include <windows.h>
 9 | #endif
10 | 
// Minimal mutex wrapper: a pthread mutex on Linux/macOS, a CRITICAL_SECTION
// everywhere else. The native handle `lck` is left public (see the
// commented-out `private:` below).
class Mutex {
  public:
	Mutex();
	~Mutex();
	// Blocks until the lock is held.
	void lock();
	// Non-blocking attempt; true when the lock was taken.
	bool trylock();
	// Releases the lock.
	void unlock();
	//private:
#if defined(__linux__) || defined(__APPLE__)
	pthread_mutex_t lck;
#else
	CRITICAL_SECTION lck;
#endif
};
25 | 
26 | #endif
27 | 


--------------------------------------------------------------------------------
/utils/named_semaphores.cpp:
--------------------------------------------------------------------------------
 1 | #include "named_semaphores.h"
 2 | 
 3 | #if defined(__linux__)
 4 | #include <fcntl.h>
 5 | #include <stdio.h>
 6 | #include <errno.h>
 7 | 
 8 | NamedSemaphore::NamedSemaphore(const char* name) {
 9 | 	_name = name;
10 | 	sm = sem_open(name, O_CREAT, S_IRWXU | S_IRWXG | S_IRWXO);
11 | 	if (sm == (sem_t*)SEM_FAILED) {
12 | 		sem_unlink(name);
13 | 		sm = sem_open(name, O_CREAT | O_EXCL, S_IRWXU | S_IRWXG);
14 | 	}
15 | 	if (sm == (sem_t*)SEM_FAILED) {
16 | 		printf("ERROR %d\n", errno);
17 | 		throw;
18 | 	}
19 | }
20 | 
21 | NamedSemaphore::~NamedSemaphore() {
22 | 	if (sm) {
23 | 		sem_unlink(_name);
24 | 		sem_close(sm);
25 | 	}
26 | }
27 | 
28 | int NamedSemaphore::TimedWait(size_t milliseconds)
29 | {
30 | 	struct timespec ts;
31 | 	if (clock_gettime(CLOCK_REALTIME, &ts) == -1)
32 | 		return 1;
33 | 	ts.tv_nsec += 1000000ull * milliseconds;
34 | 	return sem_timedwait(sm, &ts);
35 | }
36 | 
// Blocks on the semaphore via sem_wait; the return code is ignored.
void NamedSemaphore::Wait() {
	sem_wait(sm);
}
40 | 
// Increments the semaphore via sem_post; the return code is ignored.
void NamedSemaphore::Post() {
	sem_post(sm);
}
44 | 
// Returns the value reported by sem_getvalue, or 0 if the call leaves `val`
// untouched. The int result is converted to unsigned long on return.
unsigned long NamedSemaphore::Count()
{
	int val = 0;
	sem_getvalue(sm, &val);
	return val;
}
51 | 
52 | #else
53 | 
// Windows build: creates or opens the named semaphore with an initial count
// of 0 and a maximum count of 0xffff. The returned handle is not checked.
NamedSemaphore::NamedSemaphore(const char* name) {
	sm = CreateSemaphoreA(nullptr, 0, 0xffff, name);
}
57 | 
// Windows build: closes the semaphore handle.
NamedSemaphore::~NamedSemaphore() {
	CloseHandle(sm);
}
61 | 
// Windows build: waits on the semaphore with no timeout.
void NamedSemaphore::Wait() {
	WaitForSingleObject(sm, INFINITE);
}
65 | 
66 | int NamedSemaphore::TimedWait(size_t milliseconds)
67 | {
68 | 	if (WaitForSingleObject(sm, milliseconds) == WAIT_OBJECT_0)
69 | 		return 0;
70 | 	return 1;
71 | }
72 | 
// Windows build: releases the semaphore once (count + 1).
void NamedSemaphore::Post() {
	ReleaseSemaphore(sm, 1, NULL);
}
76 | 
77 | unsigned long NamedSemaphore::Count()
78 | {
79 | 	long previous;
80 | 	switch (WaitForSingleObject(sm, 0)) {
81 | 	  case WAIT_OBJECT_0:
82 | 		  ReleaseSemaphore(sm, 1, &previous);
83 | 		  return previous + 1;
84 | 	  case WAIT_TIMEOUT:
85 | 		  return 0;
86 | 	}
87 | 	return 0;
88 | }
89 | #endif
90 | 


--------------------------------------------------------------------------------
/utils/named_semaphores.h:
--------------------------------------------------------------------------------
 1 | #ifndef SEMAPHORES_H
 2 | #define SEMAPHORES_H
 3 | 
 4 | #include <stdint.h>
 5 | #if defined(__linux__) || defined(__APPLE__)
 6 | #include <stddef.h>
 7 | #endif
 8 | 
 9 | #if defined(__linux__) || defined(__APPLE__)
10 | #include <semaphore.h>
11 | #include <time.h>
12 | #else
13 | #include "windows.h"
14 | #endif
15 | 
16 | class NamedSemaphore
17 | {
18 | 	public:
19 | 	NamedSemaphore(const char* name);
20 | 	~NamedSemaphore();
21 | 	void Wait();
22 | 	int TimedWait(size_t milliseconds);
23 | 	void Post();
24 | 	unsigned long Count();
25 | 	private:
26 | 
27 | #if defined(__linux__)
28 | 	sem_t* sm;
29 | 	const char* _name;
30 | #else
31 | 	HANDLE sm;
32 | #endif
33 | };
34 | 
35 | #endif
36 | 


--------------------------------------------------------------------------------
/utils/packed_heap.cpp:
--------------------------------------------------------------------------------
  1 | #include "packed_heap.h"
  2 | #include <memory>
  3 | 
  4 | 
  5 | PackedAllocator::PackedAllocator(size_t sz, char* b)
  6 | {
  7 | 	if (b)
  8 | 		buf = b;
  9 | 	else
 10 | 		buf = (char*)malloc(sz);
 11 | 
 12 | 	bufCapacity = sz;
 13 | 	bufSize = 0;
 14 | }
 15 | 
 16 | PackedAllocator::PackedAllocator(const PackedAllocator& o)
 17 | {
 18 | 	*this = o;
 19 | }
 20 | 
 21 | PackedAllocator::PackedAllocator(const PackedAllocator&& o)
 22 | {
 23 | 	*this = o;
 24 | }
 25 | 
 26 | PackedAllocator::~PackedAllocator()
 27 | {
 28 | 	if (buf)
 29 | 		free(buf);
 30 | 	buf = nullptr;
 31 | 	bufCapacity = 0;
 32 | 	bufSize = 0;
 33 | }
 34 | 
 35 | PackedAllocator& PackedAllocator::operator=(const PackedAllocator& o)
 36 | {
 37 | 	totalAllocations = o.totalAllocations;
 38 | 	totalFrees = o.totalFrees;
 39 | 	totalResizes = o.totalResizes;
 40 | 	totalReallocations = o.totalReallocations;
 41 | 
 42 | 	freeRegionsTree = o.freeRegionsTree;
 43 | 
 44 | 	if (o.bufSize <= bufCapacity) {
 45 | 		bufSize = o.bufSize;
 46 | 	} else {
 47 | 		if (buf)
 48 | 			free(buf);
 49 | 		bufSize = o.bufSize;
 50 | 		bufCapacity = o.bufCapacity;
 51 | 		buf = (char*)malloc(bufCapacity);
 52 | 	}
 53 | 	memcpy(buf, o.buf, bufSize);
 54 | 
 55 | 	return *this;
 56 | }
 57 | 
 58 | PackedAllocator& PackedAllocator::operator=(PackedAllocator&& o)
 59 | {
 60 | 	totalAllocations = o.totalAllocations;
 61 | 	totalFrees = o.totalFrees;
 62 | 	totalResizes = o.totalResizes;
 63 | 	totalReallocations = o.totalReallocations;
 64 | 
 65 | 	freeRegionsTree = std::move(o.freeRegionsTree);
 66 | 
 67 | 	if (buf)
 68 | 		free(buf);
 69 | 
 70 | 	buf = o.buf;
 71 | 	o.buf = nullptr;
 72 | 	bufSize = o.bufSize;
 73 | 	bufCapacity = o.bufCapacity;
 74 | 
 75 | 	return *this;
 76 | }
 77 | 
// Core allocation routine. Returns the buffer index of an sz-byte payload
// that is framed by a MetaData record directly before and directly after it.
// A suitable free region from freeRegionsTree is reused when one exists;
// otherwise the payload is appended after bufSize, growing the capacity if
// required. When it grows, `buf` is replaced by a fresh malloc WITHOUT copying
// or freeing the old buffer — Alloc(), the caller, does both.
idx_t PackedAllocator::_Alloc(idx_t sz, size_t alignment)
{
	//Alignments below 4 are raised to 4
	alignment = std::max(size_t(4), alignment);

	totalAllocations++;

	//Payload plus its header and footer records
	size_t allocSize = sz + sizeof(MetaData) * 2;
	if (!freeRegionsTree.empty()) {
		//Smallest size class that is at least sz + alignment - 4 bytes large
		auto reg = freeRegionsTree.lower_bound(sz + alignment - 4);
		if (reg != freeRegionsTree.end()) {
			//Take the highest address stored for that size class
			idx_t address = *reg->second.rbegin();
			idx_t ret = address + sizeof(MetaData);

			//Align the allocated pointer
			//NOTE(review): std::align works on the buffer INDEX cast to a pointer,
			//so the index is aligned, not the real address — assumes buf itself is
			//suitably aligned. Its nullptr result (no fit) is not checked.
			size_t delta = ((MetaData*)&buf[address])->size;
			void* ptr = (void*)(size_t)ret;
			idx_t origRet = ret;
			idx_t ret2 = (idx_t)(size_t)std::align(alignment, sz, ptr, delta);
			//std::align already deducted the padding; after this, delta is what
			//remains of the region behind the payload
			delta -= sz;
			ret = ret2;
			reg->second.erase(address);

#ifdef PACKED_HEAP_DEBUG
			if (reg->first != ((MetaData*)&buf[address])->size)
				throw std::runtime_error("PackedHeap corruption");
#endif

			//Header and footer of the new used region
			*(MetaData*)&buf[ret - sizeof(MetaData)] = {USED_REGION, sz};
			*(MetaData*)&buf[ret + sz] = {USED_REGION, sz};

			//Drop the size class once its last address has been taken
			if (!reg->second.size())
				freeRegionsTree.erase(reg);

			idx_t lowerHoleStart = origRet;
			idx_t lowerHoleDelta = ret2 - sizeof(MetaData) - lowerHoleStart;

			//The lower holes are to be caused by alignments > 8
			//If FillHole declines the gap, it is registered as a free region
			if (lowerHoleDelta && lowerHoleDelta < sz && !FillHole(lowerHoleStart, lowerHoleDelta)) {
				idx_t holeSpotSize = lowerHoleDelta - sizeof(MetaData) * 2;
				*(MetaData*)&buf[lowerHoleStart] = {FREE_REGION, holeSpotSize};
				*(MetaData*)&buf[lowerHoleStart + holeSpotSize + sizeof(MetaData)] = {FREE_REGION, holeSpotSize};
				freeRegionsTree[holeSpotSize].insert(lowerHoleStart);
			}

			//First byte after the new region's footer
			idx_t holeStart = ret + allocSize - sizeof(MetaData);

			//Check if the place is small enough for a unallocatable hole
			if (delta && !FillHole(holeStart, delta)) {
				idx_t holeSpotSize = delta - sizeof(MetaData) * 2;
				*(MetaData*)&buf[holeStart] = {FREE_REGION, holeSpotSize};
				*(MetaData*)&buf[holeStart + holeSpotSize + sizeof(MetaData)] = {FREE_REGION, holeSpotSize};
				freeRegionsTree[holeSpotSize].insert(holeStart);
			}

			return ret;
		}
	}

	//No reusable region: append at the end of the used part of the buffer
	totalResizes++;

	idx_t baseIdx = bufSize + sizeof(MetaData);
	size_t delta = sz + alignment;
	void* ptr = (void*)(size_t)baseIdx;
	baseIdx = (idx_t)(size_t)std::align(alignment, sz, ptr, delta);

	//Gap between the old end of the buffer and the aligned header
	idx_t lowerHoleStart = bufSize;
	idx_t lowerHoleDelta = baseIdx - sizeof(MetaData) - lowerHoleStart;

	if (lowerHoleDelta && lowerHoleDelta < sz && !FillHole(lowerHoleStart, lowerHoleDelta)) {
		idx_t holeSpotSize = lowerHoleDelta - sizeof(MetaData) * 2;
		*(MetaData*)&buf[lowerHoleStart] = {FREE_REGION, holeSpotSize};
		*(MetaData*)&buf[lowerHoleStart + holeSpotSize + sizeof(MetaData)] = {FREE_REGION, holeSpotSize};
		freeRegionsTree[holeSpotSize].insert(lowerHoleStart);
	}

	if (bufCapacity < baseIdx + sizeof(MetaData) + sz) {
		totalReallocations++;
		bufCapacity = (baseIdx + sizeof(MetaData) + sz) * GROW_FACTOR;
		//The old buffer is neither copied nor freed here; see Alloc()
		buf = (char*)malloc(bufCapacity);
	}

	bufSize = baseIdx + sizeof(MetaData) + sz;
	*(MetaData*)&buf[baseIdx - sizeof(MetaData)] = {USED_REGION, sz};
	*(MetaData*)&buf[baseIdx + sz] = {USED_REGION, sz};

	return baseIdx;
}
165 | 
166 | idx_t PackedAllocator::Alloc(idx_t sz, size_t alignment)
167 | {
168 | 	if (!buf) {
169 | 		bufCapacity = sz + 2 * sizeof(MetaData);
170 | 		buf = (char*)malloc(bufCapacity);
171 | 	}
172 | 
173 | 	char* prevBuf = buf;
174 | 
175 | 	idx_t ret = _Alloc(sz, alignment);
176 | 
177 | 	if (buf != prevBuf) {
178 | 		memcpy(buf, prevBuf, ret - sizeof(MetaData));
179 | 		free(prevBuf);
180 | 	}
181 | 
182 | 	return ret;
183 | }
184 | 
185 | void PackedAllocator::Free(idx_t idx)
186 | {
187 | 	if (!idx)
188 | 		return;
189 | 
190 | 	totalFrees++;
191 | 
192 | 	MetaData* metaData = (MetaData*)&buf[idx - sizeof(MetaData)];
193 | 
194 | 	if (metaData->used != USED_REGION) {
195 | 		if (metaData->used == FREE_REGION)
196 | #ifdef PACKED_HEAP_DEBUG
197 | 			throw std::runtime_error("Double free");
198 | #else
199 | 			return;
200 | #endif
201 | 		else
202 | #if PACKED_HEAP_DEBUG
203 | 			throw std::runtime_error("PackedHeap corruption");
204 | #else
205 | 			return;
206 | #endif
207 | 	}
208 | 
209 | 	idx_t start = idx - sizeof(MetaData);
210 | 	idx_t end = idx + sizeof(MetaData) + metaData->size;
211 | 
212 | 	if (*metaData != *(MetaData*)&buf[end - sizeof(MetaData)])
213 | #if PACKED_HEAP_DEBUG
214 | 		throw std::runtime_error("PackedHeap corruption");
215 | #else
216 | 		return;
217 | #endif
218 | 
219 | 	//Check for a memory hole above the region (this can never occur below)
220 | 	if ((unsigned char)buf[end] == HOLE_START)
221 | 		while ((unsigned char)buf[end++] != HOLE_END)
222 | 			;
223 | 	else if ((unsigned char)buf[end] == HOLE_REGION)
224 | 		end++;
225 | 
226 | 	MetaData* upperMetaData = (MetaData*)&buf[end - sizeof(MetaData)];
227 | 
228 | 	MetaData* aboveRegion = end + sizeof(MetaData) < bufSize ? (MetaData*)&buf[end] : nullptr;
229 | 	MetaData* belowRegion = start >= sizeof(MetaData) * 2 ? (MetaData*)&buf[start - sizeof(MetaData)] : nullptr;
230 | 
231 | 	//Join the nearby free regions
232 | 	if (PACKED_HEAP_MERGE_REGIONS && aboveRegion && aboveRegion->used == FREE_REGION) {
233 | 		[[maybe_unused]]
234 | 			size_t ret = freeRegionsTree[aboveRegion->size].erase(end);
235 | 
236 | #ifdef PACKED_HEAP_DEBUG
237 | 		if (!ret)
238 | 			throw std::runtime_error("PackedHeap corruption");
239 | 
240 | 		if (!freeRegionsTree[aboveRegion->size].size())
241 | 			freeRegionsTree.erase(aboveRegion->size);
242 | #endif
243 | 
244 | 		upperMetaData = aboveRegion->WalkUp();
245 | 	}
246 | 
247 | 	if (PACKED_HEAP_MERGE_REGIONS && belowRegion && belowRegion->used == FREE_REGION) {
248 | 		[[maybe_unused]]
249 | 			size_t ret = freeRegionsTree[belowRegion->size].erase(start - 2 * sizeof(MetaData) - belowRegion->size);
250 | 
251 | #ifdef PACKED_HEAP_DEBUG
252 | 		if (!ret)
253 | 			throw std::runtime_error("PackedHeap corruption");
254 | 
255 | 		if (!freeRegionsTree[belowRegion->size].size())
256 | 			freeRegionsTree.erase(belowRegion->size);
257 | #endif
258 | 
259 | 		metaData = belowRegion->WalkDown();
260 | 	}
261 | 
262 | 	metaData->used = FREE_REGION;
263 | 	metaData->size = (uintptr_t)upperMetaData - (uintptr_t)metaData - sizeof(MetaData);
264 | 	*upperMetaData = *metaData;
265 | 
266 | 	freeRegionsTree[metaData->size].insert((idx_t)((uintptr_t)metaData - (uintptr_t)&buf[0]));
267 | }
268 | 
269 | void PackedAllocator::FreeAll()
270 | {
271 | 	totalFrees += totalAllocations - totalFrees;
272 | 	bufSize = 0;
273 | }
274 | 


--------------------------------------------------------------------------------
/utils/pattern_scan.cpp:
--------------------------------------------------------------------------------
  1 | #include "pattern_scan.h"
  2 | #include "memutils.h"
  3 | #include "string.h"
  4 | #include <vector>
  5 | #include <map>
  6 | #include "assert.h"
  7 | #include "stdlib.h"
  8 | 
  9 | struct pOperation
 10 | {
 11 | 	short op;
 12 | 	intptr_t offset, v1;
 13 | 
 14 | 	pOperation(short o = 0, intptr_t off = 0, intptr_t v = 0)
 15 | 	{
 16 | 		op = o;
 17 | 		offset = off;
 18 | 		v1 = v;
 19 | 	}
 20 | 
 21 | 	uintptr_t RunOp(uintptr_t addr)
 22 | 	{
 23 | 		switch(op) {
 24 | 		  case 1:
 25 | 			  return addr + offset;
 26 | 		  case 11:
 27 | 			  return Read<uint8_t>(addr + offset);
 28 | 		  case 12:
 29 | 			  return Read<uint16_t>(addr + offset);
 30 | 		  case 14:
 31 | 			  return Read<uint32_t>(addr + offset);
 32 | 		  case 18:
 33 | 			  return Read<uint64_t>(addr + offset);
 34 | 		  case 21:
 35 | 			  return GetAbsoluteAddress<int8_t>(addr, offset, v1);
 36 | 		  case 22:
 37 | 			  return GetAbsoluteAddress<int16_t>(addr, offset, v1);
 38 | 		  case 24:
 39 | 		  case 28:
 40 | 			  return GetAbsoluteAddress<int32_t>(addr, offset, v1);
 41 | 		  default:
 42 | 			  break;
 43 | 		}
 44 | 		return addr;
 45 | 	}
 46 | 
 47 | };
 48 | 
// Forward declaration; defined at the bottom of this file unless
// PATTERN_SCAN_CUSTOM_SCAN is set, in which case no definition is compiled here.
uintptr_t ScanPattern(uintptr_t start, uintptr_t end, uintptr_t length, uintptr_t* data, uintptr_t* mask);
 50 | 
// Maps each dereference character of the pattern syntax to the number of
// bytes it reads. ParsePattern probes this map with operator[], so every
// other character it looks up is inserted with the value 0.
static std::map<char, size_t> readSizes = {
	{'$', 1},
	{'%', 2},
	{'^', 4},
	{'&', 8},
	{'*', sizeof(uintptr_t)}
};
 58 | 
// Parses a textual pattern (syntax described in pattern_scan.h).
//   patternBytes  out: new[]-allocated array, one entry per pattern byte;
//                 -1 marks a wildcard. FindPattern delete[]s it.
//   length        out: number of entries written to patternBytes.
//   operations    out: post-match steps to run on the found address, in order.
static void ParsePattern(const char* pattern, short*& patternBytes, size_t& length, std::vector<pOperation>& operations)
{
	//Start one character early; the loop pre-increments before reading
	char* p = (char*)(uintptr_t)pattern-1;
	bool inRelDeref = false;
	[[maybe_unused]] bool derefDone = false;
	int relIdx = 0;
	int relStartIdx = 0;
	int idx = 0;
	int initDerefIdx = 0;

	//The character count is used as the size of the byte array
	length = strlen(pattern);
	patternBytes = new short[length];

	while((++p) - pattern <= (long)length && *p) {

		while (*p == ' ') p++;

		if (*p == '?') {
			//"?" and "??" both stand for a single wildcard byte
			if (*(p+1) == '?')
				p++;
			patternBytes[idx++] = -1;
		} else if (*p == '@') {
			//Plain offset to the current byte position; must precede every other operation
			assert(!inRelDeref && !derefDone && operations.size() == 0);
			if (idx)
				operations.emplace_back(pOperation(1, idx));
			derefDone = true;
		} else if (*p == '[') {
			//Start of a relative dereference: move to the bracket first (if it is
			//not at byte 0), then reserve the operation that ']' completes
			assert(!inRelDeref && !derefDone);
			inRelDeref = true;
			relStartIdx = idx;
			if (idx) {
				relIdx++;
				operations.emplace_back(pOperation(1, idx));
			}
			operations.emplace_back(pOperation());
		} else if (*p == ']') {
			assert(inRelDeref);
			inRelDeref = false;
			derefDone = true;

			pOperation& op = operations.at(relIdx);

			//Displacement position and total length, both relative to '['
			op.offset = initDerefIdx - relStartIdx;
			op.v1 = idx - relStartIdx;
		} else if (readSizes[(int)*p] != 0) {
			//A dereference character ($ % ^ & *)
			assert(!derefDone);
			derefDone = true;

			initDerefIdx = idx;

			if (!inRelDeref)
				operations.emplace_back(pOperation(10 + readSizes[(int)*p], idx));
			else
				operations.at(relIdx).op = 20 + readSizes[(int)*p];

			p++;

			//Consume the chain that may follow: +N / -N offsets, further
			//dereference characters and :OFF,SIZE relative dereferences
			while (*p == '+' || *p == '-' || readSizes[(int)*p] || *p == ':') {
				if (readSizes[(int)*p])
					operations.emplace_back(pOperation(10 + readSizes[(int)*p++]));
				else if (*p == ':') {
					pOperation op = pOperation();
					p++;
					op.offset = strtol(p, &p, 10);
					//Skip the separator between OFF and SIZE
					p++;
					op.v1 = strtol(p, &p, 10);
					op.op = 20 + sizeof(uintptr_t);
					operations.emplace_back(op);
				} else {
					pOperation op = pOperation();
					if (*p == '+' || *p == '-')
						op.offset = strtol(p, &p, 10);
					//Compress the offset operation into a dereference
					op.op = readSizes[(int)*p] ? 10 + readSizes[(int)*p] : 1;
					if (readSizes[(int)*p])
						p++;
					operations.emplace_back(op);
				}
			}

			//Step back so the outer loop's pre-increment does not skip a character
			if (*p != ' ')
				p--;

		} else {
			//Anything else is parsed as a hexadecimal byte value
			patternBytes[idx++] = (uint8_t)strtoul(p, &p, 16);
			if (*p != ' ')
				p--;
		}
	}

	length = idx;
}
151 | 
//Optimize the parsed pattern into native-word sized values to be compared. This way we will utilize the full potential of the CPUs native register size when reading the memory. Going wider (into SIMD) is not worth it, because if the pattern does not match, it will usually be within the first 4-8 instructions.
//
// parsedData  pattern bytes, a negative entry marks a wildcard
// data        out: new[]-allocated words holding the pattern bytes, with every ignored byte set to 0xff
// mask        out: new[]-allocated words with 0xff in every ignored byte (wildcards and padding past the pattern)
// size        in: number of entries in parsedData; out: number of words in data/mask
//
// A match at some address means (memoryWord | mask[i]) == data[i] for every word.
static void ProduceScanData(short* parsedData, uintptr_t*& data, uintptr_t*& mask, size_t& size)
{
	//The words are uintptr_t and the scanner reads memory in uintptr_t steps,
	//so bytes must be packed per uintptr_t. sizeof(long) is only 4 on 64-bit
	//Windows, which left the upper half of every word unmasked.
	constexpr size_t wordSize = sizeof(uintptr_t);
	const size_t byteCount = size;
	//Round up without underflowing when the pattern is empty
	const size_t wordCount = (byteCount + wordSize - 1) / wordSize;

	data = new uintptr_t[wordCount];
	mask = new uintptr_t[wordCount];

	for (size_t i = 0; i < wordCount; i++) {
		data[i] = 0;
		mask[i] = 0;

		for (size_t o = 0; o < wordSize; o++) {
			const size_t byteIdx = i * wordSize + o;
			const unsigned shift = 8u * (unsigned)o;

			if (byteIdx >= byteCount || parsedData[byteIdx] < 0)
				mask[i] |= (uintptr_t)0xff << shift;
			else
				data[i] |= (uintptr_t)(parsedData[byteIdx] & 0xff) << shift;
		}
		//Ignored bytes compare equal because both sides carry 0xff there
		data[i] |= mask[i];
	}

	size = wordCount;
}
177 | 
178 | uintptr_t PatternScan::FindPattern(const char* pattern, uintptr_t start, uintptr_t end)
179 | {
180 | 	short* patternBytes = nullptr;
181 | 	size_t length = 0;
182 | 	std::vector<pOperation> operations;
183 | 
184 | 	uintptr_t addr = 0;
185 | 
186 | 	ParsePattern(pattern, patternBytes, length, operations);
187 | 
188 | 	uintptr_t* data;
189 | 	uintptr_t* mask;
190 | 	ProduceScanData(patternBytes, data, mask, length);
191 | 	delete[] patternBytes;
192 | 
193 | 	addr = ScanPattern(start, end, length, data, mask);
194 | 
195 | 	if (addr)
196 | 		for (auto& i : operations)
197 | 			addr = i.RunOp(addr);
198 | 
199 | 	delete[] data;
200 | 	delete[] mask;
201 | 	return addr;
202 | }
203 | 
// Convenience overload: looks up the module's address and size through
// Handles::GetModuleInfo and scans that whole range for `pattern`.
uintptr_t PatternScan::FindPattern(const char* __restrict pattern, const char* __restrict module)
{
	ModuleInfo info = Handles::GetModuleInfo(module);
	return FindPattern(pattern, info.address, info.address + info.size);
}
209 | 
210 | #ifndef PATTERN_SCAN_CUSTOM_SCAN
211 | #ifdef PATTERN_SCAN_PAGE_SCAN
//Page scanning is very useful in instances where memory reads have high latency. It can be hundreds of times faster than reading long-by-long
//
// Reads memory one 0x1000-byte page at a time through ReadArr and compares
// inside a local buffer. The buffer is laid out as
//   [ llength bytes carried over from the previous page | 0x1000-byte page ]
// so that a pattern straddling a page boundary can still match.
// Returns the address of the first match, or 0.
// NOTE(review): llength uses sizeof(long) while the comparison steps in
// sizeof(uintptr_t); these differ on 64-bit Windows.
// NOTE(review): `end - llength` wraps around when end < llength.
uintptr_t ScanPattern(uintptr_t start, uintptr_t end, uintptr_t length, uintptr_t* data, uintptr_t* mask)
{
	uintptr_t llength = sizeof(long) * length;
	char* buf = (char*)alloca(0x1000 + llength);
	char* page = buf + llength;
	//On the first round, we do not want to scan the part where a part of the previous page buffer would be copied on - since we have no "previous page"
	uintptr_t sOffset = llength;
	for (uintptr_t i = start; i < end - llength; i += 0x1000) {
		//Carry the last llength bytes of the previous page to the front of the buffer
		memcpy(buf, buf + 0x1000, llength);
		//Read the whole page that contains i
		ReadArr(i & ~0xfff, page, 0x1000);
		//NOTE(review): the in-page offset of `start` is applied on every page,
		//not only the first one — confirm this is intended
		for (uintptr_t u = (start & 0xfff) + sOffset; u < 0x1000; u++) {
			bool miss = false;
			for (uintptr_t o = 0; o < length && !miss; o++)
				miss = data[o] ^ (*(uintptr_t*)(buf + u + o * sizeof(uintptr_t)) | mask[o]);

			//Buffer offset llength corresponds to the first byte of the page
			if (!miss)
				return u + (i & ~0xfff) - llength;
		}
		sOffset = 0;
	}

	return 0;
}
236 | #else
237 | uintptr_t ScanPattern(uintptr_t start, uintptr_t end, uintptr_t length, uintptr_t* data, uintptr_t* mask)
238 | {
239 | 	uintptr_t llength = sizeof(long) * length;
240 | 	for (uintptr_t i = start; i < end - llength; i++) {
241 | 		bool miss = false;
242 | 		for (uintptr_t o = 0; o < length && !miss; o++)
243 | 			miss = data[o] ^ (Read<uintptr_t>(i + o * sizeof(uintptr_t)) | mask[o]);
244 | 
245 | 		if (!miss)
246 | 			return i;
247 | 	}
248 | 
249 | 	return 0;
250 | }
251 | #endif
252 | #endif
253 | 


--------------------------------------------------------------------------------
/utils/pattern_scan.h:
--------------------------------------------------------------------------------
 1 | #ifndef PATTERN_SCAN_H
 2 | #define PATTERN_SCAN_H
 3 | 
 4 | #include "stdint.h"
 5 | 
/*
  This is a very powerful pattern scanner.
  It uses IDA style patterns with some extra features. Take for example this pattern:
  34 24 [E8 *? ? ? ?] E9 DE FD FF FF

  Here are the steps that are taken in the scanner:
  First of all, the pattern 34 24 E8 ? ? ? ? E9 DE FD FF FF is parsed and its address is found.
  Then, if the address is not null, the value marked by the asterisk in [E8 *? ? ? ?] is dereferenced relative to the brackets, and the result is our final address.

  In short, the scanner does not only find the pattern, it also allows us to get the address of the exact element we want in memory. Here is the full list of actions we can take:
  1) Dereference native size pointer with *
  2) Read 8-bit size value with $
  3) Read 16-bit size value with %
  4) Read 32-bit size value with ^
  5) Read 64-bit size value with &
  6) Offset the resulting address to the wanted place with @
  7) Instruct the first dereference to be IP-relative with []
  8) After each dereference, an offset can be specified with +NUM or -NUM; a space is needed if the next non-offset character is part of the exact pattern match
  9) Manual relative dereference is done with :OFF,SIZE
*/

namespace PatternScan
{
	// Scans the address range [start, end) for pattern; returns 0 when not found.
	uintptr_t FindPattern(const char* pattern, uintptr_t start, uintptr_t end);
	// Scans the memory range of the named module for pattern.
	uintptr_t FindPattern(const char* __restrict pattern, const char* __restrict module);
}
32 | 
33 | #endif
34 | 


--------------------------------------------------------------------------------
/utils/rstring.h:
--------------------------------------------------------------------------------
 1 | #ifndef RSTRING_H
 2 | #define RSTRING_H
 3 | 
 4 | inline int rstrcmp(const char* a, const char* b)
 5 | {
 6 | 	while (*a && *b)
 7 | 		if (*a++ != *b++)
 8 | 			return 1;
 9 | 	return 0;
10 | }
11 | 
// Overload for non-const strings; forwards to the const char* version.
inline int rstrcmp(char* a, char* b)
{
	return rstrcmp((const char*)a, (const char*)b);
}
16 | 
17 | #endif
18 | 


--------------------------------------------------------------------------------
/utils/scheduler.h:
--------------------------------------------------------------------------------
 1 | #ifndef SCHEDULER_H
 2 | #define SCHEDULER_H
 3 | 
 4 | #include <stddef.h>
 5 | 
 6 | template <typename T, size_t size>
 7 | struct Scheduler
 8 | {
 9 | 	T data[size];
10 | 	int burstTime[size];
11 | 	int waitTime[size];
12 | 	int priority[size];
13 | 	int sid[size];
14 | 	size_t cid;
15 | 
16 | 	void Sort()
17 | 	{
18 | 		for (size_t i = 0; i < size; i++) {
19 | 			size_t pos = i;
20 | 			for (size_t o = i + 1; o < size; o++)
21 | 				if (priority[pos] >= priority[o])
22 | 					pos = o;
23 | 
24 | 			int temp = priority[i];
25 | 			priority[i] = priority[pos];
26 | 			priority[pos] = temp;
27 | 
28 | 			temp = burstTime[i];
29 | 			burstTime[i] = burstTime[pos];
30 | 			burstTime[pos] = temp;
31 | 
32 | 			T temp2 = data[i];
33 | 			data[i] = data[pos];
34 | 			data[pos] = temp2;
35 | 
36 | 			sid[pos] = i;
37 | 		}
38 | 		cid = 0;
39 | 	}
40 | 
41 | 	T* Run(int time)
42 | 	{
43 | 		for (; cid < size; cid++) {
44 | 			if (burstTime[cid] >= time) {
45 | 				burstTime[cid] -= time;
46 | 				return data + (cid++);
47 | 			}
48 | 			time -= burstTime[cid];
49 | 			burstTime[cid] = 0;
50 | 		}
51 | 		return nullptr;
52 | 	}
53 | };
54 | 
55 | #endif
56 | 


--------------------------------------------------------------------------------
/utils/semaphores.cpp:
--------------------------------------------------------------------------------
  1 | #include "semaphores.h"
  2 | 
  3 | #if defined(__linux__)
  4 | #include <fcntl.h>
  5 | 
// Linux build: initialises an unnamed POSIX semaphore with value 0.
// `shared` is passed as sem_init's pshared argument.
Semaphore::Semaphore(bool shared) {
	if (sem_init(&sm, (int)shared, 0) == -1)
#if defined(__cpp_exceptions) || defined(_CPPUNWIND)
		// NOTE(review): a bare `throw;` with no exception currently being
		// handled calls std::terminate(); nothing catchable is thrown here.
		throw;
#else
	;
#endif
}
 14 | 
 15 | Semaphore::~Semaphore() {
 16 | }
 17 | 
 18 | int Semaphore::TimedWait(size_t milliseconds)
 19 | {
 20 | 	struct timespec ts;
 21 | 	if (clock_gettime(CLOCK_REALTIME, &ts) == -1)
 22 | 		return 1;
 23 | 	ts.tv_nsec += 1000000ull * milliseconds;
 24 | 	return sem_timedwait(&sm, &ts);
 25 | }
 26 | 
// Blocks on the semaphore via sem_wait; the return code is ignored.
void Semaphore::Wait() {
	sem_wait(&sm);
}
 30 | 
// Increments the semaphore via sem_post; the return code is ignored.
void Semaphore::Post() {
	sem_post(&sm);
}
 34 | 
// Returns the value reported by sem_getvalue, or 0 if the call leaves `val`
// untouched. The int result is converted to unsigned long on return.
unsigned long Semaphore::Count()
{
	int val = 0;
	sem_getvalue(&sm, &val);
	return val;
}
 41 | 
 42 | #elif defined(__APPLE__)
 43 | 
// macOS build: creates a dispatch semaphore with value 0.
// The `shared` argument is not used on this platform.
Semaphore::Semaphore(bool shared) {
	sm = dispatch_semaphore_create(0);
}
 47 | 
// macOS build: releases the dispatch semaphore object.
Semaphore::~Semaphore() {
	dispatch_release(sm);
}
 51 | 
// macOS build: waits on the semaphore with no timeout.
void Semaphore::Wait() {
	dispatch_semaphore_wait(sm, DISPATCH_TIME_FOREVER);
}
 55 | 
// macOS build: waits up to `milliseconds` and returns the result of
// dispatch_semaphore_wait converted to int (0 on success).
int Semaphore::TimedWait(size_t milliseconds) {
	return dispatch_semaphore_wait(sm, dispatch_time(DISPATCH_TIME_NOW, NSEC_PER_MSEC * milliseconds));
}
 59 | 
// macOS build: signals (increments) the semaphore.
void Semaphore::Post() {
	dispatch_semaphore_signal(sm);
}
 63 | 
// macOS build: always returns 0; the semaphore is not queried here.
unsigned long Semaphore::Count()
{
	int val = 0;
	return val;
}
 69 | 
 70 | #else
 71 | 
// Windows build: creates an unnamed semaphore with initial count 0 and a
// maximum count of 0xffff. Requesting a shared semaphore is rejected.
Semaphore::Semaphore(bool shared) {
	//Unnamed shared semaphores do not work on windows
	if (shared)
#if defined(__cpp_exceptions) || defined(_CPPUNWIND)
		// NOTE(review): a bare `throw;` with no exception currently being
		// handled calls std::terminate().
		throw;
#else
	// NOTE(review): returns with `sm` never assigned; the destructor still
	// passes it to CloseHandle.
	return;
#endif
	sm = CreateSemaphoreA(nullptr, 0, 0xffff, nullptr);
}
 82 | 
// Windows build: closes the semaphore handle.
Semaphore::~Semaphore() {
	CloseHandle(sm);
}
 86 | 
// Windows build: waits on the semaphore with no timeout.
void Semaphore::Wait() {
	WaitForSingleObject(sm, INFINITE);
}
 90 | 
 91 | int Semaphore::TimedWait(size_t milliseconds)
 92 | {
 93 | 	if (WaitForSingleObject(sm, milliseconds) == WAIT_OBJECT_0)
 94 | 		return 0;
 95 | 	return 1;
 96 | }
 97 | 
// Windows build: releases the semaphore once (count + 1).
void Semaphore::Post() {
	ReleaseSemaphore(sm, 1, NULL);
}
101 | 
102 | unsigned long Semaphore::Count()
103 | {
104 | 	long previous;
105 | 	switch (WaitForSingleObject(sm, 0)) {
106 | 	  case WAIT_OBJECT_0:
107 | 		  ReleaseSemaphore(sm, 1, &previous);
108 | 		  return previous + 1;
109 | 	  case WAIT_TIMEOUT:
110 | 		  return 0;
111 | 	}
112 | 	return 0;
113 | }
114 | #endif
115 | 


--------------------------------------------------------------------------------
/utils/semaphores.h:
--------------------------------------------------------------------------------
 1 | #ifndef SEMAPHORES_H
 2 | #define SEMAPHORES_H
 3 | 
 4 | #include <stdint.h>
 5 | #if defined(__linux__) || defined(__APPLE__)
 6 | #include <stddef.h>
 7 | #endif
 8 | 
 9 | #if defined(__linux__)
10 | #include <semaphore.h>
11 | #include <time.h>
12 | #elif defined(__APPLE__)
13 | #include <dispatch/dispatch.h>
14 | #else
15 | #include "../wincludes.h"
16 | #endif
17 | 
// Unnamed counting semaphore: a POSIX sem_t on Linux, a dispatch semaphore
// on macOS and a Win32 semaphore handle elsewhere.
class Semaphore
{
	public:
	// `shared` is forwarded to sem_init on Linux, ignored on macOS and
	// rejected on Windows.
	Semaphore(bool shared = false);
	~Semaphore();
	// Blocks until the semaphore can be decremented.
	void Wait();
	// Waits at most `milliseconds`; returns 0 when the semaphore was taken.
	int TimedWait(size_t milliseconds);
	// Increments the semaphore.
	void Post();
	// Returns the current count (always 0 in the macOS implementation).
	unsigned long Count();
	private:

#if defined(__linux__)
	sem_t sm;
#elif defined(__APPLE__)
	dispatch_semaphore_t sm;
#else
	HANDLE sm;
#endif
};
37 | 
38 | #endif
39 | 


--------------------------------------------------------------------------------
/utils/shared_mutex.cpp:
--------------------------------------------------------------------------------
 1 | #include "shared_mutex.h"
 2 | 
 3 | #if defined(__linux__) || defined(__APPLE__)
 4 | #include <pthread.h>
 5 | 
 6 | SharedMutex::SharedMutex() {
 7 | 	lock = PTHREAD_RWLOCK_INITIALIZER;
 8 | }
 9 | 
// Destroys the read-write lock; the return code is ignored.
SharedMutex::~SharedMutex() {
	pthread_rwlock_destroy(&lock);
}
13 | 
// Acquires the lock for reading (shared), blocking if necessary.
void SharedMutex::rlock() {
	pthread_rwlock_rdlock(&lock);
}
17 | 
18 | bool SharedMutex::tryrlock() {
19 | 	int ret = pthread_rwlock_tryrdlock(&lock);
20 | 	return !ret;
21 | }
22 | 
// Releases a read lock. pthreads uses the same unlock call for both modes.
void SharedMutex::runlock() {
	pthread_rwlock_unlock(&lock);
}
26 | 
// Acquires the lock for writing (exclusive), blocking if necessary.
void SharedMutex::wlock() {
	pthread_rwlock_wrlock(&lock);
}
30 | 
31 | bool SharedMutex::trywlock() {
32 | 	int ret = pthread_rwlock_trywrlock(&lock);
33 | 	return !ret;
34 | }
35 | 
// Releases a write lock. pthreads uses the same unlock call for both modes.
void SharedMutex::wunlock() {
	pthread_rwlock_unlock(&lock);
}
39 | 
40 | #else
41 | #include <windows.h>
42 | 
// Windows build: initialises the slim reader/writer lock.
SharedMutex::SharedMutex() {
	::InitializeSRWLock(&lock);
}
46 | 
// Windows build: nothing to do; SRW locks have no destroy call.
SharedMutex::~SharedMutex() {
	//No release function
}
50 | 
// Windows build: acquires the lock in shared (read) mode.
void SharedMutex::rlock() {
	::AcquireSRWLockShared(&lock);
}
54 | 
// Windows build: non-blocking shared acquire; true when the lock was taken.
bool SharedMutex::tryrlock() {
	return ::TryAcquireSRWLockShared(&lock);
}
58 | // Releases a shared acquisition; must pair with rlock/tryrlock because SRW locks have separate release calls.
59 | void SharedMutex::runlock() {
60 | 	::ReleaseSRWLockShared(&lock);
61 | }
62 | // Blocking acquire in exclusive (write) mode.
63 | void SharedMutex::wlock() {
64 | 	::AcquireSRWLockExclusive(&lock);
65 | }
66 | // Non-blocking exclusive acquire; forwards the result of TryAcquireSRWLockExclusive converted to bool.
67 | bool SharedMutex::trywlock() {
68 | 	return ::TryAcquireSRWLockExclusive(&lock);
69 | }
70 | // Releases an exclusive acquisition; must pair with wlock/trywlock.
71 | void SharedMutex::wunlock() {
72 | 	::ReleaseSRWLockExclusive(&lock);
73 | }
74 | #endif
75 | 


--------------------------------------------------------------------------------
/utils/shared_mutex.h:
--------------------------------------------------------------------------------
 1 | #ifndef SHARED_MUTEX_H
 2 | #define SHARED_MUTEX_H
 3 | 
 4 | #if defined(__linux__) || defined(__APPLE__)
 5 | #include <pthread.h>
 6 | #else
 7 | #define WIN32_LEAN_AND_MEAN
 8 | #include <windows.h>
 9 | #endif
10 | // Reader/writer lock with one interface over pthread_rwlock_t (Linux/Apple) and SRWLOCK (other platforms).
11 | class SharedMutex {
12 |   public:
13 | 	SharedMutex();
14 | 	~SharedMutex();
15 | 	void rlock(); // blocking shared (read) acquire
16 | 	bool tryrlock(); // non-blocking shared acquire; true on success
17 | 	void runlock(); // release after rlock/tryrlock
18 | 	void wlock(); // blocking exclusive (write) acquire
19 | 	bool trywlock(); // non-blocking exclusive acquire; true on success
20 | 	void wunlock(); // release after wlock/trywlock
21 |   private:
22 | #if defined(__linux__) || defined(__APPLE__)
23 | 	pthread_rwlock_t lock; // NOTE(review): copy operations are not deleted, so copying a SharedMutex copies this raw lock — confirm no caller does
24 | #else
25 | 	SRWLOCK lock;
26 | #endif
27 | };
28 | 
29 | #endif
30 | 


--------------------------------------------------------------------------------
/utils/shared_utils.h:
--------------------------------------------------------------------------------
 1 | #ifndef SHARED_UTILS_H
 2 | #define SHARED_UTILS_H
 3 | 
 4 | #include <type_traits>
 5 | 
 6 | //Carries K as a static constexpr member: since K is a template argument, reading ::value forces compile-time evaluation
 7 | template <typename T, T K>
 8 | struct calc_constexpr
 9 | {
10 | 	static constexpr T value = K;
11 | };
12 | // Pointer-detection trait: `value` is true only when T is exactly `U*` (cv-qualified pointer types use the primary).
13 | template<typename T>
14 | struct is_pointer {
15 | 	static constexpr bool value = false; // constexpr (implicitly inline in C++17) so odr-use, e.g. binding to `const bool&`, links
16 | };
17 | 
18 | template<typename T>
19 | struct is_pointer<T*> {
20 | 	static constexpr bool value = true;
21 | };
22 | // Yields a reference to the pointee when T is a pointer type, otherwise a reference to `arg` itself.
23 | template<typename T>
24 | constexpr auto& RemovePtr(T& arg)
25 | {
26 | 	if constexpr(!is_pointer<T>::value)
27 | 		return arg;
28 | 	else
29 | 		return *arg;
30 | }
31 | // Function form of is_pointer: reports whether the deduced type of `arg` is a pointer; the value of `arg` is never read.
32 | template<typename T>
33 | constexpr bool IsPointer([[maybe_unused]] T& arg)
34 | {
35 | 	return is_pointer<T>::value;
36 | }
37 | // Gives a global G pointer-like syntax (`->`, `*`) whether G is an object or a pointer to one.
38 | template<auto& G>
39 | class pointer_proxy
40 | {
41 |   public:
42 | 	constexpr decltype(auto) operator->()
43 | 	{
44 | 		if constexpr(IsPointer(G))
45 | 			return (G); // parenthesised so decltype(auto) keeps returning a reference to the pointer
46 | 		else
47 | 			return &G; // prvalue address: has to be returned by value, an `auto&` return type cannot bind to it
48 | 	}
49 | 
50 | 	constexpr auto& operator*()
51 | 	{
52 | 		if constexpr(IsPointer(G))
53 | 			return *G;
54 | 		else
55 | 			return G;
56 | 	}
57 | };
58 | // True when every type in the pack satisfies std::is_arithmetic; at least one type is required.
59 | template<typename first, typename...more>
60 | struct AllArithmetic {
61 | 	static constexpr bool value = std::is_arithmetic<first>::value && // constexpr: usable in odr-use contexts without a separate definition
62 | 		AllArithmetic<more...>::value;
63 | };
64 | 
65 | template<typename first>
66 | struct AllArithmetic<first> : std::is_arithmetic<first> {};
67 | // Rounds `ptr` up to the next multiple of `align`; the mask arithmetic is only valid for power-of-two alignments.
68 | template<typename T>
69 | constexpr T* AlignUp(T* ptr, std::size_t align = std::alignment_of<T>::value) {
70 | 	const std::size_t mask = align - 1;
71 | 	return (T*)(((std::size_t)ptr + mask) & ~mask);
72 | }
73 | #endif
74 | 


--------------------------------------------------------------------------------
/utils/stackstring.h:
--------------------------------------------------------------------------------
 1 | #ifndef STACK_STRING
 2 | #define STACK_STRING
 3 | 
 4 | #include <stddef.h>
 5 | // ST("literal") builds a StackString; the array length is deduced from the literal.
 6 | #define ST(x) StackString(x)
 7 | // Element type used by the copy helpers below; rstt is an alias of the same type.
 8 | typedef unsigned char stt;
 9 | typedef stt rstt;
10 | // Compiler-specific spelling of "always inline".
11 | #ifdef _WIN32
12 | #define __alwaysinline __forceinline
13 | #else
14 | #define __alwaysinline __inline __attribute__((always_inline))
15 | #endif
16 | // Recursively unrolled copy: stores b[FN - N] into a[N - 1], then continues with N - 1 down to the terminator.
17 | // Always returns 1, the value produced by the N == 0 specialisation.
18 | template <int N>
19 | __alwaysinline
20 | constexpr int unroll_read(rstt* a, rstt* b, int FN)
21 | {
22 | 	a[N - 1] = b[FN - N];
23 | 	return unroll_read<N - 1>(a, b, FN);
24 | }
25 | // Recursion terminator: nothing left to copy.
26 | template<>
27 | __alwaysinline
28 | constexpr int unroll_read<0>(rstt*, rstt*, int)
29 | {
30 | 	return 1;
31 | }
32 | 
33 | // Holds a copy of a string literal in member arrays; the constructor fills them via volatile scratch buffers.
34 | template<size_t slen>
35 | struct StackString
36 | {
37 | 	// len is slen elements plus one spare; len2 is slen itself (stt is one byte wide).
38 | 	static constexpr int len = slen / sizeof(stt) + 1;
39 | 	static constexpr int len2 = slen / sizeof(stt);
40 | 	// stack is the readable result; stack2s/stack2e are scratch for the first len/2 and the remaining len2 - len/2 bytes.
41 | 	char stack[len * sizeof(stt)];
42 | 	volatile stt stack2[len];
43 | 	volatile stt stack2s[len/2];
44 | 	volatile stt stack2e[len2 - len/2];
45 | 	// NOTE(review): the "trick" is presumably keeping the literal from being stored as one contiguous string — confirm.
46 | 	//We have to always inline the functions for the trick to work
47 | 	__alwaysinline
48 | 	constexpr StackString(const char (&Array)[slen])
49 | 	{
50 | 		// Pass 1: copy both halves of Array into the scratch buffers (unroll_read stores each half reversed).
51 | 		unroll_read<len2 - len / 2>((rstt*)stack2e, (rstt*)Array + len / 2, len2 - len / 2);
52 | 		unroll_read<len / 2>((rstt*)stack2s, (rstt*)Array, len / 2);
53 | 		// Tail loop starts at (len - 1) * sizeof(rstt), which equals slen for a one-byte rstt, so it copies nothing.
54 | 		for (int i = (len - 1) * sizeof(rstt); i < (int)slen; i++)
55 | 			((char*)stack2)[i] = Array[i];
56 | 		// Pass 2: copy the scratch buffers into stack; the second reversal restores Array's original order.
57 | 		unroll_read<len2 - len / 2>((rstt*)stack + len / 2, (rstt*)stack2e, len2 - len / 2);
58 | 		unroll_read<len / 2>((rstt*)stack, (rstt*)stack2s, len / 2);
59 | 		// Same empty tail range as above.
60 | 		for (int i = (len - 1) * sizeof(rstt); i < (int)slen; i++)
61 | 			stack[i] = ((char*)stack2)[i];
62 | 
63 | 	}
64 | 	// Returns the stored characters.
65 | 	__alwaysinline
66 | 	constexpr const char* val() const
67 | 	{
68 | 		return stack;
69 | 	}
70 | 	// Mutable view of the same buffer (casts away the const of val()).
71 | 	inline operator char*()
72 | 	{
73 | 		return (char*)val();
74 | 	}
75 | 	// Read-only conversion for const objects.
76 | 	constexpr operator const char*() const
77 | 	{
78 | 		return val();
79 | 	}
80 | 	// Read-only conversion for non-const objects.
81 | 	constexpr operator const char*()
82 | 	{
83 | 		return val();
84 | 	}
85 | };
86 | 
87 | #endif
88 | 


--------------------------------------------------------------------------------
/utils/threading.cpp:
--------------------------------------------------------------------------------
  1 | #include "threading.h"
  2 | // Default job queue that InitThread hands to every worker, and the per-thread worker id (-1 until ThreadLoop sets it).
  3 | static LList<struct Job> jobs;
  4 | thread_local int Threading::threadID = -1;
  5 | // Wraps `function`/`data` in a Job and enqueues it on the default queue (at the front when `priority`).
  6 | // `ref` marks `data` as caller-owned; otherwise RunJob frees it after the job ran. Returns the id Enqueue assigned.
  7 | uint64_t Threading::_QueueJob(JobFn function, void* data, bool ref, bool priority)
  8 | {
  9 | 	Job queued;
 10 | 	queued.function = function;
 11 | 	queued.args = data;
 12 | 	queued.ref = ref;
 13 | 	return jobs.Enqueue(queued, priority);
 14 | }
 15 | // Executes one job inside a trace span, then frees its argument block unless the job only borrows caller memory.
 16 | static void RunJob(struct Job& job) {
 17 | 	MTR_BEGIN("workers", "execute_job");
 18 | 	job.function(job.args);
 19 | 	MTR_END("workers", "execute_job");
 20 | 	if (job.ref)
 21 | 		return;
 22 | 	free(job.args);
 23 | }
 24 | // Worker entry point: `t` is the JobThread to service; pops and runs jobs until shouldQuit is set. Returns nullptr.
 25 | static void* __stdcall ThreadLoop(void* t)
 26 | {
 27 | 	struct JobThread* thread = (struct JobThread*)t;
 28 | 	// Name the thread for the tracer when tracing is compiled in.
 29 | #ifdef MTR_ENABLED
 30 | 	char threadName[64];
 31 | 	snprintf(threadName, 63, "worker thread %d", thread->id);
 32 | 	MTR_META_THREAD_NAME(threadName);
 33 | #endif
 34 | 	// Expose this worker's id through the thread-local Threading::threadID.
 35 | 	Threading::threadID = thread->id;
 36 | 	// `job` starts as the default-constructed sentinel (id == ~0ull), so the first iteration runs nothing.
 37 | 	struct Job job;
 38 | 	thread->isRunning = true;
 39 | 	MTR_BEGIN("workers", "run_job_thread");
 40 | 	while (!thread->shouldQuit) {
 41 | 		if (job.id ^ ~0ull) { // non-zero exactly when id != ~0ull, i.e. a real job was popped
 42 | 			thread->queueEmpty = false;
 43 | 			RunJob(job);
 44 | 		} else
 45 | 			thread->queueEmpty = true;
 46 | 		struct LList<struct Job>* tJobs = thread->jobs; // re-read each iteration: BindThread/UnbindThread may swap the queue
 47 | 		thread->jLock->unlock(); // DoPopFront locked jLock when it handed out the job. NOTE(review): also reached when nothing was popped — confirm Mutex tolerates that
 48 | 		MTR_BEGIN("workers", "pop_job");
 49 | 		job = tJobs->PopFront(thread->jLock); // waits on the queue's semaphore
 50 | 		MTR_END("workers", "pop_job");
 51 | 	}
 52 | 	MTR_END("workers", "run_job_thread");
 53 | 	thread->isRunning = false;
 54 | 	return nullptr;
 55 | }
 56 | // Worker count and worker array; `threads` stays null until InitThreads and is reset to null by EndThreads.
 57 | unsigned int Threading::numThreads = 0;
 58 | static struct JobThread* threads = nullptr;
 59 | // Fills in a worker record and launches its thread (non-detached, running ThreadLoop on the default queue).
 60 | static void InitThread(struct JobThread* thread, int id)
 61 | {
 62 | 	thread->id = id;
 63 | 	thread->jLock = new Mutex();
 64 | 	thread->jobs = &jobs;
 65 | 	const thread_t started = Threading::StartThread(ThreadLoop, thread, false);
 66 | 	thread_t* cell = (thread_t*)malloc(sizeof(thread_t)); // heap cell for the thread value; EndThreads frees it
 67 | 	*cell = started;
 68 | 	thread->handle = cell;
 69 | }
 70 | // Creates the worker pool: NUM_THREADS zero-filled JobThread records, each started through InitThread.
 71 | // Also registers the calling thread with the tracer under the name "main thread".
 72 | void Threading::InitThreads()
 73 | {
 74 | 	MTR_META_THREAD_NAME("main thread");
 75 | 	// The pool size comes from NUM_THREADS; an earlier hardware_concurrency()-based heuristic is disabled.
 76 | 	numThreads = NUM_THREADS;
 77 | 	threads = (struct JobThread*)calloc(numThreads, sizeof(struct JobThread));
 78 | 
 79 | 	unsigned int workerId = 0;
 80 | 	while (workerId < numThreads) {
 81 | 		InitThread(threads + workerId, workerId);
 82 | 		workerId++;
 83 | 	}
 84 | }
 85 | // Signals all workers to quit, wakes them, waits for them and frees the pool. Always returns 0; no-op without a pool.
 86 | int Threading::EndThreads()
 87 | {
 88 | 	int ret = 0;
 89 | 
 90 | 	if (!threads)
 91 | 		return ret;
 92 | 
 93 | 	for (unsigned int i = 0; i < numThreads; i++)
 94 | 		threads[i].shouldQuit = true;
 95 | 
 96 | 	for (unsigned int i = 0; i < numThreads; i++)
 97 | 		threads[i].jobs->quit = true;
 98 | 	// Post several times per worker so that threads blocked in PopFront wake up and observe the quit flags.
 99 | 	for (int o = 0; o < 4; o++)
100 | 		for (unsigned int i = 0; i < numThreads; i++)
101 | 			threads[i].jobs->sem.Post();
102 | 
103 | 	for (size_t i = 0; i < numThreads; i++) {
104 | #if defined(__linux__) || defined(__APPLE__)
105 | 		void* ret2 = nullptr;
106 | 		pthread_join(*(pthread_t*)threads[i].handle, &ret2);
107 | #else
108 | 		HANDLE handle = (HANDLE)(uintptr_t)*(thread_t*)threads[i].handle; // the cell holds a thread_t; a HANDLE-sized read would overrun it on Win64
109 | 		ResumeThread(handle);
110 | 		WaitForSingleObject(handle, 100); // NOTE(review): a timeout is ignored and the worker's state is released below regardless
111 | #endif
112 | 		delete threads[i].jLock;
113 | 		threads[i].jLock = nullptr;
114 | 		free(threads[i].handle);
115 | 	}
116 | 	free(threads);
117 | 	threads = nullptr;
118 | 
119 | 	return ret;
120 | }
121 | // Blocks until every worker's queue is empty and its current job has finished; with `executeJobs` the caller helps drain.
122 | void Threading::FinishQueue(bool executeJobs)
123 | {
124 | 	if (!threads)
125 | 		return;
126 | 
127 | 	MTR_BEGIN("workers", "finish_queue");
128 | 
129 | 	// Optional help: run pending jobs on the calling thread until TryPopFront returns the empty sentinel.
130 | 	for (unsigned int w = 0; executeJobs && w < numThreads; w++) {
131 | 		LList<struct Job>* queue = &jobs;
132 | 		if (threads[w].jobs)
133 | 			queue = threads[w].jobs;
134 | 		for (;;) {
135 | 			struct Job next = queue->TryPopFront();
136 | 			if (next.id == ~0ull)
137 | 				break;
138 | 			RunJob(next);
139 | 		}
140 | 	}
141 | 
142 | 	for (unsigned int w = 0; w < numThreads; w++) {
143 | 		// Spin until the queue is empty, then take and drop jLock to wait out the job the worker is still running.
144 | 		if (threads[w].jobs)
145 | 			while (!threads[w].jobs->IsEmpty()) {}
146 | 
147 | 		threads[w].jLock->lock();
148 | 		threads[w].jLock->unlock();
149 | 	}
150 | 
151 | 	MTR_END("workers", "finish_queue");
152 | }
153 | // Rebinds the first worker still on the default queue (or on none) to `jobsQueue`; returns it, or nullptr if none is free.
154 | JobThread* Threading::BindThread(LList<struct Job>* jobsQueue)
155 | {
156 | 	if (!threads) return nullptr; // pool not created or already torn down (numThreads is not reset by EndThreads)
157 | 	for (size_t i = 0; i < numThreads; i++) {
158 | 		if (threads[i].jobs != &jobs && threads[i].jobs) continue; // already bound to another queue
159 | 		threads[i].jobs = jobsQueue;
160 | 		for (size_t o = 0; o < numThreads; o++) // wake workers waiting on the default queue so the rebound one re-reads its queue pointer
161 | 			jobs.sem.Post();
162 | 		return threads + i;
163 | 	}
164 | 	return nullptr;
165 | }
166 | // Returns every worker currently bound to `jobsQueue` to the default queue, taking each worker's jLock while switching.
167 | void Threading::UnbindThread(LList<struct Job>* jobsQueue)
168 | {
169 | 	if (!threads) return; // pool not created or already torn down (same guard as FinishQueue)
170 | 	for (size_t i = 0; i < numThreads; i++) {
171 | 		threads[i].jLock->lock();
172 | 		if (threads[i].jobs == jobsQueue) threads[i].jobs = &jobs;
173 | 		threads[i].jLock->unlock();
174 | 	}
175 | }
176 | // Starts a thread running start(arg), stores its thread_t in *thread and returns it. `detached` only affects the pthread build.
177 | thread_t Threading::StartThread(threadFn start, void* arg, bool detached, thread_t* thread)
178 | {
179 | #ifdef _WIN32
180 | 	// JoinThread and EndThreads wait on this value as a HANDLE, so hand back the thread handle instead of the thread id.
181 | 	*thread = (thread_t)(uintptr_t)CreateThread(nullptr, 0, (LPTHREAD_START_ROUTINE)start, arg, 0, nullptr);
182 | #else
183 | 	pthread_attr_t tAttr;
184 | 	if (detached) {
185 | 		pthread_attr_init(&tAttr);
186 | 		pthread_attr_setdetachstate(&tAttr, PTHREAD_CREATE_DETACHED);
187 | 	}
188 | 	pthread_create(thread, detached ? &tAttr : nullptr, start, arg);
189 | 	if (detached) pthread_attr_destroy(&tAttr); // the attribute object is not needed once the thread has been created
190 | #endif
191 | 	return *thread;
192 | }
193 | // Convenience overload for callers that do not supply a thread_t out-parameter; forwards to the four-argument overload.
194 | thread_t Threading::StartThread(threadFn start, void* arg, bool detached)
195 | {
196 | 	thread_t thread;
197 | 	return StartThread(start, arg, detached, &thread);
198 | }
199 | // Waits for `thread` to finish. NOTE(review): the non-__posix__ branch never writes *returnVal — confirm callers do not read it there.
200 | void Threading::JoinThread(thread_t thread, void** returnVal)
201 | {
202 | #ifdef __posix__
203 | 	pthread_join(thread, returnVal);
204 | #else
205 | 	WaitForSingleObject((void*)thread, INFINITE); // the thread_t value is used as a HANDLE here
206 | #endif
207 | }
208 | 


--------------------------------------------------------------------------------
/utils/threading.h:
--------------------------------------------------------------------------------
  1 | #ifndef THREADING_H
  2 | #define THREADING_H
  3 | 
  4 | #include "../g_defines.h"
  5 | #include "mutex.h"
  6 | #include "semaphores.h"
  7 | #include "packed_heap.h"
  8 | #include "atomic_lock.h"
  9 | #include <atomic>
 10 | #include "../submodules/minitrace/minitrace.h"
 11 | 
 12 | #if defined(__posix__)
 13 | #include <unistd.h>
 14 | #include <pthread.h>
 15 | #include <stdlib.h>
 16 | #include <string.h>
 17 | 
 18 | typedef pthread_t thread_t;
 19 | 
 20 | #else
 21 | #define NOMINMAX
 22 | #include "../wincludes.h"
 23 | #include <Psapi.h>
 24 | #include "stdint.h"
 25 | 
 26 | typedef unsigned long thread_t;
 27 | 
 28 | #endif
 29 | // JobFn: a queued job, called with its argument block. threadFn: the entry-point signature StartThread accepts.
 30 | typedef void(*JobFn)(void*);
 31 | typedef void*(__stdcall*threadFn)(void*);
 32 | // One unit of work for the worker pool: a function, its argument block, an ownership flag and a queue id.
 33 | struct Job
 34 | {
 35 | 	JobFn function;
 36 | 	void* args;
 37 | 	bool ref;
 38 | 	uint64_t id;
 39 | 
 40 | 	// Default state is the "no job" sentinel: null function/args, borrowed args, id of all ones.
 41 | 	Job()
 42 | 		: function(nullptr),
 43 | 		  args(nullptr),
 44 | 		  ref(true),
 45 | 		  id(~0ull)
 46 | 	{}
 47 | };
 48 | // Mutex-protected doubly linked queue whose nodes live in a PackedHeapL; index 0 doubles as the null link.
 49 | template <typename T>
 50 | struct LList
 51 | {
 52 | 	// Queue node: `prev` leads toward the back of the queue, `next` toward the front.
 53 | 	struct LEntry
 54 | 	{
 55 | 		T entry;
 56 | 		idx_t prev;
 57 | 		idx_t next;
 58 | 	};
 59 | 
 60 | 	PackedHeapL<LEntry> entries; // NOTE(review): index 0 is used as "null", so Alloc() presumably never returns 0 — confirm
 61 | 
 62 | 	Mutex lock; // guards entries, front, back and the id counters
 63 | 	bool quit; // once true, PopFront/TryPopFront hand out empty entries instead of popping
 64 | 	idx_t front;
 65 | 	idx_t back;
 66 | 	uint64_t lastID;
 67 | 	uint64_t lastPopID;
 68 | 
 69 | 	Semaphore sem; // posted once per Enqueue
 70 | 	// `quit` has to be cleared explicitly: only zero-initialised statics would otherwise start with it false.
 71 | 	LList() {
 72 | 		quit = false;
 73 | 		front = back = 0;
 74 | 		lastID = 0;
 75 | 		lastPopID = 0;
 76 | 	}
 77 | 	// Adds `data` at the back (at the front when `priority`), stamps it with the next id, posts the semaphore; returns the id.
 78 | 	uint64_t Enqueue(const T& data, bool priority = false) {
 79 | 		lock.lock();
 80 | 		idx_t entry = entries.Alloc();
 81 | 		if (priority) {
 82 | 			entries[entry] = { data, front, 0 };
 83 | 			entries[entry].entry.id = lastID;
 84 | 			if (front)
 85 | 				entries[front].next = entry;
 86 | 			if (!back) {
 87 | 				back = entry;
 88 | 				entries[back].next = 0;
 89 | 			}
 90 | 			entries[entry].prev = front;
 91 | 			front = entry;
 92 | 		} else {
 93 | 			entries[entry] = { data, 0, back };
 94 | 			entries[entry].entry.id = lastID;
 95 | 			if (back)
 96 | 				entries[back].prev = entry;
 97 | 			if (!front) {
 98 | 				front = entry;
 99 | 				entries[front].prev = 0;
100 | 			}
101 | 			entries[entry].next = back;
102 | 			back = entry;
103 | 		}
104 | 		uint64_t id = lastID++;
105 | 		lock.unlock();
106 | 		sem.Post();
107 | 		return id;
108 | 	}
109 | 	// Pops the front entry, or returns a default T when the list is empty. When an entry is returned, `lck` (if set) is left locked.
110 | 	T DoPopFront(Mutex* lck) {
111 | 		lock.lock();
112 | 		if (!front) {
113 | 			lock.unlock();
114 | 			return T();
115 | 		}
116 | 		if (lck)
117 | 			lck->lock();
118 | 		LEntry* entry = &entries[front];
119 | 		front = entry->prev;
120 | 		if (front)
121 | 			entries[front].next = 0;
122 | 		else
123 | 			back = 0;
124 | 		T ret = entry->entry;
125 | 		lastPopID = ret.id;
126 | 		entries.Free(entry);
127 | 		lock.unlock();
128 | 		return ret;
129 | 	}
130 | 	// Waits on the semaphore, then pops. After `quit` the token is re-posted for the next waiter and a default T is returned.
131 | 	T PopFront(Mutex* lck = nullptr) {
132 | 		sem.Wait();
133 | 		if (quit) {
134 | 			sem.Post();
135 | 			return T();
136 | 		}
137 | 		return DoPopFront(lck);
138 | 	}
139 | 	// Non-blocking variant: returns a default T when the zero-timeout wait fails or `quit` is set.
140 | 	T TryPopFront() {
141 | 		if (sem.TimedWait(0))
142 | 			return T();
143 | 
144 | 		if (quit) {
145 | 			sem.Post();
146 | 			return T();
147 | 		}
148 | 
149 | 		return DoPopFront(nullptr);
150 | 	}
151 | 	// Unlocked emptiness check. NOTE(review): noinline plus the volatile counter presumably keep callers' spin loops re-reading `front` — confirm.
152 | #ifdef _MSC_VER
153 | 	__declspec(noinline)
154 | #else
155 | 	__attribute__((noinline))
156 | #endif
157 | 	bool IsEmpty()
158 | 	{
159 | 		static volatile short cnt = 0;
160 | 		cnt++;
161 | 		return !front;
162 | 	}
163 | };
164 | // Per-worker record; InitThreads allocates these zero-filled and InitThread fills them in.
165 | struct JobThread
166 | {
167 | 	std::atomic_bool shouldQuit; // set by EndThreads; ends the ThreadLoop
168 | 	std::atomic_bool isRunning; // true while ThreadLoop is executing
169 | 	std::atomic_bool queueEmpty; // false while the worker has a job to run, true after an empty pop
170 | 	Mutex* jLock; // locked by DoPopFront when a job is handed out, unlocked by the worker after the job ran
171 | 	LList<struct Job>* jobs; // queue this worker pops from (the default queue unless rebound via BindThread)
172 | 	int id;
173 | 	void* handle; // heap cell holding the thread_t returned by StartThread
174 | };
175 | // Worker-pool API: jobs are function pointers plus an opaque argument block.
176 | namespace Threading
177 | {
178 | 	extern unsigned int numThreads;
179 | 	extern thread_local int threadID;
180 | 	uint64_t _QueueJob(JobFn function, void* data, bool ref = false, bool priority = false);
181 | 	void InitThreads();
182 | 	int EndThreads();
183 | 	void FinishQueue(bool executeJobs = false);
184 | 	JobThread* BindThread(LList<struct Job>* jobsQueue);
185 | 	void UnbindThread(LList<struct Job>* jobsQueue);
186 | 	thread_t StartThread(threadFn start, void* param, bool detached = true);
187 | 	thread_t StartThread(threadFn start, void* param, bool detached, thread_t* thread);
188 | 	void JoinThread(thread_t thread, void** returnVal);
189 | 	// Copies `data` bytewise into a malloc'd block that the worker frees after the job ran; returns the job id.
190 | 	template<typename N, typename T>
191 | 	uint64_t QueueJob(N function, T data, bool priority = false) {
192 | 		void* d = malloc(sizeof(T));
193 | 		memcpy(d, (void*)&data, sizeof(T));
194 | 		return _QueueJob((JobFn)function, d, false, priority);
195 | 	}
196 | 	// Queues a job that borrows `data`: nothing is copied or freed, so the caller must keep it alive until the job ran.
197 | 	template<typename N, typename T>
198 | 	uint64_t QueueJobRef(N function, T* data, bool priority = false) {
199 | 		return _QueueJob((JobFn)function, (void*)data, true, priority);
200 | 	}
201 | }
202 | 
203 | #endif
204 | 


--------------------------------------------------------------------------------
/utils/utils.h:
--------------------------------------------------------------------------------
1 | #ifndef UTILS_H
2 | #define UTILS_H
3 | #include "vfhook.h"
4 | #endif
5 | 


--------------------------------------------------------------------------------
/utils/vfhook.cpp:
--------------------------------------------------------------------------------
 1 | #include "vfhook.h"
 2 | #include "assert.h"
 3 | #include "string.h"
 4 | #include "stdint.h"
 5 | // Creates an empty hook that owns no vtable; every pointer member, including classBase, starts out null.
 6 | VFuncHook::VFuncHook()
 7 | {
 8 | 	classBase = nullptr; // was left indeterminate although ~VFuncHook() reads through it
 9 | 	overridePointers = false;
10 | 	vtableLength = 0;
11 | 	curVTable = oldVTable = nullptr;
12 | 	indexes = nullptr;
13 | }
14 | // Hooks the object at `base`: copies its vtable, then either points the object at the copy or (overrideMode) hooks the live table.
15 | VFuncHook::VFuncHook(void* base, bool overrideMode, int minSize)
16 | {
17 | 	classBase = (uintptr_t**)base;
18 | 	// Maps a hooked function's address to the slot it was installed in.
19 | 	indexes = new std::unordered_map<void*, size_t>();
20 | 	// The first pointer-sized word of the object is taken to be its vtable pointer.
21 | 	oldVTable = *classBase;
22 | 	overridePointers = overrideMode;
23 | 
24 | 	vtableLength = EstimateVTableLength(oldVTable, minSize);
25 | 	// Copy the table together with the two slots preceding it. NOTE(review): presumably ABI metadata such as RTTI — confirm.
26 | 	curVTable = (uintptr_t*)malloc(sizeof(uintptr_t*) * (vtableLength + 2));
27 | 	curVTable += 2;
28 | 	memcpy((void*)(curVTable - 2), (void*)(oldVTable - 2), sizeof(uintptr_t*) * (vtableLength + 2));
29 | 	// Override mode keeps the copy as the pristine backup and hooks the live table; otherwise the object is pointed at the copy.
30 | 	if (overridePointers) {
31 | 		oldVTable = curVTable;
32 | 		curVTable = *classBase;
33 | 	} else
34 | 		*classBase = curVTable;
35 | }
36 | // Undoes the hook if the object still uses our table, then releases the owned vtable copy and the index map.
37 | //We need to disable ASAN for this function, as classBase might be pointing to freed area.
38 | //This is completely intentional.
39 | [[gnu::no_sanitize_address]]
40 | VFuncHook::~VFuncHook()
41 | {
42 | 	if (!curVTable) return; // default-constructed: nothing was hooked or allocated, and classBase may be unset
43 | 	if (*classBase == curVTable)
44 | 		UnhookAll();
45 | 	free((overridePointers ? oldVTable : curVTable) - 2); // the allocation starts two slots before the table
46 | 	delete indexes; // allocated with new in the hooking constructor and never released before
47 | }
48 | // Re-targets the hook at another object address; no vtable is read or written here.
49 | void VFuncHook::UpdateBase(void* base)
50 | {
51 | 	classBase = (uintptr_t**)base;
52 | }
53 | // Restores the original entry of slot `index`; the range check is an assert only.
54 | void VFuncHook::UnhookID(size_t index)
55 | {
56 | 	assert(index < vtableLength);
57 | 	curVTable[index] = oldVTable[index];
58 | }
59 | // Reverts every hook: restores the object's original vtable pointer, or in override mode copies the saved entries over the live table.
60 | void VFuncHook::UnhookAll()
61 | {
62 | 	if (!overridePointers)
63 | 		*classBase = oldVTable;
64 | 	else
65 | 		memcpy((void*)curVTable, (void*)oldVTable, sizeof(intptr_t) * vtableLength);
66 | }
67 | // Counts entries from `vtable` up to the first zero entry found at or after index `minSize`.
68 | size_t VFuncHook::EstimateVTableLength(uintptr_t* vtable, int minSize) {
69 | 	size_t count = 0;
70 | 	for (; *vtable || (int)count < minSize; ++vtable)
71 | 		++count;
72 | 	return count;
73 | }
74 | 


--------------------------------------------------------------------------------
/utils/vfhook.h:
--------------------------------------------------------------------------------
 1 | #ifndef VFHOOK_H
 2 | #define VFHOOK_H
 3 | 
 4 | #include <assert.h>
 5 | #include "math.h"
 6 | #include <unordered_map>
 7 | // Hooks virtual functions of one object, either by pointing it at a patched vtable copy or by patching its vtable in place.
 8 | class VFuncHook
 9 | {
10 |   public:
11 | 	VFuncHook();
12 | 	VFuncHook(void* base, bool overrideMode = false, int minSize = 0);
13 | 	~VFuncHook();
14 | 
15 | 	void UpdateBase(void* base);
16 | 	// Installs `function` in vtable slot `index` and records the slot under the function's address.
17 | 	template<typename T>
18 | 	void Hook(size_t index, T function)
19 | 	{
20 | 		assert(index < vtableLength && indexes);
21 | 		indexes->insert({(void*)function, index});
22 | 		curVTable[index] = (uintptr_t)function;
23 | 	}
24 | 	// Restores the slot `function` was hooked into. The key is cast exactly as in Hook(), so function pointers are accepted.
25 | 	template<typename T>
26 | 	void Unhook(T function)
27 | 	{
28 | 		assert(indexes->find((void*)function) != indexes->end());
29 | 		size_t idx = indexes->at((void*)function);
30 | 		curVTable[idx] = oldVTable[idx];
31 | 	}
32 | 
33 | 	void UnhookID(size_t index);
34 | 	void UnhookAll();
35 | 	// Returns the original entry (cast to T) of the slot that `func` was hooked into; same key cast as Hook().
36 | 	template<typename T, typename F>
37 | 	T GetOriginal(F func)
38 | 	{
39 | 		assert(indexes->find((void*)func) != indexes->end());
40 | 		return (T)oldVTable[indexes->at((void*)func)];
41 | 	}
42 | 	// Returns the original entry of slot `index` cast to T; the index is not range-checked.
43 | 	template<typename T>
44 | 	T GetOriginalByIndex(size_t index)
45 | 	{
46 | 		return (T)oldVTable[index];
47 | 	}
48 | 
49 |   private:
50 | 	size_t EstimateVTableLength(uintptr_t* vtable, int minSize = 0);
51 | 
52 | 	uintptr_t** classBase; // address of the hooked object's vtable pointer
53 | 
54 | 	bool overridePointers; // true: hook the live vtable in place; false: swap the object's vtable pointer
55 | 	size_t vtableLength;
56 | 
57 | 	uintptr_t* curVTable; // table that receives hooks
58 | 	uintptr_t* oldVTable; // unmodified entries, used for restoring and for GetOriginal
59 | 
60 | 	std::unordered_map<void*, size_t>* indexes; // hooked function address -> slot index
61 | };
62 | 
63 | #endif
64 | 


--------------------------------------------------------------------------------
/wincludes.h:
--------------------------------------------------------------------------------
 1 | #ifdef _WIN32
 2 | #ifndef WINCLUDES_H
 3 | #define WINCLUDES_H
 4 | 
 5 | #define NOMINMAX
 6 | #define WIN32_LEAN_AND_MEAN
 7 | #include <WinSock2.h>
 8 | #include <WS2tcpip.h>
 9 | #include <Windows.h>
10 | #include <intrin.h>
11 | 
12 | #pragma comment (lib, "Ws2_32.lib")
13 | #pragma comment (lib, "Mswsock.lib")
14 | #pragma comment (lib, "AdvApi32.lib")
15 | #endif
16 | #endif
17 | 


--------------------------------------------------------------------------------
/windows_meson.txt:
--------------------------------------------------------------------------------
 1 | [binaries]
 2 | c = 'clang'
 3 | cpp = 'clang++'
 4 | ar = 'llvm-ar'
 5 | 
 6 | [host_machine]
 7 | system = 'windows'
 8 | cpu_family = 'x86'
 9 | cpu = 'i686'
10 | endian = 'little'
11 | 
12 | [properties]
13 | sizeof_int = 4
14 | sizeof_void* = 4
15 | 
16 | 


--------------------------------------------------------------------------------