├── .dir-locals.el ├── .gitignore ├── .gitmodules ├── LICENSE.md ├── README.md ├── features ├── aimbot.cpp ├── aimbot.h └── aimbot_types.h ├── g_defines.h ├── interfaces └── tracing.h ├── math ├── clang_avx512.h ├── matrix.h ├── mm_funcs.h ├── mmath.h ├── soa_accessor.h ├── vec_funcs.h ├── vecsoa_funcs.h ├── vector.h └── vector_operators.h ├── meson.build ├── meson_options.txt ├── players.h ├── tests ├── allocator.cpp ├── crc.cpp ├── intersect.cpp ├── kd_tree.cpp ├── mutex.cpp ├── settings.cpp ├── shared_mutex.cpp ├── shmemalloc.cpp ├── thread_pool.cpp └── vector.cpp ├── utils ├── allocwraps.h ├── atomic_lock.cpp ├── atomic_lock.h ├── crc32.h ├── freelistallocator.h ├── handles.cpp ├── handles.h ├── history_list.h ├── intersect.cpp ├── intersect.h ├── intersect_box.cpp ├── intersect_box.h ├── intersect_box_impl.h ├── intersect_impl.h ├── kd_tree.h ├── md5.cpp ├── md5.h ├── memutils.h ├── mutex.cpp ├── mutex.h ├── named_semaphores.cpp ├── named_semaphores.h ├── packed_heap.cpp ├── packed_heap.h ├── pattern_scan.cpp ├── pattern_scan.h ├── rstring.h ├── scheduler.h ├── semaphores.cpp ├── semaphores.h ├── settings.h ├── shared_mutex.cpp ├── shared_mutex.h ├── shared_utils.h ├── stackstring.h ├── threading.cpp ├── threading.h ├── utils.h ├── vfhook.cpp └── vfhook.h ├── wincludes.h └── windows_meson.txt /.dir-locals.el: -------------------------------------------------------------------------------- 1 | ((nil . ((indent-tabs-mode . 
t)))) 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.exe 3 | *.out 4 | *.pdb 5 | *.gch 6 | build -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "submodules/minitrace"] 2 | path = submodules/minitrace 3 | url = https://github.com/hrydgard/minitrace.git 4 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2018 A. B. (Heep042) 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
8 | 9 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # A modular video game modding library 2 | 3 | This is a set of commonly used utilities in video game modifications, such as a pattern scanner as well as a high performance vector math library. Some game-independent features, such as an aimbot, are also in this library (physics bullets are not supported yet). 4 | 5 | This library is intended to be used as the core module of any project, wrapped in a layer of game engine specific functions and data structures, then wrapped in a game specific implementation (which provides initialization, shutdown, data preparation and execution of features). This way the least code is duplicated, and most of it is reused. The library was primarily focused on internal hacks, with direct access to pointer dereferencing, but some tools can be used externally (provided correct memory access functions are implemented by other layers). 6 | 7 | Another major focus of the library was efficient data layout (data oriented design), hence SoA style vector structs, SoA style player data structure, etc. This allows for good cache performance and more efficient vectorization (not having to lose the 4th value in an SSE register when dealing with 3D vectors, and being able to scale up even to AVX512). Of the three compilers tested (LLVM, including Apple's LLVM; GCC; and MSVC), LLVM seems to do the best job at auto-vectorization and is therefore the recommended compiler. 
8 | 9 | ##### TODO 10 | - Make the HistoryList structure use standard naming 11 | -------------------------------------------------------------------------------- /features/aimbot.cpp: -------------------------------------------------------------------------------- 1 | #include "aimbot.h" 2 | #include "../interfaces/tracing.h" 3 | #ifdef AIMBOT_THREADING 4 | #include "../utils/threading.h" 5 | auto& numThreads = Threading::numThreads; 6 | constexpr int threadQueueMultiplier = 2; 7 | #else 8 | constexpr int numThreads = 1; 9 | constexpr int threadQueueMultiplier = 1; 10 | #endif 11 | 12 | vec3_t Aimbot::shootAngles; 13 | float* pointScaleVal = nullptr; 14 | 15 | #ifdef AIMBOT_THREADING 16 | static Semaphore threadSem; 17 | #endif 18 | 19 | AimbotLoopData data[NUM_THREADS * 2]; 20 | 21 | bool doMultipoint = true; 22 | 23 | static int ProcessAimPointsSIMD(AimbotLoopData* d) 24 | { 25 | int ret = -1; 26 | 27 | #ifdef AIMBOT_SIMD_TRACE_DATA 28 | Tracing::TracePointListSIMD(d->localPlayer, d->players, d->hitboxIDsSOA.size(), d->traceEndSOA.data(), d->entID, d->traceOutputsSOA.data(), 1); 29 | 30 | for (size_t i = 0; i < d->hitboxIDsSOA.size(); i++) { 31 | bool quit = false; 32 | 33 | for (size_t o = 0; o < MULTIPOINT_COUNT; o++) 34 | if (Aimbot::CompareData(d, d->traceOutputsSOA[i * MULTIPOINT_COUNT + o], (vec3_t)d->traceEndSOA[i].acc[o], d->hitboxIDsSOA[i], d->fovListSOA[i * MULTIPOINT_COUNT + o])) 35 | quit = true; 36 | 37 | //TODO: add an option to return early 38 | if (quit) 39 | ret = d->entID; 40 | } 41 | 42 | #endif 43 | return ret; 44 | } 45 | 46 | static int ProcessAimPoints(AimbotLoopData* d) 47 | { 48 | Tracing::TracePointList(d->localPlayer, d->players, d->hitboxIDs.size(), d->traceEnd.data(), d->entID, d->traceOutputs.data(), 1); 49 | 50 | int ret = -1; 51 | 52 | for (size_t i = 0; i < d->hitboxIDs.size(); i++) 53 | if (Aimbot::CompareData(d, d->traceOutputs[i], d->traceEnd[i], d->hitboxIDs[i], d->fovList[i])) 54 | ret = d->entID; 55 | 56 | return ret; 57 | 
} 58 | 59 | static int PrepareHitboxList(AimbotLoopData* d, size_t id) 60 | { 61 | d->fovs[id] = 1000.f; 62 | 63 | d->entID = id; 64 | 65 | d->traceEnd.clear(); 66 | d->hitboxIDs.clear(); 67 | d->fovList.clear(); 68 | d->traceOutputs.clear(); 69 | 70 | #ifdef AIMBOT_SIMD_TRACE_DATA 71 | d->traceEndSOA.clear(); 72 | d->hitboxIDsSOA.clear(); 73 | d->fovListSOA.clear(); 74 | d->traceOutputsSOA.clear(); 75 | #endif 76 | 77 | HitboxList& hitboxes = d->players->hitboxes[id]; 78 | 79 | for (size_t i = 0; i < MAX_HITBOXES; i++) { 80 | 81 | if (!d->hitboxList[i]) 82 | continue; 83 | 84 | float fov = 0.f; 85 | 86 | vec3_t average = (hitboxes.start[i] + hitboxes.end[i]) * 0.5f; 87 | average = hitboxes.wm[i].Vector3Transform(average); 88 | 89 | if (!Aimbot::PreCompareData(&d->target, d->localPlayer, average, i, &fov)) 90 | continue; 91 | 92 | if (d->hitboxList[i] & HitboxScanMode_t::SCAN_MULTIPOINT) { 93 | mvec3 mpVec = d->players->hitboxes[id].mpOffset[i] + d->players->hitboxes[id].mpDir[i] * d->players->hitboxes[id].radius[i] * pointScaleVal[i]; 94 | mpVec = d->players->hitboxes[id].wm[i].VecSoaTransform(mpVec); 95 | 96 | #ifdef AIMBOT_SIMD_TRACE_DATA 97 | d->traceEndSOA.push_back(mpVec); 98 | d->hitboxIDsSOA.push_back(i); 99 | //TODO: Convert this to more vectorizable way 100 | for (int o = 0; o < MULTIPOINT_COUNT; o++) { 101 | vec3_t angle = ((vec3_t)mpVec.acc[o] - d->localPlayer->eyePos).GetAngles(true); 102 | vec3_t angleDiff = (Aimbot::shootAngles - angle).NormalizeAngles<2>(-180.f, 180.f); 103 | float fovSOA = angleDiff.Length<2>(); 104 | d->fovListSOA.push_back(fovSOA); 105 | } 106 | d->traceOutputsSOA.resize(d->traceOutputsSOA.size() + MULTIPOINT_COUNT); 107 | #else 108 | auto rvec = mpVec.Rotate(); 109 | 110 | //TODO: Convert this to more vectorizable way 111 | for (int o = 0; o < MULTIPOINT_COUNT; o++) { 112 | d->traceEnd.push_back(rvec.GetColAsVecp(o)); 113 | d->hitboxIDs.push_back(i); 114 | vec3_t angle = ((vec3_t)rvec.GetColAsVecp(o) - 
d->localPlayer->eyePos).GetAngles(true); 115 | vec3_t angleDiff = (Aimbot::shootAngles - angle).NormalizeAngles<2>(-180.f, 180.f); 116 | float fov = angleDiff.Length<2>(); 117 | d->fovList.push_back(fov); 118 | d->traceOutputs.push_back(0); 119 | } 120 | 121 | #endif 122 | } else { 123 | d->traceEnd.push_back(average); 124 | d->hitboxIDs.push_back(i); 125 | d->fovList.push_back(fov); 126 | d->traceOutputs.push_back(0); 127 | } 128 | } 129 | 130 | return 0; 131 | } 132 | 133 | static int LoopPlayers(AimbotLoopData* d) 134 | { 135 | int ret = 0; 136 | 137 | for (int i = 0; i < d->players->count; i++) { 138 | if (~d->ignoreList[d->players->unsortIDs[i] / 64] & (1ull << (d->players->unsortIDs[i] % 64)) && d->players->flags[i] & Flags::HITBOXES_UPDATED && ~d->players->flags[i] & Flags::FRIENDLY && 139 | //The following check is just a rough way to clear the unrelated players from view. A better check would be to intersect AABB with previous target to see if they overlap. If they do not, then simply quit the loop since the players should be sorted by FOV 140 | d->players->fov[i] - 30.f < d->target.fov) { 141 | PrepareHitboxList(d, i); 142 | 143 | int ap = ProcessAimPoints(d); 144 | int aps = ProcessAimPointsSIMD(d); 145 | 146 | if (ap != -1) { 147 | d->target.id = ap; 148 | ret = 1; 149 | } 150 | 151 | if (aps != -1) { 152 | d->target.id = aps; 153 | ret = 1; 154 | } 155 | } 156 | } 157 | 158 | #ifdef AIMBOT_THREADING 159 | threadSem.Post(); 160 | #endif 161 | 162 | return ret; 163 | } 164 | 165 | static void FindBestTarget(AimbotTarget* target, HistoryList* track, HistoryList* futureTrack, LocalPlayer* localPlayer, unsigned char hitboxList[MAX_HITBOXES], uint64_t ignoreList[NumOf<64>(MAX_PLAYERS)]) 166 | { 167 | 168 | char backtrackMask[MAX_PLAYERS]; 169 | float lowestFov = 1000.f; 170 | Players* prevPlayers = nullptr; 171 | Players* targetPlayers = nullptr; 172 | 173 | memset(backtrackMask, 0, MAX_PLAYERS); 174 | 175 | //First check the future, but this will be 
overwritten by the normal track if any of the ticks are valid 176 | if (futureTrack) { 177 | for (size_t i = 0; i < futureTrack->Count(); i += numThreads * threadQueueMultiplier) { 178 | 179 | #ifdef AIMBOT_THREADING 180 | int pushedCount = 0; 181 | #endif 182 | 183 | for (int o = 0; o < (int)numThreads * threadQueueMultiplier && i + o < futureTrack->Count(); o++) { 184 | Players& players = futureTrack->GetLastItem(i + o); 185 | AimbotLoopData* d = data + o; 186 | 187 | d->target.fov = 9000; 188 | 189 | //We do not want to just exit out the loop if we predicted too far into future 190 | if (!Tracing::BacktrackPlayers(&players, prevPlayers, backtrackMask)) 191 | continue; 192 | 193 | d->target = *target; 194 | 195 | d->players = &players; 196 | d->localPlayer = localPlayer; 197 | d->hitboxList = hitboxList; 198 | d->ignoreList = ignoreList; 199 | 200 | #ifdef AIMBOT_THREADING 201 | Threading::QueueJobRef(LoopPlayers, d); 202 | pushedCount++; 203 | #else 204 | LoopPlayers(d); 205 | #endif 206 | 207 | prevPlayers = &players; 208 | } 209 | 210 | #ifdef AIMBOT_THREADING 211 | for (int i = 0; i < pushedCount; i++) 212 | threadSem.Wait(); 213 | #endif 214 | 215 | for (int o = 0; o < (int)numThreads * threadQueueMultiplier && i + o < futureTrack->Count(); o++) { 216 | Players& players = futureTrack->GetLastItem(i + o); 217 | AimbotLoopData* d = data + o; 218 | 219 | if (d->target.id >= 0 && d->target.fov < lowestFov) { 220 | lowestFov = d->target.fov; 221 | d->target.backTick = i + o; 222 | d->target.future = true; 223 | *target = d->target; 224 | targetPlayers = &players; 225 | } 226 | } 227 | } 228 | } 229 | 230 | bool b = false; 231 | 232 | for (size_t i = 0; i < track->Count() && !b; i += numThreads * threadQueueMultiplier) { 233 | int o = 0; 234 | 235 | #ifdef AIMBOT_THREADING 236 | int pushedCount = 0; 237 | #endif 238 | 239 | for (o = 0; o < (int)numThreads * threadQueueMultiplier && i + o < track->Count(); o++) { 240 | Players& players = track->GetLastItem(i + o); 
241 | AimbotLoopData* d = data + o; 242 | 243 | d->target.fov = 9000; 244 | 245 | if (!Tracing::BacktrackPlayers(&players, prevPlayers, backtrackMask)) { 246 | b = true; 247 | break; 248 | } 249 | 250 | d->target = *target; 251 | 252 | d->players = &players; 253 | d->localPlayer = localPlayer; 254 | d->hitboxList = hitboxList; 255 | d->ignoreList = ignoreList; 256 | 257 | #ifdef AIMBOT_THREADING 258 | Threading::QueueJobRef(LoopPlayers, d); 259 | pushedCount++; 260 | #else 261 | LoopPlayers(d); 262 | #endif 263 | 264 | prevPlayers = &players; 265 | } 266 | 267 | #ifdef AIMBOT_THREADING 268 | for (int i = 0; i < pushedCount; i++) 269 | threadSem.Wait(); 270 | #endif 271 | 272 | for (int u = 0; u < o; u++) { 273 | Players& players = track->GetLastItem(i + u); 274 | AimbotLoopData* d = data + u; 275 | 276 | if (d->target.id >= 0 && (d->target.fov < lowestFov || !Tracing::VerifyTarget(targetPlayers, target->id, backtrackMask))) { 277 | lowestFov = d->target.fov; 278 | d->target.backTick = i + u; 279 | d->target.future = false; 280 | *target = d->target; 281 | targetPlayers = &players; 282 | } 283 | } 284 | } 285 | } 286 | 287 | AimbotTarget Aimbot::RunAimbot(HistoryList* track, HistoryList* futureTrack, LocalPlayer* localPlayer, unsigned char hitboxList[MAX_HITBOXES], uint64_t ignoreList[NumOf<64>(MAX_PLAYERS)], float pointScale[MAX_HITBOXES]) 288 | { 289 | AimbotTarget target; 290 | shootAngles = localPlayer->angles + localPlayer->aimOffset; 291 | pointScaleVal = pointScale; 292 | 293 | FindBestTarget(&target, track, futureTrack, localPlayer, hitboxList, ignoreList); 294 | 295 | return target; 296 | } 297 | -------------------------------------------------------------------------------- /features/aimbot.h: -------------------------------------------------------------------------------- 1 | #ifndef AIMBOT_H 2 | #define AIMBOT_H 3 | 4 | #include "../players.h" 5 | #include "../utils/history_list.h" 6 | 7 | #include "aimbot_types.h" 8 | 9 | namespace Aimbot 10 | { 11 | 
extern vec3_t shootAngles; 12 | 13 | AimbotTarget RunAimbot(HistoryList* track, HistoryList* futureTrack, LocalPlayer* localPlayer, unsigned char hitboxList[MAX_HITBOXES], uint64_t ignoreList[NumOf<64>(MAX_PLAYERS)], float pointScale[MAX_PLAYERS]); 14 | 15 | //These need to be implemented manually 16 | bool PreCompareData(AimbotTarget* target, LocalPlayer* localPlayer, vec3_t targetVec, int bone, float* outFOV); 17 | bool CompareData(AimbotLoopData* d, int out, vec3_t targetVec, int bone, float fov); 18 | } 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /features/aimbot_types.h: -------------------------------------------------------------------------------- 1 | #ifndef AIMBOT_TYPES_H 2 | #define AIMBOT_TYPES_H 3 | 4 | struct LocalPlayer; 5 | struct Players; 6 | 7 | struct AimbotTarget 8 | { 9 | vec3_t targetVec; 10 | int id = -1; 11 | int backTick = 0; 12 | int boneID = 0; 13 | float fov = 420.f; 14 | int dmg = 0; 15 | bool future = false; 16 | }; 17 | 18 | enum HitboxScanMode_t : unsigned char 19 | { 20 | SCAN_NONE = 0, 21 | SCAN_SIMPLE = 1, 22 | SCAN_MULTIPOINT = 2 23 | }; 24 | 25 | struct AimbotLoopData { 26 | AimbotTarget target; 27 | LocalPlayer* localPlayer; 28 | Players* players; 29 | 30 | unsigned char* hitboxList; 31 | uint64_t* ignoreList; 32 | 33 | float fovs[MAX_PLAYERS]; 34 | 35 | int entID; 36 | 37 | std::vector traceEnd; 38 | std::vector hitboxIDs; 39 | std::vector fovList; 40 | std::vector traceOutputs; 41 | 42 | #ifdef AIMBOT_SIMD_TRACE_DATA 43 | std::vector traceEndSOA; 44 | std::vector hitboxIDsSOA; 45 | std::vector fovListSOA; 46 | std::vector traceOutputsSOA; 47 | #endif 48 | }; 49 | 50 | #endif 51 | -------------------------------------------------------------------------------- /g_defines.h: -------------------------------------------------------------------------------- 1 | #ifndef G_DEFINES_H 2 | #define G_DEFINES_H 3 | 4 | template 5 | inline constexpr T x64x32(T x64, T x32) 6 | { 7 
| if (sizeof(void*) == 0x8) 8 | return x64; 9 | return x32; 10 | } 11 | 12 | #define COMMA , 13 | 14 | #if defined(_WIN32) 15 | #define SECTION(sec) __declspec(allocate(sec)) 16 | #define WSECTION(sec) SECTION(sec) 17 | #include "wincludes.h" 18 | #include 19 | //#define CLZ(x) //__lzcnt(x) 20 | #define CLZ(x) __builtin_clz(x) 21 | #define OLin(Linux) 22 | #define OWin(Windows) Windows 23 | #define PosixWin(Posix, Windows) Windows 24 | #define LWM(Linux, Windows, Mac) Windows 25 | #define OMac(Mac) 26 | #define OPosix(Posix) 27 | #define paddr(handle, name) GetProcAddress(handle, name) 28 | #define FASTARGS [[maybe_unused]] void* thisptr, [[maybe_unused]] void* edx 29 | #define CFASTARGS thisptr, edx 30 | #define STDARGS 31 | #define THISARGS [[maybe_unused]] void* thisptr 32 | #define LC 33 | #define PC 34 | #define WC COMMA 35 | #define _noinline __declspec(noinline) 36 | #elif defined(__linux__) 37 | #define __posix__ 38 | #define SECTION(sec) __attribute__((section(sec))) 39 | #define WSECTION(sec) 40 | #define CLZ(x) __builtin_clz(x) 41 | #define CTZ(x) __builtin_ctz(x) 42 | #define OLin(Linux) Linux 43 | #define OWin(Windows) 44 | #define OMac(Mac) 45 | #define OPosix(Posix) Posix 46 | #define PosixWin(Posix, Windows) Posix 47 | #define LWM(Linux, Windows, Mac) Linux 48 | #define paddr(handle, name) dlsym(handle, name) 49 | #define FASTARGS [[maybe_unused]] void* thisptr 50 | #define CFASTARGS thisptr 51 | #define STDARGS [[maybe_unused]] void* thisptr 52 | #define THISARGS [[maybe_unused]] void* thisptr 53 | #define LC COMMA 54 | #define PC COMMA 55 | #define WC 56 | #define _ReturnAddress() __builtin_return_address(0) 57 | #define _noinline __attribute__((noinline)) 58 | #else 59 | #define __posix__ 60 | #define SECTION(sec) __attribute__((section(sec))) 61 | #define WSECTION(sec) 62 | #define CLZ(x) __builtin_clz(x) 63 | #define CTZ(x) __builtin_ctz(x) 64 | #define OLin(Linux) 65 | #define OWin(Windows) 66 | #define OMac(Mac) Mac 67 | #define OPosix(Posix) 
Posix 68 | #define PosixWin(Posix, Windows) Posix 69 | #define LWM(Linux, Windows, Mac) Mac 70 | #define paddr(handle, name) dlsym(handle, name) 71 | #define FASTARGS [[maybe_unused]] void* thisptr 72 | #define CFASTARGS thisptr 73 | #define STDARGS [[maybe_unused]] void* thisptr 74 | #define THISARGS [[maybe_unused]] void* thisptr 75 | #define LC COMMA 76 | #define PC COMMA 77 | #define WC 78 | #define _ReturnAddress() __builtin_return_address(0) 79 | #define _noinline __attribute__((noinline)) 80 | #endif 81 | 82 | #ifdef __posix__ 83 | #define __thiscall 84 | #define __fastcall 85 | #define __stdcall 86 | #define __cdecl 87 | #define __declspec (a) 88 | #define _stricmp(a, b) strcasecmp(a, b) 89 | #endif 90 | 91 | #ifdef _MSC_VER 92 | #define FRAME_POINTER() (void*)((void**)_AddressOfReturnAddress() - 1) 93 | #else 94 | #define FRAME_POINTER() __builtin_frame_address(0) 95 | #endif 96 | 97 | #endif 98 | -------------------------------------------------------------------------------- /interfaces/tracing.h: -------------------------------------------------------------------------------- 1 | #ifndef TRACING_H 2 | #define TRACING_H 3 | 4 | #include "../players.h" 5 | 6 | namespace Tracing 7 | { 8 | 9 | struct Trace 10 | { 11 | vec3_t start; 12 | vec3_t end; 13 | int entID; 14 | }; 15 | 16 | /* 17 | Depth specifies the complexity of the trace to be performed. 18 | For example, depth 1 in CSGO would make the trace run through wall penetrating code path, 19 | while depth 0 would be a regular traceray. 
20 | */ 21 | int TracePlayer(LocalPlayer* localPlayer, Players* players, vec3_t point, int eID, int depth = 0, bool skipLocal = true); 22 | 23 | template 24 | void TracePlayerSIMD(LocalPlayer* localPlayer, Players* players, vec3soa point, int eID, int out[N], int depth = 0, bool skipLocal = true); 25 | 26 | void TracePointList(LocalPlayer* localPlayer, Players* players, size_t n, const vec3_t* points, int eIDs, int* __restrict out, int depth = 0, bool skipLocal = true); 27 | 28 | template 29 | void TracePointListSIMD(LocalPlayer* localPlayer, Players* players, size_t n, const vec3soa* points, int eID, int* __restrict out, int depth = 0, bool skipLocal = true); 30 | /* 31 | For games supporting moving players back in time. 32 | The mask is to be used for anything the implementation needs it to use (for example marking a Source Engine player as non-backtrackable, 33 | due to the breaking of lag compensation) 34 | */ 35 | bool BacktrackPlayers(Players* curPlayers, Players* prevPlayers, char backtrackMask[MAX_PLAYERS]); 36 | bool VerifyTarget(Players* players, int id, char backtrackMask[MAX_PLAYERS]); 37 | } 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /math/clang_avx512.h: -------------------------------------------------------------------------------- 1 | 2 | //A hack to improve compile times on clang 3 | #if defined(__clang__) && !defined(__AVX512F__) && !defined(__AVX512CD__) && !defined(__AVX512ER__) 4 | #define __AVX512BITALGINTRIN_H 5 | #define __AVX512BWINTRIN_H 6 | #define __AVX512CDINTRIN_H 7 | #define __AVX512DQINTRIN_H 8 | #define __AVX512ERINTRIN_H 9 | #define __AVX512FINTRIN_H 10 | #define __AVX512PFINTRIN_H 11 | #define __AVX512VBMI2INTRIN_H 12 | #define __AVX512VLBITALGINTRIN_H 13 | #define __AVX512VLBWINTRIN_H 14 | #define __AVX512VLCDINTRIN_H 15 | #define __AVX512VLDQINTRIN_H 16 | #define __AVX512VLINTRIN_H 17 | #define __AVX512VLVBMI2INTRIN_H 18 | #define __AVX512VLVNNIINTRIN_H 19 | #define 
__AVX512VNNIINTRIN_H 20 | #define __AVX512VPOPCNTDQINTRIN_H 21 | #define __AVX512VPOPCNTDQVLINTRIN_H 22 | 23 | #define __IFMAINTRIN_H 24 | #define __IFMAVLINTRIN_H 25 | #define __VBMIINTRIN_H 26 | #define __VBMIVLINTRIN_H 27 | 28 | typedef char __v64qi __attribute__((__vector_size__(64))); 29 | typedef short __v32hi __attribute__((__vector_size__(64))); 30 | typedef double __v8df __attribute__((__vector_size__(64))); 31 | typedef float __v16sf __attribute__((__vector_size__(64))); 32 | typedef long long __v8di __attribute__((__vector_size__(64))); 33 | typedef int __v16si __attribute__((__vector_size__(64))); 34 | 35 | /* Unsigned types */ 36 | typedef unsigned char __v64qu __attribute__((__vector_size__(64))); 37 | typedef unsigned short __v32hu __attribute__((__vector_size__(64))); 38 | typedef unsigned long long __v8du __attribute__((__vector_size__(64))); 39 | typedef unsigned int __v16su __attribute__((__vector_size__(64))); 40 | 41 | typedef float __m512 __attribute__((__vector_size__(64))); 42 | typedef double __m512d __attribute__((__vector_size__(64))); 43 | typedef long long __m512i __attribute__((__vector_size__(64))); 44 | 45 | typedef unsigned char __mmask8; 46 | typedef unsigned short __mmask16; 47 | 48 | #define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(512))) 49 | 50 | static __inline __m512i __DEFAULT_FN_ATTRS512 51 | _mm512_setzero_si512(void) 52 | { 53 | return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; 54 | } 55 | 56 | typedef unsigned int __mmask32; 57 | typedef unsigned long long __mmask64; 58 | typedef unsigned short __v64hu __attribute__((__vector_size__(128))); 59 | typedef unsigned int __v32su __attribute__((__vector_size__(128))); 60 | 61 | typedef __v8di __v8di_aligned __attribute__((aligned(64))); 62 | typedef __v8di __v8di_aligned __attribute__((aligned(64))); 63 | typedef __v8df __v8df_aligned __attribute__((aligned(64))); 64 | typedef __v16sf 
__v16sf_aligned __attribute__((aligned(64))); 65 | typedef short __v2hi __attribute__((__vector_size__(4))); 66 | typedef char __v4qi __attribute__((__vector_size__(4))); 67 | typedef char __v2qi __attribute__((__vector_size__(2))); 68 | 69 | 70 | #endif 71 | -------------------------------------------------------------------------------- /math/matrix.h: -------------------------------------------------------------------------------- 1 | #ifndef MATRIX_H 2 | #define MATRIX_H 3 | 4 | #include "vector.h" 5 | 6 | template 7 | struct matrix 8 | { 9 | vecSoa vec; 10 | 11 | template 12 | inline auto& operator=(const matrix& ov) 13 | { 14 | constexpr size_t MX = X2 < X ? X2 : X; 15 | constexpr size_t MY = Y2 < Y ? Y2 : Y; 16 | for (size_t i = 0; i < MX; i++) 17 | for (size_t o = 0; o < MY; o++) 18 | vec[i][o] = ov.vec[i][o]; 19 | return *this; 20 | } 21 | 22 | template 23 | inline auto operator*(const matrix& ov) 24 | { 25 | constexpr size_t MinX = Y2 < X ? Y2 : X; 26 | constexpr size_t MinY = X2 < Y ? X2 : Y; 27 | constexpr size_t CompS = MinX < MinY ? MinX : MinY; 28 | 29 | constexpr size_t MX = X2 < X ? X2 : X; 30 | constexpr size_t MY = Y2 < Y ? Y2 : Y; 31 | 32 | constexpr size_t SX = MX < MinX ? MinX : MX; 33 | constexpr size_t SY = MY < MinY ? 
MinY : MY; 34 | 35 | matrix result; 36 | 37 | for (size_t i = 0; i < MinX; i++) 38 | for (size_t o = 0; o < MinY; o++) 39 | result[i][o] = 0; 40 | 41 | for (size_t i = 0; i < MinX; i++) 42 | for (size_t o = 0; o < MinY; o++) { 43 | for (size_t u = 0; u < CompS; u++) 44 | result[i][o] += vec[i][u] * ov[u][o]; 45 | } 46 | //vec.template Dot, CompS, CompS>(ov.vec.template ColumnVec(i), result[i]); 47 | 48 | //Copy over the remainding data that was not be multiplied 49 | for (size_t i = CompS; i < SX; i++) 50 | for (size_t o = 0; o < SY; o++) 51 | result[i][o] = vec[i][o]; 52 | 53 | for (size_t i = 0; i < SX; i++) 54 | for (size_t o = CompS; o < SY; o++) 55 | result[i][o] = vec[i][o]; 56 | 57 | return result; 58 | } 59 | 60 | template 61 | inline auto& operator*=(const matrix& ov) 62 | { 63 | *this = *this * ov; 64 | return *this; 65 | } 66 | 67 | template 68 | static constexpr auto GetMatrix(const T& angles, bool fromDegrees = false) 69 | { 70 | matrix vec = {vecSoa()}; 71 | 72 | const int VP = 0; 73 | const int VY = 1; 74 | const int VR = 2; 75 | 76 | float s[3] = {0}, c[3] = {0}; 77 | 78 | auto it = angles; 79 | if (fromDegrees) 80 | it *= DEG2RAD; 81 | 82 | for (size_t i = 0; i < 3; i++) 83 | s[i] = ConstSin(it[i]); 84 | 85 | for (size_t i = 0; i < 3; i++) 86 | c[i] = ConstCos(it[i]); 87 | 88 | vec[0][0] = c[VP] * c[VY]; 89 | vec[1][0] = c[VP] * s[VY]; 90 | vec[2][0] = -s[VP]; 91 | 92 | vec[0][1] = s[VR] * s[VP] * c[VY] + c[VR] * s[VY]; 93 | vec[1][1] = s[VR] * s[VP] * s[VY] - c[VR] * c[VY]; 94 | vec[2][1] = s[VR] * c[VP]; 95 | 96 | vec[0][2] = c[VR] * s[VP] * c[VY] + s[VR] * s[VY]; 97 | vec[1][2] = c[VR] * s[VP] * s[VY] - s[VR] * c[VY]; 98 | vec[2][2] = c[VR] * c[VP]; 99 | 100 | return vec; 101 | } 102 | 103 | inline vec3_t GetAngles(bool toDegrees = false) 104 | { 105 | vec3_t fwd = (vec3_t)vec.acc[0]; 106 | vec3_t left = (vec3_t)vec.acc[1]; 107 | vec3_t up = (vec3_t)vec.acc[2]; 108 | vec3_t ret(0); 109 | 110 | float xyLen = fwd.Length<2>(); 111 | 112 | if (xyLen 
> 0.001f) { 113 | ret[0] = atan2f(-fwd[2], xyLen); 114 | ret[1] = atan2f(fwd[1], fwd[0]); 115 | ret[2] = atan2f(left[2], up[2]); 116 | } else { 117 | ret[0] = atan2f(-fwd[2], xyLen); 118 | ret[1] = atan2f(-left[0], left[1]); 119 | ret[2] = 0; 120 | } 121 | 122 | return toDegrees ? ret * RAD2DEG : ret; 123 | } 124 | 125 | inline auto Inverse() const 126 | { 127 | auto ret = *this; 128 | 129 | float det = vec[0][0] * (vec[1][1] * vec[2][2] - vec[2][1] * vec[1][2]) - 130 | vec[0][1] * (vec[1][0] * vec[2][2] - vec[1][2] * vec[2][0]) + 131 | vec[0][2] * (vec[1][0] * vec[2][1] - vec[1][1] * vec[2][0]); 132 | 133 | float invDet = 1.f / det; 134 | 135 | ret[0][0] = (vec[1][1] * vec[2][2] - vec[2][1] * vec[1][2]) * invDet; 136 | ret[0][1] = (vec[0][2] * vec[2][1] - vec[0][1] * vec[2][2]) * invDet; 137 | ret[0][2] = (vec[0][1] * vec[1][2] - vec[0][2] * vec[1][1]) * invDet; 138 | ret[1][0] = (vec[1][2] * vec[2][0] - vec[1][0] * vec[2][2]) * invDet; 139 | ret[1][1] = (vec[0][0] * vec[2][2] - vec[0][2] * vec[2][0]) * invDet; 140 | ret[1][2] = (vec[1][0] * vec[0][2] - vec[0][0] * vec[1][2]) * invDet; 141 | ret[2][0] = (vec[1][0] * vec[2][1] - vec[2][0] * vec[1][1]) * invDet; 142 | ret[2][1] = (vec[2][0] * vec[0][1] - vec[0][0] * vec[2][1]) * invDet; 143 | ret[2][2] = (vec[0][0] * vec[1][1] - vec[1][0] * vec[0][1]) * invDet; 144 | 145 | return ret; 146 | } 147 | 148 | inline auto InverseTranspose() const 149 | { 150 | auto ret = *this; 151 | 152 | float det = vec[0][0] * (vec[1][1] * vec[2][2] - vec[2][1] * vec[1][2]) - 153 | vec[0][1] * (vec[1][0] * vec[2][2] - vec[1][2] * vec[2][0]) + 154 | vec[0][2] * (vec[1][0] * vec[2][1] - vec[1][1] * vec[2][0]); 155 | 156 | float invDet = 1.f / det; 157 | 158 | ret[0][0] = (vec[1][1] * vec[2][2] - vec[2][1] * vec[1][2]) * invDet; 159 | ret[0][1] = (vec[1][2] * vec[2][0] - vec[1][0] * vec[2][2]) * invDet; 160 | ret[0][2] = (vec[1][0] * vec[2][1] - vec[2][0] * vec[1][1]) * invDet; 161 | ret[1][0] = (vec[0][2] * vec[2][1] - vec[0][1] * 
vec[2][2]) * invDet; 162 | ret[1][1] = (vec[0][0] * vec[2][2] - vec[0][2] * vec[2][0]) * invDet; 163 | ret[1][2] = (vec[2][0] * vec[0][1] - vec[0][0] * vec[2][1]) * invDet; 164 | ret[2][0] = (vec[0][1] * vec[1][2] - vec[0][2] * vec[1][1]) * invDet; 165 | ret[2][1] = (vec[1][0] * vec[0][2] - vec[0][0] * vec[1][2]) * invDet; 166 | ret[2][2] = (vec[0][0] * vec[1][1] - vec[1][0] * vec[0][1]) * invDet; 167 | 168 | return ret; 169 | } 170 | 171 | template 172 | constexpr typename std::enable_if::value, T>::type Vector3Transform(const T& inp) const 173 | { 174 | T out(0); 175 | 176 | for (size_t i = 0; i < 3; i++) 177 | out[i] = inp.Dot(vec[i]) + vec[i][3]; 178 | 179 | return out; 180 | } 181 | 182 | template 183 | constexpr typename std::enable_if::value, T>::type Vector3Transform(const T& inp) const 184 | { 185 | T out(0); 186 | 187 | for (size_t i = 0; i < 3; i++) 188 | out[i] = inp.Dot(vec[i]) + vec[i][3]; 189 | 190 | float w = inp.Dot(vec[3]) + vec[3][3]; 191 | w = (w <= 0 ? std::numeric_limits::infinity() : 1.f / w); 192 | return out * w; 193 | } 194 | 195 | template 196 | constexpr auto Vector3ITransform(T inp) const 197 | { 198 | T out(0); 199 | 200 | auto vecRot = vec.Rotate(); 201 | inp -= vecRot[3]; 202 | 203 | for (size_t i = 0; i < 3; i++) 204 | out[i] = inp.Dot(vecRot[i]); 205 | 206 | return out; 207 | } 208 | 209 | template 210 | constexpr T Vector3Rotate(const T& inp) const 211 | { 212 | T out(0); 213 | 214 | for (size_t i = 0; i < 3; i++) 215 | out[i] = inp.Dot(vec[i]); 216 | 217 | return out; 218 | } 219 | 220 | template 221 | constexpr T Vector3IRotate(const T& inp) const 222 | { 223 | T out(0); 224 | 225 | auto vecRot = vec.Rotate(); 226 | 227 | for (size_t i = 0; i < 3; i++) 228 | out[i] = inp.Dot(vecRot[i]); 229 | 230 | return out; 231 | } 232 | 233 | template 234 | constexpr typename std::enable_if::value, T>::type VecSoaTransform(const T& inp) const 235 | { 236 | T out(0); 237 | 238 | for (size_t i = 0; i < 3; i++) 239 | for (size_t o = 0; o < 
inp.Yt; o++) 240 | out[i][o] = ((vecp)inp.acc[o]).Dot(vec[i]) + vec[i][3]; 241 | 242 | return out; 243 | } 244 | 245 | template 246 | constexpr typename std::enable_if::value, T>::type VecSoaTransform(const T& inp) const 247 | { 248 | T out(0); 249 | float w[inp.Yt]; 250 | 251 | for (size_t i = 0; i < 3; i++) 252 | for (size_t o = 0; o < inp.Yt; o++) 253 | out[i][o] = ((vecp)inp.acc[o]).Dot(vec[i]) + vec[i][3]; 254 | 255 | for (size_t i = 0; i < inp.Yt; i++) { 256 | w[i] = ((vecp)inp.acc[i]).Dot(vec[3]) + vec[3][3]; 257 | w[i] = (w[i] <= 0 ? std::numeric_limits::infinity() : 1.f / w[i]); 258 | } 259 | 260 | for (size_t i = 0; i < inp.Xt; i++) 261 | for (size_t o = 0; o < inp.Yt; o++) 262 | out[i][o] *= w[o]; 263 | 264 | return out; 265 | } 266 | 267 | template 268 | constexpr auto VectorSoaITransform(const T& inp) const 269 | { 270 | T out(0); 271 | T temp = inp - (vecp)vec.acc[3]; 272 | 273 | for (size_t i = 0; i < 3; i++) 274 | for (size_t o = 0; o < inp.Yt; o++) 275 | out[i][o] = ((vecp)temp.acc[o]).Dot(vec[i]) + vec[i][3]; 276 | 277 | return out; 278 | } 279 | 280 | template 281 | constexpr auto WorldToScreen(const T& vec, const vecb& screen, bool* flags) const 282 | { 283 | auto out = VecSoaTransform(vec); 284 | 285 | constexpr size_t MX = 2 < T::Xt ? 
2 : T::Xt; 286 | 287 | for (size_t o = 0; o < T::Yt; o++) 288 | flags[o] = true; 289 | 290 | for (size_t i = 0; i < MX; i++) 291 | for (size_t o = 0; o < T::Yt; o++) 292 | if (out[i][o] > screen[i]) 293 | flags[o] = false; 294 | 295 | for (size_t o = 0; o < T::Yt; o++) 296 | if (flags[o]) { 297 | out[0][o] = screen[0] * 0.5f + out[0][o] * (screen[0] * 0.5f); 298 | out[1][o] = screen[1] * 0.5f - out[1][o] * (screen[1] * 0.5f); 299 | } 300 | 301 | return out; 302 | } 303 | 304 | template 305 | constexpr auto WorldToScreen(const T& vec, const vecb& screen, bool& status) const 306 | { 307 | auto out = Vector3Transform(vec); 308 | if (out[0] <= screen[0] && out[1] <= screen[1]) { 309 | out[0] = screen[0] * 0.5f + out[0] * screen[0] * 0.5f; 310 | out[1] = screen[1] * 0.5f - out[1] * screen[1] * 0.5f; 311 | 312 | status = true; 313 | 314 | return out; 315 | } 316 | 317 | status = false; 318 | 319 | return out; 320 | } 321 | 322 | constexpr float* operator[](int idx) 323 | { 324 | return vec.v[idx]; 325 | } 326 | 327 | constexpr const float* operator[](int idx) const 328 | { 329 | return vec.v[idx]; 330 | } 331 | }; 332 | 333 | typedef matrix<4,4> matrix4x4; 334 | #endif 335 | -------------------------------------------------------------------------------- /math/mm_funcs.h: -------------------------------------------------------------------------------- 1 | #ifndef MM_FUNCS_H 2 | #define MM_FUNCS_H 3 | 4 | //In order to template various SIMD operations we have to have templateable types and functions which is not an easy feat to be done manually, thus we use this macro cave to be able to easily expand this later on 5 | 6 | template 7 | struct m_ 8 | { 9 | struct type { 10 | }; 11 | }; 12 | 13 | [[noreturn]] 14 | static inline void throwfunc() 15 | { 16 | throw; 17 | } 18 | 19 | template 20 | using _m = typename m_::type; 21 | 22 | template 23 | using EnableIf = typename std::enable_if::type; 24 | 25 | #define GEN_FUNCC(RFUNC, FUNC, SIZE, ...) 
\ 26 | template* = nullptr, typename... Args> constexpr auto RFUNC(Args... args) \ 27 | { \ 28 | return FUNC(args...); \ 29 | } 30 | 31 | #define GEN_FUNC(RFUNC, type, FUNC, SIZE) GEN_FUNCC(RFUNC, FUNC, SIZE, std::is_same::value) 32 | 33 | #define PASTETWO(A, B) A##B 34 | 35 | #define GEN_FUNC_SI2(RFUNC, FUNC, SIZE) \ 36 | GEN_FUNC(RFUNC, float, PASTETWO(FUNC, _ps), SIZE); \ 37 | GEN_FUNC(RFUNC, double, PASTETWO(FUNC, _pd), SIZE); \ 38 | GEN_FUNCC(RFUNC, PASTETWO(FUNC, _pd), SIZE, std::is_integral::value); \ 39 | 40 | #define GEN_FUNC_EPI2(RFUNC, FUNC, SIZE) \ 41 | GEN_FUNC(RFUNC, float, PASTETWO(FUNC, _ps), SIZE); \ 42 | GEN_FUNC(RFUNC, double, PASTETWO(FUNC, _pd), SIZE); \ 43 | GEN_FUNCC(RFUNC, PASTETWO(FUNC, _epi8), SIZE, std::is_integral::value && sizeof(F) == 1); \ 44 | GEN_FUNCC(RFUNC, PASTETWO(FUNC, _epi16), SIZE, std::is_integral::value && sizeof(F) == 2); \ 45 | GEN_FUNCC(RFUNC, PASTETWO(FUNC, _epi32), SIZE, std::is_integral::value && sizeof(F) == 4); \ 46 | GEN_FUNCC(RFUNC, PASTETWO(FUNC, _epi64), SIZE, std::is_integral::value && sizeof(F) == 8); \ 47 | 48 | #define GEN_FNAME_128(NAME) PASTETWO(_mm_,NAME) 49 | #define GEN_FNAME_256(NAME) PASTETWO(_mm256_,NAME) 50 | #define GEN_FNAME_512(NAME) PASTETWO(_mm512_,NAME) 51 | 52 | #define CALL(F, ...)F(__VA_ARGS__) 53 | 54 | #define MM_NAME(NAME, SIZE) CALL(GEN_FNAME_##SIZE, NAME) 55 | 56 | #define GEN_FUNC_SI(FUNC, SIZE) GEN_FUNC_SI2(_mm_##FUNC, MM_NAME(FUNC, SIZE), SIZE) 57 | #define GEN_FUNC_EPI(FUNC, SIZE) GEN_FUNC_EPI2(_mm_##FUNC, MM_NAME(FUNC, SIZE), SIZE) 58 | 59 | #define GEN_FUNCS(SIZE) \ 60 | GEN_FUNC_SI(loadu, SIZE); \ 61 | GEN_FUNC_SI(storeu, SIZE); \ 62 | GEN_FUNC_EPI(add, SIZE); \ 63 | GEN_FUNC_EPI(sub, SIZE); \ 64 | 65 | #define DEFINE_MM(SIZE) \ 66 | template<> struct m_ { using type = __m##SIZE; }; \ 67 | template<> struct m_ { using type = __m##SIZE##d; }; \ 68 | template struct m_::value, F>> { using type = __m##SIZE##i; }; \ 69 | GEN_FUNCS(SIZE); \ 70 | 71 | #endif 72 | 
-------------------------------------------------------------------------------- /math/mmath.h: -------------------------------------------------------------------------------- 1 | #ifndef MMATH_H 2 | #define MMATH_H 3 | 4 | #include "../utils/shared_utils.h" 5 | #include "../wincludes.h" 6 | #include 7 | #include 8 | #define _USE_MATH_DEFINES 9 | #include 10 | #include 11 | #include 12 | 13 | #include "mm_funcs.h" 14 | 15 | #ifndef _MSC_VER 16 | #include 17 | #endif 18 | 19 | //This cuts compile times when avx512 is not in use 20 | //#include "clang_avx512.h" 21 | 22 | #if defined(__clang__) && defined(_MSC_VER) 23 | #pragma push_macro("_MM_HINT_T0") 24 | #undef _MM_HINT_T0 25 | #pragma push_macro("_MM_HINT_T1") 26 | #undef _MM_HINT_T1 27 | #pragma push_macro("_MM_HINT_T2") 28 | #undef _MM_HINT_T2 29 | #endif 30 | #include 31 | 32 | #include 33 | #include 34 | #include 35 | #if defined(__clang__) && defined(_MSC_VER) 36 | #pragma pop_macro("_MM_HINT_T0") 37 | #pragma pop_macro("_MM_HINT_T1") 38 | #pragma pop_macro("_MM_HINT_T2") 39 | #endif 40 | 41 | #if defined(OVERRIDE) 42 | const int SIMD_COUNT = OVERRIDE; 43 | #elif defined(__AVX512F__) || defined(__AVX512CD__) || defined(__AVX512ER__) 44 | #define PSIMD 16 45 | const int SIMD_COUNT = 16; 46 | typedef short simdFlags; 47 | DEFINE_MM(128); 48 | DEFINE_MM(256); 49 | DEFINE_MM(512); 50 | #elif defined(__AVX__) || defined(__AVX2__) 51 | #define PSIMD 8 52 | const int SIMD_COUNT = 8; 53 | typedef char simdFlags; 54 | DEFINE_MM(128); 55 | DEFINE_MM(256); 56 | #elif defined(__SSE__) || defined(__SSE2__) || defined(__SSE2_MATH__) || defined(_M_IX86_FP) || (defined(_M_AMD64) || defined(_M_X64)) 57 | #define PSIMD 4 58 | const int SIMD_COUNT = 4; 59 | typedef char simdFlags; 60 | DEFINE_MM(128); 61 | #else 62 | const int SIMD_COUNT = 1; 63 | typedef char simdFlags; 64 | #endif 65 | 66 | template 67 | struct max_sse 68 | { 69 | static const bool value = (Q * sizeof(W) == 16 && SIMD_COUNT >= 4); 70 | }; 71 | 72 | template 
73 | struct max_avx 74 | { 75 | static const bool value = (Q * sizeof(W) == 32 && SIMD_COUNT >= 8); 76 | }; 77 | 78 | template 79 | struct max_avx512 80 | { 81 | static const bool value = (Q * sizeof(W) == 64 && SIMD_COUNT >= 16); 82 | }; 83 | 84 | template 85 | struct do_avx512 86 | { 87 | static const bool value = SIMD_COUNT >= 16 && !((Q * 8 * sizeof(W)) % (8 * 4 * 16)); 88 | }; 89 | 90 | template 91 | struct do_avx 92 | { 93 | static const bool value = !do_avx512::value && SIMD_COUNT >= 8 && !((Q * 8 * sizeof(W)) % (8 * 4 * 8)); 94 | }; 95 | 96 | template 97 | struct do_sse 98 | { 99 | static const bool value = !do_avx::value && !do_avx512::value && SIMD_COUNT >= 4 && !((Q * 8 * sizeof(W)) % (8 * 4 * 4)); 100 | }; 101 | 102 | template 103 | struct do_simd 104 | { 105 | static const bool value = do_sse::value || do_avx::value || do_avx512::value; 106 | }; 107 | 108 | template 109 | struct comp_if 110 | { 111 | static const bool value = (A == B); 112 | }; 113 | 114 | template 115 | constexpr int NumOf(const int val) 116 | { 117 | return (val - 1) / N + 1; 118 | } 119 | 120 | constexpr int NumOfSIMD(const int val) 121 | { 122 | return NumOf(val); 123 | } 124 | 125 | #include 126 | 127 | template 128 | constexpr T PopCnt(T inp) 129 | { 130 | #ifdef __GNUC__ 131 | if (sizeof(inp) == 8) 132 | return __builtin_popcountll(inp); 133 | return __builtin_popcount(inp); 134 | #else 135 | T i = 0; 136 | for (i = 0; i < sizeof(inp) * 8; i++) 137 | if (~inp & (1 << i)) 138 | break; 139 | return i; 140 | #endif 141 | } 142 | 143 | constexpr size_t Clz(size_t inp) 144 | { 145 | #ifdef __GNUC__ 146 | if (sizeof(inp) == 8) 147 | return __builtin_clzll(inp); 148 | return __builtin_clz(inp); 149 | #else 150 | for (size_t i = 0; (1 << i) < sizeof(size_t) * 8; i++) 151 | inp |= inp >> (1 << i); 152 | return sizeof(inp) * 8 - PopCnt(inp); 153 | #endif 154 | } 155 | 156 | constexpr size_t AlignUp(size_t inp) 157 | { 158 | if (inp <= 1) 159 | return 1; 160 | return size_t(1) << 
(sizeof(size_t) * 8 - Clz(inp)); 161 | } 162 | 163 | 164 | template::value>::type* p = nullptr> 165 | constexpr T Modulo(const T x, const T y) 166 | { 167 | return (x < T() ? T(-1) : T(1)) * ( 168 | (x < T() ? -x : x) - 169 | (long long)((x / y < T() ? -x / y : x / y)) * (y < T() ? -y : y)); 170 | } 171 | 172 | // For non-floating point types 173 | 174 | template 175 | using TypeToCast = typename std::conditional::value, int, T>::type; 176 | 177 | template::value>::type* p = nullptr> 178 | constexpr T Modulo(const T x, const T y) 179 | { 180 | return (TypeToCast)(x) % (TypeToCast)(y); 181 | } 182 | 183 | template 184 | [[deprecated("Duplicate function")]] 185 | inline T TMod(T val, T lim) 186 | { 187 | return std::remainder(val, lim); 188 | } 189 | 190 | constexpr float NormalizeFloat(float result, float start, float end) 191 | { 192 | result = Modulo(result - start, end - start); 193 | 194 | if (result < 0.f) 195 | result += end - start; 196 | 197 | return result + start; 198 | } 199 | 200 | template 201 | constexpr T NormalizeInRange(T result, T start, T end) 202 | { 203 | result = Modulo(result - start, end - start); 204 | 205 | if (result < 0) 206 | result += end - start; 207 | 208 | return result + start; 209 | } 210 | 211 | //This should never be called in the first place, but it is required for the compile to take place 212 | template 213 | constexpr T GetElementAt([[maybe_unused]] size_t id) 214 | { 215 | return T(); 216 | } 217 | 218 | template 219 | constexpr F GetElementAt(size_t id, F arg, T... args) 220 | { 221 | constexpr size_t sz = sizeof...(args); 222 | return (id && sz) ? GetElementAt(id - 1, args...) : arg; 223 | } 224 | 225 | template 226 | constexpr T Max(T a, T b) 227 | { 228 | return a > b ? a : b; 229 | } 230 | 231 | template 232 | constexpr T Min(T a, T b) 233 | { 234 | return a < b ? a : b; 235 | } 236 | 237 | template 238 | constexpr T Abs(T val) 239 | { 240 | return val < 0 ? 
-val : val; 241 | } 242 | 243 | template 244 | constexpr T TrigSeries(T val, T sum, T n, int i, int s, T exp) 245 | { 246 | return Abs(exp * s / n) > std::numeric_limits::epsilon() ? TrigSeries(val, sum + exp * s / n, n * i * (i + 1), i + 2, -s, exp * val * val) : sum; 247 | } 248 | 249 | template 250 | constexpr T ConstSin(T val) 251 | { 252 | val = NormalizeInRange(val, T(-M_PI), T(M_PI)); 253 | return TrigSeries(val, val, T(6), 4, -1, val * val * val); 254 | } 255 | 256 | template 257 | constexpr T ConstCos(T val) 258 | { 259 | return ConstSin(val + M_PI / 2); 260 | } 261 | 262 | constexpr float RAD2DEG = (float)(180.0 / M_PI); 263 | constexpr float DEG2RAD = (float)(M_PI / 180.0); 264 | 265 | #include "vector.h" 266 | 267 | #endif 268 | -------------------------------------------------------------------------------- /math/soa_accessor.h: -------------------------------------------------------------------------------- 1 | #ifndef SOA_ACCESSOR_H 2 | #define SOA_ACCESSOR_H 3 | 4 | #define REF() && 5 | #define X(...) 6 | 7 | #define SOA_VECTOR_CAST(type) \ 8 | template \ 9 | explicit inline operator type() { \ 10 | type ret; \ 11 | constexpr size_t mv = B < X ? B : X; \ 12 | auto& it = *this; \ 13 | for (size_t i = 0; i < mv; i++) \ 14 | ret[i] = it[i]; \ 15 | return ret; \ 16 | } 17 | 18 | #define SOA_SCALAR_ASIGNMENT \ 19 | inline auto& operator=(T val) \ 20 | { \ 21 | auto& it = *this; \ 22 | for (size_t i = 0; i < X; i++) \ 23 | it[i] = val; \ 24 | return it; \ 25 | } 26 | 27 | #define SOA_ASIGNMENT(type) \ 28 | template \ 29 | inline auto& operator=(type vec) \ 30 | { \ 31 | constexpr size_t mv = B < X ? 
B : X; \ 32 | auto& it = *this; \ 33 | for (size_t i = 0; i < mv; i++) \ 34 | it[i] = vec[i]; \ 35 | return it; \ 36 | } 37 | 38 | //XYZ might be invalid on vecSoa, depending on how many columns are thare 39 | #define DEFINE_SOA_ACCESSOR \ 40 | struct { \ 41 | struct SoaAccessor { \ 42 | T x; \ 43 | T px[Y - 1]; \ 44 | T y; \ 45 | T py[Y - 1]; \ 46 | T z; \ 47 | \ 48 | inline T& operator[](size_t idx) \ 49 | { \ 50 | return (&x)[(int)idx * (int)Y]; \ 51 | } \ 52 | \ 53 | inline auto& Set(SoaAccessor& acc) \ 54 | { \ 55 | for(size_t i = 0; i < X; i++) \ 56 | (*this)[i] = acc[i]; \ 57 | return *this; \ 58 | } \ 59 | \ 60 | SOA_ASIGNMENT(vecb); \ 61 | SOA_ASIGNMENT(vecp); \ 62 | SOA_SCALAR_ASIGNMENT; \ 63 | SOA_VECTOR_CAST(vecb); \ 64 | SOA_VECTOR_CAST(vecp); \ 65 | } acc2; \ 66 | \ 67 | inline auto& operator[](size_t idx) const \ 68 | { \ 69 | return *(SoaAccessor*)(((T*)&acc2)+idx); \ 70 | } \ 71 | } acc; 72 | 73 | #endif 74 | -------------------------------------------------------------------------------- /math/vec_funcs.h: -------------------------------------------------------------------------------- 1 | //Be sure to only include in a vector class and have VEC_TYPE defined (it gets automatically undefined) 2 | 3 | #ifdef VEC_TYPE 4 | 5 | constexpr VEC_TYPE() = default; 6 | 7 | template::value>::type> 8 | constexpr VEC_TYPE(F arg) : v() 9 | { 10 | for (size_t i = 0; i < N; i++) 11 | v[i] = (T)arg; 12 | } 13 | 14 | 15 | template::value>::type> 16 | constexpr VEC_TYPE(const F (&args)[SZ]) : v() 17 | { 18 | for (size_t i = 0; i < N; i++) 19 | v[i] = args[i % SZ]; 20 | } 21 | 22 | template::value>::type> 23 | constexpr VEC_TYPE(F... 
args) : v() 24 | { 25 | constexpr size_t elementCount = sizeof...(args); 26 | for (size_t i = 0; i < N; i++) 27 | v[i] = GetElementAt(i % elementCount, args...); 28 | } 29 | 30 | inline auto& Assign(T val) 31 | { 32 | for (size_t i = 0; i < N; i++) 33 | v[i] = val; 34 | return *this; 35 | } 36 | 37 | 38 | template 39 | constexpr T Dot(const VEC_TYPE& o) const 40 | { 41 | T val = 0; 42 | for (size_t i = 0; i < D; i++) 43 | val += v[i] * o.v[i]; 44 | return val; 45 | } 46 | 47 | template 48 | constexpr T Dot(const T* o) const 49 | { 50 | T val = 0; 51 | for (size_t i = 0; i < D; i++) 52 | val += v[i] * o[i]; 53 | return val; 54 | } 55 | 56 | template 57 | constexpr T LengthSqr() const 58 | { 59 | return Dot(*this); 60 | } 61 | 62 | template 63 | constexpr T Length() const 64 | { 65 | return sqrt(Dot(*this)); 66 | } 67 | 68 | template 69 | inline auto& Sqrt() 70 | { 71 | constexpr size_t Md = D > N ? N : D; 72 | VSqrt(v); 73 | return *this; 74 | } 75 | 76 | template 77 | inline auto& NormalizeAngles(T start, T end) 78 | { 79 | for (size_t i = 0; i < D; i++) 80 | v[i] = std::fmod(std::fmod(v[i] - start, end - start) + (end - start), end - start) + start; 81 | return *this; 82 | } 83 | 84 | constexpr T Dot(const VEC_TYPE& o) const 85 | { 86 | return Dot(o); 87 | } 88 | 89 | constexpr T Dot(const T* o) const 90 | { 91 | return Dot(o); 92 | } 93 | 94 | constexpr T LengthSqr() const 95 | { 96 | return LengthSqr(); 97 | } 98 | 99 | constexpr T Length() const 100 | { 101 | return Length(); 102 | } 103 | 104 | inline auto& Sqrt() 105 | { 106 | return Sqrt(); 107 | } 108 | 109 | inline auto Normalized() const 110 | { 111 | auto val = *this; 112 | float l = val.Length(); 113 | val *= l ? 
1 / l : 0; 114 | return val; 115 | } 116 | 117 | inline auto& Normalize() 118 | { 119 | *this = Normalized(); 120 | return *this; 121 | } 122 | 123 | template 124 | inline T DistTo(const VEC_TYPE& o) const 125 | { 126 | return (*this - o).template Length(); 127 | } 128 | 129 | inline T DistTo(const VEC_TYPE& o) const 130 | { 131 | return DistTo(o); 132 | } 133 | 134 | template 135 | constexpr T DistToSqr(const VEC_TYPE& o) const 136 | { 137 | return (*this - o).template LengthSqr(); 138 | } 139 | 140 | constexpr T DistToSqr(const VEC_TYPE& o) const 141 | { 142 | return DistToSqr(o); 143 | } 144 | 145 | inline auto DirToRay(const VEC_TYPE& a, const VEC_TYPE& b) const 146 | { 147 | auto c = *this - a; 148 | auto d = b - a; 149 | 150 | T t = c.Dot(d) / d.LengthSqr(); 151 | 152 | return a + t * d; 153 | } 154 | 155 | inline auto DirToLine(const VEC_TYPE& a, const VEC_TYPE& b) const 156 | { 157 | auto c = *this - a; 158 | auto d = b - a; 159 | 160 | T t = std::clamp(c.Dot(d) / d.LengthSqr(), T(0), T(1)); 161 | 162 | return a + t * d; 163 | } 164 | //Right axis: this vector crossed with (0, 0, 1); (0, -1, 0) when x and y are both zero 165 | constexpr auto GetRight() const 166 | { 167 | if (v[0] == 0 && v[1] == 0) 168 | return VEC_TYPE(0, -1, 0); 169 | return this->Cross(VEC_TYPE(0, 0, 1)); 170 | } 171 | //Up axis: GetRight() crossed with this vector; (-z, 0, 0) when x and y are both zero 172 | constexpr auto GetUp() const 173 | { 174 | if (v[0] == 0 && v[1] == 0) 175 | return VEC_TYPE(-v[2], 0, 0); 176 | return GetRight().Cross(*this); 177 | } 178 | 179 | template 180 | constexpr typename std::enable_if::value, VEC_TYPE>::type 181 | Cross(const VEC_TYPE& o) const 182 | { 183 | VEC_TYPE ret(0); 184 | ret[0] = v[1] * o[2] - v[2] * o[1]; 185 | ret[1] = v[2] * o[0] - v[0] * o[2]; 186 | ret[2] = v[0] * o[1] - v[1] * o[0]; 187 | return ret; 188 | } 189 | 190 | 191 | template 192 | inline typename std::enable_if::value, VEC_TYPE&>::type 193 | ToAngles() 194 | { 195 | T y, x, len; 196 | y = atan2(v[1], v[0]); 197 | 198 | len = Length<2>(); 199 | 200 | x = atan2(-v[2], len); 201 | 202 | v[0] = x; 203 | v[1] = y; 204 | v[2] = 0; 205 | 206 | return *this; 207 | 
} 208 | 209 | template 210 | inline typename std::enable_if::value, VEC_TYPE>::type 211 | GetAngles(bool toDegrees = false) const 212 | { 213 | auto ret = *this; 214 | ret.ToAngles(); 215 | if (toDegrees) 216 | ret *= RAD2DEG; 217 | return ret; 218 | } 219 | 220 | template 221 | inline typename std::enable_if::value, VEC_TYPE&>::type 222 | GetVectors(VEC_TYPE& __restrict forward, VEC_TYPE& __restrict right, VEC_TYPE& __restrict up, bool fromDegrees = false) 223 | { 224 | const int VP = 0; 225 | const int VY = 1; 226 | const int VR = 2; 227 | 228 | T s[3], c[3]; 229 | 230 | auto it = *this; 231 | if (fromDegrees) 232 | it *= DEG2RAD; 233 | 234 | for (size_t i = 0; i < 3; i++) 235 | s[i] = std::sin(it[i]); 236 | 237 | for (size_t i = 0; i < 3; i++) 238 | c[i] = std::cos(it[i]); 239 | 240 | forward[0] = c[VP] * c[VY]; 241 | forward[1] = c[VP] * s[VY]; 242 | forward[2] = -s[VP]; 243 | 244 | right[0] = -s[VR] * s[VP] * c[VY] + c[VR] * s[VY]; 245 | right[1] = -s[VR] * s[VP] * s[VY] - c[VR] * c[VY]; 246 | right[2] = -s[VR] * c[VP]; 247 | 248 | up[0] = c[VR] * s[VP] * c[VY] + s[VR] * s[VY]; 249 | up[1] = c[VR] * s[VP] * s[VY] - s[VR] * c[VY]; 250 | up[2] = c[VR] * c[VP]; 251 | 252 | return *this; 253 | } 254 | 255 | template 256 | inline typename std::enable_if::value, VEC_TYPE&>::type 257 | Rotate(T angle) 258 | { 259 | T s, c; 260 | s = std::sin(angle); 261 | c = std::cos(angle); 262 | 263 | constexpr size_t iX = (dim + 1) % 3; 264 | constexpr size_t iY = (dim + 2) % 3; 265 | 266 | T xn = v[iX] * c - v[iY] * s; 267 | T yn = v[iX] * s + v[iY] * c; 268 | 269 | v[iX] = xn; 270 | v[iY] = yn; 271 | 272 | return *this; 273 | } 274 | 275 | constexpr auto Min(const VEC_TYPE& ov) 276 | { 277 | VEC_TYPE ret(0); 278 | 279 | for (size_t i = 0; i < N; i++) 280 | ret[i] = ::Min(v[i], ov[i]); 281 | 282 | return ret; 283 | } 284 | 285 | constexpr auto Max(const VEC_TYPE& ov) 286 | { 287 | VEC_TYPE ret(0); 288 | 289 | for (size_t i = 0; i < N; i++) 290 | ret[i] = ::Max(v[i], ov[i]); 291 
| 292 | return ret; 293 | } 294 | 295 | constexpr auto MinUp() 296 | { 297 | T ret = std::numeric_limits::max(); 298 | 299 | for (size_t i = 0; i < N; i++) 300 | ret = ::Min(ret, v[i]); 301 | 302 | return ret; 303 | } 304 | 305 | constexpr auto MaxUp() 306 | { 307 | T ret = std::numeric_limits::min(); 308 | 309 | for (size_t i = 0; i < N; i++) 310 | ret = ::Max(ret, v[i]); 311 | 312 | return ret; 313 | } 314 | 315 | constexpr auto Lerp(const VEC_TYPE& ov, float time) 316 | { 317 | return *this + time * (ov - *this); 318 | } 319 | 320 | constexpr auto LerpClamped(const VEC_TYPE& ov, float time) 321 | { 322 | return *this + ::Min(1.f, ::Max(0.f, time)) * (ov - *this); 323 | } 324 | 325 | #undef VEC_TYPE 326 | #endif 327 | -------------------------------------------------------------------------------- /math/vecsoa_funcs.h: -------------------------------------------------------------------------------- 1 | /* 2 | These functions should only be included inside SoA vector structures. 3 | Define SOA_TYPE before including, it will be undefined afterwards. 4 | */ 5 | 6 | 7 | #ifdef SOA_TYPE 8 | 9 | constexpr SOA_TYPE() = default; 10 | 11 | template::value>::type> 12 | constexpr SOA_TYPE(F arg) : v() 13 | { 14 | for (size_t i = 0; i < Xt; i++) 15 | for (size_t o = 0; o < Yt; o++) 16 | v[i][o] = (T)arg; 17 | } 18 | 19 | template::value>::type> 20 | constexpr SOA_TYPE(F... 
args) : v() 21 | { 22 | constexpr size_t elementCount = sizeof...(args); 23 | 24 | for (size_t i = 0; i < Xt; i++) 25 | for (size_t o = 0; o < Yt; o++) 26 | v[i][o] = GetElementAt(o % elementCount, args...); 27 | } 28 | 29 | template 30 | inline auto& ColumnVec(int col) const 31 | { 32 | return *(vecb*)v[col]; 33 | } 34 | 35 | inline auto& ColumnVec(int col) const 36 | { 37 | return ColumnVec(col); 38 | } 39 | 40 | //Micro-optimized version for 4 sized vector chunks since 41 | //Clang did not want to generate SIMD code on a normal loop 42 | template 43 | inline void AddUpDimSIMD(int dim, F vv[D][Q]) 44 | { 45 | static constexpr size_t Elems = SWidth / (8 * sizeof(F)); 46 | 47 | if (!dim) 48 | return; 49 | 50 | for (size_t i = 0; i < Q / Elems; i++) { 51 | _m a = _mm_loadu(v[dim-1] + i * Elems); 52 | _m b = _mm_loadu(vv[dim] + i * Elems); 53 | a = _mm_add(a, b); 54 | _mm_storeu(vv[dim-1] + i * Elems, a); 55 | } 56 | 57 | AddUpDimSIMD(--dim, vv); 58 | } 59 | 60 | 61 | template 62 | inline typename std::enable_if::value, void>::type AddUpDim(int dim, F vv[D][Q]) 63 | { 64 | AddUpDimSIMD<128, F, Q, D>(dim, vv); 65 | } 66 | 67 | template 68 | inline typename std::enable_if::value, void>::type AddUpDim(int dim, F vv[D][Q]) 69 | { 70 | AddUpDimSIMD<256, F, Q, D>(dim, vv); 71 | } 72 | 73 | template 74 | inline typename std::enable_if::value, void>::type AddUpDim(int dim, F vv[D][Q]) 75 | { 76 | AddUpDimSIMD<512, F, Q, D>(dim, vv); 77 | } 78 | 79 | template 80 | inline typename std::enable_if::value, void>::type AddUpDim(int dim, F vv[X][Q]) 81 | { 82 | if (!dim) 83 | return; 84 | 85 | for(; dim > 0; dim--) 86 | for (size_t o = 0; o < Y; o++) 87 | vv[dim-1][o] = v[dim-1][o] + vv[dim][o]; 88 | } 89 | 90 | template 91 | inline auto& AddUp() 92 | { 93 | AddUpDim(D-1, v); 94 | return *this; 95 | } 96 | 97 | template 98 | inline auto& AddUpTotal() 99 | { 100 | AddUpDim(D-1, v); 101 | 102 | for (size_t i = D - 1; i > 0; i--) 103 | v[0][i - 1] += v[0][i]; 104 | 105 | return *this; 
106 | } 107 | 108 | template 109 | inline T AddedUpTotal() 110 | { 111 | T temp[D][Y]; 112 | 113 | for (size_t o = 0; o < Y; o++) 114 | temp[D - 1][o] = 0; 115 | 116 | AddUpDim(D - 1, temp); 117 | 118 | for (size_t i = Y - 1; i > 0; i--) 119 | temp[0][i - 1] += temp[0][i]; 120 | 121 | return temp[0][0]; 122 | } 123 | 124 | inline T AddedUpTotal() 125 | { 126 | return AddedUpTotal(); 127 | } 128 | 129 | //Constant array functions 130 | template 131 | inline void Dot(const F& ov, T val[Y2]) const 132 | { 133 | SOA_TYPE nv = *this * ov; 134 | nv.AddUp(); 135 | 136 | for (size_t i = 0; i < Y2; i++) 137 | val[i] = nv[0][i]; 138 | } 139 | 140 | template 141 | inline void Dot(const SOA_TYPE& ov, T val[Y]) const 142 | { 143 | Dot(ov, val); 144 | } 145 | 146 | template 147 | inline void LengthSqr(T val[Y]) const 148 | { 149 | Dot(*this, val); 150 | } 151 | 152 | template 153 | inline void Length(T val[Y]) const 154 | { 155 | Dot(*this, val); 156 | VSqrt(val); 157 | } 158 | 159 | template 160 | inline auto& Sqrt() 161 | { 162 | for (size_t i = 0; i < D; i++) 163 | VSqrt(v[i]); 164 | return *this; 165 | } 166 | 167 | template 168 | inline void Dot(const F& o, T val[Y2]) const 169 | { 170 | Dot(o, val); 171 | } 172 | 173 | inline void Dot(const SOA_TYPE& o, T val[Y]) const 174 | { 175 | Dot(o, val); 176 | } 177 | 178 | inline void LengthSqr(T val[Y]) const 179 | { 180 | LengthSqr(val); 181 | } 182 | 183 | inline void Length(T val[Y]) const 184 | { 185 | Length(val); 186 | } 187 | 188 | inline auto& Sqrt() 189 | { 190 | return Sqrt(); 191 | } 192 | 193 | template 194 | inline void DistTo(const SOA_TYPE& o, T val[Y]) const 195 | { 196 | (*this - o).template Length(val); 197 | } 198 | 199 | inline void DistTo(const SOA_TYPE& o, T val[Y]) const 200 | { 201 | DistTo(o, val); 202 | } 203 | 204 | //Pointer returning functions 205 | template 206 | inline const T* Dot(const SOA_TYPE& ov) const 207 | { 208 | T val[Y]; 209 | Dot(ov, val); 210 | return val; 211 | } 212 | 213 | template 
214 | inline const T* LengthSqr() const 215 | { 216 | return Dot(*this); 217 | } 218 | 219 | template 220 | inline const T* Length() const 221 | { 222 | T* val = Dot(*this); 223 | VSqrt(val); 224 | return val; 225 | } 226 | 227 | inline const T* Dot(const SOA_TYPE& o) const 228 | { 229 | return Dot(o); 230 | } 231 | 232 | inline const T* LengthSqr() const 233 | { 234 | return LengthSqr(); 235 | } 236 | 237 | inline const T* Length() const 238 | { 239 | return Length(); 240 | } 241 | 242 | constexpr auto Abs() const 243 | { 244 | auto ret = *this; 245 | 246 | for (size_t i = 0; i < X; i++) 247 | for (size_t o = 0; o < Y; o++) 248 | ret[i][o] = std::abs(ret[i][o]); 249 | 250 | return ret; 251 | } 252 | 253 | template 254 | inline const T* DistTo(const SOA_TYPE& o) const 255 | { 256 | return (*this - o).template Length(); 257 | } 258 | 259 | inline const T* DistTo(const SOA_TYPE& o) const 260 | { 261 | return DistTo(o); 262 | } 263 | 264 | inline auto DirToRay(const SOA_TYPE& a, const SOA_TYPE& b) const 265 | { 266 | auto c = *this - a; 267 | auto d = b - a; 268 | 269 | T t[Y], ls[Y]; 270 | c.Dot(d, t); 271 | d.LengthSqr(ls); 272 | 273 | for (int i = 0; i < Y; i++) 274 | t[i] = t[i] / ls[i]; 275 | 276 | return a + d * t; 277 | } 278 | 279 | inline auto DirToLine(const SOA_TYPE& a, const SOA_TYPE& b) const 280 | { 281 | auto c = *this - a; 282 | auto d = b - a; 283 | 284 | T t[Y], ls[Y]; 285 | c.Dot(d, t); 286 | d.LengthSqr(ls); 287 | 288 | for (int i = 0; i < Y; i++) 289 | t[i] = std::clamp(t[i] / ls[i], T(0), T(1)); 290 | 291 | return a + d * t; 292 | } 293 | 294 | inline auto Normalized() const 295 | { 296 | auto val = *this; 297 | float l[Y]; 298 | val.Length(l); 299 | for (size_t i = 0; i < Y; i++) 300 | l[i] = l[i] ? 
1.f / l[i] : (T)0; 301 | val *= l; 302 | return val; 303 | } 304 | 305 | inline void Normalize() 306 | { 307 | *this = Normalized(); 308 | } 309 | 310 | template 311 | inline void TransformInPlace(const Q* inp) 312 | { 313 | Q dot[Y]; 314 | for (size_t o = 0; o < Y; o++) 315 | for (size_t i = 0; i < X; i++) 316 | v[o][i] = Dot(inp[o].vec[i]) + inp[o].vec[i][3]; 317 | } 318 | 319 | template 320 | inline auto Transform(const Q* inp) const 321 | { 322 | auto ret = *this; 323 | ret.TransformInPlace(inp); 324 | return ret; 325 | } 326 | 327 | constexpr auto& AssignRow(int row, const vecb& vec) 328 | { 329 | for (size_t i = 0; i < X; i++) 330 | v[i][row] = vec[i]; 331 | 332 | return *this; 333 | } 334 | 335 | constexpr auto& AddRow(int row, const vecb& vec) 336 | { 337 | for (size_t i = 0; i < X; i++) 338 | v[i][row] += vec[i]; 339 | 340 | return *this; 341 | } 342 | 343 | constexpr auto& AddRow(int row, const vecp& vec) 344 | { 345 | return AddRow(row, *(vecb*)&vec); 346 | } 347 | //Adds val to every component of the given row and returns *this like the other AddRow overloads 348 | constexpr auto& AddRow(int row, T val) 349 | { 350 | for (size_t i = 0; i < X; i++) 351 | v[i][row] += val; 352 | return *this; } 353 | //Adds val[i] to component i of the given row; val is indexed from 0 to X - 1 354 | constexpr auto& AddRow(int row, const T* val) 355 | { 356 | for (size_t i = 0; i < X; i++) 357 | v[i][row] += val[i]; 358 | return *this; } 359 | 360 | constexpr auto& AssignCol(int col, const vecb& vec) 361 | { 362 | for (size_t i = 0; i < Y; i++) 363 | v[col][i] = vec[i]; 364 | 365 | return *this; 366 | } 367 | 368 | constexpr auto& AssignCol(int col, const vecp& vec) 369 | { 370 | return AssignCol(col, (const vecb&)vec); 371 | } 372 | 373 | constexpr auto& AssignCol(int col, T val) 374 | { 375 | vecb vec(val); 376 | return AssignCol(col, vec); 377 | } 378 | 379 | 380 | constexpr auto& MulCol(int col, const vecb& vec) 381 | { 382 | for (size_t i = 0; i < Y; i++) 383 | v[col][i] *= vec[i]; 384 | 385 | return *this; 386 | } 387 | 388 | constexpr auto& MulCol(int col, const vecp& vec) 389 | { 390 | return MulCol(col, (vecb)vec); 391 | } 392 | 393 | constexpr auto& MulCol(int 
col, T val) 394 | { 395 | for (int i = 0; i < Y; i++) 396 | v[col][i] *= val; 397 | return *this; } 398 | //Multiplies element i of the given column by val[i]; val is indexed from 0 to Y - 1 399 | constexpr auto& MulCol(int col, const T* val) 400 | { 401 | for (int i = 0; i < Y; i++) 402 | v[col][i] *= val[i]; 403 | return *this; } 404 | 405 | 406 | constexpr auto& AddCol(int col, const vecb& vec) 407 | { 408 | for (size_t i = 0; i < Y; i++) 409 | v[col][i] += vec[i]; 410 | 411 | return *this; 412 | } 413 | 414 | constexpr auto& AddCol(int col, const vecp& vec) 415 | { 416 | return AddCol(col, (vecb)vec); 417 | } 418 | //Adds val to every element of the given column 419 | constexpr auto& AddCol(int col, T val) 420 | { 421 | for (int i = 0; i < Y; i++) 422 | v[col][i] += val; 423 | return *this; } 424 | //Adds val[i] to element i of the given column; val is indexed from 0 to Y - 1 425 | constexpr auto& AddCol(int col, const T* val) 426 | { 427 | for (int i = 0; i < Y; i++) 428 | v[col][i] += val[i]; 429 | return *this; } 430 | 431 | constexpr auto GetColAsVecb(int col) 432 | { 433 | return *(vecb*)v[col]; 434 | } 435 | 436 | constexpr auto GetColAsVecp(int col) 437 | { 438 | return vecp(v[col]); 439 | } 440 | 441 | 442 | template 443 | constexpr typename std::enable_if::value, SOA_TYPE>::type 444 | Cross(const SOA_TYPE& o) const 445 | { 446 | SOA_TYPE ret(0); 447 | 448 | for (size_t i = 0; i < Yt; i++) { 449 | ret[0][i] = v[1][i] * o[2][i] - v[2][i] * o[1][i]; 450 | ret[1][i] = v[2][i] * o[0][i] - v[0][i] * o[2][i]; 451 | ret[2][i] = v[0][i] * o[1][i] - v[1][i] * o[0][i]; 452 | } 453 | return ret; 454 | } 455 | 456 | 457 | 458 | constexpr auto Rotate() const 459 | { 460 | vecSoa ret(0); 461 | 462 | for (size_t i = 0; i < X; i++) 463 | for (size_t o = 0; o < Y; o++) 464 | ret[o][i] = v[i][o]; 465 | 466 | return ret; 467 | } 468 | 469 | constexpr auto Min(const SOA_TYPE& ov) 470 | { 471 | SOA_TYPE ret(0); 472 | 473 | for (size_t i = 0; i < Xt; i++) 474 | for (size_t o = 0; o < Yt; o++) 475 | ret[i][o] = ::Min(v[i][o], ov[i][o]); 476 | 477 | return ret; 478 | } 479 | 480 | constexpr auto Max(const SOA_TYPE& ov) 481 | { 482 | SOA_TYPE ret(0); 483 | 484 | for (size_t i = 0; i < Xt; i++) 485 | for (size_t o = 0; o < Yt; o++) 486 | 
ret[i][o] = ::Min(v[i][o], ov[i][o]); 487 | 488 | return ret; 489 | } 490 | 491 | constexpr auto MinUp() 492 | { 493 | vecb ret(std::numeric_limits::max()); 494 | 495 | for(size_t o = 0; o < Xt; o++) 496 | for (size_t i = 0; i < Yt; i++) 497 | ret[i] = ::Min(ret[i], v[o][i]); 498 | 499 | return ret; 500 | } 501 | 502 | constexpr auto MaxUp() 503 | { 504 | vecb ret(std::numeric_limits::min()); 505 | 506 | for(size_t o = 0; o < Xt; o++) 507 | for (size_t i = 0; i < Yt; i++) 508 | ret[i] = ::Max(ret[i], v[o][i]); 509 | 510 | return ret; 511 | } 512 | 513 | constexpr auto Lerp(const SOA_TYPE& ov, float time) 514 | { 515 | return *this + time * (ov - *this); 516 | } 517 | 518 | constexpr auto LerpClamped(const SOA_TYPE& ov, float time) 519 | { 520 | return *this + ::Min(1.f, ::Max(0.f, time)) * (ov - *this); 521 | } 522 | 523 | #undef SOA_TYPE 524 | #endif 525 | -------------------------------------------------------------------------------- /math/vector.h: -------------------------------------------------------------------------------- 1 | #ifndef VECTOR_H 2 | #define VECTOR_H 3 | 4 | /* 5 | * This vector library is focuesed about high efficiency 6 | * by laying out the data in a very easily vectorizable way. 7 | * A compiler that does automatic SIMD code generation should 8 | * be able to work more easily on this layout. 9 | * 10 | * Clang does a really good job at generating SIMD code, while 11 | * MSVC does not always work. Some more tricky parts, such as 12 | * square root calculation have manual "hand written" SSE/AVX 13 | * implementations, while addition, multiplication, etc. usually 14 | * have correct code generation. 15 | * 16 | * All the code logic should focus around the data layout of the 17 | * SOA vectors, even though accessors (in vec3soa case) are implemented 18 | * in case you need to conveniently access all dimensions in one go. 19 | * The data is laid out like this for a reason. 
20 | */ 21 | 22 | #include "stddef.h" 23 | #include "mmath.h" 24 | #include "math.h" 25 | 26 | #include "vector_operators.h" 27 | #include "soa_accessor.h" 28 | #include 29 | #include 30 | 31 | template 32 | inline void VSqrt(T val[Q]) 33 | { 34 | for (size_t i = 0; i < Q; i++) 35 | val[i] = sqrt(val[i]); 36 | } 37 | 38 | #if PSIMD >= 4 39 | template<> 40 | inline void VSqrt(float val[4]) 41 | { 42 | __m128 x = _mm_loadu_ps(val); 43 | x = _mm_sqrt_ps(x); 44 | _mm_storeu_ps(val, x); 45 | } 46 | #endif 47 | 48 | #if PSIMD >= 8 49 | template<> 50 | inline void VSqrt(float val[8]) 51 | { 52 | __m256 x = _mm256_loadu_ps(val); 53 | x = _mm256_sqrt_ps(x); 54 | _mm256_storeu_ps(val, x); 55 | } 56 | #endif 57 | 58 | #if PSIMD >= 16 59 | template<> 60 | inline void VSqrt(float val[16]) 61 | { 62 | __m512 x = _mm512_loadu_ps(val); 63 | x = _mm512_sqrt_ps(x); 64 | _mm512_storeu_ps(val, x); 65 | } 66 | #endif 67 | 68 | template 69 | struct vecp; 70 | 71 | template 72 | struct vec3soa; 73 | 74 | template 75 | struct vecSoa; 76 | 77 | template 78 | struct matrix; 79 | 80 | template 81 | struct vecb_accessor 82 | { 83 | T x, y, z, w; 84 | }; 85 | 86 | template 87 | struct vecb_accessor 88 | { 89 | T x; 90 | }; 91 | 92 | template 93 | struct vecb_accessor 94 | { 95 | T x, y; 96 | }; 97 | 98 | template 99 | struct vecb_accessor 100 | { 101 | T x, y, z; 102 | }; 103 | 104 | template 105 | struct vecb 106 | { 107 | using value_type = T; 108 | static constexpr size_t Yt = N; 109 | T v[N]; 110 | 111 | DEFINE_VEC_OPS(vecb); 112 | 113 | #define VEC_TYPE vecb 114 | #include "vec_funcs.h" 115 | 116 | constexpr bool operator==(const vecb& o) 117 | { 118 | for (size_t i = 0; i < N; i++) 119 | if (v[i] != o.v[i]) 120 | return false; 121 | return true; 122 | } 123 | 124 | constexpr bool operator!=(const vecb& o) 125 | { 126 | return !operator==(o); 127 | } 128 | 129 | constexpr T& operator[](size_t idx) 130 | { 131 | return v[idx]; 132 | } 133 | 134 | constexpr const T& operator[](size_t idx) 
const 135 | { 136 | return v[idx]; 137 | } 138 | 139 | inline vecb_accessor* operator->() 140 | { 141 | return (vecb_accessor*)v; 142 | } 143 | 144 | inline const vecb_accessor* operator->() const 145 | { 146 | return (const vecb_accessor*)v; 147 | } 148 | 149 | template 150 | constexpr operator vecp() const 151 | { 152 | constexpr size_t mv = B < 4 ? B : 4; 153 | vecp vec = {}; 154 | for (size_t i = 0; i < mv; i++) 155 | vec[i] = v[i]; 156 | return vec; 157 | } 158 | 159 | template 160 | constexpr operator vec3soa() const 161 | { 162 | vec3soa ret = {}; 163 | for (size_t i = 0; i < 3; i++) 164 | for (size_t o = 0; o < B; o++) 165 | ret[i][o] = v[i]; 166 | return ret; 167 | } 168 | 169 | }; 170 | 171 | template 172 | struct vecp 173 | { 174 | using value_type = T; 175 | static constexpr size_t Yt = N; 176 | 177 | union { 178 | struct { 179 | float x, y, z, w; 180 | }; 181 | T v[4]; 182 | }; 183 | 184 | DEFINE_VEC_OPS(vecp); 185 | 186 | #define VEC_TYPE vecp 187 | #include "vec_funcs.h" 188 | 189 | 190 | constexpr bool operator==(const vecp& o) 191 | { 192 | for (size_t i = 0; i < N; i++) 193 | if (v[i] != o.v[i]) 194 | return false; 195 | return true; 196 | } 197 | 198 | constexpr bool operator!=(const vecp& o) 199 | { 200 | return !operator==(o); 201 | } 202 | 203 | constexpr T& operator[](size_t idx) 204 | { 205 | return v[idx]; 206 | } 207 | 208 | constexpr const T& operator[](size_t idx) const 209 | { 210 | return v[idx]; 211 | } 212 | 213 | template 214 | constexpr auto& operator=(const vecb& vec) 215 | { 216 | constexpr size_t mv = B < 4 ? B : 4; 217 | for (size_t i = 0; i < mv; i++) 218 | v[i] = vec[i]; 219 | return *this; 220 | } 221 | 222 | template 223 | constexpr operator vecb() const 224 | { 225 | constexpr size_t mv = B < 4 ? 
B : 4; 226 | vecb vec = {}; 227 | for (size_t i = 0; i < mv; i++) 228 | vec[i] = v[i]; 229 | return vec; 230 | } 231 | 232 | template 233 | constexpr operator vec3soa() const 234 | { 235 | vec3soa ret = {}; 236 | for (size_t i = 0; i < 3; i++) 237 | for (size_t o = 0; o < B; o++) 238 | ret[i][o] = v[i]; 239 | return ret; 240 | } 241 | }; 242 | 243 | template 244 | struct vec3soa 245 | { 246 | using value_type = T; 247 | static constexpr size_t X = 3; 248 | static constexpr size_t Xt = X; 249 | static constexpr size_t Yt = Y; 250 | union { 251 | struct { 252 | T x[Y]; 253 | T y[Y]; 254 | T z[Y]; 255 | }; 256 | T v[X][Y]; 257 | DEFINE_SOA_ACCESSOR; 258 | }; 259 | 260 | DEFINE_SOA_OPS(vec3soa); 261 | DEFINE_SOA_VEC_OPS(vec3soa); 262 | 263 | #define SOA_TYPE vec3soa 264 | #include "vecsoa_funcs.h" 265 | 266 | constexpr bool operator==(const vec3soa& ov) const 267 | { 268 | for (size_t i = 0; i < X; i++) 269 | for (size_t o = 0; o < Y; o++) 270 | if (v[i][o] != ov.v[i][o]) 271 | return false; 272 | return true; 273 | } 274 | 275 | constexpr bool operator!=(const vec3soa& o) const 276 | { 277 | return !operator==(o); 278 | } 279 | 280 | inline void ToAngles() 281 | { 282 | T y[Y], x[Y], len[Y]; 283 | for (size_t o = 0; o < Y; o++) 284 | y[o] = atan2(v[1][o], v[0][o]); 285 | 286 | Length<2>(len); 287 | 288 | for (size_t o = 0; o < Y; o++) 289 | x[o] = atan2(-v[1][o], len[o]); 290 | 291 | for (size_t o = 0; o < Y; o++) 292 | v[0][o] = x[o]; 293 | 294 | for (size_t o = 0; o < Y; o++) 295 | v[1][o] = y[o]; 296 | 297 | for (size_t o = 0; o < Y; o++) 298 | v[2][o] = 0; 299 | 300 | } 301 | 302 | constexpr auto GetAngles() 303 | { 304 | auto ret = *this; 305 | ret.ToAngles(); 306 | return ret; 307 | } 308 | 309 | constexpr T* operator[](size_t idx) 310 | { 311 | return v[idx]; 312 | } 313 | 314 | constexpr const T* operator[](size_t idx) const 315 | { 316 | return v[idx]; 317 | } 318 | 319 | }; 320 | 321 | template 322 | struct vecSoa 323 | { 324 | using value_type = T; 325 | 
static constexpr size_t Xt = X; 326 | static constexpr size_t Yt = Y; 327 | union 328 | { 329 | T v[X][Y]; 330 | DEFINE_SOA_ACCESSOR; 331 | }; 332 | 333 | DEFINE_SOA_OPS(vecSoa); 334 | DEFINE_SOA_VEC_OPS(vecSoa); 335 | 336 | #define SOA_TYPE vecSoa 337 | #include "vecsoa_funcs.h" 338 | 339 | constexpr bool operator==(const vecSoa& ov) const 340 | { 341 | for (size_t i = 0; i < X; i++) 342 | for (size_t o = 0; o < Y; o++) 343 | if (v[i][o] != ov.v[i][o]) 344 | return false; 345 | return true; 346 | } 347 | 348 | constexpr bool operator!=(const vecSoa& o) const 349 | { 350 | return !operator==(o); 351 | } 352 | 353 | constexpr T* operator[](size_t idx) 354 | { 355 | return v[idx]; 356 | } 357 | 358 | constexpr const T* operator[](size_t idx) const 359 | { 360 | return v[idx]; 361 | } 362 | 363 | template 364 | constexpr operator vec3soa() 365 | { 366 | constexpr int mv = X < 3 ? X : 3; 367 | constexpr int mb = Y < B ? Y : B; 368 | vec3soa ret = {}; 369 | for (size_t i = 0; i < mv; i++) 370 | for (size_t o = 0; o < mb; o++) 371 | ret[i][o] = v[i][o]; 372 | return ret; 373 | } 374 | }; 375 | 376 | template 377 | using vec = vecb; 378 | using vec2 = vec<2>; 379 | using vec3 = vec<3>; 380 | using vec4 = vec<4>; 381 | 382 | using vec3_t = vecp; 383 | using vec4_t = vecp; 384 | 385 | using xvec3 = vec3soa; 386 | using yvec3 = vec3soa; 387 | using zvec3 = vec3soa; 388 | using nvec3 = vec3soa; 389 | template 390 | using svec3 = vec3soa; 391 | 392 | template 393 | using xvec = vecSoa; 394 | template 395 | using yvec = vecSoa; 396 | template 397 | using zvec = vecSoa; 398 | template 399 | using nvec = vecSoa; 400 | 401 | static_assert(std::is_pod>::value); 402 | 403 | template 404 | using veci = vecb; 405 | 406 | #include "matrix.h" 407 | 408 | #endif 409 | -------------------------------------------------------------------------------- /math/vector_operators.h: -------------------------------------------------------------------------------- 1 | #ifndef VECTOR_OPERATORS_H 2 | 
3 | #define VEC_OP(type, OP) \ 4 | template