├── Arrays.cpp ├── CMakeLists.txt ├── Chunks.cpp ├── Common.cpp ├── Common.h ├── Core ├── Func.h ├── Memory.cpp ├── Memory.h ├── Slice.cpp ├── Slice.h ├── UniquePtr.h ├── Utils.cpp ├── Utils.h └── Vector.h ├── Math ├── Frustum.cpp ├── Frustum.h ├── Mat.cpp ├── Mat.h ├── Plane.cpp ├── Plane.h ├── Quat.cpp ├── Quat.h ├── Sphere.cpp ├── Sphere.h ├── Transform.cpp ├── Transform.h ├── Utils.h └── Vec.h ├── README.md ├── SSECulling.cpp ├── Timer.cpp └── Timer.h /Arrays.cpp: -------------------------------------------------------------------------------- 1 | #include "Common.h" 2 | #include "Core/Vector.h" 3 | #include "Math/Sphere.h" 4 | #include "Math/Frustum.h" 5 | #include 6 | #include 7 | #include 8 | 9 | struct Data { 10 | Vector spheres = Vector(&sse_allocator); 11 | Vector results; 12 | 13 | // Maps 3d position (offset_3d(Vec3i(x, y, z), Vec3i(data_size)) to actual 14 | // sphere position. 15 | Vector mapping; 16 | }; 17 | 18 | static Data generate_data(DataType data_type, const Config &config) 19 | { 20 | Data data; 21 | const int half_size = config.data_size/2; 22 | for (int z = 0; z < config.data_size; z++) { 23 | for (int y = 0; y < config.data_size; y++) { 24 | for (int x = 0; x < config.data_size; x++) { 25 | const Vec3i p = (Vec3i(x, y, z) - Vec3i(half_size)) * Vec3i(2); 26 | data.spheres.pappend(ToVec3f(p), 1.0f); 27 | }}} 28 | 29 | for (int i = 0; i < data.spheres.length(); i++) 30 | data.mapping.append(i); 31 | 32 | data.results.resize((data.spheres.length() + 31) / 32); 33 | fill(data.results, 0); 34 | 35 | // If random data is requested, shuffle the mapping and move the spheres. 
36 | if (data_type == Random) { 37 | auto seed = std::chrono::system_clock::now().time_since_epoch().count(); 38 | std::shuffle(data.mapping.data(), data.mapping.data() + data.mapping.length(), 39 | std::default_random_engine(seed)); 40 | Vector spheres_tmp(&sse_allocator); 41 | spheres_tmp.resize(data.spheres.length()); 42 | for (int i = 0; i < data.spheres.length(); i++) { 43 | spheres_tmp[data.mapping[i]] = data.spheres[i]; 44 | } 45 | data.spheres = std::move(spheres_tmp); 46 | } 47 | return data; 48 | } 49 | 50 | static Vector get_results(const Data &data) 51 | { 52 | Vector out(data.results.length()); 53 | fill(out, 0); 54 | for (int i = 0, n = data.spheres.length(); i < n; i++) { 55 | const int i2 = data.mapping[i]; 56 | const int ri1 = i / 32; 57 | const int shift1 = i % 32; 58 | const int ri2 = i2 / 32; 59 | const int shift2 = i2 % 32; 60 | const uint32_t result = (data.results[ri2] & (1U << shift2)) != 0; 61 | out[ri1] |= (result & 1) << shift1; 62 | } 63 | return out; 64 | } 65 | 66 | void do_arrays(const Config &config) 67 | { 68 | const Frustum f = Frustum_Perspective(75.0f, 1.333f, 0.5f, 100.0f); 69 | Data data; 70 | 71 | data = generate_data(Structured, config); 72 | measure([&]{ naive_cull(data.results, data.spheres, f); }, 50, 10, "Naive culling / structured data", config); 73 | print_results(get_results(data), config); 74 | 75 | data = generate_data(Random, config); 76 | measure([&]{ naive_cull(data.results, data.spheres, f); }, 50, 10, "Naive culling / random data", config); 77 | print_results(get_results(data), config); 78 | 79 | data = generate_data(Structured, config); 80 | measure([&]{ sse_cull(data.results, data.spheres, f); }, 50, 10, "SSE culling / structured data", config); 81 | print_results(get_results(data), config); 82 | 83 | data = generate_data(Random, config); 84 | measure([&]{ sse_cull(data.results, data.spheres, f); }, 50, 10, "SSE culling / random data", config); 85 | print_results(get_results(data), config); 86 | } 87 | 
-------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(SSECULLING) 2 | cmake_minimum_required(VERSION 2.8) 3 | 4 | if (NOT WIN32) 5 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Wextra -Wno-unused-parameter") 6 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-exceptions -fno-rtti") 7 | endif() 8 | 9 | set(PROJECT_INCLUDES ${CMAKE_SOURCE_DIR}) 10 | 11 | function(add_source_subdir NAME) 12 | file(GLOB SOURCES "${NAME}/*.cpp" "${NAME}/*.h") 13 | list(APPEND PROJECT_SOURCES ${SOURCES}) 14 | set(PROJECT_SOURCES "${PROJECT_SOURCES}" PARENT_SCOPE) 15 | endfunction() 16 | 17 | add_source_subdir(Core) 18 | add_source_subdir(Math) 19 | add_source_subdir(.) 20 | include_directories(${PROJECT_INCLUDES}) 21 | add_executable(sseculling ${PROJECT_SOURCES}) 22 | -------------------------------------------------------------------------------- /Chunks.cpp: -------------------------------------------------------------------------------- 1 | #include "Common.h" 2 | #include "Core/Vector.h" 3 | #include "Core/UniquePtr.h" 4 | #include "Math/Sphere.h" 5 | #include "Math/Frustum.h" 6 | #include 7 | #include 8 | #include 9 | 10 | struct Chunk { 11 | Vector spheres = Vector(&sse_allocator); 12 | Vector results; 13 | // results is a bit buffer: one bit per sphere, rounded up to whole 32-bit words. 14 | Chunk(int max) 15 | { 16 | spheres.reserve(max); 17 | results.resize((max + 31) / 32); 18 | fill(results, 0); 19 | } 20 | }; 21 | 22 | struct Data { 23 | Vector> chunks_ordered; 24 | Vector chunks; 25 | }; 26 | 27 | static Data generate_data(DataType data_type, const Config &config, int max) 28 | { 29 | Data data; 30 | data.chunks_ordered.pappend(new (OrDie) Chunk(max)); 31 | Chunk *c = data.chunks_ordered.last().get(); 32 | const int half_size = config.data_size/2; 33 | for (int z = 0; z < config.data_size; z++) { 34 | for (int y = 0; y < config.data_size; y++) { 35 | for (int x = 0; x < config.data_size; x++) { 36 | const Vec3i
p = (Vec3i(x, y, z) - Vec3i(half_size)) * Vec3i(2); 37 | c->spheres.pappend(ToVec3f(p), 1.0f); 38 | if (c->spheres.length() == max) { 39 | data.chunks_ordered.pappend(new (OrDie) Chunk(max)); 40 | c = data.chunks_ordered.last().get(); 41 | } 42 | }}} 43 | 44 | if (data.chunks_ordered.last()->spheres.length() == 0) 45 | data.chunks_ordered.resize(data.chunks_ordered.length() - 1); 46 | 47 | data.chunks.resize(data.chunks_ordered.length()); 48 | for (int i = 0; i < data.chunks.length(); i++) { 49 | data.chunks[i] = data.chunks_ordered[i].get(); 50 | } 51 | 52 | if (data_type == Random) { 53 | auto seed = std::chrono::system_clock::now().time_since_epoch().count(); 54 | std::shuffle(data.chunks.data(), data.chunks.data() + data.chunks.length(), 55 | std::default_random_engine(seed)); 56 | } 57 | return data; 58 | } 59 | 60 | static Vector get_results(const Data &data) 61 | { 62 | int count = 0; 63 | for (const auto &c : data.chunks_ordered) 64 | count += c->spheres.length(); 65 | // One output bit per sphere, rounded up to whole 32-bit words. 66 | Vector out((count + 31) / 32); 67 | fill(out, 0); 68 | int out_i = 0; 69 | for (const auto &c : data.chunks_ordered) { 70 | for (int i = 0, n = c->spheres.length(); i < n; i++) { 71 | const int ri = i / 32; 72 | const int shift = i % 32; 73 | const int out_ri = out_i / 32; 74 | const int out_shift = out_i % 32; 75 | const uint32_t result = (c->results[ri] & (1U << shift)) != 0; 76 | out[out_ri] |= (result & 1) << out_shift; 77 | out_i++; 78 | } 79 | } 80 | return out; 81 | } 82 | 83 | static void sse_cull_data(Data *data, const Frustum &f) 84 | { 85 | for (const auto &c : data->chunks) 86 | sse_cull(c->results, c->spheres, f); 87 | } 88 | 89 | static void sse_cull_data_prefetch(Data *data, const Frustum &f) 90 | { 91 | for (int i = 0, n = data->chunks.length(); i < n; i++) { 92 | if (i != n-1) { 93 | // Tried all hints there, NTA works best for very fragmented data.
94 | _mm_prefetch(reinterpret_cast(data->chunks.data()[i+1]->spheres.data()), _MM_HINT_NTA); 95 | _mm_prefetch(reinterpret_cast(data->chunks.data()[i+1]->results.data()), _MM_HINT_NTA); 96 | } 97 | const auto &c = data->chunks.data()[i]; 98 | sse_cull(c->results, c->spheres, f); 99 | } 100 | } 101 | 102 | void do_chunks(const Config &config) 103 | { 104 | const Frustum f = Frustum_Perspective(75.0f, 1.333f, 0.5f, 100.0f); 105 | Data data; 106 | 107 | auto just_do_it = [&](int N) { 108 | char buf[4096]; 109 | snprintf(buf, sizeof(buf), "%03d", N); 110 | 111 | snprintf(buf, sizeof(buf), "SSE culling / chunks / structured data / %3d per chunk (w/o prefetch)", N); 112 | data = generate_data(Structured, config, N); 113 | measure([&]{ sse_cull_data(&data, f); }, 50, 10, buf, config); 114 | print_results(get_results(data), config); 115 | 116 | snprintf(buf, sizeof(buf), "SSE culling / chunks / random data / %3d per chunk (w/o prefetch)", N); 117 | data = generate_data(Random, config, N); 118 | measure([&]{ sse_cull_data(&data, f); }, 50, 10, buf, config); 119 | print_results(get_results(data), config); 120 | 121 | snprintf(buf, sizeof(buf), "SSE culling / chunks / random data / %3d per chunk (with prefetch)", N); 122 | data = generate_data(Random, config, N); 123 | measure([&]{ sse_cull_data_prefetch(&data, f); }, 50, 10, buf, config); 124 | print_results(get_results(data), config); 125 | }; 126 | 127 | const int tries[] = {512, 256, 128, 64, 32, 8}; 128 | for (int t : tries) { 129 | printf("----------------------------------------\n"); 130 | just_do_it(t); 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /Common.cpp: -------------------------------------------------------------------------------- 1 | #include "Common.h" 2 | #include "Timer.h" 3 | #include "Core/Vector.h" 4 | #include 5 | 6 | void parse_args(Config *config, int argc, char **argv) 7 | { 8 | for (int i = 1; i < argc; i++) { 9 | const char *arg = argv[i]; 
10 | if (strcmp(arg, "-v") == 0) { 11 | config->verbose = true; 12 | } else if (strcmp(arg, "-s") == 0 && i + 1 < argc) { // a trailing "-s" with no value is ignored rather than reading argv[argc] 13 | config->data_size = atoi(argv[++i]); 14 | } 15 | } 16 | } 17 | 18 | void print_results(Slice bits, const Config &config) 19 | { 20 | if (!config.verbose) 21 | return; 22 | 23 | const int size = config.data_size; 24 | const int mid = size / 2; 25 | for (int z = 0; z < size; z++) { 26 | for (int x = 0; x < size; x++) { 27 | const int i = offset_3d(Vec3i(x, mid, z), Vec3i(size)); 28 | const int ri = i / 32; 29 | const int shift = i % 32; 30 | if (bits[ri] & (1U << shift)) { 31 | printf(". "); 32 | } else { 33 | printf("# "); 34 | } 35 | } 36 | printf("\n"); 37 | } 38 | } 39 | 40 | void measure(Func f, int warmup, int runs, const char *name, const Config &config) 41 | { 42 | Vector results(runs); 43 | for (int i = 0; i < warmup; i++) { 44 | f(); 45 | } 46 | double average = get_time_milliseconds(); 47 | for (int i = 0; i < runs; i++) { 48 | const double begin = get_time_milliseconds(); 49 | f(); 50 | results[i] = get_time_milliseconds() - begin; 51 | } 52 | average = (get_time_milliseconds() - average) / runs; 53 | printf("'%s' done in %d runs, average: %fms\n", name, runs, average); 54 | if (config.verbose) { 55 | printf("per-run info:\n"); 56 | for (int i = 0; i < runs; i++) 57 | printf(" [%d] %fms\n", i, results[i]); 58 | } 59 | } 60 | 61 | void naive_cull(Slice results, Slice spheres, const Frustum &f) 62 | { 63 | for (int i = 0, n = spheres.length; i < n; i++) { 64 | const Sphere &s = spheres.data[i]; 65 | const uint32_t result = f.cull(s) & 1; 66 | const int ri = i / 32; 67 | const int shift = i % 32; 68 | results.data[ri] |= result << shift; 69 | } 70 | } 71 | 72 | void sse_cull(Slice results, Slice spheres, const Frustum &f) 73 | { 74 | // we negate everything because we use this formula to cull: 75 | // dot(-p.n, s.center) - p.d > s.radius 76 | // it's equivalent to: 77 | // dot(p.n, s.center) + p.d < -s.radius 78 | // but no need to negate sphere
radius 79 | const __m128 plane_components[8] = { 80 | simd_set(-f.planes[0].n.x, -f.planes[1].n.x, -f.planes[2].n.x, -f.planes[3].n.x), 81 | simd_set(-f.planes[0].n.y, -f.planes[1].n.y, -f.planes[2].n.y, -f.planes[3].n.y), 82 | simd_set(-f.planes[0].n.z, -f.planes[1].n.z, -f.planes[2].n.z, -f.planes[3].n.z), 83 | simd_set(-f.planes[0].d, -f.planes[1].d, -f.planes[2].d, -f.planes[3].d), 84 | simd_set(-f.planes[4].n.x, -f.planes[5].n.x, -f.planes[4].n.x, -f.planes[5].n.x), 85 | simd_set(-f.planes[4].n.y, -f.planes[5].n.y, -f.planes[4].n.y, -f.planes[5].n.y), 86 | simd_set(-f.planes[4].n.z, -f.planes[5].n.z, -f.planes[4].n.z, -f.planes[5].n.z), 87 | simd_set(-f.planes[4].d, -f.planes[5].d, -f.planes[4].d, -f.planes[5].d), 88 | }; 89 | 90 | for (int i = 0, n = spheres.length; i < n; i++) { 91 | // Load sphere into SSE register. 92 | const __m128 s = _mm_load_ps(reinterpret_cast(spheres.data+i)); 93 | const __m128 xxxx = simd_splat_x(s); 94 | const __m128 yyyy = simd_splat_y(s); 95 | const __m128 zzzz = simd_splat_z(s); 96 | const __m128 rrrr = simd_splat_w(s); 97 | 98 | __m128 v, r; 99 | // Move sphere center to plane normal space and make it relative to plane. 100 | // dot(p.n, s) + p.d 101 | v = simd_madd(xxxx, plane_components[0], plane_components[3]); 102 | v = simd_madd(yyyy, plane_components[1], v); 103 | v = simd_madd(zzzz, plane_components[2], v); 104 | 105 | // One of r floats will be set to 0xFFFFFFFF if sphere is outside of the frustum. 106 | r = _mm_cmpgt_ps(v, rrrr); 107 | 108 | // Same for second set of planes. 109 | v = simd_madd(xxxx, plane_components[4], plane_components[7]); 110 | v = simd_madd(yyyy, plane_components[5], v); 111 | v = simd_madd(zzzz, plane_components[6], v); 112 | 113 | r = _mm_or_ps(r, _mm_cmpgt_ps(v, rrrr)); 114 | 115 | // Shuffle and extract the result: 116 | // 1. movehl(r, r) does this (we're interested in 2 lower floats): 117 | // a b c d -> c d c d 118 | // 2. 
then we OR it with the existing value (ignoring 2 upper floats) 119 | // a b | c d = A B 120 | // 3. and then we OR it again ignoring all but 1 lowest float: 121 | // A | B = R 122 | // Result is written in the lowest float. 123 | r = _mm_or_ps(r, _mm_movehl_ps(r, r)); 124 | r = _mm_or_ps(r, simd_splat_y(r)); 125 | 126 | uint32_t result; 127 | _mm_store_ss((float*)&result, r); 128 | 129 | // And write the result back to bit buffer. 130 | const int ri = i / 32; 131 | const int shift = i % 32; 132 | results.data[ri] |= (result & 1) << shift; 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /Common.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include "Math/Vec.h" 5 | #include "Core/Slice.h" 6 | #include "Core/Func.h" 7 | #include "Math/Sphere.h" 8 | #include "Math/Frustum.h" 9 | 10 | enum DataType { 11 | Structured, 12 | Random, 13 | }; 14 | 15 | struct Config { 16 | int data_size = 80; 17 | bool verbose = false; 18 | }; 19 | 20 | static inline int offset_3d(const Vec3i &p, const Vec3i &size) 21 | { 22 | return (p.z * size.y + p.y) * size.x + p.x; 23 | } 24 | 25 | static inline __m128 simd_set(float x, float y, float z, float w) 26 | { 27 | return _mm_set_ps(w, z, y, x); 28 | } 29 | 30 | static inline __m128 simd_splat_x(__m128 v) { return _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, 0)); } 31 | static inline __m128 simd_splat_y(__m128 v) { return _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1)); } 32 | static inline __m128 simd_splat_z(__m128 v) { return _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 2, 2, 2)); } 33 | static inline __m128 simd_splat_w(__m128 v) { return _mm_shuffle_ps(v, v, _MM_SHUFFLE(3, 3, 3, 3)); } 34 | 35 | static inline __m128 simd_madd(__m128 a, __m128 b, __m128 c) 36 | { 37 | return _mm_add_ps(_mm_mul_ps(a, b), c); 38 | } 39 | 40 | void parse_args(Config *config, int argc, char **argv); 41 | void print_results(Slice bits, const 
Config &config); 42 | void measure(Func f, int warmup, int runs, const char *name, const Config &config); 43 | 44 | void naive_cull(Slice results, Slice spheres, const Frustum &f); 45 | void sse_cull(Slice results, Slice spheres, const Frustum &f); 46 | 47 | void do_arrays(const Config &config); 48 | void do_chunks(const Config &config); 49 | -------------------------------------------------------------------------------- /Core/Func.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | template class Func; 6 | 7 | template 8 | class Func { 9 | static R _invoke_func(void *data, Args &&...args) 10 | { 11 | auto fp = reinterpret_cast(data); 12 | return (*fp)(std::forward(args)...); 13 | } 14 | 15 | template 16 | static R _invoke_obj(void *data, Args &&...args) 17 | { 18 | auto obj = static_cast(data); 19 | return (*obj)(std::forward(args)...); 20 | } 21 | 22 | template 23 | static R _invoke_const_obj(void *data, Args &&...args) 24 | { 25 | auto obj = static_cast(data); 26 | return (*obj)(std::forward(args)...); 27 | } 28 | 29 | R (*m_invoker)(void*, Args&&...) 
= nullptr; 30 | void *m_data = nullptr; 31 | 32 | public: 33 | Func() = default; 34 | Func(const Func &r) = default; 35 | Func(Func &&r) = default; 36 | 37 | // We need this little guy here, because otherwise template ctor 38 | // func(T &obj) will be considered as a better overload match 39 | // in some cases 40 | 41 | // Func(Func &r) = default; 42 | 43 | template 44 | Func(T &obj): m_invoker(_invoke_obj), 45 | m_data(static_cast(&obj)) 46 | { 47 | } 48 | template 49 | Func(const T &obj): m_invoker(_invoke_const_obj), 50 | m_data(static_cast(const_cast(&obj))) 51 | { 52 | } 53 | Func(R (*fp)(Args...)): m_invoker(_invoke_func), 54 | m_data(reinterpret_cast(fp)) 55 | { 56 | } 57 | 58 | Func &operator=(const Func&) = default; 59 | Func &operator=(Func&&) = default; 60 | R operator()(Args ...args) const 61 | { 62 | return (*m_invoker)(m_data, std::forward(args)...); 63 | } 64 | 65 | explicit operator bool() const { return m_data != nullptr; } 66 | }; 67 | -------------------------------------------------------------------------------- /Core/Memory.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "Core/Memory.h" 5 | #include "Core/Utils.h" 6 | 7 | void *xmalloc(int n) 8 | { 9 | void *mem = malloc(n); 10 | if (!mem) 11 | die("nextgame: out of memory"); 12 | return mem; 13 | } 14 | 15 | void xfree(void *ptr) 16 | { 17 | free(ptr); 18 | } 19 | 20 | int xcopy(void *dst, const void *src, int n) 21 | { 22 | memmove(dst, src, n); 23 | return n; 24 | } 25 | 26 | void xclear(void *dst, int n) 27 | { 28 | memset(dst, 0, n); 29 | } 30 | 31 | void *operator new(size_t size, const OrDie_t &) 32 | { 33 | void *mem = operator new(size, std::nothrow); 34 | if (!mem) 35 | die("nextgame: out of memory"); 36 | return mem; 37 | } 38 | 39 | void *operator new[](size_t size, const OrDie_t &) 40 | { 41 | void *mem = operator new[](size, std::nothrow); 42 | if (!mem) 43 | die("nextgame: out of memory"); 
44 | return mem; 45 | } 46 | 47 | void *DefaultAllocator::allocate_bytes(int n) 48 | { 49 | return xmalloc(n); 50 | } 51 | 52 | void DefaultAllocator::free_bytes(void *mem) 53 | { 54 | xfree(mem); 55 | } 56 | 57 | DefaultAllocator default_allocator; 58 | 59 | AlignedAllocator::AlignedAllocator(int n): align_to(n) 60 | { 61 | } 62 | 63 | void *AlignedAllocator::allocate_bytes(int n) 64 | { 65 | #ifdef _WIN32 66 | void *ptr = _aligned_malloc(n, align_to); 67 | #else 68 | void *ptr; 69 | if (posix_memalign(&ptr, align_to, n) != 0) 70 | ptr = nullptr; 71 | #endif 72 | if (ptr == nullptr) 73 | die("nextgame: out of memory (aligned: %d)", align_to); 74 | return ptr; 75 | } 76 | 77 | void AlignedAllocator::free_bytes(void *mem) 78 | { 79 | #ifdef _WIN32 80 | _aligned_free(mem); 81 | #else 82 | free(mem); 83 | #endif 84 | } 85 | 86 | AlignedAllocator sse_allocator(16); 87 | -------------------------------------------------------------------------------- /Core/Memory.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | void *xmalloc(int n); 9 | void xfree(void *ptr); 10 | int xcopy(void *dst, const void *src, int n); 11 | void xclear(void *dst, int n); 12 | 13 | struct OrDie_t {}; 14 | const OrDie_t OrDie = {}; 15 | 16 | void *operator new(size_t size, const OrDie_t&); 17 | void *operator new[](size_t size, const OrDie_t&); 18 | 19 | template 20 | T *allocate_memory(int n = 1) 21 | { 22 | return (T*)xmalloc(sizeof(T) * n); 23 | } 24 | 25 | template 26 | T &allocate_memory(T *&ptr) 27 | { 28 | ptr = (T*)xmalloc(sizeof(T)); 29 | return *ptr; 30 | } 31 | 32 | template 33 | void free_memory(T *ptr) 34 | { 35 | if (ptr) xfree(ptr); 36 | } 37 | 38 | template 39 | int copy_memory(T *dst, const T *src, int n = 1) 40 | { 41 | return xcopy(dst, src, sizeof(T) * n); 42 | } 43 | 44 | template 45 | void clear_memory(T *dst, int n = 1) 46 | { 47 | xclear(dst, sizeof(T)*n); 48 | 
} 49 | 50 | struct Allocator { 51 | virtual void *allocate_bytes(int n) = 0; 52 | virtual void free_bytes(void *mem) = 0; 53 | 54 | template 55 | T *allocate_memory(int n = 1) 56 | { 57 | return (T*)allocate_bytes(sizeof(T) * n); 58 | } 59 | 60 | template 61 | T &allocate_memory(T *&ptr) 62 | { 63 | ptr = (T*)allocate_bytes(sizeof(T)); 64 | return *ptr; 65 | } 66 | 67 | template 68 | void free_memory(T *ptr) 69 | { 70 | if (ptr) free_bytes(ptr); 71 | } 72 | }; 73 | 74 | struct DefaultAllocator : Allocator { 75 | void *allocate_bytes(int n) override; 76 | void free_bytes(void *mem) override; 77 | }; 78 | 79 | extern DefaultAllocator default_allocator; 80 | 81 | struct AlignedAllocator : Allocator { 82 | int align_to; 83 | 84 | AlignedAllocator(int n); 85 | void *allocate_bytes(int n) override; 86 | void free_bytes(void *mem) override; 87 | }; 88 | 89 | // aligned to 16 bytes 90 | extern AlignedAllocator sse_allocator; 91 | -------------------------------------------------------------------------------- /Core/Slice.cpp: -------------------------------------------------------------------------------- 1 | #include "Core/Slice.h" 2 | 3 | #define _SLICE_CONST_CHAR_OPERATOR(op) \ 4 | bool operator op(Slice lhs, Slice rhs) { \ 5 | return operator op (lhs, rhs); \ 6 | } 7 | 8 | _SLICE_CONST_CHAR_OPERATOR(==) 9 | _SLICE_CONST_CHAR_OPERATOR(<) 10 | _SLICE_CONST_CHAR_OPERATOR(!=) 11 | _SLICE_CONST_CHAR_OPERATOR(>=) 12 | _SLICE_CONST_CHAR_OPERATOR(<=) 13 | _SLICE_CONST_CHAR_OPERATOR(>) 14 | 15 | #undef _SLICE_CONST_CHAR_OPERATOR 16 | 17 | int compute_hash(Slice s) 18 | { 19 | constexpr unsigned M0 = 2860486313U; 20 | constexpr unsigned M1 = 3267000013U; 21 | unsigned hash = M0; 22 | for (const auto &b : s) 23 | hash = (hash ^ b) * M1; 24 | return hash; 25 | } 26 | 27 | int compute_hash(const char *s) 28 | { 29 | return compute_hash(Slice(s)); 30 | } 31 | 32 | int compute_hash(int i) 33 | { 34 | return compute_hash(slice_cast(Slice(&i, 1))); 35 | } 36 | 
-------------------------------------------------------------------------------- /Core/Slice.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include "Core/Utils.h" 7 | 8 | #define _COMMON_SLICE_PART_CONST(T) \ 9 | const T *data; \ 10 | int length; \ 11 | \ 12 | Slice() = default; \ 13 | Slice(std::initializer_list r): data(r.begin()), length(r.size()) {} \ 14 | template \ 15 | Slice(const T (&array)[N]): data(array), length(N) {} \ 16 | Slice(const T *data, int len): data(data), length(len) {} \ 17 | Slice(const Slice &r): data(r.data), length(r.length) {} \ 18 | explicit operator bool() const { return length != 0; } \ 19 | int byte_length() const { return length * sizeof(T); } \ 20 | const T &first() const { NG_ASSERT(length != 0); return data[0]; } \ 21 | const T &last() const { NG_ASSERT(length != 0); return data[length-1]; }\ 22 | Slice sub() const \ 23 | { \ 24 | return {data, length}; \ 25 | } \ 26 | Slice sub(int begin) const \ 27 | { \ 28 | NG_SLICE_BOUNDS_CHECK(begin, length); \ 29 | return {data + begin, length - begin}; \ 30 | } \ 31 | Slice sub(int begin, int end) const \ 32 | { \ 33 | NG_ASSERT(begin <= end); \ 34 | NG_SLICE_BOUNDS_CHECK(begin, length); \ 35 | NG_SLICE_BOUNDS_CHECK(end, length); \ 36 | return {data + begin, end - begin}; \ 37 | } \ 38 | const T &operator[](int idx) const \ 39 | { \ 40 | NG_IDX_BOUNDS_CHECK(idx, length); \ 41 | return data[idx]; \ 42 | } 43 | 44 | template 45 | struct Slice { 46 | T *data; 47 | int length; 48 | 49 | Slice() = default; 50 | 51 | template 52 | Slice(T (&array)[N]): data(array), length(N) {} 53 | Slice(T *data, int length): data(data), length(length) {} 54 | explicit operator bool() const { return length != 0; } 55 | 56 | T &operator[](int idx) 57 | { 58 | NG_IDX_BOUNDS_CHECK(idx, length); 59 | return data[idx]; 60 | } 61 | 62 | const T &operator[](int idx) const 63 | { 64 | NG_IDX_BOUNDS_CHECK(idx, 
length); 65 | return data[idx]; 66 | } 67 | 68 | // for consistency with Vector, but feel free to use length directly 69 | int byte_length() const { return length * sizeof(T); } 70 | 71 | T &first() { NG_ASSERT(length != 0); return data[0]; } 72 | const T &first() const { NG_ASSERT(length != 0); return data[0]; } 73 | T &last() { NG_ASSERT(length != 0); return data[length-1]; } 74 | const T &last() const { NG_ASSERT(length != 0); return data[length-1]; } 75 | 76 | Slice sub() 77 | { 78 | return {data, length}; 79 | } 80 | Slice sub(int begin) 81 | { 82 | NG_SLICE_BOUNDS_CHECK(begin, length); 83 | return {data + begin, length - begin}; 84 | } 85 | Slice sub(int begin, int end) 86 | { 87 | NG_ASSERT(begin <= end); 88 | NG_SLICE_BOUNDS_CHECK(begin, length); 89 | NG_SLICE_BOUNDS_CHECK(end, length); 90 | return {data + begin, end - begin}; 91 | } 92 | Slice sub() const 93 | { 94 | return {data, length}; 95 | } 96 | Slice sub(int begin) const 97 | { 98 | NG_SLICE_BOUNDS_CHECK(begin, length); 99 | return {data + begin, length - begin}; 100 | } 101 | Slice sub(int begin, int end) const 102 | { 103 | NG_ASSERT(begin <= end); 104 | NG_SLICE_BOUNDS_CHECK(begin, length); 105 | NG_SLICE_BOUNDS_CHECK(end, length); 106 | return {data + begin, end - begin}; 107 | } 108 | }; 109 | 110 | template 111 | struct Slice { 112 | _COMMON_SLICE_PART_CONST(T) 113 | }; 114 | 115 | template <> 116 | struct Slice { 117 | _COMMON_SLICE_PART_CONST(char) 118 | Slice(const char *str): data(str), length(str != nullptr ? 
strlen(str) : 0) {} 119 | }; 120 | 121 | #undef _COMMON_SLICE_PART_CONST 122 | 123 | template 124 | bool operator==(Slice lhs, Slice rhs) 125 | { 126 | if (lhs.length != rhs.length) 127 | return false; 128 | for (int i = 0; i < lhs.length; i++) { 129 | if (!(lhs.data[i] == rhs.data[i])) 130 | return false; 131 | } 132 | return true; 133 | } 134 | 135 | template 136 | bool operator<(Slice lhs, Slice rhs) 137 | { 138 | for (int i = 0; i < rhs.length; i++) { 139 | if (i == lhs.length) { 140 | // lhs.len() < rhs.len(), but the common part is == 141 | return true; 142 | } 143 | if (lhs.data[i] < rhs.data[i]) { 144 | return true; 145 | } 146 | if (rhs.data[i] < lhs.data[i]) { 147 | return false; 148 | } 149 | } 150 | return false; 151 | } 152 | 153 | template 154 | bool operator!=(Slice lhs, Slice rhs) 155 | { 156 | return !operator==(lhs, rhs); 157 | } 158 | 159 | template 160 | bool operator>=(Slice lhs, Slice rhs) 161 | { 162 | return !operator<(lhs, rhs); 163 | } 164 | 165 | template 166 | bool operator<=(Slice lhs, Slice rhs) 167 | { 168 | return !operator<(rhs, lhs); 169 | } 170 | 171 | template 172 | bool operator>(Slice lhs, Slice rhs) 173 | { 174 | return operator<(rhs, lhs); 175 | } 176 | 177 | bool operator==(Slice lhs, Slice rhs); 178 | bool operator<(Slice lhs, Slice rhs); 179 | bool operator!=(Slice lhs, Slice rhs); 180 | bool operator>=(Slice lhs, Slice rhs); 181 | bool operator<=(Slice lhs, Slice rhs); 182 | bool operator>(Slice lhs, Slice rhs); 183 | 184 | template 185 | const T *begin(Slice s) { return s.data; } 186 | template 187 | const T *end(Slice s) { return s.data + s.length; } 188 | template 189 | T *begin(Slice s) { return s.data; } 190 | template 191 | T *end(Slice s) { return s.data + s.length; } 192 | 193 | template 194 | int copy(Slice dst, Slice src) 195 | { 196 | const int n = std::min(dst.length, src.length); 197 | if (n == 0) { 198 | return 0; 199 | } 200 | 201 | const T *srcp = src.data; 202 | T *dstp = dst.data; 203 | if (srcp == dstp) 
{ 204 | return n; 205 | } 206 | 207 | if (srcp < dstp) { 208 | for (int i = n-1; i >= 0; i--) { 209 | dstp[i] = srcp[i]; 210 | } 211 | } else { 212 | for (int i = 0; i < n; i++) { 213 | dstp[i] = srcp[i]; 214 | } 215 | } 216 | return n; 217 | } 218 | 219 | template 220 | Slice slice_cast(Slice s) 221 | { 222 | const double ratio = (double)sizeof(U) / (double)sizeof(T); 223 | return Slice((T*)s.data, s.length * ratio); 224 | } 225 | 226 | int compute_hash(Slice s); 227 | int compute_hash(const char *s); 228 | int compute_hash(int i); 229 | 230 | template 231 | static inline int compute_hash(Slice s) 232 | { 233 | return compute_hash(slice_cast(s)); 234 | } 235 | 236 | template 237 | void sort(Slice s) 238 | { 239 | std::sort(s.data, s.data + s.length); 240 | } 241 | 242 | template 243 | void sort(Slice s, F &&f) 244 | { 245 | std::sort(s.data, s.data + s.length, std::forward(f)); 246 | } 247 | 248 | template 249 | void stable_sort(Slice s) 250 | { 251 | std::stable_sort(s.data, s.data + s.length); 252 | } 253 | 254 | template 255 | void stable_sort(Slice s, F &&f) 256 | { 257 | std::stable_sort(s.data, s.data + s.length, std::forward(f)); 258 | } 259 | 260 | template 261 | void reverse(Slice s) 262 | { 263 | const int len1 = s.length - 1; 264 | const int mid = s.length / 2; 265 | for (int i = 0; i < mid; i++) { 266 | std::swap(s[i], s[len1-i]); 267 | } 268 | } 269 | 270 | template 271 | void fill(Slice s, const T &v) 272 | { 273 | for (int i = 0; i < s.length; i++) 274 | s.data[i] = v; 275 | } 276 | 277 | template 278 | int linear_find(Slice s, const U &v) 279 | { 280 | for (int i = 0; i < s.length; i++) { 281 | if (s.data[i] == v) 282 | return i; 283 | } 284 | return -1; 285 | } 286 | 287 | template 288 | int linear_find_if(Slice s, F &&f) 289 | { 290 | for (int i = 0; i < s.length; i++) 291 | if (f(s.data[i])) 292 | return i; 293 | return -1; 294 | } 295 | 296 | template 297 | int binary_find(Slice s, const U &v) 298 | { 299 | int imax = s.length-1; 300 | int imin 
= 0; 301 | 302 | while (imin < imax) { 303 | // believe it or not, nobody really cares about overflows here 304 | const int imid = (imin + imax) / 2; 305 | if (s.data[imid] < v) 306 | imin = imid+1; 307 | else 308 | imax = imid; 309 | } 310 | if (imax == imin && s.data[imin] == v) 311 | return imin; 312 | return -1; 313 | } 314 | -------------------------------------------------------------------------------- /Core/UniquePtr.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Core/Memory.h" 4 | #include "Core/Utils.h" 5 | 6 | template 7 | struct DefaultDelete { 8 | static void invoke(T *ptr) { 9 | delete ptr; 10 | } 11 | }; 12 | 13 | template 14 | struct DefaultDelete { 15 | static void invoke(T *ptr) { 16 | delete [] ptr; 17 | } 18 | }; 19 | 20 | template > 21 | class UniquePtr { 22 | T *ptr; 23 | 24 | public: 25 | constexpr UniquePtr(): ptr(nullptr) {} 26 | constexpr UniquePtr(std::nullptr_t): ptr(nullptr) {} 27 | explicit UniquePtr(T *ptr): ptr(ptr) {} 28 | UniquePtr(const UniquePtr&) = delete; 29 | UniquePtr(UniquePtr &&r): ptr(r.release()) {} 30 | ~UniquePtr() { if (ptr) D::invoke(ptr); } 31 | 32 | UniquePtr &operator=(const UniquePtr&) = delete; 33 | UniquePtr &operator=(UniquePtr &&r) 34 | { 35 | // Detach r first: on self-move ptr is already null when reset() runs, so the live object is not deleted. 36 | reset(r.release()); 37 | return *this; 38 | } 39 | 40 | UniquePtr &operator=(std::nullptr_t) 41 | { 42 | if (ptr) D::invoke(ptr); 43 | ptr = nullptr; 44 | return *this; 45 | } 46 | 47 | T &operator*() const 48 | { 49 | NG_ASSERT(ptr != nullptr); 50 | return *ptr; 51 | } 52 | 53 | T* operator->() const { NG_ASSERT(ptr != nullptr); return ptr; } 54 | T* get() const { return ptr; } 55 | explicit operator bool() const { return ptr != nullptr; } 56 | 57 | T *release() { T *ret = ptr; ptr = nullptr; return ret; } 58 | void reset(T *p = nullptr) 59 | { 60 | if (ptr) D::invoke(ptr); 61 | ptr = p; 62 | } 63 | }; 64 | 65 | template 66 | class UniquePtr : public UniquePtr { 67 |
public: 68 | constexpr UniquePtr() = default; 69 | constexpr UniquePtr(std::nullptr_t): UniquePtr(nullptr) {} 70 | explicit UniquePtr(T *ptr): UniquePtr(ptr) {} 71 | 72 | // #TODO:0 operator[] 73 | }; 74 | 75 | template 76 | bool operator==(const UniquePtr &lhs, const UniquePtr &rhs) 77 | { 78 | return lhs.get() == rhs.get(); 79 | } 80 | 81 | template 82 | bool operator!=(const UniquePtr &lhs, const UniquePtr &rhs) 83 | { 84 | return lhs.get() != rhs.get(); 85 | } 86 | 87 | template 88 | bool operator==(const UniquePtr &lhs, const T *rhs) 89 | { 90 | return lhs.get() == rhs; 91 | } 92 | 93 | template 94 | bool operator!=(const T *lhs, const UniquePtr &rhs) 95 | { 96 | return lhs != rhs.get(); 97 | } 98 | 99 | template 100 | bool operator==(const UniquePtr &lhs, std::nullptr_t) 101 | { 102 | return lhs.get() == nullptr; 103 | } 104 | template 105 | bool operator==(std::nullptr_t, const UniquePtr &rhs) 106 | { 107 | return nullptr == rhs.get(); 108 | } 109 | 110 | template 111 | bool operator!=(const UniquePtr &lhs, std::nullptr_t) 112 | { 113 | return lhs.get() != nullptr; 114 | } 115 | template 116 | bool operator!=(std::nullptr_t, const UniquePtr &rhs) 117 | { 118 | return nullptr != rhs.get(); 119 | } 120 | 121 | template 122 | UniquePtr make_unique(Args &&...args) 123 | { 124 | return UniquePtr(new (OrDie) T(std::forward(args)...)); 125 | } 126 | -------------------------------------------------------------------------------- /Core/Utils.cpp: -------------------------------------------------------------------------------- 1 | #include "Core/Utils.h" 2 | #include 3 | #include 4 | #include 5 | 6 | void assertion_abort(const char *assertion, const char *file, 7 | int line, const char *func) 8 | { 9 | fprintf(stderr, "%s:%d: %s: assertion `%s` failed\n", 10 | file, line, func, assertion); 11 | abort(); 12 | } 13 | 14 | void die(const char *msg, ...) 
15 | { 16 | va_list va; 17 | va_start(va, msg); 18 | vfprintf(stderr, msg, va); 19 | va_end(va); 20 | fprintf(stderr, "\n"); 21 | abort(); 22 | } 23 | 24 | void warn(const char *msg, ...) 25 | { 26 | va_list va; 27 | va_start(va, msg); 28 | vfprintf(stderr, msg, va); 29 | va_end(va); 30 | fprintf(stderr, "\n"); 31 | } 32 | -------------------------------------------------------------------------------- /Core/Utils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | //---------------------------------------------------------------------- 4 | // Configuration 5 | //---------------------------------------------------------------------- 6 | 7 | #define NG_ASSERTION_ENABLED 8 | 9 | //---------------------------------------------------------------------- 10 | //---------------------------------------------------------------------- 11 | 12 | void assertion_abort(const char *assertion, const char *file, 13 | int line, const char *func); 14 | 15 | #ifdef _WIN32 16 | #define PRETTY_FUNCTION __FUNCTION__ 17 | #else 18 | #define PRETTY_FUNCTION __PRETTY_FUNCTION__ 19 | #endif 20 | 21 | #ifdef NG_ASSERTION_ENABLED 22 | #define NG_ASSERT(expr) \ 23 | do { \ 24 | if (!(expr)) { \ 25 | assertion_abort(#expr, \ 26 | __FILE__, __LINE__, \ 27 | PRETTY_FUNCTION); \ 28 | } \ 29 | } while (0) 30 | #else 31 | #define NG_ASSERT(expr) ((void)0) 32 | #endif 33 | 34 | #define NG_SLICE_BOUNDS_CHECK(index, length) \ 35 | NG_ASSERT((unsigned)(index) <= (unsigned)(length)) 36 | 37 | #define NG_IDX_BOUNDS_CHECK(index, length) \ 38 | NG_ASSERT((unsigned)(index) < (unsigned)(length)) 39 | 40 | void die(const char *msg, ...); 41 | void warn(const char *msg, ...); 42 | 43 | #define NG_DELETE_COPY(Type) \ 44 | Type(const Type&) = delete; \ 45 | Type &operator=(const Type&) = delete 46 | 47 | #define NG_DELETE_MOVE(Type) \ 48 | Type(Type&&) = delete; \ 49 | Type &operator=(Type&&) = delete 50 | 51 | #define NG_DELETE_COPY_AND_MOVE(Type) \ 52 | 
NG_DELETE_COPY(Type); \ 53 | NG_DELETE_MOVE(Type) 54 | 55 | #define AS_SLAVE_PTR(T, ptr) (*reinterpret_cast*>(&(ptr))) 56 | -------------------------------------------------------------------------------- /Core/Vector.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Core/Utils.h" 4 | #include "Core/Memory.h" 5 | #include "Core/Slice.h" 6 | 7 | template 8 | struct Vector { 9 | T *m_data = nullptr; 10 | int m_len = 0; 11 | int m_cap = 0; 12 | Allocator *m_allocator = &default_allocator; 13 | 14 | int _new_size(int requested) const 15 | { 16 | int newcap = m_cap * 2; 17 | return newcap < requested ? requested : newcap; 18 | } 19 | 20 | void _ensure_capacity(int n) 21 | { 22 | if (m_len + n > m_cap) 23 | reserve(_new_size(m_len + n)); 24 | } 25 | 26 | // expects: idx < _len, idx >= 0, offset > 0 27 | void _move_forward(int idx, int offset) 28 | { 29 | const int last = m_len-1; 30 | int src = last; 31 | int dst = last+offset; 32 | while (src >= idx) { 33 | new (&m_data[dst]) T(std::move(m_data[src])); 34 | m_data[src].~T(); 35 | src--; 36 | dst--; 37 | } 38 | } 39 | 40 | // expects: idx < _len, idx >= 0, offset < 0 41 | void _move_backward(int idx, int offset) 42 | { 43 | int src = idx; 44 | int dst = idx+offset; 45 | while (src < m_len) { 46 | new (&m_data[dst]) T(std::move(m_data[src])); 47 | m_data[src].~T(); 48 | src++; 49 | dst++; 50 | } 51 | } 52 | 53 | void _self_insert(int idx, Slice s) 54 | { 55 | int sidx = s.data - m_data; 56 | _ensure_capacity(s.length); 57 | 58 | // restore the slice after possible realloc 59 | s = Slice(m_data + sidx, s.length); 60 | 61 | // shorcut case, append 62 | if (idx == m_len) { 63 | for (int i = 0; i < s.length; i++) 64 | new (m_data + idx + i) T(s.data[i]); 65 | m_len += s.length; 66 | return; 67 | } 68 | 69 | // move tail further towards the end so that there is a free space for 70 | // data insertion 71 | _move_forward(idx, s.length); 72 | m_len += s.length; 
73 | 74 | // adjust slice so that it points to the right data and if we're 75 | // splitting the slice, insert first half of it right away 76 | if (idx <= sidx) { 77 | s = Slice(s.data + s.length, s.length); 78 | } else { 79 | const int lhslen = idx - sidx; 80 | copy_memory(m_data + idx, m_data + sidx, lhslen); 81 | for (int i = 0; i < lhslen; i++) 82 | new (m_data + idx + i) T(m_data[sidx+i]); 83 | idx += lhslen; 84 | s = Slice(s.data + s.length + lhslen, s.length - lhslen); 85 | } 86 | for (int i = 0; i < s.length; i++) 87 | new (m_data + idx + i) T(s.data[i]); 88 | } 89 | 90 | void _nullify() 91 | { 92 | m_data = nullptr; 93 | m_len = 0; 94 | m_cap = 0; 95 | } 96 | 97 | Vector() = default; 98 | 99 | explicit Vector(Allocator *allocator): m_allocator(allocator) {} 100 | 101 | explicit Vector(int n): m_len(n), m_cap(n) 102 | { 103 | NG_ASSERT(n >= 0); 104 | if (m_len == 0) 105 | return; 106 | m_data = m_allocator->allocate_memory(m_len); 107 | for (int i = 0; i < m_len; i++) 108 | new (m_data + i) T; 109 | } 110 | 111 | Vector(int n, const T &elem): m_len(n), m_cap(n) 112 | { 113 | NG_ASSERT(n >= 0); 114 | if (m_len == 0) 115 | return; 116 | m_data = m_allocator->allocate_memory(m_len); 117 | for (int i = 0; i < m_len; i++) 118 | new (m_data + i) T(elem); 119 | } 120 | 121 | Vector(Slice s): m_len(s.length), m_cap(s.length) 122 | { 123 | if (m_len == 0) 124 | return; 125 | m_data = m_allocator->allocate_memory(m_len); 126 | for (int i = 0; i < m_len; i++) 127 | new (m_data + i) T(s.data[i]); 128 | } 129 | 130 | Vector(Slice s): Vector(Slice(s)) 131 | { 132 | } 133 | 134 | Vector(std::initializer_list r): Vector(Slice(r)) 135 | { 136 | } 137 | 138 | Vector(const Vector &r): Vector(Slice(r)) 139 | { 140 | } 141 | 142 | Vector(Vector &&r): m_data(r.m_data), m_len(r.m_len), m_cap(r.m_cap) 143 | { 144 | if (m_allocator != r.m_allocator) 145 | die("Vector: moving is only allowed between vectors with the same allocator"); 146 | r._nullify(); 147 | } 148 | 149 | Vector 
&operator=(Slice r) 150 | { 151 | if (m_data == r.data && m_len == r.length) { 152 | // self copy shortcut (a = a) 153 | return *this; 154 | } 155 | if (m_cap < r.length) { 156 | // slice is bigger than we are, realloc needed, also it 157 | // means slice cannot point to ourselves and it is save 158 | // to destroy ourselves 159 | for (int i = 0; i < m_len; i++) 160 | m_data[i].~T(); 161 | m_allocator->free_memory(m_data); 162 | m_cap = m_len = r.length; 163 | m_data = m_allocator->allocate_memory(m_len); 164 | for (int i = 0; i < m_len; i++) 165 | new (m_data + i) T(r.data[i]); 166 | } else { 167 | // slice can be a subset of ourselves 168 | int i = copy(sub(), r); 169 | for (; i < m_len; i++) { 170 | // destroy the rest if any 171 | m_data[i].~T(); 172 | } 173 | for (; i < r.length; i++) { 174 | // construct the new if any 175 | new (m_data + i) T(r.data[i]); 176 | } 177 | m_len = r.length; 178 | } 179 | return *this; 180 | } 181 | 182 | Vector &operator=(Slice r) 183 | { 184 | return operator=(Slice(r)); 185 | } 186 | 187 | Vector &operator=(const Vector &r) 188 | { 189 | return operator=(Slice(r)); 190 | } 191 | 192 | Vector &operator=(Vector &&r) 193 | { 194 | if (m_allocator != r.m_allocator) 195 | die("Vector: moving is only allowed between vectors with the same allocator"); 196 | for (int i = 0; i < m_len; i++) 197 | m_data[i].~T(); 198 | m_allocator->free_memory(m_data); 199 | 200 | m_data = r.m_data; 201 | m_len = r.m_len; 202 | m_cap = r.m_cap; 203 | r._nullify(); 204 | return *this; 205 | } 206 | 207 | Vector &operator=(std::initializer_list r) 208 | { 209 | return operator=(Slice(r)); 210 | } 211 | 212 | ~Vector() 213 | { 214 | for (int i = 0; i < m_len; i++) 215 | m_data[i].~T(); 216 | m_allocator->free_memory(m_data); 217 | } 218 | 219 | int length() const { return m_len; } 220 | int byte_length() const { return m_len * sizeof(T); } 221 | int capacity() const { return m_cap; } 222 | T *data() { return m_data; } 223 | const T *data() const { return 
m_data; } 224 | 225 | void clear() 226 | { 227 | for (int i = 0; i < m_len; i++) 228 | m_data[i].~T(); 229 | m_len = 0; 230 | } 231 | 232 | void reserve(int n) 233 | { 234 | if (m_cap >= n) 235 | return; 236 | 237 | T *old_data = m_data; 238 | m_cap = n; 239 | m_data = m_allocator->allocate_memory(m_cap); 240 | for (int i = 0; i < m_len; i++) { 241 | new (m_data + i) T(std::move(old_data[i])); 242 | old_data[i].~T(); 243 | } 244 | m_allocator->free_memory(old_data); 245 | } 246 | 247 | void shrink() 248 | { 249 | if (m_cap == m_len) 250 | return; 251 | 252 | T *old_data = m_data; 253 | m_cap = m_len; 254 | if (m_len > 0) { 255 | m_data = m_allocator->allocate_memory(m_len); 256 | for (int i = 0; i < m_len; i++) { 257 | new (m_data + i) T(std::move(old_data[i])); 258 | old_data[i].~T(); 259 | } 260 | } else { 261 | m_data = nullptr; 262 | } 263 | m_allocator->free_memory(old_data); 264 | } 265 | 266 | void resize(int n) 267 | { 268 | NG_ASSERT(n >= 0); 269 | 270 | if (m_len == n) 271 | return; 272 | 273 | if (m_len > n) { 274 | for (int i = n; i < m_len; i++) 275 | m_data[i].~T(); 276 | m_len = n; 277 | return; 278 | } 279 | 280 | reserve(n); 281 | for (int i = m_len; i < n; i++) 282 | new (m_data + i) T; 283 | m_len = n; 284 | } 285 | 286 | void resize(int n, const T &elem) 287 | { 288 | NG_ASSERT(n >= 0); 289 | 290 | if (m_len == n) 291 | return; 292 | 293 | if (m_len > n) { 294 | for (int i = n; i < m_len; i++) 295 | m_data[i].~T(); 296 | m_len = n; 297 | return; 298 | } 299 | 300 | reserve(n); 301 | for (int i = m_len; i < n; i++) 302 | new (m_data + i) T(elem); 303 | m_len = n; 304 | } 305 | 306 | void quick_remove(int idx) 307 | { 308 | NG_IDX_BOUNDS_CHECK(idx, m_len); 309 | if (idx != m_len-1) 310 | std::swap(m_data[idx], m_data[m_len-1]); 311 | m_data[--m_len].~T(); 312 | } 313 | 314 | void remove(int idx) 315 | { 316 | NG_IDX_BOUNDS_CHECK(idx, m_len); 317 | if (idx == m_len - 1) { 318 | m_data[--m_len].~T(); 319 | return; 320 | } 321 | 322 | 
m_data[idx].~T(); 323 | _move_backward(idx+1, -1); 324 | m_len--; 325 | } 326 | 327 | void remove(int begin, int end) 328 | { 329 | NG_ASSERT(begin <= end); 330 | NG_SLICE_BOUNDS_CHECK(begin, m_len); 331 | NG_SLICE_BOUNDS_CHECK(end, m_len); 332 | const int len = end - begin; 333 | if (len == 0) 334 | return; 335 | for (int i = begin; i < end; i++) 336 | m_data[i].~T(); 337 | if (end < m_len) 338 | _move_backward(begin+len, -len); 339 | m_len -= len; 340 | } 341 | 342 | template 343 | void pinsert(int idx, Args &&...args) 344 | { 345 | NG_SLICE_BOUNDS_CHECK(idx, m_len); 346 | _ensure_capacity(1); 347 | if (idx < m_len) { 348 | _move_forward(idx, 1); 349 | } 350 | new (m_data + idx) T(std::forward(args)...); 351 | m_len++; 352 | } 353 | 354 | void insert(int idx, const T &elem) 355 | { 356 | pinsert(idx, elem); 357 | } 358 | 359 | void insert(int idx, T &&elem) 360 | { 361 | pinsert(idx, std::move(elem)); 362 | } 363 | 364 | void insert(int idx, Slice s) 365 | { 366 | NG_SLICE_BOUNDS_CHECK(idx, m_len); 367 | if (s.length == 0) { 368 | return; 369 | } 370 | if (s.data >= m_data && s.data < m_data + m_len) { 371 | _self_insert(idx, s); 372 | return; 373 | } 374 | _ensure_capacity(s.length); 375 | if (idx < m_len) 376 | _move_forward(idx, s.length); 377 | for (int i = 0; i < s.length; i++) 378 | new (m_data + idx + i) T(s.data[i]); 379 | m_len += s.length; 380 | } 381 | 382 | template 383 | void pappend(Args &&...args) 384 | { 385 | _ensure_capacity(1); 386 | new (m_data + m_len++) T(std::forward(args)...); 387 | } 388 | 389 | T *append() 390 | { 391 | _ensure_capacity(1); 392 | return new (m_data + m_len++) T; 393 | } 394 | 395 | void append(const T &elem) 396 | { 397 | NG_ASSERT(&elem < m_data || &elem >= m_data + m_len); 398 | pappend(elem); 399 | } 400 | 401 | void append(T &&elem) 402 | { 403 | NG_ASSERT(&elem < m_data || &elem >= m_data + m_len); 404 | pappend(std::move(elem)); 405 | } 406 | 407 | void append(Slice s) 408 | { 409 | insert(m_len, s); 410 | } 411 | 
412 | T &operator[](int idx) 413 | { 414 | NG_IDX_BOUNDS_CHECK(idx, m_len); 415 | return m_data[idx]; 416 | } 417 | 418 | const T &operator[](int idx) const 419 | { 420 | NG_IDX_BOUNDS_CHECK(idx, m_len); 421 | return m_data[idx]; 422 | } 423 | 424 | T &first() { NG_ASSERT(m_len != 0); return m_data[0]; } 425 | const T &first() const { NG_ASSERT(m_len != 0); return m_data[0]; } 426 | T &last() { NG_ASSERT(m_len != 0); return m_data[m_len-1]; } 427 | const T &last() const { NG_ASSERT(m_len != 0); return m_data[m_len-1]; } 428 | 429 | Slice sub() 430 | { 431 | return {m_data, m_len}; 432 | } 433 | Slice sub(int begin) 434 | { 435 | NG_SLICE_BOUNDS_CHECK(begin, m_len); 436 | return {m_data + begin, m_len - begin}; 437 | } 438 | Slice sub(int begin, int end) 439 | { 440 | NG_ASSERT(begin <= end); 441 | NG_SLICE_BOUNDS_CHECK(begin, m_len); 442 | NG_SLICE_BOUNDS_CHECK(end, m_len); 443 | return {m_data + begin, end - begin}; 444 | } 445 | Slice sub() const 446 | { 447 | return {m_data, m_len}; 448 | } 449 | Slice sub(int begin) const 450 | { 451 | NG_SLICE_BOUNDS_CHECK(begin, m_len); 452 | return {m_data + begin, m_len - begin}; 453 | } 454 | Slice sub(int begin, int end) const 455 | { 456 | NG_ASSERT(begin <= end); 457 | NG_SLICE_BOUNDS_CHECK(begin, m_len); 458 | NG_SLICE_BOUNDS_CHECK(end, m_len); 459 | return {m_data + begin, end - begin}; 460 | } 461 | 462 | operator Slice() { return {m_data, m_len}; } 463 | operator Slice() const { return {m_data, m_len}; } 464 | }; 465 | 466 | template 467 | const T *begin(const Vector &v) { return v.data(); } 468 | template 469 | const T *end(const Vector &v) { return v.data()+v.length(); } 470 | template 471 | T *begin(Vector &v) { return v.data(); } 472 | template 473 | T *end(Vector &v) { return v.data()+v.length(); } 474 | -------------------------------------------------------------------------------- /Math/Frustum.cpp: -------------------------------------------------------------------------------- 1 | #include "Math/Frustum.h" 
2 | 3 | void Frustum::update_planes() 4 | { 5 | planes[FP_FAR] = Plane(far[FPC_TOP_LEFT], far[FPC_BOTTOM_LEFT], far[FPC_BOTTOM_RIGHT]); 6 | planes[FP_NEAR] = Plane(near[FPC_TOP_LEFT], near[FPC_TOP_RIGHT], near[FPC_BOTTOM_RIGHT]); 7 | planes[FP_LEFT] = Plane(far[FPC_TOP_LEFT], near[FPC_TOP_LEFT], near[FPC_BOTTOM_LEFT]); 8 | planes[FP_RIGHT] = Plane(near[FPC_TOP_RIGHT], far[FPC_TOP_RIGHT], far[FPC_BOTTOM_RIGHT]); 9 | planes[FP_BOTTOM] = Plane(near[FPC_BOTTOM_LEFT], near[FPC_BOTTOM_RIGHT], far[FPC_BOTTOM_RIGHT]); 10 | planes[FP_TOP] = Plane(far[FPC_TOP_LEFT], far[FPC_TOP_RIGHT], near[FPC_TOP_RIGHT]); 11 | } 12 | 13 | FrustumSide Frustum::cull(const Vec3f &min, const Vec3f &max, FrustumCullingType type) const 14 | { 15 | auto result = FS_OUTSIDE; 16 | for (int i = 0; i < 6; i++) { 17 | if (i == FP_NEAR && type == FCT_NO_NEAR_PLANE) 18 | continue; 19 | int ps = planes[i].side(min, max); 20 | switch (ps) { 21 | case PS_FRONT: 22 | if (result != FS_BOTH) 23 | result = FS_INSIDE; 24 | break; 25 | case PS_BOTH: 26 | result = FS_BOTH; 27 | break; 28 | case PS_BACK: 29 | return FS_OUTSIDE; 30 | } 31 | } 32 | return result; 33 | } 34 | 35 | bool Frustum::cull(const Sphere &s) const 36 | { 37 | for (int i = 0; i < 6; i++) { 38 | const Plane &p = planes[i]; 39 | if (dot(-p.n, s.center) - p.d > s.radius) 40 | return true; 41 | } 42 | return false; 43 | } 44 | 45 | Sphere Frustum::bounding_sphere() const 46 | { 47 | Vec3f mi = near[0]; 48 | Vec3f ma = near[0]; 49 | for (int i = 0; i < 4; i++) { 50 | mi = min(near[i], mi); 51 | ma = max(near[i], ma); 52 | mi = min(far[i], mi); 53 | ma = max(far[i], ma); 54 | } 55 | const Vec3f center = mi + (ma - mi) / Vec3f(2); 56 | const float radius = distance(ma, center); 57 | return Sphere(center, radius); 58 | } 59 | 60 | Frustum Frustum_Ortho(float left, float right, float bottom, float top, float znear, float zfar) 61 | { 62 | Frustum out; 63 | out.near[0] = Vec3f(left, top, -znear); 64 | out.near[1] = Vec3f(right, top, -znear); 65 | 
out.near[2] = Vec3f(left, bottom, -znear); 66 | out.near[3] = Vec3f(right, bottom, -znear); 67 | out.far[0] = Vec3f(left, top, -zfar); 68 | out.far[1] = Vec3f(right, top, -zfar); 69 | out.far[2] = Vec3f(left, bottom, -zfar); 70 | out.far[3] = Vec3f(right, bottom, -zfar); 71 | out.update_planes(); 72 | return out; 73 | } 74 | 75 | Frustum Frustum_Perspective(float fov, float aspect, float znear, float zfar) 76 | { 77 | Frustum out; 78 | frustum_plane_corners(out.near, fov, aspect, -znear); 79 | frustum_plane_corners(out.far, fov, aspect, -zfar); 80 | out.update_planes(); 81 | return out; 82 | } 83 | 84 | Frustum Frustum_Shadow(const Sphere &bsphere, float zextend) 85 | { 86 | const float r = bsphere.radius; 87 | return Frustum_Ortho(-r, r, -r, r, 0.0, 2.0 * r + zextend); 88 | } 89 | 90 | Mat4 Mat4_Shadow(const Sphere &bsphere, float zextend) 91 | { 92 | const float r = bsphere.radius; 93 | return Mat4_Ortho(-r, r, -r, r, 0.0, 2.0 * r + zextend); 94 | } 95 | 96 | void frustum_plane_corners(Vec3f out[4], float fov, float aspect, float plane) 97 | { 98 | const Vec2f wh = frustum_plane_wh(fov, aspect, plane); 99 | const Vec3f up = Vec3f_Y(); 100 | const Vec3f right = Vec3f_X(); 101 | const Vec3f fc {0, 0, plane}; 102 | out[FPC_TOP_LEFT] = fc + (up * Vec3f(wh.y/2)) - (right * Vec3f(wh.x/2)); 103 | out[FPC_TOP_RIGHT] = fc + (up * Vec3f(wh.y/2)) + (right * Vec3f(wh.x/2)); 104 | out[FPC_BOTTOM_LEFT] = fc - (up * Vec3f(wh.y/2)) - (right * Vec3f(wh.x/2)); 105 | out[FPC_BOTTOM_RIGHT] = fc - (up * Vec3f(wh.y/2)) + (right * Vec3f(wh.x/2)); 106 | } 107 | 108 | Vec2f frustum_plane_wh(float fov, float aspect, float plane) 109 | { 110 | const float fov_rad = fov * MATH_DEG_TO_RAD; 111 | const float h = 2 * std::tan(fov_rad / 2) * std::abs(plane); 112 | const float w = h * aspect; 113 | return {w, h}; 114 | } 115 | 116 | Vec2f projection_ratio(float near, float far) 117 | { 118 | return {far / (far - near), (-far * near) / (far - near)}; 119 | } 120 | 121 | Frustum transform(const 
Frustum &frustum, const Transform &tr) 122 | { 123 | Frustum out = frustum; 124 | for (int i = 0; i < 4; i++) { 125 | out.near[i] = transform(out.near[i], tr); 126 | out.far[i] = transform(out.far[i], tr); 127 | } 128 | out.update_planes(); 129 | return out; 130 | } 131 | 132 | static float calculate_split(float near, float far, int i, int m, float ratio) 133 | { 134 | // logarithmic split 135 | const float Slog = near * powf(far/near, (float)i/m); 136 | // uniform split 137 | const float Suni = near + (far - near) * (float)i/m; 138 | return lerp(Slog, Suni, ratio); 139 | } 140 | 141 | void generate_frustum_split_spheres(Slice spheres_out, 142 | float fov, float aspect, float znear, float zfar, float ratio) 143 | { 144 | float prev_zfar = znear; 145 | for (int i = 0; i < spheres_out.length; i++) { 146 | const float split = calculate_split(znear, zfar, i+1, spheres_out.length, ratio); 147 | Frustum f = Frustum_Perspective(fov, aspect, prev_zfar, split); 148 | spheres_out[i] = f.bounding_sphere(); 149 | prev_zfar = split; 150 | } 151 | } 152 | -------------------------------------------------------------------------------- /Math/Frustum.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Core/Slice.h" 4 | #include "Math/Plane.h" 5 | #include "Math/Quat.h" 6 | #include "Math/Vec.h" 7 | #include "Math/Sphere.h" 8 | #include "Math/Transform.h" 9 | #include "Math/Mat.h" 10 | 11 | enum FrustumSide { 12 | FS_INSIDE, 13 | FS_OUTSIDE, 14 | FS_BOTH, 15 | }; 16 | 17 | enum FrustumPlane { 18 | FP_FAR, 19 | FP_NEAR, 20 | FP_LEFT, 21 | FP_RIGHT, 22 | FP_BOTTOM, 23 | FP_TOP, 24 | }; 25 | 26 | enum FrustumPlaneCorner { 27 | FPC_TOP_LEFT, 28 | FPC_TOP_RIGHT, 29 | FPC_BOTTOM_LEFT, 30 | FPC_BOTTOM_RIGHT, 31 | }; 32 | 33 | enum FrustumCullingType { 34 | FCT_NORMAL, 35 | FCT_NO_NEAR_PLANE, 36 | }; 37 | 38 | struct Frustum { 39 | Frustum() = default; 40 | void update_planes(); 41 | 42 | FrustumSide cull(const Vec3f &min, 
const Vec3f &max, FrustumCullingType type = FCT_NORMAL) const; 43 | 44 | // returns true only if the sphere is outside of the frustum 45 | bool cull(const Sphere &s) const; 46 | 47 | Plane planes[6]; 48 | Vec3f near[4]; 49 | Vec3f far[4]; 50 | 51 | Sphere bounding_sphere() const; 52 | }; 53 | 54 | Frustum Frustum_Ortho(float left, float right, float bottom, float top, float znear, float zfar); 55 | Frustum Frustum_Perspective(float fov, float aspect, float znear, float zfar); 56 | Frustum Frustum_Shadow(const Sphere &bsphere, float zextend); 57 | Mat4 Mat4_Shadow(const Sphere &bsphere, float zextend); 58 | 59 | // Returns top left, top right, bottom left, bottom right corners. 60 | void frustum_plane_corners(Vec3f out[4], float fov, float aspect, float plane); 61 | Vec2f frustum_plane_wh(float fov, float aspect, float plane); 62 | Vec2f projection_ratio(float near, float far); 63 | Frustum transform(const Frustum &frustum_src, const Transform &tr); 64 | void generate_frustum_split_spheres( 65 | Slice spheres_out, 66 | float fov, float aspect, float znear, float zfar, float ratio); 67 | -------------------------------------------------------------------------------- /Math/Mat.cpp: -------------------------------------------------------------------------------- 1 | #include "Math/Mat.h" 2 | #include 3 | #include 4 | 5 | Vec3f operator*(const Mat3 &l, const Vec3f &r) 6 | { 7 | return { 8 | l[0] * r[0] + l[3] * r[1] + l[6] * r[2], 9 | l[1] * r[0] + l[4] * r[1] + l[7] * r[2], 10 | l[2] * r[0] + l[5] * r[1] + l[8] * r[2], 11 | }; 12 | } 13 | 14 | Vec3f operator*(const Vec3f &l, const Mat3 &r) 15 | { 16 | return { 17 | l[0] * r[0] + l[1] * r[1] + l[2] * r[2], 18 | l[0] * r[3] + l[1] * r[4] + l[2] * r[5], 19 | l[0] * r[6] + l[1] * r[7] + l[2] * r[8], 20 | }; 21 | } 22 | 23 | void Mat4::dump() const 24 | { 25 | printf("[%f %f %f %f\n", data[0], data[4], data[8], data[12]); 26 | printf(" %f %f %f %f\n", data[1], data[5], data[9], data[13]); 27 | printf(" %f %f %f %f\n", 
data[2], data[6], data[10], data[14]); 28 | printf(" %f %f %f %f]\n", data[3], data[7], data[11], data[15]); 29 | } 30 | 31 | bool operator==(const Mat4 &l, const Mat4 &r) 32 | { 33 | return ( 34 | l[0] == r[0] && 35 | l[1] == r[1] && 36 | l[2] == r[2] && 37 | l[3] == r[3] && 38 | l[4] == r[4] && 39 | l[5] == r[5] && 40 | l[6] == r[6] && 41 | l[7] == r[7] && 42 | l[8] == r[8] && 43 | l[9] == r[9] && 44 | l[10] == r[10] && 45 | l[11] == r[11] && 46 | l[12] == r[12] && 47 | l[13] == r[13] && 48 | l[14] == r[14] && 49 | l[15] == r[15] 50 | ); 51 | } 52 | 53 | bool operator!=(const Mat4 &lhs, const Mat4 &rhs) 54 | { 55 | return !operator==(lhs, rhs); 56 | } 57 | 58 | Mat4 operator*(const Mat4 &l, const Mat4 &r) 59 | { 60 | Mat4 out; 61 | out[0] = l[0] * r[0] + l[4] * r[1] + l[8] * r[2] + l[12] * r[3]; 62 | out[1] = l[1] * r[0] + l[5] * r[1] + l[9] * r[2] + l[13] * r[3]; 63 | out[2] = l[2] * r[0] + l[6] * r[1] + l[10] * r[2] + l[14] * r[3]; 64 | out[3] = l[3] * r[0] + l[7] * r[1] + l[11] * r[2] + l[15] * r[3]; 65 | 66 | out[4] = l[0] * r[4] + l[4] * r[5] + l[8] * r[6] + l[12] * r[7]; 67 | out[5] = l[1] * r[4] + l[5] * r[5] + l[9] * r[6] + l[13] * r[7]; 68 | out[6] = l[2] * r[4] + l[6] * r[5] + l[10] * r[6] + l[14] * r[7]; 69 | out[7] = l[3] * r[4] + l[7] * r[5] + l[11] * r[6] + l[15] * r[7]; 70 | 71 | out[8] = l[0] * r[8] + l[4] * r[9] + l[8] * r[10] + l[12] * r[11]; 72 | out[9] = l[1] * r[8] + l[5] * r[9] + l[9] * r[10] + l[13] * r[11]; 73 | out[10] = l[2] * r[8] + l[6] * r[9] + l[10] * r[10] + l[14] * r[11]; 74 | out[11] = l[3] * r[8] + l[7] * r[9] + l[11] * r[10] + l[15] * r[11]; 75 | 76 | out[12] = l[0] * r[12] + l[4] * r[13] + l[8] * r[14] + l[12] * r[15]; 77 | out[13] = l[1] * r[12] + l[5] * r[13] + l[9] * r[14] + l[13] * r[15]; 78 | out[14] = l[2] * r[12] + l[6] * r[13] + l[10] * r[14] + l[14] * r[15]; 79 | out[15] = l[3] * r[12] + l[7] * r[13] + l[11] * r[14] + l[15] * r[15]; 80 | return out; 81 | } 82 | 83 | Mat4 operator+(const Mat4 &lhs, const Mat4 &rhs) 84 | 
{ 85 | Mat4 out; 86 | out[0] = lhs[0] + rhs[0]; 87 | out[1] = lhs[1] + rhs[1]; 88 | out[2] = lhs[2] + rhs[2]; 89 | out[3] = lhs[3] + rhs[3]; 90 | out[4] = lhs[4] + rhs[4]; 91 | out[5] = lhs[5] + rhs[5]; 92 | out[6] = lhs[6] + rhs[6]; 93 | out[7] = lhs[7] + rhs[7]; 94 | out[8] = lhs[8] + rhs[8]; 95 | out[9] = lhs[9] + rhs[9]; 96 | out[10] = lhs[10] + rhs[10]; 97 | out[11] = lhs[11] + rhs[11]; 98 | out[12] = lhs[12] + rhs[12]; 99 | out[13] = lhs[13] + rhs[13]; 100 | out[14] = lhs[14] + rhs[14]; 101 | out[15] = lhs[15] + rhs[15]; 102 | return out; 103 | } 104 | 105 | Vec3f operator*(const Mat4 &l, const Vec3f &r) 106 | { 107 | return { 108 | l[0] * r[0] + l[4] * r[1] + l[8] * r[2] + l[12], 109 | l[1] * r[0] + l[5] * r[1] + l[9] * r[2] + l[13], 110 | l[2] * r[0] + l[6] * r[1] + l[10] * r[2] + l[14] 111 | }; 112 | } 113 | 114 | Vec3f operator*(const Vec3f &l, const Mat4 &r) 115 | { 116 | return { 117 | l[0] * r[0] + l[1] * r[1] + l[2] * r[2] + r[3], 118 | l[0] * r[4] + l[1] * r[5] + l[2] * r[6] + r[7], 119 | l[0] * r[8] + l[1] * r[9] + l[2] * r[10] + r[11], 120 | }; 121 | } 122 | 123 | // matrix l by column-vector r 124 | Vec4f operator*(const Mat4 &l, const Vec4f &r) 125 | { 126 | return { 127 | l[0] * r[0] + l[4] * r[1] + l[8] * r[2] + l[12] * r[3], 128 | l[1] * r[0] + l[5] * r[1] + l[9] * r[2] + l[13] * r[3], 129 | l[2] * r[0] + l[6] * r[1] + l[10] * r[2] + l[14] * r[3], 130 | l[3] * r[0] + l[7] * r[1] + l[11] * r[2] + l[15] * r[3] 131 | }; 132 | } 133 | 134 | // row-vector by matrix r 135 | Vec4f operator*(const Vec4f &l, const Mat4 &r) 136 | { 137 | return { 138 | l[0] * r[0] + l[1] * r[1] + l[2] * r[2] + l[3] * r[3], 139 | l[0] * r[4] + l[1] * r[5] + l[2] * r[6] + l[3] * r[7], 140 | l[0] * r[8] + l[1] * r[9] + l[2] * r[10] + l[3] * r[11], 141 | l[0] * r[12] + l[1] * r[13] + l[2] * r[14] + l[3] * r[15] 142 | }; 143 | } 144 | 145 | Mat4 Mat4_Rotate(const Vec3f &axis, float angle) 146 | { 147 | Mat4 m; 148 | float rad = angle * MATH_DEG_TO_RAD; 149 | float c = 
cosf(rad); 150 | float s = sinf(rad); 151 | Vec3f v = normalize(axis); 152 | float xx = v.x * v.x; 153 | float yy = v.y * v.y; 154 | float zz = v.z * v.z; 155 | float xy = v.x * v.y; 156 | float yz = v.y * v.z; 157 | float zx = v.z * v.x; 158 | float xs = v.x * s; 159 | float ys = v.y * s; 160 | float zs = v.z * s; 161 | m[0] = (1.0f - c) * xx + c; m[4] = (1.0f - c) * xy - zs; m[8] = (1.0f - c) * zx + ys; m[12] = 0.0f; 162 | m[1] = (1.0f - c) * xy + zs; m[5] = (1.0f - c) * yy + c; m[9] = (1.0f - c) * yz - xs; m[13] = 0.0f; 163 | m[2] = (1.0f - c) * zx - ys; m[6] = (1.0f - c) * yz + xs; m[10] = (1.0f - c) * zz + c; m[14] = 0.0f; 164 | m[3] = 0.0f; m[7] = 0.0f; m[11] = 0.0f; m[15] = 1.0f; 165 | 166 | return m; 167 | } 168 | 169 | Mat4 Mat4_RotateX(float angle) 170 | { 171 | Mat4 m; 172 | float rad = angle * MATH_DEG_TO_RAD; 173 | float c = cosf(rad); 174 | float s = sinf(rad); 175 | m[0] = 1.0f; m[4] = 0.0f; m[8] = 0.0f; m[12] = 0.0f; 176 | m[1] = 0.0f; m[5] = c; m[9] = -s; m[13] = 0.0f; 177 | m[2] = 0.0f; m[6] = s; m[10] = c; m[14] = 0.0f; 178 | m[3] = 0.0f; m[7] = 0.0f; m[11] = 0.0f; m[15] = 1.0f; 179 | 180 | return m; 181 | } 182 | 183 | Mat4 Mat4_RotateY(float angle) 184 | { 185 | Mat4 m; 186 | float rad = angle * MATH_DEG_TO_RAD; 187 | float c = cosf(rad); 188 | float s = sinf(rad); 189 | m[0] = c; m[4] = 0.0f; m[8] = s; m[12] = 0.0f; 190 | m[1] = 0.0f; m[5] = 1.0f; m[9] = 0.0f; m[13] = 0.0f; 191 | m[2] = -s; m[6] = 0.0f; m[10] = c; m[14] = 0.0f; 192 | m[3] = 0.0f; m[7] = 0.0f; m[11] = 0.0f; m[15] = 1.0f; 193 | 194 | return m; 195 | } 196 | 197 | Mat4 Mat4_RotateZ(float angle) 198 | { 199 | Mat4 m; 200 | float rad = angle * MATH_DEG_TO_RAD; 201 | float c = cosf(rad); 202 | float s = sinf(rad); 203 | m[0] = c; m[4] = -s; m[8] = 0.0f; m[12] = 0.0f; 204 | m[1] = s; m[5] = c; m[9] = 0.0f; m[13] = 0.0f; 205 | m[2] = 0.0f; m[6] = 0.0f; m[10] = 1.0f; m[14] = 0.0f; 206 | m[3] = 0.0f; m[7] = 0.0f; m[11] = 0.0f; m[15] = 1.0f; 207 | 208 | return m; 209 | } 210 | 211 | Mat4 
Mat4_Scale(const Vec3f &v) 212 | { 213 | Mat4 m; 214 | m[0] = v.x; m[4] = 0.0f; m[8] = 0.0f; m[12] = 0.0f; 215 | m[1] = 0.0f; m[5] = v.y; m[9] = 0.0f; m[13] = 0.0f; 216 | m[2] = 0.0f; m[6] = 0.0f; m[10] = v.z; m[14] = 0.0f; 217 | m[3] = 0.0f; m[7] = 0.0f; m[11] = 0.0f; m[15] = 1.0f; 218 | 219 | return m; 220 | } 221 | 222 | Mat4 Mat4_Scale(float f) 223 | { 224 | return Mat4_Scale(Vec3f(f)); 225 | } 226 | 227 | Mat4 Mat4_Translate(const Vec3f &v) 228 | { 229 | Mat4 m; 230 | m[0] = 1.0f; m[4] = 0.0f; m[8] = 0.0f; m[12] = v.x; 231 | m[1] = 0.0f; m[5] = 1.0f; m[9] = 0.0f; m[13] = v.y; 232 | m[2] = 0.0f; m[6] = 0.0f; m[10] = 1.0f; m[14] = v.z; 233 | m[3] = 0.0f; m[7] = 0.0f; m[11] = 0.0f; m[15] = 1.0f; 234 | 235 | return m; 236 | } 237 | 238 | Mat4 Mat4_Perspective(float fov, float aspect, float znear, float zfar) 239 | { 240 | Mat4 m; 241 | float y = std::tan(fov * MATH_DEG_TO_RAD / 2); 242 | float x = y * aspect; 243 | 244 | m[0] = 1.0f / x; 245 | m[1] = 0.0f; 246 | m[2] = 0.0f; 247 | m[3] = 0.0f; 248 | 249 | m[4] = 0.0f; 250 | m[5] = 1.0f / y; 251 | m[6] = 0.0f; 252 | m[7] = 0.0f; 253 | 254 | m[8] = 0.0f; 255 | m[9] = 0.0f; 256 | m[10] = -(zfar + znear) / (zfar - znear); 257 | m[11] = -1.0f; 258 | 259 | m[12] = 0.0f; 260 | m[13] = 0.0f; 261 | m[14] = -(2.0f * zfar * znear) / (zfar - znear); 262 | m[15] = 0.0f; 263 | 264 | return m; 265 | } 266 | 267 | Mat4 Mat4_Ortho(float left, float right, float bottom, float top, float znear, float zfar) 268 | { 269 | Mat4 m; 270 | float x = 2.0f / (right - left); 271 | float y = 2.0f / (top - bottom); 272 | float z = -2.0f / (zfar - znear); 273 | float tx = - ((right + left) / (right - left)); 274 | float ty = - ((top + bottom) / (top - bottom)); 275 | float tz = - ((zfar + znear) / (zfar - znear)); 276 | 277 | m[0] = x; 278 | m[1] = 0.0f; 279 | m[2] = 0.0f; 280 | m[3] = 0.0f; 281 | 282 | m[4] = 0.0f; 283 | m[5] = y; 284 | m[6] = 0.0f; 285 | m[7] = 0.0f; 286 | 287 | m[8] = 0.0f; 288 | m[9] = 0.0f; 289 | m[10] = z; 290 | m[11] = 
0.0f; 291 | 292 | m[12] = tx; 293 | m[13] = ty; 294 | m[14] = tz; 295 | m[15] = 1.0f; 296 | 297 | return m; 298 | } 299 | 300 | Mat4 Mat4_LookAt(const Vec3f &eye, const Vec3f ¢er, const Vec3f &up) 301 | { 302 | Mat4 m; 303 | Vec3f n,u,s; 304 | n = normalize(eye - center); 305 | s = normalize(cross(up, n)); 306 | u = normalize(cross(n, s)); 307 | 308 | m[0] = s.x; m[4] = s.y; m[8] = s.z; m[12] = 0.0f; 309 | m[1] = u.x; m[5] = u.y; m[9] = u.z; m[13] = 0.0f; 310 | m[2] = n.x; m[6] = n.y; m[10] = n.z; m[14] = 0.0f; 311 | m[3] = 0.0f; m[7] = 0.0f; m[11] = 0.0f; m[15] = 1.0f; 312 | return m * Mat4_Translate(-eye); 313 | } 314 | 315 | Mat4 transpose(const Mat4 &m) 316 | { 317 | Mat4 r; 318 | r[0] = m[0]; r[4] = m[1]; r[8] = m[2]; r[12] = m[3]; 319 | r[1] = m[4]; r[5] = m[5]; r[9] = m[6]; r[13] = m[7]; 320 | r[2] = m[8]; r[6] = m[9]; r[10] = m[10]; r[14] = m[11]; 321 | r[3] = m[12]; r[7] = m[13]; r[11] = m[14]; r[15] = m[15]; 322 | return r; 323 | } 324 | 325 | float determinant(const Mat4 &m) 326 | { 327 | float d; 328 | d = m[0] * (m[5] * (m[10] * m[15] - m[14] * m[11]) - m[9] * (m[6] * m[15] - m[14] * m[7]) + m[13] * (m[6] * m[11] - m[10] * m[7])); 329 | d -= m[4] * (m[1] * (m[10] * m[15] - m[14] * m[11]) - m[9] * (m[2] * m[15] - m[14] * m[3]) + m[13] * (m[2] * m[11] - m[10] * m[3])); 330 | d += m[8] * (m[1] * (m[6] * m[15] - m[14] * m[7]) - m[5] * (m[2] * m[15] - m[14] * m[3]) + m[13] * (m[2] * m[7] - m[6] * m[3])); 331 | d -= m[12] * (m[1] * (m[6] * m[11] - m[10] * m[7]) - m[5] * (m[2] * m[11] - m[10] * m[3]) + m[9] * (m[2] * m[7] - m[6] * m[3])); 332 | return d; 333 | } 334 | 335 | Mat4 inverse(const Mat4 &m, bool *inversed) 336 | { 337 | Mat4 r; 338 | float d = determinant(m); 339 | if (d < MATH_EPSILON) { 340 | if (inversed) 341 | *inversed = false; 342 | return r; 343 | } 344 | float id = 1.0f / d; 345 | r[0] = (m[5] * (m[10] * m[15] - m[14] * m[11]) - m[9] * (m[6] * m[15] - m[14] * m[7]) + m[13] * (m[6] * m[11] - m[10] * m[7])) * id; 346 | r[1] = -(m[1] * (m[10] 
* m[15] - m[14] * m[11]) - m[9] * (m[2] * m[15] - m[14] * m[3]) + m[13] * (m[2] * m[11] - m[10] * m[3])) * id; 347 | r[2] = (m[1] * (m[6] * m[15] - m[14] * m[7]) - m[5] * (m[2] * m[15] - m[14] * m[3]) + m[13] * (m[2] * m[7] - m[6] * m[3])) * id; 348 | r[3] = -(m[1] * (m[6] * m[11] - m[10] * m[7]) - m[5] * (m[2] * m[11] - m[10] * m[3]) + m[9] * (m[2] * m[7] - m[6] * m[3])) * id; 349 | r[4] = -(m[4] * (m[10] * m[15] - m[14] * m[11]) - m[8] * (m[6] * m[15] - m[14] * m[7]) + m[12] * (m[6] * m[11] - m[10] * m[7])) * id; 350 | r[5] = (m[0] * (m[10] * m[15] - m[14] * m[11]) - m[8] * (m[2] * m[15] - m[14] * m[3]) + m[12] * (m[2] * m[11] - m[10] * m[3])) * id; 351 | r[6] = -(m[0] * (m[6] * m[15] - m[14] * m[7]) - m[4] * (m[2] * m[15] - m[14] * m[3]) + m[12] * (m[2] * m[7] - m[6] * m[3])) * id; 352 | r[7] = (m[0] * (m[6] * m[11] - m[10] * m[7]) - m[4] * (m[2] * m[11] - m[10] * m[3]) + m[8] * (m[2] * m[7] - m[6] * m[3])) * id; 353 | r[8] = (m[4] * (m[9] * m[15] - m[13] * m[11]) - m[8] * (m[5] * m[15] - m[13] * m[7]) + m[12] * (m[5] * m[11] - m[9] * m[7])) * id; 354 | r[9] = -(m[0] * (m[9] * m[15] - m[13] * m[11]) - m[8] * (m[1] * m[15] - m[13] * m[3]) + m[12] * (m[1] * m[11] - m[9] * m[3])) * id; 355 | r[10] = (m[0] * (m[5] * m[15] - m[13] * m[7]) - m[4] * (m[1] * m[15] - m[13] * m[3]) + m[12] * (m[1] * m[7] - m[5] * m[3])) * id; 356 | r[11] = -(m[0] * (m[5] * m[11] - m[9] * m[7]) - m[4] * (m[1] * m[11] - m[9] * m[3]) + m[8] * (m[1] * m[7] - m[5] * m[3])) * id; 357 | r[12] = -(m[4] * (m[9] * m[14] - m[13] * m[10]) - m[8] * (m[5] * m[14] - m[13] * m[6]) + m[12] * (m[5] * m[10] - m[9] * m[6])) * id; 358 | r[13] = (m[0] * (m[9] * m[14] - m[13] * m[10]) - m[8] * (m[1] * m[14] - m[13] * m[2]) + m[12] * (m[1] * m[10] - m[9] * m[2])) * id; 359 | r[14] = -(m[0] * (m[5] * m[14] - m[13] * m[6]) - m[4] * (m[1] * m[14] - m[13] * m[2]) + m[12] * (m[1] * m[6] - m[5] * m[2])) * id; 360 | r[15] = (m[0] * (m[5] * m[10] - m[9] * m[6]) - m[4] * (m[1] * m[10] - m[9] * m[2]) + m[8] * (m[1] * m[6] 
- m[5] * m[2])) * id; 361 | if (inversed) 362 | *inversed = true; 363 | return r; 364 | } 365 | 366 | Vec4f Vec4_MiniOrtho2D(float left, float right, float bottom, float top, const Vec2f &offset) 367 | { 368 | float x = 2.0f / (right - left); 369 | float y = 2.0f / (top - bottom); 370 | float tx = -((right + left) / (right - left)) + offset.x * x; 371 | float ty = -((top + bottom) / (top - bottom)) + offset.y * y; 372 | return {x, y, tx, ty}; 373 | } 374 | 375 | Vec4f mini_ortho_translate(const Vec4f &miniortho, const Vec2f &offset) 376 | { 377 | return { 378 | miniortho.x, 379 | miniortho.y, 380 | miniortho.z + miniortho.x * offset.x, 381 | miniortho.w + miniortho.y * offset.y 382 | }; 383 | } 384 | 385 | Vec4f Vec4_MiniPerspective3D(float vfov, float aspect, float znear, float zfar) 386 | { 387 | float v = std::tan(vfov * MATH_DEG_TO_RAD); 388 | float h = v * aspect; 389 | return { 390 | 1.0f / v, 391 | 1.0f / h, 392 | -(zfar + znear) / (zfar - znear), 393 | -(2.0f * zfar * znear) / (zfar - znear) 394 | }; 395 | } 396 | 397 | Mat4 to_mat4(const Mat3 &m) 398 | { 399 | return Mat4( 400 | m[0], m[1], m[2], 0, 401 | m[3], m[4], m[5], 0, 402 | m[6], m[7], m[8], 0, 403 | 0, 0, 0, 1 404 | ); 405 | } 406 | 407 | Mat3 to_mat3(const Mat4 &m) 408 | { 409 | return Mat3( 410 | m[0], m[1], m[2], 411 | m[4], m[5], m[6], 412 | m[8], m[9], m[10] 413 | ); 414 | } 415 | -------------------------------------------------------------------------------- /Math/Mat.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Math/Vec.h" 4 | 5 | //------------------------------------------------------------------------------ 6 | // Mat3 (column-major OpenGL-style) 7 | //------------------------------------------------------------------------------ 8 | 9 | struct Mat3 { 10 | // m[row][column] 11 | union { 12 | struct { 13 | float m11, m21, m31; // first column 14 | float m12, m22, m32; // second column 15 | float m13, m23, m33; // 
// 4x4 float matrix stored column-major (OpenGL-style).
//
// Named elements read m<row><column>; the flat `data` index walks down one
// column after another:
//   X axis      -> m11 m21 m31 == data[0]  data[1]  data[2]
//   Y axis      -> m12 m22 m32 == data[4]  data[5]  data[6]
//   Z axis      -> m13 m23 m33 == data[8]  data[9]  data[10]
//   translation -> m14 m24 m34 == data[12] data[13] data[14]
struct Mat4 {
    union {
        struct {
            float m11, m21, m31, m41; // column 1
            float m12, m22, m32, m42; // column 2
            float m13, m23, m33, m43; // column 3
            float m14, m24, m34, m44; // column 4
        };
        float data[16];
    };

    // Trivial default constructor; it performs no initialization of its own.
    Mat4() = default;

    // Element-wise constructor. Arguments are given column by column, i.e. in
    // the same order as they are laid out in memory.
    Mat4(
        float a11, float a21, float a31, float a41,
        float a12, float a22, float a32, float a42,
        float a13, float a23, float a33, float a43,
        float a14, float a24, float a34, float a44
    )
    {
        m11 = a11; m21 = a21; m31 = a31; m41 = a41;
        m12 = a12; m22 = a22; m32 = a32; m42 = a42;
        m13 = a13; m23 = a23; m33 = a33; m43 = a43;
        m14 = a14; m24 = a24; m34 = a34; m44 = a44;
    }

    // Copies 16 consecutive floats from `n` (column-major order).
    explicit Mat4(const float *n):
        Mat4(
            n[0], n[1], n[2], n[3],
            n[4], n[5], n[6], n[7],
            n[8], n[9], n[10], n[11],
            n[12], n[13], n[14], n[15]
        )
    {}

    // Flat element access, no bounds checking.
    float &operator[](int i)
    {
        return data[i];
    }

    float operator[](int i) const
    {
        return data[i];
    }

    void dump() const;
};
134 | Mat4 Mat4_Rotate(const Vec3f &axis, float angle); 135 | Mat4 Mat4_RotateX(float angle); // same as Mat4_Rotate(Vec3f_X(), angle) 136 | Mat4 Mat4_RotateY(float angle); // same as Mat4_Rotate(Vec3f_Y(), angle) 137 | Mat4 Mat4_RotateZ(float angle); // same as Mat4_Rotate(Vec3f_Z(), angle) 138 | Mat4 Mat4_Scale(const Vec3f &v); 139 | Mat4 Mat4_Scale(float f); 140 | Mat4 Mat4_Translate(const Vec3f &v); 141 | Mat4 Mat4_Perspective(float fov, float aspect, float znear, float zfar); 142 | Mat4 Mat4_Ortho(float left, float right, float bottom, float top, 143 | float znear = -1.0f, float zfar = 1.0f); 144 | Mat4 Mat4_LookAt(const Vec3f &eye, const Vec3f ¢er, const Vec3f &up); 145 | 146 | Mat4 transpose(const Mat4 &m); 147 | float determinant(const Mat4 &m); 148 | Mat4 inverse(const Mat4 &m, bool *inversed = nullptr); 149 | 150 | // MiniOrtho2D contains scale components at xy and translation at zw, to 151 | // transform the vertex do this: v * m.xy + m.zw. 152 | Vec4f Vec4_MiniOrtho2D(float left, float right, float bottom, float top, 153 | const Vec2f &offset = Vec2f(0)); 154 | Vec4f mini_ortho_translate(const Vec4f &miniortho, const Vec2f &offset); 155 | 156 | // MiniPerspective3D contains scale in xyz and z-offset in w, to transform the 157 | // vertex do this: Vec4(v.xy * m.xy, v.z * m.z + m.w, -v.z) 158 | Vec4f Vec4_MiniPerspective3D(float vfov, float aspect, float znear, float zfar); 159 | 160 | Mat4 to_mat4(const Mat3 &m); 161 | Mat3 to_mat3(const Mat4 &m); 162 | -------------------------------------------------------------------------------- /Math/Plane.cpp: -------------------------------------------------------------------------------- 1 | #include "Math/Plane.h" 2 | 3 | PlaneSide Plane::side(const Vec3f &point) const 4 | { 5 | if ((dot(n, point) + d) >= 0.0f) 6 | return PS_FRONT; 7 | 8 | return PS_BACK; 9 | } 10 | 11 | PlaneSide Plane::side(const Vec3f &min, const Vec3f &max) const 12 | { 13 | Vec3f near(max); 14 | Vec3f far(min); 15 | 16 | if (n.x > 0) { 17 | 
near.x = min.x; 18 | far.x = max.x; 19 | } 20 | 21 | if (n.y > 0) { 22 | near.y = min.y; 23 | far.y = max.y; 24 | } 25 | 26 | if (n.z > 0) { 27 | near.z = min.z; 28 | far.z = max.z; 29 | } 30 | 31 | if (dot(n, near) + d > 0) 32 | return PS_FRONT; 33 | 34 | if (dot(n, far) + d > 0) 35 | return PS_BOTH; 36 | 37 | return PS_BACK; 38 | } 39 | -------------------------------------------------------------------------------- /Math/Plane.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Math/Vec.h" 4 | #include "Math/Sphere.h" 5 | 6 | enum PlaneSide { 7 | PS_FRONT, 8 | PS_BACK, 9 | PS_BOTH, 10 | }; 11 | 12 | struct Plane { 13 | Plane() = default; 14 | 15 | // the normal should be normalized 16 | Plane(const Vec3f &origin, const Vec3f &normal): n(normal), d(-dot(n, origin)) {} 17 | Plane(const Vec3f &v1, const Vec3f &v2, const Vec3f &v3) 18 | { 19 | n = normalize(cross(v2 - v1, v3 - v1)); 20 | d = -dot(n, v1); 21 | } 22 | 23 | bool operator==(const Plane &r) const { return n == r.n && d == r.d; } 24 | bool operator!=(const Plane &r) const { return n != r.n || d != r.d; } 25 | 26 | PlaneSide side(const Vec3f &point) const; 27 | PlaneSide side(const Vec3f &min, const Vec3f &max) const; 28 | 29 | Vec3f n; 30 | float d; 31 | }; 32 | -------------------------------------------------------------------------------- /Math/Quat.cpp: -------------------------------------------------------------------------------- 1 | #include "Math/Quat.h" 2 | 3 | Quat::Quat(const Vec3f &dir, float angle) 4 | { 5 | const float halfangle = angle * MATH_DEG_TO_RAD / 2.0f; 6 | const float sinangle = sinf(halfangle); 7 | x = dir.x * sinangle; 8 | y = dir.y * sinangle; 9 | z = dir.z * sinangle; 10 | w = cosf(halfangle); 11 | } 12 | 13 | Quat::Quat(const Vec3f &u, const Vec3f &v) 14 | { 15 | Vec3f w = cross(u, v); 16 | float len2 = length2(w); 17 | float real = dot(u, v); 18 | if (len2 < MATH_EPSILON && real < 0) { 19 | w = std::abs(u.x) 
> std::abs(u.z) ? 20 | Vec3f(-u.y, u.x, 0) / Vec3f(length(u.XY())) : 21 | Vec3f(0, -u.z, u.y) / Vec3f(length(u.YZ())); 22 | this->x = w.x; 23 | this->y = w.y; 24 | this->z = w.z; 25 | this->w = 0; 26 | return; 27 | } 28 | 29 | real += std::sqrt(real * real + len2); 30 | float ilen = 1 / std::sqrt(real * real + len2); 31 | this->x = w.x * ilen; 32 | this->y = w.y * ilen; 33 | this->z = w.z * ilen; 34 | this->w = real * ilen; 35 | } 36 | 37 | Vec3f Quat::rotate(const Vec3f &v) const 38 | { 39 | /* 40 | // Alternative implementation, don't know which one is faster. 41 | // TODO: measure both 42 | const Vec3 xyz(x, y, z); 43 | const Vec3 t = Vec3(2) * Cross(xyz, v); 44 | return v + Vec3(w) * t + Cross(xyz, t); 45 | */ 46 | const Vec3f xyz(x, y, z); 47 | return v + Vec3f(2) * cross(xyz, cross(xyz, v) + Vec3f(w) * v); 48 | } 49 | 50 | Quat normalize(const Quat &q) 51 | { 52 | float il = 1.0f / (q.x*q.x + q.y*q.y + q.z*q.z + q.w*q.w); 53 | return {q.x * il, q.y * il, q.z * il, q.w * il}; 54 | } 55 | 56 | Quat slerp(const Quat &q0, const Quat &q1, float t) 57 | { 58 | float k0, k1, cosomega = q0.x * q1.x + q0.y * q1.y + q0.z * q1.z + q0.w * q1.w; 59 | Quat q; 60 | if (cosomega < 0.0f) { 61 | cosomega = -cosomega; 62 | q.x = -q1.x; 63 | q.y = -q1.y; 64 | q.z = -q1.z; 65 | q.w = -q1.w; 66 | } else { 67 | q.x = q1.x; 68 | q.y = q1.y; 69 | q.z = q1.z; 70 | q.w = q1.w; 71 | } 72 | if (1.0f - cosomega > MATH_EPSILON) { 73 | float omega = acosf(cosomega); 74 | float sinomega = sinf(omega); 75 | k0 = sinf((1.0f - t) * omega) / sinomega; 76 | k1 = sinf(t * omega) / sinomega; 77 | } else { 78 | k0 = 1.0f - t; 79 | k1 = t; 80 | } 81 | 82 | return { 83 | q0.x * k0 + q.x * k1, 84 | q0.y * k0 + q.y * k1, 85 | q0.z * k0 + q.z * k1, 86 | q0.w * k0 + q.w * k1 87 | }; 88 | } 89 | 90 | Quat to_quat(const Mat4 &m) 91 | { 92 | Quat q; 93 | float trace = m[0] + m[5] + m[10]; 94 | if (trace > 0.0f) { 95 | float s = sqrtf(trace + 1.0f); 96 | q[3] = 0.5f * s; 97 | s = 0.5f / s; 98 | q[0] = (m[6] - 
m[9]) * s; 99 | q[1] = (m[8] - m[2]) * s; 100 | q[2] = (m[1] - m[4]) * s; 101 | } else { 102 | static const int next[3] = { 1, 2, 0 }; 103 | int i = 0; 104 | if (m[5] > m[0]) i = 1; 105 | if (m[10] > m[4 * i + i]) i = 2; 106 | int j = next[i]; 107 | int k = next[j]; 108 | float s = sqrtf(m[4 * i + i] - m[4 * j + j] - m[4 * k + k] + 1.0f); 109 | q[i] = 0.5f * s; 110 | if (s != 0) s = 0.5f / s; 111 | q[3] = (m[4 * j + k] - m[4 * k + j]) * s; 112 | q[j] = (m[4 * i + j] + m[4 * j + i]) * s; 113 | q[k] = (m[4 * i + k] + m[4 * k + i]) * s; 114 | } 115 | return q; 116 | } 117 | 118 | Mat3 to_mat3(const Quat &q) 119 | { 120 | Mat3 r; 121 | const float x2 = q.x + q.x; 122 | const float y2 = q.y + q.y; 123 | const float z2 = q.z + q.z; 124 | const float xx = q.x * x2; 125 | const float yy = q.y * y2; 126 | const float zz = q.z * z2; 127 | const float xy = q.x * y2; 128 | const float yz = q.y * z2; 129 | const float xz = q.z * x2; 130 | const float wx = q.w * x2; 131 | const float wy = q.w * y2; 132 | const float wz = q.w * z2; 133 | r[0] = 1.0f - (yy + zz); r[3] = xy - wz; r[6] = xz + wy; 134 | r[1] = xy + wz; r[4] = 1.0f - (xx + zz); r[7] = yz - wx; 135 | r[2] = xz - wy; r[5] = yz + wx; r[8] = 1.0f - (xx + yy); 136 | return r; 137 | } 138 | 139 | Mat4 to_mat4(const Quat &q) 140 | { 141 | return to_mat4(to_mat3(q)); 142 | } 143 | 144 | Quat Quat_LookAt(const Vec3f &v) 145 | { 146 | const Vec3f unit = -Vec3f_Z(); 147 | const Vec3f around_y = normalize(Vec3f(v.x, 0, v.z)); 148 | const Quat xq(unit, around_y); 149 | const Quat yq(around_y, v); 150 | return yq * xq; 151 | } 152 | -------------------------------------------------------------------------------- /Math/Quat.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Math/Vec.h" 4 | #include "Math/Mat.h" 5 | 6 | //------------------------------------------------------------------------------ 7 | // Quat 8 | 
//------------------------------------------------------------------------------ 9 | 10 | struct Quat { 11 | union { 12 | struct { 13 | float x, y, z, w; 14 | }; 15 | float data[4]; 16 | }; 17 | 18 | Quat() = default; 19 | Quat(float x, float y, float z, float w): x(x), y(y), z(z), w(w) {} 20 | Quat(float x, float y, float z): x(x), y(y), z(z) { reconstruct_w(); } 21 | 22 | // dir should be normalized, angle is in degrees 23 | // Represents CCW rotation around dir, when dir points at viewer and 24 | // coordinate system is RH. 25 | Quat(const Vec3f &dir, float angle); 26 | 27 | // both u and v should be normalized 28 | Quat(const Vec3f &u, const Vec3f &v); 29 | 30 | // rotates the vector 'v' by quaternion 31 | Vec3f rotate(const Vec3f &v) const; 32 | 33 | void reconstruct_w() { w = std::sqrt(std::abs(1.0f - (x*x + y*y + z*z))); } 34 | 35 | Vec3f compress() const { return w < 0.0 ? Vec3f(-x, -y, -z) : Vec3f(x, y, z); } 36 | 37 | float &operator[](int i) { return data[i]; } 38 | float operator[](int i) const { return data[i]; } 39 | Quat operator-() const { return Quat(-x, -y, -z, -w); } 40 | }; 41 | 42 | static inline bool operator==(const Quat &l, const Quat &r) 43 | { 44 | return l.x == r.x && l.y == r.y && l.z == r.z && l.w == r.w; 45 | } 46 | 47 | static inline bool operator!=(const Quat &l, const Quat &r) 48 | { 49 | return l.x != r.x || l.y != r.y || l.z != r.z || l.w != r.w; 50 | } 51 | 52 | // Multiplying l with r applies the rotation r to l 53 | static inline Quat operator*(const Quat &l, const Quat &r) 54 | { 55 | return { 56 | l.w * r.x + l.x * r.w + l.y * r.z - l.z * r.y, 57 | l.w * r.y + l.y * r.w + l.z * r.x - l.x * r.z, 58 | l.w * r.z + l.z * r.w + l.x * r.y - l.y * r.x, 59 | l.w * r.w - l.x * r.x - l.y * r.y - l.z * r.z 60 | }; 61 | } 62 | 63 | Quat normalize(const Quat &q); 64 | Quat slerp(const Quat &q0, const Quat &q1, float t); 65 | 66 | Quat to_quat(const Mat4 &m); 67 | Mat3 to_mat3(const Quat &q); 68 | Mat4 to_mat4(const Quat &q); 69 | 70 | 
static inline Quat inverse(const Quat &q) { return {-q.x, -q.y, -q.z, q.w}; } 71 | 72 | static inline Quat Quat_Identity() { return {0, 0, 0, 1}; } 73 | Quat Quat_LookAt(const Vec3f &dir); 74 | -------------------------------------------------------------------------------- /Math/Sphere.cpp: -------------------------------------------------------------------------------- 1 | #include "Math/Sphere.h" 2 | 3 | Sphere transform(const Sphere &in, const Transform &tr) 4 | { 5 | Sphere out = in; 6 | out.center = transform(in.center, tr); 7 | return out; 8 | } 9 | -------------------------------------------------------------------------------- /Math/Sphere.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Math/Vec.h" 4 | #include "Math/Transform.h" 5 | 6 | struct Sphere { 7 | Vec3f center; 8 | float radius; 9 | 10 | Sphere() = default; 11 | Sphere(const Vec3f ¢er, float radius): center(center), radius(radius) {} 12 | 13 | float diameter() const { return 2 * radius; } 14 | }; 15 | 16 | Sphere transform(const Sphere &in, const Transform &tr); 17 | -------------------------------------------------------------------------------- /Math/Transform.cpp: -------------------------------------------------------------------------------- 1 | #include "Math/Transform.h" 2 | 3 | Mat4 to_mat4(const Transform &tf) 4 | { 5 | return to_mat4(tf.orientation) * Mat4_Translate(tf.translation); 6 | } 7 | 8 | Transform inverse(const Transform &tf) 9 | { 10 | return {inverse(tf.orientation), -tf.translation}; 11 | } 12 | 13 | Vec3f transform(const Vec3f &in, const Transform &tr) 14 | { 15 | return tr.orientation.rotate(in) + tr.translation; 16 | } 17 | 18 | Transform transform(const Transform &in, const Transform &tr) 19 | { 20 | Transform out; 21 | out.translation = tr.translation + tr.orientation.rotate(in.translation); 22 | out.orientation = in.orientation * tr.orientation; 23 | return out; 24 | } 25 | 
//------------------------------------------------------------------------------
// Utility functions
//------------------------------------------------------------------------------

// Linear interpolation: yields `a` for v == 0 and `b` for v == 1.
template <typename T>
static inline T lerp(const T &a, const T &b, float v)
{
    return a * (1 - v) + b * v;
}

// Limits `value` to the range [min, max].
template <typename T>
static inline T clamp(const T &value, const T &min, const T &max)
{
    if (value > max)
        return max;
    if (value < min)
        return min;
    return value;
}

// Returns a reference to the larger argument (`v2` when they compare equal).
// The result refers to one of the arguments, so do not keep it as a reference
// when temporaries were passed in.
template <typename T>
static inline const T &max(const T &v1, const T &v2)
{
    return (v1 > v2) ? v1 : v2;
}

// returns 0 if v1 > v2 or 1 otherwise
template <typename T>
static inline int max_i(const T &v1, const T &v2)
{
    return (v1 > v2) ? 0 : 1;
}

// Returns a reference to the smaller argument (`v2` when they compare equal).
// The same lifetime caveat as for max() applies.
template <typename T>
static inline const T &min(const T &v1, const T &v2)
{
    return (v1 < v2) ? v1 : v2;
}

// returns 0 if v1 < v2 or 1 otherwise
template <typename T>
static inline int min_i(const T &v1, const T &v2)
{
    return (v1 < v2) ? 0 : 1;
}

// Smallest of three values, by reference to the winning argument.
template <typename T>
static inline const T &min3(const T &v1, const T &v2, const T &v3)
{
    return min(v1, min(v2, v3));
}

// Largest of three values, by reference to the winning argument.
template <typename T>
static inline const T &max3(const T &v1, const T &v2, const T &v3)
{
    return max(v1, max(v2, v3));
}

// Index (0, 1 or 2) of the smallest argument; the first one wins ties.
template <typename T>
static inline int min3_i(const T &v1, const T &v2, const T &v3)
{
    const T *vs[] = {&v1, &v2, &v3};
    int min = 0;
    for (int i = 1; i < 3; i++) {
        if (*vs[i] < *vs[min])
            min = i;
    }
    return min;
}

// Index (0, 1 or 2) of the largest argument; the first one wins ties.
template <typename T>
static inline int max3_i(const T &v1, const T &v2, const T &v3)
{
    const T *vs[] = {&v1, &v2, &v3};
    int max = 0;
    for (int i = 1; i < 3; i++) {
        if (*vs[i] > *vs[max])
            max = i;
    }
    return max;
}

// Rounds `v` up to the nearest power of two; powers of two map to themselves.
// Returns 0 for v <= 0. Values above 2^30 have no power of two that fits in
// an int, so the result is not meaningful for them.
static inline int next_power_of_2(int v)
{
    // Smear the highest set bit of (v - 1) into every lower position, then
    // add one. The work is done on the unsigned representation so that the
    // decrement, the shifts and the final increment never run into signed
    // overflow or shifts of negative values.
    unsigned int u = static_cast<unsigned int>(v) - 1u;
    u |= u >> 1;
    u |= u >> 2;
    u |= u >> 4;
    u |= u >> 8;
    u |= u >> 16;
    return static_cast<int>(u + 1u);
}

// Integer division rounding towards negative infinity (plain `/` truncates
// towards zero). `b` must not be zero.
static inline int floor_div(int a, int b)
{
    int q = a / b;
    int r = a % b;
    // A non-zero remainder whose sign differs from the divisor's means the
    // truncated quotient was rounded up; step it down by one.
    if (r != 0 && ((r < 0) != (b < 0))) q--;
    return q;
}

//------------------------------------------------------------------------------
// Constants
//------------------------------------------------------------------------------

constexpr float MATH_PI = 3.14159265359f;
constexpr float MATH_2PI = MATH_PI * 2.0f;
constexpr float MATH_HALF_PI = MATH_PI / 2.0f;
constexpr float MATH_DEG_TO_RAD = MATH_PI / 180.0f;
constexpr float MATH_RAD_TO_DEG = 180.0f / MATH_PI;
constexpr float MATH_EPSILON = 1e-6f;
// Free functions generated only for the integer flavours of Vec2.
//
// The bitwise and modulo operators are constexpr and include operator~, in
// line with the set generated for the integer Vec3 types. floor_div() is not
// constexpr because the scalar floor_div() it forwards to is not.
// (Comments must stay outside the macro body: a `//` comment on a continued
// line would swallow the rest of the definition.)
#define _DEFINE_VEC2_INT_FUNCTIONS(type, Vec2) \
static inline Vec2 floor_div(const Vec2 &a, const Vec2 &b) { return Vec2(floor_div(a.x, b.x), floor_div(a.y, b.y)); } \
static inline constexpr Vec2 operator&(const Vec2 &l, const Vec2 &r) { return Vec2(l.x & r.x, l.y & r.y); } \
static inline constexpr Vec2 operator|(const Vec2 &l, const Vec2 &r) { return Vec2(l.x | r.x, l.y | r.y); } \
static inline constexpr Vec2 operator^(const Vec2 &l, const Vec2 &r) { return Vec2(l.x ^ r.x, l.y ^ r.y); } \
static inline constexpr Vec2 operator%(const Vec2 &l, const Vec2 &r) { return Vec2(l.x % r.x, l.y % r.y); } \
static inline constexpr Vec2 operator~(const Vec2 &v) { return Vec2(~v.x, ~v.y); }
type x, y; \ 43 | }; \ 44 | type data[2]; \ 45 | }; \ 46 | \ 47 | Vec2() = default; \ 48 | constexpr Vec2(type ax, type ay): x(ax), y(ay) {} \ 49 | explicit constexpr Vec2(type v): x(v), y(v) {} \ 50 | \ 51 | Vec2 &operator+=(const Vec2 &r) { x+=r.x; y+=r.y; return *this; } \ 52 | Vec2 &operator-=(const Vec2 &r) { x-=r.x; y-=r.y; return *this; } \ 53 | Vec2 &operator*=(const Vec2 &r) { x*=r.x; y*=r.y; return *this; } \ 54 | Vec2 &operator/=(const Vec2 &r) { x/=r.x; y/=r.y; return *this; } \ 55 | ADDITIONAL_MEMBERS(type, Vec2) \ 56 | \ 57 | type &operator[](int i) { return data[i]; } \ 58 | type constexpr operator[](int i) const { return data[i]; } \ 59 | }; \ 60 | \ 61 | static inline constexpr Vec2 Vec2##_X(type v = 1) { return {v, 0}; } \ 62 | static inline constexpr Vec2 Vec2##_Y(type v = 1) { return {0, v}; } \ 63 | \ 64 | static inline constexpr bool operator==(const Vec2 &l, const Vec2 &r) { return l.x == r.x && l.y == r.y; } \ 65 | static inline constexpr bool operator!=(const Vec2 &l, const Vec2 &r) { return l.x != r.x || l.y != r.y; } \ 66 | static inline constexpr bool operator<(const Vec2 &l, const Vec2 &r) { return l.x < r.x && l.y < r.y; } \ 67 | static inline constexpr bool operator>(const Vec2 &l, const Vec2 &r) { return l.x > r.x && l.y > r.y; } \ 68 | static inline constexpr bool operator<=(const Vec2 &l, const Vec2 &r) { return l.x <= r.x && l.y <= r.y; } \ 69 | static inline constexpr bool operator>=(const Vec2 &l, const Vec2 &r) { return l.x >= r.x && l.y >= r.y; } \ 70 | \ 71 | static inline constexpr Vec2 operator-(const Vec2 &v) { return Vec2(-v.x, -v.y); } \ 72 | static inline constexpr Vec2 operator+(const Vec2 &l, const Vec2 &r) { return Vec2(l.x + r.x, l.y + r.y); } \ 73 | static inline constexpr Vec2 operator-(const Vec2 &l, const Vec2 &r) { return Vec2(l.x - r.x, l.y - r.y); } \ 74 | static inline constexpr Vec2 operator*(const Vec2 &l, const Vec2 &r) { return Vec2(l.x * r.x, l.y * r.y); } \ 75 | static inline constexpr Vec2 
operator/(const Vec2 &l, const Vec2 &r) { return Vec2(l.x / r.x, l.y / r.y); } \ 76 | \ 77 | static inline constexpr type area(const Vec2 &v) { return v.x * v.y; } \ 78 | static inline constexpr type length2(const Vec2 &v) { return v.x*v.x + v.y*v.y; } \ 79 | static inline constexpr type dot(const Vec2 &v1, const Vec2 &v2) { return v1.x*v2.x + v1.y*v2.y; } \ 80 | static inline constexpr type distance2(const Vec2 &v1, const Vec2 &v2) { return length2(v2-v1); } \ 81 | static inline Vec2 min(const Vec2 &v1, const Vec2 &v2) { return {min(v1.x, v2.x), min(v1.y, v2.y)}; } \ 82 | static inline Vec2 max(const Vec2 &v1, const Vec2 &v2) { return {max(v1.x, v2.x), max(v1.y, v2.y)}; } \ 83 | ADDITIONAL_FUNCTIONS(type, Vec2) \ 84 | 85 | 86 | _DEFINE_VEC2(float, Vec2f, _DEFINE_VEC2_NO_MEMBERS, _DEFINE_VEC2_FLOAT_FUNCTIONS) 87 | _DEFINE_VEC2(double, Vec2d, _DEFINE_VEC2_NO_MEMBERS, _DEFINE_VEC2_FLOAT_FUNCTIONS) 88 | _DEFINE_VEC2(int32_t, Vec2i, _DEFINE_VEC2_INT_MEMBERS, _DEFINE_VEC2_INT_FUNCTIONS) 89 | _DEFINE_VEC2(int16_t, Vec2s, _DEFINE_VEC2_INT_MEMBERS, _DEFINE_VEC2_INT_FUNCTIONS) 90 | _DEFINE_VEC2(int8_t, Vec2b, _DEFINE_VEC2_INT_MEMBERS, _DEFINE_VEC2_INT_FUNCTIONS) 91 | 92 | _DEFINE_VEC2(uint16_t, Vec2us, _DEFINE_VEC2_INT_MEMBERS, _DEFINE_VEC2_INT_FUNCTIONS) 93 | _DEFINE_VEC2(uint8_t, Vec2ub, _DEFINE_VEC2_INT_MEMBERS, _DEFINE_VEC2_INT_FUNCTIONS) 94 | 95 | static inline constexpr Vec2f ToVec2f(const Vec2i &v) { return Vec2f(v.x, v.y); } 96 | static inline constexpr Vec2i ToVec2i(const Vec2f &v) { return Vec2i(v.x, v.y); } 97 | static inline constexpr Vec2i ToVec2i(const Vec2s &v) { return Vec2i(v.x, v.y); } 98 | static inline constexpr Vec2s ToVec2s(const Vec2i &v) { return Vec2s(v.x, v.y); } 99 | 100 | static inline int chebyshev_distance(const Vec2i &a, const Vec2i &b) 101 | { 102 | const Vec2i diff = a - b; 103 | return max(std::abs(diff.x), std::abs(diff.y)); 104 | } 105 | 106 | //------------------------------------------------------------------------------ 107 | // Vec3 
108 | //------------------------------------------------------------------------------ 109 | 110 | #define _DEFINE_VEC3_NO_FUNCTIONS(type, Vec3) 111 | #define _DEFINE_VEC3_NO_MEMBERS(type, Vec3) 112 | 113 | 114 | #define _DEFINE_VEC3_INT_MEMBERS(type, Vec3) \ 115 | Vec3 &operator&=(const Vec3 &r) { x&=r.x; y&=r.y; z&=r.z; return *this; } \ 116 | Vec3 &operator|=(const Vec3 &r) { x|=r.x; y|=r.y; z|=r.z; return *this; } \ 117 | Vec3 &operator^=(const Vec3 &r) { x^=r.x; y^=r.y; z^=r.z; return *this; } \ 118 | Vec3 &operator%=(const Vec3 &r) { x%=r.x; y%=r.y; z%=r.z; return *this; } \ 119 | 120 | 121 | #define _DEFINE_VEC3_INT_FUNCTIONS(type, Vec3) \ 122 | static inline Vec3 floor_div(const Vec3 &a, const Vec3 &b) { return Vec3(floor_div(a.x, b.x), floor_div(a.y, b.y), floor_div(a.z, b.z)); } \ 123 | static inline constexpr Vec3 operator^(const Vec3 &l, const Vec3 &r) { return Vec3(l.x ^ r.x, l.y ^ r.y, l.z ^ r.z); } \ 124 | static inline constexpr Vec3 operator%(const Vec3 &l, const Vec3 &r) { return Vec3(l.x % r.x, l.y % r.y, l.z % r.z); } \ 125 | static inline constexpr Vec3 operator&(const Vec3 &l, const Vec3 &r) { return Vec3(l.x & r.x, l.y & r.y, l.z & r.z); } \ 126 | static inline constexpr Vec3 operator|(const Vec3 &l, const Vec3 &r) { return Vec3(l.x | r.x, l.y | r.y, l.z | r.z); } \ 127 | static inline constexpr Vec3 operator~(const Vec3 &v) { return Vec3(~v.x, ~v.y, ~v.z); } \ 128 | 129 | 130 | #define _DEFINE_VEC3_FLOAT_FUNCTIONS(type, Vec3) \ 131 | static inline Vec3 abs(const Vec3 &v) { return Vec3(std::abs(v.x), std::abs(v.y), std::abs(v.z)); } \ 132 | static inline type length(const Vec3 &v) { return std::sqrt(length2(v)); } \ 133 | static inline Vec3 normalize(const Vec3 &v) { return v / Vec3(length(v)); } \ 134 | static inline bool is_nan(const Vec3 &v) { return std::isnan(v.x) || std::isnan(v.y) || std::isnan(v.z); } \ 135 | static inline type distance(const Vec3 &v1, const Vec3 &v2) { return length(v2-v1); } \ 136 | static inline constexpr Vec3 
lerp(const Vec3 &a, const Vec3 &b, float v) { return a * Vec3(1 - v) + b * Vec3(v); } \ 137 | static inline Vec3 mod(const Vec3 &a, const Vec3 &b) { return Vec3(std::fmod(a.x, b.x), std::fmod(a.y, b.y), std::fmod(a.z, b.z)); } \ 138 | static inline Vec3 pow(const Vec3 &v1, const Vec3 &v2) { return Vec3(std::pow(v1.x, v2.x), std::pow(v1.y, v2.y), std::pow(v1.z, v2.z)); } \ 139 | 140 | 141 | #define _DEFINE_VEC3(type, Vec3, Vec2, ADDITIONAL_MEMBERS, ADDITIONAL_FUNCTIONS) \ 142 | struct Vec3 { \ 143 | union { \ 144 | struct { \ 145 | type x, y, z; \ 146 | }; \ 147 | type data[3]; \ 148 | }; \ 149 | \ 150 | Vec3() = default; \ 151 | constexpr Vec3(type ax, type ay, type az): x(ax), y(ay), z(az) {} \ 152 | explicit constexpr Vec3(type v): x(v), y(v), z(v) {} \ 153 | \ 154 | Vec3 &operator+=(const Vec3 &r) { x+=r.x; y+=r.y; z+=r.z; return *this; } \ 155 | Vec3 &operator-=(const Vec3 &r) { x-=r.x; y-=r.y; z-=r.z; return *this; } \ 156 | Vec3 &operator*=(const Vec3 &r) { x*=r.x; y*=r.y; z*=r.z; return *this; } \ 157 | Vec3 &operator/=(const Vec3 &r) { x/=r.x; y/=r.y; z/=r.z; return *this; } \ 158 | ADDITIONAL_MEMBERS(type, Vec3) \ 159 | \ 160 | type &operator[](int i) { return data[i]; } \ 161 | constexpr type operator[](int i) const { return data[i]; } \ 162 | \ 163 | constexpr Vec2 XY() const { return {x, y}; } \ 164 | constexpr Vec2 XZ() const { return {x, z}; } \ 165 | constexpr Vec2 YZ() const { return {y, z}; } \ 166 | }; \ 167 | \ 168 | static inline constexpr Vec3 Vec3##_X(type v = 1) { return {v, 0, 0}; } \ 169 | static inline constexpr Vec3 Vec3##_Y(type v = 1) { return {0, v, 0}; } \ 170 | static inline constexpr Vec3 Vec3##_Z(type v = 1) { return {0, 0, v}; } \ 171 | static inline constexpr Vec3 Vec3##_XY(const Vec2 &v) { return {v.x, v.y, 0}; } \ 172 | static inline constexpr Vec3 Vec3##_XZ(const Vec2 &v) { return {v.x, 0, v.y}; } \ 173 | static inline constexpr Vec3 Vec3##_YZ(const Vec2 &v) { return {0, v.x, v.y}; } \ 174 | \ 175 | static inline constexpr 
bool operator==(const Vec3 &l, const Vec3 &r) { return l.x == r.x && l.y == r.y && l.z == r.z; } \ 176 | static inline constexpr bool operator!=(const Vec3 &l, const Vec3 &r) { return l.x != r.x || l.y != r.y || l.z != r.z; } \ 177 | static inline constexpr bool operator<(const Vec3 &l, const Vec3 &r) { return l.x < r.x && l.y < r.y && l.z < r.z; } \ 178 | static inline constexpr bool operator>(const Vec3 &l, const Vec3 &r) { return l.x > r.x && l.y > r.y && l.z > r.z; } \ 179 | static inline constexpr bool operator<=(const Vec3 &l, const Vec3 &r) { return l.x <= r.x && l.y <= r.y && l.z <= r.z; } \ 180 | static inline constexpr bool operator>=(const Vec3 &l, const Vec3 &r) { return l.x >= r.x && l.y >= r.y && l.z >= r.z; } \ 181 | static inline constexpr Vec3 operator+(const Vec3 &l, const Vec3 &r) { return Vec3(l.x + r.x, l.y + r.y, l.z + r.z); } \ 182 | static inline constexpr Vec3 operator-(const Vec3 &l, const Vec3 &r) { return Vec3(l.x - r.x, l.y - r.y, l.z - r.z); } \ 183 | static inline constexpr Vec3 operator*(const Vec3 &l, const Vec3 &r) { return Vec3(l.x * r.x, l.y * r.y, l.z * r.z); } \ 184 | static inline constexpr Vec3 operator/(const Vec3 &l, const Vec3 &r) { return Vec3(l.x / r.x, l.y / r.y, l.z / r.z); } \ 185 | static inline constexpr Vec3 operator-(const Vec3 &v) { return Vec3(-v.x, -v.y, -v.z); } \ 186 | \ 187 | static inline constexpr type length2(const Vec3 &v) { return v.x*v.x + v.y*v.y + v.z*v.z; } \ 188 | static inline constexpr type dot(const Vec3 &v1, const Vec3 &v2) { return v1.x*v2.x + v1.y*v2.y + v1.z*v2.z; } \ 189 | static inline constexpr type volume(const Vec3 &v) { return v.x * v.y * v.z; } \ 190 | static inline constexpr Vec3 cross(const Vec3 &v1, const Vec3 &v2) { return Vec3(v1.y * v2.z - v1.z * v2.y, v1.z * v2.x - v1.x * v2.z, v1.x * v2.y - v1.y * v2.x); } \ 191 | static inline constexpr type distance2(const Vec3 &v1, const Vec3 &v2) { return length2(v2-v1); } \ 192 | static inline Vec3 min(const Vec3 &v1, const Vec3 &v2) { 
return {min(v1.x, v2.x), min(v1.y, v2.y), min(v1.z, v2.z)}; } \ 193 | static inline Vec3 max(const Vec3 &v1, const Vec3 &v2) { return {max(v1.x, v2.x), max(v1.y, v2.y), max(v1.z, v2.z)}; } \ 194 | ADDITIONAL_FUNCTIONS(type, Vec3) \ 195 | 196 | 197 | _DEFINE_VEC3(float, Vec3f, Vec2f, _DEFINE_VEC3_NO_MEMBERS, _DEFINE_VEC3_FLOAT_FUNCTIONS) 198 | _DEFINE_VEC3(double, Vec3d, Vec2d, _DEFINE_VEC3_NO_MEMBERS, _DEFINE_VEC3_FLOAT_FUNCTIONS) 199 | _DEFINE_VEC3(int32_t, Vec3i, Vec2i, _DEFINE_VEC3_INT_MEMBERS, _DEFINE_VEC3_INT_FUNCTIONS) 200 | _DEFINE_VEC3(int16_t, Vec3s, Vec2s, _DEFINE_VEC3_INT_MEMBERS, _DEFINE_VEC3_INT_FUNCTIONS) 201 | _DEFINE_VEC3(int8_t, Vec3b, Vec2b, _DEFINE_VEC3_INT_MEMBERS, _DEFINE_VEC3_INT_FUNCTIONS) 202 | 203 | _DEFINE_VEC3(uint16_t, Vec3us, Vec2us, _DEFINE_VEC3_INT_MEMBERS, _DEFINE_VEC3_INT_FUNCTIONS) 204 | _DEFINE_VEC3(uint8_t, Vec3ub, Vec2ub, _DEFINE_VEC3_INT_MEMBERS, _DEFINE_VEC3_INT_FUNCTIONS) 205 | 206 | 207 | static inline constexpr Vec3f ToVec3f(const Vec3i &v) { return Vec3f(v.x, v.y, v.z); } 208 | static inline constexpr Vec3f ToVec3f(const Vec3d &v) { return Vec3f(v.x, v.y, v.z); } 209 | static inline constexpr Vec3f ToVec3f(const Vec3ub &v) { return Vec3f(v.x, v.y, v.z); } 210 | static inline constexpr Vec3f ToVec3f(const Vec3us &v) { return Vec3f(v.x, v.y, v.z); } 211 | static inline constexpr Vec3d ToVec3d(const Vec3i &v) { return Vec3d(v.x, v.y, v.z); } 212 | static inline constexpr Vec3d ToVec3d(const Vec3f &v) { return Vec3d(v.x, v.y, v.z); } 213 | static inline constexpr Vec3i ToVec3i(const Vec3f &v) { return Vec3i(v.x, v.y, v.z); } 214 | static inline constexpr Vec3i ToVec3i(const Vec3d &v) { return Vec3i(v.x, v.y, v.z); } 215 | static inline constexpr Vec3i ToVec3i(const Vec3ub &v) { return Vec3i(v.x, v.y, v.z); } 216 | static inline constexpr Vec3ub ToVec3ub(const Vec3i &v) { return Vec3ub(v.x, v.y, v.z); } 217 | static inline constexpr Vec3us ToVec3us(const Vec3i &v) { return Vec3us(v.x, v.y, v.z); } 218 | 219 | static inline 
Vec3i floor(const Vec3f &v) { return Vec3i(std::floor(v.x), std::floor(v.y), std::floor(v.z)); } 220 | static inline Vec3i floor(const Vec3d &v) { return Vec3i(std::floor(v.x), std::floor(v.y), std::floor(v.z)); } 221 | 222 | static inline constexpr bool axes_equal(const Vec3i &a, const Vec3i &b, const Vec2i &axes) 223 | { 224 | return a[axes[0]] == b[axes[0]] && a[axes[1]] == b[axes[1]]; 225 | } 226 | 227 | static inline constexpr bool aabb_aabb_intersection(const Vec3i &amin, const Vec3i &amax, 228 | const Vec3i &bmin, const Vec3i &bmax) 229 | { 230 | return !( 231 | amax.x < bmin.x || 232 | amax.y < bmin.y || 233 | amax.z < bmin.z || 234 | amin.x > bmax.x || 235 | amin.y > bmax.y || 236 | amin.z > bmax.z 237 | ); 238 | } 239 | 240 | static inline int chebyshev_distance(const Vec3i &a, const Vec3i &b) 241 | { 242 | const Vec3i diff = a - b; 243 | return max3(std::abs(diff.x), std::abs(diff.y), std::abs(diff.z)); 244 | } 245 | 246 | //------------------------------------------------------------------------------ 247 | // Vec4 248 | //------------------------------------------------------------------------------ 249 | 250 | struct Vec4f { 251 | union { 252 | struct { 253 | float x, y, z, w; 254 | }; 255 | float data[4]; 256 | }; 257 | 258 | Vec4f() = default; 259 | constexpr Vec4f(float x, float y, float z, float w): x(x), y(y), z(z), w(w) {} 260 | explicit constexpr Vec4f(float v): x(v), y(v), z(v), w(v) {} 261 | 262 | Vec4f &operator+=(const Vec4f &r) { x+=r.x; y+=r.y; z+=r.z; w+=r.w; return *this; } 263 | Vec4f &operator-=(const Vec4f &r) { x-=r.x; y-=r.y; z-=r.z; w-=r.w; return *this; } 264 | Vec4f &operator*=(const Vec4f &r) { x*=r.x; y*=r.y; z*=r.z; w*=r.w; return *this; } 265 | Vec4f &operator/=(const Vec4f &r) { x/=r.x; y/=r.y; z/=r.z; w/=r.w; return *this; } 266 | 267 | float &operator[](int i) { return data[i]; } 268 | constexpr float operator[](int i) const { return data[i]; } 269 | }; 270 | 271 | static inline constexpr Vec4f operator+(const Vec4f &l, 
const Vec4f &r) { return {l.x + r.x, l.y + r.y, l.z + r.z, l.w + r.w}; } 272 | static inline constexpr Vec4f operator-(const Vec4f &l, const Vec4f &r) { return {l.x - r.x, l.y - r.y, l.z - r.z, l.w - r.w}; } 273 | static inline constexpr Vec4f operator*(const Vec4f &l, const Vec4f &r) { return {l.x * r.x, l.y * r.y, l.z * r.z, l.w * r.w}; } 274 | static inline constexpr Vec4f operator/(const Vec4f &l, const Vec4f &r) { return {l.x / r.x, l.y / r.y, l.z / r.z, l.w / r.w}; } 275 | static inline constexpr bool operator==(const Vec4f &l, const Vec4f &r) { return l.x == r.x && l.y == r.y && l.z == r.z && l.w == r.w; } 276 | static inline constexpr bool operator!=(const Vec4f &l, const Vec4f &r) { return l.x != r.x || l.y != r.y || l.z != r.z || l.w != r.w; } 277 | static inline constexpr bool operator<(const Vec4f &l, const Vec4f &r) { return l.x < r.x && l.y < r.y && l.z < r.z && l.w < r.w; } 278 | static inline constexpr bool operator>(const Vec4f &l, const Vec4f &r) { return l.x > r.x && l.y > r.y && l.z > r.z && l.w > r.w; } 279 | static inline constexpr bool operator<=(const Vec4f &l, const Vec4f &r) { return l.x <= r.x && l.y <= r.y && l.z <= r.z && l.w <= r.w; } 280 | static inline constexpr bool operator>=(const Vec4f &l, const Vec4f &r) { return l.x >= r.x && l.y >= r.y && l.z >= r.z && l.w >= r.w; } 281 | 282 | static inline constexpr float dot(const Vec4f &v1, const Vec4f &v2) { return v1.x*v2.x + v1.y*v2.y + v1.z*v2.z + v1.w*v2.w; } 283 | 284 | static inline constexpr Vec3f ToVec3f(const Vec4f &v) { return Vec3f(v.x, v.y, v.z); } 285 | static inline constexpr Vec4f ToVec4f(const Vec3f &v) { return Vec4f(v.x, v.y, v.z, 1); } 286 | 287 | //------------------------------------------------------------------------------ 288 | // Vec4i 289 | //------------------------------------------------------------------------------ 290 | 291 | struct Vec4i { 292 | union { 293 | struct { 294 | int x, y, z, w; 295 | }; 296 | int data[4]; 297 | }; 298 | 299 | Vec4i() = default; 
300 | constexpr Vec4i(int x, int y, int z, int w): x(x), y(y), z(z), w(w) {} 301 | explicit constexpr Vec4i(int v): x(v), y(v), z(v), w(v) {} 302 | 303 | Vec4i &operator+=(const Vec4i &r) { x+=r.x; y+=r.y; z+=r.z; w+=r.w; return *this; } 304 | Vec4i &operator-=(const Vec4i &r) { x-=r.x; y-=r.y; z-=r.z; w-=r.w; return *this; } 305 | Vec4i &operator*=(const Vec4i &r) { x*=r.x; y*=r.y; z*=r.z; w*=r.w; return *this; } 306 | Vec4i &operator/=(const Vec4i &r) { x/=r.x; y/=r.y; z/=r.z; w/=r.w; return *this; } 307 | 308 | int &operator[](int i) { return data[i]; } 309 | int constexpr operator[](int i) const { return data[i]; } 310 | }; 311 | 312 | static inline constexpr Vec4i operator+(const Vec4i &l, const Vec4i &r) { return {l.x + r.x, l.y + r.y, l.z + r.z, l.w + r.w}; } 313 | static inline constexpr Vec4i operator-(const Vec4i &l, const Vec4i &r) { return {l.x - r.x, l.y - r.y, l.z - r.z, l.w - r.w}; } 314 | static inline constexpr Vec4i operator*(const Vec4i &l, const Vec4i &r) { return {l.x * r.x, l.y * r.y, l.z * r.z, l.w * r.w}; } 315 | static inline constexpr Vec4i operator/(const Vec4i &l, const Vec4i &r) { return {l.x / r.x, l.y / r.y, l.z / r.z, l.w / r.w}; } 316 | static inline constexpr bool operator==(const Vec4i &l, const Vec4i &r) { return l.x == r.x && l.y == r.y && l.z == r.z && l.w == r.w; } 317 | static inline constexpr bool operator!=(const Vec4i &l, const Vec4i &r) { return l.x != r.x || l.y != r.y || l.z != r.z || l.w != r.w; } 318 | static inline constexpr bool operator<(const Vec4i &l, const Vec4i &r) { return l.x < r.x && l.y < r.y && l.z < r.z && l.w < r.w; } 319 | static inline constexpr bool operator>(const Vec4i &l, const Vec4i &r) { return l.x > r.x && l.y > r.y && l.z > r.z && l.w > r.w; } 320 | static inline constexpr bool operator<=(const Vec4i &l, const Vec4i &r) { return l.x <= r.x && l.y <= r.y && l.z <= r.z && l.w <= r.w; } 321 | static inline constexpr bool operator>=(const Vec4i &l, const Vec4i &r) { return l.x >= r.x && l.y >= r.y 
&& l.z >= r.z && l.w >= r.w; } 322 | 323 | static inline constexpr Vec4f ToVec4(const Vec4i &v) { return Vec4f(v.x, v.y, v.z, v.w); } 324 | static inline constexpr Vec4i ToVec4i(const Vec4f &v) { return Vec4i(v.x, v.y, v.z, v.w); } 325 | 326 | //------------------------------------------------------------------------------ 327 | // Vec4us 328 | //------------------------------------------------------------------------------ 329 | 330 | struct Vec4us { 331 | union { 332 | struct { 333 | uint16_t x, y, z, w; 334 | }; 335 | uint16_t data[4]; 336 | }; 337 | 338 | Vec4us() = default; 339 | constexpr Vec4us(uint16_t x, uint16_t y, uint16_t z, uint16_t w): x(x), y(y), z(z), w(w) {} 340 | explicit constexpr Vec4us(int v): x(v), y(v), z(v), w(v) {} 341 | 342 | Vec4us &operator+=(const Vec4us &r) { x+=r.x; y+=r.y; z+=r.z; w+=r.w; return *this; } 343 | Vec4us &operator-=(const Vec4us &r) { x-=r.x; y-=r.y; z-=r.z; w-=r.w; return *this; } 344 | Vec4us &operator*=(const Vec4us &r) { x*=r.x; y*=r.y; z*=r.z; w*=r.w; return *this; } 345 | Vec4us &operator/=(const Vec4us &r) { x/=r.x; y/=r.y; z/=r.z; w/=r.w; return *this; } 346 | 347 | uint16_t &operator[](int i) { return data[i]; } 348 | uint16_t constexpr operator[](int i) const { return data[i]; } 349 | }; 350 | 351 | static inline constexpr Vec4us operator+(const Vec4us &l, const Vec4us &r) { return Vec4us(l.x + r.x, l.y + r.y, l.z + r.z, l.w + r.w); } 352 | static inline constexpr Vec4us operator-(const Vec4us &l, const Vec4us &r) { return Vec4us(l.x - r.x, l.y - r.y, l.z - r.z, l.w - r.w); } 353 | static inline constexpr Vec4us operator*(const Vec4us &l, const Vec4us &r) { return Vec4us(l.x * r.x, l.y * r.y, l.z * r.z, l.w * r.w); } 354 | static inline constexpr Vec4us operator/(const Vec4us &l, const Vec4us &r) { return Vec4us(l.x / r.x, l.y / r.y, l.z / r.z, l.w / r.w); } 355 | 356 | static inline constexpr Vec3f ToVec3f(const Vec4us &v) { return Vec3f(v.x, v.y, v.z); } 357 | 358 | 
//------------------------------------------------------------------------------ 359 | // Vec4ub 360 | //------------------------------------------------------------------------------ 361 | 362 | struct Vec4ub { 363 | union { 364 | struct { 365 | uint8_t x, y, z, w; 366 | }; 367 | uint8_t data[4]; 368 | }; 369 | 370 | Vec4ub() = default; 371 | constexpr Vec4ub(uint8_t x, uint8_t y, uint8_t z, uint8_t w): x(x), y(y), z(z), w(w) {} 372 | explicit constexpr Vec4ub(int v): x(v), y(v), z(v), w(v) {} 373 | 374 | Vec4ub &operator+=(const Vec4ub &r) { x+=r.x; y+=r.y; z+=r.z; w+=r.w; return *this; } 375 | Vec4ub &operator-=(const Vec4ub &r) { x-=r.x; y-=r.y; z-=r.z; w-=r.w; return *this; } 376 | Vec4ub &operator*=(const Vec4ub &r) { x*=r.x; y*=r.y; z*=r.z; w*=r.w; return *this; } 377 | Vec4ub &operator/=(const Vec4ub &r) { x/=r.x; y/=r.y; z/=r.z; w/=r.w; return *this; } 378 | 379 | uint8_t &operator[](int i) { return data[i]; } 380 | uint8_t constexpr operator[](int i) const { return data[i]; } 381 | }; 382 | 383 | static inline constexpr Vec4ub operator+(const Vec4ub &l, const Vec4ub &r) { return Vec4ub(l.x + r.x, l.y + r.y, l.z + r.z, l.w + r.w); } 384 | static inline constexpr Vec4ub operator-(const Vec4ub &l, const Vec4ub &r) { return Vec4ub(l.x - r.x, l.y - r.y, l.z - r.z, l.w - r.w); } 385 | static inline constexpr Vec4ub operator*(const Vec4ub &l, const Vec4ub &r) { return Vec4ub(l.x * r.x, l.y * r.y, l.z * r.z, l.w * r.w); } 386 | static inline constexpr Vec4ub operator/(const Vec4ub &l, const Vec4ub &r) { return Vec4ub(l.x / r.x, l.y / r.y, l.z / r.z, l.w / r.w); } 387 | 388 | static inline constexpr Vec4ub ToVec4ub(const Vec4i &v) { return Vec4ub(v.x, v.y, v.z, v.w); } 389 | static inline constexpr Vec4ub ToVec4ub(const Vec4f &v) { return Vec4ub(v.x * 255.0f, v.y * 255.0f, v.z * 255.0f, v.w * 255.0f); } 390 | 391 | //------------------------------------------------------------------------------ 392 | // Macro Utils 393 | 
//------------------------------------------------------------------------------ 394 | 395 | #define VEC2(v) (v).x, (v).y 396 | #define VEC3(v) (v).x, (v).y, (v).z 397 | #define VEC4(v) (v).x, (v).y, (v).z, (v).w 398 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SSE Frustum Culling Demo 2 | 3 | A simple demo which runs massive frustum culling (by default 512000 spheres) in various setups. 4 | 5 | At the moment it contains two versions of the actual culling code: 6 | 7 | 1. Naive culling. 8 | 9 | ```c++ 10 | for (int i = 0; i < 6; i++) { 11 | const Plane &p = planes[i]; 12 | if (dot(p.n, s.center) + p.d < -s.radius) 13 | return true; 14 | } 15 | return false; 16 | ``` 17 | 18 | 2. SSE culling. 19 | 20 | This is a simple SSE version of the algorithm inspired by [Culling the Battlefield](http://www.frostbite.com/2011/04/culling-the-battlefield-data-oriented-design-in-practice/). The algorithm itself, with minor modifications, is taken from [here](http://www.hmrengine.com/blog/?p=490). I hope the author doesn't mind; after all, it's just a simple parallel dot product calculation on SSE. 21 | 22 | With SSE and SoA data structures you can test a sphere against 4 planes at a time. A frustum has 6, so the algorithm wastes 2 planes. 23 | 24 | The demo should work on both linux (gcc 5.2/clang 3.6) and windows (msvc++ 2015). But you need to install cmake on windows to generate visual studio files. 25 | 26 | There are a couple of command line options to explore: 27 | 28 | - `-v` Enables verbose output. Also prints an ASCII slice of the sphere field, for verification purposes. 29 | - `-s <N>` Overrides the size of the sphere field. That's just one dimension; the resulting size of the field is N x N x N. 
30 | 31 | ## Results 32 | 33 | Example output on my machine (`i5-3470`, `linux`, `x86_64`, `gcc 5.2`): 34 | 35 | ``` 36 | [nsf @ build]$ ./sseculling 37 | Data size: 80x80x80 (512000 objects, 8192000 bytes) 38 | 'Naive culling / structured data' done in 10 runs, average: 3.394078ms 39 | 'Naive culling / random data' done in 10 runs, average: 8.155710ms 40 | 'SSE culling / structured data' done in 10 runs, average: 2.353994ms 41 | 'SSE culling / random data' done in 10 runs, average: 2.345242ms 42 | ---------------------------------------- 43 | 'SSE culling / chunks / structured data / 512 per chunk (w/o prefetch)' done in 10 runs, average: 2.525645ms 44 | 'SSE culling / chunks / random data / 512 per chunk (w/o prefetch)' done in 10 runs, average: 2.531772ms 45 | 'SSE culling / chunks / random data / 512 per chunk (with prefetch)' done in 10 runs, average: 2.502262ms 46 | ---------------------------------------- 47 | 'SSE culling / chunks / structured data / 256 per chunk (w/o prefetch)' done in 10 runs, average: 2.645812ms 48 | 'SSE culling / chunks / random data / 256 per chunk (w/o prefetch)' done in 10 runs, average: 2.656828ms 49 | 'SSE culling / chunks / random data / 256 per chunk (with prefetch)' done in 10 runs, average: 2.605833ms 50 | ---------------------------------------- 51 | 'SSE culling / chunks / structured data / 128 per chunk (w/o prefetch)' done in 10 runs, average: 2.743400ms 52 | 'SSE culling / chunks / random data / 128 per chunk (w/o prefetch)' done in 10 runs, average: 2.889656ms 53 | 'SSE culling / chunks / random data / 128 per chunk (with prefetch)' done in 10 runs, average: 2.806787ms 54 | ---------------------------------------- 55 | 'SSE culling / chunks / structured data / 64 per chunk (w/o prefetch)' done in 10 runs, average: 2.819107ms 56 | 'SSE culling / chunks / random data / 64 per chunk (w/o prefetch)' done in 10 runs, average: 3.470009ms 57 | 'SSE culling / chunks / random data / 64 per chunk (with prefetch)' done in 10 runs, 
average: 3.370178ms 58 | ---------------------------------------- 59 | 'SSE culling / chunks / structured data / 32 per chunk (w/o prefetch)' done in 10 runs, average: 2.648684ms 60 | 'SSE culling / chunks / random data / 32 per chunk (w/o prefetch)' done in 10 runs, average: 4.479559ms 61 | 'SSE culling / chunks / random data / 32 per chunk (with prefetch)' done in 10 runs, average: 4.120239ms 62 | ---------------------------------------- 63 | 'SSE culling / chunks / structured data / 8 per chunk (w/o prefetch)' done in 10 runs, average: 3.001685ms 64 | 'SSE culling / chunks / random data / 8 per chunk (w/o prefetch)' done in 10 runs, average: 9.516912ms 65 | 'SSE culling / chunks / random data / 8 per chunk (with prefetch)' done in 10 runs, average: 5.933696ms 66 | 67 | ``` 68 | 69 | A few comments on the results: 70 | 71 | 1. The naive algorithm can fool you into thinking that it's almost as fast as the SSE one; that's due to branch-prediction-friendly data. If you shuffle the data, however, the truth comes out. 72 | 73 | 2. The SSE version is roughly 4 times faster than the naive version. 74 | 75 | 3. If we break the data into chunks and randomize them, you can clearly see cache miss effects. The effects disappear starting from 128 elements per chunk. 76 | 77 | 4. On very fragmented data, using the `_mm_prefetch` instruction helps quite a bit. 78 | 79 | ## Results on a different machine 80 | 81 | This time it's an AMD CPU; note how prefetching makes way more difference here than on Intel. Very interesting. 
82 | 83 | Example output on my latest machine (`amd threadripper 1950x`, `linux`, `x86_64`, `clang 8.0.1`): 84 | 85 | ``` 86 | nsf@crey ~/p/s/build> ./sseculling 87 | Data size: 80x80x80 (512000 objects, 8192000 bytes) 88 | 'Naive culling / structured data' done in 10 runs, average: 2.186710ms 89 | 'Naive culling / random data' done in 10 runs, average: 8.848435ms 90 | 'SSE culling / structured data' done in 10 runs, average: 1.445463ms 91 | 'SSE culling / random data' done in 10 runs, average: 1.467520ms 92 | ---------------------------------------- 93 | 'SSE culling / chunks / structured data / 512 per chunk (w/o prefetch)' done in 10 runs, average: 1.854928ms 94 | 'SSE culling / chunks / random data / 512 per chunk (w/o prefetch)' done in 10 runs, average: 1.798940ms 95 | 'SSE culling / chunks / random data / 512 per chunk (with prefetch)' done in 10 runs, average: 1.522028ms 96 | ---------------------------------------- 97 | 'SSE culling / chunks / structured data / 256 per chunk (w/o prefetch)' done in 10 runs, average: 2.149564ms 98 | 'SSE culling / chunks / random data / 256 per chunk (w/o prefetch)' done in 10 runs, average: 2.022723ms 99 | 'SSE culling / chunks / random data / 256 per chunk (with prefetch)' done in 10 runs, average: 1.733204ms 100 | ---------------------------------------- 101 | 'SSE culling / chunks / structured data / 128 per chunk (w/o prefetch)' done in 10 runs, average: 2.108009ms 102 | 'SSE culling / chunks / random data / 128 per chunk (w/o prefetch)' done in 10 runs, average: 2.297155ms 103 | 'SSE culling / chunks / random data / 128 per chunk (with prefetch)' done in 10 runs, average: 2.026920ms 104 | ---------------------------------------- 105 | 'SSE culling / chunks / structured data / 64 per chunk (w/o prefetch)' done in 10 runs, average: 2.027192ms 106 | 'SSE culling / chunks / random data / 64 per chunk (w/o prefetch)' done in 10 runs, average: 3.048898ms 107 | 'SSE culling / chunks / random data / 64 per chunk (with 
prefetch)' done in 10 runs, average: 2.619212ms 108 | ---------------------------------------- 109 | 'SSE culling / chunks / structured data / 32 per chunk (w/o prefetch)' done in 10 runs, average: 2.183860ms 110 | 'SSE culling / chunks / random data / 32 per chunk (w/o prefetch)' done in 10 runs, average: 4.267031ms 111 | 'SSE culling / chunks / random data / 32 per chunk (with prefetch)' done in 10 runs, average: 3.265937ms 112 | ---------------------------------------- 113 | 'SSE culling / chunks / structured data / 8 per chunk (w/o prefetch)' done in 10 runs, average: 1.990538ms 114 | 'SSE culling / chunks / random data / 8 per chunk (w/o prefetch)' done in 10 runs, average: 10.015637ms 115 | 'SSE culling / chunks / random data / 8 per chunk (with prefetch)' done in 10 runs, average: 8.100875ms 116 | ``` 117 | 118 | ## Results on yet another machine 119 | 120 | An AMD CPU again. Best of the best as of the time of this commit. 121 | 122 | Example output on my latest machine (`amd ryzen 9 5950x`, `linux`, `x86_64`, `clang 12.0.1`): 123 | 124 | ``` 125 | ~/projects/sseculling/build> ./sseculling 126 | Data size: 80x80x80 (512000 objects, 8192000 bytes) 127 | 'Naive culling / structured data' done in 10 runs, average: 1.625211ms 128 | 'Naive culling / random data' done in 10 runs, average: 6.584432ms 129 | 'SSE culling / structured data' done in 10 runs, average: 1.214213ms 130 | 'SSE culling / random data' done in 10 runs, average: 1.212837ms 131 | ---------------------------------------- 132 | 'SSE culling / chunks / structured data / 512 per chunk (w/o prefetch)' done in 10 runs, average: 1.216788ms 133 | 'SSE culling / chunks / random data / 512 per chunk (w/o prefetch)' done in 10 runs, average: 1.229120ms 134 | 'SSE culling / chunks / random data / 512 per chunk (with prefetch)' done in 10 runs, average: 1.228033ms 135 | ---------------------------------------- 136 | 'SSE culling / chunks / structured data / 256 per chunk (w/o prefetch)' done in 10 runs, average: 
1.213698ms 137 | 'SSE culling / chunks / random data / 256 per chunk (w/o prefetch)' done in 10 runs, average: 1.260839ms 138 | 'SSE culling / chunks / random data / 256 per chunk (with prefetch)' done in 10 runs, average: 1.241638ms 139 | ---------------------------------------- 140 | 'SSE culling / chunks / structured data / 128 per chunk (w/o prefetch)' done in 10 runs, average: 1.200609ms 141 | 'SSE culling / chunks / random data / 128 per chunk (w/o prefetch)' done in 10 runs, average: 1.264215ms 142 | 'SSE culling / chunks / random data / 128 per chunk (with prefetch)' done in 10 runs, average: 1.258731ms 143 | ---------------------------------------- 144 | 'SSE culling / chunks / structured data / 64 per chunk (w/o prefetch)' done in 10 runs, average: 1.262710ms 145 | 'SSE culling / chunks / random data / 64 per chunk (w/o prefetch)' done in 10 runs, average: 1.405424ms 146 | 'SSE culling / chunks / random data / 64 per chunk (with prefetch)' done in 10 runs, average: 1.350807ms 147 | ---------------------------------------- 148 | 'SSE culling / chunks / structured data / 32 per chunk (w/o prefetch)' done in 10 runs, average: 1.112343ms 149 | 'SSE culling / chunks / random data / 32 per chunk (w/o prefetch)' done in 10 runs, average: 1.405099ms 150 | 'SSE culling / chunks / random data / 32 per chunk (with prefetch)' done in 10 runs, average: 1.490859ms 151 | ---------------------------------------- 152 | 'SSE culling / chunks / structured data / 8 per chunk (w/o prefetch)' done in 10 runs, average: 1.246730ms 153 | 'SSE culling / chunks / random data / 8 per chunk (w/o prefetch)' done in 10 runs, average: 2.170331ms 154 | 'SSE culling / chunks / random data / 8 per chunk (with prefetch)' done in 10 runs, average: 1.999654ms 155 | ``` -------------------------------------------------------------------------------- /SSECulling.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "Common.h" 3 | 4 | int 
main(int argc, char **argv) 5 | { 6 | Config config; 7 | parse_args(&config, argc, argv); 8 | 9 | printf("Data size: %dx%dx%d (%d objects, %zu bytes)\n", 10 | config.data_size, config.data_size, config.data_size, 11 | volume(Vec3i(config.data_size)), 12 | volume(Vec3i(config.data_size)) * sizeof(Sphere)); 13 | 14 | do_arrays(config); 15 | do_chunks(config); 16 | return 0; 17 | } 18 | -------------------------------------------------------------------------------- /Timer.cpp: -------------------------------------------------------------------------------- 1 | #if defined(_WIN32) 2 | #include 3 | #elif defined(__MACH__) 4 | #include 5 | #include 6 | #include 7 | #else 8 | #include 9 | #endif 10 | #include "Core/Utils.h" 11 | 12 | double get_time_milliseconds() 13 | { 14 | #if defined(_WIN32) 15 | static double freq = 0.0; 16 | if (freq == 0.0) { 17 | LARGE_INTEGER li; 18 | if (!QueryPerformanceFrequency(&li)) 19 | die("clock failure"); 20 | freq = (double)li.QuadPart / 1000.0; 21 | } 22 | LARGE_INTEGER li; 23 | QueryPerformanceCounter(&li); 24 | return (double)li.QuadPart / freq; 25 | #elif defined(__MACH__) 26 | struct timespec t; 27 | clock_serv_t cclock; 28 | mach_timespec_t mts; 29 | host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &cclock); 30 | if (clock_get_time(cclock, &mts) != KERN_SUCCESS) { 31 | die("clock failure"); 32 | } 33 | mach_port_deallocate(mach_task_self(), cclock); 34 | t.tv_sec = mts.tv_sec; 35 | t.tv_nsec = mts.tv_nsec; 36 | return (double)t.tv_sec * 1000.0 + (double)t.tv_nsec / 1000000.0; 37 | #else 38 | struct timespec t; 39 | if (clock_gettime(CLOCK_MONOTONIC, &t) != 0) 40 | die("clock failure"); 41 | return (double)t.tv_sec * 1000.0 + (double)t.tv_nsec / 1000000.0; 42 | #endif 43 | } 44 | -------------------------------------------------------------------------------- /Timer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | double get_time_milliseconds(); 4 | 
--------------------------------------------------------------------------------