├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── README.md ├── main.cpp ├── ss1.png └── ss2.png /.gitignore: -------------------------------------------------------------------------------- 1 | .vs/ 2 | .idea/ 3 | build/ 4 | cmake-build-debug/ 5 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "Bigged"] 2 | path = Bigged 3 | url = https://github.com/Alan-FGR/Bigged.git 4 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.9) 2 | project(culler) 3 | 4 | set(CMAKE_CXX_STANDARD 17) 5 | 6 | add_subdirectory(Bigged) 7 | 8 | link_libraries( 9 | bigged 10 | ) 11 | 12 | include_directories(culler 13 | PUBLIC 14 | Bigged 15 | Bigged/bigg/include 16 | Bigged/entt/src/entt 17 | ) 18 | 19 | add_executable(culler 20 | main.cpp 21 | ) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alan-FGR/Cullminator9000/f75f186d0522f2d129934fecb9af35107c2a2f92/README.md -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "Bigged/bigged.cpp" 3 | #include "entt.hpp" 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #define TIME_HERE std::chrono::high_resolution_clock::now(); 10 | #define ELAPSEDuS(time_point) (uint)(std::chrono::duration_cast(std::chrono::high_resolution_clock::now()-time_point).count()); 11 | 12 | using namespace entt; 13 | 14 | class FrustumTest : public TestBed { 15 | DefaultRegistry registry; 16 | virtual void Init() override; 17 | virtual void Update(float dt) override; 18 | virtual void Shutdown() override; 19 | }; 20 | 21 | struct AABB 22 | { 23 | vec3 min, max; 24 | AABB(vec3 min, vec3 max) : min(min), max(max) {} 25 | }; 26 | 27 | struct Transform 28 | { 29 | vec3 scalexyz; 30 | quat rotation; 31 | vec3 position; 32 | 33 | void Translate(vec3 local) { 34 | position += rotation * local; 35 | } 36 | 37 | void Rotate(quat dr) { 38 | rotation = dr * rotation; 39 | } 40 | 41 | mat4 GetMatrix() { 42 | auto m = mat4(rotation); 43 | m = scale(m, scalexyz); 44 | return translate(m, position); 45 | } 46 | 47 | void DrawDebug() { 48 | TestBed::DrawArrow(position, position + rotation * vec3(1, 0, 0), col32::red); 49 | TestBed::DrawArrow(position, position + rotation * vec3(0, 1, 0), col32::green); 50 | TestBed::DrawArrow(position, position + rotation * vec3(0, 0, 1), col32::blue); 51 | TestBed::DrawOBB(GetMatrix(), col32::purple, true); 52 | float sphereRad = length(scalexyz); 53 | TestBed::DrawSphere(position, sphereRad, col32(255,255,255,40), 2); 54 | } 55 | 56 | }; 57 | 58 | struct BSphere 59 | { 60 | //vec3 localPosition; TODO 61 | __m128 simdCacheX; 62 | __m128 simdCacheY; 63 | __m128 simdCacheZ; 64 | __m128 simdCacheR; 65 | 66 | float radius; 67 | 68 | void UpdateCaches(Transform t) 69 | { 70 | simdCacheX = _mm_set_ps1(t.position.x); 71 | simdCacheY = _mm_set_ps1(t.position.y); 72 | simdCacheZ = _mm_set_ps1(t.position.z); 73 | simdCacheR = _mm_set_ps1(-radius); 74 | } 75 | 76 | std::tuple GetCachedDataSlow() 77 | { 78 | return { 79 | vec3( 80 | simdCacheX.m128_f32[0], 81 | simdCacheY.m128_f32[0], 82 | simdCacheZ.m128_f32[0] 83 | ), 84 | radius 85 | }; 86 | } 87 | 88 | }; 89 | 90 | struct Velocity { 91 | vec3 linear; 92 | vec3 angular; 93 | 94 | void ApplyToTransform(Transform& t, float dt) { 95 | t.Rotate(quat(angular*dt)); 96 | t.Translate(linear*dt); 97 | } 98 | 99 | }; 100 | 101 | struct Frustum 102 | { 103 | float fov; 104 | float nearPlane; 105 | float farPlane; 106 | float aspectRatio; 107 | 108 | mat4 GetFrustumMatrix(mat4 viewMatrix) { 109 | mat4 pm = glm::perspectiveFov(radians(fov), aspectRatio, 1.f, nearPlane, farPlane); 110 | mat4 invView = inverse(viewMatrix); 111 | vec3 nm = vec3(invView[3]); 112 | invView[3] = vec4(0, 0, 0, 1); 113 | viewMatrix = translate(invView, nm); 114 | mat4 vpm = pm * viewMatrix; 115 | return vpm; 116 | } 117 | 118 | mat4 GetFrustumMatrix(Transform holder) { 119 | return GetFrustumMatrix(holder.GetMatrix()); 120 | } 121 | }; 122 | 123 | 124 | 125 | void FrustumTest::Init() 126 | { 127 | //add stuff to cull 128 | 129 | const int width = 100; //100 130 | const int height = 50; //6 131 | const float spacing = 2; 132 | 133 | for (int z = 0; z < width; z++) 134 | for (int y = 0; y < height; y++) 135 | for (int x = 0; x < width; x++) 136 | { 137 | auto entity = registry.create(); 138 | 139 | auto& tf = registry.assign(entity, 140 | vec3(((10 + std::rand()) % 100) / 100.f, ((10 + std::rand()) % 100) / 100.f, ((10 + std::rand()) % 100) / 100.f), 141 | quat(1, 0, 0, 0), 142 | vec3((x - (width/2)) * spacing, (y - (height/2)) * spacing, (z - (width/2)) * spacing) 143 | ); 144 | 145 | // registry.assign(entity, 146 | // vec3(0, 0, (std::rand() % 1000) / 100.f) * 0.01f, 147 | // vec3((std::rand() % 1000) / 1000.f, (std::rand() % 1000) / 1000.f, (std::rand() % 1000) / 1000.f) * 0.1f 148 | // ); 149 | 150 | auto& sphere = registry.assign(entity//, 151 | //vec3((x - (width/2)) * spacing, (y - (height/2)) * spacing, (z - (width/2)) * spacing), 152 | //((20 + std::rand()) % 100) / 100.f 153 | ); 154 | 155 | sphere.radius = ((20 + std::rand()) % 100) / 100.f; 156 | sphere.UpdateCaches(tf); 157 | 158 | 159 | } 160 | 161 | //add cullers 162 | auto entity = registry.create(); 163 | registry.assign(entity, vec3(1), quat(1, 0, 0, 0), vec3(0)); 164 | registry.assign(entity, vec3(0, 0.f, 0), vec3(0, 0.1f, 0)); 165 | registry.assign(entity, 50.f, 0.01f, 500.f, 2.f); 166 | 167 | SetCameraPosition(vec3(-5, 30, 5)); 168 | SetCameraYawPitch(-35, 90); 169 | } 170 | 171 | bool NaiveCull(vec3& pos, float& radius, vec4& plane) 172 | { 173 | return plane.x * pos.x + plane.y * pos.y + plane.z * pos.z + plane.w <= -radius; 174 | } 175 | 176 | __m128 _mm_add_ps(__m128& a, __m128& b, __m128& c) 177 | { 178 | return _mm_add_ps(_mm_add_ps(a, b), c); 179 | } 180 | 181 | __m128 _mm_add_ps(__m128& a, __m128& b, __m128& c, __m128& d) 182 | { 183 | return _mm_add_ps(_mm_add_ps(a, b), _mm_add_ps(c, d)); 184 | } 185 | 186 | __m128 _mm_set_ps_bw(float x, float y, float z, float w) 187 | { 188 | return _mm_set_ps(w, z, y, x); 189 | } 190 | 191 | bool draw = false; 192 | bool drawCulled = false; 193 | bool simd = true; 194 | bool mt = true; 195 | 196 | std::vector times; 197 | uint maxavg; 198 | float lerpAvg; 199 | 200 | std::vector inView; 201 | std::vector culled; 202 | 203 | int inViewCount = 0; 204 | int culledCount = 0; 205 | 206 | std::mutex drawListMtx; 207 | 208 | void naiveCull(BSphere& s, vec4 &left, vec4 &right, vec4 &top, vec4 &bottom) { 209 | auto[pos, r] = s.GetCachedDataSlow(); 210 | 211 | bool cull = false; 212 | 213 | if (NaiveCull(pos, s.radius, right)) cull = true; 214 | else if (NaiveCull(pos, s.radius, left)) cull = true; 215 | else if (NaiveCull(pos, s.radius, bottom)) cull = true; 216 | else if (NaiveCull(pos, s.radius, top)) cull = true; 217 | 218 | if (cull) 219 | culledCount++; 220 | else 221 | inViewCount++; 222 | 223 | if (draw) { 224 | if (cull) { 225 | if (drawCulled) { 226 | drawListMtx.lock(); 227 | culled.emplace_back(s); 228 | drawListMtx.unlock(); 229 | } 230 | } 231 | else { 232 | drawListMtx.lock(); 233 | inView.emplace_back(s); 234 | drawListMtx.unlock(); 235 | } 236 | } 237 | } 238 | 239 | void simdCull(BSphere& s, __m128* planes) 240 | { 241 | __m128 xs = _mm_mul_ps(planes[0], s.simdCacheX); 242 | __m128 ys = _mm_mul_ps(planes[1], s.simdCacheY); 243 | __m128 zs = _mm_mul_ps(planes[2], s.simdCacheZ); 244 | 245 | __m128 added = _mm_add_ps(xs, ys, zs, planes[3]); 246 | 247 | __m128 results = _mm_cmplt_ps(added, s.simdCacheR); 248 | 249 | auto cull = _mm_movemask_ps(results); 250 | 251 | if (cull) 252 | culledCount++; 253 | else 254 | inViewCount++; 255 | 256 | if (draw) { 257 | if (cull) { 258 | if (drawCulled) { 259 | drawListMtx.lock(); 260 | culled.emplace_back(s); 261 | drawListMtx.unlock(); 262 | } 263 | } 264 | else { 265 | drawListMtx.lock(); 266 | inView.emplace_back(s); 267 | drawListMtx.unlock(); 268 | } 269 | } 270 | } 271 | 272 | void FrustumTest::Update(float dt) 273 | { 274 | inViewCount = 0; 275 | culledCount = 0; 276 | 277 | DrawGrid(); 278 | 279 | registry.view().each([this, &dt](auto entity, Transform& transform, Velocity& vel) { 280 | vel.ApplyToTransform(transform, dt); 281 | transform.DrawDebug(); 282 | }); 283 | 284 | registry.view().each([this](auto entity, Transform& transform, Frustum& fr) { 285 | 286 | mat4 frustumMat4 = fr.GetFrustumMatrix(transform); 287 | DrawFrustum(frustumMat4, col32::white); 288 | 289 | mat4& m = frustumMat4; 290 | 291 | vec4 right; 292 | right.x = m[0][3] + m[0][0]; 293 | right.y = m[1][3] + m[1][0]; 294 | right.z = m[2][3] + m[2][0]; 295 | right.w = m[3][3] + m[3][0]; 296 | 297 | vec4 left; 298 | left.x = m[0][3] - m[0][0]; 299 | left.y = m[1][3] - m[1][0]; 300 | left.z = m[2][3] - m[2][0]; 301 | left.w = m[3][3] - m[3][0]; 302 | 303 | vec4 top; 304 | top.x = m[0][3] - m[0][1]; 305 | top.y = m[1][3] - m[1][1]; 306 | top.z = m[2][3] - m[2][1]; 307 | top.w = m[3][3] - m[3][1]; 308 | 309 | vec4 bottom; 310 | bottom.x = m[0][3] + m[0][1]; 311 | bottom.y = m[1][3] + m[1][1]; 312 | bottom.z = m[2][3] + m[2][1]; 313 | bottom.w = m[3][3] + m[3][1]; 314 | 315 | //vec4 far; 316 | //far.x = m[0][2]; 317 | //far.y = m[1][2]; 318 | //far.z = m[2][2]; 319 | //far.w = m[3][2]; 320 | 321 | //vec4 near; 322 | //near.x = m[0][3] - m[0][2]; 323 | //near.y = m[1][3] - m[1][2]; 324 | //near.z = m[2][3] - m[2][2]; 325 | //near.w = m[3][3] - m[3][2]; 326 | 327 | 328 | __m128 planes[4] = { 329 | _mm_set_ps_bw(left.x, right.x, top.x, bottom.x), 330 | _mm_set_ps_bw(left.y, right.y, top.y, bottom.y), 331 | _mm_set_ps_bw(left.z, right.z, top.z, bottom.z), 332 | _mm_set_ps_bw(left.w, right.w, top.w, bottom.w), 333 | }; 334 | 335 | 336 | 337 | auto view = registry.view(); 338 | 339 | auto tp = TIME_HERE; 340 | 341 | if (simd) { 342 | if (mt) 343 | std::for_each(std::execution::par, view.begin(), view.end(), [&view, &planes](const auto entity) 344 | { 345 | BSphere& s = view.get(entity); 346 | simdCull(s, &planes[0]); 347 | }); 348 | else 349 | registry.view().each([&planes](auto entity, BSphere& s) {simdCull(s, &planes[0]); }); 350 | } 351 | else { 352 | if (mt) 353 | std::for_each(std::execution::par, view.begin(), view.end(), [&view, &right, &left, &bottom, &top](const auto entity) { 354 | BSphere& s = view.get(entity); 355 | naiveCull(s, left, right, top, bottom); 356 | }); 357 | else 358 | registry.view().each([this, &left, &right, &top, &bottom](auto entity, BSphere& s) { 359 | naiveCull(s, left, right, top, bottom); 360 | }); 361 | } 362 | 363 | auto el = (int)ELAPSEDuS(tp); 364 | 365 | for (BSphere sphere : inView) 366 | { 367 | auto[pos, r] = sphere.GetCachedDataSlow(); 368 | DrawSphere(pos, r, col32::white, 8); 369 | } 370 | for (BSphere sphere : culled) 371 | { 372 | auto[pos, r] = sphere.GetCachedDataSlow(); 373 | DrawSphere(pos, r, col32::red, 8); 374 | } 375 | 376 | 377 | times.emplace_back(el); 378 | 379 | float avg = accumulate(times.begin(), times.end(), 0) / (float)times.size(); 380 | 381 | if (lerpAvg == 0) 382 | lerpAvg = avg; 383 | else 384 | lerpAvg = bx::lerp(lerpAvg, avg, 0.01f); 385 | 386 | if (times.size() > 120) { 387 | times.erase(times.begin()); 388 | } 389 | 390 | maxavg = fmax(maxavg, avg); 391 | 392 | ImGui::Checkbox("draw", &draw); 393 | ImGui::Checkbox("draw culled", &drawCulled); 394 | ImGui::Checkbox("SIMD", &simd); 395 | ImGui::Checkbox("Multi threading", &mt); 396 | ImGui::SliderInt("microSeconds", &el, avg-100*abs(lerpAvg-avg), avg+100*abs(lerpAvg-avg)); 397 | ImGui::SliderFloat("AVG microSeconds", &avg, 0, maxavg, "%.1f"); 398 | ImGui::SliderFloat("Lpd microSeconds", &lerpAvg, 0, maxavg, "%.1f"); 399 | 400 | ImGui::Text("culled: %d\ninview: %d", culledCount, inViewCount); 401 | 402 | inView.clear(); 403 | culled.clear(); 404 | 405 | }); 406 | 407 | }; 408 | 409 | void FrustumTest::Shutdown() 410 | { 411 | 412 | } 413 | 414 | //todo macro? 415 | int main(int argc, char** argv) 416 | { 417 | FrustumTest app; 418 | return app.Run(argc, argv); 419 | } -------------------------------------------------------------------------------- /ss1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alan-FGR/Cullminator9000/f75f186d0522f2d129934fecb9af35107c2a2f92/ss1.png -------------------------------------------------------------------------------- /ss2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alan-FGR/Cullminator9000/f75f186d0522f2d129934fecb9af35107c2a2f92/ss2.png --------------------------------------------------------------------------------