├── LICENSE ├── README.md ├── app ├── application.cpp ├── application.h ├── compute_renderer.cpp ├── compute_renderer.h ├── multi_renderer.cpp ├── multi_renderer.h ├── transform_feeedback_renderer.cpp └── transform_feeedback_renderer.h ├── imgs ├── fig1.png ├── fig2.png └── fig3.png ├── main.cpp └── shaders ├── cull.cs ├── cull.geom ├── cull.vert ├── draw.frag ├── draw.fs ├── draw.vert ├── draw.vs ├── multi ├── multi_cull.comp ├── multi_draw.frag └── multi_draw.vert └── temporal ├── collect_curr.cs ├── collect_curr_notlast.cs ├── hiz_cull.cs ├── pack_bits ├── bits_curr.cs ├── bits_curr_notlast.cs ├── collect_instances.cs ├── compute_renderer.cpp └── hiz_cull.cs ├── pack_bits_as_uint ├── bits_curr.cs ├── bits_curr_notlast.cs ├── collect_instances.cs ├── compute_renderer.cpp └── hiz_cull.cs ├── raster_cull.fs ├── raster_cull.gs └── raster_cull.vs /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 potato3d 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # azdo 2 | AZDO OpenGL techniques including visibility culling and LOD selection inside the GPU 3 | 4 | For more information about AZDO, check out [this](https://www.gdcvault.com/play/1020791) 2014 presentation from GDC ([mirror](https://archive.org/details/GDC2014Everitt)). You can also google "approaching zero driver overhead" :) 5 | 6 | # Description 7 | 8 | We implement three techniques in the GPU: 9 | 10 | * Hardware instancing: Fast rendering of repeated geometries 11 | * Single draw call for all instances 12 | * Avoids API overhead 13 | * Access per-instance data 14 | * Use gl_InstanceID inside vertex shader 15 | * 3x4 transformation matrix 16 | * Combined with culling 17 | * gl_InstanceID -> visible buffer -> instance data 18 | 19 | * Visibility culling: Discard geometries outside frustum and occluded 20 | * Compute shader 21 | * Test AABB vs frustum in clip space 22 | * Compute screen-space 2D AABB and its min-z 23 | * Fetch max depth from hierarchical-z 2D texture 24 | * If AABB min-z < max depth, store 1 else 0 25 | * How to build hi-z map 26 | * Draw occluders and use depth with mipmapping 27 | * logN rendering passes gather max of 4 texels 28 | 29 | * Level of detail: Choose discrete levels during rendering 30 | * Compute shader 31 | * Gather instances with visible == 1 32 | * Use distance to camera to choose LOD 33 | * Write to different output buffer per LOD 34 | * Keep track of output index with atomic counters 35 | * Render all LOD at once 36 | * glMultiDrawElementsIndirect 37 | *
drawCmds[i].baseInstance = i 38 | * LOD level vertex attrib w/ divisor = # instance + 1 39 | 40 | The general rendering algorithm consists of the following steps: 41 | 42 | 1. Render previously visible 43 | 1. Update hierarchical-z mipmaps 44 | 1. Test AABBs for visibility and store in current 45 | 1. Gather visible in current and not last frames 46 | 1. Render newly visible 47 | 1. Gather visible in current for next frame 48 | 1. Swap current with last 49 | 50 | # Results 51 | 52 | Here are some images of a test scene consisting solely of cylinders and corresponding performance results: 53 | 54 | ![scene](https://github.com/potato3d/azdo/blob/main/imgs/fig1.png "scene") 55 | ![lod](https://github.com/potato3d/azdo/blob/main/imgs/fig2.png "lod") 56 | ![perf](https://github.com/potato3d/azdo/blob/main/imgs/fig3.png "perf") 57 | -------------------------------------------------------------------------------- /app/application.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | namespace app 5 | { 6 | void application::display(float* view_matrix) 7 | { 8 | _engine.set_view(view_matrix); 9 | _engine.render(); 10 | } 11 | 12 | void application::reshape(int w, int h) 13 | { 14 | _engine.resize_screen(w, h); 15 | } 16 | 17 | bool application::key_press(unsigned char /*key*/, int /*x*/, int /*y*/) 18 | { 19 | return false; 20 | } 21 | 22 | bool application::initialize() 23 | { 24 | glv::viewer::set_default_camera_look_at(-165.036, -61.5545, -164.548, -164.447, -61.0983, -163.881, 0.0f, 0.0f, 1.0f); 25 | 26 | // return _engine.initialize(&_transform_feedback_renderer); 27 | return _engine.initialize(&_compute_renderer); 28 | } 29 | 30 | bool application::finalize() 31 | { 32 | return _engine.finalize(); 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /app/application.h: -------------------------------------------------------------------------------- 1 | 
/// Splits a linear index into grid coordinates, with x varying fastest,
/// then y, then z.
///
/// @param idx    linear index to decompose
/// @param max_x  number of cells along x (used directly as the modulus)
/// @param max_y  number of cells along y (used directly as the modulus)
/// @param x,y,z  receive the resulting coordinates
static void to_3D(int idx, int max_x, int max_y, int& x, int& y, int& z)
{
	// What is left of the index once the x component has been stripped off.
	const int rows_and_slices = idx / max_x;

	x = idx % max_x;
	y = rows_and_slices % max_y;
	z = rows_and_slices / max_y;
}

/// Returns how many blocks of `block_size` items are needed to hold `total`
/// items, i.e. the integer division rounded up.
static int get_num_blocks(int total, int block_size)
{
	const int padded_total = total + block_size - 1;
	return padded_total / block_size;
}
glGetIntegerv(GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS, &c); 51 | io::print("GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS:", c); 52 | } 53 | 54 | static tess::triangle_mesh simplify(const tess::triangle_mesh& m, int target_triangle_count) 55 | { 56 | io::print("target:", target_triangle_count); 57 | 58 | hash_map unique_verts; 59 | 60 | for(unsigned int i = 0; i < m.elements.size(); i+=3) 61 | { 62 | Simplify::Triangle t; 63 | for(unsigned int j = 0; j < 3; ++j) 64 | { 65 | auto p = m.vertices.at(m.elements.at(i+j)).position; 66 | auto itr = unique_verts.find(p); 67 | if(itr != end(unique_verts)) 68 | { 69 | t.v[j] = itr->second; 70 | } 71 | else 72 | { 73 | t.v[j] = Simplify::vertices.size(); 74 | Simplify::Vertex v; 75 | v.p = {p.x, p.y, p.z}; 76 | Simplify::vertices.push_back(v); 77 | unique_verts.emplace(p, t.v[j]); 78 | } 79 | } 80 | Simplify::triangles.push_back(t); 81 | } 82 | 83 | Simplify::simplify_mesh(target_triangle_count); 84 | 85 | tess::triangle_mesh r; 86 | for(unsigned int i = 0; i < Simplify::vertices.size(); ++i) 87 | { 88 | const auto& v = Simplify::vertices.at(i); 89 | r.vertices.push_back({vec3(v.p.x, v.p.y, v.p.z), {0.0f, 0.0f, 0.0f}}); 90 | } 91 | for(unsigned int i = 0; i < Simplify::triangles.size(); ++i) 92 | { 93 | const auto& t = Simplify::triangles.at(i); 94 | for(unsigned int j = 0; j < 3; ++j) 95 | { 96 | r.elements.push_back(t.v[j]); 97 | auto& v = r.vertices.at(t.v[j]); 98 | v.normal += vec3(t.n.x, t.n.y, t.n.z); 99 | } 100 | } 101 | for(auto& v : r.vertices) 102 | { 103 | v.normal = v.normal.normalized(); 104 | } 105 | 106 | return r; 107 | } 108 | 109 | bool compute_renderer::initialize(glb::framebuffer& fbuffer, glb::camera& cam) 110 | { 111 | // ---------------------------------------------------------------------------------------------------------------------- 112 | // configure framebuffer for hi-z map 113 | // ---------------------------------------------------------------------------------------------------------------------- 114 | 
115 | fbuffer.set_use_textures(true); 116 | fbuffer.set_create_depth_mipmaps(true); 117 | _fbuffer = &fbuffer; 118 | 119 | _camera = &cam; 120 | 121 | // ---------------------------------------------------------------------------------------------------------------------- 122 | // scene setup 123 | // ---------------------------------------------------------------------------------------------------------------------- 124 | 125 | initialize_single_mesh(); 126 | 127 | return true; 128 | } 129 | 130 | bool compute_renderer::finalize() 131 | { 132 | return true; 133 | } 134 | 135 | void compute_renderer::render() 136 | { 137 | // render_hiz_last_frame(); 138 | render_hiz_temporal(); 139 | // render_raster_temporal(); 140 | } 141 | 142 | bool compute_renderer::initialize_from_file() 143 | { 144 | std::ifstream file("caos.xfm", std::ios::in | std::ios::binary); 145 | if(!file) 146 | { 147 | return false; 148 | } 149 | 150 | std::size_t unique_count = 0; 151 | file.read((char*)&unique_count, sizeof(unique_count)); 152 | 153 | std::vector transforms; 154 | 155 | for(unsigned int i = 0; i < unique_count; ++i) 156 | { 157 | std::size_t instance_count = 0; 158 | file.read((char*)&instance_count, sizeof(instance_count)); 159 | std::vector instance_transforms(instance_count); 160 | file.read((char*)instance_transforms.data(), sizeof(mat34)*instance_count); 161 | transforms.insert(transforms.end(), instance_transforms.begin(), instance_transforms.end()); 162 | } 163 | 164 | _instance_count = transforms.size(); 165 | 166 | return initialize_common(transforms); 167 | } 168 | 169 | bool compute_renderer::initialize_single_mesh() 170 | { 171 | int n_x = 100; 172 | int n_y = 100; 173 | int n_z = 100; 174 | _instance_count = n_x*n_y*n_z; 175 | 176 | std::vector transforms(_instance_count); 177 | for(unsigned int i = 0; i < transforms.size(); ++i) 178 | { 179 | int x, y, z; 180 | to_3D(i, n_x, n_y, x, y, z); 181 | transforms[i] = mat34(mat4::translation(vec3((float)x, (float)y, 
/// Creates every GPU resource the compute renderer needs for the given
/// per-instance transforms: LOD meshes, storage buffers, the indirect draw
/// buffer, the drawing VAO and all shader programs.
///
/// Expects _instance_count to be set by the caller beforehand; it is used
/// (not transforms.size()) to size every per-instance buffer.
///
/// Indexed binding points established here:
///   SSBO 0            per-instance bounding boxes
///   SSBO 1            per-instance transforms
///   SSBO 2..2+NUM_LODS-1  visible instance ids, one buffer per LOD
///   SSBO 10 / 11      "current" / "last" visibility flags
///   atomic counter i  the instanceCount field of draw command i
///
/// @param transforms  one transform per instance
/// @return false as soon as any shader file fails to load or a program fails
///         to build; true otherwise. GL objects created before a failure are
///         not released here.
bool compute_renderer::initialize_common(const vector& transforms)
{
	// Number of compute work groups dispatched per pass, 256 instances per group.
	// NOTE(review): 256 presumably matches local_size_x in the compute shaders - confirm.
	_compute_count = get_num_blocks(_instance_count, 256);

	tess::triangle_mesh drawable_lods[NUM_LODS];

	// Starting tessellation resolution is 2^(NUM_LODS+1); it is halved for each following LOD.
	int resolution = pow(2, NUM_LODS+1);
	unsigned int total_vertex_count = 0;
	unsigned int total_element_count = 0;
	unsigned int element_offsets[NUM_LODS] = {0};
	unsigned int eoffset = 0;
	unsigned int vertex_offsets[NUM_LODS] = {0};
	unsigned int voffset = 0;

	// Build one cylinder per LOD and record where each one starts inside the
	// shared vertex / element buffers created further down.
	for(int i = 0; i < NUM_LODS; ++i)
	{
		// NOTE(review): the first two arguments are presumably the cylinder dimensions - confirm in tess.
		drawable_lods[i] = tess::tessellate_cylinder(1, 5, resolution);
		total_vertex_count += drawable_lods[i].vertices.size();
		total_element_count += drawable_lods[i].elements.size();
		element_offsets[i] = eoffset;
		eoffset += drawable_lods[i].elements.size();
		vertex_offsets[i] = voffset;
		voffset += drawable_lods[i].vertices.size();
		resolution /= 2;
	}

	// ------------------------------------------------------------------
	// compute input 1: matrix transforms
	// ------------------------------------------------------------------

	glGenBuffers(1, &_input_transform_ssbo);
	glBindBuffer(GL_SHADER_STORAGE_BUFFER, _input_transform_ssbo);
	glBufferData(GL_SHADER_STORAGE_BUFFER, _instance_count*sizeof(mat34), transforms.data(), GL_STATIC_DRAW);
	glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);

	// ------------------------------------------------------------------
	// compute input 2: bounding boxes
	// ------------------------------------------------------------------

	// One box per instance, grown over every LOD-0 vertex after applying that
	// instance's transform.
	std::vector bounds(_instance_count);
	for(unsigned int i = 0; i < bounds.size(); ++i)
	{
		bbox& b = bounds[i];
		for(unsigned int v = 0; v < drawable_lods[0].vertices.size(); ++v)
		{
			b.expand(transforms[i].as_mat4().mul(drawable_lods[0].vertices[v].position));
		}
	}

	glGenBuffers(1, &_input_bound_ssbo);
	glBindBuffer(GL_SHADER_STORAGE_BUFFER, _input_bound_ssbo);
	glBufferData(GL_SHADER_STORAGE_BUFFER, _instance_count*sizeof(bbox), bounds.data(), GL_STATIC_DRAW);
	glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);

	// ------------------------------------------------------------------
	// compute output 1: instance IDs for each LOD
	// ------------------------------------------------------------------

	// Every per-LOD id buffer starts out as the identity list 0..N-1 and is
	// large enough to hold all instances.
	vector ids(_instance_count);
	for(unsigned int i = 0; i < ids.size(); ++i)
	{
		ids[i] = i;
	}

	for(int i = 0; i < NUM_LODS; ++i)
	{
		glGenBuffers(1, &_visible_instance_id_ssbo[i]);
		glBindBuffer(GL_SHADER_STORAGE_BUFFER, _visible_instance_id_ssbo[i]);
		glBufferData(GL_SHADER_STORAGE_BUFFER, _instance_count*sizeof(GLuint), ids.data(), GL_STATIC_DRAW);
		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
	}

	// ------------------------------------------------------------------
	// compute output 2: indirect draw commands
	// ------------------------------------------------------------------

	for(int i = 0; i < NUM_LODS; ++i)
	{
		DrawElementsIndirectCommand cmd;
		cmd.elementCount = drawable_lods[i].elements.size();
		cmd.instanceCount = 0; // atomic counter incremented by the compute shader
		cmd.firstElement = element_offsets[i];
		cmd.baseVertex = vertex_offsets[i];
		cmd.baseInstance = i; // references lod level vertex attribute
		_draw_commands[i] = cmd;
	}

	// The buffer uploaded to the GPU draws every instance with the last LOD,
	// combined with the identity id lists above ...
	_draw_commands[NUM_LODS-1].instanceCount = _instance_count;

	glGenBuffers(1, &_draw_indirect_buffer);
	glBindBuffer(GL_DRAW_INDIRECT_BUFFER, _draw_indirect_buffer);
	glBufferData(GL_DRAW_INDIRECT_BUFFER, NUM_LODS*sizeof(DrawElementsIndirectCommand), _draw_commands, GL_STATIC_DRAW);
	glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0);

	// ... while the CPU-side copy goes back to all-zero counts: the render
	// functions re-upload it to reset the counters before each collect pass.
	_draw_commands[NUM_LODS-1].instanceCount = 0;

	// ------------------------------------------------------------------
	// compute shader
	// ------------------------------------------------------------------

	glb::shader_program_builder compute_shader_builder;
	compute_shader_builder.begin();
	if(!compute_shader_builder.add_file(glb::shader_compute, "../shaders/lod/cull.cs"))
	{
		return false;
	}
	if(!compute_shader_builder.end())
	{
		return false;
	}
	auto program = compute_shader_builder.get_shader_program();
	program.bind_uniform_buffer("camera_data", _camera->get_uniform_buffer());
	_hiz_cull_lastframe_program = program.get_id();

	// Uniform location 0 receives the instance count (same convention for every compute program below).
	glProgramUniform1ui(_hiz_cull_lastframe_program, 0, _instance_count);

	// ------------------------------------------------------------------
	// drawing vertex arrays
	// ------------------------------------------------------------------

	glGenVertexArrays(1, &_draw_vao);
	glBindVertexArray(_draw_vao);

	// Shared vertex buffer: all LOD meshes packed back to back.
	// The handle is local only; it is not stored in the class.
	GLuint vbo;
	glGenBuffers(1, &vbo);
	glBindBuffer(GL_ARRAY_BUFFER, vbo);
	glBufferData(GL_ARRAY_BUFFER, total_vertex_count*sizeof(tess::vertex), nullptr, GL_STATIC_DRAW);

	unsigned int offset = 0; // running byte offset into the buffer being filled
	for(int i = 0; i < NUM_LODS; ++i)
	{
		unsigned int size = drawable_lods[i].vertices.size()*sizeof(tess::vertex);
		glBufferSubData(GL_ARRAY_BUFFER, offset, size, drawable_lods[i].vertices.data());
		offset += size;
	}

	// attribute 0: three floats at the start of each tess::vertex
	glEnableVertexAttribArray(0);
	glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, sizeof(tess::vertex), GLB_BYTE_OFFSET(0));

	// attribute 1: three floats located one vec3 into each tess::vertex
	glEnableVertexAttribArray(1);
	glVertexAttribPointer(1, 3, GL_FLOAT, GL_FALSE, sizeof(tess::vertex), GLB_BYTE_OFFSET(sizeof(vec3)));

	// Tiny buffer holding the values 0..NUM_LODS-1, exposed as instanced attribute 2.
	// The handle is local only; it is not stored in the class.
	GLuint lbo;
	glGenBuffers(1, &lbo);
	glBindBuffer(GL_ARRAY_BUFFER, lbo);

	GLuint lod_levels[NUM_LODS];
	for(int i = 0; i < NUM_LODS; ++i)
	{
		lod_levels[i] = i;
	}
	glBufferData(GL_ARRAY_BUFFER, NUM_LODS*sizeof(GLuint), lod_levels, GL_STATIC_DRAW);

	glEnableVertexAttribArray(2);
	glVertexAttribIPointer(2, 1, GL_UNSIGNED_INT, 0, GLB_BYTE_OFFSET(0));
	glVertexAttribDivisor(2, _instance_count + 1); // make sure (instanceID / divisor) is 0 so lod_level vertex attrib is referenced by drawCmd.baseInstance

	// Note: using gl_BaseInstanceARB or gl_DrawIDARB is 33% slower than using an instanced attribute on a GTX 970 (2.59 ms vs 1.94 ms)
	// Note: another option would be to store lod_level in an SSBO and use gl_BaseInstanceARB or gl_DrawIDARB to index it from the shader

	// Shared element buffer: all LOD index lists packed back to back.
	// The handle is local only; it is not stored in the class.
	GLuint ebo;
	glGenBuffers(1, &ebo);
	glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo);
	glBufferData(GL_ELEMENT_ARRAY_BUFFER, total_element_count*sizeof(tess::element), nullptr, GL_STATIC_DRAW);

	offset = 0;
	for(int i = 0; i < NUM_LODS; ++i)
	{
		unsigned int size = drawable_lods[i].elements.size()*sizeof(tess::element);
		glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, offset, size, drawable_lods[i].elements.data());
		offset += size;
	}

	glBindVertexArray(0);

	// ------------------------------------------------------------------
	// drawing shader
	// ------------------------------------------------------------------

	glb::shader_program_builder draw_shader_builder;
	draw_shader_builder.begin();
	if(!draw_shader_builder.add_file(glb::shader_vertex, "../shaders/lod/draw.vs"))
	{
		return false;
	}
	if(!draw_shader_builder.add_file(glb::shader_fragment, "../shaders/lod/draw.fs"))
	{
		return false;
	}
	if(!draw_shader_builder.end())
	{
		return false;
	}
	auto draw_shader = draw_shader_builder.get_shader_program();
	// note: the draw program names the camera block "cdata", the cull programs "camera_data"
	draw_shader.bind_uniform_buffer("cdata", _camera->get_uniform_buffer());
	_draw_program = draw_shader.get_id();

	// ------------------------------------------------------------------
	// associate buffers with indexed binding points
	// ------------------------------------------------------------------

	unsigned int ssbo_binding = 0;

	// binding 0: bounds, binding 1: transforms
	glBindBufferBase(GL_SHADER_STORAGE_BUFFER, ssbo_binding++, _input_bound_ssbo);
	glBindBufferBase(GL_SHADER_STORAGE_BUFFER, ssbo_binding++, _input_transform_ssbo);

	for(int i = 0; i < NUM_LODS; ++i)
	{
		// bindings 2, 3, ...: visible id list of LOD i
		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, ssbo_binding++, _visible_instance_id_ssbo[i]);
		// Atomic counter i aliases one GLuint inside draw command i: byte offset 4 skips
		// elementCount, i.e. it is the command's instanceCount field.
		glBindBufferRange(GL_ATOMIC_COUNTER_BUFFER, i, _draw_indirect_buffer, 4+sizeof(DrawElementsIndirectCommand)*i, sizeof(GLuint));
	}

	// ------------------------------------------------------------------
	// temporal hi-z culling shader
	// ------------------------------------------------------------------

	compute_shader_builder.begin();
	if(!compute_shader_builder.add_file(glb::shader_compute, "../shaders/lod/temporal/hiz_cull.cs"))
	{
		return false;
	}
	if(!compute_shader_builder.end())
	{
		return false;
	}
	program = compute_shader_builder.get_shader_program();
	program.bind_uniform_buffer("camera_data", _camera->get_uniform_buffer());
	_hiz_cull_program = program.get_id();

	glProgramUniform1ui(_hiz_cull_program, 0, _instance_count);

	// ------------------------------------------------------------------
	// temporal raster culling shader
	// ------------------------------------------------------------------

	// Despite the builder's name, this one is a vertex + geometry + fragment program.
	compute_shader_builder.begin();
	if(!compute_shader_builder.add_file(glb::shader_vertex, "../shaders/lod/temporal/raster_cull.vs"))
	{
		return false;
	}
	if(!compute_shader_builder.add_file(glb::shader_geometry, "../shaders/lod/temporal/raster_cull.gs"))
	{
		return false;
	}
	if(!compute_shader_builder.add_file(glb::shader_fragment, "../shaders/lod/temporal/raster_cull.fs"))
	{
		return false;
	}
	if(!compute_shader_builder.end())
	{
		return false;
	}
	program = compute_shader_builder.get_shader_program();
	program.bind_uniform_buffer("camera_data", _camera->get_uniform_buffer());
	_raster_cull_program = program.get_id();

	// ------------------------------------------------------------------
	// collect current visible instances shader
	// ------------------------------------------------------------------

	compute_shader_builder.begin();
	if(!compute_shader_builder.add_file(glb::shader_compute, "../shaders/lod/temporal/collect_curr.cs"))
	{
		return false;
	}
	if(!compute_shader_builder.end())
	{
		return false;
	}
	_collect_curr_program = compute_shader_builder.get_shader_program().get_id();

	glProgramUniform1ui(_collect_curr_program, 0, _instance_count);

	// ------------------------------------------------------------------
	// collect current and not last visible instances shader
	// ------------------------------------------------------------------

	compute_shader_builder.begin();
	if(!compute_shader_builder.add_file(glb::shader_compute, "../shaders/lod/temporal/collect_curr_notlast.cs"))
	{
		return false;
	}
	if(!compute_shader_builder.end())
	{
		return false;
	}
	_collect_curr_notlast_program = compute_shader_builder.get_shader_program().get_id();

	glProgramUniform1ui(_collect_curr_notlast_program, 0, _instance_count);

	// ------------------------------------------------------------------
	// current visible bits ssbo
	// ------------------------------------------------------------------

	// Both visibility buffers start with every instance flagged as visible (1).
	vector visible(_instance_count, 1);

	glGenBuffers(1, &_curr_visible_ssbo);
	glBindBuffer(GL_SHADER_STORAGE_BUFFER, _curr_visible_ssbo);
	glBufferData(GL_SHADER_STORAGE_BUFFER, _instance_count*sizeof(GLuint), visible.data(), GL_STATIC_DRAW);
	glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);

	glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 10, _curr_visible_ssbo);

	// ------------------------------------------------------------------
	// last visible bits ssbo
	// ------------------------------------------------------------------

	glGenBuffers(1, &_last_visible_ssbo);
	glBindBuffer(GL_SHADER_STORAGE_BUFFER, _last_visible_ssbo);
	glBufferData(GL_SHADER_STORAGE_BUFFER, _instance_count*sizeof(GLuint), visible.data(), GL_STATIC_DRAW);
	glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);

	glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 11, _last_visible_ssbo);

	// The raster path clears whichever buffer currently sits at binding 10; start with "current".
	_raster_curr_ssbo = _curr_visible_ssbo;

	return true;
}
build depth mipmaps 536 | _fbuffer->update_depth_mipmaps(); 537 | 538 | // 2.2. perform frustum culling, occlusion culling using hi-z, and store visible flag 539 | glUseProgram(_hiz_cull_program); 540 | glDispatchCompute(_compute_count, 1, 1); 541 | glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); 542 | 543 | // 2.3. collect visible instance ids: current and not last 544 | glBufferData(GL_DRAW_INDIRECT_BUFFER, NUM_LODS*sizeof(DrawElementsIndirectCommand), _draw_commands, GL_STATIC_DRAW); 545 | glUseProgram(_collect_curr_notlast_program); 546 | glDispatchCompute(_compute_count, 1, 1); 547 | glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); 548 | 549 | // 3. draw newly visible in current frame 550 | glUseProgram(_draw_program); 551 | glMultiDrawElementsIndirect(GL_TRIANGLES, GL_UNSIGNED_INT, GLB_BYTE_OFFSET(0), NUM_LODS, 0); 552 | 553 | // 4. prepare data for next frame 554 | 555 | // 4.1. collect visible instance ids: current 556 | glBufferData(GL_DRAW_INDIRECT_BUFFER, NUM_LODS*sizeof(DrawElementsIndirectCommand), _draw_commands, GL_STATIC_DRAW); 557 | glUseProgram(_collect_curr_program); 558 | glDispatchCompute(_compute_count, 1, 1); 559 | glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); 560 | 561 | // 4.2. swap current with last flags 562 | static bool invert = true; 563 | if(invert) 564 | { 565 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 11, _curr_visible_ssbo); 566 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 10, _last_visible_ssbo); 567 | } 568 | else 569 | { 570 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 10, _curr_visible_ssbo); 571 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 11, _last_visible_ssbo); 572 | } 573 | invert ^= 1; 574 | } 575 | 576 | void compute_renderer::render_raster_temporal() 577 | { 578 | // 1. 
draw previously visible 579 | glBindBuffer(GL_DRAW_INDIRECT_BUFFER, _draw_indirect_buffer); 580 | glBindVertexArray(_draw_vao); 581 | glUseProgram(_draw_program); 582 | glMultiDrawElementsIndirect(GL_TRIANGLES, GL_UNSIGNED_INT, GLB_BYTE_OFFSET(0), NUM_LODS, 0); 583 | 584 | // 2. test all bboxes for visibily 585 | 586 | // 2.1. draw boxes using geometry shader and store visible flag in fragment shader 587 | glBindBuffer(GL_SHADER_STORAGE_BUFFER, _raster_curr_ssbo); 588 | GLuint zero = 0; 589 | glClearBufferData(GL_SHADER_STORAGE_BUFFER, GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, &zero); 590 | glUseProgram(_raster_cull_program); 591 | glDepthMask(GL_FALSE); 592 | glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); 593 | glDrawArrays(GL_POINTS, 0, _instance_count); 594 | glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); 595 | glDepthMask(GL_TRUE); 596 | glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); 597 | 598 | // 2.2. collect visible instance ids: current and not last 599 | glBufferData(GL_DRAW_INDIRECT_BUFFER, NUM_LODS*sizeof(DrawElementsIndirectCommand), _draw_commands, GL_STATIC_DRAW); 600 | glUseProgram(_collect_curr_notlast_program); 601 | glDispatchCompute(_compute_count, 1, 1); 602 | glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); 603 | 604 | // 3. draw newly visible in current frame 605 | glUseProgram(_draw_program); 606 | glMultiDrawElementsIndirect(GL_TRIANGLES, GL_UNSIGNED_INT, GLB_BYTE_OFFSET(0), NUM_LODS, 0); 607 | 608 | // 4. prepare data for next frame 609 | 610 | // 4.1. collect visible instance ids: current 611 | glUseProgram(_collect_curr_program); 612 | glDispatchCompute(_compute_count, 1, 1); 613 | glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); 614 | 615 | // 4.2. 
swap current with last flags 616 | static bool invert = true; 617 | if(invert) 618 | { 619 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 11, _curr_visible_ssbo); 620 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 10, _last_visible_ssbo); 621 | _raster_curr_ssbo = _last_visible_ssbo; 622 | } 623 | else 624 | { 625 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 10, _curr_visible_ssbo); 626 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 11, _last_visible_ssbo); 627 | _raster_curr_ssbo = _curr_visible_ssbo; 628 | } 629 | invert ^= 1; 630 | } 631 | 632 | void compute_renderer::print_per_lod_instance_count() 633 | { 634 | int total = 0; 635 | std::vector cmds(NUM_LODS); 636 | glBindBuffer(GL_DRAW_INDIRECT_BUFFER, _draw_indirect_buffer); 637 | glGetBufferSubData(GL_DRAW_INDIRECT_BUFFER, 0, NUM_LODS*sizeof(DrawElementsIndirectCommand), cmds.data()); 638 | for(const auto& cmd : cmds) 639 | { 640 | io::print(cmd.instanceCount); 641 | total += cmd.instanceCount; 642 | } 643 | io::print("total:", total, "--------------------------"); 644 | } 645 | } 646 | -------------------------------------------------------------------------------- /app/compute_renderer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | namespace glb 6 | { 7 | class framebuffer; 8 | } 9 | 10 | namespace app 11 | { 12 | class compute_renderer : public glb::irenderer 13 | { 14 | public: 15 | virtual bool initialize(glb::framebuffer& fbuffer, glb::camera& cam) override; 16 | virtual bool finalize() override; 17 | virtual void render() override; 18 | 19 | private: 20 | struct mat34 21 | { 22 | mat34(){} 23 | 24 | explicit mat34(const mat4& m) 25 | { 26 | int i = 0; 27 | data[i++] = m.at(0,0); 28 | data[i++] = m.at(0,1); 29 | data[i++] = m.at(0,2); 30 | data[i++] = m.at(0,3); 31 | 32 | data[i++] = m.at(1,0); 33 | data[i++] = m.at(1,1); 34 | data[i++] = m.at(1,2); 35 | data[i++] = m.at(1,3); 36 | 37 | data[i++] = m.at(2,0); 38 | 
data[i++] = m.at(2,1); 39 | data[i++] = m.at(2,2); 40 | data[i++] = m.at(2,3); 41 | } 42 | 43 | mat4 as_mat4() const 44 | { 45 | return mat4(data[0], data[1], data[2], data[3], 46 | data[4], data[5], data[6], data[7], 47 | data[8], data[9], data[10], data[11], 48 | 0,0,0,1); 49 | } 50 | 51 | float data[12]; 52 | }; 53 | 54 | struct DrawElementsIndirectCommand 55 | { 56 | GLuint elementCount; 57 | GLuint instanceCount; 58 | GLuint firstElement; 59 | GLuint baseVertex; 60 | GLuint baseInstance; 61 | }; 62 | 63 | bool initialize_from_file(); 64 | bool initialize_single_mesh(); 65 | bool initialize_common(const vector& transforms); 66 | void render_hiz_last_frame(); 67 | void render_hiz_temporal(); 68 | void render_raster_temporal(); 69 | void print_per_lod_instance_count(); 70 | 71 | static const int NUM_LODS = 4; 72 | unsigned int _instance_count; 73 | unsigned int _compute_count; 74 | 75 | GLuint _draw_vao; 76 | GLuint _draw_program; 77 | GLuint _input_transform_ssbo; 78 | GLuint _input_bound_ssbo; 79 | GLuint _visible_instance_id_ssbo[NUM_LODS]; 80 | GLuint _draw_indirect_buffer; 81 | GLuint _hiz_cull_lastframe_program; 82 | 83 | DrawElementsIndirectCommand _draw_commands[NUM_LODS]; 84 | 85 | glb::framebuffer* _fbuffer; 86 | glb::camera* _camera; 87 | 88 | GLuint _hiz_cull_program; 89 | GLuint _raster_cull_program; 90 | 91 | GLuint _collect_curr_program; 92 | GLuint _collect_curr_notlast_program; 93 | 94 | GLuint _curr_visible_ssbo; 95 | GLuint _last_visible_ssbo; 96 | 97 | GLuint _raster_curr_ssbo; 98 | 99 | GLuint _instance_offset_ubo; 100 | 101 | int _unique_count; 102 | vector _unique_compute_counts; 103 | vector _instance_offsets; 104 | }; 105 | } 106 | -------------------------------------------------------------------------------- /app/multi_renderer.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/potato3d/azdo/26b7db06378d11af82a47dc1c0addb07a98ce87f/app/multi_renderer.cpp 
-------------------------------------------------------------------------------- /app/multi_renderer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/potato3d/azdo/26b7db06378d11af82a47dc1c0addb07a98ce87f/app/multi_renderer.h -------------------------------------------------------------------------------- /app/transform_feeedback_renderer.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | namespace app 9 | { 10 | struct mat34 11 | { 12 | mat34(){} 13 | 14 | explicit mat34(const mat4& m) 15 | { 16 | int i = 0; 17 | data[i++] = m.at(0,0); 18 | data[i++] = m.at(0,1); 19 | data[i++] = m.at(0,2); 20 | data[i++] = m.at(0,3); 21 | 22 | data[i++] = m.at(1,0); 23 | data[i++] = m.at(1,1); 24 | data[i++] = m.at(1,2); 25 | data[i++] = m.at(1,3); 26 | 27 | data[i++] = m.at(2,0); 28 | data[i++] = m.at(2,1); 29 | data[i++] = m.at(2,2); 30 | data[i++] = m.at(2,3); 31 | } 32 | 33 | mat4 as_mat4() const 34 | { 35 | return mat4(data[0], data[1], data[2], data[3], 36 | data[4], data[5], data[6], data[7], 37 | data[8], data[9], data[10], data[11], 38 | 0,0,0,1); 39 | } 40 | 41 | float data[12]; 42 | }; 43 | 44 | struct bbox 45 | { 46 | vec3 min = vec3(math::limit_posf()); 47 | vec3 max = vec3(math::limit_negf()); 48 | 49 | void expand(const vec3& v) 50 | { 51 | min.x = math::min(min.x, v.x); 52 | min.y = math::min(min.y, v.y); 53 | min.z = math::min(min.z, v.z); 54 | max.x = math::max(max.x, v.x); 55 | max.y = math::max(max.y, v.y); 56 | max.z = math::max(max.z, v.z); 57 | } 58 | }; 59 | 60 | static void to_3D(int idx, int max_x, int max_y, int& x, int& y, int& z) 61 | { 62 | x = idx % (max_x+1); 63 | idx /= (max_x+1); 64 | y = idx % (max_y+1); 65 | idx /= (max_y+1); 66 | z = idx; 67 | } 68 | 69 | bool transform_feedback_renderer::initialize(glb::framebuffer& fbuffer, glb::camera& cam) 70 | { 71 | 
// ---------------------------------------------------------------------------------------------------------------------- 72 | // scene setup 73 | // ---------------------------------------------------------------------------------------------------------------------- 74 | 75 | int n_x = 100; 76 | int n_y = 100; 77 | int n_z = 100; 78 | _instance_count = n_x*n_y*n_z; 79 | 80 | tess::triangle_mesh drawable_lods[NUM_LODS] = {tess::tessellate_cylinder(1, 5, 32), 81 | tess::tessellate_cylinder(1, 5, 16), 82 | tess::tessellate_cylinder(1, 5, 8)}; 83 | 84 | // ---------------------------------------------------------------------------------------------------------------------- 85 | // matrix transforms 86 | // ---------------------------------------------------------------------------------------------------------------------- 87 | 88 | std::vector<mat34> transforms(_instance_count); 89 | for(unsigned int i = 0; i < transforms.size(); ++i) 90 | { 91 | int x, y, z; 92 | to_3D(i, n_x, n_y, x, y, z); 93 | transforms[i] = mat34(mat4::translation(vec3((float)x, (float)y, (float)z)*6.0f)); 94 | } 95 | 96 | GLuint tvbo; 97 | glGenBuffers(1, &tvbo); 98 | glBindBuffer(GL_ARRAY_BUFFER, tvbo); 99 | glBufferData(GL_ARRAY_BUFFER, transforms.size()*sizeof(mat34), transforms.data(), GL_STATIC_DRAW); 100 | glBindBuffer(GL_ARRAY_BUFFER, 0); 101 | 102 | // ---------------------------------------------------------------------------------------------------------------------- 103 | // bounding boxes 104 | // ---------------------------------------------------------------------------------------------------------------------- 105 | 106 | std::vector<bbox> bounds(_instance_count); 107 | for(unsigned int i = 0; i < bounds.size(); ++i) 108 | { 109 | bbox& b = bounds[i]; 110 | for(unsigned int v = 0; v < drawable_lods[0].vertices.size(); ++v) 111 | { 112 | b.expand(transforms[i].as_mat4().mul(drawable_lods[0].vertices[v].position)); 113 | } 114 | } 115 | 116 | GLuint bvbo; 117 | glGenBuffers(1, &bvbo); 118 | 
glBindBuffer(GL_ARRAY_BUFFER, bvbo); 119 | glBufferData(GL_ARRAY_BUFFER, bounds.size()*sizeof(bbox), bounds.data(), GL_STATIC_DRAW); 120 | glBindBuffer(GL_ARRAY_BUFFER, 0); 121 | 122 | // ---------------------------------------------------------------------------------------------------------------------- 123 | // cull vao 124 | // ---------------------------------------------------------------------------------------------------------------------- 125 | 126 | glGenVertexArrays(1, &_cull_vao); 127 | glBindVertexArray(_cull_vao); 128 | 129 | // bounding box 130 | glBindBuffer(GL_ARRAY_BUFFER, bvbo); 131 | 132 | glEnableVertexAttribArray(0); 133 | glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, sizeof(bbox), GLB_BYTE_OFFSET(0)); 134 | 135 | glEnableVertexAttribArray(1); 136 | glVertexAttribPointer(1, 3, GL_FLOAT, GL_FALSE, sizeof(bbox), GLB_BYTE_OFFSET(sizeof(vec3))); 137 | 138 | // transform 139 | glBindBuffer(GL_ARRAY_BUFFER, tvbo); 140 | 141 | glEnableVertexAttribArray(2); 142 | glVertexAttribPointer(2, 4, GL_FLOAT, GL_FALSE, sizeof(mat34), GLB_BYTE_OFFSET(0)); 143 | 144 | glEnableVertexAttribArray(3); 145 | glVertexAttribPointer(3, 4, GL_FLOAT, GL_FALSE, sizeof(mat34), GLB_BYTE_OFFSET(sizeof(float)*4)); 146 | 147 | glEnableVertexAttribArray(4); 148 | glVertexAttribPointer(4, 4, GL_FLOAT, GL_FALSE, sizeof(mat34), GLB_BYTE_OFFSET(sizeof(float)*8)); 149 | 150 | glBindVertexArray(0); 151 | 152 | // ---------------------------------------------------------------------------------------------------------------------- 153 | // cull shader 154 | // ---------------------------------------------------------------------------------------------------------------------- 155 | 156 | glb::shader_program_builder spb; 157 | spb.begin(); 158 | if(!spb.add_file(glb::shader_vertex, "../shaders/lod/cull.vert")) 159 | { 160 | return false; 161 | } 162 | if(!spb.add_file(glb::shader_geometry, "../shaders/lod/cull.geom")) 163 | { 164 | return false; 165 | } 166 | 
spb.bind_vertex_attrib("in_bmin", 0); 167 | spb.bind_vertex_attrib("in_bmax", 1); 168 | spb.bind_vertex_attrib("in_transform_0", 2); 169 | spb.bind_vertex_attrib("in_transform_1", 3); 170 | spb.bind_vertex_attrib("in_transform_2", 4); 171 | std::vector<const char*> outputs = {"fdata0.transform_0", "fdata0.transform_1", "fdata0.transform_2", "gl_NextBuffer", 172 | "fdata1.transform_0", "fdata1.transform_1", "fdata1.transform_2", "gl_NextBuffer", 173 | "fdata2.transform_0", "fdata2.transform_1", "fdata2.transform_2"}; 174 | glTransformFeedbackVaryings(spb.get_shader_program().get_id(), outputs.size(), outputs.data(), GL_INTERLEAVED_ATTRIBS); 175 | if(!spb.end()) 176 | { 177 | return false; 178 | } 179 | auto cull_shader = spb.get_shader_program(); 180 | cull_shader.bind_uniform_buffer("cdata", cam.get_uniform_buffer()); 181 | _cull_shader = cull_shader.get_id(); 182 | 183 | // ---------------------------------------------------------------------------------------------------------------------- 184 | // visible instance data (transform feedback output, draw input) 185 | // ---------------------------------------------------------------------------------------------------------------------- 186 | 187 | std::vector<mat34> empty(transforms.size()); 188 | 189 | for(unsigned int i = 0; i < NUM_LODS; ++i) 190 | { 191 | glGenBuffers(1, &_visible_ssbo[i]); 192 | glBindBuffer(GL_SHADER_STORAGE_BUFFER, _visible_ssbo[i]); 193 | glBufferData(GL_SHADER_STORAGE_BUFFER, empty.size()*sizeof(mat34), empty.data(), GL_STREAM_COPY); 194 | glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); 195 | } 196 | 197 | // ---------------------------------------------------------------------------------------------------------------------- 198 | // transform feedback 199 | // ---------------------------------------------------------------------------------------------------------------------- 200 | 201 | glGenTransformFeedbacks(1, &_feedback); 202 | glBindTransformFeedback(GL_TRANSFORM_FEEDBACK, _feedback); 203 | for(unsigned 
int i = 0; i < NUM_LODS; ++i) 204 | { 205 | glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, i, _visible_ssbo[i]); 206 | } 207 | glBindTransformFeedback(GL_TRANSFORM_FEEDBACK, 0); 208 | 209 | // ---------------------------------------------------------------------------------------------------------------------- 210 | // visible query 211 | // ---------------------------------------------------------------------------------------------------------------------- 212 | 213 | for(unsigned int i = 0; i < NUM_LODS; ++i) 214 | { 215 | glGenQueries(1, &_visible_query[i]); 216 | } 217 | 218 | // ---------------------------------------------------------------------------------------------------------------------- 219 | // draw vao 220 | // ---------------------------------------------------------------------------------------------------------------------- 221 | 222 | for(unsigned int i = 0; i < NUM_LODS; ++i) 223 | { 224 | _draw_elem_count[i] = drawable_lods[i].elements.size(); 225 | 226 | glGenVertexArrays(1, &_draw_vao[i]); 227 | glBindVertexArray(_draw_vao[i]); 228 | 229 | GLuint vbo; 230 | glGenBuffers(1, &vbo); 231 | glBindBuffer(GL_ARRAY_BUFFER, vbo); 232 | glBufferData(GL_ARRAY_BUFFER, drawable_lods[i].vertices.size()*sizeof(tess::vertex), drawable_lods[i].vertices.data(), GL_STATIC_DRAW); 233 | 234 | glEnableVertexAttribArray(0); 235 | glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, sizeof(tess::vertex), GLB_BYTE_OFFSET(0)); 236 | 237 | glEnableVertexAttribArray(1); 238 | glVertexAttribPointer(1, 3, GL_FLOAT, GL_FALSE, sizeof(tess::vertex), GLB_BYTE_OFFSET(sizeof(vec3))); 239 | 240 | GLuint ebo; 241 | glGenBuffers(1, &ebo); 242 | glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo); 243 | glBufferData(GL_ELEMENT_ARRAY_BUFFER, drawable_lods[i].elements.size()*sizeof(tess::element), drawable_lods[i].elements.data(), GL_STATIC_DRAW); 244 | 245 | glBindVertexArray(0); 246 | } 247 | 248 | // 
---------------------------------------------------------------------------------------------------------------------- 249 | // draw shader 250 | // ---------------------------------------------------------------------------------------------------------------------- 251 | 252 | spb.begin(); 253 | if(!spb.add_file(glb::shader_vertex, "../shaders/lod/draw.vert")) 254 | { 255 | return false; 256 | } 257 | if(!spb.add_file(glb::shader_fragment, "../shaders/lod/draw.frag")) 258 | { 259 | return false; 260 | } 261 | spb.bind_vertex_attrib("in_position", 0); 262 | spb.bind_vertex_attrib("in_normal", 1); 263 | spb.bind_draw_buffer("out_color", fbuffer.get_color_buffer_to_display()); 264 | if(!spb.end()) 265 | { 266 | return false; 267 | } 268 | auto draw_shader = spb.get_shader_program(); 269 | draw_shader.bind_uniform_buffer("cdata", cam.get_uniform_buffer()); 270 | draw_shader.set_uniform("tex_transforms", 0); 271 | _draw_shader = draw_shader.get_id(); 272 | 273 | GLuint block_index = glGetProgramResourceIndex(_draw_shader, GL_SHADER_STORAGE_BLOCK, "tdata"); 274 | glShaderStorageBlockBinding(_draw_shader, block_index, 0); 275 | 276 | return true; 277 | } 278 | 279 | bool transform_feedback_renderer::finalize() 280 | { 281 | return true; 282 | } 283 | 284 | void transform_feedback_renderer::render() 285 | { 286 | // ---------------------------------------------------------------------------------------------------------------------- 287 | // cull and select lod 288 | // ---------------------------------------------------------------------------------------------------------------------- 289 | 290 | glEnable(GL_RASTERIZER_DISCARD); 291 | glUseProgram(_cull_shader); 292 | glBindVertexArray(_cull_vao); 293 | glBindTransformFeedback(GL_TRANSFORM_FEEDBACK, _feedback); 294 | for(unsigned int i = 0; i < NUM_LODS; ++i) 295 | { 296 | glBeginQueryIndexed(GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN, i, _visible_query[i]); 297 | } 298 | glBeginTransformFeedback(GL_POINTS); 299 | 
glDrawArrays(GL_POINTS, 0, _instance_count); 300 | glEndTransformFeedback(); 301 | for(unsigned int i = 0; i < NUM_LODS; ++i) 302 | { 303 | glEndQueryIndexed(GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN, i); 304 | } 305 | glBindTransformFeedback(GL_TRANSFORM_FEEDBACK, 0); 306 | glDisable(GL_RASTERIZER_DISCARD); 307 | 308 | // ---------------------------------------------------------------------------------------------------------------------- 309 | // draw visible 310 | // ---------------------------------------------------------------------------------------------------------------------- 311 | 312 | glUseProgram(_draw_shader); 313 | 314 | for(unsigned int i = 0; i < NUM_LODS; ++i) 315 | { 316 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, _visible_ssbo[i]); 317 | glBindVertexArray(_draw_vao[i]); 318 | GLuint visible_instance_count = 0; 319 | glGetQueryObjectuiv(_visible_query[i], GL_QUERY_RESULT, &visible_instance_count); 320 | glDrawElementsInstanced(GL_TRIANGLES, _draw_elem_count[i], GL_UNSIGNED_INT, NULL, visible_instance_count); 321 | // io::print(visible_instance_count, "of", _instance_count, "(", i ,")"); 322 | } 323 | } 324 | } 325 | -------------------------------------------------------------------------------- /app/transform_feeedback_renderer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | namespace app 5 | { 6 | class transform_feedback_renderer : public glb::irenderer 7 | { 8 | public: 9 | virtual bool initialize(glb::framebuffer& fbuffer, glb::camera& cam) override; 10 | virtual bool finalize() override; 11 | virtual void render() override; 12 | 13 | private: 14 | static const unsigned int NUM_LODS = 3; 15 | 16 | unsigned int _instance_count; 17 | 18 | unsigned int _cull_vao; 19 | unsigned int _cull_shader; 20 | unsigned int _feedback; 21 | unsigned int _visible_query[NUM_LODS]; 22 | 23 | unsigned int _visible_ssbo[NUM_LODS]; 24 | 25 | unsigned int _draw_vao[NUM_LODS]; 26 | 
unsigned int _draw_elem_count[NUM_LODS]; 27 | unsigned int _draw_shader; 28 | }; 29 | } 30 | -------------------------------------------------------------------------------- /imgs/fig1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/potato3d/azdo/26b7db06378d11af82a47dc1c0addb07a98ce87f/imgs/fig1.png -------------------------------------------------------------------------------- /imgs/fig2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/potato3d/azdo/26b7db06378d11af82a47dc1c0addb07a98ce87f/imgs/fig2.png -------------------------------------------------------------------------------- /imgs/fig3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/potato3d/azdo/26b7db06378d11af82a47dc1c0addb07a98ce87f/imgs/fig3.png -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | int main(int argc, char** argv) 7 | { 8 | app::application a; 9 | glv::viewer::set_application(&a); 10 | glv::first_person_manip first_person; 11 | glv::viewer::set_camera_manip(&first_person); 12 | return glv::viewer::exec(argc, argv); 13 | } 14 | -------------------------------------------------------------------------------- /shaders/cull.cs: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | struct Transform 4 | { 5 | vec4 m0; 6 | vec4 m1; 7 | vec4 m2; 8 | }; 9 | 10 | struct Bound 11 | { 12 | vec4 bmin; 13 | vec4 bmax; 14 | }; 15 | 16 | // -------------------------------------------------------------------------------------------------------------- 17 | // INPUTS 18 | // 
-------------------------------------------------------------------------------------------------------------- 19 | 20 | layout(local_size_x = 256) in; 21 | 22 | layout(location = 0) uniform uint instance_count; 23 | 24 | layout(binding = 0) uniform sampler2D depth_texture; 25 | 26 | layout(std140) uniform camera_data 27 | { 28 | mat4 view_proj_matrix; 29 | mat4 normal_matrix; 30 | mat4 view_matrix; 31 | } camera; 32 | 33 | layout(std430, binding = 0) buffer bound_in 34 | { 35 | readonly Bound data[]; 36 | } input_bound; 37 | 38 | // -------------------------------------------------------------------------------------------------------------- 39 | // OUTPUTS 40 | // -------------------------------------------------------------------------------------------------------------- 41 | 42 | layout(binding = 0) uniform atomic_uint instance_count_lod_0; 43 | layout(binding = 1) uniform atomic_uint instance_count_lod_1; 44 | layout(binding = 2) uniform atomic_uint instance_count_lod_2; 45 | layout(binding = 3) uniform atomic_uint instance_count_lod_3; 46 | 47 | layout(std430, binding = 2) buffer instance_id_out_lod_0 48 | { 49 | writeonly uint data[]; 50 | } output_instance_id_lod_0; 51 | 52 | layout(std430, binding = 3) buffer instance_id_out_lod_1 53 | { 54 | writeonly uint data[]; 55 | } output_instance_id_lod_1; 56 | 57 | layout(std430, binding = 4) buffer instance_id_out_lod_2 58 | { 59 | writeonly uint data[]; 60 | } output_instance_id_lod_2; 61 | 62 | layout(std430, binding = 5) buffer instance_id_out_lod_3 63 | { 64 | writeonly uint data[]; 65 | } output_instance_id_lod_3; 66 | 67 | // -------------------------------------------------------------------------------------------------------------- 68 | // AUXILIARY FUNCTIONS 69 | // -------------------------------------------------------------------------------------------------------------- 70 | 71 | bool visible(const Bound bound) 72 | { 73 | // 
------------------------------------------------------------------------------ 74 | // frustum culling 75 | // ------------------------------------------------------------------------------ 76 | 77 | // tests *must* be done in clip space, *not* NDC space 78 | 79 | // clip-space bounding box 80 | vec4 bbox[8]; 81 | bbox[0] = camera.view_proj_matrix * bound.bmax; 82 | bbox[1] = camera.view_proj_matrix * vec4(bound.bmin.x, bound.bmax.y, bound.bmax.z, 1.0f); 83 | bbox[2] = camera.view_proj_matrix * vec4(bound.bmax.x, bound.bmin.y, bound.bmax.z, 1.0f); 84 | bbox[3] = camera.view_proj_matrix * vec4(bound.bmin.x, bound.bmin.y, bound.bmax.z, 1.0f); 85 | bbox[4] = camera.view_proj_matrix * vec4(bound.bmax.x, bound.bmax.y, bound.bmin.z, 1.0f); 86 | bbox[5] = camera.view_proj_matrix * vec4(bound.bmin.x, bound.bmax.y, bound.bmin.z, 1.0f); 87 | bbox[6] = camera.view_proj_matrix * vec4(bound.bmax.x, bound.bmin.y, bound.bmin.z, 1.0f); 88 | bbox[7] = camera.view_proj_matrix * bound.bmin; 89 | 90 | // count how many vertices are outside each frustum plane 91 | int numOutside[6] = int[6](0,0,0,0,0,0); 92 | 93 | for(int i = 0; i < 8; ++i) 94 | { 95 | if(bbox[i].x > bbox[i].w) ++numOutside[0]; 96 | if(bbox[i].x < -bbox[i].w) ++numOutside[1]; 97 | if(bbox[i].y > bbox[i].w) ++numOutside[2]; 98 | if(bbox[i].y < -bbox[i].w) ++numOutside[3]; 99 | if(bbox[i].z > bbox[i].w) ++numOutside[4]; 100 | if(bbox[i].z < -bbox[i].w) ++numOutside[5]; 101 | } 102 | 103 | // if all vertices are outside at least one frustum plane, discard 104 | for(int i = 0; i < 6; ++i) 105 | { 106 | if(numOutside[i] == 8) 107 | { 108 | return false; 109 | } 110 | } 111 | 112 | // ------------------------------------------------------------------------------ 113 | // occlusion culling 114 | // ------------------------------------------------------------------------------ 115 | 116 | // if bounding box crosses near-plane, consider visible 117 | if(numOutside[5] > 0) 118 | { 119 | return true; 120 | } 121 | 122 | // 
convert to NDC coordinates 123 | vec3 ndc_min = bbox[0].xyz / bbox[0].w; 124 | vec3 ndc_max = ndc_min; 125 | for(int i = 1; i < 8; ++i) 126 | { 127 | ndc_min = min(ndc_min, bbox[i].xyz / bbox[i].w); 128 | ndc_max = max(ndc_max, bbox[i].xyz / bbox[i].w); 129 | } 130 | 131 | ndc_min = ndc_min * 0.5 + 0.5; 132 | ndc_max = ndc_max * 0.5 + 0.5; 133 | 134 | // compute screen size in pixels 135 | vec2 size = (ndc_max.xy - ndc_min.xy); 136 | ivec2 texsize = textureSize(depth_texture,0); 137 | float maxsize = max(size.x, size.y) * float(max(texsize.x,texsize.y)); 138 | 139 | // small-feature culling 140 | if(maxsize <= 1.0f) 141 | { 142 | return false; 143 | } 144 | 145 | // compute correct hi-z mipmap level 146 | float miplevel = ceil(log2(maxsize)); 147 | 148 | // fetch 4 hi-z depths that cover screen-space bounding box 149 | float depth = 0.0f; 150 | float a = textureLod(depth_texture,ndc_min.xy,miplevel).r; 151 | float b = textureLod(depth_texture,vec2(ndc_max.x,ndc_min.y),miplevel).r; 152 | float c = textureLod(depth_texture,ndc_max.xy,miplevel).r; 153 | float d = textureLod(depth_texture,vec2(ndc_min.x,ndc_max.y),miplevel).r; 154 | depth = max(depth,max(max(max(a,b),c),d)); 155 | 156 | return ndc_min.z < depth; 157 | } 158 | 159 | // -------------------------------------------------------------------------------------------------------------- 160 | // MAIN 161 | // -------------------------------------------------------------------------------------------------------------- 162 | 163 | void main() 164 | { 165 | uint instance_id = gl_GlobalInvocationID.x; 166 | 167 | if(instance_id >= instance_count) 168 | { 169 | return; 170 | } 171 | 172 | const Bound b = input_bound.data[instance_id]; 173 | 174 | if(!visible(b)) 175 | { 176 | return; 177 | } 178 | 179 | const float distance = -(camera.view_matrix * vec4((b.bmin.xyz + b.bmax.xyz) * 0.5f, 1.0f)).z; 180 | 181 | if(distance < 100.0f) 182 | { 183 | uint count = atomicCounterIncrement(instance_count_lod_0); 184 | 
output_instance_id_lod_0.data[count] = instance_id; 185 | } 186 | else if(distance < 200.0f) 187 | { 188 | uint count = atomicCounterIncrement(instance_count_lod_1); 189 | output_instance_id_lod_1.data[count] = instance_id; 190 | } 191 | else if(distance < 400.0f) 192 | { 193 | uint count = atomicCounterIncrement(instance_count_lod_2); 194 | output_instance_id_lod_2.data[count] = instance_id; 195 | } 196 | else 197 | { 198 | uint count = atomicCounterIncrement(instance_count_lod_3); 199 | output_instance_id_lod_3.data[count] = instance_id; 200 | } 201 | } 202 | -------------------------------------------------------------------------------- /shaders/cull.geom: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | layout (points) in; 4 | layout (points, max_vertices = 1) out; 5 | 6 | layout(std140) uniform cdata 7 | { 8 | mat4 view_proj_matrix; 9 | mat4 normal_matrix; 10 | mat4 view_matrix; 11 | } camera; 12 | 13 | in vdata 14 | { 15 | bool visible; 16 | vec3 position; 17 | vec4 transform_0; 18 | vec4 transform_1; 19 | vec4 transform_2; 20 | } vin[]; 21 | 22 | layout(stream = 0) out fdata0 23 | { 24 | vec4 transform_0; 25 | vec4 transform_1; 26 | vec4 transform_2; 27 | } vout0; 28 | 29 | layout(stream = 1) out fdata1 30 | { 31 | vec4 transform_0; 32 | vec4 transform_1; 33 | vec4 transform_2; 34 | } vout1; 35 | 36 | layout(stream = 2) out fdata2 37 | { 38 | vec4 transform_0; 39 | vec4 transform_1; 40 | vec4 transform_2; 41 | } vout2; 42 | 43 | void main() 44 | { 45 | if(vin[0].visible) 46 | { 47 | float distance = -(camera.view_matrix * vec4(vin[0].position, 1.0f)).z; 48 | 49 | if(distance < 100) 50 | { 51 | vout0.transform_0 = vin[0].transform_0; 52 | vout0.transform_1 = vin[0].transform_1; 53 | vout0.transform_2 = vin[0].transform_2; 54 | EmitStreamVertex(0); 55 | } 56 | else if(distance < 500) 57 | { 58 | vout1.transform_0 = vin[0].transform_0; 59 | vout1.transform_1 = vin[0].transform_1; 60 | vout1.transform_2 = 
vin[0].transform_2; 61 | EmitStreamVertex(1); 62 | } 63 | else 64 | { 65 | vout2.transform_0 = vin[0].transform_0; 66 | vout2.transform_1 = vin[0].transform_1; 67 | vout2.transform_2 = vin[0].transform_2; 68 | EmitStreamVertex(2); 69 | } 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /shaders/cull.vert: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | layout(std140) uniform cdata 4 | { 5 | mat4 view_proj_matrix; 6 | mat4 normal_matrix; 7 | mat4 view_matrix; 8 | } camera; 9 | 10 | in vec3 in_bmin; 11 | in vec3 in_bmax; 12 | in vec4 in_transform_0; 13 | in vec4 in_transform_1; 14 | in vec4 in_transform_2; 15 | 16 | out vdata 17 | { 18 | bool visible; 19 | vec3 position; 20 | vec4 transform_0; 21 | vec4 transform_1; 22 | vec4 transform_2; 23 | } vout; 24 | 25 | bool visible_frustum() 26 | { 27 | vec4 bbox[8]; 28 | bbox[0] = camera.view_proj_matrix * vec4(in_bmax, 1.0f); 29 | bbox[1] = camera.view_proj_matrix * vec4(in_bmin.x, in_bmax.y, in_bmax.z, 1.0f); 30 | bbox[2] = camera.view_proj_matrix * vec4(in_bmax.x, in_bmin.y, in_bmax.z, 1.0f); 31 | bbox[3] = camera.view_proj_matrix * vec4(in_bmin.x, in_bmin.y, in_bmax.z, 1.0f); 32 | bbox[4] = camera.view_proj_matrix * vec4(in_bmax.x, in_bmax.y, in_bmin.z, 1.0f); 33 | bbox[5] = camera.view_proj_matrix * vec4(in_bmin.x, in_bmax.y, in_bmin.z, 1.0f); 34 | bbox[6] = camera.view_proj_matrix * vec4(in_bmax.x, in_bmin.y, in_bmin.z, 1.0f); 35 | bbox[7] = camera.view_proj_matrix * vec4(in_bmin, 1.0f); 36 | 37 | int outOfBound[6] = int[6](0, 0, 0, 0, 0, 0); 38 | 39 | for(int i = 0; i < 8; ++i) 40 | { 41 | if(bbox[i].x > bbox[i].w) outOfBound[0]++; 42 | if(bbox[i].x < -bbox[i].w) outOfBound[1]++; 43 | if(bbox[i].y > bbox[i].w) outOfBound[2]++; 44 | if(bbox[i].y < -bbox[i].w) outOfBound[3]++; 45 | if(bbox[i].z > bbox[i].w) outOfBound[4]++; 46 | if(bbox[i].z < -bbox[i].w) outOfBound[5]++; 47 | } 48 | 49 | for(int i = 0; i < 6; 
++i) 50 | { 51 | if(outOfBound[i] == 8) 52 | { 53 | return false; 54 | } 55 | } 56 | 57 | return true; 58 | } 59 | 60 | void main() 61 | { 62 | vout.visible = visible_frustum(); 63 | vout.position = (in_bmin + in_bmax) * 0.5f; 64 | vout.transform_0 = in_transform_0; 65 | vout.transform_1 = in_transform_1; 66 | vout.transform_2 = in_transform_2; 67 | } 68 | -------------------------------------------------------------------------------- /shaders/draw.frag: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | in fdata 4 | { 5 | vec3 eye_position; 6 | vec3 eye_normal; 7 | } fin; 8 | 9 | out vec4 out_color; 10 | 11 | const vec3 DEFAULT_AMBIENT = vec3(0.2f, 0.2f, 0.2f); 12 | const vec3 DEFAULT_DIFFUSE = vec3(0.6f, 0.6f, 0.6f); 13 | const vec3 DEFAULT_SPECULAR = vec3(0.4f, 0.4f, 0.4f); 14 | const float MATERIAL_SHININESS = 32.0f; 15 | const float MATERIAL_ALPHA = 1.0f; 16 | 17 | void main() 18 | { 19 | const vec3 ambient = DEFAULT_AMBIENT; 20 | vec3 diffuse = vec3(0.0); 21 | vec3 specular = vec3(0.0); 22 | 23 | const vec3 n = normalize(fin.eye_normal); 24 | const vec3 l = normalize(-fin.eye_position); // light - vert 25 | 26 | float diffuseIntensity = dot(n,l); 27 | if(diffuseIntensity > 0.0f) 28 | { 29 | diffuse = DEFAULT_DIFFUSE * diffuseIntensity; 30 | 31 | const vec3 r = reflect(-l,n); 32 | const vec3 e = l; // cam - vert 33 | const float specularIntensity = max(dot(r,e), 0.0f); 34 | specular = DEFAULT_SPECULAR * pow(specularIntensity, MATERIAL_SHININESS); 35 | } 36 | 37 | out_color = vec4(ambient + diffuse + specular, MATERIAL_ALPHA); 38 | } 39 | -------------------------------------------------------------------------------- /shaders/draw.fs: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | in fdata 4 | { 5 | vec3 eye_position; 6 | vec3 eye_normal; 7 | } fin; 8 | 9 | layout(location = 0) out vec4 output_color; 10 | 11 | const vec3 DEFAULT_AMBIENT = 
vec3(0.2f, 0.2f, 0.2f); 12 | const vec3 DEFAULT_DIFFUSE = vec3(0.6f, 0.6f, 0.6f); 13 | const vec3 DEFAULT_SPECULAR = vec3(0.4f, 0.4f, 0.4f); 14 | const float MATERIAL_SHININESS = 32.0f; 15 | const float MATERIAL_ALPHA = 1.0f; 16 | 17 | void main() 18 | { 19 | const vec3 ambient = DEFAULT_AMBIENT; 20 | vec3 diffuse = vec3(0.0); 21 | vec3 specular = vec3(0.0); 22 | 23 | const vec3 n = normalize(fin.eye_normal); 24 | const vec3 l = normalize(-fin.eye_position); // light - vert 25 | 26 | float diffuseIntensity = dot(n,l); 27 | if(diffuseIntensity > 0.0f) 28 | { 29 | diffuse = DEFAULT_DIFFUSE * diffuseIntensity; 30 | 31 | const vec3 r = reflect(-l,n); 32 | const vec3 e = l; // cam - vert 33 | const float specularIntensity = max(dot(r,e), 0.0f); 34 | specular = DEFAULT_SPECULAR * pow(specularIntensity, MATERIAL_SHININESS); 35 | } 36 | 37 | output_color = vec4(ambient + diffuse + specular, MATERIAL_ALPHA); 38 | } 39 | -------------------------------------------------------------------------------- /shaders/draw.vert: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | layout(std140) uniform cdata 4 | { 5 | mat4 view_proj_matrix; 6 | mat4 normal_matrix; 7 | mat4 view_matrix; 8 | } camera; 9 | 10 | layout(std430) buffer tdata 11 | { 12 | vec4 transforms[]; 13 | } tin; 14 | 15 | in vec3 in_position; 16 | in vec3 in_normal; 17 | 18 | out fdata 19 | { 20 | vec3 eye_position; 21 | vec3 eye_normal; 22 | } vout; 23 | 24 | void main() 25 | { 26 | const mat4 transform = mat4(tin.transforms[gl_InstanceID*3+0], 27 | tin.transforms[gl_InstanceID*3+1], 28 | tin.transforms[gl_InstanceID*3+2], 29 | vec4(0.0f, 0.0f, 0.0f, 1.0f)); 30 | 31 | const vec4 world_position_homogeneous = vec4(in_position, 1.0f) * transform; 32 | const vec4 world_position = vec4(world_position_homogeneous.xyz/world_position_homogeneous.w, 1.0f); 33 | const vec3 world_normal = mat3(inverse(transform)) * in_normal; 34 | 35 | vout.eye_position = 
vec3(camera.view_matrix * world_position); 36 | vout.eye_normal = normalize(mat3(camera.normal_matrix) * world_normal); 37 | 38 | gl_Position = camera.view_proj_matrix * world_position; 39 | } 40 | -------------------------------------------------------------------------------- /shaders/draw.vs: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | struct Transform 4 | { 5 | vec4 m0; 6 | vec4 m1; 7 | vec4 m2; 8 | }; 9 | 10 | // -------------------------------------------------------------------------------------------------------------- 11 | // INPUTS 12 | // -------------------------------------------------------------------------------------------------------------- 13 | 14 | layout(std140) uniform cdata 15 | { 16 | mat4 view_proj_matrix; 17 | mat4 normal_matrix; 18 | mat4 view_matrix; 19 | } input_camera; 20 | 21 | layout(std430, binding = 1) buffer transform_in 22 | { 23 | readonly Transform data[]; 24 | } input_transform; 25 | 26 | layout(std430, binding = 2) buffer instance_id_in_lod_0 27 | { 28 | readonly uint data[]; 29 | } input_instance_id_lod_0; 30 | 31 | layout(std430, binding = 3) buffer instance_id_in_lod_1 32 | { 33 | readonly uint data[]; 34 | } input_instance_id_lod_1; 35 | 36 | layout(std430, binding = 4) buffer instance_id_in_lod_2 37 | { 38 | readonly uint data[]; 39 | } input_instance_id_lod_2; 40 | 41 | layout(std430, binding = 5) buffer instance_id_in_lod_3 42 | { 43 | readonly uint data[]; 44 | } input_instance_id_lod_3; 45 | 46 | layout(location = 0) in vec3 input_position; 47 | layout(location = 1) in vec3 input_normal; 48 | layout(location = 2) in uint input_lod_level; 49 | 50 | // -------------------------------------------------------------------------------------------------------------- 51 | // OUTPUTS 52 | // -------------------------------------------------------------------------------------------------------------- 53 | 54 | out fdata 55 | { 56 | vec3 eye_position; 57 | vec3 
// Vertex entry point for the LOD draw pass.
// gl_InstanceID indexes the instance-id list that belongs to the LOD level
// carried by the input_lod_level attribute; the id found there selects the
// 3x4 transform used to place the vertex.
void main()
{
    // levels 0..2 have dedicated lists, every other value uses the LOD 3 list
    uint source_instance;
    if(input_lod_level == 0u)
    {
        source_instance = input_instance_id_lod_0.data[gl_InstanceID];
    }
    else if(input_lod_level == 1u)
    {
        source_instance = input_instance_id_lod_1.data[gl_InstanceID];
    }
    else if(input_lod_level == 2u)
    {
        source_instance = input_instance_id_lod_2.data[gl_InstanceID];
    }
    else
    {
        source_instance = input_instance_id_lod_3.data[gl_InstanceID];
    }

    // the three stored vec4s become the first three columns of a mat4; the
    // position is multiplied from the left, i.e. treated as a row vector
    const Transform packed = input_transform.data[source_instance];
    const mat4 model = mat4(packed.m0,
                            packed.m1,
                            packed.m2,
                            vec4(0.0f, 0.0f, 0.0f, 1.0f));

    const vec4 placed = vec4(input_position, 1.0f) * model;
    const vec4 world_position = vec4(placed.xyz / placed.w, 1.0f);
    const vec3 world_normal = mat3(inverse(model)) * input_normal;

    vout.eye_position = vec3(input_camera.view_matrix * world_position);
    vout.eye_normal = normalize(mat3(input_camera.normal_matrix) * world_normal);

    gl_Position = input_camera.view_proj_matrix * world_position;
}
https://raw.githubusercontent.com/potato3d/azdo/26b7db06378d11af82a47dc1c0addb07a98ce87f/shaders/multi/multi_draw.frag -------------------------------------------------------------------------------- /shaders/multi/multi_draw.vert: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/potato3d/azdo/26b7db06378d11af82a47dc1c0addb07a98ce87f/shaders/multi/multi_draw.vert -------------------------------------------------------------------------------- /shaders/temporal/collect_curr.cs: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | struct Bound 4 | { 5 | vec4 bmin; 6 | vec4 bmax; 7 | }; 8 | 9 | // -------------------------------------------------------------------------------------------------------------- 10 | // INPUTS 11 | // -------------------------------------------------------------------------------------------------------------- 12 | 13 | layout(local_size_x = 256) in; 14 | 15 | layout(location = 0) uniform uint instance_count; 16 | 17 | layout(std140) uniform camera_data 18 | { 19 | mat4 view_proj_matrix; 20 | mat4 normal_matrix; 21 | mat4 view_matrix; 22 | } camera; 23 | 24 | layout(std430, binding = 0) buffer bound_in 25 | { 26 | readonly Bound data[]; 27 | } input_bound; 28 | 29 | layout(std430, binding = 10) buffer visible_curr_in 30 | { 31 | readonly uint data[]; 32 | } input_visible_curr; 33 | 34 | // -------------------------------------------------------------------------------------------------------------- 35 | // OUTPUTS 36 | // -------------------------------------------------------------------------------------------------------------- 37 | 38 | layout(binding = 0) uniform atomic_uint instance_count_lod_0; 39 | layout(binding = 1) uniform atomic_uint instance_count_lod_1; 40 | layout(binding = 2) uniform atomic_uint instance_count_lod_2; 41 | layout(binding = 3) uniform atomic_uint instance_count_lod_3; 42 | 43 | layout(std430, 
// Compute entry point: one invocation per instance.
// Instances whose flag in input_visible_curr is non-zero are appended to one
// of four per-LOD id lists; the slot in each list comes from the matching
// atomic counter.
void main()
{
    const uint id = gl_GlobalInvocationID.x;

    // surplus invocations of the last work group have nothing to do
    if(id >= instance_count)
    {
        return;
    }

    if(input_visible_curr.data[id] == 0u)
    {
        return;
    }

    // LOD metric: negated z of the box center after camera.view_matrix
    const Bound box = input_bound.data[id];
    const vec3 center = (box.bmin.xyz + box.bmax.xyz) * 0.5f;
    const float eye_depth = -(camera.view_matrix * vec4(center, 1.0f)).z;

    uint lod = 3u;
    if(eye_depth < 100.0f)
    {
        lod = 0u;
    }
    else if(eye_depth < 200.0f)
    {
        lod = 1u;
    }
    else if(eye_depth < 400.0f)
    {
        lod = 2u;
    }

    switch(lod)
    {
    case 0u:
        output_instance_id_lod_0.data[atomicCounterIncrement(instance_count_lod_0)] = id;
        break;
    case 1u:
        output_instance_id_lod_1.data[atomicCounterIncrement(instance_count_lod_1)] = id;
        break;
    case 2u:
        output_instance_id_lod_2.data[atomicCounterIncrement(instance_count_lod_2)] = id;
        break;
    default:
        output_instance_id_lod_3.data[atomicCounterIncrement(instance_count_lod_3)] = id;
        break;
    }
}
-------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | struct Bound 4 | { 5 | vec4 bmin; 6 | vec4 bmax; 7 | }; 8 | 9 | // -------------------------------------------------------------------------------------------------------------- 10 | // INPUTS 11 | // -------------------------------------------------------------------------------------------------------------- 12 | 13 | layout(local_size_x = 256) in; 14 | 15 | layout(location = 0) uniform uint instance_count; 16 | 17 | layout(std140) uniform camera_data 18 | { 19 | mat4 view_proj_matrix; 20 | mat4 normal_matrix; 21 | mat4 view_matrix; 22 | } camera; 23 | 24 | layout(std430, binding = 0) buffer bound_in 25 | { 26 | readonly Bound data[]; 27 | } input_bound; 28 | 29 | layout(std430, binding = 10) buffer curr_visible_in 30 | { 31 | readonly uint data[]; 32 | } input_visible_curr; 33 | 34 | layout(std430, binding = 11) buffer last_visible_in 35 | { 36 | readonly uint data[]; 37 | } input_visible_last; 38 | 39 | // -------------------------------------------------------------------------------------------------------------- 40 | // OUTPUTS 41 | // -------------------------------------------------------------------------------------------------------------- 42 | 43 | layout(binding = 0) uniform atomic_uint instance_count_lod_0; 44 | layout(binding = 1) uniform atomic_uint instance_count_lod_1; 45 | layout(binding = 2) uniform atomic_uint instance_count_lod_2; 46 | layout(binding = 3) uniform atomic_uint instance_count_lod_3; 47 | 48 | layout(std430, binding = 2) buffer instance_id_out_lod_0 49 | { 50 | writeonly uint data[]; 51 | } output_instance_id_lod_0; 52 | 53 | layout(std430, binding = 3) buffer instance_id_out_lod_1 54 | { 55 | writeonly uint data[]; 56 | } output_instance_id_lod_1; 57 | 58 | layout(std430, binding = 4) buffer instance_id_out_lod_2 59 | { 60 | writeonly uint data[]; 61 | } output_instance_id_lod_2; 62 | 63 | layout(std430, binding = 5) buffer 
// Compute entry point: one invocation per instance.
// Appends an instance to a per-LOD id list only when its flag in
// input_visible_curr is non-zero and its flag in input_visible_last is not 1.
void main()
{
    const uint id = gl_GlobalInvocationID.x;

    // surplus invocations of the last work group have nothing to do
    if(id >= instance_count)
    {
        return;
    }

    // not flagged in the current visibility buffer
    if(input_visible_curr.data[id] == 0u)
    {
        return;
    }

    // flagged in the "last" visibility buffer: skipped by this pass
    if(input_visible_last.data[id] == 1u)
    {
        return;
    }

    // LOD metric: negated z of the box center after camera.view_matrix
    const Bound box = input_bound.data[id];
    const vec3 center = (box.bmin.xyz + box.bmax.xyz) * 0.5f;
    const float eye_depth = -(camera.view_matrix * vec4(center, 1.0f)).z;

    const uint lod = (eye_depth < 100.0f) ? 0u
                   : (eye_depth < 200.0f) ? 1u
                   : (eye_depth < 400.0f) ? 2u
                   : 3u;

    if(lod == 0u)
    {
        const uint slot = atomicCounterIncrement(instance_count_lod_0);
        output_instance_id_lod_0.data[slot] = id;
    }
    else if(lod == 1u)
    {
        const uint slot = atomicCounterIncrement(instance_count_lod_1);
        output_instance_id_lod_1.data[slot] = id;
    }
    else if(lod == 2u)
    {
        const uint slot = atomicCounterIncrement(instance_count_lod_2);
        output_instance_id_lod_2.data[slot] = id;
    }
    else
    {
        const uint slot = atomicCounterIncrement(instance_count_lod_3);
        output_instance_id_lod_3.data[slot] = id;
    }
}
// Visibility test for one bounding box.
// Returns 0 when the box is rejected by the frustum test, by the small-feature
// test, or by the hi-z depth comparison; returns 1 otherwise (including every
// box that has a corner beyond the near plane).
uint visible(const Bound bound)
{
    // ------------------------------------------------------------------------------
    // frustum culling (done in clip space, before any perspective divide)
    // ------------------------------------------------------------------------------

    const vec3 lo = bound.bmin.xyz;
    const vec3 hi = bound.bmax.xyz;

    // the eight box corners in clip space; corners 0 and 7 use the stored vec4
    // (including its own w component) directly
    vec4 corner[8];
    corner[0] = camera.view_proj_matrix * bound.bmax;
    corner[1] = camera.view_proj_matrix * vec4(lo.x, hi.y, hi.z, 1.0f);
    corner[2] = camera.view_proj_matrix * vec4(hi.x, lo.y, hi.z, 1.0f);
    corner[3] = camera.view_proj_matrix * vec4(lo.x, lo.y, hi.z, 1.0f);
    corner[4] = camera.view_proj_matrix * vec4(hi.x, hi.y, lo.z, 1.0f);
    corner[5] = camera.view_proj_matrix * vec4(lo.x, hi.y, lo.z, 1.0f);
    corner[6] = camera.view_proj_matrix * vec4(hi.x, lo.y, lo.z, 1.0f);
    corner[7] = camera.view_proj_matrix * bound.bmin;

    // per-axis counts of corners beyond the +w planes and the -w planes
    ivec3 beyond_pos = ivec3(0);
    ivec3 beyond_neg = ivec3(0);
    for(int i = 0; i < 8; ++i)
    {
        const vec4 c = corner[i];
        beyond_pos += ivec3(greaterThan(c.xyz, vec3(c.w)));
        beyond_neg += ivec3(lessThan(c.xyz, vec3(-c.w)));
    }

    // all eight corners outside the same plane: the box cannot be in view
    if(any(equal(beyond_pos, ivec3(8))) || any(equal(beyond_neg, ivec3(8))))
    {
        return 0u;
    }

    // ------------------------------------------------------------------------------
    // occlusion culling
    // ------------------------------------------------------------------------------

    // any corner beyond the near plane (z < -w): treat the box as visible
    if(beyond_neg.z > 0)
    {
        return 1u;
    }

    // bounding range of the corners after the perspective divide
    vec3 range_lo = corner[0].xyz / corner[0].w;
    vec3 range_hi = range_lo;
    for(int i = 1; i < 8; ++i)
    {
        range_lo = min(range_lo, corner[i].xyz / corner[i].w);
        range_hi = max(range_hi, corner[i].xyz / corner[i].w);
    }

    // remap from [-1,1] to [0,1] so xy can be used as texture coordinates
    range_lo = range_lo * 0.5 + 0.5;
    range_hi = range_hi * 0.5 + 0.5;

    // larger side of the 2D footprint, scaled by the larger texture dimension
    const vec2 extent = (range_hi.xy - range_lo.xy);
    const ivec2 tex_dim = textureSize(depth_texture, 0);
    const float footprint = max(extent.x, extent.y) * float(max(tex_dim.x, tex_dim.y));

    // small-feature culling
    if(footprint <= 1.0f)
    {
        return 0u;
    }

    // mip level chosen from the footprint size
    const float level = ceil(log2(footprint));

    // sample the depth texture at the four corners of the 2D footprint
    const float d0 = textureLod(depth_texture, range_lo.xy, level).r;
    const float d1 = textureLod(depth_texture, vec2(range_hi.x, range_lo.y), level).r;
    const float d2 = textureLod(depth_texture, range_hi.xy, level).r;
    const float d3 = textureLod(depth_texture, vec2(range_lo.x, range_hi.y), level).r;
    const float stored_depth = max(0.0f, max(max(max(d0, d1), d2), d3));

    // visible when the nearest box depth is in front of the sampled depth
    if(range_lo.z < stored_depth)
    {
        return 1u;
    }
    return 0u;
}
// Compute entry point: one invocation per output word.
// Packs the low bit of 32 consecutive visibility flags (one Flags element)
// into a single uint, flag i of the group ending up in bit i.
void main()
{
    const uint word_id = gl_GlobalInvocationID.x;

    // Number of 32-bit words needed for instance_count flags, computed with
    // integer arithmetic. Valid word indices are [0, word_count), so the test
    // must be >=; the previous "> ceil(float(...))" form let word_count itself
    // through, reading and writing one element past the end of the buffers.
    const uint word_count = instance_count / 32u + uint((instance_count % 32u) != 0u);
    if(word_id >= word_count)
    {
        return;
    }

    const uint first_flag = word_id * 32u;
    uint bits = 0u;

    // the last word may be only partially populated: flags at or beyond
    // instance_count are not read and their bits stay 0
    for(uint i = 0u; i < 32u && (first_flag + i) < instance_count; ++i)
    {
        bits |= (input_visible.data[word_id].data[i] & 1u) << i;
    }

    output_bits_curr.data[word_id] = bits;
}
// Compute entry point: one invocation per output word.
// Packs the low bit of 32 consecutive visibility flags (one Flags element)
// into a uint, then clears every bit that is set in the corresponding word of
// input_bits_last before storing the result.
void main()
{
    const uint word_id = gl_GlobalInvocationID.x;

    // Number of 32-bit words needed for instance_count flags, computed with
    // integer arithmetic. Valid word indices are [0, word_count), so the test
    // must be >=; the previous "> ceil(float(...))" form let word_count itself
    // through, reading and writing one element past the end of the buffers.
    const uint word_count = instance_count / 32u + uint((instance_count % 32u) != 0u);
    if(word_id >= word_count)
    {
        return;
    }

    const uint first_flag = word_id * 32u;
    uint bits = 0u;

    // the last word may be only partially populated: flags at or beyond
    // instance_count are not read and their bits stay 0
    for(uint i = 0u; i < 32u && (first_flag + i) < instance_count; ++i)
    {
        bits |= (input_visible.data[word_id].data[i] & 1u) << i;
    }

    // keep only the bits that are set now and were not set in the last word
    output_bits_curr.data[word_id] = bits & (~input_bits_last.data[word_id]);
}
-------------------------------------------------------------------------------------------------------------- 37 | 38 | layout(binding = 0) uniform atomic_uint instance_count_lod_0; 39 | layout(binding = 1) uniform atomic_uint instance_count_lod_1; 40 | layout(binding = 2) uniform atomic_uint instance_count_lod_2; 41 | layout(binding = 3) uniform atomic_uint instance_count_lod_3; 42 | 43 | layout(std430, binding = 2) buffer instance_id_out_lod_0 44 | { 45 | writeonly uint data[]; 46 | } output_instance_id_lod_0; 47 | 48 | layout(std430, binding = 3) buffer instance_id_out_lod_1 49 | { 50 | writeonly uint data[]; 51 | } output_instance_id_lod_1; 52 | 53 | layout(std430, binding = 4) buffer instance_id_out_lod_2 54 | { 55 | writeonly uint data[]; 56 | } output_instance_id_lod_2; 57 | 58 | layout(std430, binding = 5) buffer instance_id_out_lod_3 59 | { 60 | writeonly uint data[]; 61 | } output_instance_id_lod_3; 62 | 63 | // -------------------------------------------------------------------------------------------------------------- 64 | // MAIN 65 | // -------------------------------------------------------------------------------------------------------------- 66 | 67 | void main() 68 | { 69 | uint instance_id = gl_GlobalInvocationID.x; 70 | 71 | if(instance_id >= instance_count) 72 | { 73 | return; 74 | } 75 | 76 | if((input_bits_curr.data[instance_id/32] & (1u << (instance_id%32))) == 0) 77 | { 78 | return; 79 | } 80 | 81 | const Bound b = input_bound.data[instance_id]; 82 | 83 | const float distance = -(camera.view_matrix * vec4((b.bmin.xyz + b.bmax.xyz) * 0.5f, 1.0f)).z; 84 | 85 | if(distance < 100.0f) 86 | { 87 | uint count = atomicCounterIncrement(instance_count_lod_0); 88 | output_instance_id_lod_0.data[count] = instance_id; 89 | } 90 | else if(distance < 200.0f) 91 | { 92 | uint count = atomicCounterIncrement(instance_count_lod_1); 93 | output_instance_id_lod_1.data[count] = instance_id; 94 | } 95 | else if(distance < 400.0f) 96 | { 97 | uint count = 
// Splits a linear index into (x, y, z) grid coordinates, with x varying
// fastest: idx == x + max_x * (y + max_y * z) for non-negative inputs.
// max_x and max_y must be non-zero.
static void to_3D(int idx, int max_x, int max_y, int& x, int& y, int& z)
{
    const int row_index = idx / max_x; // index with the x component removed

    x = idx % max_x;
    y = row_index % max_y;
    z = row_index / max_y;
}
(total + block_size - 1) / block_size; 74 | } 75 | 76 | static void print_compute_info() 77 | { 78 | GLint c = 0; 79 | glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 0, &c); 80 | io::print("GL_MAX_COMPUTE_WORK_GROUP_COUNT in X:", c); 81 | glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 0, &c); 82 | io::print("GL_MAX_COMPUTE_WORK_GROUP_SIZE in X:", c); 83 | glGetIntegerv(GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS, &c); 84 | io::print("GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS:", c); 85 | } 86 | 87 | bool compute_renderer::initialize(glb::framebuffer& fbuffer, glb::camera& cam) 88 | { 89 | // ---------------------------------------------------------------------------------------------------------------------- 90 | // configure framebuffer for hi-z map 91 | // ---------------------------------------------------------------------------------------------------------------------- 92 | 93 | fbuffer.set_use_textures(true); 94 | fbuffer.set_create_depth_mipmaps(true); 95 | _fbuffer = &fbuffer; 96 | 97 | // ---------------------------------------------------------------------------------------------------------------------- 98 | // scene setup 99 | // ---------------------------------------------------------------------------------------------------------------------- 100 | 101 | int n_x = 100; 102 | int n_y = 100; 103 | int n_z = 100; 104 | _instance_count = n_x*n_y*n_z; 105 | 106 | _compute_count = get_num_blocks(_instance_count, 256); 107 | _bit_compute_count = get_num_blocks(get_num_blocks(_instance_count, 32), 256); 108 | 109 | tess::triangle_mesh drawable_lods[NUM_LODS]; 110 | 111 | int resolution = pow(2, NUM_LODS+1); 112 | unsigned int total_vertex_count = 0; 113 | unsigned int total_element_count = 0; 114 | unsigned int element_offsets[NUM_LODS] = {0}; 115 | unsigned int eoffset = 0; 116 | unsigned int vertex_offsets[NUM_LODS] = {0}; 117 | unsigned int voffset = 0; 118 | 119 | for(int i = 0; i < NUM_LODS; ++i) 120 | { 121 | drawable_lods[i] = tess::tessellate_cylinder(1, 
5, resolution); 122 | total_vertex_count += drawable_lods[i].vertices.size(); 123 | total_element_count += drawable_lods[i].elements.size(); 124 | element_offsets[i] = eoffset; 125 | eoffset += drawable_lods[i].elements.size(); 126 | vertex_offsets[i] = voffset; 127 | voffset += drawable_lods[i].vertices.size(); 128 | resolution >>= 1; 129 | } 130 | 131 | // ---------------------------------------------------------------------------------------------------------------------- 132 | // compute input 1: matrix transforms 133 | // ---------------------------------------------------------------------------------------------------------------------- 134 | 135 | std::vector transforms(_instance_count); 136 | for(unsigned int i = 0; i < transforms.size(); ++i) 137 | { 138 | int x, y, z; 139 | to_3D(i, n_x, n_y, x, y, z); 140 | transforms[i] = mat34(mat4::translation(vec3((float)x, (float)y, (float)z)*6.0f)); 141 | } 142 | 143 | glGenBuffers(1, &_input_transform_ssbo); 144 | glBindBuffer(GL_SHADER_STORAGE_BUFFER, _input_transform_ssbo); 145 | glBufferData(GL_SHADER_STORAGE_BUFFER, _instance_count*sizeof(mat34), transforms.data(), GL_STATIC_DRAW); 146 | glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); 147 | 148 | // ---------------------------------------------------------------------------------------------------------------------- 149 | // compute input 2: bounding boxes 150 | // ---------------------------------------------------------------------------------------------------------------------- 151 | 152 | std::vector bounds(_instance_count); 153 | for(unsigned int i = 0; i < bounds.size(); ++i) 154 | { 155 | bbox& b = bounds[i]; 156 | for(unsigned int v = 0; v < drawable_lods[0].vertices.size(); ++v) 157 | { 158 | b.expand(transforms[i].as_mat4().mul(drawable_lods[0].vertices[v].position)); 159 | } 160 | } 161 | 162 | glGenBuffers(1, &_input_bound_ssbo); 163 | glBindBuffer(GL_SHADER_STORAGE_BUFFER, _input_bound_ssbo); 164 | glBufferData(GL_SHADER_STORAGE_BUFFER, 
_instance_count*sizeof(bbox), bounds.data(), GL_STATIC_DRAW); 165 | glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); 166 | 167 | // ---------------------------------------------------------------------------------------------------------------------- 168 | // compute output 1: instance IDs for each LOD 169 | // ---------------------------------------------------------------------------------------------------------------------- 170 | 171 | vector ids(_instance_count); 172 | for(unsigned int i = 0; i < ids.size(); ++i) 173 | { 174 | ids[i] = i; 175 | } 176 | 177 | for(int i = 0; i < NUM_LODS; ++i) 178 | { 179 | glGenBuffers(1, &_visible_instance_id_ssbo[i]); 180 | glBindBuffer(GL_SHADER_STORAGE_BUFFER, _visible_instance_id_ssbo[i]); 181 | glBufferData(GL_SHADER_STORAGE_BUFFER, _instance_count*sizeof(GLuint), ids.data(), GL_STATIC_DRAW); 182 | glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); 183 | } 184 | 185 | // ---------------------------------------------------------------------------------------------------------------------- 186 | // compute output 2: indirect draw commands 187 | // ---------------------------------------------------------------------------------------------------------------------- 188 | 189 | for(int i = 0; i < NUM_LODS; ++i) 190 | { 191 | DrawElementsIndirectCommand cmd; 192 | cmd.elementCount = drawable_lods[i].elements.size(); 193 | cmd.instanceCount = 0; // atomic counter incremented by the compute shader 194 | cmd.firstElement = element_offsets[i]; 195 | cmd.baseVertex = vertex_offsets[i]; 196 | cmd.baseInstance = i; // references lod level vertex attribute 197 | _draw_commands[i] = cmd; 198 | } 199 | 200 | _draw_commands[NUM_LODS-1].instanceCount = _instance_count; 201 | 202 | glGenBuffers(1, &_draw_indirect_buffer); 203 | glBindBuffer(GL_DRAW_INDIRECT_BUFFER, _draw_indirect_buffer); 204 | glBufferData(GL_DRAW_INDIRECT_BUFFER, NUM_LODS*sizeof(DrawElementsIndirectCommand), _draw_commands, GL_STATIC_DRAW); 205 | glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 
0); 206 | 207 | _draw_commands[NUM_LODS-1].instanceCount = 0; 208 | 209 | // ---------------------------------------------------------------------------------------------------------------------- 210 | // compute shader 211 | // ---------------------------------------------------------------------------------------------------------------------- 212 | 213 | glb::shader_program_builder compute_shader_builder; 214 | compute_shader_builder.begin(); 215 | if(!compute_shader_builder.add_file(glb::shader_compute, "../shaders/lod/cull.cs")) 216 | { 217 | return false; 218 | } 219 | if(!compute_shader_builder.end()) 220 | { 221 | return false; 222 | } 223 | auto program = compute_shader_builder.get_shader_program(); 224 | program.bind_uniform_buffer("camera_data", cam.get_uniform_buffer()); 225 | _hiz_cull_lastframe_program = program.get_id(); 226 | 227 | glProgramUniform1ui(_hiz_cull_lastframe_program, 0, _instance_count); 228 | 229 | // ---------------------------------------------------------------------------------------------------------------------- 230 | // drawing vertex arrays 231 | // ---------------------------------------------------------------------------------------------------------------------- 232 | 233 | glGenVertexArrays(1, &_draw_vao); 234 | glBindVertexArray(_draw_vao); 235 | 236 | GLuint vbo; 237 | glGenBuffers(1, &vbo); 238 | glBindBuffer(GL_ARRAY_BUFFER, vbo); 239 | glBufferData(GL_ARRAY_BUFFER, total_vertex_count*sizeof(tess::vertex), nullptr, GL_STATIC_DRAW); 240 | 241 | unsigned int offset = 0; 242 | for(int i = 0; i < NUM_LODS; ++i) 243 | { 244 | unsigned int size = drawable_lods[i].vertices.size()*sizeof(tess::vertex); 245 | glBufferSubData(GL_ARRAY_BUFFER, offset, size, drawable_lods[i].vertices.data()); 246 | offset += size; 247 | } 248 | 249 | glEnableVertexAttribArray(0); 250 | glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, sizeof(tess::vertex), GLB_BYTE_OFFSET(0)); 251 | 252 | glEnableVertexAttribArray(1); 253 | glVertexAttribPointer(1, 
3, GL_FLOAT, GL_FALSE, sizeof(tess::vertex), GLB_BYTE_OFFSET(sizeof(vec3))); 254 | 255 | GLuint lbo; 256 | glGenBuffers(1, &lbo); 257 | glBindBuffer(GL_ARRAY_BUFFER, lbo); 258 | 259 | GLuint lod_levels[NUM_LODS]; 260 | for(int i = 0; i < NUM_LODS; ++i) 261 | { 262 | lod_levels[i] = i; 263 | } 264 | glBufferData(GL_ARRAY_BUFFER, NUM_LODS*sizeof(GLuint), lod_levels, GL_STATIC_DRAW); 265 | 266 | glEnableVertexAttribArray(2); 267 | glVertexAttribIPointer(2, 1, GL_UNSIGNED_INT, 0, GLB_BYTE_OFFSET(0)); 268 | glVertexAttribDivisor(2, _instance_count + 1); // make sure (instanceID / divisor) is 0 so lod_level vertex attrib equals drawCmd.baseInstance 269 | 270 | GLuint ebo; 271 | glGenBuffers(1, &ebo); 272 | glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo); 273 | glBufferData(GL_ELEMENT_ARRAY_BUFFER, total_element_count*sizeof(tess::element), nullptr, GL_STATIC_DRAW); 274 | 275 | offset = 0; 276 | for(int i = 0; i < NUM_LODS; ++i) 277 | { 278 | unsigned int size = drawable_lods[i].elements.size()*sizeof(tess::element); 279 | glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, offset, size, drawable_lods[i].elements.data()); 280 | offset += size; 281 | } 282 | 283 | glBindVertexArray(0); 284 | 285 | // ---------------------------------------------------------------------------------------------------------------------- 286 | // drawing shader 287 | // ---------------------------------------------------------------------------------------------------------------------- 288 | 289 | glb::shader_program_builder draw_shader_builder; 290 | draw_shader_builder.begin(); 291 | if(!draw_shader_builder.add_file(glb::shader_vertex, "../shaders/lod/draw.vs")) 292 | { 293 | return false; 294 | } 295 | if(!draw_shader_builder.add_file(glb::shader_fragment, "../shaders/lod/draw.fs")) 296 | { 297 | return false; 298 | } 299 | if(!draw_shader_builder.end()) 300 | { 301 | return false; 302 | } 303 | auto draw_shader = draw_shader_builder.get_shader_program(); 304 | draw_shader.bind_uniform_buffer("cdata", 
cam.get_uniform_buffer()); 305 | _draw_program = draw_shader.get_id(); 306 | 307 | // ---------------------------------------------------------------------------------------------------------------------- 308 | // associate buffers with indexed binding points 309 | // ---------------------------------------------------------------------------------------------------------------------- 310 | 311 | unsigned int ssbo_binding = 0; 312 | 313 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, ssbo_binding++, _input_bound_ssbo); 314 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, ssbo_binding++, _input_transform_ssbo); 315 | 316 | for(int i = 0; i < NUM_LODS; ++i) 317 | { 318 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, ssbo_binding++, _visible_instance_id_ssbo[i]); 319 | glBindBufferRange(GL_ATOMIC_COUNTER_BUFFER, i, _draw_indirect_buffer, 4+sizeof(DrawElementsIndirectCommand)*i, sizeof(GLuint)); 320 | } 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | // ---------------------------------------------------------------------------------------------------------------------- 330 | // temporal hi-z culling shader 331 | // ---------------------------------------------------------------------------------------------------------------------- 332 | 333 | compute_shader_builder.begin(); 334 | if(!compute_shader_builder.add_file(glb::shader_compute, "../shaders/lod/temporal/hiz_cull.cs")) 335 | { 336 | return false; 337 | } 338 | if(!compute_shader_builder.end()) 339 | { 340 | return false; 341 | } 342 | program = compute_shader_builder.get_shader_program(); 343 | program.bind_uniform_buffer("camera_data", cam.get_uniform_buffer()); 344 | _hiz_cull_program = program.get_id(); 345 | 346 | glProgramUniform1ui(_hiz_cull_program, 0, _instance_count); 347 | 348 | // ---------------------------------------------------------------------------------------------------------------------- 349 | // bit-packing current shader 350 | // 
---------------------------------------------------------------------------------------------------------------------- 351 | 352 | compute_shader_builder.begin(); 353 | if(!compute_shader_builder.add_file(glb::shader_compute, "../shaders/lod/temporal/bits_curr.cs")) 354 | { 355 | return false; 356 | } 357 | if(!compute_shader_builder.end()) 358 | { 359 | return false; 360 | } 361 | _bits_curr_program = compute_shader_builder.get_shader_program().get_id(); 362 | 363 | glProgramUniform1ui(_bits_curr_program, 0, _instance_count); 364 | 365 | // ---------------------------------------------------------------------------------------------------------------------- 366 | // bit-packing current and not last shader 367 | // ---------------------------------------------------------------------------------------------------------------------- 368 | 369 | compute_shader_builder.begin(); 370 | if(!compute_shader_builder.add_file(glb::shader_compute, "../shaders/lod/temporal/bits_curr_notlast.cs")) 371 | { 372 | return false; 373 | } 374 | if(!compute_shader_builder.end()) 375 | { 376 | return false; 377 | } 378 | _bits_curr_notlast_program = compute_shader_builder.get_shader_program().get_id(); 379 | 380 | glProgramUniform1ui(_bits_curr_notlast_program, 0, _instance_count); 381 | 382 | // ---------------------------------------------------------------------------------------------------------------------- 383 | // collect visible instance shader 384 | // ---------------------------------------------------------------------------------------------------------------------- 385 | 386 | compute_shader_builder.begin(); 387 | if(!compute_shader_builder.add_file(glb::shader_compute, "../shaders/lod/temporal/collect_instances.cs")) 388 | { 389 | return false; 390 | } 391 | if(!compute_shader_builder.end()) 392 | { 393 | return false; 394 | } 395 | _collect_instances_program = compute_shader_builder.get_shader_program().get_id(); 396 | 397 | 
glProgramUniform1ui(_collect_instances_program, 0, _instance_count); 398 | 399 | // ---------------------------------------------------------------------------------------------------------------------- 400 | // visible flags ssbo 401 | // ---------------------------------------------------------------------------------------------------------------------- 402 | 403 | glGenBuffers(1, &_visible_flags_ssbo); 404 | glBindBuffer(GL_SHADER_STORAGE_BUFFER, _visible_flags_ssbo); 405 | glBufferData(GL_SHADER_STORAGE_BUFFER, 32*get_num_blocks(_instance_count, 32)*sizeof(GLuint), nullptr, GL_STATIC_DRAW); 406 | glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); 407 | 408 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 10, _visible_flags_ssbo); 409 | 410 | // ---------------------------------------------------------------------------------------------------------------------- 411 | // current visible bits ssbo 412 | // ---------------------------------------------------------------------------------------------------------------------- 413 | 414 | vector bits(get_num_blocks(_instance_count, 32), 0xFFFFFFFF); 415 | 416 | glGenBuffers(1, &_curr_visible_bits_ssbo); 417 | glBindBuffer(GL_SHADER_STORAGE_BUFFER, _curr_visible_bits_ssbo); 418 | glBufferData(GL_SHADER_STORAGE_BUFFER, get_num_blocks(_instance_count, 32)*sizeof(GLuint), bits.data(), GL_STATIC_DRAW); 419 | glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); 420 | 421 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 11, _curr_visible_bits_ssbo); 422 | 423 | // ---------------------------------------------------------------------------------------------------------------------- 424 | // last visible bits ssbo 425 | // ---------------------------------------------------------------------------------------------------------------------- 426 | 427 | glGenBuffers(1, &_last_visible_bits_ssbo); 428 | glBindBuffer(GL_SHADER_STORAGE_BUFFER, _last_visible_bits_ssbo); 429 | glBufferData(GL_SHADER_STORAGE_BUFFER, get_num_blocks(_instance_count, 
32)*sizeof(GLuint), bits.data(), GL_STATIC_DRAW); 430 | glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); 431 | 432 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 12, _last_visible_bits_ssbo); 433 | 434 | 435 | 436 | 437 | // _fbuffer->set_use_same_depth_for_mipmaps(true); 438 | 439 | 440 | return true; 441 | } 442 | 443 | bool compute_renderer::finalize() 444 | { 445 | return true; 446 | } 447 | 448 | void compute_renderer::render() 449 | { 450 | // render_hiz_last_frame(); 451 | render_hiz_temporal(); 452 | } 453 | 454 | void compute_renderer::render_raster_temporal() 455 | { 456 | // TODO: use int or bool to store visible flags? 457 | // TODO: really need to compact visible flags into bit array? 458 | 459 | // 1. multi draw indirect 460 | 461 | // 2. raster all bboxes against current z-buffer 462 | // 2.1. draw points for entire scene, expand to 3 box faces using geometry shader, and store visible flag in fragment shader 463 | // 2.2. pack visible flags into current bit array: current &= ~last (OPTIONAL) 464 | // 2.3. collect instance ids based on current bit array 465 | 466 | // 3. multi draw indirect 467 | 468 | // 4. prepare data for next frame 469 | // 4.1. pack visible flags into current bit array: current (OPTIONAL) 470 | // 4.2. collect instance ids based on current bit array 471 | // 4.3. swap current with last visible bit arrays 472 | } 473 | 474 | void compute_renderer::render_hiz_temporal() 475 | { 476 | // TODO: use int or bool to store visible flags? 477 | // TODO: really need to compact visible flags into bit array? 478 | // TODO: why use flags instead of storing instance id directly? 
479 | 480 | //{ 481 | // glBindBuffer(GL_DRAW_INDIRECT_BUFFER, _draw_indirect_buffer); 482 | 483 | // vector cmds(NUM_LODS); 484 | // glGetBufferSubData(GL_DRAW_INDIRECT_BUFFER, 0, NUM_LODS*sizeof(DrawElementsIndirectCommand), cmds.data()); 485 | // io::print(); 486 | 487 | // for(int i = 0; i < NUM_LODS; ++i) 488 | // { 489 | // std::vector ids(_instance_count); 490 | // glBindBuffer(GL_SHADER_STORAGE_BUFFER, _visible_instance_id_ssbo[i]); 491 | // glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, _instance_count*sizeof(GLuint), ids.data()); 492 | // io::print(); 493 | // } 494 | //} 495 | 496 | //print_per_lod_instance_count(); 497 | 498 | 499 | // 1. draw previously visible 500 | glBindBuffer(GL_DRAW_INDIRECT_BUFFER, _draw_indirect_buffer); 501 | glBindVertexArray(_draw_vao); 502 | glUseProgram(_draw_program); 503 | glMultiDrawElementsIndirect(GL_TRIANGLES, GL_UNSIGNED_INT, GLB_BYTE_OFFSET(0), NUM_LODS, 0); 504 | 505 | // 2. test all bboxes for visibily 506 | // 2.1. build depth mipmaps 507 | _fbuffer->update_depth_mipmaps(); 508 | // 2.2. 
perform frustum culling, occlusion culling using hi-z, and store visible flag 509 | glUseProgram(_hiz_cull_program); 510 | glDispatchCompute(_compute_count, 1, 1); 511 | glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); 512 | 513 | 514 | //{ 515 | // vector flags(_instance_count); 516 | // glBindBuffer(GL_SHADER_STORAGE_BUFFER, _visible_flags_ssbo); 517 | // glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, _instance_count*sizeof(GLuint), flags.data()); 518 | // io::print(); 519 | //} 520 | 521 | 522 | //{ 523 | // vector bits(get_num_blocks(_instance_count, 32)); 524 | // glBindBuffer(GL_SHADER_STORAGE_BUFFER, _curr_visible_bits_ssbo); 525 | // glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, bits.size()*sizeof(GLuint), bits.data()); 526 | // io::print(); 527 | //} 528 | 529 | //{ 530 | // vector bits(get_num_blocks(_instance_count, 32)); 531 | // glBindBuffer(GL_SHADER_STORAGE_BUFFER, _last_visible_bits_ssbo); 532 | // glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, bits.size()*sizeof(GLuint), bits.data()); 533 | // io::print(); 534 | //} 535 | 536 | 537 | // 2.3. pack visible flags into current bit array: current &= ~last (OPTIONAL) 538 | glUseProgram(_bits_curr_notlast_program); 539 | glDispatchCompute(_bit_compute_count, 1, 1); 540 | glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); 541 | 542 | //{ 543 | // vector bits(get_num_blocks(_instance_count, 32)); 544 | // glBindBuffer(GL_SHADER_STORAGE_BUFFER, _curr_visible_bits_ssbo); 545 | // glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, bits.size()*sizeof(GLuint), bits.data()); 546 | // io::print(); 547 | //} 548 | 549 | 550 | 551 | 552 | // 2.4. 
collect instance ids based on current bit array 553 | glBufferData(GL_DRAW_INDIRECT_BUFFER, NUM_LODS*sizeof(DrawElementsIndirectCommand), _draw_commands, GL_STATIC_DRAW); 554 | glUseProgram(_collect_instances_program); 555 | glDispatchCompute(_compute_count, 1, 1); 556 | glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT | GL_COMMAND_BARRIER_BIT); 557 | 558 | 559 | //{ 560 | // glBindBuffer(GL_DRAW_INDIRECT_BUFFER, _draw_indirect_buffer); 561 | 562 | // vector cmds(NUM_LODS); 563 | // glGetBufferSubData(GL_DRAW_INDIRECT_BUFFER, 0, NUM_LODS*sizeof(DrawElementsIndirectCommand), cmds.data()); 564 | // io::print(); 565 | 566 | // for(int i = 0; i < NUM_LODS; ++i) 567 | // { 568 | // std::vector ids(_instance_count); 569 | // glBindBuffer(GL_SHADER_STORAGE_BUFFER, _visible_instance_id_ssbo[i]); 570 | // glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, _instance_count*sizeof(GLuint), ids.data()); 571 | // io::print(); 572 | // } 573 | //} 574 | 575 | 576 | 577 | // 3. draw newly visible in current frame 578 | glUseProgram(_draw_program); 579 | glMultiDrawElementsIndirect(GL_TRIANGLES, GL_UNSIGNED_INT, GLB_BYTE_OFFSET(0), NUM_LODS, 0); 580 | 581 | // 4. prepare data for next frame 582 | // 4.1. pack visible flags into current bit array: current (OPTIONAL) 583 | glUseProgram(_bits_curr_program); 584 | glDispatchCompute(_bit_compute_count, 1, 1); 585 | glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); 586 | 587 | //{ 588 | // vector bits(get_num_blocks(_instance_count, 32)); 589 | // glBindBuffer(GL_SHADER_STORAGE_BUFFER, _curr_visible_bits_ssbo); 590 | // glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, bits.size()*sizeof(GLuint), bits.data()); 591 | // io::print(); 592 | //} 593 | 594 | 595 | // 4.2. 
collect instance ids based on current bit array 596 | glUseProgram(_collect_instances_program); 597 | glDispatchCompute(_compute_count, 1, 1); 598 | glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT | GL_COMMAND_BARRIER_BIT); 599 | 600 | //{ 601 | // glBindBuffer(GL_DRAW_INDIRECT_BUFFER, _draw_indirect_buffer); 602 | 603 | // vector cmds(NUM_LODS); 604 | // glGetBufferSubData(GL_DRAW_INDIRECT_BUFFER, 0, NUM_LODS*sizeof(DrawElementsIndirectCommand), cmds.data()); 605 | // io::print(); 606 | 607 | // for(int i = 0; i < NUM_LODS; ++i) 608 | // { 609 | // std::vector ids(_instance_count); 610 | // glBindBuffer(GL_SHADER_STORAGE_BUFFER, _visible_instance_id_ssbo[i]); 611 | // glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, _instance_count*sizeof(GLuint), ids.data()); 612 | // io::print(); 613 | // } 614 | //} 615 | 616 | // 4.3. swap current with last visible bit arrays 617 | static bool invert = true; 618 | if(invert) 619 | { 620 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 12, _curr_visible_bits_ssbo); 621 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 11, _last_visible_bits_ssbo); 622 | } 623 | else 624 | { 625 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 11, _curr_visible_bits_ssbo); 626 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 12, _last_visible_bits_ssbo); 627 | } 628 | invert ^= 1; 629 | } 630 | 631 | void compute_renderer::render_hiz_last_frame() 632 | { 633 | // frustum cull + occlusion cull + select lod 634 | glUseProgram(_hiz_cull_lastframe_program); 635 | _fbuffer->bind_depth_texture(); 636 | glBindBuffer(GL_DRAW_INDIRECT_BUFFER, _draw_indirect_buffer); 637 | glBufferData(GL_DRAW_INDIRECT_BUFFER, NUM_LODS*sizeof(DrawElementsIndirectCommand), _draw_commands, GL_STATIC_DRAW); 638 | glDispatchCompute(_compute_count, 1, 1); 639 | glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT | GL_COMMAND_BARRIER_BIT); 640 | 641 | // render 642 | glUseProgram(_draw_program); 643 | glBindVertexArray(_draw_vao); 644 | glMultiDrawElementsIndirect(GL_TRIANGLES, GL_UNSIGNED_INT, 
GLB_BYTE_OFFSET(0), NUM_LODS, 0); 645 | 646 | // update hi-z map for next frame 647 | _fbuffer->update_depth_mipmaps(); 648 | 649 | // DEBUG 650 | // print_per_lod_instance_count(); 651 | // _fbuffer->draw_depth_mipmap(0); 652 | } 653 | 654 | void compute_renderer::print_per_lod_instance_count() 655 | { 656 | int total = 0; 657 | std::vector cmds(NUM_LODS); 658 | glBindBuffer(GL_DRAW_INDIRECT_BUFFER, _draw_indirect_buffer); 659 | glGetBufferSubData(GL_DRAW_INDIRECT_BUFFER, 0, NUM_LODS*sizeof(DrawElementsIndirectCommand), cmds.data()); 660 | for(const auto& cmd : cmds) 661 | { 662 | io::print(cmd.instanceCount); 663 | total += cmd.instanceCount; 664 | } 665 | io::print("total:", total, "--------------------------"); 666 | } 667 | } 668 | -------------------------------------------------------------------------------- /shaders/temporal/pack_bits/hiz_cull.cs: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | struct Transform 4 | { 5 | vec4 m0; 6 | vec4 m1; 7 | vec4 m2; 8 | }; 9 | 10 | struct Bound 11 | { 12 | vec4 bmin; 13 | vec4 bmax; 14 | }; 15 | 16 | // -------------------------------------------------------------------------------------------------------------- 17 | // INPUTS 18 | // -------------------------------------------------------------------------------------------------------------- 19 | 20 | layout(local_size_x = 256) in; 21 | 22 | layout(location = 0) uniform uint instance_count; 23 | 24 | layout(binding = 0) uniform sampler2D depth_texture; 25 | 26 | layout(std140) uniform camera_data 27 | { 28 | mat4 view_proj_matrix; 29 | mat4 normal_matrix; 30 | mat4 view_matrix; 31 | } camera; 32 | 33 | layout(std430, binding = 0) buffer bound_in 34 | { 35 | readonly Bound data[]; 36 | } input_bound; 37 | 38 | // -------------------------------------------------------------------------------------------------------------- 39 | // OUTPUTS 40 | // 
--------------------------------------------------------------------------------------------------------------

layout(std430, binding = 10) buffer visible_out
{
    writeonly uint data[];
} output_visible;

// --------------------------------------------------------------------------------------------------------------
// AUXILIARY FUNCTIONS
// --------------------------------------------------------------------------------------------------------------

// Returns 1 when the bounding box may be visible and 0 when it is culled by the frustum test, the
// small-feature test or the hi-z occlusion test.
uint visible(const Bound bound)
{
    // ------------------------------------------------------------------------------
    // frustum culling
    // ------------------------------------------------------------------------------

    // tests *must* be done in clip space, *not* NDC space

    // the eight box corners in clip space
    vec4 corners[8];
    corners[0] = camera.view_proj_matrix * bound.bmax;
    corners[1] = camera.view_proj_matrix * vec4(bound.bmin.x, bound.bmax.y, bound.bmax.z, 1.0f);
    corners[2] = camera.view_proj_matrix * vec4(bound.bmax.x, bound.bmin.y, bound.bmax.z, 1.0f);
    corners[3] = camera.view_proj_matrix * vec4(bound.bmin.x, bound.bmin.y, bound.bmax.z, 1.0f);
    corners[4] = camera.view_proj_matrix * vec4(bound.bmax.x, bound.bmax.y, bound.bmin.z, 1.0f);
    corners[5] = camera.view_proj_matrix * vec4(bound.bmin.x, bound.bmax.y, bound.bmin.z, 1.0f);
    corners[6] = camera.view_proj_matrix * vec4(bound.bmax.x, bound.bmin.y, bound.bmin.z, 1.0f);
    corners[7] = camera.view_proj_matrix * bound.bmin;

    // per-axis count of corners beyond the +w planes (x, y, z) and beyond the -w planes (x, y, z)
    ivec3 beyond_pos = ivec3(0);
    ivec3 beyond_neg = ivec3(0);

    for(int c = 0; c < 8; ++c)
    {
        const vec4 p = corners[c];
        beyond_pos += ivec3(greaterThan(p.xyz, vec3(p.w)));
        beyond_neg += ivec3(lessThan(p.xyz, vec3(-p.w)));
    }

    // all eight corners outside the same plane: the box cannot intersect the frustum
    if(any(equal(beyond_pos, ivec3(8))) || any(equal(beyond_neg, ivec3(8))))
    {
        return 0;
    }

    // ------------------------------------------------------------------------------
    // occlusion culling
    // ------------------------------------------------------------------------------

    // a box crossing the z = -w plane is treated as visible without an occlusion test
    if(beyond_neg.z > 0)
    {
        return 1;
    }

    // NDC extents of the box
    vec3 lo = corners[0].xyz / corners[0].w;
    vec3 hi = lo;
    for(int c = 1; c < 8; ++c)
    {
        const vec3 ndc = corners[c].xyz / corners[c].w;
        lo = min(lo, ndc);
        hi = max(hi, ndc);
    }

    // remap [-1,1] to [0,1] so xy are texture coordinates and z is comparable with stored depth
    lo = lo * 0.5 + 0.5;
    hi = hi * 0.5 + 0.5;

    // longest screen-space side, in texels of mip 0
    vec2 extent = (hi.xy - lo.xy);
    ivec2 tex_dim = textureSize(depth_texture,0);
    float longest = max(extent.x, extent.y) * float(max(tex_dim.x,tex_dim.y));

    // small-feature culling
    if(longest <= 1.0f)
    {
        return 0;
    }

    // mip level at which the box spans at most about one texel per side
    float lod = ceil(log2(longest));

    // the four corner fetches at that level cover the screen-space box; keep the largest depth
    float d00 = textureLod(depth_texture,lo.xy,lod).r;
    float d10 = textureLod(depth_texture,vec2(hi.x,lo.y),lod).r;
    float d11 = textureLod(depth_texture,hi.xy,lod).r;
    float d01 = textureLod(depth_texture,vec2(lo.x,hi.y),lod).r;
    float farthest = max(0.0f,max(max(max(d00,d10),d11),d01));

    // visible when the nearest point of the box is in front of the largest stored depth
    if(lo.z < farthest)
    {
        return 1;
    }
    return 0;
}

// --------------------------------------------------------------------------------------------------------------
// MAIN
// --------------------------------------------------------------------------------------------------------------

// One invocation per instance: stores the visibility flag of that instance's bounding box.
void main()
{
    const uint idx = gl_GlobalInvocationID.x;

    if(idx < instance_count)
    {
        output_visible.data[idx] = visible(input_bound.data[idx]);
    }
}
--------------------------------------------------------------------------------
/shaders/temporal/pack_bits_as_uint/bits_curr.cs:
--------------------------------------------------------------------------------
#version 450

// --------------------------------------------------------------------------------------------------------------
// INPUTS
// --------------------------------------------------------------------------------------------------------------

layout(local_size_x = 256) in;

layout(location = 0) uniform uint instance_count;

layout(std430, binding = 10) buffer visible_in
{
    readonly uint data[];
} input_visible;

// --------------------------------------------------------------------------------------------------------------
// OUTPUTS
// --------------------------------------------------------------------------------------------------------------

layout(std430, binding = 11) buffer bits_curr_out
{
    writeonly uint data[];
} output_bits_curr;

// --------------------------------------------------------------------------------------------------------------
// MAIN
// --------------------------------------------------------------------------------------------------------------

// One invocation per instance: copies the visibility flag into the "current" array unchanged
// (this variant stores one uint per instance instead of packing bits).
void main()
{
    const uint idx = gl_GlobalInvocationID.x;

    if(idx < instance_count)
    {
        output_bits_curr.data[idx] = input_visible.data[idx];
    }
}
--------------------------------------------------------------------------------
/shaders/temporal/pack_bits_as_uint/bits_curr_notlast.cs:
--------------------------------------------------------------------------------
#version 450

// --------------------------------------------------------------------------------------------------------------
// INPUTS
// --------------------------------------------------------------------------------------------------------------

layout(local_size_x = 256) in;

layout(location = 0) uniform uint instance_count;

layout(std430, binding = 10) buffer visible_in
{
    readonly uint data[];
} input_visible;

layout(std430, binding = 12) buffer bits_last_in
{
    readonly uint data[];
} input_bits_last;

// --------------------------------------------------------------------------------------------------------------
// OUTPUTS
// --------------------------------------------------------------------------------------------------------------

layout(std430, binding = 11) buffer bits_curr_out
{
    writeonly uint data[];
} output_bits_curr;

// --------------------------------------------------------------------------------------------------------------
// MAIN
// --------------------------------------------------------------------------------------------------------------

// One invocation per instance: current = visible & ~last, i.e. keeps only what is visible now
// and was not flagged in the "last" array.
void main()
{
    const uint idx = gl_GlobalInvocationID.x;

    if(idx < instance_count)
    {
        const uint not_last = ~input_bits_last.data[idx];
        output_bits_curr.data[idx] = input_visible.data[idx] & not_last;
    }
}
--------------------------------------------------------------------------------
/shaders/temporal/pack_bits_as_uint/collect_instances.cs:
--------------------------------------------------------------------------------
#version 450

struct Bound
{
    vec4 bmin;
    vec4 bmax;
};

// --------------------------------------------------------------------------------------------------------------
// INPUTS
// --------------------------------------------------------------------------------------------------------------

layout(local_size_x = 256) in;

layout(location = 0) uniform uint instance_count;

layout(std140) uniform camera_data
{
    mat4 view_proj_matrix;
    mat4 normal_matrix;
    mat4 view_matrix;
} camera;

layout(std430, binding = 0) buffer bound_in
{
    readonly Bound data[];
} input_bound;

layout(std430, binding = 11) buffer bits_curr_in
{
    readonly uint data[];
} input_bits_curr;

// --------------------------------------------------------------------------------------------------------------
// OUTPUTS
// --------------------------------------------------------------------------------------------------------------

// one counter per LOD; each counter also provides the next free slot in that LOD's id list
layout(binding = 0) uniform atomic_uint instance_count_lod_0;
layout(binding = 1) uniform atomic_uint instance_count_lod_1;
layout(binding = 2) uniform atomic_uint instance_count_lod_2;
layout(binding = 3) uniform atomic_uint instance_count_lod_3;

layout(std430, binding = 2) buffer instance_id_out_lod_0
{
    writeonly uint data[];
} output_instance_id_lod_0;

layout(std430, binding = 3) buffer instance_id_out_lod_1
{
    writeonly uint data[];
} output_instance_id_lod_1;

layout(std430, binding = 4) buffer instance_id_out_lod_2
{
    writeonly uint data[];
} output_instance_id_lod_2;

layout(std430, binding = 5) buffer instance_id_out_lod_3
{
    writeonly uint data[];
} output_instance_id_lod_3;

// --------------------------------------------------------------------------------------------------------------
// MAIN
// --------------------------------------------------------------------------------------------------------------

// One invocation per instance: instances whose "current" entry is non-zero are appended to the id list
// of the LOD chosen from the view-space depth of the bounding-box centre.
void main()
{
    const uint id = gl_GlobalInvocationID.x;

    // out-of-range invocation or instance not flagged: nothing to append
    // (|| short-circuits, so the buffer is not read for out-of-range ids)
    if(id >= instance_count || input_bits_curr.data[id] == 0)
    {
        return;
    }

    const Bound box = input_bound.data[id];
    const vec3 center = (box.bmin.xyz + box.bmax.xyz) * 0.5f;

    // negated view-space z of the box centre
    const float view_depth = -(camera.view_matrix * vec4(center, 1.0f)).z;

    if(view_depth < 100.0f)
    {
        const uint slot = atomicCounterIncrement(instance_count_lod_0);
        output_instance_id_lod_0.data[slot] = id;
    }
    else if(view_depth < 200.0f)
    {
        const uint slot = atomicCounterIncrement(instance_count_lod_1);
        output_instance_id_lod_1.data[slot] = id;
    }
    else if(view_depth < 400.0f)
    {
        const uint slot = atomicCounterIncrement(instance_count_lod_2);
        output_instance_id_lod_2.data[slot] = id;
    }
    else
    {
        const uint slot = atomicCounterIncrement(instance_count_lod_3);
        output_instance_id_lod_3.data[slot] = id;
    }
}
--------------------------------------------------------------------------------
/shaders/temporal/pack_bits_as_uint/compute_renderer.cpp:
--------------------------------------------------------------------------------
#include
#include
#include
#include
#include
#include

namespace app
{
struct mat34
{
    mat34(){}

    explicit mat34(const mat4& m)
    {
        int i = 0;
        data[i++] = m.at(0,0);
        data[i++] = m.at(0,1);
        data[i++] = m.at(0,2);
        data[i++] = m.at(0,3);

        data[i++] = m.at(1,0);
        data[i++] = m.at(1,1);
        data[i++] = m.at(1,2);
        data[i++] = m.at(1,3);

        data[i++] = m.at(2,0);
data[i++] = m.at(2,1); 29 | data[i++] = m.at(2,2); 30 | data[i++] = m.at(2,3); 31 | } 32 | 33 | mat4 as_mat4() const 34 | { 35 | return mat4(data[0], data[1], data[2], data[3], 36 | data[4], data[5], data[6], data[7], 37 | data[8], data[9], data[10], data[11], 38 | 0,0,0,1); 39 | } 40 | 41 | float data[12]; 42 | }; 43 | 44 | struct bbox 45 | { 46 | vec3 min = vec3(math::limit_posf()); 47 | float pad0 = 1.0f; 48 | vec3 max = vec3(math::limit_negf()); 49 | float pad1 = 1.0f; 50 | 51 | void expand(const vec3& v) 52 | { 53 | min.x = math::min(min.x, v.x); 54 | min.y = math::min(min.y, v.y); 55 | min.z = math::min(min.z, v.z); 56 | max.x = math::max(max.x, v.x); 57 | max.y = math::max(max.y, v.y); 58 | max.z = math::max(max.z, v.z); 59 | } 60 | }; 61 | 62 | static void to_3D(int idx, int max_x, int max_y, int& x, int& y, int& z) 63 | { 64 | x = idx % (max_x); 65 | idx /= (max_x); 66 | y = idx % (max_y); 67 | idx /= (max_y); 68 | z = idx; 69 | } 70 | 71 | static int get_num_blocks(int total, int block_size) 72 | { 73 | return (total + block_size - 1) / block_size; 74 | } 75 | 76 | static void print_compute_info() 77 | { 78 | GLint c = 0; 79 | glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 0, &c); 80 | io::print("GL_MAX_COMPUTE_WORK_GROUP_COUNT in X:", c); 81 | glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 0, &c); 82 | io::print("GL_MAX_COMPUTE_WORK_GROUP_SIZE in X:", c); 83 | glGetIntegerv(GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS, &c); 84 | io::print("GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS:", c); 85 | } 86 | 87 | bool compute_renderer::initialize(glb::framebuffer& fbuffer, glb::camera& cam) 88 | { 89 | // ---------------------------------------------------------------------------------------------------------------------- 90 | // configure framebuffer for hi-z map 91 | // ---------------------------------------------------------------------------------------------------------------------- 92 | 93 | fbuffer.set_use_textures(true); 94 | 
fbuffer.set_create_depth_mipmaps(true); 95 | _fbuffer = &fbuffer; 96 | 97 | // ---------------------------------------------------------------------------------------------------------------------- 98 | // scene setup 99 | // ---------------------------------------------------------------------------------------------------------------------- 100 | 101 | int n_x = 100; 102 | int n_y = 100; 103 | int n_z = 100; 104 | _instance_count = n_x*n_y*n_z; 105 | 106 | _compute_count = get_num_blocks(_instance_count, 256); 107 | _bit_compute_count = get_num_blocks(_instance_count, 256); 108 | 109 | tess::triangle_mesh drawable_lods[NUM_LODS]; 110 | 111 | int resolution = pow(2, NUM_LODS+1); 112 | unsigned int total_vertex_count = 0; 113 | unsigned int total_element_count = 0; 114 | unsigned int element_offsets[NUM_LODS] = {0}; 115 | unsigned int eoffset = 0; 116 | unsigned int vertex_offsets[NUM_LODS] = {0}; 117 | unsigned int voffset = 0; 118 | 119 | for(int i = 0; i < NUM_LODS; ++i) 120 | { 121 | drawable_lods[i] = tess::tessellate_cylinder(1, 5, resolution); 122 | total_vertex_count += drawable_lods[i].vertices.size(); 123 | total_element_count += drawable_lods[i].elements.size(); 124 | element_offsets[i] = eoffset; 125 | eoffset += drawable_lods[i].elements.size(); 126 | vertex_offsets[i] = voffset; 127 | voffset += drawable_lods[i].vertices.size(); 128 | resolution >>= 1; 129 | } 130 | 131 | // ---------------------------------------------------------------------------------------------------------------------- 132 | // compute input 1: matrix transforms 133 | // ---------------------------------------------------------------------------------------------------------------------- 134 | 135 | std::vector transforms(_instance_count); 136 | for(unsigned int i = 0; i < transforms.size(); ++i) 137 | { 138 | int x, y, z; 139 | to_3D(i, n_x, n_y, x, y, z); 140 | transforms[i] = mat34(mat4::translation(vec3((float)x, (float)y, (float)z)*6.0f)); 141 | } 142 | 143 | 
glGenBuffers(1, &_input_transform_ssbo); 144 | glBindBuffer(GL_SHADER_STORAGE_BUFFER, _input_transform_ssbo); 145 | glBufferData(GL_SHADER_STORAGE_BUFFER, _instance_count*sizeof(mat34), transforms.data(), GL_STATIC_DRAW); 146 | glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); 147 | 148 | // ---------------------------------------------------------------------------------------------------------------------- 149 | // compute input 2: bounding boxes 150 | // ---------------------------------------------------------------------------------------------------------------------- 151 | 152 | std::vector bounds(_instance_count); 153 | for(unsigned int i = 0; i < bounds.size(); ++i) 154 | { 155 | bbox& b = bounds[i]; 156 | for(unsigned int v = 0; v < drawable_lods[0].vertices.size(); ++v) 157 | { 158 | b.expand(transforms[i].as_mat4().mul(drawable_lods[0].vertices[v].position)); 159 | } 160 | } 161 | 162 | glGenBuffers(1, &_input_bound_ssbo); 163 | glBindBuffer(GL_SHADER_STORAGE_BUFFER, _input_bound_ssbo); 164 | glBufferData(GL_SHADER_STORAGE_BUFFER, _instance_count*sizeof(bbox), bounds.data(), GL_STATIC_DRAW); 165 | glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); 166 | 167 | // ---------------------------------------------------------------------------------------------------------------------- 168 | // compute output 1: instance IDs for each LOD 169 | // ---------------------------------------------------------------------------------------------------------------------- 170 | 171 | vector ids(_instance_count); 172 | for(unsigned int i = 0; i < ids.size(); ++i) 173 | { 174 | ids[i] = i; 175 | } 176 | 177 | for(int i = 0; i < NUM_LODS; ++i) 178 | { 179 | glGenBuffers(1, &_visible_instance_id_ssbo[i]); 180 | glBindBuffer(GL_SHADER_STORAGE_BUFFER, _visible_instance_id_ssbo[i]); 181 | glBufferData(GL_SHADER_STORAGE_BUFFER, _instance_count*sizeof(GLuint), ids.data(), GL_STATIC_DRAW); 182 | glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); 183 | } 184 | 185 | // 
---------------------------------------------------------------------------------------------------------------------- 186 | // compute output 2: indirect draw commands 187 | // ---------------------------------------------------------------------------------------------------------------------- 188 | 189 | for(int i = 0; i < NUM_LODS; ++i) 190 | { 191 | DrawElementsIndirectCommand cmd; 192 | cmd.elementCount = drawable_lods[i].elements.size(); 193 | cmd.instanceCount = 0; // atomic counter incremented by the compute shader 194 | cmd.firstElement = element_offsets[i]; 195 | cmd.baseVertex = vertex_offsets[i]; 196 | cmd.baseInstance = i; // references lod level vertex attribute 197 | _draw_commands[i] = cmd; 198 | } 199 | 200 | _draw_commands[NUM_LODS-1].instanceCount = _instance_count; 201 | 202 | glGenBuffers(1, &_draw_indirect_buffer); 203 | glBindBuffer(GL_DRAW_INDIRECT_BUFFER, _draw_indirect_buffer); 204 | glBufferData(GL_DRAW_INDIRECT_BUFFER, NUM_LODS*sizeof(DrawElementsIndirectCommand), _draw_commands, GL_STATIC_DRAW); 205 | glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0); 206 | 207 | _draw_commands[NUM_LODS-1].instanceCount = 0; 208 | 209 | // ---------------------------------------------------------------------------------------------------------------------- 210 | // compute shader 211 | // ---------------------------------------------------------------------------------------------------------------------- 212 | 213 | glb::shader_program_builder compute_shader_builder; 214 | compute_shader_builder.begin(); 215 | if(!compute_shader_builder.add_file(glb::shader_compute, "../shaders/lod/cull.cs")) 216 | { 217 | return false; 218 | } 219 | if(!compute_shader_builder.end()) 220 | { 221 | return false; 222 | } 223 | auto program = compute_shader_builder.get_shader_program(); 224 | program.bind_uniform_buffer("camera_data", cam.get_uniform_buffer()); 225 | _hiz_cull_lastframe_program = program.get_id(); 226 | 227 | glProgramUniform1ui(_hiz_cull_lastframe_program, 0, 
_instance_count); 228 | 229 | // ---------------------------------------------------------------------------------------------------------------------- 230 | // drawing vertex arrays 231 | // ---------------------------------------------------------------------------------------------------------------------- 232 | 233 | glGenVertexArrays(1, &_draw_vao); 234 | glBindVertexArray(_draw_vao); 235 | 236 | GLuint vbo; 237 | glGenBuffers(1, &vbo); 238 | glBindBuffer(GL_ARRAY_BUFFER, vbo); 239 | glBufferData(GL_ARRAY_BUFFER, total_vertex_count*sizeof(tess::vertex), nullptr, GL_STATIC_DRAW); 240 | 241 | unsigned int offset = 0; 242 | for(int i = 0; i < NUM_LODS; ++i) 243 | { 244 | unsigned int size = drawable_lods[i].vertices.size()*sizeof(tess::vertex); 245 | glBufferSubData(GL_ARRAY_BUFFER, offset, size, drawable_lods[i].vertices.data()); 246 | offset += size; 247 | } 248 | 249 | glEnableVertexAttribArray(0); 250 | glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, sizeof(tess::vertex), GLB_BYTE_OFFSET(0)); 251 | 252 | glEnableVertexAttribArray(1); 253 | glVertexAttribPointer(1, 3, GL_FLOAT, GL_FALSE, sizeof(tess::vertex), GLB_BYTE_OFFSET(sizeof(vec3))); 254 | 255 | GLuint lbo; 256 | glGenBuffers(1, &lbo); 257 | glBindBuffer(GL_ARRAY_BUFFER, lbo); 258 | 259 | GLuint lod_levels[NUM_LODS]; 260 | for(int i = 0; i < NUM_LODS; ++i) 261 | { 262 | lod_levels[i] = i; 263 | } 264 | glBufferData(GL_ARRAY_BUFFER, NUM_LODS*sizeof(GLuint), lod_levels, GL_STATIC_DRAW); 265 | 266 | glEnableVertexAttribArray(2); 267 | glVertexAttribIPointer(2, 1, GL_UNSIGNED_INT, 0, GLB_BYTE_OFFSET(0)); 268 | glVertexAttribDivisor(2, _instance_count + 1); // make sure (instanceID / divisor) is 0 so lod_level vertex attrib equals drawCmd.baseInstance 269 | 270 | GLuint ebo; 271 | glGenBuffers(1, &ebo); 272 | glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo); 273 | glBufferData(GL_ELEMENT_ARRAY_BUFFER, total_element_count*sizeof(tess::element), nullptr, GL_STATIC_DRAW); 274 | 275 | offset = 0; 276 | for(int i = 0; 
i < NUM_LODS; ++i) 277 | { 278 | unsigned int size = drawable_lods[i].elements.size()*sizeof(tess::element); 279 | glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, offset, size, drawable_lods[i].elements.data()); 280 | offset += size; 281 | } 282 | 283 | glBindVertexArray(0); 284 | 285 | // ---------------------------------------------------------------------------------------------------------------------- 286 | // drawing shader 287 | // ---------------------------------------------------------------------------------------------------------------------- 288 | 289 | glb::shader_program_builder draw_shader_builder; 290 | draw_shader_builder.begin(); 291 | if(!draw_shader_builder.add_file(glb::shader_vertex, "../shaders/lod/draw.vs")) 292 | { 293 | return false; 294 | } 295 | if(!draw_shader_builder.add_file(glb::shader_fragment, "../shaders/lod/draw.fs")) 296 | { 297 | return false; 298 | } 299 | if(!draw_shader_builder.end()) 300 | { 301 | return false; 302 | } 303 | auto draw_shader = draw_shader_builder.get_shader_program(); 304 | draw_shader.bind_uniform_buffer("cdata", cam.get_uniform_buffer()); 305 | _draw_program = draw_shader.get_id(); 306 | 307 | // ---------------------------------------------------------------------------------------------------------------------- 308 | // associate buffers with indexed binding points 309 | // ---------------------------------------------------------------------------------------------------------------------- 310 | 311 | unsigned int ssbo_binding = 0; 312 | 313 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, ssbo_binding++, _input_bound_ssbo); 314 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, ssbo_binding++, _input_transform_ssbo); 315 | 316 | for(int i = 0; i < NUM_LODS; ++i) 317 | { 318 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, ssbo_binding++, _visible_instance_id_ssbo[i]); 319 | glBindBufferRange(GL_ATOMIC_COUNTER_BUFFER, i, _draw_indirect_buffer, 4+sizeof(DrawElementsIndirectCommand)*i, sizeof(GLuint)); 320 | } 321 | 322 | 
323 | 324 | 325 | 326 | 327 | 328 | 329 | // ---------------------------------------------------------------------------------------------------------------------- 330 | // temporal hi-z culling shader 331 | // ---------------------------------------------------------------------------------------------------------------------- 332 | 333 | compute_shader_builder.begin(); 334 | if(!compute_shader_builder.add_file(glb::shader_compute, "../shaders/lod/temporal/hiz_cull.cs")) 335 | { 336 | return false; 337 | } 338 | if(!compute_shader_builder.end()) 339 | { 340 | return false; 341 | } 342 | program = compute_shader_builder.get_shader_program(); 343 | program.bind_uniform_buffer("camera_data", cam.get_uniform_buffer()); 344 | _hiz_cull_program = program.get_id(); 345 | 346 | glProgramUniform1ui(_hiz_cull_program, 0, _instance_count); 347 | 348 | // ---------------------------------------------------------------------------------------------------------------------- 349 | // bit-packing current shader 350 | // ---------------------------------------------------------------------------------------------------------------------- 351 | 352 | compute_shader_builder.begin(); 353 | if(!compute_shader_builder.add_file(glb::shader_compute, "../shaders/lod/temporal/bits_curr.cs")) 354 | { 355 | return false; 356 | } 357 | if(!compute_shader_builder.end()) 358 | { 359 | return false; 360 | } 361 | _bits_curr_program = compute_shader_builder.get_shader_program().get_id(); 362 | 363 | glProgramUniform1ui(_bits_curr_program, 0, _instance_count); 364 | 365 | // ---------------------------------------------------------------------------------------------------------------------- 366 | // bit-packing current and not last shader 367 | // ---------------------------------------------------------------------------------------------------------------------- 368 | 369 | compute_shader_builder.begin(); 370 | if(!compute_shader_builder.add_file(glb::shader_compute, 
"../shaders/lod/temporal/bits_curr_notlast.cs")) 371 | { 372 | return false; 373 | } 374 | if(!compute_shader_builder.end()) 375 | { 376 | return false; 377 | } 378 | _bits_curr_notlast_program = compute_shader_builder.get_shader_program().get_id(); 379 | 380 | glProgramUniform1ui(_bits_curr_notlast_program, 0, _instance_count); 381 | 382 | // ---------------------------------------------------------------------------------------------------------------------- 383 | // collect visible instance shader 384 | // ---------------------------------------------------------------------------------------------------------------------- 385 | 386 | compute_shader_builder.begin(); 387 | if(!compute_shader_builder.add_file(glb::shader_compute, "../shaders/lod/temporal/collect_instances.cs")) 388 | { 389 | return false; 390 | } 391 | if(!compute_shader_builder.end()) 392 | { 393 | return false; 394 | } 395 | _collect_instances_program = compute_shader_builder.get_shader_program().get_id(); 396 | 397 | glProgramUniform1ui(_collect_instances_program, 0, _instance_count); 398 | 399 | // ---------------------------------------------------------------------------------------------------------------------- 400 | // visible flags ssbo 401 | // ---------------------------------------------------------------------------------------------------------------------- 402 | 403 | glGenBuffers(1, &_visible_flags_ssbo); 404 | glBindBuffer(GL_SHADER_STORAGE_BUFFER, _visible_flags_ssbo); 405 | glBufferData(GL_SHADER_STORAGE_BUFFER, _instance_count*sizeof(GLuint), nullptr, GL_STATIC_DRAW); 406 | glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); 407 | 408 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 10, _visible_flags_ssbo); 409 | 410 | // ---------------------------------------------------------------------------------------------------------------------- 411 | // current visible bits ssbo 412 | // 
---------------------------------------------------------------------------------------------------------------------- 413 | 414 | vector bits(_instance_count, 0xFFFFFFFF); 415 | 416 | glGenBuffers(1, &_curr_visible_bits_ssbo); 417 | glBindBuffer(GL_SHADER_STORAGE_BUFFER, _curr_visible_bits_ssbo); 418 | glBufferData(GL_SHADER_STORAGE_BUFFER, _instance_count*sizeof(GLuint), bits.data(), GL_STATIC_DRAW); 419 | glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); 420 | 421 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 11, _curr_visible_bits_ssbo); 422 | 423 | // ---------------------------------------------------------------------------------------------------------------------- 424 | // last visible bits ssbo 425 | // ---------------------------------------------------------------------------------------------------------------------- 426 | 427 | glGenBuffers(1, &_last_visible_bits_ssbo); 428 | glBindBuffer(GL_SHADER_STORAGE_BUFFER, _last_visible_bits_ssbo); 429 | glBufferData(GL_SHADER_STORAGE_BUFFER, _instance_count*sizeof(GLuint), bits.data(), GL_STATIC_DRAW); 430 | glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); 431 | 432 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 12, _last_visible_bits_ssbo); 433 | 434 | 435 | 436 | 437 | // _fbuffer->set_use_same_depth_for_mipmaps(true); 438 | 439 | 440 | return true; 441 | } 442 | 443 | bool compute_renderer::finalize() 444 | { 445 | return true; 446 | } 447 | 448 | void compute_renderer::render() 449 | { 450 | // render_hiz_last_frame(); 451 | render_hiz_temporal(); 452 | } 453 | 454 | void compute_renderer::render_raster_temporal() 455 | { 456 | // TODO: use int or bool to store visible flags? 457 | // TODO: really need to compact visible flags into bit array? 458 | 459 | // 1. multi draw indirect 460 | 461 | // 2. raster all bboxes against current z-buffer 462 | // 2.1. draw points for entire scene, expand to 3 box faces using geometry shader, and store visible flag in fragment shader 463 | // 2.2. 
pack visible flags into current bit array: current &= ~last (OPTIONAL) 464 | // 2.3. collect instance ids based on current bit array 465 | 466 | // 3. multi draw indirect 467 | 468 | // 4. prepare data for next frame 469 | // 4.1. pack visible flags into current bit array: current (OPTIONAL) 470 | // 4.2. collect instance ids based on current bit array 471 | // 4.3. swap current with last visible bit arrays 472 | } 473 | 474 | void compute_renderer::render_hiz_temporal() 475 | { 476 | // TODO: use int or bool to store visible flags? 477 | // TODO: really need to compact visible flags into bit array? 478 | // TODO: why use flags instead of storing instance id directly? 479 | 480 | //{ 481 | // glBindBuffer(GL_DRAW_INDIRECT_BUFFER, _draw_indirect_buffer); 482 | 483 | // vector cmds(NUM_LODS); 484 | // glGetBufferSubData(GL_DRAW_INDIRECT_BUFFER, 0, NUM_LODS*sizeof(DrawElementsIndirectCommand), cmds.data()); 485 | // io::print(); 486 | 487 | // for(int i = 0; i < NUM_LODS; ++i) 488 | // { 489 | // std::vector ids(_instance_count); 490 | // glBindBuffer(GL_SHADER_STORAGE_BUFFER, _visible_instance_id_ssbo[i]); 491 | // glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, _instance_count*sizeof(GLuint), ids.data()); 492 | // io::print(); 493 | // } 494 | //} 495 | 496 | //print_per_lod_instance_count(); 497 | 498 | 499 | // 1. draw previously visible 500 | glBindBuffer(GL_DRAW_INDIRECT_BUFFER, _draw_indirect_buffer); 501 | glBindVertexArray(_draw_vao); 502 | glUseProgram(_draw_program); 503 | glMultiDrawElementsIndirect(GL_TRIANGLES, GL_UNSIGNED_INT, GLB_BYTE_OFFSET(0), NUM_LODS, 0); 504 | 505 | // 2. test all bboxes for visibily 506 | // 2.1. build depth mipmaps 507 | _fbuffer->update_depth_mipmaps(); 508 | // 2.2. 
perform frustum culling, occlusion culling using hi-z, and store visible flag 509 | glUseProgram(_hiz_cull_program); 510 | glDispatchCompute(_compute_count, 1, 1); 511 | glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); 512 | 513 | 514 | //{ 515 | // vector flags(_instance_count); 516 | // glBindBuffer(GL_SHADER_STORAGE_BUFFER, _visible_flags_ssbo); 517 | // glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, _instance_count*sizeof(GLuint), flags.data()); 518 | // io::print(); 519 | //} 520 | 521 | 522 | //{ 523 | // vector bits(get_num_blocks(_instance_count, 32)); 524 | // glBindBuffer(GL_SHADER_STORAGE_BUFFER, _curr_visible_bits_ssbo); 525 | // glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, bits.size()*sizeof(GLuint), bits.data()); 526 | // io::print(); 527 | //} 528 | 529 | //{ 530 | // vector bits(get_num_blocks(_instance_count, 32)); 531 | // glBindBuffer(GL_SHADER_STORAGE_BUFFER, _last_visible_bits_ssbo); 532 | // glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, bits.size()*sizeof(GLuint), bits.data()); 533 | // io::print(); 534 | //} 535 | 536 | 537 | // 2.3. pack visible flags into current bit array: current &= ~last (OPTIONAL) 538 | glUseProgram(_bits_curr_notlast_program); 539 | glDispatchCompute(_bit_compute_count, 1, 1); 540 | glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); 541 | 542 | //{ 543 | // vector bits(get_num_blocks(_instance_count, 32)); 544 | // glBindBuffer(GL_SHADER_STORAGE_BUFFER, _curr_visible_bits_ssbo); 545 | // glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, bits.size()*sizeof(GLuint), bits.data()); 546 | // io::print(); 547 | //} 548 | 549 | 550 | 551 | 552 | // 2.4. 
collect instance ids based on current bit array 553 | glBufferData(GL_DRAW_INDIRECT_BUFFER, NUM_LODS*sizeof(DrawElementsIndirectCommand), _draw_commands, GL_STATIC_DRAW); 554 | glUseProgram(_collect_instances_program); 555 | glDispatchCompute(_compute_count, 1, 1); 556 | glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT | GL_COMMAND_BARRIER_BIT); 557 | 558 | 559 | //{ 560 | // glBindBuffer(GL_DRAW_INDIRECT_BUFFER, _draw_indirect_buffer); 561 | 562 | // vector cmds(NUM_LODS); 563 | // glGetBufferSubData(GL_DRAW_INDIRECT_BUFFER, 0, NUM_LODS*sizeof(DrawElementsIndirectCommand), cmds.data()); 564 | // io::print(); 565 | 566 | // for(int i = 0; i < NUM_LODS; ++i) 567 | // { 568 | // std::vector ids(_instance_count); 569 | // glBindBuffer(GL_SHADER_STORAGE_BUFFER, _visible_instance_id_ssbo[i]); 570 | // glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, _instance_count*sizeof(GLuint), ids.data()); 571 | // io::print(); 572 | // } 573 | //} 574 | 575 | 576 | 577 | // 3. draw newly visible in current frame 578 | glUseProgram(_draw_program); 579 | glMultiDrawElementsIndirect(GL_TRIANGLES, GL_UNSIGNED_INT, GLB_BYTE_OFFSET(0), NUM_LODS, 0); 580 | 581 | // 4. prepare data for next frame 582 | // 4.1. pack visible flags into current bit array: current (OPTIONAL) 583 | glUseProgram(_bits_curr_program); 584 | glDispatchCompute(_bit_compute_count, 1, 1); 585 | glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); 586 | 587 | //{ 588 | // vector bits(get_num_blocks(_instance_count, 32)); 589 | // glBindBuffer(GL_SHADER_STORAGE_BUFFER, _curr_visible_bits_ssbo); 590 | // glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, bits.size()*sizeof(GLuint), bits.data()); 591 | // io::print(); 592 | //} 593 | 594 | 595 | // 4.2. 
collect instance ids based on current bit array 596 | glUseProgram(_collect_instances_program); 597 | glDispatchCompute(_compute_count, 1, 1); 598 | glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT | GL_COMMAND_BARRIER_BIT); 599 | 600 | //{ 601 | // glBindBuffer(GL_DRAW_INDIRECT_BUFFER, _draw_indirect_buffer); 602 | 603 | // vector cmds(NUM_LODS); 604 | // glGetBufferSubData(GL_DRAW_INDIRECT_BUFFER, 0, NUM_LODS*sizeof(DrawElementsIndirectCommand), cmds.data()); 605 | // io::print(); 606 | 607 | // for(int i = 0; i < NUM_LODS; ++i) 608 | // { 609 | // std::vector ids(_instance_count); 610 | // glBindBuffer(GL_SHADER_STORAGE_BUFFER, _visible_instance_id_ssbo[i]); 611 | // glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, _instance_count*sizeof(GLuint), ids.data()); 612 | // io::print(); 613 | // } 614 | //} 615 | 616 | // 4.3. swap current with last visible bit arrays 617 | static bool invert = true; 618 | if(invert) 619 | { 620 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 12, _curr_visible_bits_ssbo); 621 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 11, _last_visible_bits_ssbo); 622 | } 623 | else 624 | { 625 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 11, _curr_visible_bits_ssbo); 626 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 12, _last_visible_bits_ssbo); 627 | } 628 | invert ^= 1; 629 | } 630 | 631 | void compute_renderer::render_hiz_last_frame() 632 | { 633 | // frustum cull + occlusion cull + select lod 634 | glUseProgram(_hiz_cull_lastframe_program); 635 | _fbuffer->bind_depth_texture(); 636 | glBindBuffer(GL_DRAW_INDIRECT_BUFFER, _draw_indirect_buffer); 637 | glBufferData(GL_DRAW_INDIRECT_BUFFER, NUM_LODS*sizeof(DrawElementsIndirectCommand), _draw_commands, GL_STATIC_DRAW); 638 | glDispatchCompute(_compute_count, 1, 1); 639 | glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT | GL_COMMAND_BARRIER_BIT); 640 | 641 | // render 642 | glUseProgram(_draw_program); 643 | glBindVertexArray(_draw_vao); 644 | glMultiDrawElementsIndirect(GL_TRIANGLES, GL_UNSIGNED_INT, 
GLB_BYTE_OFFSET(0), NUM_LODS, 0); 645 | 646 | // update hi-z map for next frame 647 | _fbuffer->update_depth_mipmaps(); 648 | 649 | // DEBUG 650 | // print_per_lod_instance_count(); 651 | // _fbuffer->draw_depth_mipmap(0); 652 | } 653 | 654 | void compute_renderer::print_per_lod_instance_count() 655 | { 656 | int total = 0; 657 | std::vector cmds(NUM_LODS); 658 | glBindBuffer(GL_DRAW_INDIRECT_BUFFER, _draw_indirect_buffer); 659 | glGetBufferSubData(GL_DRAW_INDIRECT_BUFFER, 0, NUM_LODS*sizeof(DrawElementsIndirectCommand), cmds.data()); 660 | for(const auto& cmd : cmds) 661 | { 662 | io::print(cmd.instanceCount); 663 | total += cmd.instanceCount; 664 | } 665 | io::print("total:", total, "--------------------------"); 666 | } 667 | } 668 | -------------------------------------------------------------------------------- /shaders/temporal/pack_bits_as_uint/hiz_cull.cs: -------------------------------------------------------------------------------- 1 | #version 450 2 | 3 | struct Transform 4 | { 5 | vec4 m0; 6 | vec4 m1; 7 | vec4 m2; 8 | }; 9 | 10 | struct Bound 11 | { 12 | vec4 bmin; 13 | vec4 bmax; 14 | }; 15 | 16 | // -------------------------------------------------------------------------------------------------------------- 17 | // INPUTS 18 | // -------------------------------------------------------------------------------------------------------------- 19 | 20 | layout(local_size_x = 256) in; 21 | 22 | layout(location = 0) uniform uint instance_count; 23 | 24 | layout(binding = 0) uniform sampler2D depth_texture; 25 | 26 | layout(std140) uniform camera_data 27 | { 28 | mat4 view_proj_matrix; 29 | mat4 normal_matrix; 30 | mat4 view_matrix; 31 | } camera; 32 | 33 | layout(std430, binding = 0) buffer bound_in 34 | { 35 | readonly Bound data[]; 36 | } input_bound; 37 | 38 | // -------------------------------------------------------------------------------------------------------------- 39 | // OUTPUTS 40 | // 
// --------------------------------------------------------------------------------------------------------------

// One flag per instance, written by main(): 1 = potentially visible, 0 = culled.
layout(std430, binding = 10) buffer visible_out
{
    writeonly uint data[];
} output_visible;

// --------------------------------------------------------------------------------------------------------------
// AUXILIARY FUNCTIONS
// --------------------------------------------------------------------------------------------------------------

// Visibility test for one bounding box.
// Returns 0 when the box is outside the frustum, covers at most one pixel, or lies behind
// the depth stored in the hi-z texture; returns 1 otherwise (including whenever any
// corner is in front of the near plane).
uint visible(const Bound bound)
{
    // ------------------------------------------------------------------------------
    // frustum culling
    // ------------------------------------------------------------------------------

    // tests *must* be done in clip space, *not* NDC space

    // Clip-space corners. Bit 0/1/2 of the index selects bmin (set) or bmax (clear)
    // for x/y/z. Corners 0 and 7 use the stored vec4 as-is, the others force w = 1.
    vec4 corner[8];
    corner[0] = camera.view_proj_matrix * bound.bmax;
    for(int i = 1; i < 7; ++i)
    {
        bvec3 take_min = bvec3((i & 1) != 0, (i & 2) != 0, (i & 4) != 0);
        corner[i] = camera.view_proj_matrix * vec4(mix(bound.bmax.xyz, bound.bmin.xyz, take_min), 1.0f);
    }
    corner[7] = camera.view_proj_matrix * bound.bmin;

    // per axis: number of corners beyond the +w plane / beyond the -w plane
    ivec3 beyond_pos = ivec3(0);
    ivec3 beyond_neg = ivec3(0);
    for(int i = 0; i < 8; ++i)
    {
        beyond_pos += ivec3(greaterThan(corner[i].xyz, vec3( corner[i].w)));
        beyond_neg += ivec3(lessThan   (corner[i].xyz, vec3(-corner[i].w)));
    }

    // all eight corners outside the same plane: nothing of the box is inside the frustum
    if(any(equal(beyond_pos, ivec3(8))) || any(equal(beyond_neg, ivec3(8))))
    {
        return 0u;
    }

    // ------------------------------------------------------------------------------
    // occlusion culling
    // ------------------------------------------------------------------------------

    // a box crossing the near plane (z < -w) is always treated as visible
    if(beyond_neg.z > 0)
    {
        return 1u;
    }

    // NDC extents of the box, then remapped from [-1, 1] to [0, 1]
    vec3 lo = corner[0].xyz / corner[0].w;
    vec3 hi = lo;
    for(int i = 1; i < 8; ++i)
    {
        vec3 p = corner[i].xyz / corner[i].w;
        lo = min(lo, p);
        hi = max(hi, p);
    }
    lo = lo * 0.5 + 0.5;
    hi = hi * 0.5 + 0.5;

    // largest side of the screen-space rectangle, in texels of mip level 0
    vec2 extent = hi.xy - lo.xy;
    ivec2 dims = textureSize(depth_texture, 0);
    float texels = max(extent.x, extent.y) * float(max(dims.x, dims.y));

    // small-feature culling
    if(texels <= 1.0f)
    {
        return 0u;
    }

    // hi-z mip level derived from the rectangle size
    float level = ceil(log2(texels));

    // depth at the four rectangle corners; keep the largest value
    float d00 = textureLod(depth_texture, lo.xy, level).r;
    float d10 = textureLod(depth_texture, vec2(hi.x, lo.y), level).r;
    float d11 = textureLod(depth_texture, hi.xy, level).r;
    float d01 = textureLod(depth_texture, vec2(lo.x, hi.y), level).r;
    float farthest = max(0.0f, max(max(max(d00, d10), d11), d01));

    // visible when the nearest point of the box is in front of that depth
    return (lo.z < farthest) ? 1u : 0u;
}

// --------------------------------------------------------------------------------------------------------------
// MAIN
// --------------------------------------------------------------------------------------------------------------

// One invocation per instance; invocations past instance_count do nothing.
void main()
{
    uint id = gl_GlobalInvocationID.x;

    if(id < instance_count)
    {
        output_visible.data[id] = visible(input_bound.data[id]);
    }
}
// Corner ids of the quad emitted by each geometry-shader invocation, in strip order.
// Bit 0/1/2 of an id selects bmax (set) or bmin (clear) for x/y/z.
const uvec4 FACE_CORNERS[6] = uvec4[6](
    uvec4(0u, 1u, 4u, 5u),  // invocation 0: y = bmin.y
    uvec4(1u, 3u, 5u, 7u),  // invocation 1: x = bmax.x
    uvec4(3u, 2u, 7u, 6u),  // invocation 2: y = bmax.y
    uvec4(0u, 4u, 2u, 6u),  // invocation 3: x = bmin.x
    uvec4(4u, 5u, 6u, 7u),  // invocation 4: z = bmax.z
    uvec4(0u, 2u, 1u, 3u)   // invocation 5: z = bmin.z
);

// Expands one input point into one face of the instance's bounding box.
// Each of the six invocations emits a four-vertex triangle strip and forwards
// the instance id to the fragment stage.
void main()
{
    uint instance_id = vin[0].instance_id;
    Bound bound = input_bound.data[instance_id];

    uvec4 face = FACE_CORNERS[gl_InvocationID];

    for(int v = 0; v < 4; ++v)
    {
        uint id = face[v];
        bvec3 take_max = bvec3((id & 1u) != 0u, (id & 2u) != 0u, (id & 4u) != 0u);

        gl_Position = camera.view_proj_matrix * vec4(mix(bound.bmin.xyz, bound.bmax.xyz, take_max), 1.0f);
        fout.instance_id = instance_id;
        EmitVertex();
    }

    EndPrimitive();
}