├── cache └── .gitkeep ├── compiled_shaders └── .gitkeep ├── format.sh ├── readme ├── san_miguel_breakdown.png ├── san_miguel_cascades.png └── san_miguel_rendered.png ├── .gitignore ├── src ├── allocations │ ├── persistently_mapped.h │ ├── persistently_mapped.cpp │ ├── staging.h │ ├── image_with_view.h │ ├── base.h │ ├── image_with_view.cpp │ ├── base.cpp │ └── staging.cpp ├── debugging.h ├── ext_lib_impls.cpp ├── rendering.h ├── resources │ ├── meshlets.h │ ├── image_loading.h │ ├── fs_cache.h │ ├── ktx2.h │ ├── mesh_loading.h │ ├── bounding_sphere.h │ ├── meshlets.cpp │ ├── dds.h │ └── image_loading.cpp ├── pch.h ├── debugging.cpp ├── util.h ├── shaders │ ├── common │ │ ├── bindings.glsl │ │ ├── matrices.glsl │ │ ├── hlsl4glsl.glsl │ │ ├── buffer_references.glsl │ │ ├── debug.glsl │ │ ├── prefix_sum.glsl │ │ ├── util.glsl │ │ ├── pbr.glsl │ │ ├── vbuffer.glsl │ │ └── culling.glsl │ ├── compute │ │ ├── copy_quantized_positions.glsl │ │ ├── reset_buffers.glsl │ │ ├── read_depth.glsl │ │ └── generate_shadow_matrices.glsl │ ├── cull_instances.comp │ ├── display_transform.comp │ ├── write_draw_calls.comp │ ├── rasterization.glsl │ └── render_geometry.comp ├── pipelines.h ├── projection.h ├── descriptor_set.h ├── frame_resources.cpp ├── input.h ├── util.cpp ├── sync.h ├── shared_cpu_gpu.h ├── input.cpp ├── frame_resources.h ├── descriptor_set.cpp ├── pipelines.cpp └── rendering.cpp ├── compile_shaders.sh ├── .gitmodules ├── readme.md ├── CMakeLists.txt ├── .clang-format └── compile_glsl.py /cache/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /compiled_shaders/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /format.sh: -------------------------------------------------------------------------------- 1 | clang-format 
-i src/**/*.cpp src/**/*.h src/*.cpp src/*.h src/shaders/*.* src/shaders/**/* 2 | -------------------------------------------------------------------------------- /readme/san_miguel_breakdown.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/expenses/lighthugger/HEAD/readme/san_miguel_breakdown.png -------------------------------------------------------------------------------- /readme/san_miguel_cascades.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/expenses/lighthugger/HEAD/readme/san_miguel_cascades.png -------------------------------------------------------------------------------- /readme/san_miguel_rendered.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/expenses/lighthugger/HEAD/readme/san_miguel_rendered.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | compiled_shaders 3 | *.tracy 4 | imgui.ini 5 | traces 6 | renderdoc.cap 7 | *.dds 8 | *.ktx2 9 | cache/* 10 | perf.data* 11 | texconv.exe 12 | models 13 | -------------------------------------------------------------------------------- /src/allocations/persistently_mapped.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "base.h" 3 | 4 | struct PersistentlyMappedBuffer { 5 | AllocatedBuffer buffer; 6 | void* mapped_ptr; 7 | 8 | PersistentlyMappedBuffer(AllocatedBuffer buffer_); 9 | }; 10 | -------------------------------------------------------------------------------- /src/debugging.h: -------------------------------------------------------------------------------- 1 | 2 | VKAPI_ATTR VkBool32 VKAPI_CALL debug_message_callback( 3 | VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity, 4 | 
VkDebugUtilsMessageTypeFlagsEXT messageTypes, 5 | VkDebugUtilsMessengerCallbackDataEXT const* pCallbackData, 6 | void* /*pUserData*/ 7 | ); 8 | -------------------------------------------------------------------------------- /src/ext_lib_impls.cpp: -------------------------------------------------------------------------------- 1 | #define THSVS_SIMPLER_VULKAN_SYNCHRONIZATION_IMPLEMENTATION 2 | #include 3 | 4 | #define VMA_IMPLEMENTATION 5 | #include 6 | 7 | #include 8 | 9 | VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE 10 | -------------------------------------------------------------------------------- /src/allocations/persistently_mapped.cpp: -------------------------------------------------------------------------------- 1 | #include "persistently_mapped.h" 2 | 3 | PersistentlyMappedBuffer::PersistentlyMappedBuffer(AllocatedBuffer buffer_) : 4 | buffer(std::move(buffer_)) { 5 | auto buffer_info = buffer.allocator.getAllocationInfo(buffer.allocation); 6 | mapped_ptr = buffer_info.pMappedData; 7 | assert(mapped_ptr); 8 | } 9 | -------------------------------------------------------------------------------- /compile_shaders.sh: -------------------------------------------------------------------------------- 1 | glsl_flags="-Werror -O -DGLSL=1 -std=450 --target-env=vulkan1.3 --target-spv=spv1.6 -I src" 2 | 3 | python compile_glsl.py --out-dir compiled_shaders --flags "$glsl_flags" src/shaders/*.{glsl,comp} || exit 1 4 | python compile_glsl.py --out-dir compiled_shaders/compute --flags "$glsl_flags" --shader-stage=comp src/shaders/compute/*.* || exit 1 5 | -------------------------------------------------------------------------------- /src/rendering.h: -------------------------------------------------------------------------------- 1 | #include "descriptor_set.h" 2 | #include "pipelines.h" 3 | 4 | void render( 5 | const vk::raii::CommandBuffer& command_buffer, 6 | const Pipelines& pipelines, 7 | const DescriptorSet& descriptor_set, 8 | const Resources& 
resources, 9 | vk::Image swapchain_image, 10 | const vk::raii::ImageView& swapchain_image_view, 11 | vk::Extent2D extent, 12 | uint32_t graphics_queue_family, 13 | tracy::VkCtx* tracy_ctx, 14 | uint32_t swapchain_image_index, 15 | uint64_t uniform_buffer_address 16 | ); 17 | -------------------------------------------------------------------------------- /src/resources/meshlets.h: -------------------------------------------------------------------------------- 1 | #include "../shared_cpu_gpu.h" 2 | 3 | struct Meshlets { 4 | std::vector meshlets; 5 | // Contains a micro index buffer that indexes into the main index buffer. 6 | // Yeah it's confusing. 7 | std::vector micro_indices; 8 | 9 | // This is the new index buffer. 10 | std::vector indices_32bit; 11 | std::vector indices_16bit; 12 | }; 13 | 14 | Meshlets build_meshlets( 15 | uint8_t* indices, 16 | size_t indices_count, 17 | float* positions, 18 | size_t vertices_count, 19 | bool uses_32_bit_indices 20 | ); 21 | -------------------------------------------------------------------------------- /src/allocations/staging.h: -------------------------------------------------------------------------------- 1 | #include "persistently_mapped.h" 2 | 3 | AllocatedBuffer upload_via_staging_buffer( 4 | const void* bytes, 5 | size_t num_bytes, 6 | vma::Allocator allocator, 7 | vk::BufferUsageFlags desired_flags, 8 | const std::string& name, 9 | const vk::raii::CommandBuffer& command_buffer, 10 | std::vector& temp_buffers 11 | ); 12 | 13 | std::pair upload_from_file_via_staging_buffer( 14 | std::ifstream stream, 15 | vma::Allocator allocator, 16 | vk::BufferUsageFlags desired_flags, 17 | const std::string& name, 18 | const vk::raii::CommandBuffer& command_buffer, 19 | std::vector& temp_buffers 20 | ); 21 | -------------------------------------------------------------------------------- /src/resources/image_loading.h: -------------------------------------------------------------------------------- 1 | #include 
"../allocations/base.h" 2 | #include "../allocations/image_with_view.h" 3 | 4 | ImageWithView load_dds( 5 | const std::filesystem::path& filepath, 6 | vma::Allocator allocator, 7 | const vk::raii::Device& device, 8 | const vk::raii::CommandBuffer& command_buffer, 9 | uint32_t graphics_queue_family, 10 | std::vector& temp_buffers 11 | ); 12 | 13 | ImageWithView load_ktx2_image( 14 | const std::filesystem::path& filepath, 15 | vma::Allocator allocator, 16 | const vk::raii::Device& device, 17 | const vk::raii::CommandBuffer& command_buffer, 18 | uint32_t graphics_queue_family, 19 | std::vector& temp_buffers 20 | ); 21 | -------------------------------------------------------------------------------- /src/pch.h: -------------------------------------------------------------------------------- 1 | #define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1 2 | #define VULKAN_HPP_NO_CONSTRUCTORS 3 | #include 4 | #include 5 | #define GLFW_INCLUDE_VULKAN 6 | #include 7 | #define DBG_MACRO_NO_WARNING 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #define GLM_FORCE_DEPTH_ZERO_TO_ONE 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | #include 22 | #include 23 | #include 24 | -------------------------------------------------------------------------------- /src/debugging.cpp: -------------------------------------------------------------------------------- 1 | #include "debugging.h" 2 | 3 | VKAPI_ATTR VkBool32 VKAPI_CALL debug_message_callback( 4 | VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity, 5 | VkDebugUtilsMessageTypeFlagsEXT messageTypes, 6 | VkDebugUtilsMessengerCallbackDataEXT const* pCallbackData, 7 | void* /*pUserData*/ 8 | ) { 9 | std::cout 10 | << "[" 11 | << vk::to_string(static_cast( 12 | messageSeverity 13 | )) 14 | << "][" 15 | << vk::to_string( 16 | static_cast(messageTypes) 17 | ) 18 | << "][" << pCallbackData->pMessageIdName << "]\t" 19 | << pCallbackData->pMessage << std::endl; 20 | 21 | return false; 22 | } 
/// Minimal on-disk blob cache.
///
/// Every entry is stored as a single binary file inside the relative `cache`
/// directory; the file name is the decimal `std::hash` of the key.
struct FsCache {
    /// Returns the path of the file that stores the entry for `key`
    /// (`cache/<std::hash of key>`). Does not touch the filesystem.
    static std::filesystem::path filepath_for_key(const std::string& key) {
        return std::filesystem::path("cache")
            / std::to_string(std::hash<std::string> {}(key));
    }

    /// Opens the cached entry for `key` for binary reading.
    ///
    /// @return An open stream, or `std::nullopt` when the entry could not be
    ///         opened (typically because it does not exist).
    static std::optional<std::ifstream> get(const std::string& key) {
        // Open first and inspect the result instead of `exists()` followed by
        // an open: that avoids the check-then-use race and guarantees that a
        // returned stream really is open.
        std::optional<std::ifstream> stream(
            std::in_place,
            filepath_for_key(key),
            std::ios::binary
        );

        if (!stream->is_open()) {
            return std::nullopt;
        }

        return stream;
    }

    /// Stores the raw bytes of `data` as the entry for `key`, replacing any
    /// previous entry.
    ///
    /// The elements are written byte-for-byte, so `T` has to be a type whose
    /// object representation is meaningful when read back. Writing is
    /// best-effort: if the file cannot be created (for example because the
    /// `cache` directory is missing) nothing is stored and no error is raised.
    template<class T>
    static void insert(const std::string& key, const std::vector<T>& data) {
        std::ofstream stream(filepath_for_key(key), std::ios::binary);
        stream.write(
            reinterpret_cast<const char*>(data.data()),
            static_cast<std::streamsize>(data.size() * sizeof(T))
        );
    }
};
/src/allocations/base.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | struct AllocatedImage { 4 | vk::Image image; 5 | vma::Allocation allocation; 6 | vma::Allocator allocator; 7 | 8 | AllocatedImage(AllocatedImage&& other); 9 | 10 | AllocatedImage( 11 | vk::ImageCreateInfo create_info, 12 | vma::Allocator allocator_, 13 | const std::string& name 14 | ); 15 | 16 | AllocatedImage& operator=(AllocatedImage&& other); 17 | 18 | ~AllocatedImage(); 19 | }; 20 | 21 | struct AllocatedBuffer { 22 | vk::Buffer buffer; 23 | vma::Allocation allocation; 24 | vma::Allocator allocator; 25 | 26 | AllocatedBuffer(AllocatedBuffer&& other); 27 | 28 | AllocatedBuffer( 29 | vk::BufferCreateInfo create_info, 30 | vma::AllocationCreateInfo alloc_info, 31 | vma::Allocator allocator_, 32 | const std::string& name 33 | ); 34 | 35 | ~AllocatedBuffer(); 36 | 37 | AllocatedBuffer& operator=(AllocatedBuffer&& other); 38 | }; 39 | -------------------------------------------------------------------------------- /src/resources/ktx2.h: -------------------------------------------------------------------------------- 1 | const std::array KTX2_IDENTIFIER = 2 | {0xAB, 0x4B, 0x54, 0x58, 0x20, 0x32, 0x30, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A}; 3 | 4 | enum Ktx2SupercompressionScheme : uint32_t { 5 | None = 0, 6 | BasisLZ = 1, 7 | Zstandard = 2, 8 | ZLIB = 3 9 | }; 10 | 11 | struct Ktx2Header { 12 | vk::Format format; 13 | uint32_t type_size; 14 | uint32_t width; 15 | uint32_t height; 16 | uint32_t depth; 17 | uint32_t layer_count; 18 | uint32_t face_count; 19 | uint32_t level_count; 20 | Ktx2SupercompressionScheme supercompression_scheme; 21 | }; 22 | 23 | struct Ktx2Index { 24 | uint32_t dfd_byte_offset; 25 | uint32_t dfd_byte_length; 26 | uint32_t kvd_byte_offset; 27 | uint32_t kvd_byte_length; 28 | uint64_t sgd_byte_offset; 29 | uint64_t sgd_byte_length; 30 | }; 31 | 32 | struct Ktx2LevelIndex { 33 | uint64_t byte_offset; 34 | uint64_t byte_length; 
35 | uint64_t uncompressed_byte_length; 36 | }; 37 | -------------------------------------------------------------------------------- /src/util.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "allocations/base.h" 4 | 5 | const vk::ImageSubresourceRange COLOR_SUBRESOURCE_RANGE = { 6 | .aspectMask = vk::ImageAspectFlagBits::eColor, 7 | .baseMipLevel = 0, 8 | .levelCount = 1, 9 | .baseArrayLayer = 0, 10 | .layerCount = 1, 11 | }; 12 | 13 | const vk::ImageSubresourceRange DEPTH_SUBRESOURCE_RANGE = { 14 | .aspectMask = vk::ImageAspectFlagBits::eDepth, 15 | .baseMipLevel = 0, 16 | .levelCount = 1, 17 | .baseArrayLayer = 0, 18 | .layerCount = 1, 19 | }; 20 | 21 | void check_vk_result(vk::Result err); 22 | 23 | std::vector create_and_name_swapchain_image_views( 24 | const vk::raii::Device& device, 25 | const std::vector& swapchain_images, 26 | vk::Format swapchain_format 27 | ); 28 | 29 | vk::DescriptorBufferInfo buffer_info(const AllocatedBuffer& buffer); 30 | 31 | uint32_t dispatch_size(uint32_t width, uint32_t workgroup_size); 32 | 33 | std::vector read_file_to_bytes(const std::filesystem::path& filepath); 34 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "simple_vulkan_synchronization"] 2 | path = external/simple_vulkan_synchronization 3 | url = https://github.com/Tobski/simple_vulkan_synchronization/ 4 | [submodule "VulkanMemoryAllocator-Hpp"] 5 | path = external/VulkanMemoryAllocator-Hpp 6 | url = https://github.com/YaaZ/VulkanMemoryAllocator-Hpp/ 7 | [submodule "tony-mc-mapface"] 8 | path = external/tony-mc-mapface 9 | url = https://github.com/h3r2tic/tony-mc-mapface/ 10 | [submodule "external/dbg-macro"] 11 | path = external/dbg-macro 12 | url = https://github.com/sharkdp/dbg-macro 13 | [submodule "external/tracy"] 14 | path = external/tracy 15 | url = 
https://github.com/wolfpld/tracy 16 | [submodule "external/imgui"] 17 | path = external/imgui 18 | url = https://github.com/ocornut/imgui 19 | [submodule "external/fastgltf"] 20 | path = external/fastgltf 21 | url = https://github.com/spnda/fastgltf 22 | [submodule "external/meshoptimizer"] 23 | path = external/meshoptimizer 24 | url = https://github.com/zeux/meshoptimizer 25 | [submodule "external/glfw"] 26 | path = external/glfw 27 | url = https://github.com/glfw/glfw 28 | -------------------------------------------------------------------------------- /src/shaders/common/bindings.glsl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "buffer_references.glsl" 4 | 5 | // Ensure that all of these are scalar! 6 | 7 | layout(binding = 0) uniform texture2D textures[]; 8 | 9 | layout(binding = 1) uniform texture2D scene_referred_framebuffer; 10 | layout(binding = 2) uniform texture3D display_transform_lut; 11 | layout(binding = 3) uniform texture2D depth_buffer; 12 | 13 | layout(binding = 4) uniform texture2DArray shadowmap; 14 | layout(binding = 5) uniform writeonly image2D rw_scene_referred_framebuffer; 15 | layout(binding = 6) uniform utexture2D visibility_buffer; 16 | 17 | layout(binding = 7) uniform sampler clamp_sampler; 18 | layout(binding = 8) uniform sampler repeat_sampler; 19 | layout(binding = 9) uniform sampler shadowmap_comparison_sampler; 20 | 21 | layout(binding = 10) uniform textureCube skybox; 22 | 23 | layout(push_constant) uniform UniformPushConstant { 24 | UniformBufferAddressConstant uniform_buffer; 25 | ShadowPassConstant shadow_constant; 26 | }; 27 | 28 | Uniforms get_uniforms() { 29 | return UniformsBuffer(uniform_buffer.address).uniforms; 30 | } 31 | -------------------------------------------------------------------------------- /src/pipelines.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "descriptor_set.h" 3 | 4 | 
struct RasterizationPipeline { 5 | vk::raii::Pipeline opaque; 6 | vk::raii::Pipeline alpha_clip; 7 | }; 8 | 9 | struct Pipelines { 10 | RasterizationPipeline rasterize_shadowmap; 11 | RasterizationPipeline rasterize_visbuffer; 12 | 13 | vk::raii::Pipeline read_depth; 14 | vk::raii::Pipeline generate_matrices; 15 | vk::raii::Pipeline write_draw_calls; 16 | vk::raii::Pipeline display_transform; 17 | vk::raii::Pipeline render_geometry; 18 | vk::raii::Pipeline reset_buffers_a; 19 | vk::raii::Pipeline reset_buffers_b; 20 | vk::raii::Pipeline reset_buffers_c; 21 | vk::raii::Pipeline write_draw_calls_shadows; 22 | vk::raii::Pipeline cull_instances; 23 | vk::raii::Pipeline cull_instances_shadows; 24 | 25 | vk::raii::PipelineLayout pipeline_layout; 26 | 27 | vk::raii::Pipeline copy_quantized_positions; 28 | vk::raii::Pipeline copy_quantized_normals; 29 | vk::raii::PipelineLayout copy_pipeline_layout; 30 | 31 | static Pipelines compile_pipelines( 32 | const vk::raii::Device& device, 33 | const DescriptorSetLayouts& descriptor_set_layouts 34 | ); 35 | }; 36 | -------------------------------------------------------------------------------- /src/shaders/common/matrices.glsl: -------------------------------------------------------------------------------- 1 | // Implementations copied from https://github.com/fu5ha/ultraviolet. 2 | 3 | // In GLSL matrices are cols major or w/e, but not in HLSL. 
4 | float4x4 5 | create_matrix_from_cols_glsl(float4 c0, float4 c1, float4 c2, float4 c3) { 6 | return float4x4(c0, c1, c2, c3); 7 | } 8 | 9 | float4x4 lookAt(float3 eye, float3 at, float3 up) { 10 | float3 f = normalize(at - eye); 11 | float3 r = normalize(cross(f, up)); 12 | float3 u = cross(r, f); 13 | return create_matrix_from_cols_glsl( 14 | float4(r.x, u.x, -f.x, 0.0), 15 | float4(r.y, u.y, -f.y, 0.0), 16 | float4(r.z, u.z, -f.z, 0.0), 17 | float4(-dot(r, eye), -dot(u, eye), dot(f, eye), 1.0) 18 | ); 19 | } 20 | 21 | float4x4 OrthographicProjection( 22 | float left, 23 | float right, 24 | float bottom, 25 | float top, 26 | float near, 27 | float far 28 | ) { 29 | float rml = right - left; 30 | float rpl = right + left; 31 | float tmb = top - bottom; 32 | float tpb = top + bottom; 33 | float fmn = far - near; 34 | return create_matrix_from_cols_glsl( 35 | float4(2.0 / rml, 0.0, 0.0, 0.0), 36 | float4(0.0, -2.0 / tmb, 0.0, 0.0), 37 | float4(0.0, 0.0, -1.0 / fmn, 0.0), 38 | float4(-(rpl / rml), -(tpb / tmb), -(near / fmn), 1.0) 39 | ); 40 | } 41 | -------------------------------------------------------------------------------- /src/shaders/compute/copy_quantized_positions.glsl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | layout(push_constant) uniform PushConstant { 4 | CopyQuantizedPositionsConstant copy; 5 | }; 6 | 7 | layout(buffer_reference, scalar) buffer Uint16_t4 { 8 | uint16_t4 values[]; 9 | }; 10 | 11 | // Alignment of 2 is required here. 12 | layout(buffer_reference, scalar, buffer_reference_align = 2) buffer Uint16_t3 { 13 | uint16_t3 values[]; 14 | }; 15 | 16 | layout(buffer_reference, scalar) buffer Uint8_t4 { 17 | uint8_t4 values[]; 18 | }; 19 | 20 | // Alignment of 1 is required here. 
21 | layout(buffer_reference, scalar, buffer_reference_align = 1) buffer Uint8_t3 { 22 | uint8_t3 values[]; 23 | }; 24 | 25 | layout(local_size_x = 64) in; 26 | 27 | void copy_quantized_positions() { 28 | uint32_t index = gl_GlobalInvocationID.x; 29 | 30 | if (index >= copy.count) { 31 | return; 32 | } 33 | 34 | uint16_t4 value = Uint16_t4(copy.src).values[index]; 35 | Uint16_t3(copy.dst).values[index] = value.xyz; 36 | } 37 | 38 | layout(local_size_x = 64) in; 39 | 40 | void copy_quantized_normals() { 41 | uint32_t index = gl_GlobalInvocationID.x; 42 | 43 | if (index >= copy.count) { 44 | return; 45 | } 46 | 47 | uint8_t4 value = Uint8_t4(copy.src).values[index]; 48 | Uint8_t3(copy.dst).values[index] = value.xyz; 49 | } 50 | -------------------------------------------------------------------------------- /src/projection.h: -------------------------------------------------------------------------------- 1 | // Adapted from https://vincent-p.github.io/posts/vulkan_perspective_matrix/ 2 | // (simplified and transposed) 3 | glm::mat4 infinite_reverse_z_perspective( 4 | float fov_rad, 5 | float width, 6 | float height, 7 | float near 8 | ) { 9 | float focal_length = 1.0f / std::tan(fov_rad / 2.0f); 10 | 11 | float x = focal_length / (width / height); 12 | float y = -focal_length; 13 | 14 | return glm::mat4( 15 | glm::vec4(x, 0.0f, 0.0f, 0.0f), 16 | glm::vec4(0.0f, y, 0.0f, 0.0f), 17 | glm::vec4(0.0f, 0.0f, 0.0f, -1.0f), 18 | glm::vec4(0.0f, 0.0f, near, 0.0f) 19 | ); 20 | } 21 | 22 | glm::mat4 reverse_z_perspective( 23 | float fov_rad, 24 | float width, 25 | float height, 26 | float near, 27 | float far 28 | ) { 29 | float focal_length = 1.0f / std::tan(fov_rad / 2.0f); 30 | 31 | float x = focal_length / (width / height); 32 | float y = -focal_length; 33 | float A = near / (far - near); 34 | float B = far * A; 35 | 36 | return glm::mat4( 37 | glm::vec4(x, 0.0f, 0.0f, 0.0f), 38 | glm::vec4(0.0f, y, 0.0f, 0.0f), 39 | glm::vec4(0.0f, 0.0f, A, -1.0f), 40 | glm::vec4(0.0f, 
0.0f, B, 0.0f) 41 | ); 42 | 43 | /*if (inverse) 44 | { 45 | *inverse = float4x4({ 46 | 1/x, 0.0f, 0.0f, 0.0f, 47 | 0.0f, 1/y, 0.0f, 0.0f, 48 | 0.0f, 0.0f, 0.0f, 1/B, 49 | 0.0f, 0.0f, -1.0f, A/B, 50 | }); 51 | }*/ 52 | } 53 | -------------------------------------------------------------------------------- /src/resources/mesh_loading.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "../allocations/base.h" 3 | #include "../descriptor_set.h" 4 | #include "../pipelines.h" 5 | #include "../shared_cpu_gpu.h" 6 | #include "meshlets.h" 7 | 8 | struct BoundingBox { 9 | glm::vec3 min = glm::vec3(std::numeric_limits::max()); 10 | glm::vec3 max = glm::vec3(std::numeric_limits::min()); 11 | 12 | void insert(glm::vec3 point) { 13 | min = glm::min(min, point); 14 | max = glm::max(max, point); 15 | } 16 | 17 | float diagonal_length() { 18 | return glm::distance(min, max); 19 | } 20 | }; 21 | 22 | struct GltfPrimitive { 23 | AllocatedBuffer position; 24 | AllocatedBuffer indices; 25 | AllocatedBuffer uvs; 26 | AllocatedBuffer normals; 27 | AllocatedBuffer mesh_info; 28 | AllocatedBuffer micro_indices; 29 | AllocatedBuffer meshlets; 30 | glm::mat4 transform; 31 | uint32_t num_meshlets; 32 | }; 33 | 34 | struct GltfMesh { 35 | std::vector images; 36 | std::vector image_indices; 37 | std::vector primitives; 38 | std::shared_ptr image_index_tracker; 39 | 40 | ~GltfMesh(); 41 | }; 42 | 43 | GltfMesh load_gltf( 44 | const std::filesystem::path& filepath, 45 | vma::Allocator allocator, 46 | const vk::raii::Device& device, 47 | const vk::raii::CommandBuffer& command_buffer, 48 | uint32_t graphics_queue_family, 49 | std::vector& temp_buffers, 50 | DescriptorSet& descriptor_set, 51 | const Pipelines& pipelines 52 | ); 53 | -------------------------------------------------------------------------------- /src/descriptor_set.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 
#include "allocations/image_with_view.h" 3 | #include "allocations/persistently_mapped.h" 4 | #include "frame_resources.h" 5 | #include "shared_cpu_gpu.h" 6 | 7 | struct DescriptorSetLayouts { 8 | vk::raii::DescriptorSetLayout everything; 9 | vk::raii::DescriptorSetLayout swapchain_storage_image; 10 | }; 11 | 12 | DescriptorSetLayouts 13 | create_descriptor_set_layouts(const vk::raii::Device& device); 14 | 15 | struct IndexTracker { 16 | uint32_t next_index = 0; 17 | std::vector free_indices; 18 | 19 | IndexTracker(); 20 | 21 | uint32_t push(); 22 | 23 | void free(uint32_t index); 24 | 25 | ~IndexTracker(); 26 | }; 27 | 28 | struct DescriptorSet { 29 | vk::raii::DescriptorSet set; 30 | std::vector swapchain_image_sets; 31 | std::shared_ptr tracker = std::make_shared(); 32 | 33 | DescriptorSet( 34 | vk::raii::DescriptorSet set_, 35 | std::vector swapchain_image_sets_ 36 | ); 37 | 38 | uint32_t write_image(const ImageWithView& image, vk::Device device); 39 | 40 | void write_resizing_descriptors( 41 | const ResizingResources& resizing_resources, 42 | const vk::raii::Device& device, 43 | const std::vector& swapchain_image_views 44 | ); 45 | 46 | void write_descriptors( 47 | const Resources& resources, 48 | const vk::raii::Device& device, 49 | const std::vector& swapchain_image_views 50 | ); 51 | }; 52 | -------------------------------------------------------------------------------- /src/frame_resources.cpp: -------------------------------------------------------------------------------- 1 | #include "frame_resources.h" 2 | 3 | FrameCommandData create_frame_command_data( 4 | const vk::raii::Device& device, 5 | const vk::raii::PhysicalDevice& phys_device, 6 | const vk::raii::Queue& queue, 7 | uint32_t graphics_queue_family 8 | ) { 9 | auto pool = device.createCommandPool({ 10 | .flags = vk::CommandPoolCreateFlagBits::eResetCommandBuffer, 11 | .queueFamilyIndex = graphics_queue_family, 12 | }); 13 | 14 | auto buffers = 
device.allocateCommandBuffers(vk::CommandBufferAllocateInfo { 15 | .commandPool = *pool, 16 | .level = vk::CommandBufferLevel::ePrimary, 17 | .commandBufferCount = 1}); 18 | 19 | auto buffer = std::move(buffers[0]); 20 | 21 | auto tracy_ctx = TracyVkContext(*phys_device, *device, *queue, *buffer); 22 | 23 | return { 24 | .pool = std::move(pool), 25 | .buffer = std::move(buffer), 26 | .swapchain_semaphore = device.createSemaphore({}), 27 | .render_semaphore = device.createSemaphore({}), 28 | .render_fence = 29 | device.createFence({.flags = vk::FenceCreateFlagBits::eSignaled}), 30 | .tracy_ctx = RaiiTracyCtx(tracy_ctx)}; 31 | } 32 | 33 | RaiiTracyCtx::RaiiTracyCtx(tracy::VkCtx* inner_) : inner(inner_) {} 34 | 35 | RaiiTracyCtx::~RaiiTracyCtx() { 36 | if (inner) { 37 | TracyVkDestroy(inner); 38 | } 39 | } 40 | 41 | RaiiTracyCtx::RaiiTracyCtx(RaiiTracyCtx&& other) { 42 | std::swap(inner, other.inner); 43 | } 44 | 45 | RaiiTracyCtx& RaiiTracyCtx::operator=(RaiiTracyCtx&& other) { 46 | std::swap(inner, other.inner); 47 | return *this; 48 | } 49 | -------------------------------------------------------------------------------- /src/allocations/image_with_view.cpp: -------------------------------------------------------------------------------- 1 | #include "image_with_view.h" 2 | 3 | ImageWithView ImageWithView::create_image_with_view( 4 | vk::ImageCreateInfo create_info, 5 | vma::Allocator allocator, 6 | const vk::raii::Device& device, 7 | const std::string& name, 8 | vk::ImageSubresourceRange subresource_range, 9 | vk::ImageViewType view_type 10 | ) { 11 | auto image = AllocatedImage(create_info, allocator, name); 12 | auto view = device.createImageView( 13 | {.image = image.image, 14 | .viewType = view_type, 15 | .format = create_info.format, 16 | .subresourceRange = subresource_range} 17 | ); 18 | auto view_name = name + " view"; 19 | VkImageView c_view = *view; 20 | device.setDebugUtilsObjectNameEXT(vk::DebugUtilsObjectNameInfoEXT { 21 | .objectType = 
vk::ObjectType::eImageView, 22 | .objectHandle = reinterpret_cast(c_view), 23 | .pObjectName = view_name.data()}); 24 | return ImageWithView(std::move(image), std::move(view)); 25 | } 26 | 27 | ImageWithView::ImageWithView(AllocatedImage image_, vk::raii::ImageView view_) : 28 | image(std::move(image_)), 29 | view(std::move(view_)) {} 30 | 31 | ImageWithView::ImageWithView( 32 | vk::ImageCreateInfo create_info, 33 | vma::Allocator allocator, 34 | const vk::raii::Device& device, 35 | const std::string& name, 36 | vk::ImageSubresourceRange subresource_range, 37 | vk::ImageViewType view_type 38 | ) : 39 | ImageWithView(ImageWithView::create_image_with_view( 40 | create_info, 41 | allocator, 42 | device, 43 | name, 44 | subresource_range, 45 | view_type 46 | )) {} 47 | -------------------------------------------------------------------------------- /src/shaders/cull_instances.comp: -------------------------------------------------------------------------------- 1 | #include "common/bindings.glsl" 2 | #include "common/culling.glsl" 3 | #include "common/util.glsl" 4 | 5 | layout(local_size_x = 64) in; 6 | 7 | void cull_instances() { 8 | uint32_t instance_index = gl_GlobalInvocationID.x; 9 | 10 | if (instance_index >= get_uniforms().num_instances) { 11 | return; 12 | } 13 | 14 | Instance instance = 15 | InstanceBuffer(get_uniforms().instances).instances[instance_index]; 16 | MeshInfo mesh_info = MeshInfoBuffer(instance.mesh_info_address).mesh_info; 17 | 18 | if (cull_bounding_sphere(instance, mesh_info.bounding_sphere)) { 19 | return; 20 | } 21 | 22 | prefix_sum_inclusive_append( 23 | PrefixSumBuffer(get_uniforms().num_meshlets_prefix_sum), 24 | instance_index, 25 | mesh_info.num_meshlets 26 | ); 27 | } 28 | 29 | layout(local_size_x = 16, local_size_y = 4) in; 30 | 31 | void cull_instances_shadows() { 32 | uint32_t instance_index = gl_GlobalInvocationID.x; 33 | uint32_t cascade_index = gl_GlobalInvocationID.y; 34 | 35 | if (instance_index >= get_uniforms().num_instances) 
{ 36 | return; 37 | } 38 | 39 | Instance instance = 40 | InstanceBuffer(get_uniforms().instances).instances[instance_index]; 41 | MeshInfo mesh_info = MeshInfoBuffer(instance.mesh_info_address).mesh_info; 42 | 43 | if (cull_bounding_sphere_shadows( 44 | instance, 45 | mesh_info.bounding_sphere, 46 | cascade_index 47 | )) { 48 | return; 49 | } 50 | 51 | prefix_sum_inclusive_append( 52 | prefix_sum_buffer_for_cascade(cascade_index), 53 | instance_index, 54 | mesh_info.num_meshlets 55 | ); 56 | } 57 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Lighthugger 🚀 2 | 3 | ![](readme/san_miguel_rendered.png) 4 | 5 | _A rendering of the San Miguel 2.0 scene from the [McGuire Computer Graphics Archive](https://casual-effects.com/data/index.html). CC BY 3.0._ 6 | 7 | - A modern Vulkan 1.3 renderer 8 | - Fully bindless with extensive use of Buffer Device Address (BDA), buffers are never bound. 9 | - Meshes are split up into meshlets (via [meshoptimizer](https://github.com/zeux/meshoptimizer)) 10 | - Instances are culled and a single-pass prefix sum over the number of meshlets in each instance is computed using a 64-bit atomic. 11 | - A per-meshlet indirect dispatch is run to further cull meshlets, essentially emulating mesh shaders in compute. 12 | - Triangles are rasterized into a [visibility buffer](http://filmicworlds.com/blog/visibility-buffer-rendering-with-material-graphs/), and lighting for the whole screen is resolved in a single compute pass. 13 | - Only block-compressed .DDS and .KTX2 textures are supported for extremely fast load times. 14 | - Min and Max depth values are computed each frame to tightly bound the cascaded shadowmap frustums. 15 | - Written in C++20 and [Vulkan-Hpp](https://github.com/KhronosGroup/Vulkan-Hpp). 
16 | - GLSL shaders (I'd use HLSL if it had 8-bit int support and if atomics worked on unstructured buffers) 17 | 18 | **MIT license available upon request.** 19 | 20 | ![](readme/san_miguel_breakdown.png) 21 | 22 | _Clockwise from top-left: Rendered scene, visibility buffer triangle IDs, visibility buffer meshlet IDs, shadow cascades_ 23 | 24 | ![](readme/san_miguel_cascades.png) 25 | 26 | _Rendered view showing all 4 shadowmap cascades. Note that objects that fit in smaller cascades are culled from the larger ones._ 27 | 28 | This project started out with my writing a C++ Vulkan starter project. That code is available on the `starter` branch. 29 | -------------------------------------------------------------------------------- /src/shaders/common/hlsl4glsl.glsl: -------------------------------------------------------------------------------- 1 | #extension GL_EXT_shader_explicit_arithmetic_types : require 2 | #extension GL_EXT_buffer_reference : require 3 | #extension GL_EXT_scalar_block_layout : require 4 | #extension GL_EXT_samplerless_texture_functions : require 5 | #extension GL_EXT_nonuniform_qualifier : require 6 | #extension GL_ARB_shader_clock : require 7 | #extension GL_KHR_shader_subgroup_basic : require 8 | #extension GL_KHR_shader_subgroup_arithmetic : require 9 | #extension GL_EXT_control_flow_attributes : require 10 | #extension GL_EXT_shader_atomic_int64 : require 11 | 12 | #define float2 vec2 13 | #define float3 vec3 14 | #define float4 vec4 15 | #define float2x2 mat2 16 | #define float3x3 mat3 17 | #define float4x4 mat4 18 | #define uint32_t2 uvec2 19 | #define uint32_t3 uvec3 20 | #define uint32_t4 uvec4 21 | #define uint2 uvec2 22 | #define uint3 uvec3 23 | #define uint4 uvec4 24 | #define uint8_t3 u8vec3 25 | #define uint8_t4 u8vec4 26 | #define uint16_t2 u16vec2 27 | #define uint16_t3 u16vec3 28 | #define uint16_t4 u16vec4 29 | #define int8_t4 i8vec4 30 | #define int8_t3 i8vec3 31 | 32 | #define asuint floatBitsToUint 33 | #define asfloat
// HLSL-style `saturate` for three-component vectors: clamps every component
// of `value` to the closed range [0, 1].
float3 saturate(float3 value) {
    const float3 lower_bound = float3(0);
    const float3 upper_bound = float3(1);
    return clamp(value, lower_bound, upper_bound);
}
// Ten-entry colour ramp used by `temperature()` to turn a scalar into a
// heat-map colour. Each entry is written as 8-bit RGB channel values divided
// by 255, and the ramp runs from dark blue (index 0) through cyan, green,
// yellow and orange to dark red / magenta (index 9).
const static float3 DEBUG_COLOURS[10] = {
    float3(0.0f / 255.0f, 2.0f / 255.0f, 91.0f / 255.0f),  // dark blue
    float3(0.0f / 255.0f, 108.0f / 255.0f, 251.0f / 255.0f),  // blue
    float3(0.0f / 255.0f, 221.0f / 255.0f, 221.0f / 255.0f),  // cyan
    float3(51.0f / 255.0f, 221.0f / 255.0f, 0.0f / 255.0f),  // green
    float3(255.0f / 255.0f, 252.0f / 255.0f, 0.0f / 255.0f),  // yellow
    float3(255.0f / 255.0f, 180.0f / 255.0f, 0.0f / 255.0f),  // amber
    float3(255.0f / 255.0f, 104.0f / 255.0f, 0.0f / 255.0f),  // orange
    float3(226.0f / 255.0f, 22.0f / 255.0f, 0.0f / 255.0f),  // red
    float3(191.0f / 255.0f, 0.0f / 255.0f, 83.0f / 255.0f),  // crimson
    float3(145.0f / 255.0f, 0.0f / 255.0f, 65.0f / 255.0f)};  // dark magenta
// Returns the number of ticks between two 64-bit timestamps.
//
// startTime: timestamp taken first.
// endTime:   timestamp taken second.
//
// When `endTime` is smaller than `startTime` the counter is assumed to have
// wrapped around exactly once and the distance is measured across the wrap.
uint64_t timediff(uint64_t startTime, uint64_t endTime) {
    // The all-ones constant has to be 64 bits wide: `~0u` is only 0xFFFFFFFF,
    // which made the wrap-around branch subtract from the 32-bit maximum and
    // return a meaningless value for 64-bit timestamps.
    const uint64_t max_time = ~uint64_t(0);

    // Account for (at most one) overflow
    return endTime >= startTime ? (endTime - startTime)
                                : (max_time - (startTime - endTime));
}
// Buffer-reference view of one prefix-sum sector.
//
// `counter` packs two 32-bit fields so that both can be advanced by a single
// 64-bit atomic add (see `prefix_sum_inclusive_append`):
//   - upper 32 bits: number of entries appended to `values` so far
//   - lower 32 bits: running total of all appended values
// `values` holds one (index, inclusive sum) entry per append, in the order the
// atomic adds were resolved.
layout(buffer_reference, scalar) buffer PrefixSumBuffer {
    uint64_t counter;
    PrefixSumValue values[];
};
// Upper-bound search over the inclusive sums stored in `buf`: returns the
// first entry whose `sum` is strictly greater than `target`.
// See https://en.cppreference.com/w/cpp/algorithm/upper_bound.
//
// NOTE(review): there is no range check. If `target` is not below every
// remaining sum (or the buffer is empty) the returned element lies one past
// the appended entries - presumably callers only pass targets below
// `prefix_sum_total(buf)`; confirm.
PrefixSumValue prefix_sum_binary_search(PrefixSumBuffer buf, uint32_t target) {
    // The number of appended entries lives in the upper half of the counter.
    uint32_t remaining = uint32_t(buf.counter >> 32);
    uint32_t lower = 0;

    while (remaining > 0) {
        uint32_t half_len = remaining / 2;
        uint32_t probe = lower + half_len;

        if (target >= buf.values[probe].sum) {
            // The answer is to the right of the probe.
            lower = probe + 1;
            remaining = remaining - (half_len + 1);
        } else {
            // The probe itself may be the answer; keep the left half.
            remaining = half_len;
        }
    }

    return buf.values[lower];
}
// Before the prefix sum.
//
// Resets the min/max depth range, writes the indirect dispatch sizes for the
// per-instance culling passes and clears the draw-call and prefix-sum
// counters via `reset_counters`.
void reset_buffers_a() {
    Uniforms uniforms = get_uniforms();

    // Start with an inverted range so any atomicMin / atomicMax narrows it.
    MiscStorageBuffer misc = MiscStorageBuffer(uniforms.misc_storage);
    misc.misc_storage.max_depth = 0;
    misc.misc_storage.min_depth = UINT32_T_MAX_VALUE;

    // One invocation per instance: 64-wide groups for the main cull pass,
    // 16-wide groups for the shadow cull pass.
    uint32_t instance_groups = dispatch_size(uniforms.num_instances, 64);
    uint32_t shadow_instance_groups = dispatch_size(uniforms.num_instances, 16);

    DispatchCommandsBuffer indirect =
        DispatchCommandsBuffer(uniforms.dispatches);

    indirect.commands[PER_INSTANCE_DISPATCH].x = instance_groups;
    indirect.commands[PER_INSTANCE_DISPATCH].y = 1;
    indirect.commands[PER_INSTANCE_DISPATCH].z = 1;

    indirect.commands[PER_SHADOW_INSTANCE_DISPATCH].x = shadow_instance_groups;
    indirect.commands[PER_SHADOW_INSTANCE_DISPATCH].y = 1;
    indirect.commands[PER_SHADOW_INSTANCE_DISPATCH].z = 1;

    reset_counters();
}
// After the shadow prefix sum.
//
// Sizes the per-meshlet indirect dispatch from the meshlet total of the
// cascade selected by `shadow_constant.cascade_index`, then zeroes the
// opaque and alpha-clip draw-call counters.
void reset_buffers_c() {
    uint32_t cascade_meshlets =
        total_num_meshlets_for_cascade(shadow_constant.cascade_index);

    DispatchCommandsBuffer indirect =
        DispatchCommandsBuffer(get_uniforms().dispatches);

    // One invocation per meshlet, in groups of 64.
    indirect.commands[PER_MESHLET_DISPATCH].x =
        dispatch_size(cascade_meshlets, 64);
    indirect.commands[PER_MESHLET_DISPATCH].y = 1;
    indirect.commands[PER_MESHLET_DISPATCH].z = 1;

    DrawCallBuffer draws = DrawCallBuffer(get_uniforms().draw_calls);
    draws.num_alpha_clip = 0;
    draws.num_opaque = 0;
}
59 | int(keyboard_state.shift) - int(keyboard_state.control), 60 | int(keyboard_state.w) - int(keyboard_state.s) 61 | ), 62 | glm::ivec2(sun_left_right, sun_up_down) 63 | ); 64 | } 65 | 66 | void update(glm::ivec3 movement_vector, glm::ivec2 sun_vector); 67 | 68 | void rotate_camera(glm::dvec2 mouse_delta) { 69 | pitch -= static_cast(mouse_delta.y) / 1024.0f; 70 | yaw += static_cast(mouse_delta.x) / 1024.0f; 71 | pitch = std::clamp( 72 | pitch, 73 | -std::numbers::pi_v / 2.0f + 0.0001f, 74 | std::numbers::pi_v / 2.0f 75 | ); 76 | } 77 | }; 78 | 79 | void glfw_key_callback( 80 | GLFWwindow* window, 81 | int key, 82 | int /*scancode*/, 83 | int action, 84 | int /*mods*/ 85 | ); 86 | 87 | void draw_imgui_window( 88 | Uniforms* uniforms, 89 | CameraParams& camera_params, 90 | KeyboardState& keyboard_state, 91 | bool& copy_view 92 | ); 93 | -------------------------------------------------------------------------------- /src/resources/bounding_sphere.h: -------------------------------------------------------------------------------- 1 | // Adapted from meshoptimizer. 2 | 3 | static void 4 | computeBoundingSphere(float result[4], const float* points, size_t count) { 5 | assert(count > 0); 6 | 7 | // find extremum points along all 3 axes; for each axis we get a pair of points with min/max coordinates 8 | size_t pmin[3] = {0, 0, 0}; 9 | size_t pmax[3] = {0, 0, 0}; 10 | 11 | for (size_t i = 0; i < count; ++i) { 12 | const float* p = &points[i * 3]; 13 | 14 | for (int axis = 0; axis < 3; ++axis) { 15 | pmin[axis] = 16 | (p[axis] < points[3 * pmin[axis] + axis]) ? i : pmin[axis]; 17 | pmax[axis] = 18 | (p[axis] > points[3 * pmax[axis] + axis]) ? 
i : pmax[axis]; 19 | } 20 | } 21 | 22 | // find the pair of points with largest distance 23 | float paxisd2 = 0; 24 | int paxis = 0; 25 | 26 | for (int axis = 0; axis < 3; ++axis) { 27 | const float* p1 = &points[3 * pmin[axis]]; 28 | const float* p2 = &points[3 * pmax[axis]]; 29 | 30 | float d2 = (p2[0] - p1[0]) * (p2[0] - p1[0]) 31 | + (p2[1] - p1[1]) * (p2[1] - p1[1]) 32 | + (p2[2] - p1[2]) * (p2[2] - p1[2]); 33 | 34 | if (d2 > paxisd2) { 35 | paxisd2 = d2; 36 | paxis = axis; 37 | } 38 | } 39 | 40 | // use the longest segment as the initial sphere diameter 41 | const float* p1 = &points[3 * pmin[paxis]]; 42 | const float* p2 = &points[3 * pmax[paxis]]; 43 | 44 | float center[3] = { 45 | (p1[0] + p2[0]) / 2, 46 | (p1[1] + p2[1]) / 2, 47 | (p1[2] + p2[2]) / 2}; 48 | float radius = sqrtf(paxisd2) / 2; 49 | 50 | // iteratively adjust the sphere up until all points fit 51 | for (size_t i = 0; i < count; ++i) { 52 | const float* p = &points[i * 3]; 53 | float d2 = (p[0] - center[0]) * (p[0] - center[0]) 54 | + (p[1] - center[1]) * (p[1] - center[1]) 55 | + (p[2] - center[2]) * (p[2] - center[2]); 56 | 57 | if (d2 > radius * radius) { 58 | float d = sqrtf(d2); 59 | assert(d > 0); 60 | 61 | float k = 0.5f + (radius / d) / 2; 62 | 63 | center[0] = center[0] * k + p[0] * (1 - k); 64 | center[1] = center[1] * k + p[1] * (1 - k); 65 | center[2] = center[2] * k + p[2] * (1 - k); 66 | radius = (radius + d) / 2; 67 | } 68 | } 69 | 70 | result[0] = center[0]; 71 | result[1] = center[1]; 72 | result[2] = center[2]; 73 | result[3] = radius; 74 | } 75 | -------------------------------------------------------------------------------- /src/shaders/common/util.glsl: -------------------------------------------------------------------------------- 1 | #include "prefix_sum.glsl" 2 | 3 | uint32_t load_index(MeshInfo mesh_info, uint32_t vertex_id) { 4 | if (bool(mesh_info.flags & MESH_INFO_FLAGS_32_BIT_INDICES)) { 5 | return Index32Buffer(mesh_info.indices).indices[vertex_id]; 6 | } else 
// Packs a triangle index and an instance index into a single 32-bit word.
//
// The triangle index is shifted into bits 24..31 and the instance index is
// ORed in unchanged. No masking is applied, so triangle bits above the low 8
// are shifted out and an instance index of 2^24 or more overlaps the
// triangle bits.
uint32_t pack(uint32_t triangle_index, uint32_t instance_index) {
    uint32_t triangle_bits = triangle_index << 24;
    return triangle_bits | instance_index;
}
draw_call_buf.num_alpha_clip = 0; 65 | 66 | prefix_sum_reset(prefix_sum_buffer_for_cascade(0)); 67 | prefix_sum_reset(prefix_sum_buffer_for_cascade(1)); 68 | prefix_sum_reset(prefix_sum_buffer_for_cascade(2)); 69 | prefix_sum_reset(prefix_sum_buffer_for_cascade(3)); 70 | } 71 | -------------------------------------------------------------------------------- /src/allocations/base.cpp: -------------------------------------------------------------------------------- 1 | #include "base.h" 2 | 3 | #include "../util.h" 4 | 5 | AllocatedImage::AllocatedImage(AllocatedImage&& other) { 6 | std::swap(image, other.image); 7 | std::swap(allocation, other.allocation); 8 | std::swap(allocator, other.allocator); 9 | } 10 | 11 | AllocatedImage& AllocatedImage::operator=(AllocatedImage&& other) { 12 | std::swap(image, other.image); 13 | std::swap(allocation, other.allocation); 14 | std::swap(allocator, other.allocator); 15 | return *this; 16 | } 17 | 18 | AllocatedImage::~AllocatedImage() { 19 | if (allocator) 20 | allocator.destroyImage(image, allocation); 21 | } 22 | 23 | AllocatedImage::AllocatedImage( 24 | vk::ImageCreateInfo create_info, 25 | vma::Allocator allocator_, 26 | const std::string& name 27 | ) { 28 | allocator = allocator_; 29 | vma::AllocationCreateInfo alloc_info = {.usage = vma::MemoryUsage::eAuto}; 30 | check_vk_result(allocator.createImage( 31 | &create_info, 32 | &alloc_info, 33 | &image, 34 | &allocation, 35 | nullptr 36 | )); 37 | 38 | auto device = allocator.getAllocatorInfo().device; 39 | device.setDebugUtilsObjectNameEXT(vk::DebugUtilsObjectNameInfoEXT { 40 | .objectType = vk::ObjectType::eImage, 41 | .objectHandle = reinterpret_cast(&*image), 42 | .pObjectName = name.data()}); 43 | } 44 | 45 | AllocatedBuffer::AllocatedBuffer(AllocatedBuffer&& other) { 46 | std::swap(buffer, other.buffer); 47 | std::swap(allocation, other.allocation); 48 | std::swap(allocator, other.allocator); 49 | } 50 | 51 | AllocatedBuffer::~AllocatedBuffer() { 52 | if (allocator) 53 
| allocator.destroyBuffer(buffer, allocation); 54 | } 55 | 56 | AllocatedBuffer& AllocatedBuffer::operator=(AllocatedBuffer&& other) { 57 | std::swap(buffer, other.buffer); 58 | std::swap(allocation, other.allocation); 59 | std::swap(allocator, other.allocator); 60 | return *this; 61 | } 62 | 63 | AllocatedBuffer::AllocatedBuffer( 64 | vk::BufferCreateInfo create_info, 65 | vma::AllocationCreateInfo alloc_info, 66 | vma::Allocator allocator_, 67 | const std::string& name 68 | ) { 69 | allocator = allocator_; 70 | check_vk_result(allocator.createBuffer( 71 | &create_info, 72 | &alloc_info, 73 | &buffer, 74 | &allocation, 75 | nullptr 76 | )); 77 | auto device = allocator.getAllocatorInfo().device; 78 | device.setDebugUtilsObjectNameEXT(vk::DebugUtilsObjectNameInfoEXT { 79 | .objectType = vk::ObjectType::eBuffer, 80 | .objectHandle = reinterpret_cast(&*buffer), 81 | .pObjectName = name.data()}); 82 | } 83 | -------------------------------------------------------------------------------- /src/util.cpp: -------------------------------------------------------------------------------- 1 | #include "util.h" 2 | 3 | // https://github.com/KhronosGroup/Vulkan-Hpp/blob/64539664151311b63485a42277db7ffaba1c0c63/samples/RayTracing/RayTracing.cpp#L539-L549 4 | void check_vk_result(vk::Result err) { 5 | if (err != vk::Result::eSuccess) { 6 | dbg(vk::to_string(err)); 7 | if (err < vk::Result::eSuccess) { 8 | abort(); 9 | } 10 | } 11 | } 12 | 13 | std::vector create_and_name_swapchain_image_views( 14 | const vk::raii::Device& device, 15 | const std::vector& swapchain_images, 16 | vk::Format swapchain_format 17 | ) { 18 | std::vector views; 19 | 20 | for (size_t i = 0; i < swapchain_images.size(); i++) { 21 | vk::Image image = swapchain_images[i]; 22 | VkImage c_image = image; 23 | std::string name = std::string("swapchain image ") + std::to_string(i); 24 | device.setDebugUtilsObjectNameEXT(vk::DebugUtilsObjectNameInfoEXT { 25 | .objectType = vk::ObjectType::eImage, 26 | 
// Number of workgroups of `workgroup_size` invocations needed to cover
// `width` items (ceiling division).
//
// width:          number of items to process.
// workgroup_size: invocations per workgroup; must be non-zero.
//
// A `width` of zero returns 1. Previously `width - 1` wrapped around to
// UINT32_MAX and produced a huge group count. Returning a single group
// mirrors the GLSL `dispatch_size` helper in shaders/common/util.glsl, which
// also special-cases a zero width instead of letting it underflow.
uint32_t dispatch_size(uint32_t width, uint32_t workgroup_size) {
    if (width == 0) {
        return 1;
    }

    return ((width - 1) / workgroup_size) + 1;
}
// Encodes one linear channel value with a piecewise sRGB-style transfer
// function: a linear segment (x * 12.92) at or below the 0.003130 threshold
// and a 1/2.4 power segment above it.
//
// Both segments are evaluated and `select` picks one, so no branch is taken.
float linear_to_srgb_transfer_function(float value) {
    const float linear_segment = value * 12.92;
    const float power_segment = 1.055 * pow(value, 1.0 / 2.4) - 0.055;
    const bool use_linear_segment = value <= 0.003130;

    return select(use_linear_segment, linear_segment, power_segment);
}
// Reduces the depth buffer to a global min / max pair.
//
// Each invocation gathers a 4x4 block of depth texels, reduces it locally,
// then reduces across the subgroup so that only one lane per subgroup issues
// the atomics on `misc_storage.min_depth` / `misc_storage.max_depth`.
// Depth values are compared as raw bit patterns (`asuint`), not as floats.
void read_depth() {
    Uniforms uniforms = get_uniforms();

    // NOTE(review): the bounds test is in invocation units while every
    // invocation covers 4x4 pixels (`coord` below), so invocations with
    // `global_id * 4` beyond the window still pass it - presumably the
    // dispatch is sized to window_size / 4; confirm against the caller.
    uint3 global_id = gl_GlobalInvocationID;
    if (global_id.x >= uniforms.window_size.x
        || global_id.y >= uniforms.window_size.y) {
        return;
    }

    // Size of one pixel in normalised texture coordinates.
    float2 pixel_size = 1.0 / float2(uniforms.window_size);

    // Sample the depth values for a 4x4 block.
    uint2 coord = global_id.xy * 4;

    // Four gathers, each returning four texels around the given coordinate;
    // the (1,1), (1,3), (3,1), (3,3) offsets address the four 2x2 quads of
    // the block.
    uint4 depth_1 = asuint(textureGather(
        sampler2D(depth_buffer, clamp_sampler),
        (coord + uint2(1, 1)) * pixel_size
    ));
    uint4 depth_2 = asuint(textureGather(
        sampler2D(depth_buffer, clamp_sampler),
        (coord + uint2(1, 3)) * pixel_size
    ));
    uint4 depth_3 = asuint(textureGather(
        sampler2D(depth_buffer, clamp_sampler),
        (coord + uint2(3, 1)) * pixel_size
    ));
    uint4 depth_4 = asuint(textureGather(
        sampler2D(depth_buffer, clamp_sampler),
        (coord + uint2(3, 3)) * pixel_size
    ));

    // min the values, trying to avoid propagating zeros.
    // (`min4` is built on `min_if_not_zero`, so a zero only survives when
    // all sixteen texels are zero - presumably zero marks texels with no
    // geometry; confirm.)
    uint32_t depth_min =
        min4(uint4(min4(depth_1), min4(depth_2), min4(depth_3), min4(depth_4)));

    // Lanes whose whole block was zero stay out of the min reduction.
    if (depth_min != 0) {
        // Min all values in the subgroup,
        uint32_t subgroup_min = subgroupMin(depth_min);

        // https://www.khronos.org/assets/uploads/developers/library/2018-vulkan-devday/06-subgroups.pdf
        // equiv of subgroup elect
        // Note: naming is ambiguous but this means the first
        // _active_ lane.
        if (subgroupElect()) {
            atomicMin(
                MiscStorageBuffer(uniforms.misc_storage).misc_storage.min_depth,
                subgroup_min
            );
        }
    }

    // The max reduction needs no zero filtering: a zero can never raise the
    // maximum.
    uint32_t depth_max =
        max4(uint4(max4(depth_1), max4(depth_2), max4(depth_3), max4(depth_4)));
    uint32_t subgroup_max = subgroupMax(depth_max);

    // One atomic per subgroup, issued by the elected lane.
    if (subgroupElect()) {
        atomicMax(
            MiscStorageBuffer(uniforms.misc_storage).misc_storage.max_depth,
            subgroup_max
        );
    }
}
4 | AlignConsecutiveMacros: false 5 | AlignConsecutiveAssignments: false 6 | AlignConsecutiveDeclarations: false 7 | AlignEscapedNewlines: DontAlign 8 | AlignOperands: false 9 | AlignTrailingComments: false 10 | AllowAllArgumentsOnNextLine: false 11 | AllowAllConstructorInitializersOnNextLine: false 12 | AllowAllParametersOfDeclarationOnNextLine: false 13 | AllowShortBlocksOnASingleLine: Empty 14 | AllowShortCaseLabelsOnASingleLine: false 15 | AllowShortFunctionsOnASingleLine: Empty 16 | AllowShortIfStatementsOnASingleLine: Never 17 | AllowShortLambdasOnASingleLine: All 18 | AllowShortLoopsOnASingleLine: false 19 | AlwaysBreakAfterReturnType: None 20 | AlwaysBreakBeforeMultilineStrings: true 21 | AlwaysBreakTemplateDeclarations: Yes 22 | BinPackArguments: false 23 | BinPackParameters: false 24 | BreakBeforeBinaryOperators: NonAssignment 25 | BreakBeforeBraces: Attach 26 | BreakBeforeTernaryOperators: true 27 | BreakConstructorInitializers: AfterColon 28 | BreakInheritanceList: AfterColon 29 | BreakStringLiterals: false 30 | ColumnLimit: 80 31 | CompactNamespaces: false 32 | ConstructorInitializerAllOnOneLineOrOnePerLine: true 33 | ConstructorInitializerIndentWidth: 4 34 | ContinuationIndentWidth: 4 35 | Cpp11BracedListStyle: true 36 | DerivePointerAlignment: false 37 | FixNamespaceComments: true 38 | IncludeBlocks: Regroup 39 | IncludeCategories: 40 | - Regex: '^' 41 | Priority: 2 42 | SortPriority: 0 43 | CaseSensitive: false 44 | - Regex: '^<.*\.h>' 45 | Priority: 1 46 | SortPriority: 0 47 | CaseSensitive: false 48 | - Regex: '^<.*' 49 | Priority: 2 50 | SortPriority: 0 51 | CaseSensitive: false 52 | - Regex: '.*' 53 | Priority: 3 54 | SortPriority: 0 55 | CaseSensitive: false 56 | IncludeIsMainRegex: '([-_](test|unittest))?$' 57 | IndentCaseLabels: true 58 | IndentPPDirectives: BeforeHash 59 | IndentWidth: 4 60 | IndentWrappedFunctionNames: false 61 | KeepEmptyLinesAtTheStartOfBlocks: false 62 | MaxEmptyLinesToKeep: 1 63 | NamespaceIndentation: Inner 64 | 
PointerAlignment: Left 65 | ReferenceAlignment: Left # New in v13. int &name ==> int& name 66 | ReflowComments: false 67 | SeparateDefinitionBlocks: Always # New in v14. 68 | SortIncludes: true 69 | SortUsingDeclarations: true 70 | SpaceAfterCStyleCast: false 71 | SpaceAfterLogicalNot: false 72 | SpaceAfterTemplateKeyword: false 73 | SpaceBeforeAssignmentOperators: true 74 | SpaceBeforeCpp11BracedList: true 75 | SpaceBeforeCtorInitializerColon: true 76 | SpaceBeforeInheritanceColon: false 77 | SpaceBeforeParens: ControlStatements 78 | SpaceBeforeRangeBasedForLoopColon: true 79 | SpaceInEmptyParentheses: false 80 | SpacesBeforeTrailingComments: 2 81 | SpacesInAngles: false 82 | SpacesInCStyleCastParentheses: false 83 | SpacesInContainerLiterals: false 84 | SpacesInParentheses: false 85 | SpacesInSquareBrackets: false 86 | Standard: Cpp11 87 | TabWidth: 4 88 | UseTab: Never 89 | -------------------------------------------------------------------------------- /src/shaders/common/pbr.glsl: -------------------------------------------------------------------------------- 1 | const static float PI = 3.14159265358979323846264338327950288; 2 | 3 | float F_Schlick(float u, float f0, float f90) { 4 | return f0 + (f90 - f0) * pow(1.0 - u, 5.0); 5 | } 6 | 7 | float Fd_Burley(float NoV, float NoL, float LoH, float roughness) { 8 | float f90 = 0.5 + 2.0 * roughness * LoH * LoH; 9 | float lightScatter = F_Schlick(NoL, 1.0, f90); 10 | float viewScatter = F_Schlick(NoV, 1.0, f90); 11 | return lightScatter * viewScatter * (1.0 / PI); 12 | } 13 | 14 | float D_GGX(float NoH, float a) { 15 | float a2 = a * a; 16 | float f = (NoH * a2 - NoH) * NoH + 1.0; 17 | return a2 / (PI * f * f); 18 | } 19 | 20 | float3 F_Schlick(float u, float3 f0) { 21 | return f0 + (float3(1.0, 1.0, 1.0) - f0) * pow(1.0 - u, 5.0); 22 | } 23 | 24 | float V_SmithGGXCorrelated(float NoV, float NoL, float a) { 25 | float a2 = a * a; 26 | float GGXL = NoV * sqrt((-NoL * a2 + NoL) * NoL + a2); 27 | float GGXV = NoL * 
sqrt((-NoV * a2 + NoV) * NoV + a2); 28 | return 0.5 / (GGXV + GGXL); 29 | } 30 | 31 | float Fd_Lambert() { 32 | return 1.0 / PI; 33 | } 34 | 35 | float reflectance_for_ior(float ior) { 36 | float reflectance_sqrt = (ior - 1.0) / (ior + 1.0); 37 | return reflectance_sqrt * reflectance_sqrt; 38 | } 39 | 40 | float3 diffuse_color(float3 base_color, float metallic) { 41 | return (1.0 - metallic) * base_color; 42 | } 43 | 44 | float3 BRDF( 45 | float3 v, 46 | float3 l, 47 | float3 n, 48 | float perceptualRoughness, 49 | float metallic, 50 | float3 baseColor, 51 | bool thin_walled 52 | ) { 53 | float3 h = normalize(v + l); 54 | 55 | float3 diffuseColor = diffuse_color(baseColor, metallic); 56 | 57 | float3 f0 = 58 | reflectance_for_ior(1.5) * (1.0 - metallic) + baseColor * metallic; 59 | 60 | float NoV = clamp(dot(n, v), 1e-5, 1.0); 61 | float NoL = clamp(dot(n, l), 0.0, 1.0); 62 | float NoH = clamp(dot(n, h), 0.0, 1.0); 63 | float LoH = clamp(dot(l, h), 0.0, 1.0); 64 | 65 | // perceptually linear roughness to roughness (see parameterization) 66 | float roughness = perceptualRoughness * perceptualRoughness; 67 | 68 | float D = D_GGX(NoH, roughness); 69 | float3 F = F_Schlick(LoH, f0); 70 | float V = V_SmithGGXCorrelated(NoV, NoL, roughness); 71 | 72 | // specular BRDF 73 | float3 Fr = (D * V) * F; 74 | 75 | // diffuse BRDF 76 | float3 Fd = diffuseColor * Fd_Burley(NoV, NoL, LoH, roughness); 77 | 78 | float3 irradiance = (Fd + Fr) * NoL; 79 | 80 | // Clamp at 0 so that the thin walled stuff isn't fighting against negative values. 81 | irradiance = max(irradiance, float3(0)); 82 | 83 | // Thin walled 84 | // todo: use oren-nayar instead. 85 | float rev_NoV = clamp(dot(-n, v), 1e-5, 1.0); 86 | float rev_NoL = clamp(dot(-n, l), 0.0, 1.0); 87 | irradiance += select( 88 | thin_walled, 89 | diffuseColor * Fd_Burley(rev_NoV, rev_NoL, LoH, roughness) * rev_NoL, 90 | float3(0) 91 | ); 92 | 93 | // Guard against divisions by zero. 
Mostly from 94 | // `V_SmithGGXCorrelated` and (I think) backfacing geometry. 95 | return mix(irradiance, float3(0.0), isnan(irradiance)); 96 | } 97 | -------------------------------------------------------------------------------- /src/allocations/staging.cpp: -------------------------------------------------------------------------------- 1 | #include "staging.h" 2 | 3 | AllocatedBuffer upload_via_staging_buffer( 4 | const void* bytes, 5 | size_t num_bytes, 6 | vma::Allocator allocator, 7 | vk::BufferUsageFlags desired_flags, 8 | const std::string& name, 9 | const vk::raii::CommandBuffer& command_buffer, 10 | std::vector& temp_buffers 11 | ) { 12 | auto staging_buffer_name = name + " staging buffer"; 13 | auto staging_buffer = PersistentlyMappedBuffer(AllocatedBuffer( 14 | vk::BufferCreateInfo { 15 | .size = num_bytes, 16 | .usage = vk::BufferUsageFlagBits::eTransferSrc}, 17 | { 18 | .flags = vma::AllocationCreateFlagBits::eMapped 19 | | vma::AllocationCreateFlagBits::eHostAccessSequentialWrite, 20 | .usage = vma::MemoryUsage::eAuto, 21 | }, 22 | allocator, 23 | staging_buffer_name 24 | )); 25 | std::memcpy(staging_buffer.mapped_ptr, bytes, num_bytes); 26 | 27 | auto final_buffer = AllocatedBuffer( 28 | vk::BufferCreateInfo { 29 | .size = num_bytes, 30 | .usage = vk::BufferUsageFlagBits::eTransferDst | desired_flags}, 31 | { 32 | .usage = vma::MemoryUsage::eAuto, 33 | }, 34 | allocator, 35 | name 36 | ); 37 | 38 | command_buffer.copyBuffer( 39 | staging_buffer.buffer.buffer, 40 | final_buffer.buffer, 41 | {vk::BufferCopy {.srcOffset = 0, .dstOffset = 0, .size = num_bytes}} 42 | ); 43 | 44 | temp_buffers.push_back(std::move(staging_buffer.buffer)); 45 | 46 | return final_buffer; 47 | } 48 | 49 | std::pair upload_from_file_via_staging_buffer( 50 | std::ifstream stream, 51 | vma::Allocator allocator, 52 | vk::BufferUsageFlags desired_flags, 53 | const std::string& name, 54 | const vk::raii::CommandBuffer& command_buffer, 55 | std::vector& temp_buffers 56 | ) { 57 | 
stream.seekg(0, stream.end); 58 | uint32_t length = stream.tellg(); 59 | stream.seekg(0, stream.beg); 60 | auto staging_buffer_name = name + " staging buffer"; 61 | auto staging_buffer = PersistentlyMappedBuffer(AllocatedBuffer( 62 | vk::BufferCreateInfo { 63 | .size = length, 64 | .usage = vk::BufferUsageFlagBits::eTransferSrc}, 65 | { 66 | .flags = vma::AllocationCreateFlagBits::eMapped 67 | | vma::AllocationCreateFlagBits::eHostAccessSequentialWrite, 68 | .usage = vma::MemoryUsage::eAuto, 69 | }, 70 | allocator, 71 | staging_buffer_name 72 | )); 73 | stream.read((char*)staging_buffer.mapped_ptr, length); 74 | auto final_buffer = AllocatedBuffer( 75 | vk::BufferCreateInfo { 76 | .size = length, 77 | .usage = vk::BufferUsageFlagBits::eTransferDst | desired_flags}, 78 | { 79 | .usage = vma::MemoryUsage::eAuto, 80 | }, 81 | allocator, 82 | name 83 | ); 84 | 85 | command_buffer.copyBuffer( 86 | staging_buffer.buffer.buffer, 87 | final_buffer.buffer, 88 | {vk::BufferCopy {.srcOffset = 0, .dstOffset = 0, .size = length}} 89 | ); 90 | 91 | temp_buffers.push_back(std::move(staging_buffer.buffer)); 92 | 93 | return std::make_pair(std::move(final_buffer), length); 94 | } 95 | -------------------------------------------------------------------------------- /src/sync.h: -------------------------------------------------------------------------------- 1 | #include "util.h" 2 | 3 | // Make inserting color image transition barriers easier. 
4 | 5 | struct ImageBarrier { 6 | ThsvsAccessType prev_access; 7 | ThsvsAccessType next_access; 8 | ThsvsImageLayout prev_layout = THSVS_IMAGE_LAYOUT_OPTIMAL; 9 | ThsvsImageLayout next_layout = THSVS_IMAGE_LAYOUT_OPTIMAL; 10 | bool discard_contents = false; 11 | uint32_t queue_family; 12 | vk::Image image; 13 | vk::ImageSubresourceRange subresource_range = COLOR_SUBRESOURCE_RANGE; 14 | }; 15 | 16 | template 17 | struct GlobalBarrier { 18 | std::array prev_accesses; 19 | std::array next_accesses; 20 | }; 21 | 22 | template 23 | void insert_color_image_barriers( 24 | const vk::raii::CommandBuffer& command_buffer, 25 | const std::array& barriers, 26 | std::optional> opt_global_barrier = std::nullopt 27 | ) { 28 | std::array thsvs_barriers; 29 | 30 | for (size_t i = 0; i < thsvs_barriers.size(); i++) { 31 | thsvs_barriers[i] = { 32 | .prevAccessCount = 1, 33 | .pPrevAccesses = &barriers[i].prev_access, 34 | .nextAccessCount = 1, 35 | .pNextAccesses = &barriers[i].next_access, 36 | .prevLayout = barriers[i].prev_layout, 37 | .nextLayout = barriers[i].next_layout, 38 | .discardContents = barriers[i].discard_contents, 39 | .srcQueueFamilyIndex = barriers[i].queue_family, 40 | .dstQueueFamilyIndex = barriers[i].queue_family, 41 | .image = barriers[i].image, 42 | .subresourceRange = barriers[i].subresource_range}; 43 | } 44 | 45 | std::optional thsvs_global_barrier = std::nullopt; 46 | 47 | if (opt_global_barrier) { 48 | auto& global_barrier = opt_global_barrier.value(); 49 | 50 | thsvs_global_barrier = { 51 | .prevAccessCount = 52 | static_cast(global_barrier.prev_accesses.size()), 53 | .pPrevAccesses = global_barrier.prev_accesses.data(), 54 | .nextAccessCount = 55 | static_cast(global_barrier.prev_accesses.size()), 56 | .pNextAccesses = global_barrier.next_accesses.data()}; 57 | } 58 | 59 | thsvsCmdPipelineBarrier( 60 | *command_buffer, 61 | thsvs_global_barrier.has_value() ? 
&thsvs_global_barrier.value() 62 | : nullptr, 63 | 0, 64 | nullptr, 65 | thsvs_barriers.size(), 66 | thsvs_barriers.data() 67 | ); 68 | } 69 | 70 | template 71 | void insert_global_barrier( 72 | const vk::raii::CommandBuffer& command_buffer, 73 | GlobalBarrier global_barrier 74 | ) { 75 | ThsvsGlobalBarrier thsvs_global_barrier = { 76 | .prevAccessCount = 77 | static_cast(global_barrier.prev_accesses.size()), 78 | .pPrevAccesses = global_barrier.prev_accesses.data(), 79 | .nextAccessCount = 80 | static_cast(global_barrier.prev_accesses.size()), 81 | .pNextAccesses = global_barrier.next_accesses.data()}; 82 | 83 | thsvsCmdPipelineBarrier( 84 | *command_buffer, 85 | &thsvs_global_barrier, 86 | 0, 87 | nullptr, 88 | 0, 89 | nullptr 90 | ); 91 | } 92 | -------------------------------------------------------------------------------- /src/shaders/common/vbuffer.glsl: -------------------------------------------------------------------------------- 1 | 2 | // Copied from http://filmicworlds.com/blog/visibility-buffer-rendering-with-material-graphs/. 3 | // Edits are marked. 4 | 5 | struct BarycentricDeriv { 6 | float3 m_lambda; 7 | float3 m_ddx; 8 | float3 m_ddy; 9 | }; 10 | 11 | BarycentricDeriv CalcFullBary( 12 | float4 pt0, 13 | float4 pt1, 14 | float4 pt2, 15 | float2 pixelNdc, 16 | float2 winSize 17 | ) { 18 | // EDIT: got rid of the cast. 
19 | BarycentricDeriv ret; 20 | 21 | float3 invW = rcp(float3(pt0.w, pt1.w, pt2.w)); 22 | 23 | float2 ndc0 = pt0.xy * invW.x; 24 | float2 ndc1 = pt1.xy * invW.y; 25 | float2 ndc2 = pt2.xy * invW.z; 26 | 27 | float invDet = rcp(determinant(float2x2(ndc2 - ndc1, ndc0 - ndc1))); 28 | ret.m_ddx = float3(ndc1.y - ndc2.y, ndc2.y - ndc0.y, ndc0.y - ndc1.y) 29 | * invDet * invW; 30 | ret.m_ddy = float3(ndc2.x - ndc1.x, ndc0.x - ndc2.x, ndc1.x - ndc0.x) 31 | * invDet * invW; 32 | float ddxSum = dot(ret.m_ddx, float3(1, 1, 1)); 33 | float ddySum = dot(ret.m_ddy, float3(1, 1, 1)); 34 | 35 | float2 deltaVec = pixelNdc - ndc0; 36 | float interpInvW = invW.x + deltaVec.x * ddxSum + deltaVec.y * ddySum; 37 | float interpW = rcp(interpInvW); 38 | 39 | ret.m_lambda.x = interpW 40 | * (invW[0] + deltaVec.x * ret.m_ddx.x + deltaVec.y * ret.m_ddy.x); 41 | ret.m_lambda.y = 42 | interpW * (0.0f + deltaVec.x * ret.m_ddx.y + deltaVec.y * ret.m_ddy.y); 43 | ret.m_lambda.z = 44 | interpW * (0.0f + deltaVec.x * ret.m_ddx.z + deltaVec.y * ret.m_ddy.z); 45 | 46 | ret.m_ddx *= (2.0f / winSize.x); 47 | ret.m_ddy *= (2.0f / winSize.y); 48 | ddxSum *= (2.0f / winSize.x); 49 | ddySum *= (2.0f / winSize.y); 50 | 51 | ret.m_ddy *= -1.0f; 52 | ddySum *= -1.0f; 53 | 54 | float interpW_ddx = 1.0f / (interpInvW + ddxSum); 55 | float interpW_ddy = 1.0f / (interpInvW + ddySum); 56 | 57 | ret.m_ddx = 58 | interpW_ddx * (ret.m_lambda * interpInvW + ret.m_ddx) - ret.m_lambda; 59 | ret.m_ddy = 60 | interpW_ddy * (ret.m_lambda * interpInvW + ret.m_ddy) - ret.m_lambda; 61 | 62 | return ret; 63 | } 64 | 65 | float3 66 | InterpolateWithDeriv(BarycentricDeriv deriv, float v0, float v1, float v2) { 67 | float3 mergedV = float3(v0, v1, v2); 68 | float3 ret; 69 | ret.x = dot(mergedV, deriv.m_lambda); 70 | ret.y = dot(mergedV, deriv.m_ddx); 71 | ret.z = dot(mergedV, deriv.m_ddy); 72 | return ret; 73 | } 74 | 75 | // Helper structs. 
76 | 77 | struct InterpolatedVector_float3 { 78 | float3 value; 79 | float3 dx; 80 | float3 dy; 81 | }; 82 | 83 | struct InterpolatedVector_float2 { 84 | float2 value; 85 | float2 dx; 86 | float2 dy; 87 | }; 88 | 89 | InterpolatedVector_float2 90 | interpolate(BarycentricDeriv deriv, float2 v0, float2 v1, float2 v2) { 91 | InterpolatedVector_float2 interp; 92 | float3 x = InterpolateWithDeriv(deriv, v0.x, v1.x, v2.x); 93 | interp.value.x = x.x; 94 | interp.dx.x = x.y; 95 | interp.dy.x = x.z; 96 | float3 y = InterpolateWithDeriv(deriv, v0.y, v1.y, v2.y); 97 | interp.value.y = y.x; 98 | interp.dx.y = y.y; 99 | interp.dy.y = y.z; 100 | return interp; 101 | } 102 | 103 | InterpolatedVector_float3 104 | interpolate(BarycentricDeriv deriv, float3 v0, float3 v1, float3 v2) { 105 | InterpolatedVector_float3 interp; 106 | float3 x = InterpolateWithDeriv(deriv, v0.x, v1.x, v2.x); 107 | interp.value.x = x.x; 108 | interp.dx.x = x.y; 109 | interp.dy.x = x.z; 110 | float3 y = InterpolateWithDeriv(deriv, v0.y, v1.y, v2.y); 111 | interp.value.y = y.x; 112 | interp.dx.y = y.y; 113 | interp.dy.y = y.z; 114 | float3 z = InterpolateWithDeriv(deriv, v0.z, v1.z, v2.z); 115 | interp.value.z = z.x; 116 | interp.dx.z = z.y; 117 | interp.dy.z = z.z; 118 | return interp; 119 | } 120 | -------------------------------------------------------------------------------- /src/shaders/compute/generate_shadow_matrices.glsl: -------------------------------------------------------------------------------- 1 | #include "../common/bindings.glsl" 2 | #include "../common/matrices.glsl" 3 | #include "../common/util.glsl" 4 | 5 | // Transforms the matrix output from NDC to UV space. 
6 | mat4 bias_matrix = mat4( 7 | vec4(0.5, 0.0, 0.0, 0.0), 8 | vec4(0.0, 0.5, 0.0, 0.0), 9 | vec4(0.0, 0.0, 1.0, 0.0), 10 | vec4(0.5, 0.5, 0.0, 1.0) 11 | ); 12 | 13 | layout(local_size_x = 4) in; 14 | 15 | void generate_shadow_matrices() { 16 | Uniforms uniforms = get_uniforms(); 17 | 18 | MiscStorageBuffer buf = MiscStorageBuffer(uniforms.misc_storage); 19 | 20 | // https://github.com/SaschaWillems/Vulkan/blob/master/examples/shadowmappingcascade/shadowmappingcascade.cpp#L650-L663 21 | 22 | // Note: these values are misleading. As 0.0 is the infinite plane, 23 | // the max_depth is actually the closer value! 24 | float min_depth = asfloat(buf.misc_storage.min_depth); 25 | float max_depth = asfloat(buf.misc_storage.max_depth); 26 | 27 | float4x4 invCam = uniforms.inv_perspective_view; 28 | 29 | uint32_t cascade_index = gl_GlobalInvocationID.x; 30 | 31 | float min_distance = convert_infinite_reverze_z_depth(max_depth); 32 | float max_distance = convert_infinite_reverze_z_depth(min_depth); 33 | 34 | float cascadeSplits[4] = { 35 | // The first shadow frustum just tightly fits the aabb of the plane of the nearest stuff. 36 | // This is generally reasonably large. 
37 | max_depth, 38 | convert_infinite_reverze_z_depth(lerp( 39 | min_distance, 40 | max_distance, 41 | pow(0.5, uniforms.cascade_split_pow) 42 | )), 43 | convert_infinite_reverze_z_depth(lerp( 44 | min_distance, 45 | max_distance, 46 | pow(0.75, uniforms.cascade_split_pow) 47 | )), 48 | min_depth}; 49 | 50 | float lastSplitDist = 51 | select(cascade_index > 0, cascadeSplits[cascade_index - 1], max_depth); 52 | float splitDist = cascadeSplits[cascade_index]; 53 | 54 | // Get the corners of the visible depth slice in view space 55 | float3 frustumCorners[8] = { 56 | float3(-1.0f, 1.0f, lastSplitDist), 57 | float3(1.0f, 1.0f, lastSplitDist), 58 | float3(1.0f, -1.0f, lastSplitDist), 59 | float3(-1.0f, -1.0f, lastSplitDist), 60 | float3(-1.0f, 1.0f, splitDist), 61 | float3(1.0f, 1.0f, splitDist), 62 | float3(1.0f, -1.0f, splitDist), 63 | float3(-1.0f, -1.0f, splitDist), 64 | }; 65 | 66 | // Convert the corners to world space 67 | for (uint32_t i = 0; i < 8; i++) { 68 | float4 invCorner = (invCam * float4(frustumCorners[i], 1.0f)); 69 | frustumCorners[i] = (invCorner / invCorner.w).xyz; 70 | } 71 | 72 | // Get frustum center 73 | float3 frustumCenter = float3(0.0f); 74 | for (uint32_t i = 0; i < 8; i++) { 75 | frustumCenter += frustumCorners[i]; 76 | } 77 | frustumCenter /= 8.0f; 78 | 79 | float sphere_radius = 0.0f; 80 | for (uint32_t i = 0; i < 8; i++) { 81 | float distance = length(frustumCorners[i] - frustumCenter); 82 | sphere_radius = max(sphere_radius, distance); 83 | } 84 | sphere_radius = ceil(sphere_radius * 16.0f) / 16.0f; 85 | 86 | float4x4 shadowView = lookAt( 87 | uniforms.shadow_cam_distance * uniforms.sun_dir + frustumCenter, 88 | frustumCenter, 89 | float3(0, 1, 0) 90 | ); 91 | float4x4 shadowProj = OrthographicProjection( 92 | -sphere_radius, 93 | sphere_radius, 94 | -sphere_radius, 95 | sphere_radius, 96 | 0.0f, 97 | uniforms.shadow_cam_distance * 2.0 98 | ); 99 | 100 | mat4 matrix = shadowProj * shadowView; 101 | 102 | 
buf.misc_storage.shadow_matrices[cascade_index] = matrix; 103 | buf.misc_storage.uv_space_shadow_matrices[cascade_index] = 104 | bias_matrix * matrix; 105 | 106 | buf.misc_storage.shadow_view_matrices[cascade_index] = shadowView; 107 | buf.misc_storage.shadow_sphere_radii[cascade_index] = sphere_radius; 108 | 109 | reset_counters(); 110 | } 111 | -------------------------------------------------------------------------------- /src/shaders/write_draw_calls.comp: -------------------------------------------------------------------------------- 1 | #include "common/bindings.glsl" 2 | #include "common/culling.glsl" 3 | #include "common/util.glsl" 4 | 5 | layout(local_size_x = 64) in; 6 | 7 | void write_draw_calls() { 8 | Uniforms uniforms = get_uniforms(); 9 | 10 | uint32_t id = gl_GlobalInvocationID.x; 11 | 12 | if (id >= total_num_meshlets_for_cascade(0)) { 13 | return; 14 | } 15 | 16 | MeshletReference meshlet_reference = get_meshlet_reference(id, 0); 17 | 18 | Instance instance = InstanceBuffer(uniforms.instances) 19 | .instances[meshlet_reference.instance_index]; 20 | MeshInfo mesh_info = MeshInfoBuffer(instance.mesh_info_address).mesh_info; 21 | 22 | Meshlet meshlet = MeshletBuffer(mesh_info.meshlets) 23 | .meshlets[meshlet_reference.meshlet_index]; 24 | 25 | if (cull_bounding_sphere(instance, meshlet.bounding_sphere)) { 26 | return; 27 | } 28 | 29 | bool alpha_clipped = bool(mesh_info.flags & MESH_INFO_FLAGS_ALPHA_CLIP); 30 | 31 | if (!alpha_clipped && cull_cone_perspective(instance, meshlet)) { 32 | return; 33 | } 34 | 35 | uint meshlet_draw_index; 36 | 37 | DrawCallBuffer draw_call_buffer = DrawCallBuffer(uniforms.draw_calls); 38 | 39 | if (alpha_clipped) { 40 | meshlet_draw_index = ALPHA_CLIP_DRAWS_OFFSET 41 | + atomicAdd(draw_call_buffer.num_alpha_clip, 1); 42 | } else { 43 | meshlet_draw_index = atomicAdd(draw_call_buffer.num_opaque, 1); 44 | } 45 | uint32_t meshlet_indices_buffer_offset = meshlet_draw_index; 46 | 47 | DrawIndirectCommand draw_call; 48 | 
draw_call.vertexCount = meshlet.triangle_count * 3; 49 | draw_call.instanceCount = 1; 50 | draw_call.firstVertex = 0; 51 | draw_call.firstInstance = meshlet_indices_buffer_offset; 52 | 53 | draw_call_buffer.draw_calls[meshlet_draw_index] = draw_call; 54 | 55 | MeshletReferenceBuffer(uniforms.meshlet_references) 56 | .meshlet_reference[meshlet_indices_buffer_offset] = meshlet_reference; 57 | } 58 | 59 | layout(local_size_x = 64) in; 60 | 61 | void write_draw_calls_shadows() { 62 | Uniforms uniforms = get_uniforms(); 63 | 64 | uint32_t id = gl_GlobalInvocationID.x; 65 | 66 | if (id >= total_num_meshlets_for_cascade(shadow_constant.cascade_index)) { 67 | return; 68 | } 69 | 70 | MeshletReference meshlet_reference = 71 | get_meshlet_reference(id, shadow_constant.cascade_index); 72 | 73 | Instance instance = InstanceBuffer(uniforms.instances) 74 | .instances[meshlet_reference.instance_index]; 75 | MeshInfo mesh_info = MeshInfoBuffer(instance.mesh_info_address).mesh_info; 76 | 77 | Meshlet meshlet = MeshletBuffer(mesh_info.meshlets) 78 | .meshlets[meshlet_reference.meshlet_index]; 79 | 80 | bool alpha_clipped = bool(mesh_info.flags & MESH_INFO_FLAGS_ALPHA_CLIP); 81 | 82 | if (cull_bounding_sphere_shadows( 83 | instance, 84 | meshlet.bounding_sphere, 85 | shadow_constant.cascade_index 86 | )) { 87 | return; 88 | } 89 | 90 | if (!alpha_clipped && cull_cone_orthographic(instance, meshlet)) { 91 | return; 92 | } 93 | 94 | uint meshlet_draw_index; 95 | 96 | DrawCallBuffer draw_call_buffer = DrawCallBuffer(uniforms.draw_calls); 97 | 98 | if (alpha_clipped) { 99 | meshlet_draw_index = ALPHA_CLIP_DRAWS_OFFSET 100 | + atomicAdd(draw_call_buffer.num_alpha_clip, 1); 101 | } else { 102 | meshlet_draw_index = atomicAdd(draw_call_buffer.num_opaque, 1); 103 | } 104 | uint32_t meshlet_indices_buffer_offset = 105 | meshlet_draw_index + MESHLET_INDICES_BUFFER_SECTION_OFFSET; 106 | 107 | DrawIndirectCommand draw_call; 108 | draw_call.vertexCount = meshlet.triangle_count * 3; 109 | 
draw_call.instanceCount = 1; 110 | draw_call.firstVertex = 0; 111 | draw_call.firstInstance = meshlet_indices_buffer_offset; 112 | 113 | draw_call_buffer.draw_calls[meshlet_draw_index] = draw_call; 114 | 115 | MeshletReferenceBuffer(uniforms.meshlet_references) 116 | .meshlet_reference[meshlet_indices_buffer_offset] = meshlet_reference; 117 | } 118 | -------------------------------------------------------------------------------- /src/shaders/rasterization.glsl: -------------------------------------------------------------------------------- 1 | #include "common/bindings.glsl" 2 | #include "common/util.glsl" 3 | 4 | //vert 5 | 6 | struct VertexData { 7 | Instance instance; 8 | MeshInfo mesh_info; 9 | uint32_t index; 10 | }; 11 | 12 | VertexData load_vertex_data(uint32_t vertex_index, uint32_t instance_index) { 13 | VertexData vertex_data; 14 | 15 | MeshletReference meshlet_reference = 16 | MeshletReferenceBuffer(get_uniforms().meshlet_references) 17 | .meshlet_reference[instance_index]; 18 | vertex_data.instance = InstanceBuffer(get_uniforms().instances) 19 | .instances[meshlet_reference.instance_index]; 20 | vertex_data.mesh_info = 21 | MeshInfoBuffer(vertex_data.instance.mesh_info_address).mesh_info; 22 | Meshlet meshlet = MeshletBuffer(vertex_data.mesh_info.meshlets) 23 | .meshlets[meshlet_reference.meshlet_index]; 24 | 25 | uint32_t micro_index = meshlet.index_offset 26 | + MicroIndexBufferSingle( 27 | vertex_data.mesh_info.micro_indices + meshlet.triangle_offset 28 | ) 29 | .indices[vertex_index]; 30 | 31 | vertex_data.index = load_index(vertex_data.mesh_info, micro_index); 32 | 33 | return vertex_data; 34 | } 35 | 36 | layout(location = 0) flat out uint32_t packed; 37 | 38 | void visbuffer_opaque_vertex() { 39 | uint32_t triangle_index = gl_VertexIndex / 3; 40 | 41 | VertexData data = load_vertex_data(gl_VertexIndex, gl_InstanceIndex); 42 | 43 | float3 position = 44 | calculate_world_pos(data.instance, data.mesh_info, data.index); 45 | 46 | gl_Position = 47 | 
get_uniforms().combined_perspective_view * float4(position, 1.0); 48 | packed = pack(triangle_index, gl_InstanceIndex); 49 | } 50 | 51 | //frag 52 | 53 | layout(location = 0) flat in uint32_t packed; 54 | layout(location = 0) out uint32_t out_packed; 55 | 56 | void visbuffer_opaque_pixel() { 57 | out_packed = packed; 58 | } 59 | 60 | //vert 61 | 62 | layout(location = 0) flat out uint32_t packed; 63 | layout(location = 1) out float2 uv; 64 | layout(location = 2) flat out uint32_t base_texture_index; 65 | 66 | void visbuffer_alpha_clip_vertex() { 67 | uint32_t triangle_index = gl_VertexIndex / 3; 68 | 69 | VertexData data = load_vertex_data(gl_VertexIndex, gl_InstanceIndex); 70 | 71 | float3 position = 72 | calculate_world_pos(data.instance, data.mesh_info, data.index); 73 | 74 | gl_Position = 75 | get_uniforms().combined_perspective_view * float4(position, 1.0); 76 | packed = pack(triangle_index, gl_InstanceIndex); 77 | base_texture_index = data.mesh_info.base_color_texture_index; 78 | uv = float2(QuanitizedUvs(data.mesh_info.uvs).uvs[data.index]) 79 | * data.mesh_info.texture_scale 80 | + data.mesh_info.texture_offset; 81 | } 82 | 83 | //frag 84 | 85 | layout(location = 0) flat in uint32_t packed; 86 | layout(location = 1) in float2 uv; 87 | layout(location = 2) flat in uint32_t base_texture_index; 88 | layout(location = 0) out uint32_t out_packed; 89 | 90 | void visbuffer_alpha_clip_pixel() { 91 | if (texture(sampler2D(textures[base_texture_index], repeat_sampler), uv).a 92 | < 0.5) { 93 | discard; 94 | } 95 | 96 | out_packed = packed; 97 | } 98 | 99 | //vert 100 | 101 | void shadowmap_opaque_vertex() { 102 | VertexData data = load_vertex_data(gl_VertexIndex, gl_InstanceIndex); 103 | 104 | float3 position = 105 | calculate_world_pos(data.instance, data.mesh_info, data.index); 106 | 107 | gl_Position = 108 | MiscStorageBuffer(get_uniforms().misc_storage) 109 | .misc_storage.shadow_matrices[shadow_constant.cascade_index] 110 | * float4(position, 1.0); 111 | } 112 | 
113 | //vert 114 | 115 | layout(location = 0) out float2 uv; 116 | layout(location = 1) flat out uint32_t base_texture_index; 117 | 118 | void shadowmap_alpha_clip_vertex() { 119 | VertexData data = load_vertex_data(gl_VertexIndex, gl_InstanceIndex); 120 | 121 | float3 position = 122 | calculate_world_pos(data.instance, data.mesh_info, data.index); 123 | 124 | gl_Position = 125 | MiscStorageBuffer(get_uniforms().misc_storage) 126 | .misc_storage.shadow_matrices[shadow_constant.cascade_index] 127 | * float4(position, 1.0); 128 | 129 | base_texture_index = data.mesh_info.base_color_texture_index; 130 | uv = float2(QuanitizedUvs(data.mesh_info.uvs).uvs[data.index]) 131 | * data.mesh_info.texture_scale 132 | + data.mesh_info.texture_offset; 133 | } 134 | 135 | //frag 136 | 137 | layout(location = 0) in float2 uv; 138 | layout(location = 1) flat in uint32_t base_texture_index; 139 | 140 | void shadowmap_alpha_clipped_pixel() { 141 | if (texture(sampler2D(textures[base_texture_index], repeat_sampler), uv).a 142 | < 0.5) { 143 | discard; 144 | } 145 | } 146 | -------------------------------------------------------------------------------- /src/resources/meshlets.cpp: -------------------------------------------------------------------------------- 1 | #include "meshlets.h" 2 | // Builds meshlets with meshoptimizer. `indices` is reinterpreted as 32-bit or 16-bit indices per `uses_32_bit_indices`; `positions` is read with a stride of three floats. Returns the meshlets, their micro indices and the per-meshlet vertex indices (16-bit unless `uses_32_bit_indices`). 3 | Meshlets build_meshlets( 4 | uint8_t* indices, 5 | size_t indices_count, 6 | float* positions, 7 | size_t vertices_count, 8 | bool uses_32_bit_indices 9 | ) { 10 | auto max_vertices = MAX_MESHLET_UNIQUE_VERTICES; 11 | auto max_triangles = MAX_MESHLET_TRIANGLES; 12 | // Given that I want to render a lot of foliage 13 | // which is double-sided and doesn't benefit from 14 | // cone culling, we can turn this down.
15 | auto cone_weight = 0.25f; 16 | 17 | size_t max_meshlets = 18 | meshopt_buildMeshletsBound(indices_count, max_vertices, max_triangles); 19 | std::vector<meshopt_Meshlet> meshlets(max_meshlets); 20 | std::vector<uint32_t> meshlet_indices_32bit( 21 | max_meshlets * max_vertices 22 | ); 23 | std::vector<uint8_t> micro_indices(max_meshlets * max_triangles * 3); 24 | 25 | size_t meshlet_count = 0; 26 | 27 | auto stride = sizeof(float) * 3; 28 | 29 | if (uses_32_bit_indices) { 30 | meshlet_count = meshopt_buildMeshlets( 31 | meshlets.data(), 32 | meshlet_indices_32bit.data(), 33 | micro_indices.data(), 34 | reinterpret_cast<uint32_t*>(indices), 35 | indices_count, 36 | positions, 37 | vertices_count, 38 | stride, 39 | max_vertices, 40 | max_triangles, 41 | cone_weight 42 | ); 43 | } else { 44 | meshlet_count = meshopt_buildMeshlets( 45 | meshlets.data(), 46 | meshlet_indices_32bit.data(), 47 | micro_indices.data(), 48 | reinterpret_cast<uint16_t*>(indices), 49 | indices_count, 50 | positions, 51 | vertices_count, 52 | stride, 53 | max_vertices, 54 | max_triangles, 55 | cone_weight 56 | ); 57 | } 58 | if (meshlet_count == 0) { return {}; } // No meshlets were built: `meshlets[meshlet_count - 1]` below would index out of bounds. 59 | const meshopt_Meshlet& last = meshlets[meshlet_count - 1]; 60 | 61 | meshlet_indices_32bit.resize(last.vertex_offset + last.vertex_count); 62 | micro_indices.resize( 63 | last.triangle_offset + ((last.triangle_count * 3 + 3) & ~3) 64 | ); 65 | meshlets.resize(meshlet_count); 66 | 67 | std::vector<meshopt_Bounds> meshlet_bounds(meshlet_count); 68 | 69 | for (size_t i = 0; i < meshlet_count; i++) { 70 | auto meshlet = meshlets[i]; 71 | meshlet_bounds[i] = meshopt_computeMeshletBounds( 72 | &meshlet_indices_32bit[meshlet.vertex_offset], 73 | &micro_indices[meshlet.triangle_offset], 74 | meshlet.triangle_count, 75 | positions, 76 | vertices_count, 77 | stride 78 | ); 79 | 80 | // This means that I've done something wrong.
81 | if (meshlet_bounds[i].radius == 0) { 82 | dbg(meshlet.triangle_count, 83 | meshlet.vertex_count, 84 | meshlet.triangle_offset); 85 | } 86 | } 87 | 88 | std::vector<Meshlet> final_meshlets(meshlet_count); 89 | 90 | for (size_t i = 0; i < meshlet_count; i++) { 91 | auto meshlet = meshlets[i]; 92 | meshopt_Bounds bounds = meshlet_bounds[i]; 93 | final_meshlets[i] = Meshlet { 94 | .cone_apex = glm::vec3( 95 | bounds.cone_apex[0], 96 | bounds.cone_apex[1], 97 | bounds.cone_apex[2] 98 | ), 99 | .cone_axis = glm::vec3( 100 | bounds.cone_axis[0], 101 | bounds.cone_axis[1], 102 | bounds.cone_axis[2] 103 | ), 104 | .cone_cutoff = bounds.cone_cutoff, 105 | .bounding_sphere = glm::vec4( 106 | bounds.center[0], 107 | bounds.center[1], 108 | bounds.center[2], 109 | bounds.radius 110 | ), 111 | .triangle_offset = meshlet.triangle_offset, 112 | .index_offset = meshlet.vertex_offset, 113 | .triangle_count = static_cast<uint8_t>(meshlet.triangle_count), 114 | .index_count = static_cast<uint8_t>(meshlet.vertex_count)}; 115 | } 116 | 117 | if (uses_32_bit_indices) { 118 | return { 119 | .meshlets = final_meshlets, 120 | .micro_indices = micro_indices, 121 | .indices_32bit = meshlet_indices_32bit, 122 | .indices_16bit = {}}; 123 | } else { 124 | // No need to use 32-bit indices.
125 | std::vector<uint16_t> meshlet_indices_16bit(meshlet_indices_32bit.size() 126 | ); 127 | 128 | for (size_t i = 0; i < meshlet_indices_32bit.size(); i++) { 129 | auto index = meshlet_indices_32bit[i]; 130 | assert(index < (1 << 16)); 131 | meshlet_indices_16bit[i] = index; 132 | } 133 | 134 | return { 135 | .meshlets = final_meshlets, 136 | .micro_indices = micro_indices, 137 | .indices_32bit = {}, 138 | .indices_16bit = meshlet_indices_16bit}; 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /src/shared_cpu_gpu.h: -------------------------------------------------------------------------------- 1 | #ifndef GLSL 2 | #pragma once 3 | using namespace glm; 4 | #else 5 | #include "shaders/common/hlsl4glsl.glsl" 6 | #endif 7 | 8 | const static uint8_t MESH_INFO_FLAGS_32_BIT_INDICES = uint8_t(1 << 0); 9 | const static uint8_t MESH_INFO_FLAGS_ALPHA_CLIP = uint8_t(1 << 1); 10 | 11 | // Same as VkDispatchIndirectCommand. 12 | struct DispatchIndirectCommand { 13 | uint32_t x; 14 | uint32_t y; 15 | uint32_t z; 16 | }; 17 | 18 | struct MeshInfo { 19 | uint64_t positions; 20 | uint64_t indices; 21 | uint64_t normals; 22 | uint64_t uvs; 23 | uint64_t micro_indices; 24 | uint64_t meshlets; 25 | uint16_t num_meshlets; 26 | uint8_t flags; 27 | vec4 bounding_sphere; 28 | vec2 texture_scale; 29 | vec2 texture_offset; 30 | uint16_t base_color_texture_index; 31 | uint16_t metallic_roughness_texture_index; 32 | uint16_t normal_texture_index; 33 | vec3 base_color_factor; 34 | }; 35 | 36 | struct MiscStorage { 37 | mat4 shadow_matrices[4]; 38 | mat4 uv_space_shadow_matrices[4]; 39 | mat4 shadow_view_matrices[4]; 40 | float shadow_sphere_radii[4]; 41 | uint32_t min_depth; 42 | uint32_t max_depth; 43 | }; 44 | 45 | // This is only an int32_t because of imgui.
// Selectable values for `Uniforms::debug`.
const static int32_t UNIFORMS_DEBUG_OFF = 0;
const static int32_t UNIFORMS_DEBUG_CASCADES = 1;
const static int32_t UNIFORMS_DEBUG_TRIANGLE_INDEX = 2;
const static int32_t UNIFORMS_DEBUG_INSTANCE_INDEX = 3;
const static int32_t UNIFORMS_DEBUG_SHADER_CLOCK = 4;
const static int32_t UNIFORMS_DEBUG_NORMALS = 5;

// Per-frame data shared between the CPU and the shaders.
// `meshlet_references`, `instances` and `misc_storage` are handed to
// buffer-reference constructors in the shaders (e.g.
// `InstanceBuffer(uniforms.instances)`); the remaining uint64_t members
// presumably hold buffer addresses as well - TODO confirm.
// NOTE(review): member order is presumably part of the CPU/GPU layout
// contract; do not reorder without checking the shader side.
struct Uniforms {
    mat4 combined_perspective_view;
    mat4 inv_perspective_view;
    mat4 view;
    // View matrix used by the camera frustum culling in culling.glsl.
    mat4 initial_view;
    mat4 perspective;
    mat4 perspective_inverse;
    mat4 view_inverse;
    uint64_t meshlet_references;
    uint64_t instances;
    uint64_t draw_calls;
    uint64_t misc_storage;
    uint64_t num_meshlets_prefix_sum;
    uint64_t dispatches;
    vec3 camera_pos;
    vec3 sun_dir;
    vec3 sun_intensity;
    uint32_t num_instances;
    uvec2 window_size;
    float shadow_cam_distance;
    float cascade_split_pow;
    // One of the UNIFORMS_DEBUG_* values.
    int32_t debug;
    bool debug_shadowmaps;
};

// Same as VkDrawIndirectCommand
struct DrawIndirectCommand {
    uint32_t vertexCount;
    uint32_t instanceCount;
    uint32_t firstVertex;
    uint32_t firstInstance;
};

// One placed mesh: its transform plus the address of the MeshInfo
// describing the mesh data.
struct Instance {
    mat4 transform;
    uint64_t mesh_info_address;
    // Used by the shaders to transform normals and meshlet cone axes.
    mat3 normal_transform;

#ifndef GLSL
    // CPU-only constructor: derives `normal_transform` from `transform`.
    Instance(glm::mat4 transform_, uint64_t mesh_info_address_) :
        transform(transform_),
        mesh_info_address(mesh_info_address_) {
        // Normally you want to do a transpose for this but for hlsl you don't seem to need to. Not sure why.
        normal_transform = glm::mat3(glm::inverse(transform));
    }
#endif
};

// The *Constant structs below are small parameter blocks.
// NOTE(review): presumably passed as push constants - confirm against the
// pipeline setup.
struct ShadowPassConstant {
    uint32_t cascade_index;
};

struct CalcBoundingSphereConstant {
    uint32_t num_vertices;
};

struct CopyQuantizedPositionsConstant {
    uint64_t dst;
    uint64_t src;
    uint32_t count;
};

struct UniformBufferAddressConstant {
    uint64_t address;
};

const static uint32_t MAX_OPAQUE_DRAWS = 200000;
const static uint32_t MAX_ALPHA_CLIP_DRAWS = 200000;

// Alpha-clip draws are placed directly after the opaque draws.
const static uint32_t ALPHA_CLIP_DRAWS_OFFSET = MAX_OPAQUE_DRAWS;

// First slot after both draw sections.
const static uint32_t MESHLET_INDICES_BUFFER_SECTION_OFFSET =
    ALPHA_CLIP_DRAWS_OFFSET + MAX_ALPHA_CLIP_DRAWS;

// Compared against flipped view-space depth in the camera culling test.
const static float NEAR_PLANE = 0.01f;

// A small cluster of triangles together with its culling data.
struct Meshlet {
    // Cone used for meshlet backface culling (see cull_cone_perspective /
    // cull_cone_orthographic in culling.glsl).
    vec3 cone_apex;
    vec3 cone_axis;
    float cone_cutoff;
    vec4 bounding_sphere;
    // The buffers these index into are often large enough to require 32-bit offsets.
    uint32_t triangle_offset;
    uint32_t index_offset;
    uint8_t triangle_count;
    uint8_t index_count;
};

// Allows for indexing a meshlet in the mesh that an instance represents.
struct MeshletReference {
    uint32_t instance_index; // Index into the instance buffer.
    uint16_t meshlet_index; // Index into that instance's meshlet array.
};

// Defaults from https://github.com/zeux/meshoptimizer#mesh-shading
const static uint32_t MAX_MESHLET_UNIQUE_VERTICES = 64;
const static uint32_t MAX_MESHLET_TRIANGLES = 124;
const static uint32_t MAX_MESHLET_VERTICES = MAX_MESHLET_TRIANGLES * 3;

// All bits set: marks a texture slot that is not in use.
const static uint16_t UNUSED_TEXTURE_INDEX = ~uint16_t(0u);

struct PrefixSumValue {
    uint32_t index;
    uint32_t sum;
};

const static uint32_t MAX_INSTANCES = 1024;

// MAX_INSTANCES * sizeof(PrefixSumValue) + sizeof(uint64_t)
const static uint32_t PREFIX_SUM_BUFFER_SECTOR_SIZE = MAX_INSTANCES * 8 + 8;

const static uint32_t PER_INSTANCE_DISPATCH = 0;
const static uint32_t PER_SHADOW_INSTANCE_DISPATCH = 1;
const static uint32_t PER_MESHLET_DISPATCH = 2;

--------------------------------------------------------------------------------
/src/shaders/common/culling.glsl:
--------------------------------------------------------------------------------
// Camera culling for a bounding sphere given in the instance's local space.
// Returns true when the sphere can be culled, i.e. it is behind the near
// plane or outside the horizontal/vertical frustum planes.
bool cull_bounding_sphere(Instance instance, vec4 bounding_sphere) {
    Uniforms uniforms = get_uniforms();

    vec3 center_world =
        (instance.transform * vec4(bounding_sphere.xyz, 1.0)).xyz;

    // Scales are nearly always uniform. When they are not, the longest axis
    // is used for the radius.
    float largest_axis_scale = max(
        max(
            length(instance.transform[0].xyz),
            length(instance.transform[1].xyz)
        ),
        length(instance.transform[2].xyz)
    );
    float radius = bounding_sphere.w * largest_axis_scale;

    vec3 center_view = (uniforms.initial_view * vec4(center_world, 1.0)).xyz;
    // View space runs from negative in front to positive behind; flip z so
    // that "forwards" is positive.
    center_view.z = -center_view.z;

    // The most forward point of the sphere has to be past the near plane.
    bool in_front = center_view.z + radius > NEAR_PLANE;

    // Side planes are built from the rows of the perspective matrix.
    mat4 perspective_rows = transpose(uniforms.perspective);
    vec3 frustum_x =
        normalize(perspective_rows[3].xyz + perspective_rows[0].xyz);
    vec3 frustum_y =
        normalize(perspective_rows[3].xyz + perspective_rows[1].xyz);

    bool inside_x =
        center_view.z * frustum_x.z + abs(center_view.x) * frustum_x.x
        < radius;
    bool inside_y =
        center_view.z * frustum_y.z - abs(center_view.y) * frustum_y.y
        < radius;

    return !(in_front && inside_x && inside_y);
}

// True when at least part of the sphere lies within +-cascade_size on both
// axes.
bool fits_partially_inside(
    vec2 view_pos,
    float sphere_radius,
    float cascade_size
) {
    vec2 nearest_extent = abs(view_pos) - sphere_radius;
    return all(lessThan(nearest_extent, vec2(cascade_size)));
}

// True when the whole sphere lies within +-cascade_size on both axes.
bool fits_entirely_inside(
    vec2 view_pos,
    float sphere_radius,
    float cascade_size
) {
    vec2 farthest_extent = abs(view_pos) + sphere_radius;
    return all(lessThan(farthest_extent, vec2(cascade_size)));
}

// Same as fits_entirely_inside, but takes a world-space centre and looks up
// the view matrix and size of the given shadow cascade.
bool fits_entirely_inside_cascade(
    vec3 world_pos,
    float sphere_radius,
    uint32_t cascade_index
) {
    MiscStorageBuffer buf = MiscStorageBuffer(get_uniforms().misc_storage);

    vec4 cascade_view_pos =
        buf.misc_storage.shadow_view_matrices[cascade_index]
        * vec4(world_pos, 1.0);
    float cascade_size = buf.misc_storage.shadow_sphere_radii[cascade_index];

    return fits_entirely_inside(
        cascade_view_pos.xy,
        sphere_radius,
        cascade_size
    );
}

// Shadow-pass culling for one cascade. Returns true when the sphere can be
// culled: either it does not touch this cascade at all, or it fits entirely
// inside a smaller cascade.
bool cull_bounding_sphere_shadows(
    Instance instance,
    vec4 bounding_sphere,
    uint32_t cascade_index
) {
    MiscStorageBuffer buf = MiscStorageBuffer(get_uniforms().misc_storage);

    vec3 center_world =
        (instance.transform * vec4(bounding_sphere.xyz, 1.0)).xyz;

    // Scales are nearly always uniform. When they are not, the longest axis
    // is used for the radius.
    float largest_axis_scale = max(
        max(
            length(instance.transform[0].xyz),
            length(instance.transform[1].xyz)
        ),
        length(instance.transform[2].xyz)
    );
    float radius = bounding_sphere.w * largest_axis_scale;

    vec3 center_view = (buf.misc_storage.shadow_view_matrices[cascade_index]
                        * vec4(center_world, 1.0))
                           .xyz;
    float cascade_size = buf.misc_storage.shadow_sphere_radii[cascade_index];

    if (!fits_partially_inside(center_view.xy, radius, cascade_size)) {
        return true;
    }

    // If the object fits entirely within a smaller cascade then it can be
    // culled. Only cascades 0, 1 and 2 are ever tested.
    for (uint32_t smaller_cascade = 0; smaller_cascade < 3;
         smaller_cascade++) {
        if (cascade_index > smaller_cascade
            && fits_entirely_inside_cascade(
                center_world,
                radius,
                smaller_cascade
            )) {
            return true;
        }
    }

    return false;
}

// Cone culling against the camera position. Returns true when the meshlet
// can be culled.
bool cull_cone_perspective(Instance instance, Meshlet meshlet) {
    float3 apex = (instance.transform * float4(meshlet.cone_apex, 1.0)).xyz;
    float3 axis = normalize((instance.normal_transform * meshlet.cone_axis));
    float3 camera_to_apex = normalize(apex - get_uniforms().camera_pos);

    return dot(camera_to_apex, normalize(axis)) >= meshlet.cone_cutoff;
}

// Cone culling against the sun direction. Returns true when the meshlet can
// be culled.
bool cull_cone_orthographic(Instance instance, Meshlet meshlet) {
    float3 axis = normalize((instance.normal_transform * meshlet.cone_axis));
    float facing_sun = dot(get_uniforms().sun_dir, axis);
    return facing_sun >= meshlet.cone_cutoff;
}

-------------------------------------------------------------------------------- /compile_glsl.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import re 3 | import copy 4 | import subprocess 5 | import os 6 | import argparse 7 | 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument("filenames", nargs="+") 10 | parser.add_argument("--flags") 11 | parser.add_argument("--shader-stage") 12 | parser.add_argument("--out-dir", default=".") 13 | args = parser.parse_args() 14 | 15 | if not os.path.exists(args.out_dir): 16 | os.mkdir(args.out_dir) 17 | 18 | entry_point_regex = re.compile("^void (\w+)\(\)") 19 | layout_regex = re.compile("^layout\((location|local_size)") 20 | 21 | 22 | # Find a block range. For functions and layouts (like uniforms). 23 | def find_block_range(lines, line_start): 24 | balance = 0 25 | 26 | if "{" not in lines[line_start]: 27 | return range(line_start, line_start + 1) 28 | 29 | for i, line in enumerate(lines[line_start:]): 30 | for char in line: 31 | if char == "{": 32 | balance += 1 33 | elif char == "}": 34 | balance -= 1 35 | if balance == 0: 36 | return range(line_start, i + line_start + 1) 37 | 38 | 39 | def comment_out(lines, line_nums): 40 | for line_num in line_nums: 41 | lines[line_num] = "//" + lines[line_num] 42 | 43 | 44 | for filename in args.filenames: 45 | contents = open(filename).read() 46 | parent_dir = os.path.dirname(filename) 47 | 48 | basename = os.path.basename(filename).split(".")[0] 49 | 50 | fallback_shader_stage = args.shader_stage 51 | 52 | if not fallback_shader_stage: 53 | if filename.endswith(".comp"): 54 | fallback_shader_stage = "comp" 55 | 56 | lines = contents.split("\n") 57 | 58 | entry_points = [ 59 | (line_num, match) 60 | for (line_num, match) in ( 61 | (line_num, entry_point_regex.match(line)) 62 | for (line_num, line) in enumerate(lines) 63 | ) 64 | if match != None 65 | ] 66 | 67 | if len(entry_points) == 0: 68 | print(f"No entry points 
found in {filename}") 69 | sys.exit(1) 70 | 71 | prev_line_num = 0 72 | 73 | entry_point_ranges = [] 74 | 75 | for entry_point_line_num, entry_point in entry_points: 76 | entry_point_ranges.append(find_block_range(lines, entry_point_line_num)) 77 | 78 | for i, (entry_point_line_num, entry_point) in enumerate(entry_points): 79 | shader_stage = None 80 | 81 | # Find the last shader stage tag. 82 | for line_num in range(prev_line_num, entry_point_line_num): 83 | if lines[line_num] == "//vert": 84 | shader_stage = "vert" 85 | break 86 | elif lines[line_num] == "//frag": 87 | shader_stage = "frag" 88 | break 89 | elif lines[line_num] == "//comp": 90 | shader_stage = "comp" 91 | break 92 | 93 | shader_stage = shader_stage or fallback_shader_stage 94 | 95 | if not shader_stage: 96 | print(f"shader stage missing for {filename} {entry_point.group(1)}") 97 | sys.exit(1) 98 | 99 | entry_point_lines = copy.copy(lines) 100 | 101 | # Comment out the other entry points 102 | for j, entry_point_range in enumerate(entry_point_ranges): 103 | if i == j: 104 | continue 105 | comment_out(entry_point_lines, entry_point_ranges[j]) 106 | 107 | # Comment out layouts belonging to entry points above this one 108 | comment_out( 109 | entry_point_lines, 110 | ( 111 | line_num 112 | for (line_num, line) in enumerate(lines[:prev_line_num]) 113 | if layout_regex.match(line) 114 | ), 115 | ) 116 | 117 | # Comment out layouts belonging to entry points below this one 118 | comment_out( 119 | entry_point_lines, 120 | ( 121 | entry_point_line_num + line_num 122 | for (line_num, line) in enumerate(lines[entry_point_line_num:]) 123 | if layout_regex.match(line) 124 | ), 125 | ) 126 | 127 | # Rename the entry point 128 | entry_point_lines[entry_point_line_num] = entry_point_lines[ 129 | entry_point_line_num 130 | ].replace(entry_point.group(0), "void main()") 131 | 132 | # Name the file afte the entry point, or use the basename 133 | # of the file if the entry point is 'main 134 | entry_point_name = 
entry_point.group(1) 135 | 136 | output_filename = None 137 | if entry_point_name == "main": 138 | output_filename = basename + ".spv" 139 | else: 140 | output_filename = entry_point_name + ".spv" 141 | 142 | file_contents = "\n".join(entry_point_lines) 143 | 144 | try: 145 | subprocess.run( 146 | f"glslc {args.flags} -I {parent_dir} -fshader-stage={shader_stage} - -o {os.path.join(args.out_dir, output_filename)}".split( 147 | " " 148 | ), 149 | input=file_contents, 150 | text=True, 151 | check=True, 152 | ) 153 | except subprocess.CalledProcessError: 154 | # for (i, line) in enumerate(entry_point_lines): 155 | # print(f"{i+1}: {line}") 156 | print(f"Error occured in {filename}: {entry_point_name}") 157 | sys.exit(1) 158 | 159 | prev_line_num = entry_point_ranges[i][-1] + 1 160 | -------------------------------------------------------------------------------- /src/input.cpp: -------------------------------------------------------------------------------- 1 | #include "input.h" 2 | 3 | glm::vec3 clamp_length(glm::vec3 vector, float max) { 4 | auto length_squared = glm::length2(vector); 5 | if (length_squared > max * max) { 6 | return vector / sqrtf(length_squared) * max; 7 | } else { 8 | return vector; 9 | } 10 | } 11 | 12 | glm::vec2 clamp_length(glm::vec2 vector, float max) { 13 | auto length_squared = glm::length2(vector); 14 | if (length_squared > max * max) { 15 | return vector / sqrtf(length_squared) * max; 16 | } else { 17 | return vector; 18 | } 19 | } 20 | 21 | void CameraParams::update(glm::ivec3 movement_vector, glm::ivec2 sun_vector) { 22 | auto accel = 0.02f; 23 | 24 | if (movement_vector != glm::ivec3(0)) { 25 | auto normalized_movement = 26 | glm::normalize(glm::vec3(movement_vector)) * accel; 27 | velocity += normalized_movement.z * facing() 28 | + normalized_movement.x * right() 29 | + normalized_movement.y * glm::vec3(0, 1, 0); 30 | velocity = clamp_length(velocity, 0.2); 31 | } else { 32 | velocity *= 0.9; 33 | } 34 | 35 | if (sun_vector != 
glm::ivec2(0)) { 36 | sun_delta += glm::normalize(glm::vec2(sun_vector)) * 0.001f; 37 | sun_delta = clamp_length(sun_delta, 0.05f); 38 | } else { 39 | sun_delta *= 0.9; 40 | } 41 | 42 | position += velocity; 43 | 44 | sun_latitude += sun_delta.x; 45 | sun_longitude += sun_delta.y; 46 | sun_longitude = 47 | std::clamp(sun_longitude, 0.0f, std::numbers::pi_v / 2.0f); 48 | } 49 | 50 | void glfw_key_callback( 51 | GLFWwindow* window, 52 | int key, 53 | int /*scancode*/, 54 | int action, 55 | int /*mods*/ 56 | ) { 57 | KeyboardState& keyboard_state = 58 | *static_cast(glfwGetWindowUserPointer(window)); 59 | switch (key) { 60 | case GLFW_KEY_LEFT: 61 | keyboard_state.left = action != GLFW_RELEASE; 62 | break; 63 | case GLFW_KEY_RIGHT: 64 | keyboard_state.right = action != GLFW_RELEASE; 65 | break; 66 | case GLFW_KEY_UP: 67 | keyboard_state.up = action != GLFW_RELEASE; 68 | break; 69 | case GLFW_KEY_DOWN: 70 | keyboard_state.down = action != GLFW_RELEASE; 71 | break; 72 | case GLFW_KEY_W: 73 | keyboard_state.w = action != GLFW_RELEASE; 74 | break; 75 | case GLFW_KEY_A: 76 | keyboard_state.a = action != GLFW_RELEASE; 77 | break; 78 | case GLFW_KEY_S: 79 | keyboard_state.s = action != GLFW_RELEASE; 80 | break; 81 | case GLFW_KEY_D: 82 | keyboard_state.d = action != GLFW_RELEASE; 83 | break; 84 | case GLFW_KEY_LEFT_SHIFT: 85 | keyboard_state.shift = action != GLFW_RELEASE; 86 | break; 87 | case GLFW_KEY_LEFT_CONTROL: 88 | keyboard_state.control = action != GLFW_RELEASE; 89 | break; 90 | case GLFW_KEY_G: 91 | keyboard_state.grab_toggled ^= (action == GLFW_PRESS); 92 | if (keyboard_state.grab_toggled) { 93 | glfwSetInputMode(window, GLFW_CURSOR, GLFW_CURSOR_DISABLED); 94 | } else { 95 | glfwSetInputMode(window, GLFW_CURSOR, GLFW_CURSOR_NORMAL); 96 | } 97 | break; 98 | case GLFW_KEY_U: 99 | keyboard_state.ui_toggled ^= (action == GLFW_PRESS); 100 | break; 101 | } 102 | if (key == GLFW_KEY_ESCAPE && action == GLFW_PRESS) { 103 | glfwSetWindowShouldClose(window, GLFW_TRUE); 104 | 
} 105 | } 106 | 107 | void draw_imgui_window( 108 | Uniforms* uniforms, 109 | CameraParams& camera_params, 110 | KeyboardState& keyboard_state, 111 | bool& copy_view 112 | ) { 113 | ImGui::Checkbox("debug shadowmaps", &uniforms->debug_shadowmaps); 114 | ImGui::Checkbox("copy view", ©_view); 115 | ImGui::SliderFloat( 116 | "shadow_cam_distance", 117 | &uniforms->shadow_cam_distance, 118 | 0.0f, 119 | 10000.0f 120 | ); 121 | ImGui::SliderFloat( 122 | "cascade_split_pow", 123 | &uniforms->cascade_split_pow, 124 | 0.0f, 125 | 10.0f 126 | ); 127 | ImGui::SliderFloat("fov", &camera_params.fov, 0.0f, 90.0f); 128 | ImGui::SliderFloat( 129 | "sun_intensity", 130 | &uniforms->sun_intensity.x, 131 | 0.0f, 132 | 100.0f 133 | ); 134 | ImGui::Text( 135 | "camera pos: (%f, %f, %f)", 136 | camera_params.position.x, 137 | camera_params.position.y, 138 | camera_params.position.z 139 | ); 140 | ImGui::Text("yaw: %f", camera_params.yaw); 141 | ImGui::Text("pitch: %f", camera_params.pitch); 142 | ImGui::Text("sun_latitude: %f", camera_params.sun_latitude); 143 | ImGui::Text("sun_longitude: %f", camera_params.sun_longitude); 144 | ImGui::Text("grab_toggled: %u", keyboard_state.grab_toggled); 145 | ImGui::RadioButton("Debug: Off", &uniforms->debug, UNIFORMS_DEBUG_OFF); 146 | ImGui::RadioButton( 147 | "Debug: Cascades", 148 | &uniforms->debug, 149 | UNIFORMS_DEBUG_CASCADES 150 | ); 151 | ImGui::RadioButton( 152 | "Debug: Triangle index", 153 | &uniforms->debug, 154 | UNIFORMS_DEBUG_TRIANGLE_INDEX 155 | ); 156 | ImGui::RadioButton( 157 | "Debug: Instance Index", 158 | &uniforms->debug, 159 | UNIFORMS_DEBUG_INSTANCE_INDEX 160 | ); 161 | ImGui::RadioButton( 162 | "Debug: Shader Clock", 163 | &uniforms->debug, 164 | UNIFORMS_DEBUG_SHADER_CLOCK 165 | ); 166 | ImGui::RadioButton( 167 | "Debug: Normals", 168 | &uniforms->debug, 169 | UNIFORMS_DEBUG_NORMALS 170 | ); 171 | } 172 | -------------------------------------------------------------------------------- /src/frame_resources.h: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "allocations/base.h" 3 | #include "allocations/image_with_view.h" 4 | #include "allocations/staging.h" 5 | #include "util.h" 6 | 7 | struct ResizingResources { 8 | ImageWithView scene_referred_framebuffer; 9 | ImageWithView depthbuffer; 10 | ImageWithView visbuffer; 11 | 12 | ResizingResources( 13 | const vk::raii::Device& device, 14 | vma::Allocator allocator, 15 | vk::Extent2D extent 16 | ) : 17 | scene_referred_framebuffer(ImageWithView( 18 | { 19 | .imageType = vk::ImageType::e2D, 20 | .format = vk::Format::eR16G16B16A16Sfloat, 21 | .extent = 22 | vk::Extent3D { 23 | .width = extent.width, 24 | .height = extent.height, 25 | .depth = 1, 26 | }, 27 | .mipLevels = 1, 28 | .arrayLayers = 1, 29 | .usage = vk::ImageUsageFlagBits::eStorage 30 | | vk::ImageUsageFlagBits::eSampled, 31 | }, 32 | allocator, 33 | device, 34 | "scene_referred_framebuffer", 35 | COLOR_SUBRESOURCE_RANGE 36 | )), 37 | depthbuffer(ImageWithView( 38 | {.imageType = vk::ImageType::e2D, 39 | .format = vk::Format::eD32Sfloat, 40 | .extent = 41 | vk::Extent3D { 42 | .width = extent.width, 43 | .height = extent.height, 44 | .depth = 1, 45 | }, 46 | .mipLevels = 1, 47 | .arrayLayers = 1, 48 | .usage = vk::ImageUsageFlagBits::eDepthStencilAttachment 49 | | vk::ImageUsageFlagBits::eSampled}, 50 | allocator, 51 | device, 52 | "depthbuffer", 53 | DEPTH_SUBRESOURCE_RANGE 54 | )), 55 | visbuffer(ImageWithView( 56 | {.imageType = vk::ImageType::e2D, 57 | .format = vk::Format::eR32Uint, 58 | .extent = 59 | vk::Extent3D { 60 | .width = extent.width, 61 | .height = extent.height, 62 | .depth = 1, 63 | }, 64 | .mipLevels = 1, 65 | .arrayLayers = 1, 66 | .usage = vk::ImageUsageFlagBits::eColorAttachment 67 | | vk::ImageUsageFlagBits::eSampled}, 68 | allocator, 69 | device, 70 | "visbuffer", 71 | COLOR_SUBRESOURCE_RANGE 72 | )) {} 73 | }; 74 | 75 | struct InstanceResources { 76 | AllocatedBuffer 
instances; 77 | AllocatedBuffer meshlet_references; 78 | AllocatedBuffer num_meshlets_prefix_sum; 79 | }; 80 | 81 | struct Resources { 82 | ResizingResources resizing; 83 | ImageWithView shadowmap; 84 | AllocatedBuffer misc_storage_buffer; 85 | AllocatedBuffer draw_calls_buffer; 86 | AllocatedBuffer dispatches_buffer; 87 | std::array shadowmap_layer_views; 88 | ImageWithView display_transform_lut; 89 | ImageWithView skybox; 90 | vk::raii::Sampler repeat_sampler; 91 | vk::raii::Sampler clamp_sampler; 92 | vk::raii::Sampler shadowmap_comparison_sampler; 93 | }; 94 | 95 | struct RaiiTracyCtx { 96 | tracy::VkCtx* inner; 97 | 98 | RaiiTracyCtx(tracy::VkCtx* inner_); 99 | 100 | ~RaiiTracyCtx(); 101 | 102 | RaiiTracyCtx(RaiiTracyCtx&& other); 103 | 104 | RaiiTracyCtx& operator=(RaiiTracyCtx&& other); 105 | }; 106 | 107 | template 108 | struct FlipFlipResource { 109 | std::array items; 110 | bool flipped; 111 | 112 | void flip() { 113 | flipped = !flipped; 114 | } 115 | 116 | T& get() { 117 | return items[flipped]; 118 | } 119 | }; 120 | 121 | struct FrameCommandData { 122 | vk::raii::CommandPool pool; 123 | vk::raii::CommandBuffer buffer; 124 | vk::raii::Semaphore swapchain_semaphore; 125 | vk::raii::Semaphore render_semaphore; 126 | vk::raii::Fence render_fence; 127 | RaiiTracyCtx tracy_ctx; 128 | }; 129 | 130 | FrameCommandData create_frame_command_data( 131 | const vk::raii::Device& device, 132 | const vk::raii::PhysicalDevice& phys_device, 133 | const vk::raii::Queue& queue, 134 | uint32_t graphics_queue_family 135 | ); 136 | 137 | struct UploadingBuffer { 138 | PersistentlyMappedBuffer staging; 139 | AllocatedBuffer buffer; 140 | 141 | UploadingBuffer( 142 | size_t size, 143 | vk::BufferUsageFlags usage, 144 | const std::string& name, 145 | vma::Allocator allocator 146 | ) : 147 | staging(PersistentlyMappedBuffer(AllocatedBuffer( 148 | vk::BufferCreateInfo { 149 | .size = size, 150 | .usage = vk::BufferUsageFlagBits::eTransferSrc}, 151 | { 152 | .flags = 
vma::AllocationCreateFlagBits::eMapped 153 | | vma::AllocationCreateFlagBits::eHostAccessSequentialWrite, 154 | .usage = vma::MemoryUsage::eAuto, 155 | }, 156 | allocator, 157 | "staging " + name 158 | ))), 159 | buffer(AllocatedBuffer( 160 | vk::BufferCreateInfo { 161 | .size = size, 162 | .usage = usage | vk::BufferUsageFlagBits::eTransferDst}, 163 | { 164 | .usage = vma::MemoryUsage::eAuto, 165 | }, 166 | allocator, 167 | name 168 | )) {} 169 | 170 | void flush(const vk::raii::CommandBuffer& command_buffer, size_t size) { 171 | command_buffer.copyBuffer( 172 | staging.buffer.buffer, 173 | buffer.buffer, 174 | {vk::BufferCopy {.srcOffset = 0, .dstOffset = 0, .size = size}} 175 | ); 176 | } 177 | }; 178 | -------------------------------------------------------------------------------- /src/resources/dds.h: -------------------------------------------------------------------------------- 1 | // Copied from https://learn.microsoft.com/en-us/windows/win32/direct3ddds/dx-graphics-dds-reference 2 | 3 | typedef uint32_t DWORD; 4 | typedef uint32_t UINT; 5 | 6 | struct DDS_PIXELFORMAT { 7 | DWORD dwSize; 8 | DWORD dwFlags; 9 | DWORD dwFourCC; 10 | DWORD dwRGBBitCount; 11 | DWORD dwRBitMask; 12 | DWORD dwGBitMask; 13 | DWORD dwBBitMask; 14 | DWORD dwABitMask; 15 | }; 16 | 17 | DWORD DDSCAPS2_CUBEMAP = 0x200; 18 | 19 | struct DDS_HEADER { 20 | DWORD dwSize; 21 | DWORD dwFlags; 22 | DWORD dwHeight; 23 | DWORD dwWidth; 24 | DWORD dwPitchOrLinearSize; 25 | DWORD dwDepth; 26 | DWORD dwMipMapCount; 27 | DWORD dwReserved1[11]; 28 | DDS_PIXELFORMAT ddspf; 29 | DWORD dwCaps; 30 | DWORD dwCaps2; 31 | DWORD dwCaps3; 32 | DWORD dwCaps4; 33 | DWORD dwReserved2; 34 | }; 35 | 36 | enum DXGI_FORMAT { 37 | DXGI_FORMAT_UNKNOWN = 0, 38 | DXGI_FORMAT_R32G32B32A32_TYPELESS = 1, 39 | DXGI_FORMAT_R32G32B32A32_FLOAT = 2, 40 | DXGI_FORMAT_R32G32B32A32_UINT = 3, 41 | DXGI_FORMAT_R32G32B32A32_SINT = 4, 42 | DXGI_FORMAT_R32G32B32_TYPELESS = 5, 43 | DXGI_FORMAT_R32G32B32_FLOAT = 6, 44 | 
DXGI_FORMAT_R32G32B32_UINT = 7, 45 | DXGI_FORMAT_R32G32B32_SINT = 8, 46 | DXGI_FORMAT_R16G16B16A16_TYPELESS = 9, 47 | DXGI_FORMAT_R16G16B16A16_FLOAT = 10, 48 | DXGI_FORMAT_R16G16B16A16_UNORM = 11, 49 | DXGI_FORMAT_R16G16B16A16_UINT = 12, 50 | DXGI_FORMAT_R16G16B16A16_SNORM = 13, 51 | DXGI_FORMAT_R16G16B16A16_SINT = 14, 52 | DXGI_FORMAT_R32G32_TYPELESS = 15, 53 | DXGI_FORMAT_R32G32_FLOAT = 16, 54 | DXGI_FORMAT_R32G32_UINT = 17, 55 | DXGI_FORMAT_R32G32_SINT = 18, 56 | DXGI_FORMAT_R32G8X24_TYPELESS = 19, 57 | DXGI_FORMAT_D32_FLOAT_S8X24_UINT = 20, 58 | DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS = 21, 59 | DXGI_FORMAT_X32_TYPELESS_G8X24_UINT = 22, 60 | DXGI_FORMAT_R10G10B10A2_TYPELESS = 23, 61 | DXGI_FORMAT_R10G10B10A2_UNORM = 24, 62 | DXGI_FORMAT_R10G10B10A2_UINT = 25, 63 | DXGI_FORMAT_R11G11B10_FLOAT = 26, 64 | DXGI_FORMAT_R8G8B8A8_TYPELESS = 27, 65 | DXGI_FORMAT_R8G8B8A8_UNORM = 28, 66 | DXGI_FORMAT_R8G8B8A8_UNORM_SRGB = 29, 67 | DXGI_FORMAT_R8G8B8A8_UINT = 30, 68 | DXGI_FORMAT_R8G8B8A8_SNORM = 31, 69 | DXGI_FORMAT_R8G8B8A8_SINT = 32, 70 | DXGI_FORMAT_R16G16_TYPELESS = 33, 71 | DXGI_FORMAT_R16G16_FLOAT = 34, 72 | DXGI_FORMAT_R16G16_UNORM = 35, 73 | DXGI_FORMAT_R16G16_UINT = 36, 74 | DXGI_FORMAT_R16G16_SNORM = 37, 75 | DXGI_FORMAT_R16G16_SINT = 38, 76 | DXGI_FORMAT_R32_TYPELESS = 39, 77 | DXGI_FORMAT_D32_FLOAT = 40, 78 | DXGI_FORMAT_R32_FLOAT = 41, 79 | DXGI_FORMAT_R32_UINT = 42, 80 | DXGI_FORMAT_R32_SINT = 43, 81 | DXGI_FORMAT_R24G8_TYPELESS = 44, 82 | DXGI_FORMAT_D24_UNORM_S8_UINT = 45, 83 | DXGI_FORMAT_R24_UNORM_X8_TYPELESS = 46, 84 | DXGI_FORMAT_X24_TYPELESS_G8_UINT = 47, 85 | DXGI_FORMAT_R8G8_TYPELESS = 48, 86 | DXGI_FORMAT_R8G8_UNORM = 49, 87 | DXGI_FORMAT_R8G8_UINT = 50, 88 | DXGI_FORMAT_R8G8_SNORM = 51, 89 | DXGI_FORMAT_R8G8_SINT = 52, 90 | DXGI_FORMAT_R16_TYPELESS = 53, 91 | DXGI_FORMAT_R16_FLOAT = 54, 92 | DXGI_FORMAT_D16_UNORM = 55, 93 | DXGI_FORMAT_R16_UNORM = 56, 94 | DXGI_FORMAT_R16_UINT = 57, 95 | DXGI_FORMAT_R16_SNORM = 58, 96 | DXGI_FORMAT_R16_SINT = 59, 
97 | DXGI_FORMAT_R8_TYPELESS = 60, 98 | DXGI_FORMAT_R8_UNORM = 61, 99 | DXGI_FORMAT_R8_UINT = 62, 100 | DXGI_FORMAT_R8_SNORM = 63, 101 | DXGI_FORMAT_R8_SINT = 64, 102 | DXGI_FORMAT_A8_UNORM = 65, 103 | DXGI_FORMAT_R1_UNORM = 66, 104 | DXGI_FORMAT_R9G9B9E5_SHAREDEXP = 67, 105 | DXGI_FORMAT_R8G8_B8G8_UNORM = 68, 106 | DXGI_FORMAT_G8R8_G8B8_UNORM = 69, 107 | DXGI_FORMAT_BC1_TYPELESS = 70, 108 | DXGI_FORMAT_BC1_UNORM = 71, 109 | DXGI_FORMAT_BC1_UNORM_SRGB = 72, 110 | DXGI_FORMAT_BC2_TYPELESS = 73, 111 | DXGI_FORMAT_BC2_UNORM = 74, 112 | DXGI_FORMAT_BC2_UNORM_SRGB = 75, 113 | DXGI_FORMAT_BC3_TYPELESS = 76, 114 | DXGI_FORMAT_BC3_UNORM = 77, 115 | DXGI_FORMAT_BC3_UNORM_SRGB = 78, 116 | DXGI_FORMAT_BC4_TYPELESS = 79, 117 | DXGI_FORMAT_BC4_UNORM = 80, 118 | DXGI_FORMAT_BC4_SNORM = 81, 119 | DXGI_FORMAT_BC5_TYPELESS = 82, 120 | DXGI_FORMAT_BC5_UNORM = 83, 121 | DXGI_FORMAT_BC5_SNORM = 84, 122 | DXGI_FORMAT_B5G6R5_UNORM = 85, 123 | DXGI_FORMAT_B5G5R5A1_UNORM = 86, 124 | DXGI_FORMAT_B8G8R8A8_UNORM = 87, 125 | DXGI_FORMAT_B8G8R8X8_UNORM = 88, 126 | DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM = 89, 127 | DXGI_FORMAT_B8G8R8A8_TYPELESS = 90, 128 | DXGI_FORMAT_B8G8R8A8_UNORM_SRGB = 91, 129 | DXGI_FORMAT_B8G8R8X8_TYPELESS = 92, 130 | DXGI_FORMAT_B8G8R8X8_UNORM_SRGB = 93, 131 | DXGI_FORMAT_BC6H_TYPELESS = 94, 132 | DXGI_FORMAT_BC6H_UF16 = 95, 133 | DXGI_FORMAT_BC6H_SF16 = 96, 134 | DXGI_FORMAT_BC7_TYPELESS = 97, 135 | DXGI_FORMAT_BC7_UNORM = 98, 136 | DXGI_FORMAT_BC7_UNORM_SRGB = 99, 137 | DXGI_FORMAT_AYUV = 100, 138 | DXGI_FORMAT_Y410 = 101, 139 | DXGI_FORMAT_Y416 = 102, 140 | DXGI_FORMAT_NV12 = 103, 141 | DXGI_FORMAT_P010 = 104, 142 | DXGI_FORMAT_P016 = 105, 143 | DXGI_FORMAT_420_OPAQUE = 106, 144 | DXGI_FORMAT_YUY2 = 107, 145 | DXGI_FORMAT_Y210 = 108, 146 | DXGI_FORMAT_Y216 = 109, 147 | DXGI_FORMAT_NV11 = 110, 148 | DXGI_FORMAT_AI44 = 111, 149 | DXGI_FORMAT_IA44 = 112, 150 | DXGI_FORMAT_P8 = 113, 151 | DXGI_FORMAT_A8P8 = 114, 152 | DXGI_FORMAT_B4G4R4A4_UNORM = 115, 153 | 
DXGI_FORMAT_P208 = 130, 154 | DXGI_FORMAT_V208 = 131, 155 | DXGI_FORMAT_V408 = 132, 156 | DXGI_FORMAT_SAMPLER_FEEDBACK_MIN_MIP_OPAQUE, 157 | DXGI_FORMAT_SAMPLER_FEEDBACK_MIP_REGION_USED_OPAQUE, 158 | DXGI_FORMAT_FORCE_UINT = 0xffffffff 159 | }; 160 | 161 | enum D3D10_RESOURCE_DIMENSION { 162 | D3D10_RESOURCE_DIMENSION_UNKNOWN = 0, 163 | D3D10_RESOURCE_DIMENSION_BUFFER = 1, 164 | D3D10_RESOURCE_DIMENSION_TEXTURE1D = 2, 165 | D3D10_RESOURCE_DIMENSION_TEXTURE2D = 3, 166 | D3D10_RESOURCE_DIMENSION_TEXTURE3D = 4 167 | }; 168 | 169 | struct DDS_HEADER_DXT10 { 170 | DXGI_FORMAT dxgiFormat; 171 | D3D10_RESOURCE_DIMENSION resourceDimension; 172 | UINT miscFlag; 173 | UINT arraySize; 174 | UINT miscFlags2; 175 | }; 176 | -------------------------------------------------------------------------------- /src/shaders/render_geometry.comp: -------------------------------------------------------------------------------- 1 | #include "common/bindings.glsl" 2 | #include "common/debug.glsl" 3 | #include "common/pbr.glsl" 4 | #include "common/util.glsl" 5 | #include "common/vbuffer.glsl" 6 | 7 | static const float4x4 bias_matrix = float4x4( 8 | float4(0.5, 0.0, 0.0, 0.0), 9 | float4(0.0, 0.5, 0.0, 0.0), 10 | float4(0.0, 0.0, 1.0, 0.0), 11 | float4(0.5, 0.5, 0.0, 1.0) 12 | ); 13 | 14 | struct Material { 15 | float3 base_color; 16 | float metallic; 17 | float roughness; 18 | }; 19 | 20 | float length_squared(float3 vec) { 21 | return dot(vec, vec); 22 | } 23 | 24 | // Adapted from http://www.thetenthplanet.de/archives/1180 25 | float3x3 compute_cotangent_frame( 26 | float3 normal, 27 | InterpolatedVector_float3 position, 28 | InterpolatedVector_float2 uv 29 | ) { 30 | float3 delta_pos_y_perp = cross(position.dy, normal); 31 | float3 delta_pos_x_perp = cross(normal, position.dx); 32 | 33 | float3 t = delta_pos_y_perp * uv.dx.x + delta_pos_x_perp * uv.dy.x; 34 | float3 b = delta_pos_y_perp * uv.dx.y + delta_pos_x_perp * uv.dy.y; 35 | 36 | float invmax = 1.0 / sqrt(max(length_squared(t), 
length_squared(b))); 37 | return float3x3(t * invmax, b * invmax, normal); 38 | } 39 | 40 | const static float3 AMBIENT_LEVEL = vec3(0.025); 41 | 42 | const static float3 AMBIENT_SH[9] = { 43 | AMBIENT_LEVEL, //float3(0.5,0.5,0.5), 44 | float3(0, 0, 0), 45 | AMBIENT_LEVEL, //float3(0.5,0.5,0.5), 46 | float3(0, 0, 0), 47 | float3(0, 0, 0), 48 | float3(0, 0, 0), 49 | float3(0, 0, 0), 50 | float3(0, 0, 0), 51 | float3(0, 0, 0)}; 52 | 53 | float3 evaluate_spherical_harmonic(float3 n, float3 sh[9]) { 54 | return sh[0] + sh[1] * (n.x) + sh[2] * (n.y) + sh[3] * (n.z) 55 | + sh[4] * (n.x * n.z) + sh[5] * (n.z * n.y) + sh[6] * (n.y * n.x) 56 | + sh[7] * (3.0 * n.z * n.z - 1.0) + sh[8] * (n.x * n.x - n.y * n.y); 57 | } 58 | 59 | vec4 sample_texture(uint32_t index, InterpolatedVector_float2 uv) { 60 | return textureGrad( 61 | sampler2D(textures[nonuniformEXT(index)], repeat_sampler), 62 | uv.value, 63 | uv.dx, 64 | uv.dy 65 | ); 66 | } 67 | 68 | layout(local_size_x = 8, local_size_y = 8) in; 69 | 70 | void main() { 71 | Uniforms uniforms = get_uniforms(); 72 | 73 | if (gl_GlobalInvocationID.x >= uniforms.window_size.x 74 | || gl_GlobalInvocationID.y >= uniforms.window_size.y) { 75 | return; 76 | } 77 | 78 | uint64_t start_time = clockARB(); 79 | 80 | ivec2 coord = ivec2(gl_GlobalInvocationID.xy); 81 | 82 | float2 ndc = 83 | float2(gl_GlobalInvocationID.xy) / float2(uniforms.window_size) * 2.0 84 | - 1.0; 85 | 86 | if (texelFetch(depth_buffer, coord, 0).x == 0.0) { 87 | // Get a ray in local space. 88 | float4 unprojected = uniforms.perspective_inverse * float4(ndc, 0, 1); 89 | float3 local_space_ray = normalize(unprojected.xyz); 90 | // Transform the ray into global space. 
91 | float3 ray = (uniforms.view_inverse * float4(local_space_ray, 0.0)).xyz; 92 | 93 | float exposure = 3; 94 | 95 | float3 samp = 96 | textureLod(samplerCube(skybox, repeat_sampler), ray, 0.0).xyz 97 | * pow(2, exposure); 98 | 99 | imageStore(rw_scene_referred_framebuffer, coord, float4(samp, 1.0)); 100 | return; 101 | } 102 | 103 | uint32_t packed = texelFetch(visibility_buffer, coord, 0).x; 104 | uint32_t instance_index = packed & ((1 << 24) - 1); 105 | uint32_t triangle_index = packed >> 24; 106 | 107 | MeshletReference meshlet_reference = 108 | MeshletReferenceBuffer(uniforms.meshlet_references) 109 | .meshlet_reference[instance_index]; 110 | 111 | Instance instance = InstanceBuffer(uniforms.instances) 112 | .instances[meshlet_reference.instance_index]; 113 | MeshInfo mesh_info = MeshInfoBuffer(instance.mesh_info_address).mesh_info; 114 | Meshlet meshlet = MeshletBuffer(mesh_info.meshlets) 115 | .meshlets[meshlet_reference.meshlet_index]; 116 | uint32_t3 micro_indices = meshlet.index_offset 117 | + MicroIndexBuffer(mesh_info.micro_indices + meshlet.triangle_offset) 118 | .indices[triangle_index]; 119 | 120 | uint3 indices = uint3( 121 | load_index(mesh_info, micro_indices.x), 122 | load_index(mesh_info, micro_indices.y), 123 | load_index(mesh_info, micro_indices.z) 124 | ); 125 | 126 | float3 pos_a = calculate_world_pos(instance, mesh_info, indices.x); 127 | float3 pos_b = calculate_world_pos(instance, mesh_info, indices.y); 128 | float3 pos_c = calculate_world_pos(instance, mesh_info, indices.z); 129 | 130 | BarycentricDeriv bary = CalcFullBary( 131 | uniforms.combined_perspective_view * float4(pos_a, 1.0), 132 | uniforms.combined_perspective_view * float4(pos_b, 1.0), 133 | uniforms.combined_perspective_view * float4(pos_c, 1.0), 134 | ndc, 135 | uniforms.window_size 136 | ); 137 | 138 | InterpolatedVector_float3 world_pos = 139 | interpolate(bary, pos_a, pos_b, pos_c); 140 | 141 | InterpolatedVector_float2 uv = interpolate( 142 | bary, 143 | 
float2(QuanitizedUvs(mesh_info.uvs).uvs[indices.x]) 144 | * mesh_info.texture_scale, 145 | float2(QuanitizedUvs(mesh_info.uvs).uvs[indices.y]) 146 | * mesh_info.texture_scale, 147 | float2(QuanitizedUvs(mesh_info.uvs).uvs[indices.z]) 148 | * mesh_info.texture_scale 149 | ); 150 | // Don't need to do this per triangle uv as it doesn't affect the derivs. 151 | uv.value += mesh_info.texture_offset; 152 | 153 | float3 view_vector = normalize(uniforms.camera_pos - world_pos.value); 154 | 155 | // Load 3 x i8 with a padding byte. 156 | float3 normal = 157 | interpolate( 158 | bary, 159 | normalize( 160 | float3(QuanitizedNormals(mesh_info.normals).normals[indices.x]) 161 | ), 162 | normalize( 163 | float3(QuanitizedNormals(mesh_info.normals).normals[indices.y]) 164 | ), 165 | normalize( 166 | float3(QuanitizedNormals(mesh_info.normals).normals[indices.z]) 167 | ) 168 | ) 169 | .value; 170 | normal = normalize(instance.normal_transform * normal); 171 | 172 | // Calculate whether the triangle is back facing using the (unnormalized) geometric normal. 173 | // I tried getting this with the code from 174 | // https://registry.khronos.org/vulkan/specs/1.3-khr-extensions/html/chap22.html#tessellation-vertex-winding-order 175 | // but I had problems with large triangles that go over the edges of the screen. 176 | float3 geometric_normal = cross(pos_b - pos_a, pos_c - pos_a); 177 | bool is_back_facing = dot(geometric_normal, view_vector) < 0; 178 | 179 | // For leaves etc, we shouldn't treat them as totally opaque by just flipping 180 | // the normals like this. But it's fine for now. 181 | normal = select(is_back_facing, -normal, normal); 182 | 183 | uint32_t cascade_index; 184 | float4 shadow_coord = float4(0, 0, 0, 0); 185 | for (cascade_index = 0; cascade_index < 4; cascade_index++) { 186 | // Get the coordinate in uv space. 
187 | shadow_coord = MiscStorageBuffer(uniforms.misc_storage) 188 | .misc_storage.uv_space_shadow_matrices[cascade_index] 189 | * float4(world_pos.value, 1.0); 190 | float2 offset_coord = shadow_coord.xy - 0.5; 191 | // If it's inside the cascade uv space then stop as 192 | // this is the highest quality cascade for the fragment. 193 | if (abs(offset_coord.x) < 0.5 && abs(offset_coord.y) < 0.5) { 194 | break; 195 | } 196 | } 197 | 198 | // Use the tiniest shadow bias for alpha clipped meshes as they're double sided.. 199 | // Hand-chosen for a shadow_cam_distance of 1024 with the sponza flowers in the nearest shadow frustum. 200 | float shadow_bias = select( 201 | bool(mesh_info.flags & MESH_INFO_FLAGS_ALPHA_CLIP), 202 | 0.000002, 203 | 0.0 204 | ); 205 | 206 | shadow_coord /= shadow_coord.w; 207 | float shadow_sum = 0.0; 208 | for (int x = -1; x <= 1; x++) { 209 | for (int y = -1; y <= 1; y++) { 210 | float2 offset = float2(x, y) / 1024.0; 211 | shadow_sum += texture( 212 | sampler2DArrayShadow(shadowmap, shadowmap_comparison_sampler), 213 | float4( 214 | shadow_coord.xy + offset, 215 | cascade_index, 216 | shadow_coord.z - shadow_bias 217 | ) 218 | ); 219 | } 220 | } 221 | shadow_sum /= 9.0; 222 | 223 | // If the shadow coord clips the far plane of the shadow frustum 224 | // then just ignore any shadow values. If `uniforms.shadow_cam_distance` 225 | // is high enough then this shouldn't be needed. 
226 | shadow_sum = select(shadow_coord.z > 1, 1.0, shadow_sum); 227 | 228 | Material material; 229 | 230 | material.base_color = 231 | sample_texture(mesh_info.base_color_texture_index, uv).rgb 232 | * mesh_info.base_color_factor; 233 | float4 metallic_roughness_sample = 234 | sample_texture(mesh_info.metallic_roughness_texture_index, uv); 235 | material.roughness = metallic_roughness_sample.y; 236 | material.metallic = metallic_roughness_sample.z; 237 | 238 | if (mesh_info.normal_texture_index != UNUSED_TEXTURE_INDEX) { 239 | float3 map_normal = 240 | sample_texture(mesh_info.normal_texture_index, uv).xyz; 241 | map_normal = map_normal * 255.0 / 127.0 - 128.0 / 127.0; 242 | 243 | normal = normalize( 244 | compute_cotangent_frame(normal, world_pos, uv) * map_normal 245 | ); 246 | } 247 | 248 | float3 ambient_lighting = evaluate_spherical_harmonic(normal, AMBIENT_SH) 249 | * diffuse_color(material.base_color, material.metallic); 250 | float sun_intensity = PI * shadow_sum; 251 | float3 sun_lighting = BRDF( 252 | view_vector, 253 | uniforms.sun_dir, 254 | normal, 255 | material.roughness, 256 | material.metallic, 257 | material.base_color, 258 | bool(mesh_info.flags & MESH_INFO_FLAGS_ALPHA_CLIP) 259 | ) 260 | * sun_intensity; 261 | 262 | imageStore( 263 | rw_scene_referred_framebuffer, 264 | coord, 265 | float4(sun_lighting + ambient_lighting, 1.0) 266 | ); 267 | 268 | if (uniforms.debug == UNIFORMS_DEBUG_CASCADES) { 269 | float3 debug_col = DEBUG_COLOURS[cascade_index]; 270 | imageStore( 271 | rw_scene_referred_framebuffer, 272 | coord, 273 | float4(material.base_color * debug_col, 1.0) 274 | ); 275 | } else if (uniforms.debug == UNIFORMS_DEBUG_TRIANGLE_INDEX) { 276 | imageStore( 277 | rw_scene_referred_framebuffer, 278 | coord, 279 | float4(DEBUG_COLOURS[triangle_index % 10], 1.0) 280 | ); 281 | } else if (uniforms.debug == UNIFORMS_DEBUG_INSTANCE_INDEX) { 282 | imageStore( 283 | rw_scene_referred_framebuffer, 284 | coord, 285 | 
float4(DEBUG_COLOURS[meshlet_reference.meshlet_index % 10], 1.0) 286 | ); 287 | } else if (uniforms.debug == UNIFORMS_DEBUG_SHADER_CLOCK) { 288 | uint64_t end_time = clockARB(); 289 | 290 | float heatmapScale = 65000.0f; 291 | float deltaTimeScaled = clamp( 292 | float(timediff(start_time, end_time)) / heatmapScale, 293 | 0.0f, 294 | 1.0f 295 | ); 296 | 297 | imageStore( 298 | rw_scene_referred_framebuffer, 299 | coord, 300 | float4(temperature(deltaTimeScaled), 1.0) 301 | ); 302 | } else if (uniforms.debug == UNIFORMS_DEBUG_NORMALS) { 303 | imageStore( 304 | rw_scene_referred_framebuffer, 305 | coord, 306 | float4(normal * 0.5 + 0.5, 1.0) 307 | ); 308 | } 309 | } 310 | -------------------------------------------------------------------------------- /src/descriptor_set.cpp: -------------------------------------------------------------------------------- 1 | #include "descriptor_set.h" 2 | 3 | IndexTracker::IndexTracker() {} 4 | 5 | uint32_t IndexTracker::push() { 6 | if (!free_indices.empty()) { 7 | uint32_t index = free_indices.back(); 8 | free_indices.pop_back(); 9 | return index; 10 | } 11 | 12 | uint32_t index = next_index; 13 | next_index += 1; 14 | 15 | return index; 16 | } 17 | 18 | void IndexTracker::free(uint32_t index) { 19 | free_indices.push_back(index); 20 | } 21 | 22 | IndexTracker::~IndexTracker() { 23 | // Ensure that we've freed all images. 
24 | assert(free_indices.size() == next_index); 25 | } 26 | 27 | DescriptorSetLayouts 28 | create_descriptor_set_layouts(const vk::raii::Device& device) { 29 | auto everything_bindings = std::array { 30 | // Bindless images 31 | vk::DescriptorSetLayoutBinding { 32 | .binding = 0, 33 | .descriptorType = vk::DescriptorType::eSampledImage, 34 | .descriptorCount = 512, 35 | .stageFlags = vk::ShaderStageFlagBits::eCompute 36 | | vk::ShaderStageFlagBits::eFragment, 37 | }, 38 | // scene_referred_framebuffer 39 | vk::DescriptorSetLayoutBinding { 40 | .binding = 1, 41 | .descriptorType = vk::DescriptorType::eSampledImage, 42 | .descriptorCount = 1, 43 | .stageFlags = vk::ShaderStageFlagBits::eCompute, 44 | }, 45 | // display transform LUT 46 | vk::DescriptorSetLayoutBinding { 47 | .binding = 2, 48 | .descriptorType = vk::DescriptorType::eSampledImage, 49 | .descriptorCount = 1, 50 | .stageFlags = vk::ShaderStageFlagBits::eCompute, 51 | }, 52 | // depthbuffer 53 | vk::DescriptorSetLayoutBinding { 54 | .binding = 3, 55 | .descriptorType = vk::DescriptorType::eSampledImage, 56 | .descriptorCount = 1, 57 | .stageFlags = vk::ShaderStageFlagBits::eCompute, 58 | }, 59 | // shadow map 60 | vk::DescriptorSetLayoutBinding { 61 | .binding = 4, 62 | .descriptorType = vk::DescriptorType::eSampledImage, 63 | .descriptorCount = 1, 64 | .stageFlags = vk::ShaderStageFlagBits::eCompute, 65 | }, 66 | // rw scene referred framebuffer 67 | vk::DescriptorSetLayoutBinding { 68 | .binding = 5, 69 | .descriptorType = vk::DescriptorType::eStorageImage, 70 | .descriptorCount = 1, 71 | .stageFlags = vk::ShaderStageFlagBits::eCompute, 72 | }, 73 | // visibility buffer 74 | vk::DescriptorSetLayoutBinding { 75 | .binding = 6, 76 | .descriptorType = vk::DescriptorType::eSampledImage, 77 | .descriptorCount = 1, 78 | .stageFlags = vk::ShaderStageFlagBits::eCompute, 79 | }, 80 | // clamp sampler 81 | vk::DescriptorSetLayoutBinding { 82 | .binding = 7, 83 | .descriptorType = vk::DescriptorType::eSampler, 84 
| .descriptorCount = 1, 85 | .stageFlags = vk::ShaderStageFlagBits::eCompute, 86 | }, 87 | // repeat sampler 88 | vk::DescriptorSetLayoutBinding { 89 | .binding = 8, 90 | .descriptorType = vk::DescriptorType::eSampler, 91 | .descriptorCount = 1, 92 | .stageFlags = vk::ShaderStageFlagBits::eCompute 93 | | vk::ShaderStageFlagBits::eFragment, 94 | }, 95 | // Shadowmap comparison sampler 96 | vk::DescriptorSetLayoutBinding { 97 | .binding = 9, 98 | .descriptorType = vk::DescriptorType::eSampler, 99 | .descriptorCount = 1, 100 | .stageFlags = vk::ShaderStageFlagBits::eCompute, 101 | }, 102 | // skybox 103 | vk::DescriptorSetLayoutBinding { 104 | .binding = 10, 105 | .descriptorType = vk::DescriptorType::eSampledImage, 106 | .descriptorCount = 1, 107 | .stageFlags = vk::ShaderStageFlagBits::eCompute, 108 | }, 109 | }; 110 | 111 | std::vector flags(everything_bindings.size()); 112 | // Set the images as being partially bound, so not all slots have to be used. 113 | flags[0] = vk::DescriptorBindingFlagBits::ePartiallyBound; 114 | 115 | auto flags_create_info = vk::DescriptorSetLayoutBindingFlagsCreateInfo { 116 | .bindingCount = static_cast(flags.size()), 117 | .pBindingFlags = flags.data()}; 118 | 119 | auto swapchain_storage_image_bindings = std::array { 120 | vk::DescriptorSetLayoutBinding { 121 | .binding = 0, 122 | .descriptorType = vk::DescriptorType::eStorageImage, 123 | .descriptorCount = 1, 124 | .stageFlags = vk::ShaderStageFlagBits::eCompute}, 125 | }; 126 | 127 | return DescriptorSetLayouts { 128 | .everything = device.createDescriptorSetLayout({ 129 | .pNext = &flags_create_info, 130 | .bindingCount = everything_bindings.size(), 131 | .pBindings = everything_bindings.data(), 132 | }), 133 | .swapchain_storage_image = device.createDescriptorSetLayout({ 134 | .bindingCount = swapchain_storage_image_bindings.size(), 135 | .pBindings = swapchain_storage_image_bindings.data(), 136 | }), 137 | }; 138 | } 139 | 140 | uint32_t 141 | DescriptorSet::write_image(const 
ImageWithView& image, vk::Device device) { 142 | auto index = tracker->push(); 143 | 144 | if (index >= 512) { 145 | dbg(index); 146 | abort(); 147 | } 148 | 149 | auto image_info = vk::DescriptorImageInfo { 150 | .imageView = *image.view, 151 | .imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal}; 152 | 153 | device.updateDescriptorSets( 154 | {vk::WriteDescriptorSet { 155 | .dstSet = *set, 156 | .dstBinding = 0, 157 | .dstArrayElement = index, 158 | .descriptorCount = 1, 159 | .descriptorType = vk::DescriptorType::eSampledImage, 160 | .pImageInfo = &image_info}}, 161 | {} 162 | ); 163 | 164 | return index; 165 | } 166 | 167 | DescriptorSet::DescriptorSet( 168 | vk::raii::DescriptorSet set_, 169 | std::vector swapchain_image_sets_ 170 | ) : 171 | set(std::move(set_)), 172 | swapchain_image_sets(std::move(swapchain_image_sets_)) {} 173 | 174 | void DescriptorSet::write_resizing_descriptors( 175 | const ResizingResources& resizing_resources, 176 | const vk::raii::Device& device, 177 | const std::vector& swapchain_image_views 178 | ) { 179 | auto image_info = vk::DescriptorImageInfo { 180 | .imageView = *resizing_resources.scene_referred_framebuffer.view, 181 | .imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal}; 182 | 183 | auto depthbuffer_image_info = vk::DescriptorImageInfo { 184 | .imageView = *resizing_resources.depthbuffer.view, 185 | .imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal}; 186 | 187 | auto rw_scene_referred_framebuffer_info = vk::DescriptorImageInfo { 188 | .imageView = *resizing_resources.scene_referred_framebuffer.view, 189 | .imageLayout = vk::ImageLayout::eGeneral}; 190 | 191 | auto visbuffer_image_info = vk::DescriptorImageInfo { 192 | .imageView = *resizing_resources.visbuffer.view, 193 | .imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal}; 194 | 195 | device.updateDescriptorSets( 196 | {vk::WriteDescriptorSet { 197 | .dstSet = *set, 198 | .dstBinding = 1, 199 | .descriptorCount = 1, 200 | .descriptorType = 
vk::DescriptorType::eSampledImage, 201 | .pImageInfo = &image_info}, 202 | vk::WriteDescriptorSet { 203 | .dstSet = *set, 204 | .dstBinding = 3, 205 | .descriptorCount = 1, 206 | .descriptorType = vk::DescriptorType::eSampledImage, 207 | .pImageInfo = &depthbuffer_image_info}, 208 | vk::WriteDescriptorSet { 209 | .dstSet = *set, 210 | .dstBinding = 5, 211 | .descriptorCount = 1, 212 | .descriptorType = vk::DescriptorType::eStorageImage, 213 | .pImageInfo = &rw_scene_referred_framebuffer_info}, 214 | vk::WriteDescriptorSet { 215 | .dstSet = *set, 216 | .dstBinding = 6, 217 | .descriptorCount = 1, 218 | .descriptorType = vk::DescriptorType::eSampledImage, 219 | .pImageInfo = &visbuffer_image_info}}, 220 | {} 221 | ); 222 | 223 | for (uint32_t i = 0; i < swapchain_image_views.size(); i++) { 224 | auto swapchain_image_info = vk::DescriptorImageInfo { 225 | .imageView = *swapchain_image_views[i], 226 | .imageLayout = vk::ImageLayout::eGeneral}; 227 | 228 | device.updateDescriptorSets( 229 | { 230 | vk::WriteDescriptorSet { 231 | .dstSet = *swapchain_image_sets[i], 232 | .dstBinding = 0, 233 | .descriptorCount = 1, 234 | .descriptorType = vk::DescriptorType::eStorageImage, 235 | .pImageInfo = &swapchain_image_info}, 236 | }, 237 | {} 238 | ); 239 | } 240 | } 241 | 242 | void DescriptorSet::write_descriptors( 243 | const Resources& resources, 244 | const vk::raii::Device& device, 245 | const std::vector& swapchain_image_views 246 | ) { 247 | write_resizing_descriptors( 248 | resources.resizing, 249 | device, 250 | swapchain_image_views 251 | ); 252 | 253 | auto lut_image_info = vk::DescriptorImageInfo { 254 | .imageView = *resources.display_transform_lut.view, 255 | .imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal}; 256 | 257 | auto shadowmap_image_info = vk::DescriptorImageInfo { 258 | .imageView = *resources.shadowmap.view, 259 | .imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal}; 260 | 261 | auto clamp_sampler_info = 262 | vk::DescriptorImageInfo {.sampler = 
*resources.clamp_sampler}; 263 | auto repeat_sampler_info = 264 | vk::DescriptorImageInfo {.sampler = *resources.repeat_sampler}; 265 | auto shadowmap_comparison_sampler_info = vk::DescriptorImageInfo { 266 | .sampler = *resources.shadowmap_comparison_sampler}; 267 | 268 | auto skybox_image_info = vk::DescriptorImageInfo { 269 | .imageView = *resources.skybox.view, 270 | .imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal}; 271 | 272 | // Write initial descriptor sets. 273 | device.updateDescriptorSets( 274 | { 275 | vk::WriteDescriptorSet { 276 | .dstSet = *set, 277 | .dstBinding = 2, 278 | .descriptorCount = 1, 279 | .descriptorType = vk::DescriptorType::eSampledImage, 280 | .pImageInfo = &lut_image_info}, 281 | vk::WriteDescriptorSet { 282 | .dstSet = *set, 283 | .dstBinding = 4, 284 | .descriptorCount = 1, 285 | .descriptorType = vk::DescriptorType::eSampledImage, 286 | .pImageInfo = &shadowmap_image_info}, 287 | 288 | vk::WriteDescriptorSet { 289 | .dstSet = *set, 290 | .dstBinding = 7, 291 | .descriptorCount = 1, 292 | .descriptorType = vk::DescriptorType::eSampler, 293 | .pImageInfo = &clamp_sampler_info}, 294 | vk::WriteDescriptorSet { 295 | .dstSet = *set, 296 | .dstBinding = 8, 297 | .descriptorCount = 1, 298 | .descriptorType = vk::DescriptorType::eSampler, 299 | .pImageInfo = &repeat_sampler_info}, 300 | vk::WriteDescriptorSet { 301 | .dstSet = *set, 302 | .dstBinding = 9, 303 | .descriptorCount = 1, 304 | .descriptorType = vk::DescriptorType::eSampler, 305 | .pImageInfo = &shadowmap_comparison_sampler_info}, 306 | vk::WriteDescriptorSet { 307 | .dstSet = *set, 308 | .dstBinding = 10, 309 | .descriptorCount = 1, 310 | .descriptorType = vk::DescriptorType::eSampledImage, 311 | .pImageInfo = &skybox_image_info}, 312 | 313 | }, 314 | {} 315 | ); 316 | } 317 | -------------------------------------------------------------------------------- /src/resources/image_loading.cpp: -------------------------------------------------------------------------------- 1 
| #include "image_loading.h" 2 | 3 | #include "../allocations/persistently_mapped.h" 4 | #include "../sync.h" 5 | #include "dds.h" 6 | #include "ktx2.h" 7 | 8 | struct FormatInfo { 9 | vk::Format format; 10 | uint32_t bits_per_pixel; 11 | bool is_block_compressed = false; 12 | }; 13 | 14 | FormatInfo translate_format(DXGI_FORMAT dxgi_format) { 15 | switch (dxgi_format) { 16 | case DXGI_FORMAT_R9G9B9E5_SHAREDEXP: 17 | return { 18 | .format = vk::Format::eE5B9G9R9UfloatPack32, 19 | .bits_per_pixel = 32}; 20 | case DXGI_FORMAT_BC1_UNORM: 21 | return { 22 | .format = vk::Format::eBc1RgbSrgbBlock, 23 | .bits_per_pixel = 4, 24 | .is_block_compressed = true}; 25 | case DXGI_FORMAT_BC7_UNORM: 26 | return {// todo: this is only to work around a bug in compressonator 27 | .format = vk::Format::eBc7SrgbBlock, 28 | .bits_per_pixel = 8, 29 | .is_block_compressed = true}; 30 | case DXGI_FORMAT_BC7_UNORM_SRGB: 31 | return { 32 | .format = vk::Format::eBc7SrgbBlock, 33 | .bits_per_pixel = 8, 34 | .is_block_compressed = true}; 35 | default: 36 | dbg(dxgi_format); 37 | abort(); 38 | } 39 | } 40 | 41 | struct Dimension { 42 | vk::ImageType type; 43 | vk::ImageViewType view_type; 44 | }; 45 | 46 | Dimension translate_dimension(D3D10_RESOURCE_DIMENSION dimension) { 47 | switch (dimension) { 48 | case D3D10_RESOURCE_DIMENSION_TEXTURE3D: 49 | return { 50 | .type = vk::ImageType::e3D, 51 | .view_type = vk::ImageViewType::e3D}; 52 | case D3D10_RESOURCE_DIMENSION_TEXTURE2D: 53 | return { 54 | .type = vk::ImageType::e2D, 55 | .view_type = vk::ImageViewType::e2D}; 56 | default: 57 | dbg(dimension); 58 | assert(false); 59 | } 60 | } 61 | 62 | uint32_t round_up(uint32_t value, uint32_t round_to) { 63 | return static_cast(std::ceil(float(value) / float(round_to))) 64 | * round_to; 65 | } 66 | 67 | ImageWithView load_dds( 68 | const std::filesystem::path& filepath, 69 | vma::Allocator allocator, 70 | const vk::raii::Device& device, 71 | const vk::raii::CommandBuffer& command_buffer, 72 | uint32_t 
graphics_queue_family, 73 | std::vector& temp_buffers 74 | ) { 75 | if (!std::filesystem::exists(filepath)) { 76 | dbg(filepath, "does not exist"); 77 | abort(); 78 | } 79 | 80 | std::ifstream stream(filepath, std::ios::binary); 81 | 82 | assert(stream); 83 | 84 | std::array dwMagic; 85 | DDS_HEADER header; 86 | DDS_HEADER_DXT10 header10; 87 | stream.read(dwMagic.data(), sizeof dwMagic); 88 | 89 | auto expected_magic = std::array {'D', 'D', 'S', ' '}; 90 | 91 | assert(dwMagic == expected_magic); 92 | 93 | stream.read(reinterpret_cast(&header), sizeof header); 94 | 95 | stream.read(reinterpret_cast(&header10), sizeof header10); 96 | 97 | auto format = translate_format(header10.dxgiFormat); 98 | 99 | auto data_offset = sizeof dwMagic + sizeof header + sizeof header10; 100 | 101 | auto dimension = translate_dimension(header10.resourceDimension); 102 | 103 | auto width = header.dwWidth; 104 | auto height = header.dwHeight; 105 | auto depth = std::max(header.dwDepth, 1u); 106 | 107 | stream.seekg(0, stream.end); 108 | auto bytes_remaining = static_cast(stream.tellg()) - data_offset; 109 | stream.seekg(static_cast(data_offset), stream.beg); 110 | 111 | auto mip_levels = header.dwMipMapCount; 112 | bool is_cubemap = header.dwCaps2 & DDSCAPS2_CUBEMAP; 113 | 114 | auto subresource_range = vk::ImageSubresourceRange { 115 | .aspectMask = vk::ImageAspectFlagBits::eColor, 116 | .baseMipLevel = 0, 117 | .levelCount = mip_levels, 118 | .baseArrayLayer = 0, 119 | .layerCount = is_cubemap ? 6u : 1u, 120 | }; 121 | 122 | auto image_name = std::string("'") + filepath.string() + "'"; 123 | 124 | auto image = ImageWithView( 125 | vk::ImageCreateInfo { 126 | .flags = is_cubemap ? 
vk::ImageCreateFlagBits::eCubeCompatible 127 | : vk::ImageCreateFlagBits(0), 128 | .imageType = dimension.type, 129 | .format = format.format, 130 | .extent = 131 | vk::Extent3D { 132 | .width = width, 133 | .height = height, 134 | .depth = depth, 135 | }, 136 | .mipLevels = mip_levels, 137 | .arrayLayers = is_cubemap ? 6u : 1u, 138 | .usage = vk::ImageUsageFlagBits::eSampled 139 | | vk::ImageUsageFlagBits::eTransferDst}, 140 | allocator, 141 | device, 142 | image_name.data(), 143 | subresource_range, 144 | is_cubemap ? vk::ImageViewType::eCube : dimension.view_type 145 | ); 146 | 147 | auto staging_buffer_name = filepath.string() + " staging buffer"; 148 | 149 | auto staging_buffer = PersistentlyMappedBuffer(AllocatedBuffer( 150 | vk::BufferCreateInfo { 151 | .size = bytes_remaining, 152 | .usage = vk::BufferUsageFlagBits::eTransferSrc}, 153 | { 154 | .flags = vma::AllocationCreateFlagBits::eMapped 155 | | vma::AllocationCreateFlagBits::eHostAccessSequentialWrite, 156 | .usage = vma::MemoryUsage::eAuto, 157 | }, 158 | allocator, 159 | staging_buffer_name 160 | )); 161 | 162 | stream.read( 163 | reinterpret_cast(staging_buffer.mapped_ptr), 164 | static_cast(bytes_remaining) 165 | ); 166 | 167 | insert_color_image_barriers( 168 | command_buffer, 169 | std::array {ImageBarrier { 170 | .prev_access = THSVS_ACCESS_TRANSFER_WRITE, 171 | .next_access = THSVS_ACCESS_TRANSFER_WRITE, 172 | .discard_contents = true, 173 | .queue_family = graphics_queue_family, 174 | .image = image.image.image, 175 | .subresource_range = subresource_range}} 176 | ); 177 | 178 | uint64_t buffer_offset = 0; 179 | 180 | std::vector regions(mip_levels); 181 | 182 | for (uint32_t i = 0; i < mip_levels; i++) { 183 | auto level_width = std::max(width >> i, 1u); 184 | auto level_height = std::max(height >> i, 1u); 185 | 186 | regions[i] = vk::BufferImageCopy { 187 | .bufferOffset = buffer_offset, 188 | .imageSubresource = 189 | { 190 | .aspectMask = vk::ImageAspectFlagBits::eColor, 191 | .mipLevel = 
static_cast(i), 192 | .baseArrayLayer = 0, 193 | .layerCount = is_cubemap ? 6u : 1u, 194 | }, 195 | .imageExtent = vk::Extent3D { 196 | .width = level_width, 197 | .height = level_height, 198 | .depth = depth}}; 199 | 200 | // We need to round up the width and heights here because for block 201 | // compressed textures, the minimum amount of data a miplevel can use 202 | // is the equivalent of 4x4 pixels, even when the actual mip size is smaller. 203 | auto rounded_width = 204 | format.is_block_compressed ? round_up(level_width, 4) : level_width; 205 | auto rounded_height = format.is_block_compressed 206 | ? round_up(level_height, 4) 207 | : level_height; 208 | 209 | buffer_offset += 210 | (rounded_width * rounded_height * depth * (is_cubemap ? 6 : 1)) 211 | * format.bits_per_pixel / 8; 212 | } 213 | 214 | if (buffer_offset != bytes_remaining) { 215 | dbg(buffer_offset, bytes_remaining, filepath, format.bits_per_pixel); 216 | assert(buffer_offset == bytes_remaining); 217 | } 218 | 219 | command_buffer.copyBufferToImage( 220 | staging_buffer.buffer.buffer, 221 | image.image.image, 222 | vk::ImageLayout::eTransferDstOptimal, 223 | regions 224 | ); 225 | 226 | insert_color_image_barriers( 227 | command_buffer, 228 | std::array {ImageBarrier { 229 | .prev_access = THSVS_ACCESS_TRANSFER_WRITE, 230 | .next_access = 231 | THSVS_ACCESS_FRAGMENT_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER, 232 | .queue_family = graphics_queue_family, 233 | .image = image.image.image, 234 | .subresource_range = subresource_range}} 235 | ); 236 | 237 | temp_buffers.push_back(std::move(staging_buffer.buffer)); 238 | 239 | return image; 240 | } 241 | 242 | ImageWithView load_ktx2_image( 243 | const std::filesystem::path& filepath, 244 | vma::Allocator allocator, 245 | const vk::raii::Device& device, 246 | const vk::raii::CommandBuffer& command_buffer, 247 | uint32_t graphics_queue_family, 248 | std::vector& temp_buffers 249 | ) { 250 | std::ifstream stream(filepath, std::ios::binary); 
251 | 252 | std::array identifier; 253 | stream.read((char*)identifier.data(), identifier.size()); 254 | 255 | if (identifier != KTX2_IDENTIFIER) { 256 | dbg(filepath); 257 | abort(); 258 | } 259 | 260 | Ktx2Header header; 261 | 262 | stream.read((char*)&header, sizeof header); 263 | 264 | Ktx2Index index; 265 | 266 | stream.read((char*)&index, sizeof index); 267 | 268 | std::vector levels(std::max(1u, header.level_count)); 269 | 270 | vk::DeviceSize total_size = 0; 271 | 272 | for (size_t i = 0; i < std::max(1u, header.level_count); i++) { 273 | stream.read((char*)&levels[i], sizeof levels[i]); 274 | total_size += levels[i].uncompressed_byte_length; 275 | } 276 | 277 | auto subresource_range = vk::ImageSubresourceRange { 278 | .aspectMask = vk::ImageAspectFlagBits::eColor, 279 | .baseMipLevel = 0, 280 | .levelCount = header.level_count, 281 | .baseArrayLayer = 0, 282 | .layerCount = header.face_count, 283 | }; 284 | 285 | bool is_cubemap = header.face_count == 6; 286 | 287 | auto image = ImageWithView( 288 | vk::ImageCreateInfo { 289 | .flags = is_cubemap ? vk::ImageCreateFlagBits::eCubeCompatible 290 | : vk::ImageCreateFlagBits(0), 291 | .imageType = vk::ImageType::e2D, 292 | .format = header.format, 293 | .extent = 294 | vk::Extent3D { 295 | .width = header.width, 296 | .height = header.height, 297 | .depth = std::max(header.depth, 1u), 298 | }, 299 | .mipLevels = header.level_count, 300 | .arrayLayers = header.face_count, 301 | .usage = vk::ImageUsageFlagBits::eSampled 302 | | vk::ImageUsageFlagBits::eTransferDst}, 303 | allocator, 304 | device, 305 | filepath.string(), 306 | subresource_range, 307 | is_cubemap ? 
vk::ImageViewType::eCube : vk::ImageViewType::e2D 308 | ); 309 | 310 | auto staging_buffer = PersistentlyMappedBuffer(AllocatedBuffer( 311 | vk::BufferCreateInfo { 312 | .size = total_size, 313 | .usage = vk::BufferUsageFlagBits::eTransferSrc}, 314 | { 315 | .flags = vma::AllocationCreateFlagBits::eMapped 316 | | vma::AllocationCreateFlagBits::eHostAccessSequentialWrite, 317 | .usage = vma::MemoryUsage::eAuto, 318 | }, 319 | allocator, 320 | filepath.string() + " staging buffer" 321 | )); 322 | 323 | vk::DeviceSize offset = 0; 324 | std::vector regions(levels.size()); 325 | 326 | for (uint32_t i = 0; i < levels.size(); i++) { 327 | auto level_width = std::max(header.width >> i, 1u); 328 | auto level_height = std::max(header.height >> i, 1u); 329 | 330 | auto level = levels[i]; 331 | 332 | stream.seekg(level.byte_offset, stream.beg); 333 | 334 | if (header.supercompression_scheme 335 | == Ktx2SupercompressionScheme::Zstandard) { 336 | std::vector compressed_bytes(level.byte_length); 337 | stream.read( 338 | reinterpret_cast(compressed_bytes.data()), 339 | level.byte_length 340 | ); 341 | auto bytes_decompressed = ZSTD_decompress( 342 | reinterpret_cast(staging_buffer.mapped_ptr) + offset, 343 | level.uncompressed_byte_length, 344 | compressed_bytes.data(), 345 | level.byte_length 346 | ); 347 | assert(bytes_decompressed == level.uncompressed_byte_length); 348 | } else { 349 | assert( 350 | header.supercompression_scheme 351 | == Ktx2SupercompressionScheme::None 352 | ); 353 | 354 | stream.read( 355 | reinterpret_cast(staging_buffer.mapped_ptr) + offset, 356 | level.uncompressed_byte_length 357 | ); 358 | } 359 | regions[i] = vk::BufferImageCopy { 360 | .bufferOffset = offset, 361 | .imageSubresource = 362 | { 363 | .aspectMask = vk::ImageAspectFlagBits::eColor, 364 | .mipLevel = i, 365 | .baseArrayLayer = 0, 366 | .layerCount = header.face_count, 367 | }, 368 | .imageExtent = vk::Extent3D { 369 | .width = level_width, 370 | .height = level_height, 371 | .depth = 
std::max(header.depth, 1u)}}; 372 | offset += level.uncompressed_byte_length; 373 | } 374 | if (offset != total_size) { 375 | dbg(offset, total_size); 376 | abort(); 377 | } 378 | 379 | insert_color_image_barriers( 380 | command_buffer, 381 | std::array {ImageBarrier { 382 | .prev_access = THSVS_ACCESS_TRANSFER_WRITE, 383 | .next_access = THSVS_ACCESS_TRANSFER_WRITE, 384 | .discard_contents = true, 385 | .queue_family = graphics_queue_family, 386 | .image = image.image.image, 387 | .subresource_range = subresource_range}} 388 | ); 389 | 390 | command_buffer.copyBufferToImage( 391 | staging_buffer.buffer.buffer, 392 | image.image.image, 393 | vk::ImageLayout::eTransferDstOptimal, 394 | regions 395 | ); 396 | 397 | insert_color_image_barriers( 398 | command_buffer, 399 | std::array {ImageBarrier { 400 | .prev_access = THSVS_ACCESS_TRANSFER_WRITE, 401 | .next_access = 402 | THSVS_ACCESS_FRAGMENT_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER, 403 | .queue_family = graphics_queue_family, 404 | .image = image.image.image, 405 | .subresource_range = subresource_range}} 406 | ); 407 | 408 | temp_buffers.push_back(std::move(staging_buffer.buffer)); 409 | 410 | return image; 411 | } 412 | -------------------------------------------------------------------------------- /src/pipelines.cpp: -------------------------------------------------------------------------------- 1 | #include "pipelines.h" 2 | 3 | #include "shared_cpu_gpu.h" 4 | #include "util.h" 5 | 6 | const auto RGBA_MASK = vk::ColorComponentFlagBits::eR 7 | | vk::ColorComponentFlagBits::eG | vk::ColorComponentFlagBits::eB 8 | | vk::ColorComponentFlagBits::eA; 9 | 10 | const auto FILL_RASTERIZATION = vk::PipelineRasterizationStateCreateInfo { 11 | .polygonMode = vk::PolygonMode::eFill, 12 | .cullMode = vk::CullModeFlagBits::eBack, 13 | .lineWidth = 1.0f}; 14 | 15 | const auto FILL_RASTERIZATION_CULL_FRONT_FACES = 16 | vk::PipelineRasterizationStateCreateInfo { 17 | .polygonMode = vk::PolygonMode::eFill, 18 | 
.cullMode = vk::CullModeFlagBits::eFront, 19 | .lineWidth = 1.0f}; 20 | 21 | const auto FILL_RASTERIZATION_DOUBLE_SIDED = 22 | vk::PipelineRasterizationStateCreateInfo { 23 | .polygonMode = vk::PolygonMode::eFill, 24 | .cullMode = vk::CullModeFlagBits::eNone, 25 | .lineWidth = 1.0f}; 26 | 27 | const auto NO_MULTISAMPLING = vk::PipelineMultisampleStateCreateInfo { 28 | .rasterizationSamples = vk::SampleCountFlagBits::e1, 29 | .sampleShadingEnable = false, 30 | .minSampleShading = 1.0f, 31 | .alphaToCoverageEnable = false, 32 | .alphaToOneEnable = false, 33 | }; 34 | 35 | const std::array DEFAULT_DYNAMIC_STATES = { 36 | vk::DynamicState::eViewport, 37 | vk::DynamicState::eScissor, 38 | }; 39 | 40 | const auto DEFAULT_DYNAMIC_STATE_INFO = vk::PipelineDynamicStateCreateInfo { 41 | .dynamicStateCount = DEFAULT_DYNAMIC_STATES.size(), 42 | .pDynamicStates = DEFAULT_DYNAMIC_STATES.data()}; 43 | 44 | const auto DEFAULT_VIEWPORT_STATE = vk::PipelineViewportStateCreateInfo { 45 | .viewportCount = 1, 46 | .scissorCount = 1, 47 | }; 48 | 49 | const auto TRIANGLE_LIST_INPUT_ASSEMBLY = 50 | vk::PipelineInputAssemblyStateCreateInfo { 51 | .topology = vk::PrimitiveTopology::eTriangleList}; 52 | 53 | const vk::PipelineVertexInputStateCreateInfo EMPTY_VERTEX_INPUT = {}; 54 | 55 | const auto SINGLE_REPLACE_BLEND_ATTACHMENT = 56 | std::array {vk::PipelineColorBlendAttachmentState { 57 | .blendEnable = false, 58 | .colorWriteMask = RGBA_MASK}}; 59 | 60 | const auto EMPTY_BLEND_STATE = vk::PipelineColorBlendStateCreateInfo {}; 61 | 62 | const auto SINGLE_REPLACE_BLEND_STATE = vk::PipelineColorBlendStateCreateInfo { 63 | .logicOpEnable = false, 64 | .attachmentCount = SINGLE_REPLACE_BLEND_ATTACHMENT.size(), 65 | .pAttachments = SINGLE_REPLACE_BLEND_ATTACHMENT.data()}; 66 | 67 | const auto DEPTH_WRITE_GREATER = vk::PipelineDepthStencilStateCreateInfo { 68 | .depthTestEnable = true, 69 | .depthWriteEnable = true, 70 | .depthCompareOp = vk::CompareOp::eGreater, 71 | }; 72 | 73 | const auto 
DEPTH_WRITE_LESS = vk::PipelineDepthStencilStateCreateInfo { 74 | .depthTestEnable = true, 75 | .depthWriteEnable = true, 76 | .depthCompareOp = vk::CompareOp::eLess, 77 | }; 78 | 79 | vk::raii::ShaderModule create_shader_from_file( 80 | const vk::raii::Device& device, 81 | const std::filesystem::path& filepath 82 | ) { 83 | auto bytes = read_file_to_bytes(filepath); 84 | 85 | auto shader = device.createShaderModule(vk::ShaderModuleCreateInfo { 86 | .codeSize = bytes.size(), 87 | .pCode = reinterpret_cast(bytes.data()), 88 | }); 89 | 90 | return shader; 91 | } 92 | 93 | vk::raii::Pipeline name_pipeline( 94 | vk::raii::Pipeline pipeline, 95 | const vk::raii::Device& device, 96 | const std::string& name 97 | ) { 98 | std::string pipeline_name = std::string("pipeline ") + name; 99 | VkPipeline c_pipeline = *pipeline; 100 | device.setDebugUtilsObjectNameEXT(vk::DebugUtilsObjectNameInfoEXT { 101 | .objectType = vk::ObjectType::ePipeline, 102 | .objectHandle = reinterpret_cast(c_pipeline), 103 | .pObjectName = pipeline_name.data()}); 104 | return pipeline; 105 | } 106 | 107 | vk::raii::Pipeline create_compute_pipeline_from_shader( 108 | const vk::raii::Device& device, 109 | const vk::raii::PipelineLayout& layout, 110 | const std::filesystem::path& filepath 111 | ) { 112 | auto shader = create_shader_from_file(device, filepath); 113 | 114 | auto create_info = std::array {vk::ComputePipelineCreateInfo { 115 | .stage = 116 | vk::PipelineShaderStageCreateInfo { 117 | .stage = vk::ShaderStageFlagBits::eCompute, 118 | .module = *shader, 119 | .pName = "main", 120 | }, 121 | .layout = *layout}}; 122 | 123 | return name_pipeline( 124 | std::move(device.createComputePipelines(nullptr, create_info)[0]), 125 | device, 126 | filepath.string() 127 | ); 128 | } 129 | 130 | Pipelines Pipelines::compile_pipelines( 131 | const vk::raii::Device& device, 132 | const DescriptorSetLayouts& descriptor_set_layouts 133 | ) { 134 | auto descriptor_set_layout_array = std::array { 135 | 
*descriptor_set_layouts.everything, 136 | *descriptor_set_layouts.swapchain_storage_image}; 137 | 138 | // Simple push constant for instructing the shadow pass which shadowmap to render to. 139 | auto push_constant_ranges = std::array {vk::PushConstantRange { 140 | .stageFlags = vk::ShaderStageFlagBits::eVertex 141 | | vk::ShaderStageFlagBits::eCompute, 142 | .offset = 0, 143 | .size = sizeof(UniformBufferAddressConstant) 144 | + sizeof(UniformBufferAddressConstant)}}; 145 | 146 | auto pipeline_layout = 147 | device.createPipelineLayout(vk::PipelineLayoutCreateInfo { 148 | .setLayoutCount = descriptor_set_layout_array.size(), 149 | .pSetLayouts = descriptor_set_layout_array.data(), 150 | .pushConstantRangeCount = push_constant_ranges.size(), 151 | .pPushConstantRanges = push_constant_ranges.data(), 152 | }); 153 | 154 | auto copy_quantized_positions_push_constants = 155 | std::array {vk::PushConstantRange { 156 | .stageFlags = vk::ShaderStageFlagBits::eCompute, 157 | .offset = 0, 158 | .size = sizeof(CopyQuantizedPositionsConstant)}}; 159 | 160 | auto copy_quantized_positions_pipeline_layout = 161 | device.createPipelineLayout(vk::PipelineLayoutCreateInfo { 162 | .pushConstantRangeCount = 163 | copy_quantized_positions_push_constants.size(), 164 | .pPushConstantRanges = 165 | copy_quantized_positions_push_constants.data(), 166 | 167 | }); 168 | 169 | auto visbuffer_opaque_vertex = create_shader_from_file( 170 | device, 171 | "compiled_shaders/visbuffer_opaque_vertex.spv" 172 | ); 173 | 174 | auto visbuffer_opaque_pixel = create_shader_from_file( 175 | device, 176 | "compiled_shaders/visbuffer_opaque_pixel.spv" 177 | ); 178 | 179 | auto visbuffer_alpha_clip_pixel = create_shader_from_file( 180 | device, 181 | "compiled_shaders/visbuffer_alpha_clip_pixel.spv" 182 | ); 183 | 184 | auto visbuffer_alpha_clip_vertex = create_shader_from_file( 185 | device, 186 | "compiled_shaders/visbuffer_alpha_clip_vertex.spv" 187 | ); 188 | 189 | auto shadowmap_opaque_vertex = 
create_shader_from_file( 190 | device, 191 | "compiled_shaders/shadowmap_opaque_vertex.spv" 192 | ); 193 | 194 | auto shadowmap_alpha_clip_vertex = create_shader_from_file( 195 | device, 196 | "compiled_shaders/shadowmap_alpha_clip_vertex.spv" 197 | ); 198 | 199 | auto shadowmap_alpha_clipped_pixel = create_shader_from_file( 200 | device, 201 | "compiled_shaders/shadowmap_alpha_clipped_pixel.spv" 202 | ); 203 | 204 | auto visbuffer_stages = std::array { 205 | vk::PipelineShaderStageCreateInfo { 206 | .stage = vk::ShaderStageFlagBits::eVertex, 207 | .module = *visbuffer_opaque_vertex, 208 | .pName = "main", 209 | }, 210 | vk::PipelineShaderStageCreateInfo { 211 | .stage = vk::ShaderStageFlagBits::eFragment, 212 | .module = *visbuffer_opaque_pixel, 213 | .pName = "main"}}; 214 | 215 | auto visbuffer_alpha_clip_stages = std::array { 216 | vk::PipelineShaderStageCreateInfo { 217 | .stage = vk::ShaderStageFlagBits::eVertex, 218 | .module = *visbuffer_alpha_clip_vertex, 219 | .pName = "main", 220 | }, 221 | vk::PipelineShaderStageCreateInfo { 222 | .stage = vk::ShaderStageFlagBits::eFragment, 223 | .module = *visbuffer_alpha_clip_pixel, 224 | .pName = "main"}}; 225 | 226 | auto opaque_shadow_stage = std::array {vk::PipelineShaderStageCreateInfo { 227 | .stage = vk::ShaderStageFlagBits::eVertex, 228 | .module = *shadowmap_opaque_vertex, 229 | .pName = "main", 230 | }}; 231 | 232 | auto alpha_clip_shadow_stages = std::array { 233 | vk::PipelineShaderStageCreateInfo { 234 | .stage = vk::ShaderStageFlagBits::eVertex, 235 | .module = *shadowmap_alpha_clip_vertex, 236 | .pName = "main", 237 | }, 238 | vk::PipelineShaderStageCreateInfo { 239 | .stage = vk::ShaderStageFlagBits::eFragment, 240 | .module = *shadowmap_alpha_clipped_pixel, 241 | .pName = "main", 242 | }, 243 | }; 244 | 245 | auto u32 = vk::Format::eR32Uint; 246 | 247 | auto u32_format_rendering_info = vk::PipelineRenderingCreateInfoKHR { 248 | .colorAttachmentCount = 1, 249 | .pColorAttachmentFormats = &u32, 250 | 
.depthAttachmentFormat = vk::Format::eD32Sfloat}; 251 | 252 | auto depth_only_rendering_info = vk::PipelineRenderingCreateInfoKHR { 253 | .depthAttachmentFormat = vk::Format::eD32Sfloat}; 254 | 255 | auto graphics_pipeline_infos = std::array { 256 | // opaque shadowmaps 257 | vk::GraphicsPipelineCreateInfo { 258 | .pNext = &depth_only_rendering_info, 259 | .stageCount = opaque_shadow_stage.size(), 260 | .pStages = opaque_shadow_stage.data(), 261 | .pVertexInputState = &EMPTY_VERTEX_INPUT, 262 | .pInputAssemblyState = &TRIANGLE_LIST_INPUT_ASSEMBLY, 263 | .pViewportState = &DEFAULT_VIEWPORT_STATE, 264 | .pRasterizationState = &FILL_RASTERIZATION_CULL_FRONT_FACES, 265 | .pMultisampleState = &NO_MULTISAMPLING, 266 | .pDepthStencilState = &DEPTH_WRITE_LESS, 267 | .pColorBlendState = &EMPTY_BLEND_STATE, 268 | .pDynamicState = &DEFAULT_DYNAMIC_STATE_INFO, 269 | .layout = *pipeline_layout, 270 | }, 271 | // alpha clip shadow maps 272 | vk::GraphicsPipelineCreateInfo { 273 | .pNext = &depth_only_rendering_info, 274 | .stageCount = alpha_clip_shadow_stages.size(), 275 | .pStages = alpha_clip_shadow_stages.data(), 276 | .pVertexInputState = &EMPTY_VERTEX_INPUT, 277 | .pInputAssemblyState = &TRIANGLE_LIST_INPUT_ASSEMBLY, 278 | .pViewportState = &DEFAULT_VIEWPORT_STATE, 279 | .pRasterizationState = &FILL_RASTERIZATION_DOUBLE_SIDED, 280 | .pMultisampleState = &NO_MULTISAMPLING, 281 | .pDepthStencilState = &DEPTH_WRITE_LESS, 282 | .pColorBlendState = &EMPTY_BLEND_STATE, 283 | .pDynamicState = &DEFAULT_DYNAMIC_STATE_INFO, 284 | .layout = *pipeline_layout, 285 | }, 286 | // opaque visibility buffer 287 | vk::GraphicsPipelineCreateInfo { 288 | .pNext = &u32_format_rendering_info, 289 | .stageCount = visbuffer_stages.size(), 290 | .pStages = visbuffer_stages.data(), 291 | .pVertexInputState = &EMPTY_VERTEX_INPUT, 292 | .pInputAssemblyState = &TRIANGLE_LIST_INPUT_ASSEMBLY, 293 | .pViewportState = &DEFAULT_VIEWPORT_STATE, 294 | .pRasterizationState = &FILL_RASTERIZATION, 295 | 
.pMultisampleState = &NO_MULTISAMPLING, 296 | .pDepthStencilState = &DEPTH_WRITE_GREATER, 297 | .pColorBlendState = &SINGLE_REPLACE_BLEND_STATE, 298 | .pDynamicState = &DEFAULT_DYNAMIC_STATE_INFO, 299 | .layout = *pipeline_layout, 300 | }, 301 | // alpha clip visibility buffer 302 | vk::GraphicsPipelineCreateInfo { 303 | .pNext = &u32_format_rendering_info, 304 | .stageCount = visbuffer_alpha_clip_stages.size(), 305 | .pStages = visbuffer_alpha_clip_stages.data(), 306 | .pVertexInputState = &EMPTY_VERTEX_INPUT, 307 | .pInputAssemblyState = &TRIANGLE_LIST_INPUT_ASSEMBLY, 308 | .pViewportState = &DEFAULT_VIEWPORT_STATE, 309 | .pRasterizationState = &FILL_RASTERIZATION_DOUBLE_SIDED, 310 | .pMultisampleState = &NO_MULTISAMPLING, 311 | .pDepthStencilState = &DEPTH_WRITE_GREATER, 312 | .pColorBlendState = &SINGLE_REPLACE_BLEND_STATE, 313 | .pDynamicState = &DEFAULT_DYNAMIC_STATE_INFO, 314 | .layout = *pipeline_layout, 315 | }}; 316 | 317 | auto graphics_pipelines = 318 | device.createGraphicsPipelines(nullptr, graphics_pipeline_infos); 319 | 320 | return { 321 | .rasterize_shadowmap { 322 | .opaque = name_pipeline( 323 | std::move(graphics_pipelines[0]), 324 | device, 325 | "rasterize_shadowmap::opaque" 326 | ), 327 | .alpha_clip = name_pipeline( 328 | std::move(graphics_pipelines[1]), 329 | device, 330 | "rasterize_shadowmap::alpha_clip" 331 | )}, 332 | .rasterize_visbuffer = 333 | {.opaque = name_pipeline( 334 | std::move(graphics_pipelines[2]), 335 | device, 336 | "rasterize_visbuffer::opaque" 337 | ), 338 | .alpha_clip = name_pipeline( 339 | std::move(graphics_pipelines[3]), 340 | device, 341 | "rasterize_visbuffer::alpha_clip" 342 | )}, 343 | .read_depth = create_compute_pipeline_from_shader( 344 | device, 345 | pipeline_layout, 346 | "compiled_shaders/compute/read_depth.spv" 347 | ), 348 | .generate_matrices = create_compute_pipeline_from_shader( 349 | device, 350 | pipeline_layout, 351 | "compiled_shaders/compute/generate_shadow_matrices.spv" 352 | ), 353 | 
.write_draw_calls = create_compute_pipeline_from_shader( 354 | device, 355 | pipeline_layout, 356 | "compiled_shaders/write_draw_calls.spv" 357 | ), 358 | .display_transform = create_compute_pipeline_from_shader( 359 | device, 360 | pipeline_layout, 361 | "compiled_shaders/display_transform.spv" 362 | ), 363 | .render_geometry = create_compute_pipeline_from_shader( 364 | device, 365 | pipeline_layout, 366 | "compiled_shaders/render_geometry.spv" 367 | ), 368 | .reset_buffers_a = create_compute_pipeline_from_shader( 369 | device, 370 | pipeline_layout, 371 | "compiled_shaders/compute/reset_buffers_a.spv" 372 | ), 373 | .reset_buffers_b = create_compute_pipeline_from_shader( 374 | device, 375 | pipeline_layout, 376 | "compiled_shaders/compute/reset_buffers_b.spv" 377 | ), 378 | .reset_buffers_c = create_compute_pipeline_from_shader( 379 | device, 380 | pipeline_layout, 381 | "compiled_shaders/compute/reset_buffers_c.spv" 382 | ), 383 | .write_draw_calls_shadows = create_compute_pipeline_from_shader( 384 | device, 385 | pipeline_layout, 386 | "compiled_shaders/write_draw_calls_shadows.spv" 387 | ), 388 | .cull_instances = create_compute_pipeline_from_shader( 389 | device, 390 | pipeline_layout, 391 | "compiled_shaders/cull_instances.spv" 392 | ), 393 | .cull_instances_shadows = create_compute_pipeline_from_shader( 394 | device, 395 | pipeline_layout, 396 | "compiled_shaders/cull_instances_shadows.spv" 397 | ), 398 | .pipeline_layout = std::move(pipeline_layout), 399 | .copy_quantized_positions = create_compute_pipeline_from_shader( 400 | device, 401 | copy_quantized_positions_pipeline_layout, 402 | "compiled_shaders/compute/copy_quantized_positions.spv" 403 | ), 404 | .copy_quantized_normals = create_compute_pipeline_from_shader( 405 | device, 406 | copy_quantized_positions_pipeline_layout, 407 | "compiled_shaders/compute/copy_quantized_normals.spv" 408 | ), 409 | .copy_pipeline_layout = 410 | std::move(copy_quantized_positions_pipeline_layout), 411 | 412 | }; 413 | } 
414 | -------------------------------------------------------------------------------- /src/rendering.cpp: -------------------------------------------------------------------------------- 1 | #include "rendering.h" 2 | 3 | #include "sync.h" 4 | 5 | const auto u32_max = std::numeric_limits::max(); 6 | 7 | void set_scissor_and_viewport( 8 | const vk::raii::CommandBuffer& command_buffer, 9 | uint32_t width, 10 | uint32_t height 11 | ) { 12 | command_buffer.setScissor( 13 | 0, 14 | {vk::Rect2D { 15 | .offset = {}, 16 | .extent = vk::Extent2D {.width = width, .height = height}}} 17 | ); 18 | command_buffer.setViewport( 19 | 0, 20 | {vk::Viewport { 21 | .width = static_cast(width), 22 | .height = static_cast(height), 23 | .minDepth = 0.0, 24 | .maxDepth = 1.0}} 25 | ); 26 | } 27 | 28 | void render( 29 | const vk::raii::CommandBuffer& command_buffer, 30 | const Pipelines& pipelines, 31 | const DescriptorSet& descriptor_set, 32 | const Resources& resources, 33 | vk::Image swapchain_image, 34 | const vk::raii::ImageView& swapchain_image_view, 35 | vk::Extent2D extent, 36 | uint32_t graphics_queue_family, 37 | tracy::VkCtx* tracy_ctx, 38 | uint32_t swapchain_image_index, 39 | uint64_t uniform_buffer_address 40 | ) { 41 | ZoneScoped; 42 | TracyVkZone(tracy_ctx, *command_buffer, "render"); 43 | 44 | auto dispatch_scalar = [&](const vk::raii::Pipeline& pipeline) { 45 | command_buffer.bindPipeline(vk::PipelineBindPoint::eCompute, *pipeline); 46 | command_buffer.dispatch(1, 1, 1); 47 | }; 48 | 49 | auto dispatch_indirect = [&](uint32_t index) { 50 | command_buffer.dispatchIndirect( 51 | resources.dispatches_buffer.buffer, 52 | index * sizeof(vk::DispatchIndirectCommand) 53 | ); 54 | }; 55 | 56 | command_buffer.pushConstants( 57 | *pipelines.pipeline_layout, 58 | vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eCompute, 59 | 0, 60 | {{.address = uniform_buffer_address}} 61 | ); 62 | 63 | command_buffer.bindDescriptorSets( 64 | vk::PipelineBindPoint::eCompute, 65 | 
*pipelines.pipeline_layout, 66 | 0, 67 | {*descriptor_set.set, 68 | *descriptor_set.swapchain_image_sets[swapchain_image_index]}, 69 | {} 70 | ); 71 | command_buffer.bindDescriptorSets( 72 | vk::PipelineBindPoint::eGraphics, 73 | *pipelines.pipeline_layout, 74 | 0, 75 | {*descriptor_set.set}, 76 | {} 77 | ); 78 | 79 | dispatch_scalar(pipelines.reset_buffers_a); 80 | 81 | insert_color_image_barriers( 82 | command_buffer, 83 | std::array { 84 | // Get depth buffer ready for rendering. 85 | ImageBarrier { 86 | .prev_access = THSVS_ACCESS_NONE, 87 | .next_access = THSVS_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE, 88 | .discard_contents = true, 89 | .queue_family = graphics_queue_family, 90 | .image = resources.resizing.depthbuffer.image.image, 91 | .subresource_range = DEPTH_SUBRESOURCE_RANGE}, 92 | // Get shadowmaps ready for rendering. 93 | ImageBarrier { 94 | .prev_access = THSVS_ACCESS_NONE, 95 | .next_access = THSVS_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE, 96 | .discard_contents = true, 97 | .queue_family = graphics_queue_family, 98 | .image = resources.shadowmap.image.image, 99 | .subresource_range = 100 | { 101 | .aspectMask = vk::ImageAspectFlagBits::eDepth, 102 | .baseMipLevel = 0, 103 | .levelCount = 1, 104 | .baseArrayLayer = 0, 105 | .layerCount = 4, 106 | }}, 107 | // Get framebuffer ready for writing 108 | ImageBarrier { 109 | .prev_access = THSVS_ACCESS_COMPUTE_SHADER_WRITE, 110 | .next_access = THSVS_ACCESS_COMPUTE_SHADER_WRITE, 111 | .next_layout = THSVS_IMAGE_LAYOUT_GENERAL, 112 | .discard_contents = true, 113 | .queue_family = graphics_queue_family, 114 | .image = 115 | resources.resizing.scene_referred_framebuffer.image.image}, 116 | // Get swapchain image ready for rendering. 
117 | ImageBarrier { 118 | .prev_access = THSVS_ACCESS_COMPUTE_SHADER_WRITE, 119 | .next_access = THSVS_ACCESS_COMPUTE_SHADER_WRITE, 120 | .next_layout = THSVS_IMAGE_LAYOUT_GENERAL, 121 | .discard_contents = true, 122 | .queue_family = graphics_queue_family, 123 | .image = swapchain_image}, 124 | // Get visbuffer image ready for rendering. 125 | ImageBarrier { 126 | .prev_access = THSVS_ACCESS_NONE, 127 | .next_access = THSVS_ACCESS_COLOR_ATTACHMENT_WRITE, 128 | .discard_contents = true, 129 | .queue_family = graphics_queue_family, 130 | .image = resources.resizing.visbuffer.image.image}, 131 | }, 132 | std::optional(GlobalBarrier<1, 2> { 133 | .prev_accesses = {THSVS_ACCESS_TRANSFER_WRITE}, 134 | .next_accesses = 135 | {THSVS_ACCESS_COMPUTE_SHADER_READ_OTHER, 136 | THSVS_ACCESS_INDIRECT_BUFFER}}) 137 | ); 138 | 139 | { 140 | TracyVkZone(tracy_ctx, *command_buffer, "cull instances"); 141 | 142 | command_buffer.bindPipeline( 143 | vk::PipelineBindPoint::eCompute, 144 | *pipelines.cull_instances 145 | ); 146 | dispatch_indirect(PER_INSTANCE_DISPATCH); 147 | } 148 | 149 | insert_global_barrier( 150 | command_buffer, 151 | GlobalBarrier<1, 1> { 152 | .prev_accesses = {THSVS_ACCESS_COMPUTE_SHADER_WRITE}, 153 | .next_accesses = {THSVS_ACCESS_COMPUTE_SHADER_READ_OTHER}} 154 | ); 155 | 156 | dispatch_scalar(pipelines.reset_buffers_b); 157 | 158 | insert_global_barrier( 159 | command_buffer, 160 | GlobalBarrier<3, 3> { 161 | .prev_accesses = 162 | {THSVS_ACCESS_COMPUTE_SHADER_WRITE, 163 | THSVS_ACCESS_COMPUTE_SHADER_READ_OTHER, 164 | THSVS_ACCESS_INDIRECT_BUFFER}, 165 | .next_accesses = 166 | {THSVS_ACCESS_COMPUTE_SHADER_WRITE, 167 | THSVS_ACCESS_COMPUTE_SHADER_READ_OTHER, 168 | THSVS_ACCESS_INDIRECT_BUFFER}} 169 | ); 170 | 171 | { 172 | TracyVkZone( 173 | tracy_ctx, 174 | *command_buffer, 175 | "cull meshlets and write draw calls" 176 | ); 177 | 178 | command_buffer.bindPipeline( 179 | vk::PipelineBindPoint::eCompute, 180 | *pipelines.write_draw_calls 181 | ); 182 | 
dispatch_indirect(PER_MESHLET_DISPATCH); 183 | } 184 | 185 | insert_global_barrier( 186 | command_buffer, 187 | GlobalBarrier<1, 1> { 188 | .prev_accesses = 189 | std::array { 190 | THSVS_ACCESS_COMPUTE_SHADER_WRITE}, 191 | .next_accesses = 192 | std::array {THSVS_ACCESS_INDIRECT_BUFFER}} 193 | ); 194 | 195 | set_scissor_and_viewport(command_buffer, extent.width, extent.height); 196 | 197 | { 198 | TracyVkZone(tracy_ctx, *command_buffer, "visbuffer rendering"); 199 | 200 | vk::RenderingAttachmentInfoKHR visbuffer_attachment_info = { 201 | .imageView = *resources.resizing.visbuffer.view, 202 | .imageLayout = vk::ImageLayout::eColorAttachmentOptimal, 203 | .loadOp = vk::AttachmentLoadOp::eDontCare, 204 | .storeOp = vk::AttachmentStoreOp::eStore, 205 | }; 206 | vk::RenderingAttachmentInfoKHR depth_attachment_info = { 207 | .imageView = *resources.resizing.depthbuffer.view, 208 | .imageLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal, 209 | .loadOp = vk::AttachmentLoadOp::eClear, 210 | .storeOp = vk::AttachmentStoreOp::eStore, 211 | }; 212 | command_buffer.beginRendering( 213 | {.renderArea = 214 | { 215 | .offset = {}, 216 | .extent = extent, 217 | }, 218 | .layerCount = 1, 219 | .colorAttachmentCount = 1, 220 | .pColorAttachments = &visbuffer_attachment_info, 221 | .pDepthAttachment = &depth_attachment_info} 222 | ); 223 | 224 | command_buffer.bindPipeline( 225 | vk::PipelineBindPoint::eGraphics, 226 | *pipelines.rasterize_visbuffer.opaque 227 | ); 228 | { 229 | TracyVkZone( 230 | tracy_ctx, 231 | *command_buffer, 232 | "visbuffer: opaque geometry" 233 | ); 234 | 235 | command_buffer.drawIndirectCount( 236 | resources.draw_calls_buffer.buffer, 237 | sizeof(uint32_t) * 2, 238 | resources.draw_calls_buffer.buffer, 239 | 0, 240 | MAX_OPAQUE_DRAWS, 241 | sizeof(vk::DrawIndirectCommand) 242 | ); 243 | } 244 | command_buffer.bindPipeline( 245 | vk::PipelineBindPoint::eGraphics, 246 | *pipelines.rasterize_visbuffer.alpha_clip 247 | ); 248 | { 249 | TracyVkZone( 250 
| tracy_ctx, 251 | *command_buffer, 252 | "visbuffer: alpha clip geometry" 253 | ); 254 | 255 | command_buffer.drawIndirectCount( 256 | resources.draw_calls_buffer.buffer, 257 | sizeof(uint32_t) * 2 258 | + ALPHA_CLIP_DRAWS_OFFSET * sizeof(vk::DrawIndirectCommand), 259 | resources.draw_calls_buffer.buffer, 260 | sizeof(uint32_t), 261 | MAX_ALPHA_CLIP_DRAWS, 262 | sizeof(vk::DrawIndirectCommand) 263 | ); 264 | } 265 | command_buffer.endRendering(); 266 | } 267 | 268 | insert_color_image_barriers( 269 | command_buffer, 270 | std::array { 271 | // Switch depthbuffer from write to read. 272 | ImageBarrier { 273 | .prev_access = THSVS_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE, 274 | .next_access = 275 | THSVS_ACCESS_COMPUTE_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER, 276 | .queue_family = graphics_queue_family, 277 | .image = resources.resizing.depthbuffer.image.image, 278 | .subresource_range = DEPTH_SUBRESOURCE_RANGE}, 279 | // Switch visbuffer from write to read. 280 | ImageBarrier { 281 | .prev_access = THSVS_ACCESS_COLOR_ATTACHMENT_WRITE, 282 | .next_access = 283 | THSVS_ACCESS_COMPUTE_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER, 284 | .queue_family = graphics_queue_family, 285 | .image = resources.resizing.visbuffer.image.image}, 286 | } 287 | ); 288 | 289 | { 290 | TracyVkZone(tracy_ctx, *command_buffer, "depth reduction"); 291 | command_buffer.bindPipeline( 292 | vk::PipelineBindPoint::eCompute, 293 | *pipelines.read_depth 294 | ); 295 | command_buffer.dispatch( 296 | dispatch_size(extent.width, 8 * 4), 297 | dispatch_size(extent.height, 8 * 4), 298 | 1 299 | ); 300 | } 301 | 302 | dispatch_scalar(pipelines.generate_matrices); 303 | 304 | { 305 | TracyVkZone(tracy_ctx, *command_buffer, "cull instances for shadows"); 306 | 307 | command_buffer.bindPipeline( 308 | vk::PipelineBindPoint::eCompute, 309 | *pipelines.cull_instances_shadows 310 | ); 311 | dispatch_indirect(PER_SHADOW_INSTANCE_DISPATCH); 312 | } 313 | 314 | insert_global_barrier( 315 | 
command_buffer, 316 | GlobalBarrier<1, 1> { 317 | .prev_accesses = 318 | std::array { 319 | THSVS_ACCESS_COMPUTE_SHADER_WRITE}, 320 | .next_accesses = 321 | std::array { 322 | THSVS_ACCESS_COMPUTE_SHADER_READ_OTHER}} 323 | ); 324 | 325 | { 326 | TracyVkZone(tracy_ctx, *command_buffer, "shadowmap rasterization"); 327 | 328 | set_scissor_and_viewport(command_buffer, 1024, 1024); 329 | 330 | for (uint32_t i = 0; i < resources.shadowmap_layer_views.size(); i++) { 331 | TracyVkZone(tracy_ctx, *command_buffer, "shadowmap inner"); 332 | 333 | command_buffer.pushConstants( 334 | *pipelines.pipeline_layout, 335 | vk::ShaderStageFlagBits::eVertex 336 | | vk::ShaderStageFlagBits::eCompute, 337 | sizeof(UniformBufferAddressConstant), 338 | {{.cascade_index = i}} 339 | ); 340 | 341 | dispatch_scalar(pipelines.reset_buffers_c); 342 | 343 | insert_global_barrier( 344 | command_buffer, 345 | GlobalBarrier<1, 1> { 346 | .prev_accesses = 347 | std::array { 348 | THSVS_ACCESS_COMPUTE_SHADER_WRITE}, 349 | .next_accesses = 350 | std::array { 351 | THSVS_ACCESS_INDIRECT_BUFFER}} 352 | ); 353 | 354 | { 355 | TracyVkZone( 356 | tracy_ctx, 357 | *command_buffer, 358 | "cull meshlets and write draw calls" 359 | ); 360 | 361 | command_buffer.bindPipeline( 362 | vk::PipelineBindPoint::eCompute, 363 | *pipelines.write_draw_calls_shadows 364 | ); 365 | dispatch_indirect(PER_MESHLET_DISPATCH); 366 | } 367 | 368 | insert_global_barrier( 369 | command_buffer, 370 | GlobalBarrier<1, 1> { 371 | .prev_accesses = 372 | std::array { 373 | THSVS_ACCESS_COMPUTE_SHADER_WRITE}, 374 | .next_accesses = 375 | std::array { 376 | THSVS_ACCESS_INDIRECT_BUFFER}} 377 | ); 378 | 379 | vk::RenderingAttachmentInfoKHR depth_attachment_info = { 380 | .imageView = *resources.shadowmap_layer_views[i], 381 | .imageLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal, 382 | .loadOp = vk::AttachmentLoadOp::eClear, 383 | .storeOp = vk::AttachmentStoreOp::eStore, 384 | .clearValue = {.depthStencil = {.depth = 1.0f}}}; 385 
| command_buffer.beginRendering( 386 | {.renderArea = 387 | { 388 | .offset = {}, 389 | .extent = vk::Extent2D {.width = 1024, .height = 1024}, 390 | }, 391 | .layerCount = 1, 392 | .pDepthAttachment = &depth_attachment_info} 393 | ); 394 | command_buffer.bindPipeline( 395 | vk::PipelineBindPoint::eGraphics, 396 | *pipelines.rasterize_shadowmap.opaque 397 | ); 398 | { 399 | TracyVkZone( 400 | tracy_ctx, 401 | *command_buffer, 402 | "shadowmap: opaque geometry" 403 | ); 404 | 405 | command_buffer.drawIndirectCount( 406 | resources.draw_calls_buffer.buffer, 407 | sizeof(uint32_t) * 2, 408 | resources.draw_calls_buffer.buffer, 409 | 0, 410 | MAX_OPAQUE_DRAWS, 411 | sizeof(vk::DrawIndirectCommand) 412 | ); 413 | } 414 | command_buffer.bindPipeline( 415 | vk::PipelineBindPoint::eGraphics, 416 | *pipelines.rasterize_shadowmap.alpha_clip 417 | ); 418 | { 419 | TracyVkZone( 420 | tracy_ctx, 421 | *command_buffer, 422 | "shadowmap: alpha clip geometry" 423 | ); 424 | 425 | command_buffer.drawIndirectCount( 426 | resources.draw_calls_buffer.buffer, 427 | sizeof(uint32_t) * 2 428 | + (ALPHA_CLIP_DRAWS_OFFSET) 429 | * sizeof(vk::DrawIndirectCommand), 430 | resources.draw_calls_buffer.buffer, 431 | sizeof(uint32_t), 432 | MAX_ALPHA_CLIP_DRAWS, 433 | sizeof(vk::DrawIndirectCommand) 434 | ); 435 | } 436 | command_buffer.endRendering(); 437 | } 438 | } 439 | 440 | insert_color_image_barriers( 441 | command_buffer, 442 | std::array { 443 | // Switch shadowmap from write to read. 
444 | ImageBarrier { 445 | .prev_access = THSVS_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE, 446 | .next_access = 447 | THSVS_ACCESS_COMPUTE_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER, 448 | .queue_family = graphics_queue_family, 449 | .image = resources.shadowmap.image.image, 450 | .subresource_range = 451 | { 452 | .aspectMask = vk::ImageAspectFlagBits::eDepth, 453 | .baseMipLevel = 0, 454 | .levelCount = 1, 455 | .baseArrayLayer = 0, 456 | .layerCount = 4, 457 | }} 458 | 459 | } 460 | ); 461 | 462 | { 463 | TracyVkZone(tracy_ctx, *command_buffer, "render geometry"); 464 | 465 | command_buffer.bindPipeline( 466 | vk::PipelineBindPoint::eCompute, 467 | *pipelines.render_geometry 468 | ); 469 | command_buffer.dispatch( 470 | dispatch_size(extent.width, 8), 471 | dispatch_size(extent.height, 8), 472 | 1 473 | ); 474 | } 475 | 476 | insert_color_image_barriers( 477 | command_buffer, 478 | std::array { 479 | // Switch framebuffer from write to read. 480 | ImageBarrier { 481 | .prev_access = THSVS_ACCESS_COMPUTE_SHADER_WRITE, 482 | .next_access = 483 | THSVS_ACCESS_COMPUTE_SHADER_READ_SAMPLED_IMAGE_OR_UNIFORM_TEXEL_BUFFER, 484 | .prev_layout = THSVS_IMAGE_LAYOUT_GENERAL, 485 | .queue_family = graphics_queue_family, 486 | .image = 487 | resources.resizing.scene_referred_framebuffer.image.image}, 488 | } 489 | ); 490 | 491 | { 492 | TracyVkZone(tracy_ctx, *command_buffer, "display transform"); 493 | command_buffer.bindPipeline( 494 | vk::PipelineBindPoint::eCompute, 495 | *pipelines.display_transform 496 | ); 497 | command_buffer.dispatch( 498 | dispatch_size(extent.width, 8), 499 | dispatch_size(extent.height, 8), 500 | 1 501 | ); 502 | } 503 | 504 | insert_global_barrier( 505 | command_buffer, 506 | GlobalBarrier<1, 1> { 507 | .prev_accesses = {THSVS_ACCESS_COMPUTE_SHADER_WRITE}, 508 | .next_accesses = {THSVS_ACCESS_COLOR_ATTACHMENT_WRITE}} 509 | ); 510 | 511 | { 512 | TracyVkZone(tracy_ctx, *command_buffer, "imgui"); 513 | 514 | vk::RenderingAttachmentInfoKHR 
color_attachment_info = { 515 | .imageView = *swapchain_image_view, 516 | .imageLayout = vk::ImageLayout::eColorAttachmentOptimal, 517 | .loadOp = vk::AttachmentLoadOp::eLoad, 518 | .storeOp = vk::AttachmentStoreOp::eStore, 519 | .clearValue = {}}; 520 | command_buffer.beginRendering( 521 | {.renderArea = 522 | { 523 | .offset = {}, 524 | .extent = extent, 525 | }, 526 | .layerCount = 1, 527 | .colorAttachmentCount = 1, 528 | .pColorAttachments = &color_attachment_info} 529 | ); 530 | 531 | ImDrawData* draw_data = ImGui::GetDrawData(); 532 | ImGui_ImplVulkan_RenderDrawData(draw_data, *command_buffer); 533 | 534 | command_buffer.endRendering(); 535 | } 536 | 537 | // Transition the swapchain image from being used as a color attachment 538 | // to presenting. Don't discard contents!! 539 | insert_color_image_barriers( 540 | command_buffer, 541 | std::array {ImageBarrier { 542 | .prev_access = THSVS_ACCESS_COLOR_ATTACHMENT_WRITE, 543 | .next_access = THSVS_ACCESS_PRESENT, 544 | .prev_layout = THSVS_IMAGE_LAYOUT_GENERAL, 545 | .queue_family = graphics_queue_family, 546 | .image = swapchain_image}} 547 | ); 548 | } 549 | --------------------------------------------------------------------------------