├── .clang-format ├── .editorconfig ├── .gitignore ├── .gitmodules ├── CHANGELOG.md ├── CMakeLists.txt ├── CONTRIBUTING ├── LICENSE ├── README.md ├── docs ├── cluster_ray_tracing.png └── sample.jpg ├── shaders ├── animupdate_normals.comp.glsl ├── animupdate_vertices.comp.glsl ├── cluster_statistics.comp.glsl ├── hbao.h ├── hbao_blur.comp.glsl ├── hbao_blur.glsl ├── hbao_blur_apply.comp.glsl ├── hbao_calc.comp.glsl ├── hbao_deinterleave.comp.glsl ├── hbao_depthlinearize.comp.glsl ├── hbao_reinterleave.comp.glsl ├── hbao_viewnormal.comp.glsl ├── render_cluster_bbox.frag.glsl ├── render_cluster_bbox.mesh.glsl ├── render_raster.frag.glsl ├── render_raster_clusters.mesh.glsl ├── render_raster_triangles.vert.glsl ├── render_raytrace.rgen.glsl ├── render_raytrace.rmiss.glsl ├── render_raytrace_clusters.rchit.glsl ├── render_raytrace_triangles.rchit.glsl ├── render_shading.glsl └── shaderio.h └── src ├── animatedclusters.cpp ├── animatedclusters.hpp ├── animatedclusters_ui.cpp ├── cgltf.cpp ├── hbao_pass.cpp ├── hbao_pass.hpp ├── main.cpp ├── renderer.cpp ├── renderer.hpp ├── renderer_raster_clusters.cpp ├── renderer_raster_triangles.cpp ├── renderer_raytrace_clusters.cpp ├── renderer_raytrace_triangles.cpp ├── resources.cpp ├── resources.hpp ├── scene.cpp ├── scene.hpp ├── scene_gltf.cpp ├── vk_nv_cluster_acc.cpp └── vk_nv_cluster_acc.h /.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: LLVM 2 | AccessModifierOffset: '-2' 3 | AlignAfterOpenBracket: Align 4 | AlignConsecutiveAssignments: 'true' 5 | AlignConsecutiveDeclarations: 'true' 6 | AlignOperands: 'true' 7 | AlignTrailingComments: 'true' 8 | AllowAllParametersOfDeclarationOnNextLine: 'false' 9 | AllowShortBlocksOnASingleLine: 'false' 10 | AllowShortCaseLabelsOnASingleLine: 'false' 11 | AllowShortFunctionsOnASingleLine: Inline 12 | AllowShortIfStatementsOnASingleLine: 'false' 13 | AllowShortLoopsOnASingleLine: 'false' 14 | AlwaysBreakAfterReturnType: None 
15 | AlwaysBreakBeforeMultilineStrings: 'true' 16 | AlwaysBreakTemplateDeclarations: 'true' 17 | BinPackArguments: 'true' 18 | BinPackParameters: 'false' 19 | ExperimentalAutoDetectBinPacking: 'false' 20 | BreakBeforeBinaryOperators: NonAssignment 21 | BreakBeforeBraces: Custom 22 | BreakBeforeTernaryOperators: 'false' 23 | BreakConstructorInitializersBeforeComma: 'true' 24 | ColumnLimit: '120' 25 | ConstructorInitializerAllOnOneLineOrOnePerLine: 'false' 26 | Cpp11BracedListStyle: 'true' 27 | IndentCaseLabels: 'true' 28 | IndentWidth: '2' 29 | KeepEmptyLinesAtTheStartOfBlocks: 'true' 30 | Language: Cpp 31 | MaxEmptyLinesToKeep: '2' 32 | NamespaceIndentation: None 33 | ObjCSpaceBeforeProtocolList: 'true' 34 | PointerAlignment: Left 35 | SpaceAfterCStyleCast: 'false' 36 | SpaceBeforeAssignmentOperators: 'true' 37 | SpaceBeforeParens: Never 38 | SpaceInEmptyParentheses: 'false' 39 | SpacesBeforeTrailingComments: '2' 40 | SpacesInAngles: 'false' 41 | SpacesInCStyleCastParentheses: 'false' 42 | SpacesInParentheses: 'false' 43 | SpacesInSquareBrackets: 'false' 44 | Standard: Cpp11 45 | TabWidth: '2' 46 | UseTab: Never 47 | SortIncludes: 'false' 48 | ReflowComments: 'false' 49 | BraceWrapping: { 50 | AfterClass: 'true' 51 | AfterControlStatement: 'true' 52 | AfterEnum: 'true' 53 | AfterFunction: 'true' 54 | AfterNamespace: 'false' 55 | AfterStruct: 'true' 56 | AfterUnion: 'true' 57 | BeforeCatch: 'true' 58 | BeforeElse: 'true' 59 | IndentBraces: 'false' 60 | } 61 | PenaltyExcessCharacter: 1 62 | PenaltyBreakBeforeFirstCallParameter: 40 63 | PenaltyBreakFirstLessLess: 1 64 | PenaltyBreakComment: 30 65 | PenaltyBreakString: 30 66 | PenaltyReturnTypeOnItsOwnLine: 9999 67 | BreakStringLiterals: false -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # This is the top-most editor config file 2 | root = true 3 | 4 | # Default to 2 space indentation 
for C/C++ files 5 | [*.{c,cpp,h,hpp,inl}] 6 | indent_size = 2 7 | indent_style = space 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ############################# 2 | # generic 3 | ############################# 4 | 5 | *.bak 6 | 7 | ############################# 8 | # spirv/sass 9 | ############################# 10 | 11 | *.spv 12 | *.spva 13 | *.sass 14 | *.sassbin 15 | 16 | ############################# 17 | #specific to the project 18 | ############################# 19 | 20 | cmake_built 21 | cmake_build 22 | build 23 | _install 24 | bin_x64 25 | downloaded_resources/ 26 | zbsgfxpack.lua 27 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "external/meshoptimizer"] 2 | path = external/meshoptimizer 3 | url = https://github.com/zeux/meshoptimizer.git 4 | branch = master 5 | [submodule "external/nv_cluster_builder"] 6 | path = external/nv_cluster_builder 7 | url = https://github.com/nvpro-samples/nv_cluster_builder.git 8 | branch = main 9 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog for vk_animated_clusters 2 | * 2025/4/30: 3 | * Automatically set preferred ray tracing build settings when animation is toggled on or off in UI. 4 | * Highlight render resolution in UI when ray tracing. 5 | * 2025/4/25: 6 | * bugfix gltf loading of meshes with multiple primitives 7 | * 2025/2/11: 8 | * Expose more cluster config options for nvidia cluster builder library. 9 | * Add option for using per-cluster vertices. Note that this increases memory usage quite a bit, as well as animation processing cost, but it can be useful for having more metrics to compare against. 
10 | * 2025/2/4: `doAnimation` is moved to renderer config. This allows a newly added codepath for the triangle ray tracer to use BLAS compaction when animation is off. It enables more comparisons between ray traced triangles and clusters. 11 | * 2025/1/30: Initial Release -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.6...3.31) 2 | 3 | get_filename_component(PROJNAME ${CMAKE_CURRENT_SOURCE_DIR} NAME) 4 | Project(${PROJNAME}) 5 | Message(STATUS "-------------------------------") 6 | Message(STATUS "Processing Project ${PROJNAME}:") 7 | 8 | 9 | ##################################################################################### 10 | # look for nvpro_core 1) as a sub-folder 2) at some other locations 11 | # this cannot be put anywhere else since we still didn't find setup.cmake yet 12 | 13 | # which nvprocore tag or branch to download if repo not found 14 | set(NVPRO_GIT_TAG main) 15 | # Where to decompress nvprocore source code if repo not found 16 | set(NVPRO_TGT_SRC_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/_deps) 17 | 18 | if(NOT BASE_DIRECTORY) 19 | find_path(BASE_DIRECTORY 20 | NAMES nvpro_core/cmake/setup.cmake 21 | PATHS ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/../.. ${CMAKE_CURRENT_SOURCE_DIR}/external 22 | DOC "Directory containing nvpro_core" 23 | ) 24 | endif() 25 | if(EXISTS ${BASE_DIRECTORY}/nvpro_core/cmake/setup.cmake) 26 | set(OUTPUT_PATH ${CMAKE_CURRENT_SOURCE_DIR}/bin_x64) 27 | include(${BASE_DIRECTORY}/nvpro_core/cmake/setup.cmake) 28 | else() 29 | # nvpro_core not found, will try to download. 
30 | # first find where the current sample comes from 31 | execute_process( 32 | COMMAND git config --get remote.origin.url 33 | WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} 34 | OUTPUT_VARIABLE GIT_REPO_URL OUTPUT_STRIP_TRAILING_WHITESPACE 35 | ) 36 | # Check if "github.com" is in URL 37 | string(FIND "${GIT_REPO_URL}" "github.com" FOUND_INDEX) 38 | if (FOUND_INDEX GREATER -1) 39 | # Use regex to extract everything up to and including "github.com" 40 | string(REGEX MATCH ".*github\\.com" GIT_BASE_URL "${GIT_REPO_URL}") 41 | # construct URL 42 | string(FIND "${GIT_REPO_URL}" "git@" SSH_FOUND_INDEX) 43 | if (SSH_FOUND_INDEX GREATER -1) # ssh 44 | set(NVPRO_GIT_URL ${GIT_BASE_URL}:nvpro-samples/nvpro_core.git) 45 | else() # https 46 | set(NVPRO_GIT_URL ${GIT_BASE_URL}/nvpro-samples/nvpro_core.git) 47 | endif() 48 | if("${NVPRO_GIT_TAG}" STREQUAL "main" ) 49 | set(NVPRO_GIT_TAG master) 50 | endif() 51 | message("Sample comes from github , nvprocore is at " ${NVPRO_GIT_URL} ) 52 | else () 53 | # reconstruct the path to nvpro_core, preserving the protocol 54 | string(REGEX MATCH "^[^/]+//[^/]+/" GIT_BASE_URL "${GIT_REPO_URL}") 55 | # construct URL 56 | set(NVPRO_GIT_URL ${GIT_BASE_URL}devtechproviz/nvpro-samples/nvpro_core.git) 57 | # message("Sample comes from prod server, nvprocore is at " ${NVPRO_GIT_URL}) 58 | endif() 59 | # let's clone the commit we need, depth to 1 so that we do not download the full history 60 | execute_process( 61 | COMMAND git clone --depth 1 --branch ${NVPRO_GIT_TAG} ${NVPRO_GIT_URL} ${CMAKE_CURRENT_BINARY_DIR}/_deps/nvpro_core 62 | WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} 63 | ) 64 | # do the search again with downloaded version, use find to be sure everything runs ok 65 | find_path(BASE_DIRECTORY 66 | NAMES nvpro_core 67 | PATHS ${CMAKE_CURRENT_BINARY_DIR}/_deps 68 | REQUIRED 69 | DOC "Directory containing nvpro_core" 70 | ) 71 | # invoke the setup 72 | if(EXISTS ${BASE_DIRECTORY}/nvpro_core/cmake/setup.cmake) 73 | set(OUTPUT_PATH 
${CMAKE_CURRENT_SOURCE_DIR}/bin_x64) 74 | include(${BASE_DIRECTORY}/nvpro_core/cmake/setup.cmake) 75 | else() 76 | message(FATAL_ERROR "could not find base directory or download nvpro_core, please set BASE_DIRECTORY to folder containing nvpro_core") 77 | endif() 78 | endif() 79 | set(NVPRO_CORE_DIR ${BASE_DIRECTORY}/nvpro_core) 80 | 81 | _add_project_definitions(${PROJNAME}) 82 | 83 | # Download the default scene 84 | download_files(FILENAMES bunny_v2.zip EXTRACT) 85 | 86 | ##################################################################################### 87 | # additions from packages needed for this sample 88 | # add refs in LIBRARIES_OPTIMIZED 89 | # add refs in LIBRARIES_DEBUG 90 | # add files in PACKAGE_SOURCE_FILES 91 | 92 | _add_package_VulkanSDK() 93 | _add_package_ShaderC() 94 | _add_package_IMGUI() 95 | 96 | #_add_package_NVML() 97 | 98 | ##################################################################################### 99 | # process the rest of some cmake code that needs to be done *after* the packages add 100 | _add_nvpro_core_lib() 101 | 102 | if(NOT TARGET nv_cluster_builder) 103 | add_subdirectory(external/nv_cluster_builder) 104 | endif() 105 | 106 | if(NOT TARGET meshoptimizer) 107 | add_subdirectory(external/meshoptimizer) 108 | endif() 109 | 110 | ##################################################################################### 111 | # Source files for this project 112 | # 113 | file(GLOB SOURCE_FILES src/*.*) 114 | file(GLOB SHADER_FILES shaders/*.glsl shaders/*.h) 115 | list(APPEND SHADER_FILES ${NVPRO_CORE_DIR}/nvvkhl/shaders/dh_sky.h) 116 | 117 | include_directories(${CMAKE_CURRENT_SOURCE_DIR}/shaders) 118 | include_directories(${NVPRO_CORE_DIR}/nvvkhl/shaders) 119 | 120 | ##################################################################################### 121 | # Executable 122 | # 123 | 124 | if(WIN32 AND NOT GLUT_FOUND) 125 | add_definitions(/wd4996) #remove printf warning 126 | add_definitions(/wd4244) #remove double to float 
conversion warning 127 | add_definitions(/wd4305) #remove double to float truncation warning 128 | else() 129 | add_definitions(-fpermissive) 130 | endif() 131 | add_executable(${PROJNAME} ${SOURCE_FILES} ${COMMON_SOURCE_FILES} ${PACKAGE_SOURCE_FILES} ${SHADER_FILES}) 132 | 133 | set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY VS_STARTUP_PROJECT ${PROJNAME}) 134 | 135 | target_compile_definitions(${PROJNAME} PRIVATE NVPRO_CORE_DIR="${NVPRO_CORE_DIR}") 136 | 137 | 138 | ##################################################################################### 139 | # common source code needed for this sample 140 | # 141 | source_group(common FILES 142 | ${COMMON_SOURCE_FILES} 143 | ${PACKAGE_SOURCE_FILES} 144 | ) 145 | source_group("Shader Files" FILES ${SHADER_FILES}) 146 | source_group("Source Files" FILES ${SOURCE_FILES}) 147 | 148 | if(UNIX) 149 | set(UNIXLINKLIBS dl pthread) 150 | else() 151 | set(UNIXLINKLIBS) 152 | endif() 153 | 154 | ##################################################################################### 155 | # Linkage 156 | # 157 | 158 | target_link_libraries(${PROJNAME} ${PLATFORM_LIBRARIES} nvpro_core nv_cluster_builder meshoptimizer) 159 | 160 | foreach(DEBUGLIB ${LIBRARIES_DEBUG}) 161 | target_link_libraries(${PROJNAME} debug ${DEBUGLIB}) 162 | endforeach(DEBUGLIB) 163 | 164 | foreach(RELEASELIB ${LIBRARIES_OPTIMIZED}) 165 | target_link_libraries(${PROJNAME} optimized ${RELEASELIB}) 166 | endforeach(RELEASELIB) 167 | 168 | ##################################################################################### 169 | # copies binaries that need to be put next to the exe files (ZLib, etc.) 
170 | # 171 | 172 | _finalize_target( ${PROJNAME} ) 173 | 174 | install(FILES ${SHADER_FILES} CONFIGURATIONS Release DESTINATION "bin_${ARCH}/GLSL_${PROJNAME}") 175 | install(FILES ${SHADER_FILES} CONFIGURATIONS Debug DESTINATION "bin_${ARCH}_debug/GLSL_${PROJNAME}") 176 | -------------------------------------------------------------------------------- /CONTRIBUTING: -------------------------------------------------------------------------------- 1 | https://developercertificate.org/ 2 | 3 | Developer Certificate of Origin 4 | Version 1.1 5 | 6 | Copyright (C) 2004, 2006 The Linux Foundation and its contributors. 7 | 8 | Everyone is permitted to copy and distribute verbatim copies of this 9 | license document, but changing it is not allowed. 10 | 11 | 12 | Developer's Certificate of Origin 1.1 13 | 14 | By making a contribution to this project, I certify that: 15 | 16 | (a) The contribution was created in whole or in part by me and I 17 | have the right to submit it under the open source license 18 | indicated in the file; or 19 | 20 | (b) The contribution is based upon previous work that, to the best 21 | of my knowledge, is covered under an appropriate open source 22 | license and I have the right under that license to submit that 23 | work with modifications, whether created in whole or in part 24 | by me, under the same open source license (unless I am 25 | permitted to submit under a different license), as indicated 26 | in the file; or 27 | 28 | (c) The contribution was provided directly to me by some other 29 | person who certified (a), (b) or (c) and I have not modified 30 | it. 31 | 32 | (d) I understand and agree that this project and the contribution 33 | are public and that a record of the contribution (including all 34 | personal information I submit with it, including my sign-off) is 35 | maintained indefinitely and may be redistributed consistent with 36 | this project or the open source license(s) involved. 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 
40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 
176 | 177 | END OF TERMS AND CONDITIONS -------------------------------------------------------------------------------- /docs/cluster_ray_tracing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nvpro-samples/vk_animated_clusters/389ddf0b776f811b2209fdc0fc2fd83cd197675e/docs/cluster_ray_tracing.png -------------------------------------------------------------------------------- /docs/sample.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nvpro-samples/vk_animated_clusters/389ddf0b776f811b2209fdc0fc2fd83cd197675e/docs/sample.jpg -------------------------------------------------------------------------------- /shaders/animupdate_normals.comp.glsl: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. 
17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | #version 460 21 | 22 | #extension GL_GOOGLE_include_directive : enable 23 | 24 | #extension GL_EXT_nonuniform_qualifier : require 25 | #extension GL_EXT_shader_16bit_storage : require 26 | #extension GL_EXT_shader_explicit_arithmetic_types_float16 : require 27 | #extension GL_EXT_shader_explicit_arithmetic_types_int8 : enable 28 | #extension GL_EXT_shader_explicit_arithmetic_types_int32 : enable 29 | #extension GL_EXT_shader_explicit_arithmetic_types_int16 : enable 30 | #extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable 31 | #extension GL_EXT_shader_atomic_int64 : enable 32 | #extension GL_EXT_buffer_reference : enable 33 | #extension GL_EXT_scalar_block_layout : enable 34 | #extension GL_EXT_shader_atomic_float : enable 35 | 36 | #include "shaderio.h" 37 | 38 | layout(push_constant) uniform animationConstantsPush 39 | { 40 | AnimationConstants constants; 41 | }; 42 | 43 | layout(buffer_reference, scalar) readonly buffer RenderInstances_in 44 | { 45 | RenderInstance instances[]; 46 | }; 47 | 48 | layout(buffer_reference, scalar) readonly buffer U32Buffer 49 | { 50 | uint32_t i[]; 51 | }; 52 | 53 | layout(buffer_reference, scalar) buffer F32Buffer 54 | { 55 | float v[]; 56 | }; 57 | /* One invocation per triangle of the selected instance: computes the unit face normal from the current vertex positions and atomically adds it to the normal of each of the triangle's three vertices. The accumulated per-vertex sum is not normalized here. */ 58 | layout(local_size_x = ANIMATION_WORKGROUP_SIZE, local_size_y = 1, local_size_z = 1) in; 59 | void main() 60 | { 61 | uint32_t index = gl_GlobalInvocationID.x; 62 | 63 | RenderInstance instance = RenderInstances_in(constants.renderInstances).instances[constants.instanceIndex]; 64 | /* invocations beyond the triangle count have nothing to do */ 65 | if(index >= instance.numTriangles) 66 | { 67 | return; 68 | } 69 | /* gather the three corner positions through the triangle index buffer, one float at a time */ 70 | vec3 vertices[3]; 71 | for(uint32_t i = 0; i < 3; i++) 72 | { 73 | uint32_t vertexIndex = U32Buffer(instance.triangles).i[3 * index + i]; 74 | for(uint32_t axis = 0; axis < 3; axis++) 75 | { 76 | vertices[i][axis] = F32Buffer(instance.positions).v[3 * vertexIndex + axis]; 77 | } 78 | } 79 | 80 | vec3 e0 = vertices[1] - vertices[0]; 81 | vec3 e1 = 
vertices[2] - vertices[0]; 82 | /* cross() is the zero vector for a degenerate (zero-area) triangle and normalize() of a zero vector is undefined (typically NaN), which would poison the accumulated normals of all its vertices; such triangles contribute nothing instead */ 83 | vec3 n = cross(e0, e1); 84 | n = dot(n, n) > 0.f ? normalize(n) : vec3(0.f); 85 | for(uint32_t i = 0; i < 3; i++) 86 | { 87 | uint32_t vertexIndex = U32Buffer(instance.triangles).i[3 * index + i]; 88 | /* several triangles may share a vertex, hence the atomic float add per component */ 89 | for(uint32_t axis = 0; axis < 3; axis++) 90 | { 91 | atomicAdd(F32Buffer(instance.normals).v[3 * vertexIndex + axis], n[axis]); 92 | } 93 | } 94 | } -------------------------------------------------------------------------------- /shaders/animupdate_vertices.comp.glsl: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. 
17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | #version 460 21 | 22 | #extension GL_GOOGLE_include_directive : enable 23 | 24 | #extension GL_EXT_nonuniform_qualifier : require 25 | #extension GL_EXT_shader_16bit_storage : require 26 | #extension GL_EXT_shader_explicit_arithmetic_types_float16 : require 27 | #extension GL_EXT_shader_explicit_arithmetic_types_int8 : enable 28 | #extension GL_EXT_shader_explicit_arithmetic_types_int32 : enable 29 | #extension GL_EXT_shader_explicit_arithmetic_types_int16 : enable 30 | #extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable 31 | #extension GL_EXT_shader_atomic_int64 : enable 32 | #extension GL_EXT_buffer_reference : enable 33 | #extension GL_EXT_scalar_block_layout : enable 34 | 35 | #include "shaderio.h" 36 | #define M_PI 3.14159265358979323f 37 | 38 | layout(push_constant) uniform animationConstantsPush 39 | { 40 | AnimationConstants constants; 41 | }; 42 | 43 | layout(buffer_reference, scalar) readonly buffer RenderInstances_in 44 | { 45 | RenderInstance instances[]; 46 | }; 47 | 48 | layout(buffer_reference, buffer_reference_align = 4, scalar) buffer F32Buffer 49 | { 50 | float v[]; 51 | }; 52 | 53 | layout(local_size_x = ANIMATION_WORKGROUP_SIZE, local_size_y = 1, local_size_z = 1) in; 54 | /* Builds the 3x3 matrix for a rotation by `angle` (radians, as passed to sin/cos) about `axis`; the axis is normalized internally, so it must not be the zero vector. */ 55 | mat3 rotationMatrix(vec3 axis, float angle) 56 | { 57 | axis = normalize(axis); 58 | float s = sin(angle); 59 | float c = cos(angle); 60 | float oc = 1.0 - c; 61 | 62 | return mat3(oc * axis.x * axis.x + c, oc * axis.x * axis.y - axis.z * s, oc * axis.z * axis.x + axis.y * s, 63 | oc * axis.x * axis.y + axis.z * s, oc * axis.y * axis.y + c, oc * axis.y * axis.z - axis.x * s, 64 | oc * axis.z * axis.x - axis.y * s, oc * axis.y * axis.z + axis.x * s, oc * axis.z * axis.z + c); 65 | } 66 | 67 | /* One invocation per vertex of the selected instance: resets the vertex normal to zero and writes the animated position (original position with the optional ripple and twist effects applied) to instance.positions. */ 68 | void main() 69 | { 70 | uint32_t index = gl_GlobalInvocationID.x; 71 | 72 | RenderInstance instance = RenderInstances_in(constants.renderInstances).instances[constants.instanceIndex]; 73 | 74 | 75 | 
if(index >= instance.numVertices) 76 | { 77 | return; 78 | } 79 | /* per-instance phase offset derived from the numeric value of instance.positions (presumably the buffer address - confirm in shaderio.h) */ 80 | float seed = float(instance.positions) / float(~0u); 81 | vec3 originalVertex; 82 | for(uint32_t i = 0; i < 3; i++) 83 | { 84 | /* read the original coordinate and reset the matching normal component to zero */ 85 | float coord = F32Buffer(instance.originalPositions).v[3 * index + i]; 86 | originalVertex[i] = coord; 87 | F32Buffer(instance.normals).v[3 * index + i] = 0.f; 88 | } 89 | 90 | vec3 newVertex = originalVertex; 91 | /* ripple: displaces the vertex along its normalized, component-swizzled original position by three phase-shifted waves; skipped while animationState is exactly zero */ 92 | if(constants.rippleEnabled != 0 && constants.animationState != 0.f) 93 | { 94 | /* NOTE(review): normalize(originalVertex.zyx) below is undefined for a vertex exactly at the origin - confirm inputs never contain one */ 95 | float maxCoord = max(abs(originalVertex.x), max(abs(originalVertex.y), abs(originalVertex.z))); 96 | 97 | float frequency = constants.rippleFrequency / constants.geometrySize; 98 | 99 | vec3 wave = vec3(sin(maxCoord * frequency + seed + constants.animationState * constants.rippleSpeed), 100 | cos(maxCoord * frequency * 3 + seed + constants.animationState * constants.rippleSpeed), 101 | sin(maxCoord * frequency * 1.2f + seed + constants.animationState * constants.rippleSpeed)); 102 | newVertex += (normalize(originalVertex.zyx)) * (constants.rippleAmplitude * constants.geometrySize * wave); 103 | } 104 | /* twist: rotates the (possibly rippled) vertex about a time-dependent axis by an angle that scales with the original distance from the origin */ 105 | if(constants.twistEnabled != 0 && constants.animationState != 0.f) 106 | { 107 | float time = constants.animationState * constants.twistSpeed; 108 | float stage = mod(time, 3.f); 109 | vec3 axis; 110 | for(uint32_t i = 0; i < 3; i++) 111 | { 112 | if(stage >= i && stage <= i + 1) 113 | { 114 | axis[i] = 1; 115 | } 116 | else 117 | { 118 | axis[i] = 0; 119 | } 120 | } 121 | /* the loop above sets axis component i to 1 while stage lies within [i, i + 1] and to 0 otherwise */ 122 | float angle = (sin(time * 2.f * M_PI) * length(originalVertex / (constants.geometrySize * .5f)) * constants.twistMaxAngle); 123 | 124 | mat3 rotation = rotationMatrix(axis, angle); 125 | newVertex = rotation * newVertex; 126 | } 127 | 128 | /* store the animated position, one float per component */ 129 | for(uint32_t i = 0; i < 3; i++) 130 | { 131 | F32Buffer(instance.positions).v[3 * index + i] = newVertex[i]; 132 | } 133 | } -------------------------------------------------------------------------------- /shaders/cluster_statistics.comp.glsl: 
--------------------------------------------------------------------------------
/*
* Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

#version 460

#extension GL_GOOGLE_include_directive : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable
// The 64-bit atomicAdd() in main() is provided by this extension; request it here
// instead of depending on whatever shaderio.h happens to enable.
#extension GL_EXT_shader_atomic_int64 : enable
#extension GL_EXT_buffer_reference : enable
#extension GL_EXT_buffer_reference2 : enable
#extension GL_EXT_scalar_block_layout : enable

#include "shaderio.h"

layout(local_size_x = STATISTICS_WORKGROUP_SIZE) in;
layout(push_constant, scalar) uniform pushConstant
{
  StatisticsConstants push;
};

// One invocation per entry of `push.sizes`: atomically adds that entry,
// widened to 64 bit, to the single accumulator `push.sum.d[0]`.
void main()
{
  // Invocations at or beyond `push.count` have no entry to add.
  if(gl_GlobalInvocationID.x < push.count)
  {
    atomicAdd(push.sum.d[0], uint64_t(push.sizes.d[gl_GlobalInvocationID.x]));
  }
}
--------------------------------------------------------------------------------
/shaders/hbao.h:
--------------------------------------------------------------------------------
/*
* Copyright (c) 2018-2023, NVIDIA
CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2018-2023 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/

#ifndef NVHBAO_H_
#define NVHBAO_H_

#define NVHBAO_RANDOMTEX_SIZE 4
#define NVHBAO_NUM_DIRECTIONS 8

// Descriptor binding slots shared by all HBAO shaders.
#define NVHBAO_MAIN_UBO 0
#define NVHBAO_MAIN_TEX_DEPTH 1
#define NVHBAO_MAIN_TEX_LINDEPTH 2
#define NVHBAO_MAIN_TEX_VIEWNORMAL 3
#define NVHBAO_MAIN_TEX_DEPTHARRAY 4
#define NVHBAO_MAIN_TEX_RESULTARRAY 5
#define NVHBAO_MAIN_TEX_RESULT 6
#define NVHBAO_MAIN_TEX_BLUR 7
#define NVHBAO_MAIN_IMG_LINDEPTH 8
#define NVHBAO_MAIN_IMG_VIEWNORMAL 9
#define NVHBAO_MAIN_IMG_DEPTHARRAY 10
#define NVHBAO_MAIN_IMG_RESULTARRAY 11
#define NVHBAO_MAIN_IMG_RESULT 12
#define NVHBAO_MAIN_IMG_BLUR 13
#define NVHBAO_MAIN_IMG_OUT 14

#ifndef NVHBAO_BLUR
#define NVHBAO_BLUR 1
#endif

// 1 is slower
#ifndef NVHBAO_SKIP_INTERPASS
#define NVHBAO_SKIP_INTERPASS 0
#endif

#ifdef __cplusplus
namespace glsl {
using namespace glm;
#endif

// Contents of the HBAO control uniform buffer (std140 in GLSL).
// Field order and padding are part of the host/shader contract - do not reorder.
struct NVHBAOData
{
  float RadiusToScreen;  // AO radius scaled to screen pixels; hbao_calc divides it by view depth
  float R2;              // radius * radius
  float NegInvR2;        // -1 / (radius * radius); Falloff() evaluates distance^2 * NegInvR2 + 1
  float NDotVBias;

  vec2 InvFullResolution;
  vec2 InvQuarterResolution;

  ivec2 SourceResolutionScale;
  float AOMultiplier;
  float PowExponent;

  vec4  projReconstruct;
  vec4  projInfo;   // UVToView(): view.xy = (uv * projInfo.xy + projInfo.zw) * depth factor
  int   projOrtho;  // non-zero selects the orthographic code paths
  int   _pad0;
  ivec2 _pad1;

  ivec2 FullResolution;
  ivec2 QuarterResolution;

  mat4 InvProjMatrix;

  vec4 float2Offsets[NVHBAO_RANDOMTEX_SIZE * NVHBAO_RANDOMTEX_SIZE];
  vec4 jitters[NVHBAO_RANDOMTEX_SIZE * NVHBAO_RANDOMTEX_SIZE];
};

// keep all these equal size
struct NVHBAOMainPush
{
  int   layer;
  int   _pad0;
  ivec2 _pad1;
};

struct NVHBAOBlurPush
{
  vec2  invResolutionDirection;
  float sharpness;
  float _pad;
};

#ifdef __cplusplus
}
#else

layout(std140, binding = NVHBAO_MAIN_UBO) uniform controlBuffer
{
  NVHBAOData control;
};

// The opt-out macro was historically misspelled "NVHABO_GFX"; honor both spellings so
// that existing users keep working and the correctly spelled name works as well.
#if !defined(NVHBAO_GFX) && !defined(NVHABO_GFX)

layout(local_size_x = 32, local_size_y = 2) in;

// Maps the current invocation of the 32x2 workgroup onto one pixel of an 8x8 tile.
//   coord    - receives the integer pixel coordinate (always written)
//   texCoord - receives the pixel-center UV (only written when in bounds)
//   res      - image size in pixels, invRes - its reciprocal
// Returns true when the pixel lies outside `res`, i.e. the caller should exit.
bool setupCoord(inout ivec2 coord, inout vec2 texCoord, ivec2 res, vec2 invRes)
{
  ivec2 base = ivec2(gl_WorkGroupID.xy) * 8;

  // x in [0,16)  -> 2 wide, 8 tall column block;
  // x in [16,32) -> same block shifted right by 2 (the -8 undoes the y overflow of x / 2);
  // y selects the left or right 4-pixel half of the tile.
  ivec2 subset = ivec2(int(gl_LocalInvocationID.x) & 1, int(gl_LocalInvocationID.x) / 2);
  subset += gl_LocalInvocationID.x >= 16 ? ivec2(2, -8) : ivec2(0, 0);
  subset += ivec2(gl_LocalInvocationID.y * 4, 0);

  coord = base + subset;

  if(coord.x >= res.x || coord.y >= res.y)
    return true;

  texCoord = (vec2(coord) + vec2(0.5)) * invRes;

  return false;
}

// setupCoord() for the full-resolution target.
bool setupCoordFull(inout ivec2 coord, inout vec2 texCoord)
{
  return setupCoord(coord, texCoord, control.FullResolution, control.InvFullResolution);
}

// setupCoord() for the quarter-resolution target.
bool setupCoordQuarter(inout ivec2 coord, inout vec2 texCoord)
{
  return setupCoord(coord, texCoord, control.QuarterResolution, control.InvQuarterResolution);
}

#endif

#endif
#endif
--------------------------------------------------------------------------------
/shaders/hbao_blur.comp.glsl:
--------------------------------------------------------------------------------
/*
* Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2018-2023 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/

#version 460
#extension GL_GOOGLE_include_directive : enable
#extension GL_EXT_control_flow_attributes : require

#include "hbao.h"

layout(binding=NVHBAO_MAIN_IMG_BLUR, rg16f) uniform image2D imgBlur;
layout(binding=NVHBAO_MAIN_TEX_RESULT) uniform sampler2D texSource;

#include "hbao_blur.glsl"

//-------------------------------------------------------------------------

// First blur pass: filters texSource along the pushed direction and stores the
// two-channel result of BlurRun() into imgBlur.
void main()
{
  ivec2 pixel;
  vec2  uv;

  // setupCoordFull() reports true for invocations outside the full-resolution image.
  if (setupCoordFull(pixel, uv))
  {
    return;
  }

  vec2 blurred = BlurRun(uv);
  imageStore(imgBlur, pixel, vec4(blurred, 0, 0));
}
--------------------------------------------------------------------------------
/shaders/hbao_blur.glsl:
--------------------------------------------------------------------------------
/*
* Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2018-2023 NVIDIA CORPORATION
* SPDX-License-Identifier: Apache-2.0
*/

layout(push_constant) uniform pushData {
  NVHBAOBlurPush blur;
};


const float KERNEL_RADIUS = 3;

//-------------------------------------------------------------------------

// Evaluates one blur tap.
//   uv       - sample position in texSource (x holds the value to blur, y its depth)
//   r        - distance of the tap from the kernel center, in taps
//   center_c - center value (unused here, kept for the existing call signature)
//   center_d - center depth, used to down-weight taps across depth discontinuities
//   w_total  - running weight sum, incremented by this tap's weight
// Returns the weighted tap value.
float BlurFunction(vec2 uv, float r, float center_c, float center_d, inout float w_total)
{
  vec2 tap = texture(texSource, uv).xy;

  const float BlurSigma   = float(KERNEL_RADIUS) * 0.5;
  const float BlurFalloff = 1.0 / (2.0*BlurSigma*BlurSigma);

  // Spatial falloff combined with a depth-difference penalty scaled by blur.sharpness.
  float depthDelta = (tap.y - center_d) * blur.sharpness;
  float weight     = exp2(-r*r*BlurFalloff - depthDelta*depthDelta);

  w_total += weight;
  return tap.x * weight;
}

// Runs the 1D depth-aware blur centered at texCoord along +/- blur.invResolutionDirection.
// Returns (normalized blurred value, unmodified center depth).
vec2 BlurRun(vec2 texCoord)
{
  vec2  center   = texture(texSource, texCoord).xy;
  float center_c = center.x;
  float center_d = center.y;

  // The center tap contributes with weight 1.
  float valueSum  = center_c;
  float weightSum = 1.0;

  // Taps in the positive direction ...
  [[unroll]]
  for (float r = 1; r <= KERNEL_RADIUS; ++r)
  {
    vec2 tapUV = texCoord + blur.invResolutionDirection * r;
    valueSum += BlurFunction(tapUV, r, center_c, center_d, weightSum);
  }

  // ... followed by the taps in the negative direction.
  [[unroll]]
  for (float r = 1; r <= KERNEL_RADIUS; ++r)
  {
    vec2 tapUV = texCoord - blur.invResolutionDirection * r;
    valueSum += BlurFunction(tapUV, r, center_c, center_d, weightSum);
  }

  return vec2(valueSum/weightSum, center_d);
  //return vec2(aoz);
}
--------------------------------------------------------------------------------
/shaders/hbao_blur_apply.comp.glsl:
--------------------------------------------------------------------------------
/*
* Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2018-2023, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

#version 460
#extension GL_GOOGLE_include_directive : enable
#extension GL_EXT_control_flow_attributes : require
#extension GL_EXT_shader_image_load_formatted : require

#include "hbao.h"

layout(binding=NVHBAO_MAIN_IMG_OUT) uniform image2D imgOut;
layout(binding=NVHBAO_MAIN_TEX_BLUR) uniform sampler2D texSource;

#include "hbao_blur.glsl"

//-------------------------------------------------------------------------


// Second blur pass: blurs texSource and multiplies the RGB of the pixel already
// stored in imgOut by the blurred value; alpha is written as 1.
void main()
{
  ivec2 pixel;
  vec2  uv;

  // setupCoordFull() reports true for invocations outside the full-resolution image.
  if (setupCoordFull(pixel, uv))
  {
    return;
  }

  vec2 blurred = BlurRun(uv);
  vec4 current = imageLoad(imgOut, pixel);

  vec3 shaded = vec3(current.xyz * blurred.x);
  imageStore(imgOut, pixel, vec4(shaded, 1));
}
--------------------------------------------------------------------------------
/shaders/hbao_calc.comp.glsl:
--------------------------------------------------------------------------------
/*
* Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2018-2023, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

/*
Based on DeinterleavedTexturing sample by Louis Bavoil
https://github.com/NVIDIAGameWorks/D3DSamples/tree/master/samples/DeinterleavedTexturing

*/

#version 460
#extension GL_GOOGLE_include_directive : enable
#extension GL_EXT_control_flow_attributes : require

#include "hbao.h"

layout(push_constant) uniform pushData {
  NVHBAOMainPush push;
};

#define M_PI 3.14159265f

// tweakables
const float NUM_STEPS = 12;
const float NUM_DIRECTIONS = NVHBAO_NUM_DIRECTIONS; // texRandom/g_Jitter initialization depends on this

layout(binding=NVHBAO_MAIN_TEX_DEPTHARRAY) uniform sampler2DArray texLinearDepth;
layout(binding=NVHBAO_MAIN_TEX_VIEWNORMAL) uniform sampler2D texViewNormal;


#if NVHBAO_SKIP_INTERPASS
  #if NVHBAO_BLUR
    layout(binding=NVHBAO_MAIN_IMG_RESULT,rg16f) uniform image2D imgOutput;
  #else
    layout(binding=NVHBAO_MAIN_IMG_RESULT,r8) uniform image2D imgOutput;
  #endif
  // Writes straight into the full-resolution result: the quarter-resolution coordinate
  // is expanded by 4 and offset by this layer's position inside the 4x4 block.
  void outputColor(ivec2 icoord, vec4 color)
  {
    ivec2 layerOffset = ivec2(push.layer & 3, push.layer / 4);
    icoord = icoord * 4 + layerOffset;

    bool inside = icoord.x < control.FullResolution.x && icoord.y < control.FullResolution.y;
    if (inside)
    {
      imageStore(imgOutput, icoord, color);
    }
  }
#else
  #if NVHBAO_BLUR
    layout(binding=NVHBAO_MAIN_IMG_RESULTARRAY,rg16f) uniform image2DArray imgOutput;
  #else
    layout(binding=NVHBAO_MAIN_IMG_RESULTARRAY,r8) uniform image2DArray imgOutput;
  #endif
  // Writes into this layer's slice of the quarter-resolution result array.
  void outputColor(ivec2 icoord, vec4 color)
  {
    imageStore(imgOutput, ivec3(icoord, push.layer), color);
  }
#endif


// Per-layer constants selected by the pushed layer index.
vec2 g_Float2Offset = control.float2Offsets[push.layer].xy;
vec4 g_Jitter       = control.jitters[push.layer];

// Appends the current layer to a UV so it can address the depth array texture.
vec3 getQuarterCoord(vec2 UV)
{
  return vec3(UV, float(push.layer));
}


//----------------------------------------------------------------------------------

// Reconstructs a view-space position from a UV and a view-space depth.
vec3 UVToView(vec2 uv, float eye_z)
{
  // Orthographic projections do not scale xy with depth.
  float depthScale = (control.projOrtho != 0 ? 1. : eye_z);
  return vec3((uv * control.projInfo.xy + control.projInfo.zw) * depthScale, eye_z);
}

// View-space position of the current layer's depth sample at UV.
vec3 FetchQuarterResViewPos(vec2 UV)
{
  float depth = textureLod(texLinearDepth, getQuarterCoord(UV), 0).x;
  return UVToView(UV, depth);
}

//----------------------------------------------------------------------------------
float Falloff(float DistanceSquare)
{
  // 1 scalar mad instruction
  return DistanceSquare * control.NegInvR2 + 1.0;
}

//----------------------------------------------------------------------------------
// P = view-space position at the kernel center
// N = view-space normal at the kernel center
// S = view-space position of the current sample
//----------------------------------------------------------------------------------
float ComputeAO(vec3 P, vec3 N, vec3 S)
{
  vec3  toSample = S - P;
  float distSq   = dot(toSample, toSample);
  float cosAngle = dot(N, toSample) * 1.0/sqrt(distSq);

  // Use saturate(x) instead of max(x,0.f) because that is faster on Kepler
  float angular  = clamp(cosAngle - control.NDotVBias, 0, 1);
  float distance = clamp(Falloff(distSq), 0, 1);
  return angular * distance;
}

//----------------------------------------------------------------------------------
// Rotates Dir by the angle whose (cos, sin) pair is given in CosSin.
vec2 RotateDirection(vec2 Dir, vec2 CosSin)
{
  float x = Dir.x*CosSin.x - Dir.y*CosSin.y;
  float y = Dir.x*CosSin.y + Dir.y*CosSin.x;
  return vec2(x, y);
}

//----------------------------------------------------------------------------------
vec4 GetJitter()
{
  // Get the current jitter vector from the per-pass constant buffer
  return g_Jitter;
}

//----------------------------------------------------------------------------------
// Marches NUM_STEPS samples along each of NUM_DIRECTIONS jittered directions around
// FullResUV and accumulates their occlusion. Returns visibility in [0,1].
float ComputeCoarseAO(vec2 FullResUV, float RadiusPixels, vec4 Rand, vec3 ViewPosition, vec3 ViewNormal)
{
  RadiusPixels /= 4.0;

  // Divide by NUM_STEPS+1 so that the farthest samples are not fully attenuated
  float stepPixels = RadiusPixels / (NUM_STEPS + 1);

  const float angleStep = 2.0 * M_PI / NUM_DIRECTIONS;
  float occlusion = 0;

  [[unroll]]
  for (float dirIdx = 0; dirIdx < NUM_DIRECTIONS; ++dirIdx)
  {
    float theta = angleStep * dirIdx;

    // Compute normalized 2D direction
    vec2 rayDir = RotateDirection(vec2(cos(theta), sin(theta)), Rand.xy);

    // Jitter starting sample within the first step
    float rayPixels = (Rand.z * stepPixels + 1.0);

    for (float stepIdx = 0; stepIdx < NUM_STEPS; ++stepIdx)
    {
      // Snap to whole quarter-resolution texels before converting to UV.
      vec2 snappedUV = round(rayPixels * rayDir) * control.InvQuarterResolution + FullResUV;
      vec3 samplePos = FetchQuarterResViewPos(snappedUV);

      rayPixels += stepPixels;

      occlusion += ComputeAO(ViewPosition, ViewNormal, samplePos);
    }
  }

  occlusion *= control.AOMultiplier / (NUM_DIRECTIONS * NUM_STEPS);
  return clamp(1.0 - occlusion * 2.0, 0, 1);
}

//----------------------------------------------------------------------------------
// Computes the AO term for one quarter-resolution pixel of the pushed layer.
void main()
{
  ivec2 intCoord;
  vec2  texCoord;

  // setupCoordQuarter() reports true for invocations outside the quarter-resolution image.
  if (setupCoordQuarter(intCoord, texCoord))
  {
    return;
  }

  // Full-resolution pixel position this layer sample corresponds to, and its UV.
  vec2 base = vec2(intCoord.xy) * 4.0 + g_Float2Offset;
  vec2 uv   = base * (control.InvQuarterResolution / 4.0);

  vec3 ViewPosition = FetchQuarterResViewPos(uv);

  // The normal texture stores N * 0.5 + 0.5; decode and flip it.
  vec4 NormalAndAO = texelFetch(texViewNormal, ivec2(base), 0);
  vec3 ViewNormal  = -(NormalAndAO.xyz * 2.0 - 1.0);

  // Project the AO radius into screen space (perspective: divide by view depth).
  float depthDivisor = (control.projOrtho != 0 ? 1.0 : ViewPosition.z);
  float RadiusPixels = control.RadiusToScreen / depthDivisor;

  // Get jitter vector for the current full-res pixel
  vec4 Rand = GetJitter();

  float AO       = ComputeCoarseAO(uv, RadiusPixels, Rand, ViewPosition, ViewNormal);
  float shapedAO = pow(AO, control.PowExponent);

#if NVHBAO_BLUR
  // The blur needs the view depth alongside the AO value.
  outputColor(intCoord, vec4(shapedAO, ViewPosition.z, 0, 0));
#else
  outputColor(intCoord, vec4(shapedAO));
#endif

}
--------------------------------------------------------------------------------
/shaders/hbao_deinterleave.comp.glsl:
--------------------------------------------------------------------------------
/*
* Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2018-2023, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

#version 460
#extension GL_GOOGLE_include_directive : enable
#extension GL_EXT_control_flow_attributes : require

#include "hbao.h"

layout(binding=NVHBAO_MAIN_TEX_LINDEPTH) uniform sampler2D texLinearDepth;
layout(binding=NVHBAO_MAIN_IMG_DEPTHARRAY,r32f) uniform image2DArray imgDepthArray;

//----------------------------------------------------------------------------------

// Stores one depth value into slice `layer` of the quarter-resolution depth array.
void outputColor(ivec2 intCoord, int layer, float value)
{
  imageStore(imgDepthArray, ivec3(intCoord, layer), vec4(value, 0, 0, 0));
}

// Splits each 4x4 block of the linear depth texture into 16 array slices,
// one quarter-resolution texel per invocation and slice.
void main()
{
  ivec2 intCoord;
  vec2  texCoord;

  // setupCoordQuarter() reports true for invocations outside the quarter-resolution image.
  if (setupCoordQuarter(intCoord, texCoord))
  {
    return;
  }

  vec2 uv = vec2(intCoord) * 4.0 + 0.5;
  uv *= control.InvFullResolution;

  // Four 2x2 gathers cover the 4x4 block: top-left, top-right, bottom-left, bottom-right.
  vec4 topLeft     = textureGather      (texLinearDepth, uv, 0);
  vec4 topRight    = textureGatherOffset(texLinearDepth, uv, ivec2(2,0), 0);
  vec4 bottomLeft  = textureGatherOffset(texLinearDepth, uv, ivec2(0,2), 0);
  vec4 bottomRight = textureGatherOffset(texLinearDepth, uv, ivec2(2,2), 0);

  // Slices 0..7 come from the upper two gathers.
  outputColor(intCoord, 0, topLeft.w);
  outputColor(intCoord, 1, topLeft.z);
  outputColor(intCoord, 2, topRight.w);
  outputColor(intCoord, 3, topRight.z);
  outputColor(intCoord, 4, topLeft.x);
  outputColor(intCoord, 5, topLeft.y);
  outputColor(intCoord, 6, topRight.x);
  outputColor(intCoord, 7, topRight.y);

  // Slices 8..15 repeat the same component pattern for the lower two gathers.
  outputColor(intCoord, 0 + 8, bottomLeft.w);
  outputColor(intCoord, 1 + 8, bottomLeft.z);
  outputColor(intCoord, 2 + 8, bottomRight.w);
  outputColor(intCoord, 3 + 8, bottomRight.z);
  outputColor(intCoord, 4 + 8, bottomLeft.x);
  outputColor(intCoord, 5 + 8, bottomLeft.y);
  outputColor(intCoord, 6 + 8, bottomRight.x);
  outputColor(intCoord, 7 + 8, bottomRight.y);
}
--------------------------------------------------------------------------------
/shaders/hbao_depthlinearize.comp.glsl:
--------------------------------------------------------------------------------
/*
* Copyright (c)
2018-2023, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2018-2023, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

#version 460
#extension GL_GOOGLE_include_directive : enable
#extension GL_EXT_control_flow_attributes : require

#include "hbao.h"

layout(binding=NVHBAO_MAIN_TEX_DEPTH) uniform sampler2D inputTexture;
layout(binding=NVHBAO_MAIN_IMG_LINDEPTH, r32f) uniform image2D imgLinearDepth;
#if NVHBAO_SKIP_INTERPASS
layout(binding=NVHBAO_MAIN_IMG_DEPTHARRAY, r32f) uniform image2DArray imgLinearDepthArray;
#endif


// Converts a depth-buffer value `d` into a view-space z.
// The active path un-projects through control.InvProjMatrix and ignores `clipInfo`;
// the disabled path shows the closed-form alternative that uses `clipInfo`.
float reconstructCSZ(float d, vec4 clipInfo) {
#if 1
  vec4 unprojected = control.InvProjMatrix * vec4(0,0,d,1);
  return unprojected.z / unprojected.w;
#else
  // clipInfo = z_n * z_f, z_n - z_f, z_f, perspective = 1 : 0

  if (clipInfo[3] != 0) {
    return (clipInfo[0] / (clipInfo[1] * d + clipInfo[2]));
  }
  else {
    return (clipInfo[1]+clipInfo[2] - d * clipInfo[1]);
  }
#endif

}
/*
  if (in_perspective == 1.0) // perspective
  {
    ze = (zNear * zFar) / (zFar - zb * (zFar - zNear));
  }
  else // orthographic proj
  {
    ze = zNear + zb * (zFar - zNear);
  }
*/
// Linearizes the depth buffer into imgLinearDepth; with NVHBAO_SKIP_INTERPASS it also
// scatters the value directly into the de-interleaved depth array.
void main()
{
  ivec2 intCoord;
  vec2  texCoord;

  // setupCoordFull() reports true for invocations outside the full-resolution image.
  if (setupCoordFull(intCoord, texCoord))
  {
    return;
  }

  float rawDepth = textureLod(inputTexture, texCoord.xy, 0).x;
  float linDepth = reconstructCSZ(rawDepth, control.projReconstruct);
  vec4  texel    = vec4(linDepth,0,0,0);

  imageStore(imgLinearDepth, intCoord, texel);
#if NVHBAO_SKIP_INTERPASS
  // Position inside the 4x4 block selects the slice; the block index is the slice texel.
  ivec2 blockOffset   = intCoord & 3;
  int   sliceId       = blockOffset.y * 4 + blockOffset.x;
  ivec2 quarterResPos = intCoord >> 2;
  imageStore(imgLinearDepthArray, ivec3(quarterResPos, sliceId), texel);
#endif
}
--------------------------------------------------------------------------------
/shaders/hbao_reinterleave.comp.glsl:
--------------------------------------------------------------------------------
/*
* Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2018-2023, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

#version 460
#extension GL_GOOGLE_include_directive : enable
#extension GL_EXT_control_flow_attributes : require

#include "hbao.h"

layout(binding=NVHBAO_MAIN_TEX_RESULTARRAY) uniform sampler2DArray texResultsArray;
#if NVHBAO_BLUR
layout(binding=NVHBAO_MAIN_IMG_RESULT, rg16f) uniform image2D imgResult;
#else
layout(binding=NVHBAO_MAIN_IMG_RESULT, r8) uniform image2D imgResult;
#endif

//----------------------------------------------------------------------------------

// Gathers the per-layer quarter-resolution results back into one full-resolution image.
void main() {
  ivec2 intCoord;
  vec2  texCoord;

  // setupCoordFull() reports true for invocations outside the full-resolution image.
  if (setupCoordFull(intCoord, texCoord))
  {
    return;
  }

  // Position inside the 4x4 block selects the slice; the block index is the slice texel.
  ivec2 blockOffset   = intCoord & 3;
  int   sliceId       = blockOffset.y * 4 + blockOffset.x;
  ivec2 quarterResPos = intCoord >> 2;

  vec4 texel = texelFetch(texResultsArray, ivec3(quarterResPos, sliceId), 0);

#if NVHBAO_BLUR
  // Two channels are forwarded when the blur passes follow.
  imageStore(imgResult, intCoord, vec4(texel.xy, 0, 0));
#else
  imageStore(imgResult, intCoord, vec4(texel.x));
#endif
}
--------------------------------------------------------------------------------
/shaders/hbao_viewnormal.comp.glsl:
--------------------------------------------------------------------------------
/*
* Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2018-2023, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

#version 460
#extension GL_GOOGLE_include_directive : enable
#extension GL_EXT_control_flow_attributes : require

#include "hbao.h"

layout(binding=NVHBAO_MAIN_TEX_LINDEPTH) uniform sampler2D texLinearDepth;
layout(binding=NVHBAO_MAIN_IMG_VIEWNORMAL,rgba8) uniform image2D imgViewNormal;

//----------------------------------------------------------------------------------

// Reconstructs a view-space position from a UV and a view-space depth.
vec3 UVToView(vec2 uv, float eye_z)
{
  // Orthographic projections do not scale xy with depth.
  float depthScale = (control.projOrtho != 0 ? 1. : eye_z);
  return vec3((uv * control.projInfo.xy + control.projInfo.zw) * depthScale, eye_z);
}

// View-space position of the linear depth sample at UV.
vec3 FetchViewPos(vec2 UV)
{
  float depth = textureLod(texLinearDepth, UV, 0).x;
  return UVToView(UV, depth);
}

// Of the two one-sided differences around P, returns the shorter one.
// On a tie the difference towards Pl is returned.
vec3 MinDiff(vec3 P, vec3 Pr, vec3 Pl)
{
  vec3 forward  = Pr - P;
  vec3 backward = P - Pl;

  if (dot(forward, forward) < dot(backward, backward))
  {
    return forward;
  }
  return backward;
}

// Estimates the view-space normal at P from its four direct screen-space neighbors.
vec3 ReconstructNormal(vec2 UV, vec3 P)
{
  vec2 texel = control.InvFullResolution;

  vec3 Pr = FetchViewPos(UV + vec2(texel.x, 0));
  vec3 Pl = FetchViewPos(UV + vec2(-texel.x, 0));
  vec3 Pt = FetchViewPos(UV + vec2(0, texel.y));
  vec3 Pb = FetchViewPos(UV + vec2(0, -texel.y));

  vec3 horizontal = MinDiff(P, Pr, Pl);
  vec3 vertical   = MinDiff(P, Pt, Pb);
  return normalize(cross(horizontal, vertical));
}

//----------------------------------------------------------------------------------

// Writes the reconstructed view normal, encoded as N * 0.5 + 0.5, into imgViewNormal.
void main() {
  ivec2 intCoord;
  vec2  texCoord;

  // setupCoordFull() reports true for invocations outside the full-resolution image.
  if (setupCoordFull(intCoord, texCoord))
  {
    return;
  }

  vec3 viewPos = FetchViewPos(texCoord);
  vec3 normal  = ReconstructNormal(texCoord, viewPos);

  imageStore(imgViewNormal, intCoord, vec4(normal*0.5 + 0.5, 0));
}
--------------------------------------------------------------------------------
/shaders/render_cluster_bbox.frag.glsl:
--------------------------------------------------------------------------------
/*
* Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/
#version 460

#extension GL_GOOGLE_include_directive : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable
#extension GL_EXT_buffer_reference : enable
#extension GL_EXT_buffer_reference2 : enable
#extension GL_EXT_scalar_block_layout : enable

#include "shaderio.h"

///////////////////////////////////////////////////

// Resources below are not referenced by main() directly; they are declared
// before render_shading.glsl is included.
// NOTE(review): presumably that include relies on them - confirm before removing any.

layout(push_constant) uniform pushData
{
  uint instanceID;
} push;

layout(std140, binding = BINDINGS_FRAME_UBO, set = 0) uniform frameConstantsBuffer
{
  FrameConstants view;
};

layout(scalar, binding = BINDINGS_READBACK_SSBO, set = 0) buffer readbackBuffer
{
  Readback readback;
};

layout(scalar, binding = BINDINGS_RENDERINSTANCES_SSBO, set = 0) buffer renderInstancesBuffer
{
  RenderInstance instances[];
};

///////////////////////////////////////////////////

#include "render_shading.glsl"

///////////////////////////////////////////////////

// Per-primitive input from the bounding-box mesh shader.
layout(location=0) in Interpolants
{
  flat uint clusterID;
} IN;

///////////////////////////////////////////////////

layout(location=0,index=0) out vec4 out_Color;

///////////////////////////////////////////////////

// Colors each bounding box by a hash of its cluster id.
void main()
{
  // hash -> four unorm8 channels
  vec4 hashed = unpackUnorm4x8(murmurHash(IN.clusterID));

  // compress the color into [0.1, 1.0] and force alpha to one
  out_Color = vec4(hashed.xyz * 0.9 + 0.1, 1.0);
}
--------------------------------------------------------------------------------
/shaders/render_cluster_bbox.mesh.glsl:
--------------------------------------------------------------------------------
/*
* Copyright (c) 2024-2025, NVIDIA CORPORATION.
All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

#version 460

#extension GL_GOOGLE_include_directive : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable
#extension GL_EXT_buffer_reference : enable
#extension GL_EXT_buffer_reference2 : enable
#extension GL_EXT_scalar_block_layout : enable

#extension GL_EXT_mesh_shader : require
#extension GL_EXT_control_flow_attributes: require

#include "shaderio.h"

layout(push_constant) uniform pushData
{
  uint instanceID;
} push;

layout(std140, binding = BINDINGS_FRAME_UBO, set = 0) uniform frameConstantsBuffer
{
  FrameConstants view;
};

layout(scalar, binding = BINDINGS_READBACK_SSBO, set = 0) buffer readbackBuffer
{
  Readback readback;
};

layout(scalar, binding = BINDINGS_RENDERINSTANCES_SSBO, set = 0) buffer renderInstancesBuffer
{
  RenderInstance instances[];
};

// Read-only view on the per-cluster bounding boxes of one instance.
layout(buffer_reference, buffer_reference_align = 4, scalar) restrict readonly buffer BBoxes_in
{
  BBox d[];
};

////////////////////////////////////////////

layout(location=0) out Interpolants {
  flat uint clusterID;
} OUT[];

////////////////////////////////////////////

#define MESH_WORKGROUP_SIZE 32

// A box is drawn as 8 corner vertices and 12 line primitives.
// BOX_LINE_THREADS invocations cooperate on one box, each writing 3 of its lines.
// It must stay a power of two (it is used as a bit mask below) and divide
// MESH_WORKGROUP_SIZE.
#define BOX_VERTICES 8
#define BOX_LINES 12
#define BOX_LINE_THREADS 4

layout(local_size_x=MESH_WORKGROUP_SIZE) in;
layout(max_vertices=BBOXES_PER_MESHLET * BOX_VERTICES, max_primitives=BBOXES_PER_MESHLET * BOX_LINES) out;
layout(lines) out;

////////////////////////////////////////////

// Emits wireframe boxes for up to BBOXES_PER_MESHLET clusters of the instance
// selected by the push constant. Workgroup `w` handles the clusters starting at
// `w * BBOXES_PER_MESHLET`.
void main()
{
  RenderInstance instance = instances[push.instanceID];

  BBoxes_in bboxes = BBoxes_in(instance.clusterBboxes);

  uint baseID = gl_WorkGroupID.x * BBOXES_PER_MESHLET;
  // Guard against a workgroup that starts past the last cluster; without it the
  // unsigned subtraction would wrap around.
  uint numBoxes = baseID < instance.numClusters ? min(instance.numClusters, baseID + BBOXES_PER_MESHLET) - baseID : 0u;

  // numBoxes is uniform across the workgroup, so this call and the early-out
  // below are in uniform control flow.
  SetMeshOutputsEXT(numBoxes * BOX_VERTICES, numBoxes * BOX_LINES);

  if (numBoxes == 0)
  {
    return;
  }

  // Vertices: every invocation processes `vertexRuns` corners, strided by the workgroup size.
  const uint vertexRuns = ((BBOXES_PER_MESHLET * BOX_VERTICES) + MESH_WORKGROUP_SIZE-1) / MESH_WORKGROUP_SIZE;

  [[unroll]]
  for (uint32_t run = 0; run < vertexRuns; run++)
  {
    uint vert   = gl_LocalInvocationID.x + run * MESH_WORKGROUP_SIZE;
    uint box    = vert / BOX_VERTICES;
    uint corner = vert % BOX_VERTICES;

    // clamp so the load is always valid, the result is only stored for real boxes
    uint boxLoad = min(box,numBoxes-1);

    BBox bbox = bboxes.d[boxLoad + baseID];

    // bit 0/1/2 of the corner index selects lo or hi along x/y/z
    bvec3 weight    = bvec3((corner & 1) != 0, (corner & 2) != 0, (corner & 4) != 0);
    vec3  cornerPos = mix(bbox.lo, bbox.hi, weight);

    if (box < numBoxes)
    {
      gl_MeshVerticesEXT[vert].gl_Position = view.viewProjMatrix * (instance.worldMatrix * vec4(cornerPos,1));
      OUT[vert].clusterID = baseID + box;
    }
  }

  // Lines: BOX_LINE_THREADS invocations per box. Looping over runs keeps all boxes
  // covered even when BBOXES_PER_MESHLET exceeds MESH_WORKGROUP_SIZE / BOX_LINE_THREADS.
  {
    // corner pairs forming the closed loop around the lower face (corners 0..3)
    uvec2 boxIndices[4] = uvec2[4](
      uvec2(0,1),uvec2(1,3),uvec2(3,2),uvec2(2,0)
    );

    const uint lineRuns = ((BBOXES_PER_MESHLET * BOX_LINE_THREADS) + MESH_WORKGROUP_SIZE-1) / MESH_WORKGROUP_SIZE;

    [[unroll]]
    for (uint32_t run = 0; run < lineRuns; run++)
    {
      uint lineThread = gl_LocalInvocationID.x + run * MESH_WORKGROUP_SIZE;
      uint subID      = lineThread & (BOX_LINE_THREADS-1);
      uint box        = lineThread / BOX_LINE_THREADS;

      uvec2 circle = boxIndices[subID];

      if (box < numBoxes)
      {
        // lower
        gl_PrimitiveLineIndicesEXT[box * BOX_LINES + subID + 0] = circle + box * BOX_VERTICES;
        // upper (same loop, shifted to corners 4..7)
        gl_PrimitiveLineIndicesEXT[box * BOX_LINES + subID + 4] = circle + 4 + box * BOX_VERTICES;
        // connectors between lower and upper face
        gl_PrimitiveLineIndicesEXT[box * BOX_LINES + subID + 8] = uvec2(subID, subID + 4) + box * BOX_VERTICES;
      }
    }
  }
}

--------------------------------------------------------------------------------
/shaders/render_raster.frag.glsl:
--------------------------------------------------------------------------------
/*
* Copyright (c) 2024-2025, NVIDIA CORPORATION.  All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

#version 460

#extension GL_GOOGLE_include_directive : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable
#extension GL_EXT_buffer_reference : enable
#extension GL_EXT_buffer_reference2 : enable
#extension GL_EXT_scalar_block_layout : enable
#extension GL_EXT_shader_atomic_int64 : enable
#extension GL_EXT_fragment_shader_barycentric : enable

#include "shaderio.h"

layout(push_constant) uniform pushData
{
  uint instanceID;
} push;

// std140, like every other stage that binds this UBO (mesh, vertex and ray tracing
// shaders), so that all stages agree on the member offsets of FrameConstants.
layout(std140, binding = BINDINGS_FRAME_UBO, set = 0) uniform frameConstantsBuffer
{
  FrameConstants view;
};

layout(scalar, binding = BINDINGS_READBACK_SSBO, set = 0) buffer readbackBuffer
{
  Readback readback;
};

layout(scalar, binding = BINDINGS_RENDERINSTANCES_SSBO, set = 0) buffer renderInstancesBuffer
{
  RenderInstance instances[];
};

///////////////////////////////////////////////////

#include "render_shading.glsl"

///////////////////////////////////////////////////

// Matches the Interpolants block written by the mesh and vertex shaders.
layout(location = 0) in Interpolants
{
  vec3      wPos;
  vec3      wNormal;
  flat uint clusterID;
}
IN;

///////////////////////////////////////////////////

layout(location = 0, index = 0) out vec4 out_Color;

///////////////////////////////////////////////////

// Shades one fragment and, for the pixel under the mouse, records which
// cluster / triangle / instance was hit.
void main()
{
  vec3 wNormal;

  if(view.facetShading != 0)
  {
    // flat shading: face normal from screen-space derivatives of the world position
    // (left unnormalized here, exactly as it is handed to shading())
    wNormal = -cross(dFdx(IN.wPos), dFdy(IN.wPos));
  }
  else
  {
    wNormal = IN.wNormal;
  }

  uint visClusterID = IN.clusterID;
#if LINKED_MESH_SHADER
  if (view.visualize == VISUALIZE_TRIANGLES)
  {
    // perturb the id per triangle so every triangle gets its own color
    visClusterID ^= gl_PrimitiveID + 1;
  }
#endif

  // the raster path passes constant light terms
  const float overHeadLight = 1.0f;
  const float ambientLight  = 1.f;

  out_Color = shading(push.instanceID, IN.wPos, wNormal, visClusterID, overHeadLight, ambientLight);

#if DEBUG_VISUALIZATION
  // wireframe overlay, either globally or only for the filtered instance/cluster
  if(view.doWireframe != 0 || (view.visFilterInstanceID == push.instanceID && view.visFilterClusterID == IN.clusterID))
  {
    out_Color.xyz = addWireframe(out_Color.xyz, gl_BaryCoordEXT, gl_FrontFacing, fwidthFine(gl_BaryCoordEXT), view.wireColor);
  }
#endif

  // Picking: several fragments can cover the mouse pixel, so the results are
  // merged with atomicMax on a value that also packs the fragment depth.
  uvec2 pixelCoord = uvec2(gl_FragCoord.xy);
  if(pixelCoord == view.mousePosition)
  {
    // cluster id in the upper bits, triangle index in the lowest 8 bits
    uint32_t packedClusterTriangleId = (IN.clusterID << 8) | (gl_PrimitiveID & 0xFF);
    atomicMax(readback.clusterTriangleId, packPickingValue(packedClusterTriangleId, gl_FragCoord.z));
    atomicMax(readback.instanceId, packPickingValue(push.instanceID, gl_FragCoord.z));
  }
}
--------------------------------------------------------------------------------
/shaders/render_raster_clusters.mesh.glsl:
--------------------------------------------------------------------------------
/*
* Copyright (c) 2024-2025, NVIDIA CORPORATION.  All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

#version 460

#extension GL_GOOGLE_include_directive : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable
#extension GL_EXT_buffer_reference : enable
#extension GL_EXT_buffer_reference2 : enable
#extension GL_EXT_scalar_block_layout : enable

#extension GL_EXT_mesh_shader : require
#extension GL_EXT_control_flow_attributes: require

#include "shaderio.h"

layout(push_constant) uniform pushData
{
  uint instanceID;
} push;

layout(std140, binding = BINDINGS_FRAME_UBO, set = 0) uniform frameConstantsBuffer
{
  FrameConstants view;
};

layout(scalar, binding = BINDINGS_READBACK_SSBO, set = 0) buffer readbackBuffer
{
  Readback readback;
};

layout(scalar, binding = BINDINGS_RENDERINSTANCES_SSBO, set = 0) buffer renderInstancesBuffer
{
  RenderInstance instances[];
};

////////////////////////////////////////////

// Per-vertex outputs consumed by the raster fragment shader.
layout(location=0) out Interpolants {
  vec3      wPos;
  vec3      wNormal;
  flat uint clusterID;
} OUT[];

////////////////////////////////////////////

// Fallback values, used only when the macro is not already defined.

#ifndef MESHSHADER_WORKGROUP_SIZE
#define MESHSHADER_WORKGROUP_SIZE 32
#endif

#ifndef CLUSTER_VERTEX_COUNT
#define CLUSTER_VERTEX_COUNT 32
#endif

#ifndef CLUSTER_TRIANGLE_COUNT
#define CLUSTER_TRIANGLE_COUNT 32
#endif

#ifndef CLUSTER_DEDICATED_VERTICES
#define CLUSTER_DEDICATED_VERTICES 0
#endif

layout(local_size_x=MESHSHADER_WORKGROUP_SIZE) in;
layout(max_vertices=CLUSTER_VERTEX_COUNT, max_primitives=CLUSTER_TRIANGLE_COUNT) out;
layout(triangles) out;

// How many vertices / triangles each invocation handles (ceil of count / workgroup size).
const uint MESHLET_VERTEX_ITERATIONS   = ((CLUSTER_VERTEX_COUNT   + MESHSHADER_WORKGROUP_SIZE - 1) / MESHSHADER_WORKGROUP_SIZE);
const uint MESHLET_TRIANGLE_ITERATIONS = ((CLUSTER_TRIANGLE_COUNT + MESHSHADER_WORKGROUP_SIZE - 1) / MESHSHADER_WORKGROUP_SIZE);

////////////////////////////////////////////

// One workgroup rasterizes one cluster: gl_WorkGroupID.x is the cluster index
// within the instance selected by the push constant.
void main()
{
  RenderInstance instance = instances[push.instanceID];
  Cluster        cluster  = Clusters_in(instance.clusters).d[gl_WorkGroupID.x];

  // last valid element, used to clamp load indices further down
  uint lastVertex   = cluster.numVertices-1;
  uint lastTriangle = cluster.numTriangles-1;

  // No per-triangle culling is done: that keeps this mesh shader simple,
  // and culling may not always pay off anyway.

  SetMeshOutputsEXT(cluster.numVertices, cluster.numTriangles);

  vec3s_in positions = vec3s_in(instance.positions);
  vec3s_in normals   = vec3s_in(instance.normals);

#if !CLUSTER_DEDICATED_VERTICES
  // per-cluster list of the global vertex indices it uses
  uints_in clusterVertices = uints_in(instance.clusterLocalVertices);
#endif
  // per-cluster triangle indices, local to the cluster
  uint8s_in clusterTriangles = uint8s_in(instance.clusterLocalTriangles);

  mat4 objectToWorld = instance.worldMatrix;
  mat3 normalToWorld = transpose(inverse(mat3(objectToWorld)));

  // Both loops are unrolled so that the vertex and triangle loads are issued up
  // front, which reduces latency from dependent loads. Cluster generators mostly
  // fill clusters to capacity, so little of the forced loading is wasted.

  [[unroll]]
  for (uint iter = 0; iter < MESHLET_VERTEX_ITERATIONS; iter++)
  {
    uint vert = gl_LocalInvocationID.x + iter * MESHSHADER_WORKGROUP_SIZE;

    // The loop always runs over the maximum vertex count, so clamp the index
    // that is used for loading. Over-allocating a bit of space in the source
    // buffers would be an alternative that makes every load valid without clamping.
    uint vertLoad = min(vert, lastVertex);

#if CLUSTER_DEDICATED_VERTICES
    uint vertexIndex = vertLoad + cluster.firstLocalVertex;
#else
    // Map the cluster-local vertex to the geometry-wide vertex index,
    // which is what lets clusters share vertices.
    uint vertexIndex = clusterVertices.d[vertLoad + cluster.firstLocalVertex];
#endif

    vec3 oPos    = positions.d[vertexIndex];
    vec3 oNormal = normals.d[vertexIndex];
    vec4 wPos    = objectToWorld * vec4(oPos,1.0f);

    // only real vertices are written
    if (vert <= lastVertex) {
      gl_MeshVerticesEXT[vert].gl_Position = view.viewProjMatrix * wPos;
      OUT[vert].wPos      = wPos.xyz;
      OUT[vert].wNormal   = normalize(normalToWorld * oNormal);
      OUT[vert].clusterID = gl_WorkGroupID.x;
    }
  }

  [[unroll]]
  for (uint iter = 0; iter < MESHLET_TRIANGLE_ITERATIONS; iter++)
  {
    uint tri     = gl_LocalInvocationID.x + iter * MESHSHADER_WORKGROUP_SIZE;
    uint triLoad = min(tri, lastTriangle);

    // three consecutive 8-bit indices per triangle
    uint firstIndex = cluster.firstLocalTriangle + triLoad * 3;

    uvec3 indices = uvec3(clusterTriangles.d[firstIndex + 0],
                          clusterTriangles.d[firstIndex + 1],
                          clusterTriangles.d[firstIndex + 2]);

    // only real triangles are written
    if (tri <= lastTriangle) {
      gl_PrimitiveTriangleIndicesEXT[tri]      = indices;
      gl_MeshPrimitivesEXT[tri].gl_PrimitiveID = int(tri);
    }
  }
}
--------------------------------------------------------------------------------
/shaders/render_raster_triangles.vert.glsl:
--------------------------------------------------------------------------------
/*
* Copyright (c) 2024-2025, NVIDIA CORPORATION.  All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/
#version 460

#extension GL_GOOGLE_include_directive : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable
#extension GL_EXT_buffer_reference : enable
#extension GL_EXT_buffer_reference2 : enable
#extension GL_EXT_scalar_block_layout : enable

#include "shaderio.h"

layout(push_constant) uniform pushData
{
  uint instanceID;
} push;

layout(std140, binding = BINDINGS_FRAME_UBO, set = 0) uniform frameConstantsBuffer
{
  FrameConstants view;
};

layout(scalar, binding = BINDINGS_READBACK_SSBO, set = 0) buffer readbackBuffer
{
  Readback readback;
};

layout(scalar, binding = BINDINGS_RENDERINSTANCES_SSBO, set = 0) buffer renderInstancesBuffer
{
  RenderInstance instances[];
};

////////////////////////////////////////////

// Same interface block as the cluster mesh shader writes.
layout(location=0) out Interpolants {
  vec3      wPos;
  vec3      wNormal;
  flat uint clusterID;
} OUT;

////////////////////////////////////////////

// Classic vertex path: fetches position and normal by gl_VertexIndex from the
// buffers of the instance selected by the push constant.
void main()
{
  RenderInstance instance = instances[push.instanceID];

  vec3s_in positions = vec3s_in(instance.positions);
  vec3s_in normals   = vec3s_in(instance.normals);

  mat4 objectToWorld = instance.worldMatrix;
  // inverse-transpose of the upper 3x3 for transforming normals
  mat3 normalToWorld = transpose(inverse(mat3(objectToWorld)));

  vec3 oPos    = positions.d[gl_VertexIndex];
  vec3 oNormal = normals.d[gl_VertexIndex];

  vec4 wPos = objectToWorld * vec4(oPos,1.0f);

  gl_Position = view.viewProjMatrix * wPos;

  OUT.wPos    = wPos.xyz;
  OUT.wNormal = normalize(normalToWorld * oNormal);

  // this path always writes cluster id zero
  OUT.clusterID = 0;
}

--------------------------------------------------------------------------------
/shaders/render_raytrace.rgen.glsl:
--------------------------------------------------------------------------------
/*
* Copyright (c) 2024-2025, NVIDIA CORPORATION.  All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

#version 460

#extension GL_GOOGLE_include_directive : enable

#extension GL_EXT_ray_tracing : require

#extension GL_EXT_shader_explicit_arithmetic_types_int8 : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable
#extension GL_EXT_buffer_reference : enable
#extension GL_EXT_scalar_block_layout : enable
#include "shaderio.h"

//////////////////////////////////////////////////////////////

layout(std140, binding = BINDINGS_FRAME_UBO, set = 0) uniform frameConstantsBuffer
{
  FrameConstants view;
};

layout(set = 0, binding = BINDINGS_TLAS) uniform accelerationStructureEXT asScene;
layout(set = 0, binding = BINDINGS_RENDER_TARGET, rgba8) uniform image2D imgColor;

//////////////////////////////////////////////////////////////

// Payload of the primary ray. The location uses the same constant as the miss
// shader's incoming payload and as the traceRayEXT call below.
layout(location = RAYTRACING_PAYLOAD_INDEX) rayPayloadEXT RayPayload rayHit;

//////////////////////////////////////////////////////////////

// Shoots one primary ray through the center of each pixel and stores the
// color returned in the payload.
void main()
{
  ivec2 screen = ivec2(gl_LaunchIDEXT.xy);

  // pixel center in [0,1], then in [-1,1]
  vec2 uv = (vec2(gl_LaunchIDEXT.xy) + vec2(0.5)) / vec2(gl_LaunchSizeEXT.xy);
  vec2 d  = uv * 2.0 - 1.0;

  // ray from the camera position through the unprojected pixel
  vec4 origin = view.viewMatrixI * vec4(0, 0, 0, 1);
  // NOTE(review): normalize() acts on the full vec4, so `target.xyz` (and thereby
  // `direction`) is generally shorter than unit length and tMin/tMax are measured
  // in units of that length. Kept as is; confirm whether normalize(target.xyz)
  // was intended.
  vec4 target    = normalize(view.projMatrixI * vec4(d.x, d.y, 1, 1));
  vec4 direction = view.viewMatrixI * vec4(target.xyz, 0);

  float tMin = view.nearPlane;
  float tMax = view.farPlane;

  traceRayEXT(asScene, gl_RayFlagsCullBackFacingTrianglesEXT,
              0xff,                      // cull mask
              0, 0,                      // hit offset, hit stride
              0,                         // miss offset
              origin.xyz, tMin, direction.xyz, tMax,
              RAYTRACING_PAYLOAD_INDEX   // payload location, must match `rayHit` above
  );

  imageStore(imgColor, screen, vec4(rayHit.color.xyz,1));
}
--------------------------------------------------------------------------------
/shaders/render_raytrace.rmiss.glsl:
--------------------------------------------------------------------------------
/*
* Copyright (c) 2024-2025, NVIDIA CORPORATION.  All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

#version 460

#extension GL_GOOGLE_include_directive : enable

#extension GL_EXT_ray_tracing : require
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable

#include "shaderio.h"

//////////////////////////////////////////////////////////////

layout(std140, binding = BINDINGS_FRAME_UBO, set = 0) uniform frameConstantsBuffer
{
  FrameConstants view;
};

//////////////////////////////////////////////////////////////

// payload handed in by the shader that traced the ray
layout(location = RAYTRACING_PAYLOAD_INDEX) rayPayloadInEXT RayPayload rayHit;

//////////////////////////////////////////////////////////////

// Ray hit nothing: return the sky color along the ray direction, with w = 0.
void main()
{
  rayHit.color = vec4(evalSimpleSky(view.skyParams, gl_WorldRayDirectionEXT), 0.f);
}

--------------------------------------------------------------------------------
/shaders/render_raytrace_clusters.rchit.glsl:
--------------------------------------------------------------------------------
/*
* Copyright (c) 2024-2025, NVIDIA CORPORATION.  All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

#version 460

#extension GL_GOOGLE_include_directive : enable

#extension GL_EXT_ray_tracing : require
#extension GL_EXT_nonuniform_qualifier : require
#extension GL_EXT_shader_16bit_storage : require
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int32 : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : enable
#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable
#extension GL_EXT_shader_atomic_int64 : enable
// requested explicitly, like the raster shaders do, because this file uses
// `layout(scalar)` blocks and buffer references itself
#extension GL_EXT_buffer_reference : enable
#extension GL_EXT_buffer_reference2 : enable
#extension GL_EXT_scalar_block_layout : enable
#extension GL_EXT_control_flow_attributes : require

// At the time of writing no GLSL extension was available for the cluster id,
// so GL_EXT_spirv_intrinsics is used to hook up the new builtin.
#extension GL_EXT_spirv_intrinsics : require

// Note that `VkRayTracingPipelineClusterAccelerationStructureCreateInfoNV::allowClusterAccelerationStructure` must
// be set to `VK_TRUE` to make this valid.
spirv_decorate(extensions = ["SPV_NV_cluster_acceleration_structure"], capabilities = [5437], 11, 5436) in int gl_ClusterIDNV_;

// While not required in this sample, as we use a dedicated hit shader for clusters,
// `int gl_ClusterIDNoneNV = -1;` can be used to dynamically detect regular hits.

#include "shaderio.h"

/////////////////////////////////

layout(std140, binding = BINDINGS_FRAME_UBO, set = 0) uniform frameConstantsBuffer
{
  FrameConstants view;
};

layout(scalar, binding = BINDINGS_READBACK_SSBO, set = 0) buffer readbackBuffer
{
  Readback readback;
};

layout(scalar, binding = BINDINGS_RENDERINSTANCES_SSBO, set = 0) buffer renderInstancesBuffer
{
  RenderInstance instances[];
};

layout(set = 0, binding = BINDINGS_TLAS) uniform accelerationStructureEXT asScene;


/////////////////////////////////

hitAttributeEXT vec2 barycentrics;

/////////////////////////////////

// rayHit: payload of the ray that hit this triangle.
// rayHitAO: second payload declared for rays traced from here.
// NOTE(review): presumably used by the helpers in render_shading.glsl - confirm.
layout(location = 0) rayPayloadInEXT RayPayload rayHit;
layout(location = 1) rayPayloadEXT RayPayload rayHitAO;

/////////////////////////////////

#define SUPPORTS_RT 1

#include "render_shading.glsl"

#ifndef CLUSTER_DEDICATED_VERTICES
#define CLUSTER_DEDICATED_VERTICES 0
#endif

/////////////////////////////////

// Closest-hit shader for cluster geometry: reconstructs the hit position and
// normal from the cluster's triangle, shades it, and records picking information
// for the pixel under the mouse.
void main()
{
  // get cluster ID (see top of file how we hooked up this value to spir-v)
  uint clusterID = gl_ClusterIDNV_;

  uint visClusterID = clusterID;
  if (view.visualize == VISUALIZE_TRIANGLES) {
    // perturb the id per triangle so every triangle gets its own color
    visClusterID ^= 1 + gl_PrimitiveID;
  }

  RenderInstance instance = instances[gl_InstanceID];

  // Fetch cluster header
  Clusters_in clusterBuffer = Clusters_in(instance.clusters);
  Cluster cluster = clusterBuffer.d[clusterID];

  // Fetch triangle
  // There are three different possibilities.
#if CLUSTER_DEDICATED_VERTICES
  // The data has been baked to have vertices per-cluster.
  // This way we get away with the 8-bit triangle indices that are local to the cluster.

  // the local triangle indices used within this cluster
  uint8s_in localTriangles = uint8s_in(instance.clusterLocalTriangles);

  uvec3 triangleIndices = uvec3(localTriangles.d[cluster.firstLocalTriangle + gl_PrimitiveID * 3 + 0],
                                localTriangles.d[cluster.firstLocalTriangle + gl_PrimitiveID * 3 + 1],
                                localTriangles.d[cluster.firstLocalTriangle + gl_PrimitiveID * 3 + 2]);

  // convert to global indices for attribute lookup
  triangleIndices += cluster.firstLocalVertex;

#elif (!CLUSTER_DEDICATED_VERTICES) && 0
  // This codepath is disabled for now: the original index buffer is kept around
  // for computing the normals anyway, and using it avoids the extra indirection.
  // When the original triangle index buffer isn't needed, this path would use
  // less memory.

  // the local triangle indices used within this cluster
  uint8s_in localTriangles = uint8s_in(instance.clusterLocalTriangles);

  uvec3 triangleIndices = uvec3(localTriangles.d[cluster.firstLocalTriangle + gl_PrimitiveID * 3 + 0],
                                localTriangles.d[cluster.firstLocalTriangle + gl_PrimitiveID * 3 + 1],
                                localTriangles.d[cluster.firstLocalTriangle + gl_PrimitiveID * 3 + 2]);

  // convert to global indices for attribute lookup

  // we need another indirection, mapping the local triangle indices to the global
  // vertex indices within the cluster.
  uints_in localVertices = uints_in(instance.clusterLocalVertices);

  triangleIndices.x = localVertices.d[cluster.firstLocalVertex + triangleIndices.x];
  triangleIndices.y = localVertices.d[cluster.firstLocalVertex + triangleIndices.y];
  triangleIndices.z = localVertices.d[cluster.firstLocalVertex + triangleIndices.z];

#else
  // The simple way is we just use the traditional triangle index buffer,
  // which operates on global indices already.

  // get the classic triangle index buffer of this instance
  uvec3s_in indexBuffer = uvec3s_in(instance.triangles);
  // fetch triangle with cluster's offset
  // gl_PrimitiveID is the local triangle index within the cluster
  uvec3 triangleIndices = indexBuffer.d[gl_PrimitiveID + cluster.firstTriangle];
#endif

  // Fetch vertex positions
  vec3 vertices[3];
  vec3s_in vertexBuffer = vec3s_in(instance.positions);

  [[unroll]]
  for(uint32_t i = 0; i < 3; i++)
  {
    vertices[i] = vertexBuffer.d[triangleIndices[i]];
  }

  // the hit attribute holds the weights of vertex 1 and 2, vertex 0 gets the remainder
  vec3 baryWeight = vec3((1.f - barycentrics[0] - barycentrics[1]), barycentrics[0], barycentrics[1]);

  vec3 oPos = baryWeight.x * vertices[0] + baryWeight.y * vertices[1] + baryWeight.z * vertices[2];
  vec3 wPos = vec3(gl_ObjectToWorldEXT * vec4(oPos, 1.0));

  vec3 oNrm;
  if(view.facetShading != 0)
  {
    // Facet shading: use the geometric normal of the triangle
    vec3 e0 = vertices[1] - vertices[0];
    vec3 e1 = vertices[2] - vertices[0];
    oNrm = normalize(cross(e0, e1));
  }
  else
  {
    // Smooth shading: interpolate the per-vertex normals
    vec3 normals[3];
    vec3s_in normalsBuffer = vec3s_in(instance.normals);

    [[unroll]]
    for(uint32_t i = 0; i < 3; i++)
    {
      normals[i] = normalize(normalsBuffer.d[triangleIndices[i]]);
    }
    oNrm = baryWeight.x * normals[0] + baryWeight.y * normals[1] + baryWeight.z * normals[2];
  }

  // vector * matrix, i.e. the normal is multiplied by the transposed world-to-object matrix
  vec3 wNrm = normalize(vec3(oNrm * gl_WorldToObjectEXT));

  vec3 directionToLight = view.skyParams.directionToLight;
  float ambientOcclusion = ambientOcclusion(wPos, wNrm, view.ambientOcclusionRays, view.ambientOcclusionRadius * view.sceneSize);

  float sunContribution = 1.0;
  if(view.doShadow == 1)
    sunContribution = traceShadowRay(wPos, directionToLight);

  rayHit.color = shading(gl_InstanceID, wPos, wNrm, visClusterID, sunContribution, ambientOcclusion);

  // Picking for the pixel under the mouse
  if(gl_LaunchIDEXT.xy == view.mousePosition)
  {
    vec4  projected = (view.viewProjMatrix * vec4(wPos, 1.f));
    float depth     = projected.z / projected.w;
    // same packing as the raster path: cluster id in the upper bits,
    // triangle index in the lowest 8 bits
    readback.clusterTriangleId = packPickingValue((clusterID << 8) | uint(gl_PrimitiveID & 0xFF), depth);
    readback.instanceId = packPickingValue(gl_InstanceID, depth);
  }
}
--------------------------------------------------------------------------------
/shaders/render_raytrace_triangles.rchit.glsl:
--------------------------------------------------------------------------------
/*
* Copyright (c) 2024-2025, NVIDIA CORPORATION.  All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | #version 460 21 | 22 | #extension GL_GOOGLE_include_directive : enable 23 | 24 | #extension GL_EXT_ray_tracing : require 25 | #extension GL_EXT_nonuniform_qualifier : require 26 | #extension GL_EXT_shader_16bit_storage : require 27 | #extension GL_EXT_shader_explicit_arithmetic_types_float16 : require 28 | #extension GL_EXT_shader_explicit_arithmetic_types_int8 : enable 29 | #extension GL_EXT_shader_explicit_arithmetic_types_int32 : enable 30 | #extension GL_EXT_shader_explicit_arithmetic_types_int16 : enable 31 | #extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable 32 | #extension GL_EXT_shader_atomic_int64 : enable 33 | #extension GL_EXT_buffer_reference2 : enable 34 | 35 | #include "shaderio.h" 36 | 37 | ///////////////////////////////// 38 | 39 | layout(std140, binding = BINDINGS_FRAME_UBO, set = 0) uniform frameConstantsBuffer 40 | { 41 | FrameConstants view; 42 | }; 43 | 44 | layout(scalar, binding = BINDINGS_READBACK_SSBO, set = 0) buffer readbackBuffer 45 | { 46 | Readback readback; 47 | }; 48 | 49 | layout(scalar, binding = BINDINGS_RENDERINSTANCES_SSBO, set = 0) buffer renderInstancesBuffer 50 | { 51 | RenderInstance instances[]; 52 | }; 53 | 54 | layout(set = 0, binding = BINDINGS_TLAS) uniform accelerationStructureEXT asScene; 55 | 56 | ///////////////////////////////// 57 | 58 | hitAttributeEXT vec2 barycentrics; 59 | 60 | ///////////////////////////////// 61 | 62 | layout(location = 0) rayPayloadInEXT RayPayload rayHit; 63 | layout(location = 1) rayPayloadEXT RayPayload rayHitAO; 64 | 65 | ///////////////////////////////// 66 | 67 | #define SUPPORTS_RT 1 68 | 69 | #include "render_shading.glsl" 70 | 71 | ///////////////////////////////// 72 | 73 | void main() 74 | { 75 | // Fetch triangle 76 | uvec3s_in indexBuffer = uvec3s_in(instances[gl_InstanceID].triangles); 77 | uvec3 triangleIndices = indexBuffer.d[gl_PrimitiveID]; 78 | 79 | // Fetch vertex positions 80 | vec3 
vertices[3]; 81 | vec3s_in vertexBuffer = vec3s_in(instances[gl_InstanceID].positions); 82 | 83 | for(uint32_t i = 0; i < 3; i++) 84 | { 85 | vertices[i] = vertexBuffer.d[triangleIndices[i]]; 86 | } 87 | 88 | vec3 baryWeight = vec3((1.f - barycentrics[0] - barycentrics[1]), barycentrics[0], barycentrics[1]); 89 | 90 | vec3 oPos = baryWeight.x * vertices[0] + baryWeight.y * vertices[1] + baryWeight.z * vertices[2]; 91 | vec3 wPos = vec3(gl_ObjectToWorldEXT * vec4(oPos, 1.0)); 92 | 93 | vec3 oNrm; 94 | if(view.facetShading != 0) 95 | { 96 | // Otherwise compute geometric normal 97 | vec3 e0 = vertices[1] - vertices[0]; 98 | vec3 e1 = vertices[2] - vertices[0]; 99 | oNrm = normalize(cross(e0, e1)); 100 | } 101 | else 102 | { 103 | vec3 normals[3]; 104 | vec3s_in normalsBuffer = vec3s_in(instances[gl_InstanceID].normals); 105 | 106 | for(uint32_t i = 0; i < 3; i++) 107 | { 108 | normals[i] = normalize(normalsBuffer.d[triangleIndices[i]]); 109 | } 110 | oNrm = baryWeight.x * normals[0] + baryWeight.y * normals[1] + baryWeight.z * normals[2]; 111 | } 112 | 113 | vec3 wNrm = normalize(vec3(oNrm * gl_WorldToObjectEXT)); 114 | 115 | 116 | // triangles don't have clusterID 117 | uint32_t visClusterID = 0; 118 | if (view.visualize == VISUALIZE_TRIANGLES) { 119 | visClusterID = 1 + gl_PrimitiveID; 120 | } 121 | 122 | vec3 directionToLight = view.skyParams.directionToLight; 123 | float ambientOcclusion = ambientOcclusion(wPos, wNrm, view.ambientOcclusionRays, view.ambientOcclusionRadius * view.sceneSize); 124 | 125 | float sunContribution = 1.0; 126 | if(view.doShadow == 1) 127 | sunContribution = traceShadowRay(wPos, directionToLight); 128 | 129 | rayHit.color = shading(gl_InstanceID, wPos, wNrm, visClusterID, sunContribution, ambientOcclusion); 130 | 131 | if(gl_LaunchIDEXT.xy == view.mousePosition) 132 | { 133 | vec4 projected = (view.viewProjMatrix * vec4(wPos, 1.f)); 134 | float depth = projected.z / projected.w; 135 | readback.clusterTriangleId = 
packPickingValue(gl_PrimitiveID, depth); 136 | readback.instanceId = packPickingValue(gl_InstanceID, depth); 137 | } 138 | } -------------------------------------------------------------------------------- /shaders/render_shading.glsl: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. 
17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | uint murmurHash(uint idx) 21 | { 22 | uint m = 0x5bd1e995; 23 | uint r = 24; 24 | 25 | uint h = 64684; 26 | uint k = idx; 27 | 28 | k *= m; 29 | k ^= (k >> r); 30 | k *= m; 31 | h *= m; 32 | h ^= k; 33 | 34 | return h; 35 | } 36 | 37 | vec4 shading(uint instanceID, vec3 wPos, vec3 wNormal, uint clusterID, float overheadLight, float ambientOcclusion) 38 | { 39 | const vec3 sunColor = vec3(0.99f, 1.f, 0.71f); 40 | const vec3 skyColor = view.skyParams.skyColor; 41 | const vec3 groundColor = view.skyParams.groundColor; 42 | vec3 materialAlbedo = vec3(0.8f); 43 | vec4 color = vec4(0.f); 44 | 45 | if(view.visualize != VISUALIZE_NONE && clusterID != 0) 46 | { 47 | materialAlbedo = unpackUnorm4x8(murmurHash(clusterID ^ view.colorXor)).xyz * 0.3 + 0.5; 48 | } 49 | 50 | vec3 normal = normalize(wNormal.xyz); 51 | vec3 wEyePos = vec3(view.viewMatrixI[3].x, view.viewMatrixI[3].y, view.viewMatrixI[3].z); 52 | vec3 eyeDir = normalize(wEyePos.xyz - wPos.xyz); 53 | 54 | // Ambient 55 | float ambientIntensity = 1.f; 56 | vec3 ambientLighting = ambientOcclusion * materialAlbedo* ambientIntensity 57 | * mix(groundColor, skyColor, dot(normal, view.wUpDir.xyz) * 0.5 + 0.5) ; 58 | 59 | // Light mixer 60 | float lightMixer = view.lightMixer; 61 | float flashlightIntensity = 1.0f - lightMixer; 62 | float overheadLightIntensity = lightMixer; 63 | 64 | // Flashlight 65 | vec3 flashlightLighting = vec3(0.f); 66 | { 67 | // Use a flashlight intensity similar to the sky color for average luminance consistency 68 | flashlightIntensity *= max(skyColor.x, max(skyColor.y, skyColor.z)); 69 | vec3 lightDir = normalize(view.wLightPos.xyz - wPos.xyz); 70 | vec3 reflDir = normalize(-reflect(lightDir, normal)); 71 | float bsdf = abs(dot(normal, lightDir)) + pow(max(0, dot(reflDir, eyeDir)), 16) * 0.3; 72 | flashlightLighting = flashlightIntensity * materialAlbedo * bsdf; 73 | } 74 | 75 | // Overhead light 76 | vec3 overheadLightColor = 
view.skyParams.lightRadiance; 77 | vec3 overheadLighting = vec3(overheadLightIntensity * overheadLight * overheadLightColor); 78 | { 79 | vec3 lightDir = normalize(view.skyParams.directionToLight); 80 | vec3 reflDir = normalize(-reflect(lightDir, normal)); 81 | float diffuse = max(0, dot(normal, lightDir)); 82 | float specular = pow(max(0, dot(reflDir, eyeDir)), 16) * 0.3; 83 | float bsdf = diffuse + specular; 84 | overheadLighting = overheadLighting * materialAlbedo * bsdf; 85 | } 86 | 87 | color.xyz = overheadLighting + flashlightLighting + ambientLighting; 88 | color.w = 1.0; 89 | return color; 90 | } 91 | 92 | uint64_t packPickingValue(uint32_t v, float z) 93 | { 94 | z = 1.f - clamp(z, 0.f, 1.f); 95 | uint bits = floatBitsToUint(z); 96 | bits ^= (int(bits) >> 31) | 0x80000000u; 97 | uint64_t value = (uint64_t(bits) << 32) | uint64_t(v); 98 | return value; 99 | } 100 | 101 | 102 | // Return the width [0..1] for which the line should be displayed or not 103 | float getLineWidth(in vec3 deltas, in float thickness, in float smoothing, in vec3 barys) 104 | { 105 | barys = smoothstep(deltas * (thickness), deltas * (thickness + smoothing), barys); 106 | float minBary = min(barys.x, min(barys.y, barys.z)); 107 | return 1.0 - minBary; 108 | } 109 | 110 | // Position along the edge [0..1] 111 | float edgePosition(vec3 barycentrics) 112 | { 113 | return max(barycentrics.z, max(barycentrics.y, barycentrics.x)); 114 | } 115 | 116 | // Return 0 or 1 if edgePos should be diplayed or not 117 | float stipple(in float stippleRepeats, in float stippleLength, in float edgePos) 118 | { 119 | float offset = 1.0 / stippleRepeats; 120 | offset *= 0.5 * stippleLength; 121 | float pattern = fract((edgePos + offset) * stippleRepeats); 122 | return 1.0 - step(stippleLength, pattern); 123 | } 124 | 125 | 126 | vec3 addWireframe(vec3 color, vec3 barycentrics, bool frontFacing, vec3 barycentricsDerivatives, vec3 wireColor) 127 | { 128 | float oThickness = view.wireThickness * 0.5; 129 | 
float thickness = oThickness * 0.5; // Thickness for both side of the edge, must be divided by 2 130 | float smoothing = oThickness * view.wireSmoothing; // Could be thickness 131 | bool enableStipple = (view.wireStipple == 1); 132 | 133 | // Uniform position on the edge [0, 1] 134 | float edgePos = edgePosition(barycentrics); 135 | 136 | if(!frontFacing) 137 | { 138 | enableStipple = true; // Forcing backface to always stipple the line 139 | wireColor = view.wireBackfaceColor; 140 | } 141 | 142 | 143 | // fwidth ? return the sum of the absolute value of derivatives in x and y 144 | // which makes the width in screen space 145 | vec3 deltas = barycentricsDerivatives; //fwidthFine(barycentrics); 146 | 147 | // Get the wireframe line width 148 | float lineWidth = getLineWidth(deltas, thickness, smoothing, barycentrics); 149 | 150 | // [optional] 151 | if(enableStipple) 152 | { 153 | float stippleFact = stipple(view.wireStippleRepeats, view.wireStippleLength, edgePos); 154 | lineWidth *= stippleFact; // 0 or 1 155 | } 156 | 157 | // Final color 158 | return mix(color, wireColor, lineWidth); 159 | } 160 | 161 | 162 | uvec2 digits[] = { uvec2(706880028, 471999018), uvec2(136845320, 1040713736), uvec2(67248668, 1042288648), 163 | uvec2(201466396, 471990786), uvec2(302646786, 33701666), uvec2(1008738366, 471990786), 164 | uvec2(1008734220, 471999010), uvec2(67371582, 269486088), uvec2(471999004, 471999010), 165 | uvec2(572662300, 402915870) }; 166 | 167 | bool isDigit(uvec2 coord, uint number) 168 | { 169 | number = clamp(number, 0, 9); 170 | if (coord.y < 1) 171 | { 172 | return false; 173 | } 174 | if (coord.x < 1) 175 | { 176 | return false; 177 | } 178 | 179 | uint part; 180 | uint yOffset; 181 | coord.y = 8 - coord.y; 182 | if (coord.y > 3) 183 | { 184 | part = digits[number].y; 185 | yOffset = 4; 186 | } 187 | else 188 | { 189 | part = digits[number].x; 190 | yOffset = 0; 191 | } 192 | return ((part >> (8 * (coord.y - yOffset) + (8 - coord.x))) & 0x1) == 1; 193 | } 

// Integer base-10 logarithm, rounded down; returns 0 for n < 10 (including 0).
uint log10(uint n)
{
  uint l = 0;
  while(n >= 10)
  {
    l++;
    n /= 10;
  }
  return l;
}

// Return true if the pixel at the coordinate fromAnchor relative to the anchor point
// is covered by a glyph from number.
// The number is centered on the anchor inside availableSize; every digit gets a
// square cell of availableSize.x / digitCount pixels. Pixels outside of the digit
// cells, or an area too narrow for one pixel per digit, are reported as not covered.
bool isNumber(uvec2 fromAnchor, uvec2 availableSize, uint number)
{
  uint baseDigitSize = 8;
  uint digitCount    = log10(number) + 1;

  fromAnchor += (availableSize) / 2;

  uint realDigitWidth = availableSize.x / digitCount;
  if(realDigitWidth == 0)
  {
    // less than one pixel per digit, would divide by zero below
    return false;
  }

  uint digitIndex = (fromAnchor.x / realDigitWidth);
  if(digitIndex >= digitCount || fromAnchor.y > realDigitWidth)
  {
    // right of the last digit cell or above the cell: the unsigned arithmetic
    // below would wrap around (very long loop, wrong glyph row)
    return false;
  }

  // drop the digits to the right of the requested one, then isolate it
  for(uint i = 1; i < digitCount - digitIndex; i++)
  {
    number /= 10;
  }
  number = number % 10;

  // pixel position inside the digit cell, rescaled to the 8x8 glyph and flipped vertically
  uvec2 digitStart = uvec2(fromAnchor.x - digitIndex * realDigitWidth, fromAnchor.y);
  digitStart       = (digitStart * baseDigitSize) / realDigitWidth;
  digitStart.y     = baseDigitSize - digitStart.y;
  digitStart       = clamp(digitStart, uvec2(0), uvec2(baseDigitSize));

  return isDigit(digitStart, number);
}



#if SUPPORTS_RT == 1

// Integer hash used to derive a per-pixel RNG seed (see ambientOcclusion)
uint wangHash(uint seed)
{
  seed = (seed ^ 61) ^ (seed >> 16);
  seed *= 9;
  seed = seed ^ (seed >> 4);
  seed *= 0x27d4eb2d;
  seed = seed ^ (seed >> 15);
  return seed;
}

//-----------------------------------------------------------------------
// https://www.pcg-random.org/
// Advances state and returns the next 32-bit pseudo-random value
//-----------------------------------------------------------------------
uint pcg(inout uint state)
{
  uint prev = state * 747796405u + 2891336453u;
  uint word = ((prev >> ((prev >> 28u) + 4u)) ^ prev) * 277803737u;
  state     = prev;
  return (word >> 22u) ^ word;
}

//-----------------------------------------------------------------------
// Generate a random float in [0, 1) given the previous RNG state
//-----------------------------------------------------------------------
// NOTE(review): the scale factor is 1 / float(0xffffffff), so the largest pcg outputs
// can round up to exactly 1.0 - confirm that no caller relies on a strict upper bound.
float rand(inout uint seed)
{
  uint r = pcg(seed);
  return float(r) * (1.F / float(0xffffffffu));
}

// Generate an arbitrary orthonormal basis from a normal vector
// (z is the given axis, x and y are written by this function)
void computeDefaultBasis(const vec3 z, out vec3 x, out vec3 y)
{
  const float yz = -z.y * z.z;
  y = normalize(((abs(z.z) > 0.99999f) ? vec3(-z.x * z.y, 1.0f - z.y * z.y, yz) : vec3(-z.x * z.z, yz, 1.0f - z.z * z.z)));

  x = cross(y, z);
}

#ifndef M_PI
#define M_PI 3.141592653589
#endif

// Ray traced ambient occlusion at wPos.
// Traces sampleCount rays of length radius over the hemisphere around wNormal and
// returns the squared fraction of rays that did not report a hit, never less than 0.2.
// Without samples the point counts as unoccluded (1.0).
// Relies on the miss shader to reset rayHitAO.color.w for rays that hit nothing.
float ambientOcclusion(vec3 wPos, vec3 wNormal, uint32_t sampleCount, float radius)
{
  if(sampleCount == 0)
  {
    // avoids the 0 / 0 division below
    return 1.0f;
  }

  // per-pixel seed, identical every frame
  uint32_t seed = wangHash(gl_LaunchIDEXT.x) ^ wangHash(gl_LaunchIDEXT.y);
  vec3     z    = wNormal;
  vec3     x, y;
  computeDefaultBasis(z, x, y);

  uint32_t occlusion = 0u;

  for(uint32_t i = 0; i < sampleCount; i++)
  {
    float r1 = 2 * M_PI * rand(seed);
    float r2 = rand(seed);
    float sq = sqrt(1.0 - r2);

    // direction in the local frame of the normal, then moved to world space
    vec3 wDirection = vec3(cos(r1) * sq, sin(r1) * sq, sqrt(r2));
    wDirection      = wDirection.x * x + wDirection.y * y + wDirection.z * z;

    // the payload starts out as "occluded", closest-hit shaders are skipped
    rayHitAO.color.w = 1.f;
    uint mask        = 0xFF;
    traceRayEXT(asScene, gl_RayFlagsOpaqueEXT | gl_RayFlagsTerminateOnFirstHitEXT | gl_RayFlagsSkipClosestHitShaderEXT,
                mask /*0xFF*/, 0, 0, 1, wPos, 1e-4f, wDirection, radius, 1);
    if(rayHitAO.color.w > 0.f)
    {
      occlusion++;
    }
  }
  float linearAo = float(sampleCount - occlusion) / float(sampleCount);
  return max(0.2f, linearAo * linearAo);
}

// Visibility of the overhead light along -wShadowDir: 1 if nothing is hit, otherwise 0.
// Surfaces facing away from the light return 0 without tracing a ray.
// NOTE(review): doShadow == false also returns 0 (fully shadowed) - confirm this is intended.
float overheadLightingContribution(vec3 wPos, vec3 wNormal, vec3 wShadowDir, bool doShadow)
{
  const float minValue = 0.f;
  if(!doShadow)
    return 0.f;

  float nDotDir = clamp(dot(wNormal, -wShadowDir), 0.f, 1.f);
  if(nDotDir <= minValue)
  {
    return minValue;
  }

  vec3 wDirection = -wShadowDir;

  rayHitAO.color.w = 1.f;
  uint mask        = 0xFF;
  traceRayEXT(asScene, gl_RayFlagsOpaqueEXT | gl_RayFlagsTerminateOnFirstHitEXT | gl_RayFlagsSkipClosestHitShaderEXT,
              mask /*0xFF*/, 0, 0, 1, wPos, 0.001f, wDirection, 10000000, 1);

  return (rayHitAO.color.w > 0.f) ? minValue : 1.f;
}

// Returns 0.0 if there is a hit along the light direction and 1.0, if nothing was hit
float traceShadowRay(vec3 wPos, vec3 wDirection)
{
  rayHitAO.color.w = 1.f;
  uint  mask       = 0xFF;
  uint  flags      = gl_RayFlagsOpaqueEXT | gl_RayFlagsTerminateOnFirstHitEXT | gl_RayFlagsSkipClosestHitShaderEXT;
  float minT       = 0.001f;
  float maxT       = 10000000.0f;
  traceRayEXT(asScene, flags, mask, 0, 0, 1, wPos, minT, wDirection, maxT, 1);

  return (rayHitAO.color.w > 0.f) ? 0.0F : 1.0f;
}

// Scalar triple product (a x b) . c
float determinant(vec3 a, vec3 b, vec3 c)
{
  return dot(cross(a, b), c);
}

// Intersects the ray with the plane of triangle (v0, v1, v2) and returns the
// barycentric coordinates (w, u, v) of that point with respect to v0, v1, v2.
// No rejection is done: the point may lie outside the triangle or behind the origin,
// and a ray parallel to the plane or a degenerate triangle divides by zero.
vec3 intersectRayTriangle(vec3 origin, vec3 direction, vec3 v0, vec3 v1, vec3 v2)
{
  // Edge vectors
  vec3 e1 = v1 - v0;
  vec3 e2 = v2 - v0;

  // Plane normal (cross product of edge vectors)
  vec3 planeNormal = cross(e1, e2);

  // Check for parallelism (ray parallel to the plane)
  float nDotDir = dot(planeNormal, direction);
  //if (abs(nDotDir) < 0.001f) {
  //    return vec3(-1.0);  // Return negative values to indicate no intersection
  //}

  // Distance from ray origin to the plane
  float t = dot(planeNormal, v0 - origin) / nDotDir;

  // Check if intersection is behind the ray origin (negative t means no intersection)
  //if (t <= 0.0) {
  //    return vec3(-1.0);  // Return negative values to indicate no intersection
  //}

  // Intersection point
  vec3 p = origin + t * direction;

  // Compute barycentric coordinates using determinant
  vec3  temp = p - v0;
  float det  = determinant(e1, e2, planeNormal);
  float u    = dot(cross(temp, e2), planeNormal) / det;
  float v    = dot(cross(e1, temp), planeNormal) / det;
  float w    = 1.0 - u - v;

  return vec3(w, u, v);
}

// Projects an object-space position of the current instance with view.viewProjMatrix
// and scales the result to the ray launch size; returns integer pixel coordinates.
ivec2 objectToPixel(vec3 objectPos)
{
  vec3 wObjectPos = gl_ObjectToWorldEXT * vec4(objectPos, 1.f);

  vec4 pPos = view.viewProjMatrix * vec4(wObjectPos, 1.f);

  pPos /= pPos.w;
  pPos.xy = pPos.xy * vec2(0.5f) + vec2(0.5f);
  pPos.xy *= vec2(gl_LaunchSizeEXT.xy);
  return ivec2(pPos.xy);
}



#endif  // SUPPORTS_RT

--------------------------------------------------------------------------------
/shaders/shaderio.h:
--------------------------------------------------------------------------------
/*
 * Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | #ifndef _SHADERIO_H_ 21 | #define _SHADERIO_H_ 22 | 23 | #include "dh_sky.h" 24 | 25 | ////////////////////////////////////////////////////////////////////////// 26 | 27 | #define BBOXES_PER_MESHLET 8 28 | 29 | ////////////////////////////////////////////////////////////////////////// 30 | 31 | #define STATISTICS_WORKGROUP_SIZE 64 32 | #define ANIMATION_WORKGROUP_SIZE 256 33 | 34 | ////////////////////////////////////////////////////////////////////////// 35 | 36 | #define VISUALIZE_NONE 0 37 | #define VISUALIZE_CLUSTER 1 38 | #define VISUALIZE_TRIANGLES 2 39 | 40 | ////////////////////////////////////////////////////////////////////////// 41 | 42 | #define BINDINGS_FRAME_UBO 0 43 | #define BINDINGS_READBACK_SSBO 1 44 | #define BINDINGS_RENDERINSTANCES_SSBO 2 45 | #define BINDINGS_TLAS 3 46 | #define BINDINGS_RENDER_TARGET 4 47 | 48 | ////////////////////////////////////////////////////////////////////////// 49 | 50 | #ifdef __cplusplus 51 | namespace shaderio { 52 | using namespace glm; 53 | using namespace nvvkhl_shaders; 54 | #define BUFFER_REF(typ) uint64_t 55 | #define BUFFER_REF_DECLARE_ARRAY(refname, typ, keywords, alignment) \ 56 | static_assert(alignof(typ) == alignment || (alignment > alignof(typ) && ((alignment % alignof(typ)) == 0)), \ 57 | "Alignment incompatible: " #refname) 58 | #else 59 | 60 | #extension GL_EXT_shader_explicit_arithmetic_types_int8 : enable 61 | #extension GL_EXT_shader_explicit_arithmetic_types_int32 : enable 62 | #extension GL_EXT_shader_explicit_arithmetic_types_int16 : enable 63 | #extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable 64 | #extension GL_EXT_buffer_reference : enable 65 | #extension GL_EXT_buffer_reference2 : enable 66 | #extension GL_EXT_scalar_block_layout : enable 67 | #extension GL_EXT_shader_atomic_int64 : enable 68 | 69 | #define BUFFER_REF(refname) refname 70 | #define BUFFER_REF_DECLARE_ARRAY(refname, typ, keywords, alignment) \ 71 | 
layout(buffer_reference, buffer_reference_align = alignment, scalar) keywords buffer refname \ 72 | { \ 73 | typ d[]; \ 74 | }; 75 | 76 | BUFFER_REF_DECLARE_ARRAY(uint8s_in, uint8_t, readonly, 4); 77 | BUFFER_REF_DECLARE_ARRAY(uints_in, uint, readonly, 4); 78 | BUFFER_REF_DECLARE_ARRAY(uint64s_inout, uint64_t, , 8); 79 | BUFFER_REF_DECLARE_ARRAY(uvec3s_in, uvec3, readonly, 4); 80 | BUFFER_REF_DECLARE_ARRAY(vec3s_in, vec3, readonly, 4); 81 | BUFFER_REF_DECLARE_ARRAY(vec3s_inout, vec3, , 4); 82 | #endif 83 | 84 | struct BBox 85 | { 86 | vec3 lo; 87 | vec3 hi; 88 | }; 89 | 90 | struct Cluster 91 | { 92 | uint16_t numVertices; 93 | uint16_t numTriangles; 94 | uint32_t firstTriangle; 95 | uint32_t firstLocalVertex; 96 | uint32_t firstLocalTriangle; 97 | }; 98 | BUFFER_REF_DECLARE_ARRAY(Clusters_in, Cluster, readonly, 16); 99 | 100 | struct RenderInstance 101 | { 102 | mat4 worldMatrix; 103 | 104 | uint32_t numTriangles; 105 | uint32_t numVertices; 106 | uint32_t numClusters; 107 | uint32_t geometryID; 108 | 109 | // animated 110 | uint64_t positions; 111 | uint64_t normals; 112 | 113 | // original 114 | uint64_t triangles; 115 | uint64_t clusters; 116 | uint64_t clusterLocalVertices; 117 | uint64_t clusterLocalTriangles; 118 | uint64_t clusterBboxes; 119 | uint64_t originalPositions; 120 | }; 121 | 122 | struct FrameConstants 123 | { 124 | mat4 projMatrix; 125 | mat4 projMatrixI; 126 | 127 | mat4 viewProjMatrix; 128 | mat4 viewProjMatrixI; 129 | mat4 viewMatrix; 130 | mat4 viewMatrixI; 131 | vec4 viewPos; 132 | vec4 viewDir; 133 | vec4 viewPlane; 134 | 135 | ivec2 viewport; 136 | vec2 viewportf; 137 | 138 | vec2 viewPixelSize; 139 | vec2 viewClipSize; 140 | 141 | vec3 wLightPos; 142 | uint _pad1; 143 | 144 | vec2 _padShadow; 145 | float lightMixer; 146 | uint doShadow; 147 | 148 | vec3 wUpDir; 149 | uint visualize; 150 | 151 | vec4 bgColor; 152 | 153 | float lodScale; 154 | float animationState; 155 | float ambientOcclusionRadius; 156 | int32_t ambientOcclusionRays; 157 
| 158 | int32_t animationRippleEnabled; 159 | float animationRippleFrequency; 160 | float animationRippleAmplitude; 161 | float animationRippleSpeed; 162 | 163 | int32_t animationTwistEnabled; 164 | float animationTwistSpeed; 165 | float animationTwistMaxAngle; 166 | float sceneSize; 167 | 168 | 169 | uint doAnimation; 170 | uint _pad; 171 | float nearPlane; 172 | float farPlane; 173 | 174 | vec4 hizSizeFactors; 175 | vec4 nearSizeFactors; 176 | 177 | float hizSizeMax; 178 | int facetShading; 179 | int supersample; 180 | uint colorXor; 181 | 182 | uint dbgUint; 183 | float dbgFloat; 184 | float time; 185 | uint frame; 186 | 187 | uvec2 mousePosition; 188 | float wireThickness; 189 | float wireSmoothing; 190 | 191 | vec3 wireColor; 192 | uint wireStipple; 193 | 194 | vec3 wireBackfaceColor; 195 | float wireStippleRepeats; 196 | 197 | float wireStippleLength; 198 | uint doWireframe; 199 | uint visFilterInstanceID; 200 | uint visFilterClusterID; 201 | 202 | SimpleSkyParameters skyParams; 203 | }; 204 | 205 | struct Readback 206 | { 207 | uint64_t clustersSize; 208 | uint64_t blasesSize; 209 | 210 | #ifndef __cplusplus 211 | uint64_t clusterTriangleId; 212 | uint64_t instanceId; 213 | #else 214 | uint32_t clusterTriangleId; 215 | uint32_t _packedDepth0; 216 | 217 | uint32_t instanceId; 218 | uint32_t _packedDepth1; 219 | #endif 220 | 221 | int debugI; 222 | uint debugUI; 223 | uint debugA[32]; 224 | uint debugB[32]; 225 | uint debugC[32]; 226 | }; 227 | 228 | struct AnimationConstants 229 | { 230 | uint64_t renderInstances; 231 | 232 | uint32_t instanceIndex; 233 | float animationState; 234 | 235 | uint32_t rippleEnabled; 236 | float rippleFrequency; 237 | float rippleAmplitude; 238 | float rippleSpeed; 239 | 240 | uint32_t twistEnabled; 241 | float twistSpeed; 242 | float twistMaxAngle; 243 | float geometrySize; 244 | }; 245 | 246 | struct StatisticsConstants 247 | { 248 | BUFFER_REF(uints_in) sizes; 249 | BUFFER_REF(uint64s_inout) sum; 250 | uint32_t count; 251 | }; 
252 | 253 | struct RayPayload 254 | { 255 | vec4 color; 256 | }; 257 | 258 | #ifdef __cplusplus 259 | } 260 | #endif 261 | 262 | #endif 263 | -------------------------------------------------------------------------------- /src/animatedclusters.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. 
17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | #pragma once 20 | 21 | #include 22 | 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | #include "renderer.hpp" 33 | #include "resources.hpp" 34 | #include "scene.hpp" 35 | #include "shaders/shaderio.h" 36 | 37 | 38 | namespace animatedclusters { 39 | int const SAMPLE_SIZE_WIDTH(1024); 40 | int const SAMPLE_SIZE_HEIGHT(1024); 41 | class AnimatedClusters 42 | { 43 | public: 44 | enum RendererType 45 | { 46 | RENDERER_RASTER_TRIANGLES, 47 | RENDERER_RASTER_CLUSTERS, 48 | RENDERER_RAYTRACE_TRIANGLES, 49 | RENDERER_RAYTRACE_CLUSTERS, 50 | }; 51 | 52 | enum ClusterConfig 53 | { 54 | CLUSTER_32T_32T, 55 | CLUSTER_64T_64V, 56 | CLUSTER_96T_96V, 57 | CLUSTER_128T_128V, 58 | CLUSTER_128T_256V, 59 | CLUSTER_256T_256V, 60 | CLUSTER_CUSTOM, 61 | }; 62 | 63 | enum BuildMode 64 | { 65 | BUILD_DEFAULT, 66 | BUILD_FAST_BUILD, 67 | BUILD_FAST_TRACE, 68 | }; 69 | 70 | enum TLASUpdateMode 71 | { 72 | TLAS_UPDATE_REFIT, 73 | TLAS_UPDATE_REBUILD 74 | }; 75 | 76 | enum GuiEnums 77 | { 78 | GUI_RENDERER, 79 | GUI_SUPERSAMPLE, 80 | GUI_MESHLET, 81 | GUI_BUILDMODE, 82 | GUI_TLAS_UPDATEMODE, 83 | GUI_VISUALIZE, 84 | }; 85 | 86 | struct Tweak 87 | { 88 | ClusterConfig clusterConfig = CLUSTER_64T_64V; 89 | 90 | RendererType renderer = RENDERER_RAYTRACE_CLUSTERS; 91 | int supersample = 2; 92 | 93 | bool hbaoFullRes = false; 94 | bool hbaoActive = true; 95 | float hbaoRadius = 0.05; 96 | 97 | float overrideTime = 0.0f; 98 | bool facetShading = true; 99 | bool useTemplates = true; 100 | bool useImplicitTemplates = false; 101 | float templateBboxBloat = 0.5f; 102 | BuildMode templateInstantiateMode = BuildMode::BUILD_FAST_BUILD; 103 | BuildMode templateBuildMode = BuildMode::BUILD_FAST_TRACE; 104 | BuildMode clusterBuildMode = BuildMode::BUILD_FAST_TRACE; 105 | BuildMode blasBuildMode = BuildMode::BUILD_FAST_BUILD; 106 | uint32_t clusterPositionTruncationBits = 0; 107 | 
108 | uint32_t gridCopies = 1; 109 | uint32_t gridConfig = 13; 110 | 111 | bool autoResetTimers = false; 112 | }; 113 | 114 | struct MouseButtonHandler 115 | { 116 | enum InternalState 117 | { 118 | eInternalNone, 119 | eInternalFirstDown, 120 | eInternalFirstUp, 121 | eInternalDrag, 122 | eInternalFirstClick, 123 | eInternalSecondDown, 124 | eInternalSecondUp, 125 | eInternalSecondClick 126 | }; 127 | 128 | 129 | struct ButtonClick 130 | { 131 | glm::uvec2 pos; 132 | InternalState internal = eInternalNone; 133 | double firstUpTime; 134 | nvh::Stopwatch internalTime; 135 | }; 136 | inline void init() 137 | { 138 | for(auto& p : mouseClickPos) 139 | { 140 | p.pos = glm::uvec2(~0u, ~0u); 141 | } 142 | doubleClickThreshold = ImGui::GetIO().MouseDoubleClickTime * 1000.0; 143 | doubleClickDist = ImGui::GetIO().MouseDoubleClickMaxDist; 144 | } 145 | 146 | inline void update(glm::uvec2 mousePos) { currentPos = mousePos; } 147 | 148 | 149 | enum ButtonState 150 | { 151 | eNone, 152 | eSingleClick, 153 | eDoubleClick, 154 | eDrag 155 | }; 156 | 157 | ButtonState getButtonState(ImGuiMouseButton button) 158 | { 159 | ButtonClick& b = mouseClickPos[button]; 160 | 161 | bool isDown = ImGui::IsMouseDown(button); 162 | bool isUp = ImGui::IsMouseReleased(button); 163 | float dist = std::sqrt((b.pos.x - currentPos.x) * (b.pos.x - currentPos.x) 164 | + (b.pos.y - currentPos.y) * (b.pos.y - currentPos.y)); 165 | bool isMoved = dist > doubleClickDist; 166 | 167 | switch(b.internal) 168 | { 169 | case eInternalNone: 170 | if(isDown) 171 | { 172 | b.internal = eInternalFirstDown; 173 | b.pos = currentPos; 174 | } 175 | break; 176 | 177 | case eInternalFirstDown: 178 | if(isUp) 179 | { 180 | b.internal = eInternalFirstUp; 181 | b.firstUpTime = b.internalTime.elapsed(); 182 | break; 183 | } 184 | if(isMoved) 185 | { 186 | b.internal = eInternalDrag; 187 | } 188 | break; 189 | 190 | case eInternalFirstUp: { 191 | double elapsed = b.internalTime.elapsed() - b.firstUpTime; 192 | if(isMoved 
|| elapsed > doubleClickThreshold) 193 | { 194 | b.internal = eInternalFirstClick; 195 | break; 196 | } 197 | if(isDown) 198 | { 199 | b.internal = eInternalSecondDown; 200 | } 201 | break; 202 | } 203 | case eInternalFirstClick: 204 | b.internal = eInternalNone; 205 | break; 206 | 207 | case eInternalDrag: 208 | if(isUp) 209 | { 210 | b.internal = eInternalNone; 211 | } 212 | break; 213 | 214 | case eInternalSecondDown: 215 | if(isMoved) 216 | { 217 | b.internal = eInternalDrag; 218 | break; 219 | } 220 | if(isUp) 221 | { 222 | b.internal = eInternalSecondUp; 223 | } 224 | break; 225 | case eInternalSecondUp: 226 | b.internal = eInternalNone; 227 | break; 228 | } 229 | 230 | 231 | switch(b.internal) 232 | { 233 | case eInternalFirstClick: 234 | return eSingleClick; 235 | case eInternalDrag: 236 | return eDrag; 237 | case eInternalSecondUp: 238 | return eDoubleClick; 239 | default: 240 | return eNone; 241 | } 242 | } 243 | 244 | std::array mouseClickPos; 245 | glm::uvec2 currentPos; 246 | double doubleClickThreshold = 300.0; 247 | float doubleClickDist = 6.f; 248 | }; 249 | 250 | struct ViewPoint 251 | { 252 | std::string name; 253 | glm::mat4 mat; 254 | float sceneScale; 255 | float fov; 256 | }; 257 | 258 | struct TargetImage 259 | { 260 | VkImage image; 261 | VkImageView view; 262 | VkFormat format; 263 | }; 264 | 265 | ////////////////////////////////////////////////////////////////////////// 266 | 267 | // key components 268 | 269 | Resources m_resources; 270 | std::unique_ptr m_scene; 271 | std::unique_ptr m_renderer; 272 | 273 | // configuration settings etc. 
274 | 275 | bool m_rtClusterSupport = false; 276 | 277 | ImGuiH::Registry m_ui; 278 | double m_uiTime = 0; 279 | 280 | Tweak m_tweak; 281 | Tweak m_lastTweak; 282 | bool m_lastVsync; 283 | 284 | FrameConfig m_frameConfig; 285 | 286 | SceneConfig m_sceneConfig; 287 | SceneConfig m_lastSceneConfig; 288 | RendererConfig m_rendererConfig; 289 | RendererConfig m_lastRendererConfig; 290 | 291 | std::string m_rendererShaderPrepend; 292 | std::string m_rendererLastShaderPrepend; 293 | TargetImage m_targetImage; 294 | size_t m_rendererFboChangeID; 295 | 296 | std::string m_customShaderPrepend; 297 | std::string m_lastCustomShaderPrepend; 298 | 299 | std::string m_modelFilename; 300 | glm::vec3 m_modelUpVector = glm::vec3(0, 1, 0); 301 | 302 | int m_frames = 0; 303 | 304 | double m_animTime = 0; 305 | double m_lastTime = 0; 306 | 307 | bool m_requestCameraRecenter = false; 308 | 309 | static void setupContextInfo(nvvk::ContextCreateInfo& info); 310 | 311 | bool initCore(nvvk::Context& context, int winWidth, int winHeight, const std::string& exePath); 312 | void deinit(nvvk::Context& context); 313 | 314 | bool initScene(const char* filename); 315 | void deinitScene(); 316 | void postInitNewScene(); 317 | 318 | bool initFramebuffers(int width, int height); 319 | void updateTargetImage(); 320 | void initRenderer(RendererType rtype); 321 | void deinitRenderer(); 322 | 323 | void setupConfigParameters(nvh::ParameterList& parameterList); 324 | 325 | void onSceneChanged(); 326 | 327 | void updatedClusterConfig(); 328 | 329 | void applyConfigFile(nvh::ParameterList& parameterList, const char* filename); 330 | 331 | struct CallBacks 332 | { 333 | std::function openFile; 334 | nvh::ParameterList* parameterList; 335 | }; 336 | 337 | void loadFile(const std::string& filename); 338 | void processUI(double time, nvh::Profiler& profiler, const CallBacks& callbacks); 339 | void viewportUI(ImVec2 corner); 340 | 341 | struct EventStates 342 | { 343 | glm::ivec2 winSize = {}; 344 | glm::ivec2 
mouse = {}; 345 | int mouseButtonFlags = 0; 346 | int mouseWheel = 0; 347 | 348 | bool reloadShaders = false; 349 | bool alignView = false; 350 | bool saveView = false; 351 | }; 352 | 353 | struct ChangeStates 354 | { 355 | uint32_t timerReset : 1; 356 | uint32_t targetImage : 1; 357 | }; 358 | ChangeStates handleChanges(uint32_t width, uint32_t height, const EventStates& states); 359 | 360 | void renderFrame(VkCommandBuffer cmd, uint32_t width, uint32_t height, double time, nvvk::ProfilerVK& profilerVK, uint32_t cycleIndex); 361 | 362 | template 363 | bool sceneChanged(const T& val) const 364 | { 365 | size_t offset = size_t(&val) - size_t(&m_sceneConfig); 366 | return memcmp(&val, reinterpret_cast(&m_lastSceneConfig) + offset, sizeof(T)) != 0; 367 | } 368 | 369 | template 370 | bool tweakChanged(const T& val) const 371 | { 372 | size_t offset = size_t(&val) - size_t(&m_tweak); 373 | return memcmp(&val, reinterpret_cast(&m_lastTweak) + offset, sizeof(T)) != 0; 374 | } 375 | 376 | template 377 | bool rendererCfgChanged(const T& val) const 378 | { 379 | size_t offset = size_t(&val) - size_t(&m_rendererConfig); 380 | return memcmp(&val, reinterpret_cast(&m_lastRendererConfig) + offset, sizeof(T)) != 0; 381 | } 382 | 383 | template 384 | void uiPlot(std::string plotName, std::string tooltipFormat, const std::vector& data, const T& maxValue) 385 | { 386 | ImVec2 plotSize = ImVec2(ImGui::GetContentRegionAvail().x, ImGui::GetContentRegionAvail().y / 2); 387 | 388 | // Ensure minimum height to avoid overly squished graphics 389 | plotSize.y = std::max(plotSize.y, ImGui::GetTextLineHeight() * 20); 390 | 391 | const ImPlotFlags plotFlags = ImPlotFlags_NoBoxSelect | ImPlotFlags_NoMouseText | ImPlotFlags_Crosshairs; 392 | const ImPlotAxisFlags axesFlags = ImPlotAxisFlags_Lock | ImPlotAxisFlags_NoLabel; 393 | const ImColor plotColor = ImColor(0.07f, 0.9f, 0.06f, 1.0f); 394 | 395 | if(ImPlot::BeginPlot(plotName.c_str(), plotSize, plotFlags)) 396 | { 397 | 
ImPlot::SetupLegend(ImPlotLocation_NorthWest, ImPlotLegendFlags_NoButtons); 398 | ImPlot::SetupAxes(nullptr, "Count", axesFlags, axesFlags); 399 | ImPlot::SetupAxesLimits(0, data.size(), 0, static_cast(maxValue), ImPlotCond_Always); 400 | 401 | ImPlot::PushStyleVar(ImPlotStyleVar_FillAlpha, 0.25f); 402 | ImPlot::SetAxes(ImAxis_X1, ImAxis_Y1); 403 | ImPlot::SetNextFillStyle(plotColor); 404 | ImPlot::PlotShaded("", data.data(), (int)data.size(), -INFINITY, 1.0, 0.0, 0, 0); 405 | ImPlot::PopStyleVar(); 406 | 407 | if(ImPlot::IsPlotHovered()) 408 | { 409 | ImPlotPoint mouse = ImPlot::GetPlotMousePos(); 410 | int mouseOffset = (int(mouse.x)) % (int)data.size(); 411 | ImGui::BeginTooltip(); 412 | ImGui::Text(tooltipFormat.c_str(), mouseOffset, data[mouseOffset]); 413 | ImGui::EndTooltip(); 414 | } 415 | 416 | ImPlot::EndPlot(); 417 | } 418 | } 419 | void setCameraFromScene(const char* filename); 420 | 421 | float decodePickingDepth(const shaderio::Readback& readback); 422 | bool isReadbackValid(const shaderio::Readback& readback); 423 | 424 | MouseButtonHandler m_mouseButtonHandler; 425 | }; 426 | } // namespace animatedclusters 427 | -------------------------------------------------------------------------------- /src/cgltf.cpp: -------------------------------------------------------------------------------- 1 | 2 | #define CGLTF_IMPLEMENTATION 3 | #include 4 | -------------------------------------------------------------------------------- /src/hbao_pass.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | 21 | #ifndef HBAOPASS_H__ 22 | #define HBAOPASS_H__ 23 | 24 | #include 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | 33 | ////////////////////////////////////////////////////////////////////////// 34 | 35 | /// HbaoSystem implements a screen-space 36 | /// ambient occlusion effect using 37 | /// horizon-based ambient occlusion. 38 | /// See https://github.com/nvpro-samples/gl_ssao 39 | /// for more details 40 | 41 | class HbaoPass 42 | { 43 | public: 44 | static const int RANDOM_SIZE = 4; 45 | static const int RANDOM_ELEMENTS = RANDOM_SIZE * RANDOM_SIZE; 46 | 47 | struct Config 48 | { 49 | VkFormat targetFormat; 50 | uint32_t maxFrames; 51 | }; 52 | 53 | void init(VkDevice device, nvvk::ResourceAllocator* allocator, nvvk::ShaderModuleManager* shaderManager, const Config& config); 54 | void reloadShaders(); 55 | void deinit(); 56 | 57 | struct FrameConfig 58 | { 59 | bool blend; 60 | 61 | uint32_t sourceWidthScale; 62 | uint32_t sourceHeightScale; 63 | 64 | uint32_t targetWidth; 65 | uint32_t targetHeight; 66 | 67 | VkDescriptorImageInfo sourceDepth; 68 | VkDescriptorImageInfo targetColor; 69 | }; 70 | 71 | struct FrameIMGs 72 | { 73 | nvvk::Texture depthlinear, viewnormal, result, blur, resultarray, deptharray; 74 | }; 75 | 76 | struct Frame 77 | { 78 | uint32_t slot = ~0u; 79 | 80 | FrameIMGs images; 81 | int width; 82 | int height; 83 
| 84 | FrameConfig config; 85 | }; 86 | 87 | bool initFrame(Frame& frame, const FrameConfig& config, VkCommandBuffer cmd); 88 | void deinitFrame(Frame& frame); 89 | 90 | 91 | struct View 92 | { 93 | bool isOrtho; 94 | float nearPlane; 95 | float farPlane; 96 | float halfFovyTan; 97 | glm::mat4 projectionMatrix; 98 | }; 99 | 100 | struct Settings 101 | { 102 | View view; 103 | 104 | float unit2viewspace = 1.0f; 105 | float intensity = 1.0f; 106 | float radius = 1.0f; 107 | float bias = 0.1f; 108 | float blurSharpness = 40.0f; 109 | }; 110 | 111 | // before: must do appropriate barriers for color write access and depth read access 112 | // after: from compute write to whatever output image needs 113 | void cmdCompute(VkCommandBuffer cmd, const Frame& frame, const Settings& settings) const; 114 | 115 | private: 116 | struct Shaders 117 | { 118 | nvvk::ShaderModuleID depth_linearize, viewnormal, blur, blur_apply, deinterleave, calc, reinterleave; 119 | }; 120 | 121 | struct Pipelines 122 | { 123 | VkPipeline depth_linearize = VK_NULL_HANDLE; 124 | VkPipeline viewnormal = VK_NULL_HANDLE; 125 | VkPipeline blur = VK_NULL_HANDLE; 126 | VkPipeline blur_apply = VK_NULL_HANDLE; 127 | VkPipeline deinterleave = VK_NULL_HANDLE; 128 | VkPipeline calc = VK_NULL_HANDLE; 129 | VkPipeline reinterleave = VK_NULL_HANDLE; 130 | }; 131 | 132 | VkDevice m_device; 133 | nvvk::ResourceAllocator* m_allocator; 134 | nvvk::ShaderModuleManager* m_shaderManager; 135 | nvh::TRangeAllocator<1> m_slots; 136 | Config m_config; 137 | 138 | nvvk::DescriptorSetContainer m_setup; 139 | 140 | nvvk::Buffer m_ubo; 141 | VkDescriptorBufferInfo m_uboInfo; 142 | 143 | VkSampler m_linearSampler; 144 | 145 | Shaders m_shaders; 146 | Pipelines m_pipelines; 147 | 148 | glm::vec4 m_hbaoRandom[RANDOM_ELEMENTS]; 149 | 150 | void updatePipelines(); 151 | void updateUbo(VkCommandBuffer cmd, const Frame& frame, const Settings& settings) const; 152 | }; 153 | 154 | #endif 
-------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include "nvvkhl/element_gui.hpp" 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | #include "animatedclusters.hpp" 33 | 34 | namespace animatedclusters { 35 | 36 | nvvkhl::SampleAppLog g_logger; 37 | std::shared_ptr g_elementCamera; 38 | 39 | ////////////////////////////////////////////////////////////////////////// 40 | 41 | class AnimatedClustersElement : public nvvkhl::IAppElement 42 | { 43 | public: 44 | nvvkhl::Application* m_app; 45 | 46 | AnimatedClusters::CallBacks m_callbacks; 47 | AnimatedClusters::EventStates m_eventStates; 48 | AnimatedClusters m_core; 49 | uint32_t m_width; 50 | uint32_t m_height; 51 | VkDescriptorSet m_imguiImage = nullptr; 52 | VkSampler m_imguiSampler = nullptr; 53 | 54 | std::shared_ptr m_profilerVK; 55 | nvh::ParameterList* m_parameterList = nullptr; 56 | 57 | bool m_useUI = true; 58 | 59 | AnimatedClustersElement() = default; 60 | 
~AnimatedClustersElement() override = default; 61 | 62 | private: 63 | nvvk::Context* m_context{}; 64 | 65 | public: 66 | void setContext(nvvk::Context* context) { m_context = context; } 67 | 68 | void windowTitle() 69 | { 70 | // Window Title 71 | static float dirty_timer = 0.0F; 72 | dirty_timer += ImGui::GetIO().DeltaTime; 73 | if(dirty_timer > 1.0F) // Refresh every seconds 74 | { 75 | const auto& size = m_app->getViewportSize(); 76 | std::array buf{}; 77 | const int ret = snprintf(buf.data(), buf.size(), "%s %dx%d | %d FPS / %.3fms", PROJECT_NAME, 78 | static_cast(size.width), static_cast(size.height), 79 | static_cast(ImGui::GetIO().Framerate), 1000.F / ImGui::GetIO().Framerate); 80 | glfwSetWindowTitle(m_app->getWindowHandle(), buf.data()); 81 | dirty_timer = 0; 82 | } 83 | } 84 | 85 | void cmdImageTransition(VkCommandBuffer cmd, 86 | VkImage img, 87 | VkImageAspectFlags aspects, 88 | VkAccessFlags src, 89 | VkAccessFlags dst, 90 | VkImageLayout oldLayout, 91 | VkImageLayout newLayout) const 92 | { 93 | VkPipelineStageFlags srcPipe = nvvk::makeAccessMaskPipelineStageFlags(src, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT); 94 | VkPipelineStageFlags dstPipe = nvvk::makeAccessMaskPipelineStageFlags(dst, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT); 95 | 96 | VkImageSubresourceRange range; 97 | memset(&range, 0, sizeof(range)); 98 | range.aspectMask = aspects; 99 | range.baseMipLevel = 0; 100 | range.levelCount = VK_REMAINING_MIP_LEVELS; 101 | range.baseArrayLayer = 0; 102 | range.layerCount = VK_REMAINING_ARRAY_LAYERS; 103 | 104 | VkImageMemoryBarrier memBarrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER}; 105 | memBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; 106 | memBarrier.dstAccessMask = dst; 107 | memBarrier.srcAccessMask = src; 108 | memBarrier.oldLayout = oldLayout; 109 | memBarrier.newLayout = newLayout; 110 | memBarrier.image = img; 111 | memBarrier.subresourceRange = range; 112 | 113 | vkCmdPipelineBarrier(cmd, srcPipe, dstPipe, VK_FALSE, 0, nullptr, 0, 
nullptr, 1, &memBarrier); 114 | } 115 | 116 | void updateImage() 117 | { 118 | if(m_imguiImage) 119 | { 120 | ImGui_ImplVulkan_RemoveTexture(m_imguiImage); 121 | m_imguiImage = nullptr; 122 | } 123 | 124 | m_imguiImage = ImGui_ImplVulkan_AddTexture(m_imguiSampler, m_core.m_targetImage.view, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); 125 | } 126 | 127 | void setParameterList(nvh::ParameterList& paramaterList) 128 | { 129 | m_parameterList = ¶materList; 130 | m_core.setupConfigParameters(paramaterList); 131 | } 132 | 133 | void onAttach(nvvkhl::Application* app) override 134 | { 135 | m_app = app; 136 | 137 | if(!m_core.initCore(*m_context, 128, 128, NVPSystem::exePath())) 138 | { 139 | exit(-1); 140 | } 141 | 142 | { 143 | const VkSamplerCreateInfo sampler_info = nvvk::makeSamplerCreateInfo(); 144 | vkCreateSampler(m_context->m_device, &sampler_info, nullptr, &m_imguiSampler); 145 | } 146 | 147 | m_callbacks.openFile = [&](const char* msg, const char* exts) { 148 | return NVPSystem::windowOpenFileDialog(m_app->getWindowHandle(), msg, exts); 149 | }; 150 | m_callbacks.parameterList = m_parameterList; 151 | 152 | updateImage(); 153 | } 154 | 155 | void onDetach() override 156 | { 157 | m_core.deinit(*m_context); 158 | ImGui_ImplVulkan_RemoveTexture((VkDescriptorSet)m_imguiImage); 159 | vkDestroySampler(m_context->m_device, m_imguiSampler, nullptr); 160 | } 161 | 162 | // Return true if the current window is active 163 | bool isWindowHovered(ImGuiWindow* ref_window, ImGuiHoveredFlags flags) 164 | { 165 | ImGuiContext& g = *ImGui::GetCurrentContext(); 166 | if(g.HoveredWindow != ref_window) 167 | return false; 168 | if(!ImGui::IsWindowContentHoverable(ref_window, ImGuiFocusedFlags_RootWindow)) 169 | return false; 170 | if(g.ActiveId != 0 && !g.ActiveIdAllowOverlap && g.ActiveId != ref_window->MoveId) 171 | return false; 172 | 173 | // Cancel if over the title bar 174 | { 175 | if(g.IO.ConfigWindowsMoveFromTitleBarOnly) 176 | if(!(ref_window->Flags & 
ImGuiWindowFlags_NoTitleBar) || ref_window->DockIsActive) 177 | if(ref_window->TitleBarRect().Contains(g.IO.MousePos)) 178 | return false; 179 | } 180 | 181 | return true; 182 | } 183 | 184 | void onUIRender() override 185 | { 186 | ImGuiWindow* viewportWindow = ImGui::FindWindowByName("Viewport"); 187 | if(viewportWindow) 188 | { 189 | // If the mouse cursor is over the "Viewport", check for all inputs that can manipulate 190 | // the camera. 191 | if(isWindowHovered(viewportWindow, ImGuiFocusedFlags_RootWindow)) 192 | { 193 | m_eventStates.winSize = {m_width, m_height}; 194 | m_eventStates.mouseButtonFlags = 0; 195 | m_eventStates.mouseButtonFlags |= ImGui::IsMouseDown(ImGuiMouseButton_Left) ? 1 : 0; 196 | m_eventStates.mouseButtonFlags |= ImGui::IsMouseDown(ImGuiMouseButton_Right) ? 2 : 0; 197 | m_eventStates.mouseButtonFlags |= ImGui::IsMouseDown(ImGuiMouseButton_Middle) ? 4 : 0; 198 | ImVec2 mousePos = ImGui::GetMousePos(); 199 | m_eventStates.mouse = {mousePos.x, mousePos.y}; 200 | m_eventStates.mouseWheel += ImGui::GetIO().MouseWheel * 50.0f; 201 | } 202 | else 203 | { 204 | m_eventStates.mouseButtonFlags = 0; 205 | } 206 | } 207 | 208 | m_eventStates.alignView = ImGui::IsKeyPressed(ImGuiKey::ImGuiKey_A, false); 209 | m_eventStates.reloadShaders = ImGui::IsKeyPressed(ImGuiKey::ImGuiKey_R, false); 210 | m_eventStates.saveView = ImGui::IsKeyPressed(ImGuiKey::ImGuiKey_C, false); 211 | 212 | // [optional] convenient setting panel 213 | m_core.processUI(glfwGetTime(), *m_profilerVK, m_callbacks); 214 | 215 | AnimatedClusters::ChangeStates changes = m_core.handleChanges(m_width, m_height, m_eventStates); 216 | if(changes.targetImage) 217 | { 218 | updateImage(); 219 | } 220 | if(changes.timerReset) 221 | { 222 | m_profilerVK->reset(); 223 | } 224 | 225 | // Rendered image displayed fully in 'Viewport' window 226 | ImGui::Begin("Viewport"); 227 | ImVec2 corner = ImGui::GetCursorScreenPos(); // Corner of the viewport 228 | ImGui::Image(m_imguiImage, 
ImGui::GetContentRegionAvail()); 229 | m_core.viewportUI(corner); 230 | ImGui::End(); 231 | } 232 | 233 | void onRender(VkCommandBuffer cmd) override 234 | { 235 | m_core.renderFrame(cmd, m_width, m_height, glfwGetTime(), *m_profilerVK, m_app->getFrameCycleIndex()); 236 | } 237 | 238 | void onResize(uint32_t width, uint32_t height) override 239 | { 240 | m_width = width; 241 | m_height = height; 242 | m_core.initFramebuffers(m_width, m_height); 243 | updateImage(); 244 | } 245 | 246 | 247 | // Called if showMenu is true 248 | void onUIMenu() override 249 | { 250 | if(ImGui::BeginMenu("File")) 251 | { 252 | if(ImGui::MenuItem("Open")) 253 | { 254 | std::string fileNameLo = m_callbacks.openFile("Pick model file", 255 | "Supported (glTF 2.0)|*.gltf;*.glb;" 256 | "|All|*.*"); 257 | m_core.loadFile(fileNameLo); 258 | } 259 | ImGui::Separator(); 260 | ImGui::EndMenu(); 261 | } 262 | 263 | windowTitle(); 264 | } 265 | 266 | void onFileDrop(const char* filename) override { m_core.loadFile(filename); } 267 | }; 268 | 269 | } // namespace animatedclusters 270 | 271 | using namespace animatedclusters; 272 | 273 | 274 | #include 275 | 276 | int main(int argc, char** argv) 277 | { 278 | try 279 | { 280 | NVPSystem sys(PROJECT_NAME); 281 | 282 | // This is not absolutely required, but having this early, loads the Vulkan DLL, which delays 283 | // the window to show up by ~1.5 seconds, but on the other hands, reduce the time the window 284 | // displays a white background. 
285 | int glfw_valid = GLFW_TRUE; 286 | glfw_valid &= glfwInit(); 287 | glfw_valid &= glfwVulkanSupported(); 288 | if(!glfw_valid) 289 | { 290 | std::string err_message("Vulkan is not supported on this computer."); 291 | #if _WIN32 292 | MessageBox(nullptr, err_message.c_str(), "Vulkan error", MB_OK); 293 | #endif 294 | LOGE("%s", err_message.c_str()); 295 | return EXIT_FAILURE; 296 | } 297 | 298 | nvvkhl::ApplicationCreateInfo appInfo; 299 | appInfo.name = PROJECT_NAME; 300 | appInfo.useMenu = true; 301 | appInfo.vSync = false; 302 | appInfo.imguiConfigFlags &= ~ImGuiConfigFlags_ViewportsEnable; // keep single window 303 | 304 | // for now always set to false, given extension isn't supported 305 | bool validationLayer = false; 306 | bool wantedVSync = true; 307 | uint32_t compatibleDeviceIndex = 0; 308 | 309 | for(int a = 0; a < argc; a++) 310 | { 311 | if(strcmp(argv[a], "-device") == 0 && a + 1 < argc) 312 | { 313 | compatibleDeviceIndex = atoi(argv[a + 1]); 314 | a += 1; 315 | } 316 | else if(strcmp(argv[a], "-novalidation") == 0) 317 | { 318 | validationLayer = false; 319 | } 320 | else if(strcmp(argv[a], "-validation") == 0) 321 | { 322 | validationLayer = true; 323 | } 324 | else if(strcmp(argv[a], "-vsync") == 0 && a + 1 < argc) 325 | { 326 | wantedVSync = atoi(argv[a + 1]); 327 | a += 1; 328 | } 329 | } 330 | 331 | nvvk::Context context{}; 332 | nvvk::ContextCreateInfo contextCreateInfo(validationLayer); 333 | contextCreateInfo.setVersion(1, 3); 334 | contextCreateInfo.compatibleDeviceIndex = compatibleDeviceIndex; 335 | 336 | nvvkhl::addSurfaceExtensions(contextCreateInfo.instanceExtensions); 337 | contextCreateInfo.deviceExtensions.emplace_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); 338 | 339 | AnimatedClusters::setupContextInfo(contextCreateInfo); 340 | 341 | if(!context.init(contextCreateInfo)) 342 | { 343 | LOGE("Vulkan context init failed\n"); 344 | return EXIT_FAILURE; 345 | } 346 | 347 | context.ignoreDebugMessage(0x715035dd); // 16 bit storage for mesh 
shaders 348 | context.ignoreDebugMessage(0x6e224e9); // 16 bit storage for mesh shaders 349 | 350 | // 351 | 352 | // Setting up the layout of the application 353 | appInfo.dockSetup = [](ImGuiID viewportID) { 354 | #ifdef _DEBUG 355 | // left side panel container 356 | ImGuiID debugID = ImGui::DockBuilderSplitNode(viewportID, ImGuiDir_Left, 0.15F, nullptr, &viewportID); 357 | ImGui::DockBuilderDockWindow("Debug", debugID); 358 | #endif 359 | // right side panel container 360 | ImGuiID settingID = ImGui::DockBuilderSplitNode(viewportID, ImGuiDir_Right, 0.25F, nullptr, &viewportID); 361 | ImGui::DockBuilderDockWindow("Settings", settingID); 362 | ImGui::DockBuilderDockWindow("Misc Settings", settingID); 363 | 364 | // bottom panel container 365 | ImGuiID loggerID = ImGui::DockBuilderSplitNode(viewportID, ImGuiDir_Down, 0.35F, nullptr, &viewportID); 366 | ImGui::DockBuilderDockWindow("Log", loggerID); 367 | ImGuiID profilerID = ImGui::DockBuilderSplitNode(loggerID, ImGuiDir_Right, 0.4F, nullptr, &loggerID); 368 | ImGui::DockBuilderDockWindow("Profiler", profilerID); 369 | ImGuiID statisticsID = ImGui::DockBuilderSplitNode(profilerID, ImGuiDir_Right, 0.5F, nullptr, &profilerID); 370 | ImGui::DockBuilderDockWindow("Statistics", statisticsID); 371 | }; 372 | 373 | appInfo.instance = context.m_instance; 374 | appInfo.device = context.m_device; 375 | appInfo.physicalDevice = context.m_physicalDevice; 376 | appInfo.queues.resize(1); 377 | appInfo.queues[0].queue = context.m_queueGCT.queue; 378 | appInfo.queues[0].familyIndex = context.m_queueGCT.familyIndex; 379 | appInfo.queues[0].queueIndex = context.m_queueGCT.queueIndex; 380 | 381 | { 382 | // Create the application 383 | auto app = std::make_unique(appInfo); 384 | 385 | auto elementSample = std::make_shared(); 386 | auto elementBenchmark = std::make_shared(argc, argv); 387 | auto elementProfiler = std::make_shared(true); 388 | g_elementCamera = std::make_shared(); 389 | 390 | nvprintSetCallback([](int level, const 
char* fmt) { g_logger.addLog(level, "%s", fmt); }); 391 | g_logger.setLogLevel(LOGBITS_INFO); 392 | bool hideLogOnStart = true; 393 | #ifdef _DEBUG 394 | hideLogOnStart = false; 395 | #endif 396 | auto elementLogger = std::make_shared(&g_logger, !hideLogOnStart); 397 | elementBenchmark->setProfiler(elementProfiler); 398 | 399 | elementSample->m_profilerVK = elementProfiler; 400 | elementSample->setParameterList(elementBenchmark->parameterLists()); 401 | elementSample->setContext(&context); 402 | 403 | app->addElement(elementBenchmark); 404 | app->addElement(elementSample); 405 | app->addElement(elementProfiler); 406 | app->addElement(elementLogger); 407 | app->addElement(g_elementCamera); 408 | // Set the actual vsync after initialization, works around an issue with window creation 409 | // FIXME see with MKL for a fix 410 | app->setVsync(wantedVSync); 411 | app->addElement(std::make_shared()); // Menu / Quit 412 | app->run(); 413 | } 414 | 415 | context.deinit(); 416 | 417 | return EXIT_SUCCESS; 418 | } 419 | catch(const std::exception& e) 420 | { 421 | LOGE("Uncaught exception: %s\n", e.what()); 422 | assert(!"We should never reach here under normal operation, but this " 423 | "prints a nicer error message in the event we do."); 424 | return EXIT_FAILURE; 425 | } 426 | } 427 | -------------------------------------------------------------------------------- /src/renderer.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | #pragma once 21 | 22 | #include 23 | 24 | #include 25 | 26 | #include "resources.hpp" 27 | #include "scene.hpp" 28 | 29 | namespace animatedclusters { 30 | struct RendererConfig 31 | { 32 | // scene related 33 | uint32_t numSceneCopies = 1; 34 | uint32_t gridConfig = 3; 35 | glm::vec3 refShift = glm::vec3(1, 1, 1); 36 | 37 | bool doAnimation = true; 38 | 39 | // rt related 40 | VkBuildAccelerationStructureFlagsKHR triangleBuildFlags = 0; 41 | 42 | // cluster related 43 | VkBuildAccelerationStructureFlagsKHR clusterBlasFlags = 0; 44 | VkBuildAccelerationStructureFlagsKHR clusterBuildFlags = 0; 45 | VkBuildAccelerationStructureFlagsKHR templateBuildFlags = 0; 46 | VkBuildAccelerationStructureFlagsKHR templateInstantiateFlags = 0; 47 | bool useTemplates = true; 48 | bool useImplicitTemplates = true; 49 | uint32_t positionTruncateBits = 0; 50 | float templateBBoxBloat = 0.1f; 51 | }; 52 | 53 | class Renderer 54 | { 55 | public: 56 | virtual bool init(Resources& res, Scene& scene, const RendererConfig& config) = 0; 57 | virtual void render(VkCommandBuffer primary, Resources& res, Scene& scene, const FrameConfig& frame, nvvk::ProfilerVK& profiler) = 0; 58 | virtual void deinit(Resources& res) = 0; 59 | virtual ~Renderer(){}; // Defined only so that inherited classes also have virtual destructors. Use deinit(). 
60 | virtual void updatedFrameBuffer(Resources& res){}; 61 | 62 | struct ResourceUsageInfo 63 | { 64 | size_t rtBlasMemBytes{}; 65 | size_t rtTlasMemBytes{}; 66 | size_t rtClasMemBytes{}; 67 | size_t rtOtherMemBytes{}; 68 | size_t sceneMemBytes{}; 69 | 70 | size_t getTotalSum() const { return rtBlasMemBytes + rtTlasMemBytes + rtClasMemBytes + rtOtherMemBytes + sceneMemBytes; } 71 | }; 72 | 73 | inline ResourceUsageInfo getResourceUsage() const { return m_resourceUsageInfo; }; 74 | 75 | protected: 76 | bool initBasicShaders(Resources& res, Scene& scene, const RendererConfig& config); 77 | void initBasics(Resources& res, Scene& scene, const RendererConfig& config); 78 | void deinitBasics(Resources& res); 79 | 80 | void updateAnimation(VkCommandBuffer cmd, Resources& res, Scene& scene, const FrameConfig& frame, nvvk::ProfilerVK& profiler); 81 | 82 | void initRayTracingTlas(Resources& res, Scene& scene, const RendererConfig& config, const VkAccelerationStructureKHR* blas = nullptr); 83 | void updateRayTracingTlas(VkCommandBuffer cmd, Resources& res, Scene& scene, bool update = false); 84 | 85 | RendererConfig m_config; 86 | 87 | struct BasicShaders 88 | { 89 | nvvk::ShaderModuleID animVertexShader; 90 | nvvk::ShaderModuleID animNormalShader; 91 | } m_basicShaders; 92 | 93 | nvvk::PushComputeDispatcher m_animDispatcher; 94 | 95 | struct RenderInstanceBuffers 96 | { 97 | RBuffer positions; 98 | RBuffer normals; 99 | }; 100 | 101 | std::vector m_renderInstances; 102 | RBuffer m_renderInstanceBuffer; 103 | std::vector m_renderInstanceBuffers; 104 | 105 | RBuffer m_tlasInstancesBuffer; 106 | VkAccelerationStructureGeometryKHR m_tlasGeometry; 107 | VkAccelerationStructureBuildGeometryInfoKHR m_tlasBuildInfo; 108 | RBuffer m_tlasScratchBuffer; 109 | nvvk::AccelKHR m_tlas; 110 | 111 | ResourceUsageInfo m_resourceUsageInfo{}; 112 | }; 113 | 114 | std::unique_ptr makeRendererRasterTriangles(); 115 | std::unique_ptr makeRendererRasterClusters(); 116 | std::unique_ptr 
makeRendererRayTraceTriangles(); 117 | std::unique_ptr makeRendererRayTraceClusters(); 118 | } // namespace animatedclusters 119 | -------------------------------------------------------------------------------- /src/renderer_raster_clusters.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. 
17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | #include 21 | 22 | #include "renderer.hpp" 23 | #include "shaders/shaderio.h" 24 | 25 | namespace animatedclusters { 26 | 27 | class RendererRasterClusters : public Renderer 28 | { 29 | public: 30 | virtual bool init(Resources& res, Scene& scene, const RendererConfig& config) override; 31 | virtual void render(VkCommandBuffer primary, Resources& res, Scene& scene, const FrameConfig& frame, nvvk::ProfilerVK& profiler) override; 32 | virtual void deinit(Resources& res) override; 33 | 34 | private: 35 | bool initShaders(Resources& res, Scene& scene, const RendererConfig& config); 36 | 37 | struct Shaders 38 | { 39 | nvvk::ShaderModuleID meshShader; 40 | nvvk::ShaderModuleID fragmentShader; 41 | 42 | nvvk::ShaderModuleID boxMeshShader; 43 | nvvk::ShaderModuleID boxFragmentShader; 44 | } m_shaders; 45 | 46 | nvvk::DescriptorSetContainer m_dsetContainer; 47 | VkPipeline m_pipeline = nullptr; 48 | VkPipeline m_boxPipeline = nullptr; 49 | }; 50 | 51 | bool RendererRasterClusters::initShaders(Resources& res, Scene& scene, const RendererConfig& config) 52 | { 53 | VkPhysicalDeviceMeshShaderPropertiesEXT meshProps = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_PROPERTIES_EXT}; 54 | VkPhysicalDeviceProperties2 props2 = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2}; 55 | props2.pNext = &meshProps; 56 | vkGetPhysicalDeviceProperties2(res.m_physical, &props2); 57 | 58 | std::string prepend; 59 | prepend += nvh::stringFormat("#define CLUSTER_VERTEX_COUNT %d\n", scene.m_config.clusterVertices); 60 | prepend += nvh::stringFormat("#define CLUSTER_TRIANGLE_COUNT %d\n", scene.m_config.clusterTriangles); 61 | prepend += nvh::stringFormat("#define CLUSTER_DEDICATED_VERTICES %d\n", scene.m_config.clusterDedicatedVertices ? 
1 : 0); 62 | prepend += nvh::stringFormat("#define MESHSHADER_WORKGROUP_SIZE %d\n", meshProps.maxPreferredMeshWorkGroupInvocations); 63 | 64 | m_shaders.meshShader = 65 | res.m_shaderManager.createShaderModule(VK_SHADER_STAGE_MESH_BIT_EXT, "render_raster_clusters.mesh.glsl", prepend); 66 | m_shaders.fragmentShader = res.m_shaderManager.createShaderModule(VK_SHADER_STAGE_FRAGMENT_BIT, "render_raster.frag.glsl", 67 | "#define LINKED_MESH_SHADER 1\n"); 68 | m_shaders.boxMeshShader = res.m_shaderManager.createShaderModule(VK_SHADER_STAGE_MESH_BIT_EXT, "render_cluster_bbox.mesh.glsl"); 69 | m_shaders.boxFragmentShader = 70 | res.m_shaderManager.createShaderModule(VK_SHADER_STAGE_FRAGMENT_BIT, "render_cluster_bbox.frag.glsl"); 71 | 72 | if(!res.verifyShaders(m_shaders)) 73 | { 74 | return false; 75 | } 76 | 77 | return initBasicShaders(res, scene, config); 78 | } 79 | 80 | bool RendererRasterClusters::init(Resources& res, Scene& scene, const RendererConfig& config) 81 | { 82 | m_config = config; 83 | 84 | if(!initShaders(res, scene, config)) 85 | return false; 86 | 87 | initBasics(res, scene, config); 88 | 89 | m_resourceUsageInfo.sceneMemBytes += scene.m_sceneClusterMemBytes; 90 | 91 | m_dsetContainer.init(res.m_device); 92 | 93 | VkShaderStageFlags stageFlags = VK_SHADER_STAGE_MESH_BIT_EXT | VK_SHADER_STAGE_FRAGMENT_BIT; 94 | 95 | m_dsetContainer.addBinding(BINDINGS_FRAME_UBO, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1, stageFlags); 96 | m_dsetContainer.addBinding(BINDINGS_READBACK_SSBO, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, stageFlags); 97 | m_dsetContainer.addBinding(BINDINGS_RENDERINSTANCES_SSBO, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, stageFlags); 98 | m_dsetContainer.initLayout(); 99 | 100 | VkPushConstantRange pushRange; 101 | pushRange.offset = 0; 102 | pushRange.size = sizeof(uint32_t); 103 | pushRange.stageFlags = stageFlags; 104 | m_dsetContainer.initPipeLayout(1, &pushRange); 105 | 106 | m_dsetContainer.initPool(1); 107 | VkWriteDescriptorSet writeSets[3]; 108 | 
writeSets[0] = m_dsetContainer.makeWrite(0, BINDINGS_FRAME_UBO, &res.m_common.view.info); 109 | writeSets[1] = m_dsetContainer.makeWrite(0, BINDINGS_READBACK_SSBO, &res.m_common.readbackDevice.info); 110 | writeSets[2] = m_dsetContainer.makeWrite(0, BINDINGS_RENDERINSTANCES_SSBO, &m_renderInstanceBuffer.info); 111 | vkUpdateDescriptorSets(res.m_device, 3, writeSets, 0, nullptr); 112 | 113 | nvvk::GraphicsPipelineState state = res.m_basicGraphicsState; 114 | nvvk::GraphicsPipelineGenerator gfxGen(res.m_device, m_dsetContainer.getPipeLayout(), 115 | res.m_framebuffer.pipelineRenderingInfo, state); 116 | gfxGen.addShader(res.m_shaderManager.get(m_shaders.meshShader), VK_SHADER_STAGE_MESH_BIT_EXT); 117 | gfxGen.addShader(res.m_shaderManager.get(m_shaders.fragmentShader), VK_SHADER_STAGE_FRAGMENT_BIT); 118 | m_pipeline = gfxGen.createPipeline(); 119 | 120 | gfxGen.clearShaders(); 121 | state.inputAssemblyState.topology = VK_PRIMITIVE_TOPOLOGY_LINE_LIST; 122 | gfxGen.addShader(res.m_shaderManager.get(m_shaders.boxMeshShader), VK_SHADER_STAGE_MESH_BIT_EXT); 123 | gfxGen.addShader(res.m_shaderManager.get(m_shaders.boxFragmentShader), VK_SHADER_STAGE_FRAGMENT_BIT); 124 | m_boxPipeline = gfxGen.createPipeline(); 125 | 126 | return true; 127 | } 128 | 129 | void RendererRasterClusters::render(VkCommandBuffer primary, Resources& res, Scene& scene, const FrameConfig& frame, nvvk::ProfilerVK& profiler) 130 | { 131 | if(m_config.doAnimation) 132 | { 133 | updateAnimation(primary, res, scene, frame, profiler); 134 | } 135 | 136 | const bool useSky = true; // When using Sky, the sky is rendered first and the rest of the scene is rendered on top of it. 
137 | 138 | { 139 | auto timerSection = profiler.timeRecurring("Render", primary); 140 | 141 | vkCmdUpdateBuffer(primary, res.m_common.view.buffer, 0, sizeof(shaderio::FrameConstants), (const uint32_t*)&frame.frameConstants); 142 | vkCmdFillBuffer(primary, res.m_common.readbackDevice.buffer, 0, sizeof(shaderio::Readback), 0); 143 | 144 | if(useSky) 145 | { 146 | 147 | res.m_sky.skyParams() = frame.frameConstants.skyParams; 148 | res.m_sky.updateParameterBuffer(primary); 149 | res.cmdImageTransition(primary, res.m_framebuffer.imgColor, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_GENERAL); 150 | res.m_sky.draw(primary, frame.frameConstants.viewMatrix, frame.frameConstants.projMatrix, 151 | res.m_framebuffer.scissor.extent); 152 | } 153 | 154 | res.cmdBeginRendering(primary, false, useSky ? VK_ATTACHMENT_LOAD_OP_LOAD : VK_ATTACHMENT_LOAD_OP_CLEAR); 155 | 156 | res.cmdDynamicState(primary); 157 | vkCmdBindDescriptorSets(primary, VK_PIPELINE_BIND_POINT_GRAPHICS, m_dsetContainer.getPipeLayout(), 0, 1, 158 | m_dsetContainer.getSets(), 0, nullptr); 159 | 160 | if(frame.drawObjects) 161 | { 162 | vkCmdBindPipeline(primary, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipeline); 163 | 164 | for(size_t i = 0; i < m_renderInstances.size(); i++) 165 | { 166 | const shaderio::RenderInstance& renderInstance = m_renderInstances[i]; 167 | uint32_t instanceIndex = (uint32_t)i; 168 | vkCmdPushConstants(primary, m_dsetContainer.getPipeLayout(), VK_SHADER_STAGE_MESH_BIT_EXT | VK_SHADER_STAGE_FRAGMENT_BIT, 169 | 0, sizeof(instanceIndex), &instanceIndex); 170 | vkCmdDrawMeshTasksEXT(primary, renderInstance.numClusters, 1, 1); 171 | } 172 | } 173 | 174 | if(frame.drawClusterBoxes) 175 | { 176 | vkCmdBindPipeline(primary, VK_PIPELINE_BIND_POINT_GRAPHICS, m_boxPipeline); 177 | for(size_t i = 0; i < m_renderInstances.size(); i++) 178 | { 179 | const shaderio::RenderInstance& renderInstance = m_renderInstances[i]; 180 | uint32_t instanceIndex = (uint32_t)i; 181 | vkCmdPushConstants(primary, 
m_dsetContainer.getPipeLayout(), VK_SHADER_STAGE_MESH_BIT_EXT | VK_SHADER_STAGE_FRAGMENT_BIT, 182 | 0, sizeof(instanceIndex), &instanceIndex); 183 | vkCmdDrawMeshTasksEXT(primary, (renderInstance.numClusters + BBOXES_PER_MESHLET - 1) / BBOXES_PER_MESHLET, 1, 1); 184 | } 185 | } 186 | 187 | vkCmdEndRendering(primary); 188 | } 189 | } 190 | 191 | void RendererRasterClusters::deinit(Resources& res) 192 | { 193 | vkDestroyPipeline(res.m_device, m_pipeline, nullptr); 194 | vkDestroyPipeline(res.m_device, m_boxPipeline, nullptr); 195 | 196 | m_dsetContainer.deinit(); 197 | 198 | res.destroyShaders(m_shaders); 199 | 200 | deinitBasics(res); 201 | } 202 | 203 | std::unique_ptr makeRendererRasterClusters() 204 | { 205 | return std::make_unique(); 206 | } 207 | } // namespace animatedclusters 208 | -------------------------------------------------------------------------------- /src/renderer_raster_triangles.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. 
17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | #include "renderer.hpp" 21 | #include "shaders/shaderio.h" 22 | 23 | namespace animatedclusters { 24 | 25 | class RendererRasterTriangles : public Renderer 26 | { 27 | public: 28 | virtual bool init(Resources& res, Scene& scene, const RendererConfig& config) override; 29 | virtual void render(VkCommandBuffer primary, Resources& res, Scene& scene, const FrameConfig& frame, nvvk::ProfilerVK& profiler) override; 30 | virtual void deinit(Resources& res) override; 31 | 32 | private: 33 | bool initShaders(Resources& res, Scene& scene, const RendererConfig& config); 34 | 35 | struct Shaders 36 | { 37 | nvvk::ShaderModuleID vertexShader; 38 | nvvk::ShaderModuleID fragmentShader; 39 | } m_shaders; 40 | 41 | 42 | nvvk::DescriptorSetContainer m_dsetContainer; 43 | VkPipeline m_pipeline = nullptr; 44 | }; 45 | 46 | bool RendererRasterTriangles::initShaders(Resources& res, Scene& scene, const RendererConfig& config) 47 | { 48 | m_shaders.vertexShader = res.m_shaderManager.createShaderModule(VK_SHADER_STAGE_VERTEX_BIT, "render_raster_triangles.vert.glsl"); 49 | m_shaders.fragmentShader = res.m_shaderManager.createShaderModule(VK_SHADER_STAGE_FRAGMENT_BIT, "render_raster.frag.glsl"); 50 | if(!res.verifyShaders(m_shaders)) 51 | { 52 | return false; 53 | } 54 | 55 | return initBasicShaders(res, scene, config); 56 | } 57 | 58 | bool RendererRasterTriangles::init(Resources& res, Scene& scene, const RendererConfig& config) 59 | { 60 | m_config = config; 61 | 62 | if(!initShaders(res, scene, config)) 63 | return false; 64 | 65 | initBasics(res, scene, config); 66 | 67 | m_resourceUsageInfo.sceneMemBytes += scene.m_sceneTriangleMemBytes; 68 | 69 | m_dsetContainer.init(res.m_device); 70 | 71 | VkShaderStageFlags stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; 72 | 73 | m_dsetContainer.addBinding(BINDINGS_FRAME_UBO, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1, stageFlags); 74 | 
m_dsetContainer.addBinding(BINDINGS_READBACK_SSBO, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, stageFlags); 75 | m_dsetContainer.addBinding(BINDINGS_RENDERINSTANCES_SSBO, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, stageFlags); 76 | m_dsetContainer.initLayout(); 77 | 78 | VkPushConstantRange pushRange; 79 | pushRange.offset = 0; 80 | pushRange.size = sizeof(uint32_t); 81 | pushRange.stageFlags = stageFlags; 82 | m_dsetContainer.initPipeLayout(1, &pushRange); 83 | 84 | m_dsetContainer.initPool(1); 85 | VkWriteDescriptorSet writeSets[3]; 86 | writeSets[0] = m_dsetContainer.makeWrite(0, BINDINGS_FRAME_UBO, &res.m_common.view.info); 87 | writeSets[1] = m_dsetContainer.makeWrite(0, BINDINGS_READBACK_SSBO, &res.m_common.readbackDevice.info); 88 | writeSets[2] = m_dsetContainer.makeWrite(0, BINDINGS_RENDERINSTANCES_SSBO, &m_renderInstanceBuffer.info); 89 | vkUpdateDescriptorSets(res.m_device, 3, writeSets, 0, nullptr); 90 | 91 | nvvk::GraphicsPipelineState state = res.m_basicGraphicsState; 92 | nvvk::GraphicsPipelineGenerator gfxGen(res.m_device, m_dsetContainer.getPipeLayout(), 93 | res.m_framebuffer.pipelineRenderingInfo, state); 94 | gfxGen.addShader(res.m_shaderManager.get(m_shaders.vertexShader), VK_SHADER_STAGE_VERTEX_BIT); 95 | gfxGen.addShader(res.m_shaderManager.get(m_shaders.fragmentShader), VK_SHADER_STAGE_FRAGMENT_BIT); 96 | m_pipeline = gfxGen.createPipeline(); 97 | 98 | return true; 99 | } 100 | 101 | void RendererRasterTriangles::render(VkCommandBuffer primary, Resources& res, Scene& scene, const FrameConfig& frame, nvvk::ProfilerVK& profiler) 102 | { 103 | if(m_config.doAnimation) 104 | { 105 | updateAnimation(primary, res, scene, frame, profiler); 106 | } 107 | 108 | const bool useSky = true; // When using Sky, the sky is rendered first and the rest of the scene is rendered on top of it. 
109 | 110 | { 111 | auto timerSection = profiler.timeRecurring("Render", primary); 112 | 113 | vkCmdUpdateBuffer(primary, res.m_common.view.buffer, 0, sizeof(shaderio::FrameConstants), (const uint32_t*)&frame.frameConstants); 114 | vkCmdFillBuffer(primary, res.m_common.readbackDevice.buffer, 0, sizeof(shaderio::Readback), 0); 115 | 116 | if(useSky) 117 | { 118 | res.m_sky.skyParams() = frame.frameConstants.skyParams; 119 | res.m_sky.updateParameterBuffer(primary); 120 | res.cmdImageTransition(primary, res.m_framebuffer.imgColor, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_GENERAL); 121 | res.m_sky.draw(primary, frame.frameConstants.viewMatrix, frame.frameConstants.projMatrix, 122 | res.m_framebuffer.scissor.extent); 123 | } 124 | 125 | res.cmdBeginRendering(primary, false, useSky ? VK_ATTACHMENT_LOAD_OP_LOAD : VK_ATTACHMENT_LOAD_OP_CLEAR); 126 | 127 | if(frame.drawObjects) 128 | { 129 | res.cmdDynamicState(primary); 130 | vkCmdBindDescriptorSets(primary, VK_PIPELINE_BIND_POINT_GRAPHICS, m_dsetContainer.getPipeLayout(), 0, 1, 131 | m_dsetContainer.getSets(), 0, nullptr); 132 | vkCmdBindPipeline(primary, VK_PIPELINE_BIND_POINT_GRAPHICS, m_pipeline); 133 | 134 | for(size_t i = 0; i < m_renderInstances.size(); i++) 135 | { 136 | const shaderio::RenderInstance& renderInstance = m_renderInstances[i]; 137 | uint32_t instanceIndex = (uint32_t)i; 138 | vkCmdPushConstants(primary, m_dsetContainer.getPipeLayout(), VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 139 | 0, sizeof(instanceIndex), &instanceIndex); 140 | vkCmdBindIndexBuffer(primary, scene.m_geometries[renderInstance.geometryID].trianglesBuffer.buffer, 0, VK_INDEX_TYPE_UINT32); 141 | vkCmdDrawIndexed(primary, renderInstance.numTriangles * 3, 1, 0, 0, 0); 142 | } 143 | } 144 | 145 | vkCmdEndRendering(primary); 146 | } 147 | } 148 | 149 | void RendererRasterTriangles::deinit(Resources& res) 150 | { 151 | vkDestroyPipeline(res.m_device, m_pipeline, nullptr); 152 | 153 | m_dsetContainer.deinit(); 154 | 155 | 
res.destroyShaders(m_shaders); 156 | 157 | deinitBasics(res); 158 | } 159 | 160 | std::unique_ptr makeRendererRasterTriangles() 161 | { 162 | return std::make_unique(); 163 | } 164 | } // namespace animatedclusters 165 | -------------------------------------------------------------------------------- /src/resources.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. 
17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | #pragma once 21 | 22 | #include 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | 39 | #include "hbao_pass.hpp" 40 | #include "shaders/shaderio.h" 41 | 42 | namespace animatedclusters { 43 | 44 | struct FrameConfig 45 | { 46 | uint32_t winWidth; 47 | uint32_t winHeight; 48 | bool hbaoActive = true; 49 | bool drawClusterBoxes = false; 50 | bool drawObjects = true; 51 | uint32_t rebuildNth; 52 | bool forceTlasFullRebuild = false; 53 | float blasRebuildFraction = 0.1f; 54 | shaderio::FrameConstants frameConstants; 55 | HbaoPass::Settings hbaoSettings; 56 | }; 57 | 58 | ////////////////////////////////////////////////////////////////////////// 59 | 60 | struct RBuffer : nvvk::Buffer 61 | { 62 | VkDescriptorBufferInfo info = {VK_NULL_HANDLE}; 63 | void* mapping = nullptr; 64 | }; 65 | 66 | // allows > 4 GB allocations using sparse memory 67 | struct RLargeBuffer : nvvk::LargeBuffer 68 | { 69 | VkDescriptorBufferInfo info = {VK_NULL_HANDLE}; 70 | }; 71 | 72 | struct RImage : nvvk::Image 73 | { 74 | RImage() {} 75 | RImage& operator=(nvvk::Image other) 76 | { 77 | *(nvvk::Image*)this = other; 78 | layout = VK_IMAGE_LAYOUT_UNDEFINED; 79 | return *this; 80 | } 81 | 82 | VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED; 83 | }; 84 | 85 | inline void cmdCopyBuffer(VkCommandBuffer cmd, const RBuffer& src, const RBuffer& dst) 86 | { 87 | VkBufferCopy cpy = {src.info.offset, dst.info.offset, src.info.range}; 88 | vkCmdCopyBuffer(cmd, src.buffer, dst.buffer, 1, &cpy); 89 | } 90 | 91 | ////////////////////////////////////////////////////////////////////////// 92 | 93 | #define DEBUGUTIL_SET_NAME(var) debugUtil.setObjectName(var, #var) 94 | 95 | 96 | class Resources 97 | { 98 | public: 99 | struct FrameBuffer 100 | { 101 | int renderWidth = 0; 102 | int renderHeight = 
0; 103 | int supersample = 0; 104 | bool useResolved = false; 105 | 106 | VkFormat colorFormat = VK_FORMAT_R8G8B8A8_UNORM; 107 | VkFormat depthStencilFormat; 108 | 109 | VkViewport viewport; 110 | VkRect2D scissor; 111 | 112 | RImage imgColor = {}; 113 | RImage imgColorResolved = {}; 114 | RImage imgDepthStencil = {}; 115 | 116 | VkImageView viewColor = VK_NULL_HANDLE; 117 | VkImageView viewColorResolved = VK_NULL_HANDLE; 118 | VkImageView viewDepthStencil = VK_NULL_HANDLE; 119 | VkImageView viewDepth = VK_NULL_HANDLE; 120 | 121 | VkPipelineRenderingCreateInfo pipelineRenderingInfo = {VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO}; 122 | }; 123 | 124 | struct CommonResources 125 | { 126 | RBuffer view; 127 | RBuffer readbackDevice; 128 | RBuffer readbackHost; 129 | }; 130 | 131 | const nvvk::Context* m_context; 132 | VkDevice m_device = VK_NULL_HANDLE; 133 | VkPhysicalDevice m_physical; 134 | VkQueue m_queue; 135 | uint32_t m_queueFamily; 136 | 137 | VkPipelineStageFlags m_supportedSaderStageFlags; 138 | 139 | nvvk::DeviceMemoryAllocator m_memAllocator; 140 | nvvk::ResourceAllocator m_allocator; 141 | 142 | bool m_hbaoFullRes = false; 143 | HbaoPass m_hbaoPass; 144 | HbaoPass::Frame m_hbaoFrame; 145 | 146 | nvvk::CommandPool m_tempCommandPool; 147 | nvvk::ShaderModuleManager m_shaderManager; 148 | nvvk::GraphicsPipelineState m_basicGraphicsState; 149 | 150 | nvvkhl::SimpleSkyDome m_sky; 151 | 152 | CommonResources m_common; 153 | FrameBuffer m_framebuffer; 154 | 155 | uint32_t m_cycleIndex = 0; 156 | 157 | size_t m_fboChangeID = ~0; 158 | 159 | glm::vec4 m_bgColor = {0.1, 0.13, 0.15, 1.0}; 160 | 161 | bool init(nvvk::Context* context, const std::vector& shaderSearchPaths); 162 | void deinit(); 163 | 164 | bool initFramebuffer(int width, int height, int supersample, bool hbaoFullRes); 165 | void deinitFramebuffer(); 166 | 167 | void synchronize(const char* debugMsg = nullptr); 168 | 169 | void beginFrame(uint32_t cycleIndex); 170 | void blitFrame(VkCommandBuffer 
cmd, const FrameConfig& frame, nvvk::ProfilerVK& profiler); 171 | void emptyFrame(VkCommandBuffer cmd, const FrameConfig& frame, nvvk::ProfilerVK& profiler); 172 | void endFrame(); 173 | 174 | void cmdHBAO(VkCommandBuffer cmd, const FrameConfig& frame, nvvk::ProfilerVK& profiler); 175 | 176 | void getReadbackData(shaderio::Readback& readback); 177 | 178 | ////////////////////////////////////////////////////////////////////////// 179 | 180 | // tests if all shaders compiled well, returns false if not 181 | // also destroys all shaders if not all were successful. 182 | bool verifyShaders(size_t numShaders, nvvk::ShaderModuleID* shaders); 183 | template 184 | bool verifyShaders(T& container) 185 | { 186 | return verifyShaders(sizeof(T) / sizeof(nvvk::ShaderModuleID), (nvvk::ShaderModuleID*)&container); 187 | } 188 | 189 | void destroyShaders(size_t numShaders, nvvk::ShaderModuleID* shaders); 190 | template 191 | void destroyShaders(T& container) 192 | { 193 | destroyShaders(sizeof(T) / sizeof(nvvk::ShaderModuleID), (nvvk::ShaderModuleID*)&container); 194 | } 195 | 196 | ////////////////////////////////////////////////////////////////////////// 197 | 198 | bool isBufferSizeValid(VkDeviceSize size) const; 199 | 200 | RBuffer createBuffer(VkDeviceSize size, VkBufferUsageFlags flags, VkMemoryPropertyFlags memFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); 201 | RLargeBuffer createLargeBuffer(VkDeviceSize size, VkBufferUsageFlags flags, VkMemoryPropertyFlags memFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); 202 | 203 | void destroy(RBuffer& obj); 204 | void destroy(RLargeBuffer& obj); 205 | 206 | nvvk::AccelKHR createAccelKHR(VkAccelerationStructureCreateInfoKHR& createInfo); 207 | void destroy(nvvk::AccelKHR& obj); 208 | 209 | ////////////////////////////////////////////////////////////////////////// 210 | 211 | void simpleUploadBuffer(const RBuffer& dst, const void* src); 212 | void simpleUploadBuffer(const RBuffer& dst, size_t offset, size_t sz, const void* src); 213 | 
void simpleDownloadBuffer(void* dst, const RBuffer& src); 214 | 215 | ////////////////////////////////////////////////////////////////////////// 216 | 217 | VkCommandBuffer createTempCmdBuffer(); 218 | void tempSyncSubmit(VkCommandBuffer cmd, bool reset = true); 219 | void tempResetResources(); 220 | 221 | ////////////////////////////////////////////////////////////////////////// 222 | 223 | VkCommandBuffer createCmdBuffer(VkCommandPool pool, bool singleshot, bool primary, bool secondaryInClear, bool isCompute = false) const; 224 | 225 | ////////////////////////////////////////////////////////////////////////// 226 | 227 | void cmdBeginRendering(VkCommandBuffer cmd, 228 | bool hasSecondary = false, 229 | VkAttachmentLoadOp loadOpColor = VK_ATTACHMENT_LOAD_OP_CLEAR, 230 | VkAttachmentLoadOp loadOpDepth = VK_ATTACHMENT_LOAD_OP_CLEAR); 231 | void cmdDynamicState(VkCommandBuffer cmd) const; 232 | void cmdBegin(VkCommandBuffer cmd, bool singleshot, bool primary, bool secondaryInClear, bool isCompute = false) const; 233 | 234 | void cmdImageTransition(VkCommandBuffer cmd, RImage& img, VkImageAspectFlags aspects, VkImageLayout newLayout, bool needBarrier = false) const; 235 | 236 | ////////////////////////////////////////////////////////////////////////// 237 | 238 | enum FlushState 239 | { 240 | ALLOW_FLUSH, 241 | DONT_FLUSH, 242 | }; 243 | 244 | class BatchedUploader 245 | { 246 | public: 247 | BatchedUploader(Resources& resources, VkDeviceSize maxBatchSize = 128 * 1024 * 1024) 248 | : m_resources(resources) 249 | , m_maxBatchSize(maxBatchSize) 250 | { 251 | } 252 | 253 | template 254 | T* uploadBuffer(const RBuffer& dst, size_t offset, size_t sz, const T* src, FlushState flushState = FlushState::ALLOW_FLUSH) 255 | { 256 | if(sz) 257 | { 258 | if(m_batchSize && m_batchSize + sz > m_maxBatchSize && flushState == FlushState::ALLOW_FLUSH) 259 | { 260 | flush(); 261 | } 262 | 263 | if(!m_cmd) 264 | { 265 | m_cmd = m_resources.createTempCmdBuffer(); 266 | } 267 | 268 | 
m_batchSize += sz; 269 | return static_cast(m_resources.m_allocator.getStaging()->cmdToBuffer(m_cmd, dst.buffer, offset, sz, src)); 270 | } 271 | return nullptr; 272 | } 273 | template 274 | T* uploadBuffer(const RBuffer& dst, const T* src, FlushState flushState = FlushState::ALLOW_FLUSH) 275 | { 276 | return uploadBuffer(dst, 0, dst.info.range, src, flushState); 277 | } 278 | 279 | void fillBuffer(const RBuffer& dst, uint32_t fillValue) 280 | { 281 | if(!m_cmd) 282 | { 283 | m_cmd = m_resources.createTempCmdBuffer(); 284 | } 285 | vkCmdFillBuffer(m_cmd, dst.buffer, 0, dst.info.range, fillValue); 286 | } 287 | 288 | // must call flush at end of operations 289 | void flush() 290 | { 291 | if(m_cmd) 292 | { 293 | m_resources.tempSyncSubmit(m_cmd); 294 | m_cmd = nullptr; 295 | m_batchSize = 0; 296 | } 297 | } 298 | 299 | ~BatchedUploader() { assert(!m_batchSize); } 300 | 301 | private: 302 | Resources& m_resources; 303 | VkDeviceSize m_maxBatchSize = 0; 304 | VkDeviceSize m_batchSize = 0; 305 | VkCommandBuffer m_cmd = nullptr; 306 | }; 307 | }; 308 | 309 | 310 | } // namespace animatedclusters 311 | -------------------------------------------------------------------------------- /src/scene.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | #pragma once 20 | 21 | #include 22 | 23 | #include "resources.hpp" 24 | 25 | 26 | namespace animatedclusters { 27 | struct SceneConfig 28 | { 29 | uint32_t clusterVertices = 64; 30 | uint32_t clusterTriangles = 64; 31 | // 0 disables 32 | float clusterNvGraphWeight = 0.0; 33 | // Cost penalty for under-filling clusters 34 | float clusterNvUnderfill = 1.0f; 35 | // Cost penalty for overlapping bounding boxes 36 | float clusterNvOverlap = 0.5f; 37 | 38 | bool clusterDedicatedVertices = false; 39 | bool clusterStripify = true; 40 | bool clusterNvLibrary = true; 41 | }; 42 | 43 | class Scene 44 | { 45 | public: 46 | struct Instance 47 | { 48 | glm::mat4 matrix; 49 | shaderio::BBox bbox; 50 | uint32_t geometryID = ~0U; 51 | }; 52 | 53 | struct Geometry 54 | { 55 | uint32_t numTriangles; 56 | uint32_t numVertices; 57 | uint32_t numClusters; 58 | 59 | shaderio::BBox bbox; 60 | 61 | std::vector positions; 62 | std::vector triangles; 63 | std::vector clusterLocalTriangles; 64 | std::vector clusterLocalVertices; 65 | 66 | std::vector clusters; 67 | std::vector clusterBboxes; 68 | 69 | RBuffer positionsBuffer; 70 | RBuffer trianglesBuffer; 71 | RBuffer clustersBuffer; 72 | RBuffer clusterLocalTrianglesBuffer; 73 | RBuffer clusterLocalVerticesBuffer; 74 | RBuffer clusterBboxesBuffer; 75 | }; 76 | 77 | struct Camera 78 | { 79 | glm::mat4 worldMatrix{1}; 80 | glm::vec3 eye{0, 0, 0}; 81 | glm::vec3 center{0, 0, 0}; 82 | glm::vec3 up{0, 1, 0}; 83 | float fovy; 84 | }; 85 | 86 | bool init(const char* filename, Resources& res, const SceneConfig& config); 87 | void deinit(Resources& res); 88 | 89 | SceneConfig m_config; 90 | 91 | shaderio::BBox m_bbox; 92 | 93 | std::vector m_instances; 94 | std::vector m_geometries; 95 | std::vector m_cameras; 96 | 97 | size_t m_sceneClusterMemBytes = 0; 98 | size_t m_sceneTriangleMemBytes = 0; 99 | uint32_t 
m_maxPerGeometryClusters = 0; 100 | uint32_t m_maxPerGeometryTriangles = 0; 101 | uint32_t m_maxPerGeometryVertices = 0; 102 | uint32_t m_maxPerGeometryClusterVertices = 0; 103 | uint32_t m_numClusters = 0; 104 | uint32_t m_numTriangles = 0; 105 | std::vector m_clusterTriangleHistogram; 106 | std::vector m_clusterVertexHistogram; 107 | 108 | uint32_t m_clusterTriangleHistogramMax; 109 | uint32_t m_clusterVertexHistogramMax; 110 | 111 | 112 | private: 113 | bool loadGLTF(const char* filename); 114 | bool buildClusters(); 115 | void computeBBoxes(); 116 | void upload(Resources& res); 117 | }; 118 | } // namespace animatedclusters 119 | -------------------------------------------------------------------------------- /src/scene_gltf.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. 
17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | #include 21 | 22 | #include 23 | #include 24 | #include 25 | 26 | #include "scene.hpp" 27 | 28 | namespace { 29 | struct FileMappingList 30 | { 31 | struct Entry 32 | { 33 | nvh::FileReadMapping mapping; 34 | int64_t refCount = 1; 35 | }; 36 | std::unordered_map m_nameToMapping; 37 | std::unordered_map m_dataToName; 38 | #ifdef _DEBUG 39 | int64_t m_openBias = 0; 40 | #endif 41 | 42 | bool open(const char* path, size_t* size, void** data) 43 | { 44 | #ifdef _DEBUG 45 | m_openBias++; 46 | #endif 47 | 48 | std::string pathStr(path); 49 | 50 | auto it = m_nameToMapping.find(pathStr); 51 | if(it != m_nameToMapping.end()) 52 | { 53 | *data = const_cast(it->second.mapping.data()); 54 | *size = it->second.mapping.size(); 55 | it->second.refCount++; 56 | return true; 57 | } 58 | 59 | Entry entry; 60 | if(entry.mapping.open(path)) 61 | { 62 | const void* mappingData = entry.mapping.data(); 63 | *data = const_cast(mappingData); 64 | *size = entry.mapping.size(); 65 | m_dataToName.insert({mappingData, pathStr}); 66 | m_nameToMapping.insert({pathStr, std::move(entry)}); 67 | return true; 68 | } 69 | 70 | return false; 71 | } 72 | 73 | void close(void* data) 74 | { 75 | #ifdef _DEBUG 76 | m_openBias--; 77 | #endif 78 | auto itName = m_dataToName.find(data); 79 | if(itName != m_dataToName.end()) 80 | { 81 | auto itMapping = m_nameToMapping.find(itName->second); 82 | if(itMapping != m_nameToMapping.end()) 83 | { 84 | itMapping->second.refCount--; 85 | 86 | if(!itMapping->second.refCount) 87 | { 88 | m_nameToMapping.erase(itMapping); 89 | m_dataToName.erase(itName); 90 | } 91 | } 92 | } 93 | } 94 | 95 | ~FileMappingList() 96 | { 97 | #ifdef _DEBUG 98 | assert(m_openBias == 0 && "open/close bias wrong"); 99 | #endif 100 | assert(m_nameToMapping.empty() && m_dataToName.empty() && "not all opened files were closed"); 101 | } 102 | }; 103 | 104 | const uint8_t* cgltf_buffer_view_data(const cgltf_buffer_view* view) 105 | { 
106 | if(view->data) 107 | return (const uint8_t*)view->data; 108 | 109 | if(!view->buffer->data) 110 | return NULL; 111 | 112 | const uint8_t* result = (const uint8_t*)view->buffer->data; 113 | result += view->offset; 114 | return result; 115 | } 116 | 117 | cgltf_result cgltf_read(const struct cgltf_memory_options* memory_options, 118 | const struct cgltf_file_options* file_options, 119 | const char* path, 120 | cgltf_size* size, 121 | void** data) 122 | { 123 | FileMappingList* mappings = (FileMappingList*)file_options->user_data; 124 | if(mappings->open(path, size, data)) 125 | { 126 | return cgltf_result_success; 127 | } 128 | 129 | return cgltf_result_io_error; 130 | } 131 | 132 | void cgltf_release(const struct cgltf_memory_options* memory_options, const struct cgltf_file_options* file_options, void* data) 133 | { 134 | FileMappingList* mappings = (FileMappingList*)file_options->user_data; 135 | mappings->close(data); 136 | } 137 | 138 | // Defines a unique_ptr that can be used for cgltf_data objects. 139 | // Freeing a unique_cgltf_ptr calls cgltf_free, instead of delete. 140 | // This can be constructed using unique_cgltf_ptr foo(..., &cgltf_free). 141 | using unique_cgltf_ptr = std::unique_ptr; 142 | 143 | 144 | // Traverses the glTF node and any of its children, adding a MeshInstance to 145 | // the meshSet for each referenced glTF primitive. 146 | void addInstancesFromNode(std::vector& instances, 147 | const cgltf_data* data, 148 | const cgltf_node* node, 149 | const glm::mat4 parentObjToWorldTransform = glm::mat4(1)) 150 | { 151 | if(node == nullptr) 152 | return; 153 | 154 | // Compute this node's object-to-world transform. 155 | // See https://github.com/KhronosGroup/glTF-Tutorials/blob/master/gltfTutorial/gltfTutorial_004_ScenesNodes.md . 156 | // Note that this depends on glm::mat4 being column-major. 157 | // The documentation above also means that vectors are multiplied on the right. 
158 | glm::mat4 localNodeTransform(1); 159 | cgltf_node_transform_local(node, glm::value_ptr(localNodeTransform)); 160 | const glm::mat4 nodeObjToWorldTransform = parentObjToWorldTransform * localNodeTransform; 161 | 162 | // If this node has a mesh, add instances for its primitives. 163 | if(node->mesh != nullptr) 164 | { 165 | const ptrdiff_t meshIndex = (node->mesh) - data->meshes; 166 | 167 | animatedclusters::Scene::Instance instance{}; 168 | instance.geometryID = uint32_t(meshIndex); 169 | instance.matrix = nodeObjToWorldTransform; 170 | 171 | instances.push_back(instance); 172 | } 173 | 174 | // Recurse over any children of this node. 175 | const size_t numChildren = node->children_count; 176 | for(size_t childIdx = 0; childIdx < numChildren; childIdx++) 177 | { 178 | addInstancesFromNode(instances, data, node->children[childIdx], nodeObjToWorldTransform); 179 | } 180 | } 181 | 182 | } // namespace 183 | 184 | 185 | namespace animatedclusters { 186 | bool Scene::loadGLTF(const char* filename) 187 | { 188 | // Parse the glTF file using cgltf 189 | cgltf_options options = {}; 190 | 191 | FileMappingList mappings; 192 | options.file.read = cgltf_read; 193 | options.file.release = cgltf_release; 194 | options.file.user_data = &mappings; 195 | 196 | cgltf_result cgltfResult; 197 | unique_cgltf_ptr data = unique_cgltf_ptr(nullptr, &cgltf_free); 198 | { 199 | // We have this local pointer followed by an ownership transfer here 200 | // because cgltf_parse_file takes a pointer to a pointer to cgltf_data. 201 | cgltf_data* rawData = nullptr; 202 | cgltfResult = cgltf_parse_file(&options, filename, &rawData); 203 | data = unique_cgltf_ptr(rawData, &cgltf_free); 204 | } 205 | // Check for errors; special message for legacy files 206 | if(cgltfResult == cgltf_result_legacy_gltf) 207 | { 208 | LOGE( 209 | "loadGLTF: This glTF file is an unsupported legacy file - probably glTF 1.0, while cgltf only supports glTF " 210 | "2.0 files. 
Please load a glTF 2.0 file instead.\n"); 211 | return false; 212 | } 213 | else if((cgltfResult != cgltf_result_success) || (data == nullptr)) 214 | { 215 | LOGE("loadGLTF: cgltf_parse_file failed. Is this a valid glTF file? (cgltf result: %d)\n", cgltfResult); 216 | return false; 217 | } 218 | 219 | // Perform additional validation. 220 | cgltfResult = cgltf_validate(data.get()); 221 | if(cgltfResult != cgltf_result_success) 222 | { 223 | LOGE( 224 | "loadGLTF: The glTF file could be parsed, but cgltf_validate failed. Consider using the glTF Validator at " 225 | "https://github.khronos.org/glTF-Validator/ to see if the non-displacement parts of the glTF file are correct. " 226 | "(cgltf result: %d)\n", 227 | cgltfResult); 228 | return false; 229 | } 230 | 231 | // For now, also tell cgltf to go ahead and load all buffers. 232 | cgltfResult = cgltf_load_buffers(&options, data.get(), filename); 233 | if(cgltfResult != cgltf_result_success) 234 | { 235 | LOGE( 236 | "loadGLTF: The glTF file was valid, but cgltf_load_buffers failed. Are the glTF file's referenced file paths " 237 | "valid? 
(cgltf result: %d)\n", 238 | cgltfResult); 239 | return false; 240 | } 241 | 242 | m_geometries.resize(data->meshes_count); 243 | 244 | for(size_t meshIdx = 0; meshIdx < data->meshes_count; meshIdx++) 245 | { 246 | const cgltf_mesh gltfMesh = data->meshes[meshIdx]; 247 | Geometry& geom = m_geometries[meshIdx]; 248 | geom.bbox = {{FLT_MAX, FLT_MAX, FLT_MAX}, {-FLT_MAX, -FLT_MAX, -FLT_MAX}}; 249 | 250 | 251 | // count pass 252 | geom.numTriangles = 0; 253 | geom.numVertices = 0; 254 | for(size_t primIdx = 0; primIdx < gltfMesh.primitives_count; primIdx++) 255 | { 256 | cgltf_primitive* gltfPrim = &gltfMesh.primitives[primIdx]; 257 | 258 | if(gltfPrim->type != cgltf_primitive_type_triangles) 259 | { 260 | continue; 261 | } 262 | 263 | // If the mesh has no attributes, there's nothing we can do 264 | if(gltfPrim->attributes_count == 0) 265 | { 266 | continue; 267 | } 268 | 269 | for(size_t attribIdx = 0; attribIdx < gltfPrim->attributes_count; attribIdx++) 270 | { 271 | const cgltf_attribute& gltfAttrib = gltfPrim->attributes[attribIdx]; 272 | const cgltf_accessor* accessor = gltfAttrib.data; 273 | 274 | // TODO: Can we assume alignment in order to make these a single read_float call? 
275 | if(strcmp(gltfAttrib.name, "POSITION") == 0) 276 | { 277 | geom.numVertices += (uint32_t)accessor->count; 278 | break; 279 | } 280 | } 281 | 282 | geom.numTriangles += (uint32_t)gltfPrim->indices->count / 3; 283 | } 284 | 285 | geom.positions.resize(geom.numVertices); 286 | geom.triangles.resize(geom.numTriangles); 287 | 288 | // fill pass 289 | 290 | uint32_t offsetVertices = 0; 291 | uint32_t offsetTriangles = 0; 292 | 293 | for(size_t primIdx = 0; primIdx < gltfMesh.primitives_count; primIdx++) 294 | { 295 | cgltf_primitive* gltfPrim = &gltfMesh.primitives[primIdx]; 296 | 297 | if(gltfPrim->type != cgltf_primitive_type_triangles) 298 | { 299 | continue; 300 | } 301 | 302 | // If the mesh has no attributes, there's nothing we can do 303 | if(gltfPrim->attributes_count == 0) 304 | { 305 | continue; 306 | } 307 | 308 | uint32_t numVertices = 0; 309 | 310 | for(size_t attribIdx = 0; attribIdx < gltfPrim->attributes_count; attribIdx++) 311 | { 312 | const cgltf_attribute& gltfAttrib = gltfPrim->attributes[attribIdx]; 313 | const cgltf_accessor* accessor = gltfAttrib.data; 314 | 315 | // TODO: Can we assume alignment in order to make these a single read_float call? 
316 | if(strcmp(gltfAttrib.name, "POSITION") == 0) 317 | { 318 | glm::vec3* writePositions = geom.positions.data() + offsetVertices; 319 | 320 | if(accessor->component_type == cgltf_component_type_r_32f && accessor->type == cgltf_type_vec3 321 | && accessor->stride == sizeof(glm::vec3)) 322 | { 323 | const glm::vec3* readPositions = (const glm::vec3*)(cgltf_buffer_view_data(accessor->buffer_view) + accessor->offset); 324 | for(size_t i = 0; i < accessor->count; i++) 325 | { 326 | glm::vec3 tmp = readPositions[i]; 327 | writePositions[i] = tmp; 328 | geom.bbox.lo = glm::min(geom.bbox.lo, tmp); 329 | geom.bbox.hi = glm::max(geom.bbox.hi, tmp); 330 | } 331 | } 332 | else 333 | { 334 | for(size_t i = 0; i < accessor->count; i++) 335 | { 336 | glm::vec3 tmp; 337 | cgltf_accessor_read_float(accessor, i, &tmp.x, 3); 338 | writePositions[i] = tmp; 339 | geom.bbox.lo = glm::min(geom.bbox.lo, tmp); 340 | geom.bbox.hi = glm::max(geom.bbox.hi, tmp); 341 | } 342 | } 343 | 344 | numVertices = (uint32_t)accessor->count; 345 | 346 | break; 347 | } 348 | } 349 | 350 | // indices 351 | { 352 | const cgltf_accessor* accessor = gltfPrim->indices; 353 | 354 | uint32_t* writeIndices = (uint32_t*)(geom.triangles.data() + offsetTriangles); 355 | 356 | if(offsetVertices == 0 && accessor->component_type == cgltf_component_type_r_32u 357 | && accessor->type == cgltf_type_scalar && accessor->stride == sizeof(uint32_t)) 358 | { 359 | memcpy(writeIndices, cgltf_buffer_view_data(accessor->buffer_view) + accessor->offset, 360 | sizeof(uint32_t) * accessor->count); 361 | } 362 | else 363 | { 364 | for(size_t i = 0; i < accessor->count; i++) 365 | { 366 | writeIndices[i] = (uint32_t)cgltf_accessor_read_index(gltfPrim->indices, i) + offsetVertices; 367 | } 368 | } 369 | 370 | offsetTriangles += (uint32_t)accessor->count / 3; 371 | } 372 | 373 | offsetVertices += numVertices; 374 | } 375 | } 376 | 377 | if(data->scenes_count > 0) 378 | { 379 | const cgltf_scene scene = (data->scene != nullptr) ? 
(*(data->scene)) : (data->scenes[0]); 380 | for(size_t nodeIdx = 0; nodeIdx < scene.nodes_count; nodeIdx++) 381 | { 382 | addInstancesFromNode(m_instances, data.get(), scene.nodes[nodeIdx]); 383 | } 384 | } 385 | else 386 | { 387 | for(size_t nodeIdx = 0; nodeIdx < data->nodes_count; nodeIdx++) 388 | { 389 | if(data->nodes[nodeIdx].parent == nullptr) 390 | { 391 | addInstancesFromNode(m_instances, data.get(), &(data->nodes[nodeIdx])); 392 | } 393 | } 394 | } 395 | 396 | return true; 397 | } 398 | } // namespace animatedclusters 399 | -------------------------------------------------------------------------------- /src/vk_nv_cluster_acc.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. 
17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | #include 21 | 22 | #include "vk_nv_cluster_acc.h" 23 | 24 | static PFN_vkGetClusterAccelerationStructureBuildSizesNV s_vkGetClusterAccelerationStructureBuildSizesNV = nullptr; 25 | static PFN_vkCmdBuildClusterAccelerationStructureIndirectNV s_vkCmdBuildClusterAccelerationStructureIndirectNV = nullptr; 26 | 27 | #ifndef NVVK_HAS_VK_NV_cluster_acceleration_structure 28 | VKAPI_ATTR void VKAPI_CALL vkGetClusterAccelerationStructureBuildSizesNV(VkDevice device, 29 | const VkClusterAccelerationStructureInputInfoNV* input, 30 | VkAccelerationStructureBuildSizesInfoKHR* pSizeInfo) 31 | { 32 | s_vkGetClusterAccelerationStructureBuildSizesNV(device, input, pSizeInfo); 33 | } 34 | 35 | VKAPI_ATTR void VKAPI_CALL vkCmdBuildClusterAccelerationStructureIndirectNV(VkCommandBuffer commandBuffer, 36 | const VkClusterAccelerationStructureCommandsInfoNV* cmdInfo) 37 | { 38 | s_vkCmdBuildClusterAccelerationStructureIndirectNV(commandBuffer, cmdInfo); 39 | } 40 | #endif 41 | 42 | 43 | VkBool32 load_VK_NV_cluster_accleration_structure(VkInstance instance, VkDevice device) 44 | { 45 | s_vkGetClusterAccelerationStructureBuildSizesNV = nullptr; 46 | s_vkCmdBuildClusterAccelerationStructureIndirectNV = nullptr; 47 | 48 | s_vkGetClusterAccelerationStructureBuildSizesNV = 49 | (PFN_vkGetClusterAccelerationStructureBuildSizesNV)vkGetDeviceProcAddr(device, "vkGetClusterAccelerationStructureBuildSizesNV"); 50 | s_vkCmdBuildClusterAccelerationStructureIndirectNV = 51 | (PFN_vkCmdBuildClusterAccelerationStructureIndirectNV)vkGetDeviceProcAddr(device, "vkCmdBuildClusterAccelerationStructureIndirectNV"); 52 | 53 | return s_vkGetClusterAccelerationStructureBuildSizesNV && s_vkCmdBuildClusterAccelerationStructureIndirectNV; 54 | } 55 | -------------------------------------------------------------------------------- /src/vk_nv_cluster_acc.h: -------------------------------------------------------------------------------- 1 | /* 2 | * 
Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. 17 | * SPDX-License-Identifier: Apache-2.0 18 | */ 19 | 20 | #pragma once 21 | 22 | #include 23 | 24 | ////////////////////////////////////////////////////////////////////////// 25 | // 26 | // Two extensions, which can be used independently, make the "RTX Mega Geometry" feature: 27 | // 28 | // # VK_NV_cluster_acceleration_structure 29 | // 30 | // Clusters contain content like triangles, and are then referenced within 31 | // one or more bottom-level acceleration structures. Referencing allows similar 32 | // memory saving like instances, but is without transforms. The clusters are also called "CLAS". 33 | // 34 | // Cluster templates allow quicker building of clusters for the purpose of 35 | // animation or "micro-instancing" topology. 36 | // 37 | // # VK_NV_partitioned_acceleration_structure 38 | // 39 | // Partitions divide a fixed pool with a maximum size of number of instances across a top-level 40 | // acceleration structure (AS). The feature is also referred to as "PTLAS". 41 | // 42 | // # Common 43 | // 44 | // Both new extensions are "multi indirect", however with slightly different designs. 
// Cluster builds are one type of operation per single commandbuffer command, following
// the traditional indirect approach.
//
// Partition builds/updates use two level-indirection, meaning multiple operation types
// can be executed per single commandbuffer command, and the types are also sourced
// from the GPU.


// Fallback declarations: everything up to the matching #endif is compiled only when the Vulkan
// headers in use do not already define VK_NV_cluster_acceleration_structure.
#ifndef VK_NV_cluster_acceleration_structure
#define VK_NV_cluster_acceleration_structure 1
#define VK_NV_CLUSTER_ACCELERATION_STRUCTURE_SPEC_VERSION 2
#define VK_NV_CLUSTER_ACCELERATION_STRUCTURE_EXTENSION_NAME "VK_NV_cluster_acceleration_structure"
// VkStructureType constants of this extension (values 1000569000 .. 1000569008).
#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CLUSTER_ACCELERATION_STRUCTURE_FEATURES_NV ((VkStructureType)1000569000)
#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CLUSTER_ACCELERATION_STRUCTURE_PROPERTIES_NV ((VkStructureType)1000569001)
#define VK_STRUCTURE_TYPE_CLUSTER_ACCELERATION_STRUCTURE_CLUSTERS_BOTTOM_LEVEL_INPUT_NV ((VkStructureType)1000569002)
#define VK_STRUCTURE_TYPE_CLUSTER_ACCELERATION_STRUCTURE_TRIANGLE_CLUSTER_INPUT_NV ((VkStructureType)1000569003)
#define VK_STRUCTURE_TYPE_CLUSTER_ACCELERATION_STRUCTURE_MOVE_OBJECTS_INPUT_NV ((VkStructureType)1000569004)
#define VK_STRUCTURE_TYPE_CLUSTER_ACCELERATION_STRUCTURE_INPUT_INFO_NV ((VkStructureType)1000569005)
#define VK_STRUCTURE_TYPE_CLUSTER_ACCELERATION_STRUCTURE_COMMANDS_INFO_NV ((VkStructureType)1000569006)
#define VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CLUSTER_ACCELERATION_STRUCTURE_CREATE_INFO_NV \
  ((VkStructureType)1000569007)
#define VK_STRUCTURE_TYPE_CLUSTER_ACCELERATION_STRUCTURE_FLAGS_NV ((VkStructureType)1000569008)
#define VK_OPACITY_MICROMAP_SPECIAL_INDEX_CLUSTER_GEOMETRY_DISABLE_OPACITY_MICROMAP_NV \
  ((VkOpacityMicromapSpecialIndexEXT) - 5)

// Feature struct with a single boolean, clusterAccelerationStructures.
typedef struct VkPhysicalDeviceClusterAccelerationStructureFeaturesNV
{
  VkStructureType sType;
  void* pNext;
  VkBool32 clusterAccelerationStructures;
} VkPhysicalDeviceClusterAccelerationStructureFeaturesNV;

// Properties struct: per-cluster vertex/triangle maxima, byte alignments, and the maximum
// cluster geometry index.
typedef struct VkPhysicalDeviceClusterAccelerationStructurePropertiesNV
{
  VkStructureType sType;
  void* pNext;
  uint32_t maxVerticesPerCluster;
  uint32_t maxTrianglesPerCluster;
  uint32_t clusterScratchByteAlignment;
  uint32_t clusterByteAlignment;
  uint32_t clusterTemplateByteAlignment;
  uint32_t clusterBottomLevelByteAlignment;
  uint32_t clusterTemplateBoundsByteAlignment;
  uint32_t maxClusterGeometryIndex;
} VkPhysicalDeviceClusterAccelerationStructurePropertiesNV;

// Operation input; reachable through VkClusterAccelerationStructureOpInputNV::pClustersBottomLevel.
typedef struct VkClusterAccelerationStructureClustersBottomLevelInputNV
{
  VkStructureType sType;
  void* pNext;
  uint32_t maxTotalClusterCount;
  uint32_t maxClusterCountPerAccelerationStructure;
} VkClusterAccelerationStructureClustersBottomLevelInputNV;

// Operation input; reachable through VkClusterAccelerationStructureOpInputNV::pTriangleClusters.
typedef struct VkClusterAccelerationStructureTriangleClusterInputNV
{
  VkStructureType sType;
  void* pNext;
  VkFormat vertexFormat;
  uint32_t maxGeometryIndexValue;
  uint32_t maxClusterUniqueGeometryCount;
  uint32_t maxClusterTriangleCount;
  uint32_t maxClusterVertexCount;
  uint32_t maxTotalTriangleCount;
  uint32_t maxTotalVertexCount;
  uint32_t minPositionTruncateBitCount;
} VkClusterAccelerationStructureTriangleClusterInputNV;

typedef enum VkClusterAccelerationStructureTypeNV
{
  VK_CLUSTER_ACCELERATION_STRUCTURE_TYPE_CLUSTERS_BOTTOM_LEVEL_NV = 0,
  VK_CLUSTER_ACCELERATION_STRUCTURE_TYPE_TRIANGLE_CLUSTER_NV = 1,
  VK_CLUSTER_ACCELERATION_STRUCTURE_TYPE_TRIANGLE_CLUSTER_TEMPLATE_NV = 2,
  VK_CLUSTER_ACCELERATION_STRUCTURE_TYPE_MAX_ENUM_NV = 0x7FFFFFFF
} VkClusterAccelerationStructureTypeNV;

// Operation input; reachable through VkClusterAccelerationStructureOpInputNV::pMoveObjects.
typedef struct VkClusterAccelerationStructureMoveObjectsInputNV
{
  VkStructureType sType;
  void* pNext;
  VkClusterAccelerationStructureTypeNV type;
  VkBool32 noMoveOverlap;
  VkDeviceSize maxMovedBytes;
} VkClusterAccelerationStructureMoveObjectsInputNV;

// Union of pointers to the three operation input structs above.
// NOTE(review): presumably the active member is selected by
// VkClusterAccelerationStructureInputInfoNV::opType - confirm against the extension spec.
typedef union VkClusterAccelerationStructureOpInputNV
{
  VkClusterAccelerationStructureClustersBottomLevelInputNV* pClustersBottomLevel;
  VkClusterAccelerationStructureTriangleClusterInputNV* pTriangleClusters;
  VkClusterAccelerationStructureMoveObjectsInputNV* pMoveObjects;
} VkClusterAccelerationStructureOpInputNV;

typedef enum VkClusterAccelerationStructureOpTypeNV
{
  VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_MOVE_OBJECTS_NV = 0,
  VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_BUILD_CLUSTERS_BOTTOM_LEVEL_NV = 1,
  VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_BUILD_TRIANGLE_CLUSTER_NV = 2,
  VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_BUILD_TRIANGLE_CLUSTER_TEMPLATE_NV = 3,
  VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_INSTANTIATE_TRIANGLE_CLUSTER_NV = 4,
  VK_CLUSTER_ACCELERATION_STRUCTURE_OP_TYPE_MAX_ENUM_NV = 0x7FFFFFFF
} VkClusterAccelerationStructureOpTypeNV;

typedef enum VkClusterAccelerationStructureOpModeNV
{
  VK_CLUSTER_ACCELERATION_STRUCTURE_OP_MODE_IMPLICIT_DESTINATIONS_NV = 0,
  VK_CLUSTER_ACCELERATION_STRUCTURE_OP_MODE_EXPLICIT_DESTINATIONS_NV = 1,
  VK_CLUSTER_ACCELERATION_STRUCTURE_OP_MODE_COMPUTE_SIZES_NV = 2,
  VK_CLUSTER_ACCELERATION_STRUCTURE_OP_MODE_MAX_ENUM_NV = 0x7FFFFFFF
} VkClusterAccelerationStructureOpModeNV;

// Describes one operation (type, mode, limits, inputs). It is the argument of
// vkGetClusterAccelerationStructureBuildSizesNV and is embedded by value in
// VkClusterAccelerationStructureCommandsInfoNV.
typedef struct VkClusterAccelerationStructureInputInfoNV
{
  VkStructureType sType;
  void* pNext;
  uint32_t maxAccelerationStructureCount;
  VkBuildAccelerationStructureFlagsKHR flags;
  VkClusterAccelerationStructureOpTypeNV opType;
  VkClusterAccelerationStructureOpModeNV opMode;
  VkClusterAccelerationStructureOpInputNV opInput;
} VkClusterAccelerationStructureInputInfoNV;

typedef VkFlags VkClusterAccelerationStructureAddressResolutionFlagsNV;

// Argument of vkCmdBuildClusterAccelerationStructureIndirectNV. Note that srcInfosCount is a
// device address rather than an integer count.
typedef struct VkClusterAccelerationStructureCommandsInfoNV
{
  VkStructureType sType;
  void* pNext;
  VkClusterAccelerationStructureInputInfoNV input;
  VkDeviceAddress dstImplicitData;
  VkDeviceAddress scratchData;
  VkStridedDeviceAddressRegionKHR dstAddressesArray;
  VkStridedDeviceAddressRegionKHR dstSizesArray;
  VkStridedDeviceAddressRegionKHR srcInfosArray;
  VkDeviceAddress srcInfosCount;
  VkClusterAccelerationStructureAddressResolutionFlagsNV addressResolutionFlags;
} VkClusterAccelerationStructureCommandsInfoNV;

// Device address plus byte stride; used by
// VkClusterAccelerationStructureInstantiateClusterInfoNV::vertexBuffer.
typedef struct VkStridedDeviceAddressNV
{
  VkDeviceAddress startAddress;
  VkDeviceSize strideInBytes;
} VkStridedDeviceAddressNV;

typedef struct VkRayTracingPipelineClusterAccelerationStructureCreateInfoNV
{
  VkStructureType sType;
  void* pNext;
  VkBool32 allowClusterAccelerationStructure;
} VkRayTracingPipelineClusterAccelerationStructureCreateInfoNV;

// The *InfoNV structs from here on have no sType/pNext and refer to data by device address.
// NOTE(review): presumably they are the per-operation records read through srcInfosArray,
// one struct type per VkClusterAccelerationStructureOpTypeNV value - confirm against the spec.
typedef struct VkClusterAccelerationStructureMoveObjectsInfoNV
{
  VkDeviceAddress srcAccelerationStructure;
} VkClusterAccelerationStructureMoveObjectsInfoNV;

typedef struct VkClusterAccelerationStructureBuildClustersBottomLevelInfoNV
{
  uint32_t clusterReferencesCount;
  uint32_t clusterReferencesStride;
  VkDeviceAddress clusterReferences;
} VkClusterAccelerationStructureBuildClustersBottomLevelInfoNV;

// Bit-fields sum to 32 bits (24 + 5 + 3).
typedef struct VkClusterAccelerationStructureGeometryIndexAndGeometryFlagsNV
{
  uint32_t geometryIndex : 24;
  uint32_t reserved : 5;
  uint32_t geometryFlags : 3;
} VkClusterAccelerationStructureGeometryIndexAndGeometryFlagsNV;

typedef VkFlags VkClusterAccelerationStructureClusterFlagsNV;

// The five bit-fields sum to 32 bits (9 + 9 + 6 + 4 + 4).
typedef struct VkClusterAccelerationStructureBuildTriangleClusterInfoNV
{
  uint32_t clusterID;
  VkClusterAccelerationStructureClusterFlagsNV clusterFlags;
  uint32_t triangleCount : 9;
  uint32_t vertexCount : 9;
  uint32_t positionTruncateBitCount : 6;
  uint32_t indexType : 4;
  uint32_t opacityMicromapIndexType : 4;
  VkClusterAccelerationStructureGeometryIndexAndGeometryFlagsNV baseGeometryIndexAndGeometryFlags;
  uint16_t indexBufferStride;
  uint16_t vertexBufferStride;
  uint16_t geometryIndexAndFlagsBufferStride;
  uint16_t opacityMicromapIndexBufferStride;
  VkDeviceAddress indexBuffer;
  VkDeviceAddress vertexBuffer;
  VkDeviceAddress geometryIndexAndFlagsBuffer;
  VkDeviceAddress opacityMicromapArray;
  VkDeviceAddress opacityMicromapIndexBuffer;
} VkClusterAccelerationStructureBuildTriangleClusterInfoNV;

// Same members, in the same order, as VkClusterAccelerationStructureBuildTriangleClusterInfoNV,
// followed by one extra member: instantiationBoundingBoxLimit.
typedef struct VkClusterAccelerationStructureBuildTriangleClusterTemplateInfoNV
{
  uint32_t clusterID;
  VkClusterAccelerationStructureClusterFlagsNV clusterFlags;
  uint32_t triangleCount : 9;
  uint32_t vertexCount : 9;
  uint32_t positionTruncateBitCount : 6;
  uint32_t indexType : 4;
  uint32_t opacityMicromapIndexType : 4;
  VkClusterAccelerationStructureGeometryIndexAndGeometryFlagsNV baseGeometryIndexAndGeometryFlags;
  uint16_t indexBufferStride;
  uint16_t vertexBufferStride;
  uint16_t geometryIndexAndFlagsBufferStride;
  uint16_t opacityMicromapIndexBufferStride;
  VkDeviceAddress indexBuffer;
  VkDeviceAddress vertexBuffer;
  VkDeviceAddress geometryIndexAndFlagsBuffer;
  VkDeviceAddress opacityMicromapArray;
  VkDeviceAddress opacityMicromapIndexBuffer;
  VkDeviceAddress instantiationBoundingBoxLimit;
} VkClusterAccelerationStructureBuildTriangleClusterTemplateInfoNV;

typedef enum VkClusterAccelerationStructureClusterFlagBitsNV
{
  VK_CLUSTER_ACCELERATION_STRUCTURE_CLUSTER_ALLOW_DISABLE_OPACITY_MICROMAPS_NV = 0x00000001,
  VK_CLUSTER_ACCELERATION_STRUCTURE_CLUSTER_FLAG_BITS_MAX_ENUM_NV = 0x7FFFFFFF
} VkClusterAccelerationStructureClusterFlagBitsNV;

typedef VkFlags VkClusterAccelerationStructureGeometryFlagsNV;

// Three flag bits (0x1, 0x2, 0x4); the geometryFlags bit-field above is 3 bits wide.
typedef enum VkClusterAccelerationStructureGeometryFlagBitsNV
{
  VK_CLUSTER_ACCELERATION_STRUCTURE_GEOMETRY_CULL_DISABLE_BIT_NV = 0x00000001,
  VK_CLUSTER_ACCELERATION_STRUCTURE_GEOMETRY_NO_DUPLICATE_ANYHIT_INVOCATION_BIT_NV = 0x00000002,
  VK_CLUSTER_ACCELERATION_STRUCTURE_GEOMETRY_OPAQUE_BIT_NV = 0x00000004,
  VK_CLUSTER_ACCELERATION_STRUCTURE_GEOMETRY_FLAG_BITS_MAX_ENUM_NV = 0x7FFFFFFF
} VkClusterAccelerationStructureGeometryFlagBitsNV;

// One bit per address-carrying member of VkClusterAccelerationStructureCommandsInfoNV
// (dstImplicitData, scratchData, dstAddressesArray, dstSizesArray, srcInfosArray, srcInfosCount).
typedef enum VkClusterAccelerationStructureAddressResolutionFlagBitsNV
{
  VK_CLUSTER_ACCELERATION_STRUCTURE_ADDRESS_RESOLUTION_INDIRECTED_DST_IMPLICIT_DATA_BIT_NV = 0x00000001,
  VK_CLUSTER_ACCELERATION_STRUCTURE_ADDRESS_RESOLUTION_INDIRECTED_SCRATCH_DATA_BIT_NV = 0x00000002,
  VK_CLUSTER_ACCELERATION_STRUCTURE_ADDRESS_RESOLUTION_INDIRECTED_DST_ADDRESS_ARRAY_BIT_NV = 0x00000004,
  VK_CLUSTER_ACCELERATION_STRUCTURE_ADDRESS_RESOLUTION_INDIRECTED_DST_SIZES_ARRAY_BIT_NV = 0x00000008,
  VK_CLUSTER_ACCELERATION_STRUCTURE_ADDRESS_RESOLUTION_INDIRECTED_SRC_INFOS_ARRAY_BIT_NV = 0x00000010,
  VK_CLUSTER_ACCELERATION_STRUCTURE_ADDRESS_RESOLUTION_INDIRECTED_SRC_INFOS_COUNT_BIT_NV = 0x00000020,
  VK_CLUSTER_ACCELERATION_STRUCTURE_ADDRESS_RESOLUTION_FLAG_BITS_MAX_ENUM_NV = 0x7FFFFFFF
} VkClusterAccelerationStructureAddressResolutionFlagBitsNV;

// Bit-fields sum to 32 bits (24 + 8).
typedef struct VkClusterAccelerationStructureInstantiateClusterInfoNV
{
  uint32_t clusterIdOffset;
  uint32_t geometryIndexOffset : 24;
  uint32_t reserved : 8;
  VkDeviceAddress clusterTemplateAddress;
  VkStridedDeviceAddressNV vertexBuffer;
} VkClusterAccelerationStructureInstantiateClusterInfoNV;

// Values are single bits (1, 2, 4), not byte sizes.
// NOTE(review): presumably these are the values stored in the 4-bit indexType fields - confirm.
typedef enum VkClusterAccelerationStructureIndexFormatNV
{
  VK_CLUSTER_ACCELERATION_STRUCTURE_INDEX_FORMAT_8BIT_NV = 0x00000001,
  VK_CLUSTER_ACCELERATION_STRUCTURE_INDEX_FORMAT_16BIT_NV = 0x00000002,
  VK_CLUSTER_ACCELERATION_STRUCTURE_INDEX_FORMAT_32BIT_NV = 0x00000004,
  VK_CLUSTER_ACCELERATION_STRUCTURE_INDEX_FORMAT_MAX_ENUM_NV = 0x7FFFFFFF
} VkClusterAccelerationStructureIndexFormatNV;

// Function pointer types for the extension's two entry points.
typedef void(VKAPI_PTR* PFN_vkGetClusterAccelerationStructureBuildSizesNV)(VkDevice device,
                                                                           const VkClusterAccelerationStructureInputInfoNV* pInfo,
                                                                           VkAccelerationStructureBuildSizesInfoKHR* pSizeInfo);
typedef void(VKAPI_PTR* PFN_vkCmdBuildClusterAccelerationStructureIndirectNV)(VkCommandBuffer commandBuffer,
                                                                              const VkClusterAccelerationStructureCommandsInfoNV* pCommandInfos);

// Prototypes are declared unless VK_NO_PROTOTYPES is defined. vk_nv_cluster_acc.cpp provides
// definitions that forward to the pointers resolved by the loader declared at the end of this file.
#ifndef VK_NO_PROTOTYPES
VKAPI_ATTR void VKAPI_CALL vkGetClusterAccelerationStructureBuildSizesNV(VkDevice device,
                                                                         VkClusterAccelerationStructureInputInfoNV const* pInfo,
                                                                         VkAccelerationStructureBuildSizesInfoKHR* pSizeInfo);

VKAPI_ATTR void VKAPI_CALL vkCmdBuildClusterAccelerationStructureIndirectNV(VkCommandBuffer commandBuffer,
                                                                            VkClusterAccelerationStructureCommandsInfoNV const* pCommandInfos);
#endif
#endif

// Resolves the extension's entry points for `device`; the result is nonzero only if both were
// found. Declared outside the guard above, so it is available regardless of which Vulkan headers
// are in use. The spelling "accleration" is part of the function's name as used by callers.
VkBool32 load_VK_NV_cluster_accleration_structure(VkInstance instance, VkDevice device);

--------------------------------------------------------------------------------