├── .clang-format ├── .gitignore ├── CMakeLists.txt ├── LICENSE.md ├── README.md ├── cmake ├── FindVulkan.cmake ├── SPIRV2C.cmake └── glm.cmake └── src ├── CMakeLists.txt ├── add_block_sums.comp ├── block_prefix_sum.comp ├── compute_active_voxel.comp ├── compute_marching_cubes.cpp ├── compute_marching_cubes.h ├── compute_num_verts.comp ├── compute_vertex_values.comp ├── compute_vertices.comp ├── exclusive_scan.cpp ├── exclusive_scan.h ├── marching_cubes.cpp ├── prefix_sum.comp ├── stream_compact.comp ├── tri_table.h ├── util.cpp ├── util.glsl ├── util.h ├── vulkan_utils.cpp └── vulkan_utils.h /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | Language: Cpp 3 | AccessModifierOffset: -4 4 | AlignAfterOpenBracket: Align 5 | AlignConsecutiveAssignments: false 6 | AlignConsecutiveDeclarations: false 7 | AlignEscapedNewlinesLeft: true 8 | AlignOperands: true 9 | AlignTrailingComments: true 10 | AllowAllParametersOfDeclarationOnNextLine: true 11 | AllowShortBlocksOnASingleLine: false 12 | AllowShortCaseLabelsOnASingleLine: false 13 | AllowShortFunctionsOnASingleLine: Empty 14 | AllowShortIfStatementsOnASingleLine: false 15 | AllowShortLoopsOnASingleLine: false 16 | AlwaysBreakAfterDefinitionReturnType: None 17 | AlwaysBreakAfterReturnType: None 18 | AlwaysBreakBeforeMultilineStrings: true 19 | AlwaysBreakTemplateDeclarations: true 20 | BinPackArguments: false 21 | BinPackParameters: false 22 | BraceWrapping: 23 | AfterClass: false 24 | AfterControlStatement: false 25 | AfterEnum: false 26 | AfterFunction: true 27 | AfterNamespace: false 28 | AfterStruct: false 29 | AfterUnion: false 30 | AfterExternBlock: false 31 | BeforeCatch: false 32 | BeforeElse: false 33 | IndentBraces: false 34 | BreakBeforeBinaryOperators: None 35 | BreakBeforeBraces: Custom 36 | BreakBeforeTernaryOperators: true 37 | BreakConstructorInitializersBeforeComma: false 38 | BreakStringLiterals: true 39 | ColumnLimit: 95 40 | CommentPragmas: '^ IWYU pragma:' 41 | ConstructorInitializerAllOnOneLineOrOnePerLine: true 42 | Cpp11BracedListStyle: true 43 | DerivePointerAlignment: false 44 | DisableFormat: false 45 | ExperimentalAutoDetectBinPacking: false 46 | ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] 47 | IncludeCategories: 48 | - Regex: '^<[^\.]*>' 49 | Priority: 1 50 | - Regex: '^<.*\.h>' 51 | Priority: 2 52 | - Regex: '.*' 53 | Priority: 3 54 | SortIncludes: true 55 | ConstructorInitializerIndentWidth: 4 56 | ContinuationIndentWidth: 4 57 | IndentCaseLabels: false 58 | IndentWidth: 4 59 | IndentWrappedFunctionNames: false 60 | KeepEmptyLinesAtTheStartOfBlocks: false 61 | MacroBlockBegin: '' 62 | MacroBlockEnd: '' 63 | MaxEmptyLinesToKeep: 1 64 | NamespaceIndentation: Inner 65 | PenaltyBreakBeforeFirstCallParameter: 1 66 | PenaltyBreakComment: 300 67 | PenaltyBreakFirstLessLess: 120 68 | PenaltyBreakString: 1000 69 | PenaltyExcessCharacter: 1000000 70 | PenaltyReturnTypeOnItsOwnLine: 200 71 | PointerAlignment: Right 72 | ReflowComments: true 73 | SpaceAfterCStyleCast: false 74 | SpaceAfterTemplateKeyword: true 75 | SpaceBeforeAssignmentOperators: true 76 | SpaceBeforeParens: ControlStatements 77 | SpaceInEmptyParentheses: false 78 | SpacesBeforeTrailingComments: 2 79 | SpacesInAngles: false 80 | SpacesInContainerLiterals: false 81 | SpacesInCStyleCastParentheses: false 82 | SpacesInParentheses: false 83 | SpacesInSquareBrackets: false 84 | Standard: Cpp11 85 | TabWidth: 4 86 | FixNamespaceComments: false 87 | UseTab: Never 88 | ... 
89 | --- 90 | Language: ObjC 91 | AccessModifierOffset: -4 92 | AlignAfterOpenBracket: Align 93 | AlignConsecutiveAssignments: false 94 | AlignConsecutiveDeclarations: false 95 | AlignEscapedNewlinesLeft: true 96 | AlignOperands: true 97 | AlignTrailingComments: true 98 | AllowAllParametersOfDeclarationOnNextLine: true 99 | AllowShortBlocksOnASingleLine: false 100 | AllowShortCaseLabelsOnASingleLine: false 101 | AllowShortFunctionsOnASingleLine: Empty 102 | AllowShortIfStatementsOnASingleLine: false 103 | AllowShortLoopsOnASingleLine: false 104 | AlwaysBreakAfterDefinitionReturnType: None 105 | AlwaysBreakAfterReturnType: None 106 | AlwaysBreakBeforeMultilineStrings: true 107 | AlwaysBreakTemplateDeclarations: true 108 | BinPackArguments: false 109 | BinPackParameters: false 110 | BraceWrapping: 111 | AfterClass: false 112 | AfterControlStatement: false 113 | AfterEnum: false 114 | AfterFunction: true 115 | AfterNamespace: false 116 | AfterStruct: false 117 | AfterUnion: false 118 | AfterExternBlock: false 119 | BeforeCatch: false 120 | BeforeElse: false 121 | IndentBraces: false 122 | BreakBeforeBinaryOperators: None 123 | BreakBeforeBraces: Custom 124 | BreakBeforeTernaryOperators: true 125 | BreakConstructorInitializersBeforeComma: false 126 | BreakStringLiterals: true 127 | ColumnLimit: 95 128 | CommentPragmas: '^ IWYU pragma:' 129 | ConstructorInitializerAllOnOneLineOrOnePerLine: true 130 | Cpp11BracedListStyle: true 131 | DerivePointerAlignment: false 132 | DisableFormat: false 133 | ExperimentalAutoDetectBinPacking: false 134 | ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] 135 | IncludeCategories: 136 | - Regex: '^<[^\.]*>' 137 | Priority: 1 138 | - Regex: '^<.*\.h>' 139 | Priority: 2 140 | - Regex: '.*' 141 | Priority: 3 142 | SortIncludes: true 143 | ConstructorInitializerIndentWidth: 4 144 | ContinuationIndentWidth: 4 145 | IndentCaseLabels: false 146 | IndentWidth: 4 147 | IndentWrappedFunctionNames: false 148 | KeepEmptyLinesAtTheStartOfBlocks: false 149 | MacroBlockBegin: '' 150 | MacroBlockEnd: '' 151 | MaxEmptyLinesToKeep: 1 152 | NamespaceIndentation: Inner 153 | PenaltyBreakBeforeFirstCallParameter: 1 154 | PenaltyBreakComment: 300 155 | PenaltyBreakFirstLessLess: 120 156 | PenaltyBreakString: 1000 157 | PenaltyExcessCharacter: 1000000 158 | PenaltyReturnTypeOnItsOwnLine: 200 159 | PointerAlignment: Right 160 | ReflowComments: true 161 | SpaceAfterCStyleCast: false 162 | SpaceAfterTemplateKeyword: true 163 | SpaceBeforeAssignmentOperators: true 164 | SpaceBeforeParens: ControlStatements 165 | SpaceInEmptyParentheses: false 166 | SpacesBeforeTrailingComments: 2 167 | SpacesInAngles: false 168 | SpacesInContainerLiterals: false 169 | SpacesInCStyleCastParentheses: false 170 | SpacesInParentheses: false 171 | SpacesInSquareBrackets: false 172 | Standard: Cpp11 173 | TabWidth: 4 174 | FixNamespaceComments: false 175 | UseTab: Never 176 | ... 
177 | 
178 | 
179 | 
180 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.swp
2 | *.raw
3 | build/
4 | cmake-build
5 | cmake-ninja
6 | .DS_Store
7 | compile_commands.json
8 | .cache/
9 | 
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.21)
2 | project(vulkan_marching_cubes)
3 | 
4 | set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_LIST_DIR}/cmake")
5 | include(cmake/glm.cmake)
6 | 
7 | find_package(Vulkan REQUIRED)
8 | 
9 | add_subdirectory(src)
10 | 
11 | 
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 | 
3 | Copyright (c) 2024 Will Usher
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Vulkan Marching Cubes
2 | 
3 | This is a GPU-parallel implementation of Marching Cubes using Vulkan. It builds
4 | using CMake, and you can run it via:
5 | ```
6 | ./vulkan_marching_cubes <volume.raw> <isovalue> [optional output.obj]
7 | ```
8 | 
9 | The program takes volumes whose file names are formatted like those found
10 | on [OpenScivisDatasets](https://klacansky.com/open-scivis-datasets/); you can
11 | download datasets from that page to try out the app. For example, you can
12 | compute the isosurface at isovalue = 80 on the skull and output the mesh
13 | to an OBJ file:
14 | ```
15 | ./vulkan_marching_cubes skull_256x256x256_uint8.raw 80 skull_iso_80.obj
16 | ```
17 | 
--------------------------------------------------------------------------------
/cmake/FindVulkan.cmake:
--------------------------------------------------------------------------------
1 | # Distributed under the OSI-approved BSD 3-Clause License. See accompanying
2 | # file Copyright.txt or https://cmake.org/licensing for details.
3 | 
4 | #.rst:
5 | # FindVulkan
6 | # ----------
7 | #
8 | # Try to find Vulkan
9 | #
10 | # IMPORTED Targets
11 | # ^^^^^^^^^^^^^^^^
12 | #
13 | # This module defines :prop_tgt:`IMPORTED` target ``Vulkan::Vulkan``, if
14 | # Vulkan has been found.
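#
# A minimal usage sketch (``my_app`` is a placeholder target name, not part of this module):
#
#   find_package(Vulkan REQUIRED)
#   add_executable(my_app main.cpp)
#   target_link_libraries(my_app PRIVATE Vulkan::Vulkan)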
15 | # 16 | # Result Variables 17 | # ^^^^^^^^^^^^^^^^ 18 | # 19 | # This module defines the following variables:: 20 | # 21 | # Vulkan_FOUND - True if Vulkan was found 22 | # Vulkan_INCLUDE_DIRS - include directories for Vulkan 23 | # Vulkan_LIBRARIES - link against this library to use Vulkan 24 | # 25 | # The module will also define two cache variables:: 26 | # 27 | # Vulkan_INCLUDE_DIR - the Vulkan include directory 28 | # Vulkan_LIBRARY - the path to the Vulkan library 29 | # 30 | 31 | if(WIN32) 32 | find_path(Vulkan_INCLUDE_DIR 33 | NAMES vulkan/vulkan.h 34 | PATHS 35 | ${VULKAN_SDK}/Include 36 | $ENV{VULKAN_SDK}/Include 37 | ) 38 | 39 | if(CMAKE_SIZEOF_VOID_P EQUAL 8) 40 | find_library(Vulkan_LIBRARY 41 | NAMES vulkan-1 42 | PATHS 43 | ${VULKAN_SDK}/Lib 44 | ${VULKAN_SDK}/Bin 45 | $ENV{VULKAN_SDK}/Lib 46 | $ENV{VULKAN_SDK}/Bin 47 | ) 48 | find_program(SPIRV_COMPILER 49 | NAMES glslc 50 | PATHS 51 | ${VULKAN_SDK}/Bin 52 | $ENV{VULKAN_SDK}/Bin 53 | ) 54 | elseif(CMAKE_SIZEOF_VOID_P EQUAL 4) 55 | find_library(Vulkan_LIBRARY 56 | NAMES vulkan-1 57 | PATHS 58 | ${VULKAN_SDK}/Lib32 59 | ${VULKAN_SDK}/Bin32 60 | $ENV{VULKAN_SDK}/Lib32 61 | $ENV{VULKAN_SDK}/Bin32 62 | NO_SYSTEM_ENVIRONMENT_PATH 63 | ) 64 | find_program(SPIRV_COMPILER 65 | NAMES glslc 66 | PATHS 67 | ${VULKAN_SDK}/Bin32 68 | $ENV{VULKAN_SDK}/Bin32 69 | ) 70 | endif() 71 | else() 72 | find_path(Vulkan_INCLUDE_DIR 73 | NAMES vulkan/vulkan.h 74 | PATHS 75 | ${VULKAN_SDK}/x86_64/include 76 | $ENV{VULKAN_SDK}/x86_64/include 77 | ) 78 | find_library(Vulkan_LIBRARY 79 | NAMES vulkan 80 | PATHS 81 | ${VULKAN_SDK}/x86_64/lib 82 | $ENV{VULKAN_SDK}/x86_64/lib 83 | ) 84 | find_program(SPIRV_COMPILER 85 | NAMES glslc 86 | PATHS 87 | ${VULKAN_SDK}/x86_64/bin 88 | $ENV{VULKAN_SDK}/x86_64/bin 89 | ) 90 | endif() 91 | 92 | find_file(SPIRV2C NAME SPIRV2C.cmake 93 | PATHS ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_LIST_DIR}) 94 | 95 | # Note that the include paths and defines should not have 96 | # the -I or -D prefix, respectively 97 | function(add_spirv_embed_library) 98 | set(options INCLUDE_DIRECTORIES COMPILE_DEFINITIONS COMPILE_OPTIONS) 99 | cmake_parse_arguments(PARSE_ARGV 1 SPIRV "" "" "${options}") 100 | 101 | set(GLSL_INCLUDE_DIRECTORIES "") 102 | foreach (inc ${SPIRV_INCLUDE_DIRECTORIES}) 103 | file(TO_NATIVE_PATH "${inc}" native_path) 104 | list(APPEND GLSL_INCLUDE_DIRECTORIES "-I${native_path}") 105 | endforeach() 106 | 107 | set(GLSL_COMPILE_DEFNS "") 108 | foreach (def ${SPIRV_COMPILE_DEFINITIONS}) 109 | list(APPEND GLSL_COMPILE_DEFNS "-D${def}") 110 | endforeach() 111 | 112 | # Compile each GLSL file to embedded SPIRV bytecode 113 | set(SPIRV_LIB ${ARGV0}) 114 | set(SPIRV_BINARIES "") 115 | foreach (shader ${SPIRV_UNPARSED_ARGUMENTS}) 116 | get_filename_component(FNAME ${shader} NAME_WE) 117 | set(SPV_OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${FNAME}.spv) 118 | list(APPEND SPIRV_BINARIES ${SPV_OUTPUT}) 119 | 120 | # Determine the dependencies for the shader and track them 121 | execute_process( 122 | COMMAND ${SPIRV_COMPILER} ${CMAKE_CURRENT_LIST_DIR}/${shader} 123 | ${GLSL_INCLUDE_DIRECTORIES} ${GLSL_COMPILE_DEFNS} 124 | --target-env=vulkan1.2 ${SPIRV_COMPILE_OPTIONS} -MM 125 | OUTPUT_VARIABLE SPV_DEPS_STRING) 126 | 127 | # The first item is the spv file name formatted as .spv:, so remove that 128 | string(REPLACE " " ";" SPV_DEPS_LIST "${SPV_DEPS_STRING}") 129 | list(REMOVE_AT SPV_DEPS_LIST 0) 130 | 131 | add_custom_command(OUTPUT ${SPV_OUTPUT} 132 | COMMAND ${SPIRV_COMPILER} ${CMAKE_CURRENT_LIST_DIR}/${shader} 133 | 
            ${GLSL_INCLUDE_DIRECTORIES} ${GLSL_COMPILE_DEFNS}
134 |             --target-env=vulkan1.2 -mfmt=c -o ${SPV_OUTPUT} ${SPIRV_COMPILE_OPTIONS}
135 |         DEPENDS ${SPV_DEPS_LIST}
136 |         COMMENT "Compiling ${CMAKE_CURRENT_LIST_DIR}/${shader} to ${SPV_OUTPUT}")
137 |     endforeach()
138 | 
139 |     set(SPIRV_EMBED_FILE "${CMAKE_CURRENT_BINARY_DIR}/${SPIRV_LIB}_embedded_spv.h")
140 |     add_custom_command(OUTPUT ${SPIRV_EMBED_FILE}
141 |         COMMAND ${CMAKE_COMMAND}
142 |             -DSPIRV_EMBED_FILE=${SPIRV_EMBED_FILE} -DOUTPUT_DIR=${CMAKE_CURRENT_BINARY_DIR}
143 |             -P ${SPIRV2C}
144 |         DEPENDS ${SPIRV_BINARIES}
145 |         COMMENT "Embedding SPIRV bytecode into ${SPIRV_EMBED_FILE}")
146 | 
147 |     set(SPIRV_CMAKE_CUSTOM_WRAPPER ${SPIRV_LIB}_custom_target)
148 |     add_custom_target(${SPIRV_CMAKE_CUSTOM_WRAPPER} ALL DEPENDS ${SPIRV_EMBED_FILE})
149 | 
150 |     add_library(${SPIRV_LIB} INTERFACE)
151 |     add_dependencies(${SPIRV_LIB} ${SPIRV_CMAKE_CUSTOM_WRAPPER})
152 |     target_include_directories(${SPIRV_LIB} INTERFACE
153 |         $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>)
154 | endfunction()
155 | 
156 | set(Vulkan_LIBRARIES ${Vulkan_LIBRARY})
157 | set(Vulkan_INCLUDE_DIRS ${Vulkan_INCLUDE_DIR})
158 | 
159 | include(FindPackageHandleStandardArgs)
160 | find_package_handle_standard_args(Vulkan
161 |     DEFAULT_MSG
162 |     Vulkan_LIBRARY Vulkan_INCLUDE_DIR)
163 | 
164 | mark_as_advanced(Vulkan_INCLUDE_DIR Vulkan_LIBRARY)
165 | 
166 | if(Vulkan_FOUND AND NOT TARGET Vulkan::Vulkan)
167 |     add_library(Vulkan::Vulkan UNKNOWN IMPORTED)
168 |     set_target_properties(Vulkan::Vulkan PROPERTIES
169 |         IMPORTED_LOCATION "${Vulkan_LIBRARIES}"
170 |         INTERFACE_INCLUDE_DIRECTORIES "${Vulkan_INCLUDE_DIRS}")
171 | endif()
172 | 
173 | 
--------------------------------------------------------------------------------
/cmake/SPIRV2C.cmake:
--------------------------------------------------------------------------------
1 | function(spirv2c)
2 |     file(GLOB SPIRV_BINARIES *.spv)
3 | 
4 |     file(WRITE ${SPIRV_EMBED_FILE} "#pragma once\n")
5 |     foreach (spv ${SPIRV_BINARIES})
6 |         get_filename_component(FNAME ${spv} NAME_WE)
7 |         file(READ ${spv} SPV_CONTENT)
8 |         file(APPEND ${SPIRV_EMBED_FILE} "const uint32_t ${FNAME}_spv[] =\n${SPV_CONTENT};\n")
9 |     endforeach()
10 | endfunction()
11 | 
12 | spirv2c(${SPIRV_EMBED_FILE} ${OUTPUT_DIR})
13 | 
14 | 
--------------------------------------------------------------------------------
/cmake/glm.cmake:
--------------------------------------------------------------------------------
1 | include(ExternalProject)
2 | 
3 | ExternalProject_Add(glm_ext
4 |     PREFIX glm
5 |     DOWNLOAD_DIR glm
6 |     STAMP_DIR glm/stamp
7 |     SOURCE_DIR glm/src
8 |     BINARY_DIR glm
9 |     URL "https://github.com/g-truc/glm/releases/download/0.9.9.8/glm-0.9.9.8.zip"
10 |     URL_HASH "SHA256=37e2a3d62ea3322e43593c34bae29f57e3e251ea89f4067506c94043769ade4c"
11 |     CONFIGURE_COMMAND ""
12 |     BUILD_COMMAND ""
13 |     INSTALL_COMMAND ""
14 |     BUILD_ALWAYS OFF
15 | )
16 | 
17 | set(GLM_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/glm/src)
18 | 
19 | add_library(glm INTERFACE)
20 | 
21 | add_dependencies(glm glm_ext)
22 | 
23 | target_include_directories(glm INTERFACE
24 |     ${GLM_INCLUDE_DIRS})
25 | 
26 | 
27 | 
--------------------------------------------------------------------------------
/src/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | set(BLOCK_SIZE 512)
2 | 
3 | add_spirv_embed_library(scan_shaders
4 |     add_block_sums.comp
5 |     block_prefix_sum.comp
6 |     prefix_sum.comp
7 |     COMPILE_OPTIONS -O --target-env=vulkan1.2
8 |     INCLUDE_DIRECTORIES
9 |     ${PROJECT_SOURCE_DIR}
10 |     COMPILE_DEFINITIONS
11 | 
BLOCK_SIZE=${BLOCK_SIZE}) 12 | 13 | add_spirv_embed_library(mc_shaders 14 | compute_active_voxel.comp 15 | compute_num_verts.comp 16 | compute_vertices.comp 17 | stream_compact.comp 18 | COMPILE_OPTIONS -O --target-env=vulkan1.2 19 | INCLUDE_DIRECTORIES 20 | ${PROJECT_SOURCE_DIR} 21 | COMPILE_DEFINITIONS 22 | VOLUME_DTYPE=uint 23 | UINT8_VOLUME=1) 24 | 25 | add_library(vulkan_utils util.cpp vulkan_utils.cpp) 26 | 27 | target_link_libraries(vulkan_utils PUBLIC 28 | Vulkan::Vulkan 29 | glm) 30 | 31 | set_target_properties(vulkan_utils PROPERTIES 32 | CXX_STANDARD 14 33 | CXX_STANDARD_REQUIRED ON) 34 | 35 | add_executable(vulkan_marching_cubes 36 | marching_cubes.cpp 37 | exclusive_scan.cpp 38 | compute_marching_cubes.cpp) 39 | 40 | set_target_properties(vulkan_marching_cubes PROPERTIES 41 | CXX_STANDARD 14 42 | CXX_STANDARD_REQUIRED ON) 43 | 44 | target_compile_options(vulkan_marching_cubes PUBLIC 45 | -DBLOCK_SIZE=${BLOCK_SIZE} 46 | -DVOLUME_DTYPE=uint 47 | -DUINT8_VOLUME=1) 48 | 49 | target_link_libraries(vulkan_marching_cubes PUBLIC 50 | vulkan_utils 51 | scan_shaders 52 | mc_shaders) 53 | 54 | -------------------------------------------------------------------------------- /src/add_block_sums.comp: -------------------------------------------------------------------------------- 1 | #version 450 core 2 | 3 | layout(local_size_x = BLOCK_SIZE / 2) in; 4 | 5 | layout(set = 0, binding = 0, std430) buffer Data { 6 | uint vals[]; 7 | }; 8 | 9 | layout(set = 0, binding = 1, std430) buffer BlockSums { 10 | uint block_sums[]; 11 | }; 12 | 13 | void main(void) { 14 | const uint prev_sum = block_sums[gl_WorkGroupID.x]; 15 | vals[2 * gl_GlobalInvocationID.x] += prev_sum; 16 | vals[2 * gl_GlobalInvocationID.x + 1] += prev_sum; 17 | } 18 | 19 | -------------------------------------------------------------------------------- /src/block_prefix_sum.comp: -------------------------------------------------------------------------------- 1 | #version 450 core 2 | 3 | // See https://www.eecs.umich.edu/courses/eecs570/hw/parprefix.pdf 4 | // Compute the prefix sum over the results from each block, this no longer 5 | // writes out the block sums since we're scanning on the block sums 6 | 7 | layout(local_size_x = BLOCK_SIZE / 2) in; 8 | 9 | layout(set = 0, binding = 0, std430) buffer Data { 10 | uint vals[]; 11 | }; 12 | 13 | layout(set = 0, binding = 1, std430) buffer CarryInOut { 14 | uint carry_in; 15 | uint carry_out; 16 | }; 17 | 18 | shared uint chunk[BLOCK_SIZE]; 19 | 20 | void main(void) { 21 | chunk[2 * gl_LocalInvocationID.x] = vals[2 * gl_GlobalInvocationID.x]; 22 | chunk[2 * gl_LocalInvocationID.x + 1] = vals[2 * gl_GlobalInvocationID.x + 1]; 23 | 24 | uint offs = 1; 25 | // Reduce step up tree 26 | for (int d = BLOCK_SIZE >> 1; d > 0; d = d >> 1) { 27 | barrier(); 28 | if (gl_LocalInvocationID.x < d) { 29 | uint a = offs * (2 * gl_LocalInvocationID.x + 1) - 1; 30 | uint b = offs * (2 * gl_LocalInvocationID.x + 2) - 1; 31 | chunk[b] += chunk[a]; 32 | } 33 | offs = offs << 1; 34 | } 35 | 36 | if (gl_LocalInvocationID.x == 0) { 37 | carry_out = chunk[BLOCK_SIZE - 1] + carry_in; 38 | chunk[BLOCK_SIZE - 1] = 0; 39 | } 40 | 41 | // Sweep down the tree to finish the scan 42 | for (int d = 1; d < BLOCK_SIZE; d = d << 1) { 43 | offs = offs >> 1; 44 | barrier(); 45 | if (gl_LocalInvocationID.x < d) { 46 | uint a = offs * (2 * gl_LocalInvocationID.x + 1) - 1; 47 | uint b = offs * (2 * gl_LocalInvocationID.x + 2) - 1; 48 | const uint tmp = chunk[a]; 49 | chunk[a] = chunk[b]; 50 | chunk[b] += tmp; 51 | } 52 | } 
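    // At this point chunk[] holds the exclusive prefix sum of the BLOCK_SIZE values this
    // workgroup loaded. carry_in is the running total from the chunks scanned before this
    // dispatch, so adding it below makes the written offsets global across the whole array;
    // carry_out (written above) records the running total including this chunk so the next
    // chunk of the scan can continue from it.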
53 | 54 | barrier(); 55 | vals[2 * gl_GlobalInvocationID.x] = chunk[2 * gl_LocalInvocationID.x] + carry_in; 56 | vals[2 * gl_GlobalInvocationID.x + 1] = chunk[2 * gl_LocalInvocationID.x + 1] + carry_in; 57 | } 58 | 59 | -------------------------------------------------------------------------------- /src/compute_active_voxel.comp: -------------------------------------------------------------------------------- 1 | #version 450 core 2 | 3 | #include "compute_vertex_values.comp" 4 | 5 | layout(set = 1, binding = 0, std430) buffer Active { 6 | uint voxel_active[]; 7 | }; 8 | 9 | void main(void) { 10 | const float values[8] = compute_vertex_values(gl_GlobalInvocationID); 11 | uint case_index = 0; 12 | for (int i = 0; i < 8; ++i) { 13 | if (values[i] <= isovalue) { 14 | case_index |= 1 << i; 15 | } 16 | } 17 | uint v = gl_GlobalInvocationID.x 18 | + (volume_dims.x - 1) * (gl_GlobalInvocationID.y + (volume_dims.y - 1) * gl_GlobalInvocationID.z); 19 | voxel_active[v] = case_index != 0 && case_index != TRI_TABLE_SIZE - 1 ? 1 : 0; 20 | } 21 | 22 | -------------------------------------------------------------------------------- /src/compute_marching_cubes.cpp: -------------------------------------------------------------------------------- 1 | #include "compute_marching_cubes.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "mc_shaders_embedded_spv.h" 7 | #include "tri_table.h" 8 | 9 | const size_t MarchingCubes::max_dispatch_size = ((2 * 65535 * 4) / 256) * 256; 10 | 11 | MarchingCubes::MarchingCubes(std::shared_ptr &device, 12 | void *volume_data, 13 | const glm::uvec3 &volume_dims, 14 | const std::string &volume_type) 15 | : device(device), 16 | active_voxel_scanner(device), 17 | num_verts_scanner(device), 18 | volume_dims(volume_dims) 19 | { 20 | // Note: explicitly not using 3D storage textures here to match the WebGPU version, where 21 | // they're not implemented fully yet 22 | volume_data_layout = 23 | vkrt::DescriptorSetLayoutBuilder() 24 | .add_binding(0, 1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT) 25 | .add_binding(1, 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT) 26 | .build(*device); 27 | 28 | compute_active_layout = 29 | vkrt::DescriptorSetLayoutBuilder() 30 | .add_binding(0, 1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT) 31 | .build(*device); 32 | 33 | // Note: not using push constants here to match the WebGPU backend, where push constants 34 | // aren't available 35 | stream_compact_layout = 36 | vkrt::DescriptorSetLayoutBuilder() 37 | .add_binding( 38 | 0, 1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC, VK_SHADER_STAGE_COMPUTE_BIT) 39 | .add_binding( 40 | 1, 1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC, VK_SHADER_STAGE_COMPUTE_BIT) 41 | .add_binding( 42 | 2, 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, VK_SHADER_STAGE_COMPUTE_BIT) 43 | .add_binding(3, 1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT) 44 | .build(*device); 45 | 46 | compute_num_verts_layout = 47 | vkrt::DescriptorSetLayoutBuilder() 48 | .add_binding(0, 1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT) 49 | .add_binding(1, 1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT) 50 | .add_binding(2, 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT) 51 | .build(*device); 52 | 53 | compute_verts_layout = 54 | vkrt::DescriptorSetLayoutBuilder() 55 | .add_binding(0, 1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT) 56 | .add_binding(1, 1, 
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT) 57 | .add_binding(2, 1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT) 58 | .add_binding(3, 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT) 59 | .build(*device); 60 | 61 | // Allocate the descriptor sets from a pool 62 | const std::vector pool_sizes = { 63 | VkDescriptorPoolSize{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC, 4}, 64 | VkDescriptorPoolSize{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, 2}, 65 | VkDescriptorPoolSize{VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 3}, 66 | VkDescriptorPoolSize{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 9}}; 67 | 68 | VkDescriptorPoolCreateInfo pool_create_info = {}; 69 | pool_create_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; 70 | pool_create_info.maxSets = 6; 71 | pool_create_info.poolSizeCount = pool_sizes.size(); 72 | pool_create_info.pPoolSizes = pool_sizes.data(); 73 | CHECK_VULKAN(vkCreateDescriptorPool( 74 | device->logical_device(), &pool_create_info, nullptr, &desc_pool)); 75 | 76 | VkDescriptorSetAllocateInfo alloc_info = {}; 77 | alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; 78 | alloc_info.pSetLayouts = &volume_data_layout; 79 | alloc_info.descriptorPool = desc_pool; 80 | alloc_info.descriptorSetCount = 1; 81 | CHECK_VULKAN(vkAllocateDescriptorSets( 82 | device->logical_device(), &alloc_info, &volume_data_desc_set)); 83 | 84 | alloc_info.pSetLayouts = &compute_active_layout; 85 | CHECK_VULKAN(vkAllocateDescriptorSets( 86 | device->logical_device(), &alloc_info, &compute_active_desc_set)); 87 | 88 | alloc_info.pSetLayouts = &stream_compact_layout; 89 | CHECK_VULKAN(vkAllocateDescriptorSets( 90 | device->logical_device(), &alloc_info, &stream_compact_desc_set)); 91 | CHECK_VULKAN(vkAllocateDescriptorSets( 92 | device->logical_device(), &alloc_info, &stream_compact_remainder_desc_set)); 93 | 94 | alloc_info.pSetLayouts = &compute_num_verts_layout; 95 | CHECK_VULKAN(vkAllocateDescriptorSets( 96 | device->logical_device(), &alloc_info, &compute_num_verts_desc_set)); 97 | 98 | alloc_info.pSetLayouts = &compute_verts_layout; 99 | CHECK_VULKAN(vkAllocateDescriptorSets( 100 | device->logical_device(), &alloc_info, &compute_verts_desc_set)); 101 | 102 | // Build the different pipeline layouts and pipelines 103 | vkrt::make_basic_compute_pipeline(compute_active_voxel_spv, 104 | sizeof(compute_active_voxel_spv), 105 | {volume_data_layout, compute_active_layout}, 106 | *device, 107 | compute_active_pipeline_layout, 108 | compute_active_pipeline); 109 | 110 | vkrt::make_basic_compute_pipeline(stream_compact_spv, 111 | sizeof(stream_compact_spv), 112 | {stream_compact_layout}, 113 | *device, 114 | stream_compact_pipeline_layout, 115 | stream_compact_pipeline); 116 | 117 | vkrt::make_basic_compute_pipeline(compute_num_verts_spv, 118 | sizeof(compute_num_verts_spv), 119 | {volume_data_layout, compute_num_verts_layout}, 120 | *device, 121 | compute_num_verts_pipeline_layout, 122 | compute_num_verts_pipeline); 123 | 124 | vkrt::make_basic_compute_pipeline(compute_vertices_spv, 125 | sizeof(compute_vertices_spv), 126 | {volume_data_layout, compute_verts_layout}, 127 | *device, 128 | compute_verts_pipeline_layout, 129 | compute_verts_pipeline); 130 | 131 | command_pool = device->make_command_pool(VK_COMMAND_POOL_CREATE_TRANSIENT_BIT); 132 | { 133 | VkCommandBufferAllocateInfo info = {}; 134 | info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; 135 | info.commandPool = command_pool; 136 | info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; 137 
| info.commandBufferCount = 1; 138 | CHECK_VULKAN( 139 | vkAllocateCommandBuffers(device->logical_device(), &info, &command_buffer)); 140 | } 141 | { 142 | VkFenceCreateInfo info = {}; 143 | info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; 144 | CHECK_VULKAN(vkCreateFence(device->logical_device(), &info, nullptr, &fence)); 145 | } 146 | 147 | size_t voxel_size = 0; 148 | #if UINT8_VOLUME 149 | if (volume_type == "uint8") { 150 | voxel_size = 1; 151 | } 152 | #endif 153 | #if UINT16_VOLUME 154 | if (volume_type == "uint16") { 155 | voxel_size = 2; 156 | } 157 | #endif 158 | #if UINT32_VOLUME 159 | if (volume_type == "uint32") { 160 | voxel_size = 4; 161 | } 162 | #endif 163 | #if FLOAT32_VOLUME 164 | if (volume_type == "float32") { 165 | voxel_size = 4; 166 | } 167 | #endif 168 | if (voxel_size == 0) { 169 | std::cout << "Volume type '" << volume_type 170 | << "' support was not built, please recompile" << std::endl; 171 | throw std::runtime_error("Rebuild with " + volume_type + " support"); 172 | } 173 | 174 | volume_buffer = vkrt::Buffer::device( 175 | *device, 176 | size_t(volume_dims.x) * size_t(volume_dims.y) * size_t(volume_dims.z) * voxel_size, 177 | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT); 178 | auto volume_upload = 179 | vkrt::Buffer::host(*device, volume_buffer->size(), VK_BUFFER_USAGE_TRANSFER_SRC_BIT); 180 | 181 | volume_info_buffer = vkrt::Buffer::device( 182 | *device, 183 | 4 * 4 + 4, 184 | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT); 185 | upload_volume_info_buffer = vkrt::Buffer::host( 186 | *device, volume_info_buffer->size(), VK_BUFFER_USAGE_TRANSFER_SRC_BIT); 187 | 188 | { 189 | uint8_t *map = reinterpret_cast(upload_volume_info_buffer->map()); 190 | glm::uvec3 *dims = reinterpret_cast(map); 191 | *dims = volume_dims; 192 | float *isovalue = reinterpret_cast(map + sizeof(glm::uvec4)); 193 | *isovalue = 128.f; 194 | upload_volume_info_buffer->unmap(); 195 | } 196 | std::memcpy(volume_upload->map(), volume_data, volume_upload->size()); 197 | volume_upload->unmap(); 198 | 199 | tri_table_buffer = vkrt::Buffer::device( 200 | *device, 201 | 256 * 16 * sizeof(int), 202 | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT); 203 | 204 | auto upload_tri_table_buffer = 205 | vkrt::Buffer::host(*device, 256 * 16 * sizeof(int), VK_BUFFER_USAGE_TRANSFER_SRC_BIT); 206 | std::memcpy(upload_tri_table_buffer->map(), tri_table, upload_tri_table_buffer->size()); 207 | upload_tri_table_buffer->unmap(); 208 | 209 | VkCommandBufferBeginInfo begin_info = {}; 210 | begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; 211 | begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; 212 | CHECK_VULKAN(vkBeginCommandBuffer(command_buffer, &begin_info)); 213 | 214 | { 215 | VkBufferCopy copy_cmd = {}; 216 | copy_cmd.size = upload_volume_info_buffer->size(); 217 | vkCmdCopyBuffer(command_buffer, 218 | upload_volume_info_buffer->handle(), 219 | volume_info_buffer->handle(), 220 | 1, 221 | ©_cmd); 222 | } 223 | { 224 | VkBufferCopy copy_cmd = {}; 225 | copy_cmd.size = volume_upload->size(); 226 | vkCmdCopyBuffer( 227 | command_buffer, volume_upload->handle(), volume_buffer->handle(), 1, ©_cmd); 228 | } 229 | { 230 | VkBufferCopy copy_cmd = {}; 231 | copy_cmd.size = upload_tri_table_buffer->size(); 232 | vkCmdCopyBuffer(command_buffer, 233 | upload_tri_table_buffer->handle(), 234 | tri_table_buffer->handle(), 235 | 1, 236 | ©_cmd); 237 | } 238 | CHECK_VULKAN(vkEndCommandBuffer(command_buffer)); 239 | 240 | 
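    // Submit the staging copies for the volume info, volume data, and triangle table, and
    // wait for them to complete before resetting the transient command pool so the command
    // buffer can be re-recorded for the compute passes.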
VkSubmitInfo submit_info = {}; 241 | submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; 242 | submit_info.commandBufferCount = 1; 243 | submit_info.pCommandBuffers = &command_buffer; 244 | CHECK_VULKAN(vkQueueSubmit(device->graphics_queue(), 1, &submit_info, VK_NULL_HANDLE)); 245 | CHECK_VULKAN(vkQueueWaitIdle(device->graphics_queue())); 246 | 247 | vkResetCommandPool( 248 | device->logical_device(), command_pool, VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT); 249 | 250 | const size_t voxels_to_process = 251 | size_t(volume_dims.x - 1) * size_t(volume_dims.y - 1) * size_t(volume_dims.z - 1); 252 | active_voxel_buffer = vkrt::Buffer::device( 253 | *device, 254 | voxels_to_process * sizeof(uint32_t), 255 | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT); 256 | 257 | active_voxel_offsets_buffer = vkrt::Buffer::device( 258 | *device, 259 | active_voxel_scanner.get_aligned_size(voxels_to_process) * sizeof(uint32_t), 260 | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | 261 | VK_BUFFER_USAGE_TRANSFER_SRC_BIT); 262 | 263 | active_voxel_scanner.prepare_gpu_input(active_voxel_offsets_buffer, voxels_to_process); 264 | 265 | vkrt::DescriptorSetUpdater() 266 | .write_ssbo(volume_data_desc_set, 0, volume_buffer) 267 | .write_ubo(volume_data_desc_set, 1, volume_info_buffer) 268 | .write_ssbo(compute_active_desc_set, 0, active_voxel_buffer) 269 | .update(*device); 270 | } 271 | 272 | uint32_t MarchingCubes::compute_surface(const float isovalue) 273 | { 274 | { 275 | uint8_t *map = reinterpret_cast(upload_volume_info_buffer->map()); 276 | float *v = reinterpret_cast(map + sizeof(glm::uvec4)); 277 | *v = isovalue; 278 | upload_volume_info_buffer->unmap(); 279 | 280 | VkCommandBufferBeginInfo begin_info = {}; 281 | begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; 282 | begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; 283 | CHECK_VULKAN(vkBeginCommandBuffer(command_buffer, &begin_info)); 284 | 285 | VkBufferCopy copy_cmd = {}; 286 | copy_cmd.size = upload_volume_info_buffer->size(); 287 | vkCmdCopyBuffer(command_buffer, 288 | upload_volume_info_buffer->handle(), 289 | volume_info_buffer->handle(), 290 | 1, 291 | ©_cmd); 292 | CHECK_VULKAN(vkEndCommandBuffer(command_buffer)); 293 | 294 | VkSubmitInfo submit_info = {}; 295 | submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; 296 | submit_info.commandBufferCount = 1; 297 | submit_info.pCommandBuffers = &command_buffer; 298 | CHECK_VULKAN(vkQueueSubmit(device->graphics_queue(), 1, &submit_info, VK_NULL_HANDLE)); 299 | CHECK_VULKAN(vkQueueWaitIdle(device->graphics_queue())); 300 | 301 | vkResetCommandPool(device->logical_device(), 302 | command_pool, 303 | VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT); 304 | } 305 | using namespace std::chrono; 306 | 307 | auto start = steady_clock::now(); 308 | const uint32_t total_active = compute_active_voxels(); 309 | auto end = steady_clock::now(); 310 | std::cout << "compute_active_voxels took " 311 | << duration_cast(end - start).count() << "\n"; 312 | if (total_active == 0) { 313 | return 0; 314 | } 315 | 316 | start = steady_clock::now(); 317 | auto active_voxel_ids = compact_active_voxels(total_active); 318 | end = steady_clock::now(); 319 | std::cout << "compact_active_voxels took " 320 | << duration_cast(end - start).count() << "\n"; 321 | 322 | uint32_t total_vertices = 0; 323 | start = steady_clock::now(); 324 | auto vertex_offset_buffer = compute_num_vertices(active_voxel_ids, total_vertices); 325 | end = steady_clock::now(); 326 | std::cout 
<< "compute_num_vertices took " 327 | << duration_cast(end - start).count() << "\n"; 328 | if (total_vertices == 0) { 329 | return 0; 330 | } 331 | start = steady_clock::now(); 332 | compute_vertices(active_voxel_ids, vertex_offset_buffer, total_vertices); 333 | end = steady_clock::now(); 334 | std::cout << "compute_vertices took " << duration_cast(end - start).count() 335 | << "\n"; 336 | return total_vertices; 337 | } 338 | 339 | uint32_t MarchingCubes::compute_active_voxels() 340 | { 341 | const size_t voxels_to_process = 342 | size_t(volume_dims.x - 1) * size_t(volume_dims.y - 1) * size_t(volume_dims.z - 1); 343 | 344 | VkCommandBufferBeginInfo begin_info = {}; 345 | begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; 346 | begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; 347 | CHECK_VULKAN(vkBeginCommandBuffer(command_buffer, &begin_info)); 348 | 349 | vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, compute_active_pipeline); 350 | 351 | const std::vector desc_sets = {volume_data_desc_set, 352 | compute_active_desc_set}; 353 | vkCmdBindDescriptorSets(command_buffer, 354 | VK_PIPELINE_BIND_POINT_COMPUTE, 355 | compute_active_pipeline_layout, 356 | 0, 357 | 2, 358 | desc_sets.data(), 359 | 0, 360 | nullptr); 361 | vkCmdDispatch(command_buffer, volume_dims.x - 1, volume_dims.y - 1, volume_dims.z - 1); 362 | 363 | { 364 | VkBufferCopy copy_cmd = {}; 365 | copy_cmd.size = active_voxel_buffer->size(); 366 | vkCmdCopyBuffer(command_buffer, 367 | active_voxel_buffer->handle(), 368 | active_voxel_offsets_buffer->handle(), 369 | 1, 370 | ©_cmd); 371 | } 372 | 373 | CHECK_VULKAN(vkEndCommandBuffer(command_buffer)); 374 | 375 | VkSubmitInfo submit_info = {}; 376 | submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; 377 | submit_info.commandBufferCount = 1; 378 | submit_info.pCommandBuffers = &command_buffer; 379 | CHECK_VULKAN(vkQueueSubmit(device->graphics_queue(), 1, &submit_info, VK_NULL_HANDLE)); 380 | CHECK_VULKAN(vkQueueWaitIdle(device->graphics_queue())); 381 | vkResetCommandPool( 382 | device->logical_device(), command_pool, VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT); 383 | return active_voxel_scanner.scan(); 384 | } 385 | 386 | std::shared_ptr MarchingCubes::compact_active_voxels(const uint32_t total_active) 387 | { 388 | if (total_active > current_total_active) { 389 | active_voxel_ids = vkrt::Buffer::device( 390 | *device, 391 | total_active * sizeof(uint32_t), 392 | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT); 393 | current_total_active = total_active; 394 | } 395 | 396 | const size_t voxels_to_process = 397 | size_t(volume_dims.x - 1) * size_t(volume_dims.y - 1) * size_t(volume_dims.z - 1); 398 | 399 | // Note: not using push constants to send the offset to match the WebGPU version, 400 | // which currently does not have push constant support. 
Also following the 256b 401 | // dynamic offset restriction from Dawn to match closely 402 | const size_t num_chunks = static_cast( 403 | std::ceil(static_cast(voxels_to_process) / max_dispatch_size)); 404 | auto chunk_offsets = vkrt::Buffer::device( 405 | *device, 406 | num_chunks * 256, 407 | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT); 408 | 409 | vkrt::DescriptorSetUpdater() 410 | .write_ssbo_dynamic(stream_compact_desc_set, 411 | 0, 412 | active_voxel_buffer, 413 | 0, 414 | 4 * std::min(voxels_to_process, max_dispatch_size)) 415 | .write_ssbo_dynamic(stream_compact_desc_set, 416 | 1, 417 | active_voxel_offsets_buffer, 418 | 0, 419 | 4 * std::min(voxels_to_process, max_dispatch_size)) 420 | .write_ubo_dynamic(stream_compact_desc_set, 2, chunk_offsets, 0, 4) 421 | .write_ssbo(stream_compact_desc_set, 3, active_voxel_ids) 422 | .write_ssbo_dynamic(stream_compact_remainder_desc_set, 423 | 0, 424 | active_voxel_buffer, 425 | 0, 426 | 4 * (voxels_to_process % max_dispatch_size)) 427 | .write_ssbo_dynamic(stream_compact_remainder_desc_set, 428 | 1, 429 | active_voxel_offsets_buffer, 430 | 0, 431 | 4 * (voxels_to_process % max_dispatch_size)) 432 | .write_ubo_dynamic(stream_compact_remainder_desc_set, 2, chunk_offsets, 0, 4) 433 | .write_ssbo(stream_compact_remainder_desc_set, 3, active_voxel_ids) 434 | .update(*device); 435 | 436 | auto upload_chunks = 437 | vkrt::Buffer::host(*device, chunk_offsets->size(), VK_BUFFER_USAGE_TRANSFER_SRC_BIT); 438 | uint32_t *offs = reinterpret_cast(upload_chunks->map()); 439 | for (size_t i = 0; i < num_chunks; ++i) { 440 | offs[i * 64] = i * max_dispatch_size; 441 | } 442 | upload_chunks->unmap(); 443 | 444 | VkCommandBufferBeginInfo begin_info = {}; 445 | begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; 446 | begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; 447 | CHECK_VULKAN(vkBeginCommandBuffer(command_buffer, &begin_info)); 448 | 449 | VkMemoryBarrier barrier = {}; 450 | barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; 451 | barrier.srcAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT; 452 | barrier.dstAccessMask = barrier.srcAccessMask; 453 | { 454 | VkBufferCopy copy_cmd = {}; 455 | copy_cmd.size = upload_chunks->size(); 456 | vkCmdCopyBuffer( 457 | command_buffer, upload_chunks->handle(), chunk_offsets->handle(), 1, ©_cmd); 458 | 459 | vkCmdPipelineBarrier(command_buffer, 460 | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 461 | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 462 | 0, 463 | 1, 464 | &barrier, 465 | 0, 466 | nullptr, 467 | 0, 468 | nullptr); 469 | } 470 | 471 | for (size_t i = 0; i < num_chunks; ++i) { 472 | const size_t num_work_groups = 473 | std::min(voxels_to_process - i * max_dispatch_size, max_dispatch_size); 474 | const std::vector dynamic_offsets = { 475 | uint32_t(i * max_dispatch_size * sizeof(uint32_t)), 476 | uint32_t(i * max_dispatch_size * sizeof(uint32_t)), 477 | uint32_t(i * 256)}; 478 | 479 | vkCmdBindPipeline( 480 | command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, stream_compact_pipeline); 481 | VkDescriptorSet desc_set = stream_compact_desc_set; 482 | if (num_work_groups < max_dispatch_size) { 483 | desc_set = stream_compact_remainder_desc_set; 484 | } 485 | vkCmdBindDescriptorSets(command_buffer, 486 | VK_PIPELINE_BIND_POINT_COMPUTE, 487 | stream_compact_pipeline_layout, 488 | 0, 489 | 1, 490 | &desc_set, 491 | dynamic_offsets.size(), 492 | dynamic_offsets.data()); 493 | vkCmdDispatch(command_buffer, num_work_groups, 1, 1); 494 | 495 | 
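        // Global memory barrier between chunk dispatches so this chunk's writes of
        // compacted voxel IDs complete before the next chunk's dispatch runs against
        // the same output buffer.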
vkCmdPipelineBarrier(command_buffer, 496 | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 497 | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 498 | 0, 499 | 1, 500 | &barrier, 501 | 0, 502 | nullptr, 503 | 0, 504 | nullptr); 505 | } 506 | 507 | CHECK_VULKAN(vkEndCommandBuffer(command_buffer)); 508 | VkSubmitInfo submit_info = {}; 509 | submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; 510 | submit_info.commandBufferCount = 1; 511 | submit_info.pCommandBuffers = &command_buffer; 512 | 513 | CHECK_VULKAN(vkResetFences(device->logical_device(), 1, &fence)); 514 | CHECK_VULKAN(vkQueueSubmit(device->graphics_queue(), 1, &submit_info, fence)); 515 | CHECK_VULKAN(vkWaitForFences( 516 | device->logical_device(), 1, &fence, true, std::numeric_limits::max())); 517 | vkResetCommandPool( 518 | device->logical_device(), command_pool, VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT); 519 | return active_voxel_ids; 520 | } 521 | 522 | std::shared_ptr MarchingCubes::compute_num_vertices( 523 | std::shared_ptr &active_voxel_ids, uint32_t &total_vertices) 524 | { 525 | const size_t total_active = active_voxel_ids->size() / sizeof(uint32_t); 526 | const uint32_t aligned_total_active = num_verts_scanner.get_aligned_size(total_active); 527 | if (aligned_total_active > current_aligned_total_active) { 528 | num_verts_buffer = vkrt::Buffer::device(*device, 529 | aligned_total_active * sizeof(uint32_t), 530 | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | 531 | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | 532 | VK_BUFFER_USAGE_TRANSFER_DST_BIT); 533 | current_aligned_total_active = aligned_total_active; 534 | } 535 | 536 | vkrt::DescriptorSetUpdater() 537 | .write_ssbo(compute_num_verts_desc_set, 0, active_voxel_ids) 538 | .write_ssbo(compute_num_verts_desc_set, 1, num_verts_buffer) 539 | .write_ubo(compute_num_verts_desc_set, 2, tri_table_buffer) 540 | .update(*device); 541 | 542 | using namespace std::chrono; 543 | auto start = steady_clock::now(); 544 | VkCommandBufferBeginInfo begin_info = {}; 545 | begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; 546 | begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; 547 | CHECK_VULKAN(vkBeginCommandBuffer(command_buffer, &begin_info)); 548 | 549 | vkCmdBindPipeline( 550 | command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, compute_num_verts_pipeline); 551 | 552 | const std::vector desc_sets = {volume_data_desc_set, 553 | compute_num_verts_desc_set}; 554 | vkCmdBindDescriptorSets(command_buffer, 555 | VK_PIPELINE_BIND_POINT_COMPUTE, 556 | compute_num_verts_pipeline_layout, 557 | 0, 558 | 2, 559 | desc_sets.data(), 560 | 0, 561 | nullptr); 562 | vkCmdDispatch(command_buffer, total_active, 1, 1); 563 | 564 | CHECK_VULKAN(vkEndCommandBuffer(command_buffer)); 565 | 566 | VkSubmitInfo submit_info = {}; 567 | submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; 568 | submit_info.commandBufferCount = 1; 569 | submit_info.pCommandBuffers = &command_buffer; 570 | 571 | CHECK_VULKAN(vkResetFences(device->logical_device(), 1, &fence)); 572 | CHECK_VULKAN(vkQueueSubmit(device->graphics_queue(), 1, &submit_info, fence)); 573 | CHECK_VULKAN(vkWaitForFences( 574 | device->logical_device(), 1, &fence, true, std::numeric_limits::max())); 575 | vkResetCommandPool( 576 | device->logical_device(), command_pool, VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT); 577 | auto end = steady_clock::now(); 578 | std::cout << "num_verts pipeline only: " 579 | << duration_cast(end - start).count() << "\n"; 580 | 581 | num_verts_scanner.prepare_gpu_input(num_verts_buffer, total_active); 582 | total_vertices = 
num_verts_scanner.scan(); 583 | return num_verts_buffer; 584 | } 585 | 586 | void MarchingCubes::compute_vertices(std::shared_ptr &active_voxel_ids, 587 | std::shared_ptr &vertex_offset_buffer, 588 | const uint32_t total_vertices) 589 | { 590 | const size_t total_active = active_voxel_ids->size() / sizeof(uint32_t); 591 | 592 | if (!vertex_buffer || total_vertices * 16 > vertex_buffer->size()) { 593 | vertex_buffer = vkrt::Buffer::device( 594 | *device, 595 | total_vertices * 16, 596 | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT); 597 | } 598 | 599 | vkrt::DescriptorSetUpdater() 600 | .write_ssbo(compute_verts_desc_set, 0, active_voxel_ids) 601 | .write_ssbo(compute_verts_desc_set, 1, vertex_offset_buffer) 602 | .write_ssbo(compute_verts_desc_set, 2, vertex_buffer) 603 | .write_ubo(compute_verts_desc_set, 3, tri_table_buffer) 604 | .update(*device); 605 | 606 | VkCommandBufferBeginInfo begin_info = {}; 607 | begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; 608 | begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; 609 | CHECK_VULKAN(vkBeginCommandBuffer(command_buffer, &begin_info)); 610 | 611 | vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, compute_verts_pipeline); 612 | 613 | const std::vector desc_sets = {volume_data_desc_set, 614 | compute_verts_desc_set}; 615 | vkCmdBindDescriptorSets(command_buffer, 616 | VK_PIPELINE_BIND_POINT_COMPUTE, 617 | compute_verts_pipeline_layout, 618 | 0, 619 | 2, 620 | desc_sets.data(), 621 | 0, 622 | nullptr); 623 | vkCmdDispatch(command_buffer, total_active, 1, 1); 624 | 625 | CHECK_VULKAN(vkEndCommandBuffer(command_buffer)); 626 | 627 | VkSubmitInfo submit_info = {}; 628 | submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; 629 | submit_info.commandBufferCount = 1; 630 | submit_info.pCommandBuffers = &command_buffer; 631 | 632 | CHECK_VULKAN(vkResetFences(device->logical_device(), 1, &fence)); 633 | CHECK_VULKAN(vkQueueSubmit(device->graphics_queue(), 1, &submit_info, fence)); 634 | CHECK_VULKAN(vkWaitForFences( 635 | device->logical_device(), 1, &fence, true, std::numeric_limits::max())); 636 | vkResetCommandPool( 637 | device->logical_device(), command_pool, VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT); 638 | } 639 | -------------------------------------------------------------------------------- /src/compute_marching_cubes.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include "exclusive_scan.h" 5 | #include "vulkan_utils.h" 6 | 7 | struct MarchingCubes { 8 | static const size_t max_dispatch_size; 9 | 10 | std::shared_ptr device; 11 | 12 | ExclusiveScanner active_voxel_scanner, num_verts_scanner; 13 | 14 | glm::uvec3 volume_dims; 15 | 16 | std::shared_ptr volume_info_buffer, upload_volume_info_buffer, volume_buffer, 17 | active_voxel_buffer, active_voxel_offsets_buffer, vertex_buffer, tri_table_buffer; 18 | 19 | // Scratch buffers which we keep and re-use if the new computation 20 | // fits in the same buffer space 21 | std::shared_ptr active_voxel_ids, num_verts_buffer; 22 | 23 | uint32_t current_total_active = 0; 24 | uint32_t current_aligned_total_active = 0; 25 | 26 | VkDescriptorSetLayout volume_data_layout = VK_NULL_HANDLE; 27 | VkDescriptorSetLayout compute_active_layout = VK_NULL_HANDLE; 28 | VkDescriptorSetLayout stream_compact_layout = VK_NULL_HANDLE; 29 | VkDescriptorSetLayout compute_num_verts_layout = VK_NULL_HANDLE; 30 | VkDescriptorSetLayout compute_verts_layout = VK_NULL_HANDLE; 31 | 32 | 
VkDescriptorPool desc_pool = VK_NULL_HANDLE; 33 | 34 | VkDescriptorSet volume_data_desc_set = VK_NULL_HANDLE; 35 | VkDescriptorSet compute_active_desc_set = VK_NULL_HANDLE; 36 | VkDescriptorSet stream_compact_desc_set = VK_NULL_HANDLE; 37 | VkDescriptorSet stream_compact_remainder_desc_set = VK_NULL_HANDLE; 38 | VkDescriptorSet compute_num_verts_desc_set = VK_NULL_HANDLE; 39 | VkDescriptorSet compute_verts_desc_set = VK_NULL_HANDLE; 40 | 41 | VkPipelineLayout compute_active_pipeline_layout = VK_NULL_HANDLE; 42 | VkPipelineLayout stream_compact_pipeline_layout = VK_NULL_HANDLE; 43 | VkPipelineLayout compute_num_verts_pipeline_layout = VK_NULL_HANDLE; 44 | VkPipelineLayout compute_verts_pipeline_layout = VK_NULL_HANDLE; 45 | 46 | VkPipeline compute_active_pipeline = VK_NULL_HANDLE; 47 | VkPipeline stream_compact_pipeline = VK_NULL_HANDLE; 48 | VkPipeline compute_num_verts_pipeline = VK_NULL_HANDLE; 49 | VkPipeline compute_verts_pipeline = VK_NULL_HANDLE; 50 | 51 | VkCommandPool command_pool = VK_NULL_HANDLE; 52 | VkCommandBuffer command_buffer = VK_NULL_HANDLE; 53 | VkFence fence = VK_NULL_HANDLE; 54 | 55 | MarchingCubes(std::shared_ptr &device, 56 | void *volume_data, 57 | const glm::uvec3 &volume_dims, 58 | const std::string &volume_type); 59 | 60 | uint32_t compute_surface(const float isovalue); 61 | 62 | private: 63 | uint32_t compute_active_voxels(); 64 | 65 | std::shared_ptr compact_active_voxels(const uint32_t total_active); 66 | 67 | std::shared_ptr compute_num_vertices( 68 | std::shared_ptr &active_voxel_ids, uint32_t &total_vertices); 69 | 70 | void compute_vertices(std::shared_ptr &active_voxel_ids, 71 | std::shared_ptr &vertex_offset_buffer, 72 | const uint32_t total_vertices); 73 | }; 74 | -------------------------------------------------------------------------------- /src/compute_num_verts.comp: -------------------------------------------------------------------------------- 1 | #version 450 core 2 | #extension GL_EXT_scalar_block_layout : require 3 | 4 | #include "compute_vertex_values.comp" 5 | 6 | layout(set = 1, binding = 0, std430) buffer ActiveVoxels { 7 | uint active_voxels[]; 8 | }; 9 | 10 | layout(set = 1, binding = 1, std430) buffer NumVerts { 11 | uint num_verts[]; 12 | }; 13 | 14 | layout(set = 1, binding = 2, scalar) uniform TriTable { 15 | int tri_table[256 * 16]; 16 | }; 17 | 18 | void main(void) { 19 | const uint voxel_id = active_voxels[gl_GlobalInvocationID.x]; 20 | const float values[8] = compute_vertex_values(voxel_id_to_voxel(voxel_id)); 21 | uint case_index = 0; 22 | for (int i = 0; i < 8; ++i) { 23 | if (values[i] <= isovalue) { 24 | case_index |= 1 << i; 25 | } 26 | } 27 | 28 | uint nverts = 0; 29 | // The triangle table gives us the mapping from index to actual 30 | // triangles to return for this configuration 31 | const uint base_index = case_index * 16; 32 | for (uint t = 0; tri_table[base_index + t] != -1; ++t) { 33 | ++nverts; 34 | } 35 | num_verts[gl_GlobalInvocationID.x] = nverts; 36 | } 37 | 38 | -------------------------------------------------------------------------------- /src/compute_vertex_values.comp: -------------------------------------------------------------------------------- 1 | #ifndef COMPUTE_VERTEX_VALUES_COMP 2 | #define COMPUTE_VERTEX_VALUES_COMP 3 | 4 | #define TRI_TABLE_SIZE 256 5 | 6 | layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; 7 | 8 | layout(set = 0, binding = 0, std430) buffer Volume { 9 | VOLUME_DTYPE volume[]; 10 | }; 11 | 12 | layout(set = 0, binding = 1, std140) uniform VolumeInfo { 13 | uvec4 
volume_dims; 14 | float isovalue; 15 | }; 16 | 17 | const ivec3[8] index_to_vertex = { 18 | ivec3(0, 0, 0), 19 | ivec3(1, 0, 0), 20 | ivec3(1, 1, 0), 21 | ivec3(0, 1, 0), 22 | ivec3(0, 0, 1), 23 | ivec3(1, 0, 1), 24 | ivec3(1, 1, 1), 25 | ivec3(0, 1, 1) 26 | }; 27 | 28 | uvec3 voxel_id_to_voxel(uint id) { 29 | return uvec3(id % (volume_dims[0] - 1), 30 | (id / (volume_dims[0] - 1)) % (volume_dims[1] - 1), 31 | id / ((volume_dims[0] - 1) * (volume_dims[1] - 1))); 32 | } 33 | 34 | float[8] compute_vertex_values(uvec3 voxel_pos) { 35 | float values[8]; 36 | for (int i = 0; i < 8; ++i) { 37 | const uvec3 v = index_to_vertex[i]; 38 | 39 | uint voxel = ((voxel_pos.z + v.z) * volume_dims.y + voxel_pos.y + v.y) * volume_dims.x 40 | + voxel_pos.x + v.x; 41 | #if UINT8_VOLUME == 1 42 | uint subbit = (voxel % 4) * 8; 43 | voxel = voxel / 4; 44 | values[i] = (volume[voxel] & (0x000000ff << subbit)) >> subbit; 45 | #elif UINT16_VOLUME == 1 46 | uint subbit = (voxel % 2) * 16; 47 | voxel = voxel / 2; 48 | values[i] = (volume[voxel] & (0x0000ffff << subbit)) >> subbit; 49 | #else 50 | values[i] = volume[voxel]; 51 | #endif 52 | } 53 | return values; 54 | } 55 | 56 | #endif 57 | 58 | -------------------------------------------------------------------------------- /src/compute_vertices.comp: -------------------------------------------------------------------------------- 1 | #version 450 core 2 | #extension GL_EXT_scalar_block_layout : require 3 | 4 | #include "compute_vertex_values.comp" 5 | 6 | const int[12][2] edge_vertices = { 7 | {0, 1}, 8 | {1, 2}, 9 | {2, 3}, 10 | {3, 0}, 11 | {4, 5}, 12 | {6, 5}, 13 | {6, 7}, 14 | {7, 4}, 15 | {0, 4}, 16 | {1, 5}, 17 | {2, 6}, 18 | {3, 7}, 19 | }; 20 | 21 | layout(set = 1, binding = 0, std430) buffer ActiveVoxels { 22 | uint active_voxels[]; 23 | }; 24 | 25 | layout(set = 1, binding = 1, std430) buffer VertexOffsets { 26 | uint vertex_offsets[]; 27 | }; 28 | 29 | layout(set = 1, binding = 2, std430) buffer Vertices { 30 | vec4 verts[]; 31 | }; 32 | 33 | layout(set = 1, binding = 3, scalar) uniform TriTable { 34 | int tri_table[256 * 16]; 35 | }; 36 | 37 | vec3 lerp_verts(const ivec3 va, const ivec3 vb, const float fa, const float fb) { 38 | float t = 0; 39 | if (abs(fa - fb) < 0.001) { 40 | t = 0.0; 41 | } else { 42 | t = (isovalue - fa) / (fb - fa); 43 | } 44 | return vec3(va[0] + t * (vb[0] - va[0]), 45 | va[1] + t * (vb[1] - va[1]), 46 | va[2] + t * (vb[2] - va[2])); 47 | } 48 | 49 | void main(void) { 50 | const uint voxel_id = active_voxels[gl_GlobalInvocationID.x]; 51 | const uvec3 voxel_pos = voxel_id_to_voxel(voxel_id); 52 | const float values[8] = compute_vertex_values(voxel_pos); 53 | uint case_index = 0; 54 | for (int i = 0; i < 8; ++i) { 55 | if (values[i] <= isovalue) { 56 | case_index |= 1 << i; 57 | } 58 | } 59 | 60 | uint vertex_offset = vertex_offsets[gl_GlobalInvocationID.x]; 61 | // The triangle table gives us the mapping from index to actual 62 | // triangles to return for this configuration 63 | const uint base_index = case_index * 16; 64 | for (uint t = 0; tri_table[base_index + t] != -1; ++t) { 65 | const uint eidx = tri_table[base_index + t]; 66 | const uint v0 = edge_vertices[eidx][0]; 67 | const uint v1 = edge_vertices[eidx][1]; 68 | 69 | const vec3 v = lerp_verts(index_to_vertex[v0], index_to_vertex[v1], 70 | values[v0], values[v1]); 71 | 72 | verts[vertex_offset + t] = vec4( 73 | v.x + voxel_pos.x + 0.5, 74 | v.y + voxel_pos.y + 0.5, 75 | v.z + voxel_pos.z + 0.5, 76 | 1.0); 77 | } 78 | } 79 | 80 | 
-------------------------------------------------------------------------------- /src/exclusive_scan.cpp: -------------------------------------------------------------------------------- 1 | #include "exclusive_scan.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "scan_shaders_embedded_spv.h" 7 | #include "util.h" 8 | 9 | const size_t ExclusiveScanner::block_size = BLOCK_SIZE; 10 | const size_t ExclusiveScanner::workgroup_size = BLOCK_SIZE / 2; 11 | const size_t ExclusiveScanner::max_scan_size = BLOCK_SIZE * BLOCK_SIZE; 12 | 13 | ExclusiveScanner::ExclusiveScanner(std::shared_ptr &device) : device(device) 14 | { 15 | // Make descriptor sets 16 | scan_blocks_layout = 17 | vkrt::DescriptorSetLayoutBuilder() 18 | .add_binding( 19 | 0, 1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC, VK_SHADER_STAGE_COMPUTE_BIT) 20 | .add_binding(1, 1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT) 21 | .build(*device); 22 | 23 | scan_block_results_layout = 24 | vkrt::DescriptorSetLayoutBuilder() 25 | .add_binding(0, 1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT) 26 | .add_binding(1, 1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT) 27 | .build(*device); 28 | 29 | // Make pipelines for scan/add blocks pipelines 30 | vkrt::make_basic_compute_pipeline(prefix_sum_spv, 31 | sizeof(prefix_sum_spv), 32 | {scan_blocks_layout}, 33 | *device, 34 | scan_blocks_pipeline_layout, 35 | scan_blocks_pipeline); 36 | 37 | vkrt::make_basic_compute_pipeline(add_block_sums_spv, 38 | sizeof(add_block_sums_spv), 39 | {scan_blocks_layout}, 40 | *device, 41 | scan_blocks_pipeline_layout, 42 | add_block_sums_pipeline); 43 | 44 | vkrt::make_basic_compute_pipeline(block_prefix_sum_spv, 45 | sizeof(block_prefix_sum_spv), 46 | {scan_block_results_layout}, 47 | *device, 48 | scan_block_results_pipeline_layout, 49 | scan_block_results_pipeline); 50 | 51 | const std::vector pool_sizes = { 52 | VkDescriptorPoolSize{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC, 2}, 53 | VkDescriptorPoolSize{VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 4}}; 54 | 55 | VkDescriptorPoolCreateInfo pool_create_info = {}; 56 | pool_create_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; 57 | pool_create_info.maxSets = 3; 58 | pool_create_info.poolSizeCount = pool_sizes.size(); 59 | pool_create_info.pPoolSizes = pool_sizes.data(); 60 | CHECK_VULKAN(vkCreateDescriptorPool( 61 | device->logical_device(), &pool_create_info, nullptr, &desc_pool)); 62 | 63 | VkDescriptorSetAllocateInfo alloc_info = {}; 64 | alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; 65 | alloc_info.pSetLayouts = &scan_blocks_layout; 66 | alloc_info.descriptorPool = desc_pool; 67 | alloc_info.descriptorSetCount = 1; 68 | CHECK_VULKAN(vkAllocateDescriptorSets( 69 | device->logical_device(), &alloc_info, &scan_blocks_desc_set)); 70 | CHECK_VULKAN(vkAllocateDescriptorSets( 71 | device->logical_device(), &alloc_info, &scan_blocks_remainder_desc_set)); 72 | 73 | alloc_info.pSetLayouts = &scan_block_results_layout; 74 | CHECK_VULKAN(vkAllocateDescriptorSets( 75 | device->logical_device(), &alloc_info, &scan_block_results_desc_set)); 76 | 77 | readback_buffer = vkrt::Buffer::host(*device, 4, VK_BUFFER_USAGE_TRANSFER_DST_BIT); 78 | 79 | block_sum_buffer = vkrt::Buffer::device( 80 | *device, 81 | block_size * 4, 82 | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT); 83 | 84 | carry_buffer = vkrt::Buffer::device(*device, 85 | 8, 86 | VK_BUFFER_USAGE_TRANSFER_DST_BIT | 87 | VK_BUFFER_USAGE_TRANSFER_SRC_BIT 
| 88 | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT); 89 | 90 | clearcarry_buffer = vkrt::Buffer::host(*device, 8, VK_BUFFER_USAGE_TRANSFER_SRC_BIT); 91 | std::memset(clearcarry_buffer->map(), 0, clearcarry_buffer->size()); 92 | clearcarry_buffer->unmap(); 93 | 94 | command_pool = device->make_command_pool(VK_COMMAND_POOL_CREATE_TRANSIENT_BIT); 95 | { 96 | VkCommandBufferAllocateInfo info = {}; 97 | info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; 98 | info.commandPool = command_pool; 99 | info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; 100 | info.commandBufferCount = 1; 101 | CHECK_VULKAN( 102 | vkAllocateCommandBuffers(device->logical_device(), &info, &command_buffer)); 103 | } 104 | { 105 | VkFenceCreateInfo info = {}; 106 | info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; 107 | CHECK_VULKAN(vkCreateFence(device->logical_device(), &info, nullptr, &fence)); 108 | } 109 | } 110 | 111 | size_t ExclusiveScanner::get_aligned_size(size_t size) 112 | { 113 | return align_to(size, block_size); 114 | } 115 | 116 | void ExclusiveScanner::prepare_input(const std::vector &array) 117 | { 118 | auto upload_input = vkrt::Buffer::host( 119 | *device, array.size() * sizeof(uint32_t), VK_BUFFER_USAGE_TRANSFER_SRC_BIT); 120 | std::memcpy(upload_input->map(), array.data(), upload_input->size()); 121 | upload_input->unmap(); 122 | 123 | auto gpu_input = vkrt::Buffer::device(*device, 124 | get_aligned_size(array.size()) * sizeof(uint32_t), 125 | VK_BUFFER_USAGE_TRANSFER_DST_BIT | 126 | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | 127 | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT); 128 | 129 | VkCommandBufferBeginInfo begin_info = {}; 130 | begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; 131 | begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; 132 | CHECK_VULKAN(vkBeginCommandBuffer(command_buffer, &begin_info)); 133 | 134 | VkBufferCopy copy_cmd = {}; 135 | copy_cmd.size = upload_input->size(); 136 | vkCmdCopyBuffer(command_buffer, upload_input->handle(), gpu_input->handle(), 1, ©_cmd); 137 | CHECK_VULKAN(vkEndCommandBuffer(command_buffer)); 138 | 139 | VkSubmitInfo submit_info = {}; 140 | submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; 141 | submit_info.commandBufferCount = 1; 142 | submit_info.pCommandBuffers = &command_buffer; 143 | CHECK_VULKAN(vkQueueSubmit(device->graphics_queue(), 1, &submit_info, VK_NULL_HANDLE)); 144 | CHECK_VULKAN(vkQueueWaitIdle(device->graphics_queue())); 145 | 146 | vkResetCommandPool( 147 | device->logical_device(), command_pool, VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT); 148 | 149 | prepare_gpu_input(gpu_input, array.size()); 150 | } 151 | 152 | void ExclusiveScanner::prepare_gpu_input(std::shared_ptr &buffer, 153 | size_t data_size) 154 | { 155 | if (get_aligned_size(buffer->size()) != buffer->size()) { 156 | throw std::runtime_error( 157 | "Buffer size must be aligned via ExclusiveScanner::get_aligned_size"); 158 | } 159 | 160 | input_buffer = buffer; 161 | const size_t scan_num_elements = buffer->size() / 4; 162 | 163 | // Write the descriptor sets (aka, the WebGPU bind groups) 164 | vkrt::DescriptorSetUpdater() 165 | .write_ssbo_dynamic(scan_blocks_desc_set, 166 | 0, 167 | input_buffer, 168 | 0, 169 | 4 * std::min(max_scan_size, scan_num_elements)) 170 | .write_ssbo(scan_blocks_desc_set, 1, block_sum_buffer) 171 | .write_ssbo_dynamic(scan_blocks_remainder_desc_set, 172 | 0, 173 | input_buffer, 174 | 0, 175 | 4 * (scan_num_elements % max_scan_size)) 176 | .write_ssbo(scan_blocks_remainder_desc_set, 1, block_sum_buffer) 177 | 
.write_ssbo(scan_block_results_desc_set, 0, block_sum_buffer) 178 | .write_ssbo(scan_block_results_desc_set, 1, carry_buffer) 179 | .update(*device); 180 | 181 | const size_t num_chunks = std::ceil(static_cast(scan_num_elements) / max_scan_size); 182 | std::vector offsets; 183 | for (size_t i = 0; i < num_chunks; ++i) { 184 | offsets.push_back(i * max_scan_size * 4); 185 | } 186 | 187 | // Build the command buffer 188 | VkCommandBufferBeginInfo begin_info = {}; 189 | begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; 190 | begin_info.flags = 0; 191 | CHECK_VULKAN(vkBeginCommandBuffer(command_buffer, &begin_info)); 192 | // Clear the carry buffer and readback sum entry 193 | { 194 | VkBufferCopy copy_cmd = {}; 195 | copy_cmd.size = clearcarry_buffer->size(); 196 | vkCmdCopyBuffer( 197 | command_buffer, clearcarry_buffer->handle(), carry_buffer->handle(), 1, ©_cmd); 198 | } 199 | if (data_size < scan_num_elements) { 200 | VkBufferCopy copy_cmd = {}; 201 | copy_cmd.size = 4; 202 | copy_cmd.dstOffset = data_size * 4; 203 | vkCmdCopyBuffer( 204 | command_buffer, clearcarry_buffer->handle(), input_buffer->handle(), 1, ©_cmd); 205 | } 206 | 207 | VkMemoryBarrier barrier = {}; 208 | barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; 209 | barrier.srcAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT; 210 | barrier.dstAccessMask = barrier.srcAccessMask; 211 | for (size_t i = 0; i < num_chunks; ++i) { 212 | const uint32_t num_work_groups = 213 | std::min((scan_num_elements - i * max_scan_size) / block_size, size_t(block_size)); 214 | vkCmdBindPipeline( 215 | command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, scan_blocks_pipeline); 216 | 217 | VkDescriptorSet scan_desc_set = scan_blocks_desc_set; 218 | if (num_work_groups < max_scan_size / block_size) { 219 | scan_desc_set = scan_blocks_remainder_desc_set; 220 | } 221 | 222 | vkCmdBindDescriptorSets(command_buffer, 223 | VK_PIPELINE_BIND_POINT_COMPUTE, 224 | scan_blocks_pipeline_layout, 225 | 0, 226 | 1, 227 | &scan_desc_set, 228 | 1, 229 | &offsets[i]); 230 | vkCmdDispatch(command_buffer, num_work_groups, 1, 1); 231 | // Queue a barrier for the pass to finish 232 | vkCmdPipelineBarrier(command_buffer, 233 | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 234 | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 235 | 0, 236 | 1, 237 | &barrier, 238 | 0, 239 | nullptr, 240 | 0, 241 | nullptr); 242 | 243 | vkCmdBindPipeline( 244 | command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, scan_block_results_pipeline); 245 | vkCmdBindDescriptorSets(command_buffer, 246 | VK_PIPELINE_BIND_POINT_COMPUTE, 247 | scan_block_results_pipeline_layout, 248 | 0, 249 | 1, 250 | &scan_block_results_desc_set, 251 | 0, 252 | nullptr); 253 | vkCmdDispatch(command_buffer, 1, 1, 1); 254 | // Queue a barrier for the pass to finish 255 | vkCmdPipelineBarrier(command_buffer, 256 | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 257 | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 258 | 0, 259 | 1, 260 | &barrier, 261 | 0, 262 | nullptr, 263 | 0, 264 | nullptr); 265 | 266 | vkCmdBindPipeline( 267 | command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, add_block_sums_pipeline); 268 | vkCmdBindDescriptorSets(command_buffer, 269 | VK_PIPELINE_BIND_POINT_COMPUTE, 270 | scan_blocks_pipeline_layout, 271 | 0, 272 | 1, 273 | &scan_desc_set, 274 | 1, 275 | &offsets[i]); 276 | vkCmdDispatch(command_buffer, num_work_groups, 1, 1); 277 | // Queue a barrier for the pass to finish 278 | vkCmdPipelineBarrier(command_buffer, 279 | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 280 | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 281 | 0, 282 | 1, 283 | 
&barrier,
284 | 0,
285 | nullptr,
286 | 0,
287 | nullptr);
288 |
289 | // Update the carry buffer
290 | VkBufferCopy copy_cmd = {};
291 | copy_cmd.size = 4;
292 | copy_cmd.srcOffset = 4;
293 | copy_cmd.dstOffset = 0;
294 | vkCmdCopyBuffer(
295 | command_buffer, carry_buffer->handle(), carry_buffer->handle(), 1, &copy_cmd);
296 | }
297 | vkCmdPipelineBarrier(command_buffer,
298 | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
299 | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
300 | 0,
301 | 1,
302 | &barrier,
303 | 0,
304 | nullptr,
305 | 0,
306 | nullptr);
307 | // Readback the last element to return the total sum
308 | if (data_size < scan_num_elements) {
309 | VkBufferCopy copy_cmd = {};
310 | copy_cmd.size = 4;
311 | copy_cmd.srcOffset = data_size * 4;
312 | vkCmdCopyBuffer(
313 | command_buffer, input_buffer->handle(), readback_buffer->handle(), 1, &copy_cmd);
314 | } else {
315 | VkBufferCopy copy_cmd = {};
316 | copy_cmd.size = 4;
317 | copy_cmd.srcOffset = 4;
318 | vkCmdCopyBuffer(
319 | command_buffer, carry_buffer->handle(), readback_buffer->handle(), 1, &copy_cmd);
320 | }
321 | CHECK_VULKAN(vkEndCommandBuffer(command_buffer));
322 | }
323 |
324 | uint32_t ExclusiveScanner::scan()
325 | {
326 | using namespace std::chrono;
327 | auto start = steady_clock::now();
328 | CHECK_VULKAN(vkResetFences(device->logical_device(), 1, &fence));
329 | VkSubmitInfo submit_info = {};
330 | submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
331 | submit_info.commandBufferCount = 1;
332 | submit_info.pCommandBuffers = &command_buffer;
333 | CHECK_VULKAN(vkQueueSubmit(device->graphics_queue(), 1, &submit_info, fence));
334 | CHECK_VULKAN(vkWaitForFences(
335 | device->logical_device(), 1, &fence, true, std::numeric_limits<uint64_t>::max()));
336 | auto end = steady_clock::now();
337 | std::cout << "Scan took " << duration_cast<milliseconds>(end - start).count() << "ms\n";
338 |
339 | uint32_t sum = 0;
340 | std::memcpy(&sum, readback_buffer->map(), 4);
341 | readback_buffer->unmap();
342 |
343 | return sum;
344 | }
345 |
346 | uint32_t serial_exclusive_scan(const std::vector<uint32_t> &input,
347 | std::vector<uint32_t> &output)
348 | {
349 | output.resize(input.size(), 0);
350 | output[0] = 0;
351 | for (size_t i = 1; i < input.size(); ++i) {
352 | output[i] = input[i - 1] + output[i - 1];
353 | }
354 | return output[output.size() - 1] + input[input.size() - 1];
355 | }
356 |
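A minimal usage sketch of the scanner (assuming a default-constructed vkrt::Device, as in marching_cubes.cpp, and a non-empty input): prepare_input uploads the array into a padded device buffer and records the scan commands, scan() submits and returns the total, and serial_exclusive_scan gives a CPU result to compare against.

    std::vector<uint32_t> input(1024, 1);
    std::vector<uint32_t> expected;
    const uint32_t expected_sum = serial_exclusive_scan(input, expected);

    auto device = std::make_shared<vkrt::Device>();
    ExclusiveScanner scanner(device);
    scanner.prepare_input(input);           // upload + record the chained block scans
    const uint32_t total = scanner.scan();  // submits and waits; should equal expected_sum
    // The exclusive prefix sums are left in scanner.input_buffer on the device.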
-------------------------------------------------------------------------------- /src/exclusive_scan.h: --------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include
4 | #include "vulkan_utils.h"
5 |
6 | struct ExclusiveScanner {
7 | static const size_t block_size;
8 | static const size_t workgroup_size;
9 | static const size_t max_scan_size;
10 |
11 | std::shared_ptr<vkrt::Device> device;
12 | std::shared_ptr<vkrt::Buffer> input_buffer, block_sum_buffer, readback_buffer,
13 | carry_buffer, clearcarry_buffer;
14 |
15 | VkDescriptorSetLayout scan_blocks_layout = VK_NULL_HANDLE;
16 | VkDescriptorSetLayout scan_block_results_layout = VK_NULL_HANDLE;
17 |
18 | VkPipelineLayout scan_blocks_pipeline_layout = VK_NULL_HANDLE;
19 | VkPipelineLayout scan_block_results_pipeline_layout = VK_NULL_HANDLE;
20 |
21 | VkPipeline scan_blocks_pipeline = VK_NULL_HANDLE;
22 | VkPipeline scan_block_results_pipeline = VK_NULL_HANDLE;
23 | VkPipeline add_block_sums_pipeline = VK_NULL_HANDLE;
24 |
25 | VkDescriptorPool desc_pool = VK_NULL_HANDLE;
26 |
27 | VkDescriptorSet scan_blocks_desc_set = VK_NULL_HANDLE;
28 | VkDescriptorSet scan_blocks_remainder_desc_set = VK_NULL_HANDLE;
29 | VkDescriptorSet scan_block_results_desc_set = VK_NULL_HANDLE;
30 |
31 | VkCommandPool command_pool = VK_NULL_HANDLE;
32 | VkCommandBuffer command_buffer = VK_NULL_HANDLE;
33 |
34 | VkFence fence = VK_NULL_HANDLE;
35 |
36 | ExclusiveScanner(std::shared_ptr<vkrt::Device> &device);
37 |
38 | size_t get_aligned_size(size_t size);
39 |
40 | void prepare_input(const std::vector<uint32_t> &array);
41 |
42 | void prepare_gpu_input(std::shared_ptr<vkrt::Buffer> &buffer, size_t data_size);
43 |
44 | uint32_t scan();
45 | };
46 |
47 | uint32_t serial_exclusive_scan(const std::vector<uint32_t> &input,
48 | std::vector<uint32_t> &output);
49 |
-------------------------------------------------------------------------------- /src/marching_cubes.cpp: --------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 | #include "compute_marching_cubes.h"
7 | #include "exclusive_scan.h"
8 | #include "vulkan_utils.h"
9 |
10 | int main(int argc, char **argv)
11 | {
12 | if (argc < 3) {
13 | std::cerr << "Usage: " << argv[0] << " <volume.raw> <isovalue> [output.obj]\n";
14 | return 1;
15 | }
16 |
17 | const std::string file = argv[1];
18 | const float isovalue = std::stof(argv[2]);
19 | const std::regex match_filename("(\\w+)_(\\d+)x(\\d+)x(\\d+)_(.+)\\.raw");
20 | auto matches = std::sregex_iterator(file.begin(), file.end(), match_filename);
21 | if (matches == std::sregex_iterator() || matches->size() != 6) {
22 | std::cerr << "Unrecognized raw volume naming scheme, expected a format like: "
23 | << "'<name>_<X>x<Y>x<Z>_<type>.raw' but '" << file << "' did not match"
24 | << std::endl;
25 | throw std::runtime_error("Invalid raw file naming scheme");
26 | }
27 | std::string output;
28 | if (argc == 4) {
29 | output = argv[3];
30 | }
31 |
32 | const glm::uvec3 volume_dims(
33 | std::stoi((*matches)[2]), std::stoi((*matches)[3]), std::stoi((*matches)[4]));
34 | const std::string volume_type = (*matches)[5];
35 |
36 | size_t voxel_size = 0;
37 | #if UINT8_VOLUME
38 | if (volume_type == "uint8") {
39 | voxel_size = 1;
40 | }
41 | #endif
42 | #if UINT16_VOLUME
43 | if (volume_type == "uint16") {
44 | voxel_size = 2;
45 | }
46 | #endif
47 | #if UINT32_VOLUME
48 | if (volume_type == "uint32") {
49 | voxel_size = 4;
50 | }
51 | #endif
52 | #if FLOAT32_VOLUME
53 | if (volume_type == "float32") {
54 | voxel_size = 4;
55 | }
56 | #endif
57 | if (voxel_size == 0) {
58 | std::cout << "Volume type '" << volume_type
59 | << "' support was not built, please recompile" << std::endl;
60 | throw std::runtime_error("Rebuild with " + volume_type + " support");
61 | }
62 | const size_t volume_bytes =
63 | size_t(volume_dims.x) * size_t(volume_dims.y) * size_t(volume_dims.z) * voxel_size;
64 | std::vector<uint8_t> volume_data(volume_bytes, 0);
65 | std::ifstream fin(file.c_str(), std::ios::binary);
66 | if (!fin) {
67 | std::cerr << "Failed to open " << file << "\n";
68 | return 1;
69 | }
70 | if (!fin.read(reinterpret_cast<char *>(volume_data.data()), volume_bytes)) {
71 | std::cerr << "Failed to read volume data\n";
72 | return 1;
73 | }
74 |
75 | std::shared_ptr<vkrt::Device> device = std::make_shared<vkrt::Device>();
76 |
77 | MarchingCubes marching_cubes(device, volume_data.data(), volume_dims, volume_type);
78 |
79 | using namespace std::chrono;
80 | uint32_t num_verts = 0;
81 | size_t total_time = 0;
82 | const size_t num_iters = 10;
83 | for (size_t i = 0; i < num_iters; ++i) {
84 | auto start = steady_clock::now();
85 | num_verts = marching_cubes.compute_surface(isovalue);
86 | auto end = steady_clock::now();
87 | std::cout << "Extraction of surface w/ " << num_verts / 3 << " triangles took "
88 | <<
duration_cast(end - start).count() << "ms\n"; 89 | total_time += duration_cast(end - start).count(); 90 | } 91 | std::cout << "Avg. time " << static_cast(total_time) / num_iters << "ms\n"; 92 | 93 | if (num_verts == 0 || output.empty()) { 94 | return 0; 95 | } 96 | 97 | VkCommandPool command_pool = 98 | device->make_command_pool(VK_COMMAND_POOL_CREATE_TRANSIENT_BIT); 99 | VkCommandBuffer command_buffer; 100 | { 101 | VkCommandBufferAllocateInfo info = {}; 102 | info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; 103 | info.commandPool = command_pool; 104 | info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; 105 | info.commandBufferCount = 1; 106 | CHECK_VULKAN( 107 | vkAllocateCommandBuffers(device->logical_device(), &info, &command_buffer)); 108 | } 109 | 110 | auto readback_verts = vkrt::Buffer::host( 111 | *device, marching_cubes.vertex_buffer->size(), VK_BUFFER_USAGE_TRANSFER_DST_BIT); 112 | 113 | VkCommandBufferBeginInfo begin_info = {}; 114 | begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; 115 | begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; 116 | CHECK_VULKAN(vkBeginCommandBuffer(command_buffer, &begin_info)); 117 | 118 | { 119 | VkBufferCopy copy_cmd = {}; 120 | copy_cmd.size = marching_cubes.vertex_buffer->size(); 121 | vkCmdCopyBuffer(command_buffer, 122 | marching_cubes.vertex_buffer->handle(), 123 | readback_verts->handle(), 124 | 1, 125 | ©_cmd); 126 | } 127 | 128 | CHECK_VULKAN(vkEndCommandBuffer(command_buffer)); 129 | 130 | VkSubmitInfo submit_info = {}; 131 | submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; 132 | submit_info.commandBufferCount = 1; 133 | submit_info.pCommandBuffers = &command_buffer; 134 | CHECK_VULKAN(vkQueueSubmit(device->graphics_queue(), 1, &submit_info, VK_NULL_HANDLE)); 135 | CHECK_VULKAN(vkQueueWaitIdle(device->graphics_queue())); 136 | 137 | glm::vec4 *vertices = reinterpret_cast(readback_verts->map()); 138 | std::ofstream fout(output.c_str()); 139 | fout << "# Isosurface of " << file << " at isovalue " << isovalue << " (" << num_verts / 3 140 | << " triangles)\n"; 141 | for (size_t i = 0; i < num_verts; ++i) { 142 | fout << "v " << vertices[i].x << " " << vertices[i].y << " " << vertices[i].z << "\n"; 143 | } 144 | for (size_t i = 0; i < num_verts; i += 3) { 145 | fout << "f " << i + 1 << " " << i + 2 << " " << i + 3 << "\n"; 146 | } 147 | 148 | readback_verts->unmap(); 149 | 150 | return 0; 151 | } 152 | -------------------------------------------------------------------------------- /src/prefix_sum.comp: -------------------------------------------------------------------------------- 1 | #version 450 core 2 | 3 | // See https://www.eecs.umich.edu/courses/eecs570/hw/parprefix.pdf 4 | 5 | layout(local_size_x = BLOCK_SIZE / 2) in; 6 | 7 | layout(set = 0, binding = 0, std430) buffer Data { 8 | uint vals[]; 9 | }; 10 | 11 | layout(set = 0, binding = 1, std430) buffer BlockSums { 12 | uint block_sums[]; 13 | }; 14 | 15 | shared uint chunk[BLOCK_SIZE]; 16 | 17 | void main(void) { 18 | chunk[2 * gl_LocalInvocationID.x] = vals[2 * gl_GlobalInvocationID.x]; 19 | chunk[2 * gl_LocalInvocationID.x + 1] = vals[2 * gl_GlobalInvocationID.x + 1]; 20 | 21 | uint offs = 1; 22 | // Reduce step up tree 23 | for (int d = BLOCK_SIZE >> 1; d > 0; d = d >> 1) { 24 | barrier(); 25 | if (gl_LocalInvocationID.x < d) { 26 | uint a = offs * (2 * gl_LocalInvocationID.x + 1) - 1; 27 | uint b = offs * (2 * gl_LocalInvocationID.x + 2) - 1; 28 | chunk[b] += chunk[a]; 29 | } 30 | offs = offs << 1; 31 | } 32 | 33 | if (gl_LocalInvocationID.x == 
0) { 34 | block_sums[gl_WorkGroupID.x] = chunk[BLOCK_SIZE - 1]; 35 | chunk[BLOCK_SIZE - 1] = 0; 36 | } 37 | 38 | // Sweep down the tree to finish the scan 39 | for (int d = 1; d < BLOCK_SIZE; d = d << 1) { 40 | offs = offs >> 1; 41 | barrier(); 42 | if (gl_LocalInvocationID.x < d) { 43 | uint a = offs * (2 * gl_LocalInvocationID.x + 1) - 1; 44 | uint b = offs * (2 * gl_LocalInvocationID.x + 2) - 1; 45 | const uint tmp = chunk[a]; 46 | chunk[a] = chunk[b]; 47 | chunk[b] += tmp; 48 | } 49 | } 50 | 51 | barrier(); 52 | vals[2 * gl_GlobalInvocationID.x] = chunk[2 * gl_LocalInvocationID.x]; 53 | vals[2 * gl_GlobalInvocationID.x + 1] = chunk[2 * gl_LocalInvocationID.x + 1]; 54 | } 55 | 56 | -------------------------------------------------------------------------------- /src/stream_compact.comp: -------------------------------------------------------------------------------- 1 | #version 450 core 2 | 3 | layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; 4 | 5 | layout(set = 0, binding = 0) buffer Input { 6 | uint inputs[]; 7 | }; 8 | 9 | layout(set = 0, binding = 1) buffer Offsets { 10 | uint offsets[]; 11 | }; 12 | 13 | // The compaction execution offset chunk we're running 14 | layout(set = 0, binding = 2) uniform CompactionOffset { 15 | uint compact_offset; 16 | }; 17 | 18 | layout(set = 0, binding = 3) buffer Output { 19 | uint outputs[]; 20 | }; 21 | 22 | void main(void) { 23 | // Note: this is just for compacting down id's of "active" elements, so 0's are inherently 24 | // things we don't want to output. 25 | if (inputs[gl_GlobalInvocationID.x] != 0) { 26 | outputs[offsets[gl_GlobalInvocationID.x]] = gl_GlobalInvocationID.x + compact_offset; 27 | } 28 | } 29 | 30 | -------------------------------------------------------------------------------- /src/tri_table.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #define TRI_TABLE_SIZE 256 4 | 5 | const int tri_table[256 * 16] = { 6 | -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 3, -1, 0, 0, 0, 7 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 9, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8 | 0, 0, 8, 1, 9, 8, 3, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 10, 1, -1, 0, 9 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 3, 1, 2, 10, -1, 0, 0, 0, 0, 0, 10 | 0, 0, 0, 0, 9, 2, 10, 9, 0, 2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 2, 10, 11 | 3, 10, 8, 8, 10, 9, -1, 0, 0, 0, 0, 0, 0, 2, 3, 11, -1, 0, 0, 0, 0, 0, 0, 12 | 0, 0, 0, 0, 0, 0, 11, 0, 8, 11, 2, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 13 | 9, 0, 2, 3, 11, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 9, 2, 9, 11, 11, 9, 14 | 8, -1, 0, 0, 0, 0, 0, 0, 3, 10, 1, 3, 11, 10, -1, 0, 0, 0, 0, 0, 0, 0, 0, 15 | 0, 1, 0, 8, 1, 8, 10, 10, 8, 11, -1, 0, 0, 0, 0, 0, 0, 0, 3, 11, 0, 11, 9, 16 | 9, 11, 10, -1, 0, 0, 0, 0, 0, 0, 11, 10, 9, 11, 9, 8, -1, 0, 0, 0, 0, 0, 0, 17 | 0, 0, 0, 4, 7, 8, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 3, 0, 4, 18 | 7, 3, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 7, 8, 9, 0, 1, -1, 0, 0, 0, 0, 19 | 0, 0, 0, 0, 0, 9, 4, 7, 9, 7, 1, 1, 7, 3, -1, 0, 0, 0, 0, 0, 0, 4, 7, 20 | 8, 1, 2, 10, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 3, 0, 4, 7, 3, 2, 10, 1, 21 | -1, 0, 0, 0, 0, 0, 0, 2, 9, 0, 2, 10, 9, 4, 7, 8, -1, 0, 0, 0, 0, 0, 0, 22 | 3, 2, 7, 7, 9, 4, 7, 2, 9, 9, 2, 10, -1, 0, 0, 0, 8, 4, 7, 3, 11, 2, -1, 23 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 11, 2, 7, 2, 4, 4, 2, 0, -1, 0, 0, 0, 0, 24 | 0, 0, 2, 3, 11, 1, 9, 0, 8, 4, 7, -1, 0, 0, 0, 0, 0, 0, 2, 1, 9, 2, 9, 25 | 4, 2, 4, 11, 11, 4, 7, -1, 0, 0, 0, 10, 3, 11, 10, 1, 3, 8, 4, 7, -1, 0, 0, 26 | 0, 0, 0, 0, 4, 7, 0, 0, 
10, 1, 7, 10, 0, 7, 11, 10, -1, 0, 0, 0, 8, 4, 7, 27 | 0, 3, 11, 0, 11, 9, 9, 11, 10, -1, 0, 0, 0, 7, 9, 4, 7, 11, 9, 9, 11, 10, -1, 28 | 0, 0, 0, 0, 0, 0, 4, 9, 5, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 29 | 3, 0, 4, 9, 5, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 4, 0, 1, 5, -1, 0, 30 | 0, 0, 0, 0, 0, 0, 0, 0, 4, 8, 3, 4, 3, 5, 5, 3, 1, -1, 0, 0, 0, 0, 0, 31 | 0, 1, 2, 10, 9, 5, 4, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 9, 5, 8, 3, 0, 32 | 1, 2, 10, -1, 0, 0, 0, 0, 0, 0, 10, 5, 4, 10, 4, 2, 2, 4, 0, -1, 0, 0, 0, 33 | 0, 0, 0, 4, 8, 3, 4, 3, 2, 4, 2, 5, 5, 2, 10, -1, 0, 0, 0, 2, 3, 11, 5, 34 | 4, 9, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 0, 8, 11, 2, 0, 9, 5, 4, -1, 0, 35 | 0, 0, 0, 0, 0, 5, 0, 1, 5, 4, 0, 3, 11, 2, -1, 0, 0, 0, 0, 0, 0, 11, 2, 36 | 8, 8, 5, 4, 2, 5, 8, 2, 1, 5, -1, 0, 0, 0, 3, 10, 1, 3, 11, 10, 5, 4, 9, 37 | -1, 0, 0, 0, 0, 0, 0, 9, 5, 4, 1, 0, 8, 1, 8, 10, 10, 8, 11, -1, 0, 0, 0, 38 | 10, 5, 11, 11, 0, 3, 11, 5, 0, 0, 5, 4, -1, 0, 0, 0, 4, 10, 5, 4, 8, 10, 10, 39 | 8, 11, -1, 0, 0, 0, 0, 0, 0, 7, 9, 5, 7, 8, 9, -1, 0, 0, 0, 0, 0, 0, 0, 40 | 0, 0, 0, 9, 5, 0, 5, 3, 3, 5, 7, -1, 0, 0, 0, 0, 0, 0, 8, 0, 1, 8, 1, 41 | 7, 7, 1, 5, -1, 0, 0, 0, 0, 0, 0, 3, 1, 5, 3, 5, 7, -1, 0, 0, 0, 0, 0, 42 | 0, 0, 0, 0, 7, 9, 5, 7, 8, 9, 1, 2, 10, -1, 0, 0, 0, 0, 0, 0, 1, 2, 10, 43 | 0, 9, 5, 0, 5, 3, 3, 5, 7, -1, 0, 0, 0, 7, 8, 5, 5, 2, 10, 8, 2, 5, 8, 44 | 0, 2, -1, 0, 0, 0, 10, 3, 2, 10, 5, 3, 3, 5, 7, -1, 0, 0, 0, 0, 0, 0, 9, 45 | 7, 8, 9, 5, 7, 11, 2, 3, -1, 0, 0, 0, 0, 0, 0, 0, 9, 2, 2, 7, 11, 2, 9, 46 | 7, 7, 9, 5, -1, 0, 0, 0, 3, 11, 2, 8, 0, 1, 8, 1, 7, 7, 1, 5, -1, 0, 0, 47 | 0, 2, 7, 11, 2, 1, 7, 7, 1, 5, -1, 0, 0, 0, 0, 0, 0, 11, 1, 3, 11, 10, 1, 48 | 7, 8, 9, 7, 9, 5, -1, 0, 0, 0, 11, 10, 1, 11, 1, 7, 7, 1, 0, 7, 0, 9, 7, 49 | 9, 5, -1, 5, 7, 8, 5, 8, 10, 10, 8, 0, 10, 0, 3, 10, 3, 11, -1, 11, 10, 5, 11, 50 | 5, 7, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 6, 5, -1, 0, 0, 0, 0, 0, 0, 0, 51 | 0, 0, 0, 0, 0, 0, 8, 3, 10, 6, 5, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 52 | 1, 5, 10, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 1, 9, 8, 3, 1, 10, 6, 5, 53 | -1, 0, 0, 0, 0, 0, 0, 6, 1, 2, 6, 5, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 54 | 6, 1, 2, 6, 5, 1, 0, 8, 3, -1, 0, 0, 0, 0, 0, 0, 5, 9, 0, 5, 0, 6, 6, 55 | 0, 2, -1, 0, 0, 0, 0, 0, 0, 6, 5, 2, 2, 8, 3, 5, 8, 2, 5, 9, 8, -1, 0, 56 | 0, 0, 2, 3, 11, 10, 6, 5, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 2, 0, 8, 57 | 11, 6, 5, 10, -1, 0, 0, 0, 0, 0, 0, 0, 1, 9, 3, 11, 2, 10, 6, 5, -1, 0, 0, 58 | 0, 0, 0, 0, 10, 6, 5, 2, 1, 9, 2, 9, 11, 11, 9, 8, -1, 0, 0, 0, 11, 6, 5, 59 | 11, 5, 3, 3, 5, 1, -1, 0, 0, 0, 0, 0, 0, 11, 6, 8, 8, 1, 0, 8, 6, 1, 1, 60 | 6, 5, -1, 0, 0, 0, 0, 3, 11, 0, 11, 6, 0, 6, 9, 9, 6, 5, -1, 0, 0, 0, 5, 61 | 11, 6, 5, 9, 11, 11, 9, 8, -1, 0, 0, 0, 0, 0, 0, 7, 8, 4, 6, 5, 10, -1, 0, 62 | 0, 0, 0, 0, 0, 0, 0, 0, 3, 4, 7, 3, 0, 4, 5, 10, 6, -1, 0, 0, 0, 0, 0, 63 | 0, 6, 5, 10, 7, 8, 4, 9, 0, 1, -1, 0, 0, 0, 0, 0, 0, 5, 10, 6, 9, 4, 7, 64 | 9, 7, 1, 1, 7, 3, -1, 0, 0, 0, 1, 6, 5, 1, 2, 6, 7, 8, 4, -1, 0, 0, 0, 65 | 0, 0, 0, 7, 0, 4, 7, 3, 0, 6, 5, 1, 6, 1, 2, -1, 0, 0, 0, 4, 7, 8, 5, 66 | 9, 0, 5, 0, 6, 6, 0, 2, -1, 0, 0, 0, 2, 6, 5, 2, 5, 3, 3, 5, 9, 3, 9, 67 | 4, 3, 4, 7, -1, 4, 7, 8, 5, 10, 6, 11, 2, 3, -1, 0, 0, 0, 0, 0, 0, 6, 5, 68 | 10, 7, 11, 2, 7, 2, 4, 4, 2, 0, -1, 0, 0, 0, 4, 7, 8, 9, 0, 1, 6, 5, 10, 69 | 3, 11, 2, -1, 0, 0, 0, 6, 5, 10, 11, 4, 7, 11, 2, 4, 4, 2, 9, 9, 2, 1, -1, 70 | 7, 8, 4, 11, 6, 5, 11, 5, 3, 3, 5, 1, -1, 0, 0, 0, 0, 4, 7, 0, 7, 1, 1, 71 | 7, 11, 1, 11, 6, 1, 6, 5, -1, 4, 7, 8, 9, 6, 5, 9, 0, 6, 6, 0, 11, 11, 0, 72 | 3, -1, 7, 
11, 4, 11, 9, 4, 11, 5, 9, 11, 6, 5, -1, 0, 0, 0, 10, 4, 9, 10, 6, 73 | 4, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 4, 9, 10, 6, 4, 8, 3, 0, -1, 0, 0, 74 | 0, 0, 0, 0, 1, 10, 6, 1, 6, 0, 0, 6, 4, -1, 0, 0, 0, 0, 0, 0, 4, 8, 6, 75 | 6, 1, 10, 6, 8, 1, 1, 8, 3, -1, 0, 0, 0, 9, 1, 2, 9, 2, 4, 4, 2, 6, -1, 76 | 0, 0, 0, 0, 0, 0, 0, 8, 3, 9, 1, 2, 9, 2, 4, 4, 2, 6, -1, 0, 0, 0, 0, 77 | 2, 6, 0, 6, 4, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 4, 8, 3, 2, 4, 4, 2, 78 | 6, -1, 0, 0, 0, 0, 0, 0, 4, 10, 6, 4, 9, 10, 2, 3, 11, -1, 0, 0, 0, 0, 0, 79 | 0, 8, 2, 0, 8, 11, 2, 4, 9, 10, 4, 10, 6, -1, 0, 0, 0, 2, 3, 11, 1, 10, 6, 80 | 1, 6, 0, 0, 6, 4, -1, 0, 0, 0, 8, 11, 2, 8, 2, 4, 4, 2, 1, 4, 1, 10, 4, 81 | 10, 6, -1, 3, 11, 1, 1, 4, 9, 11, 4, 1, 11, 6, 4, -1, 0, 0, 0, 6, 4, 9, 6, 82 | 9, 11, 11, 9, 1, 11, 1, 0, 11, 0, 8, -1, 11, 0, 3, 11, 6, 0, 0, 6, 4, -1, 0, 83 | 0, 0, 0, 0, 0, 8, 11, 6, 8, 6, 4, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 84 | 8, 6, 8, 10, 10, 8, 9, -1, 0, 0, 0, 0, 0, 0, 3, 0, 7, 7, 10, 6, 0, 10, 7, 85 | 0, 9, 10, -1, 0, 0, 0, 1, 10, 6, 1, 6, 7, 1, 7, 0, 0, 7, 8, -1, 0, 0, 0, 86 | 6, 1, 10, 6, 7, 1, 1, 7, 3, -1, 0, 0, 0, 0, 0, 0, 9, 1, 8, 8, 6, 7, 8, 87 | 1, 6, 6, 1, 2, -1, 0, 0, 0, 7, 3, 0, 7, 0, 6, 6, 0, 9, 6, 9, 1, 6, 1, 88 | 2, -1, 8, 6, 7, 8, 0, 6, 6, 0, 2, -1, 0, 0, 0, 0, 0, 0, 2, 6, 7, 2, 7, 89 | 3, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 2, 3, 6, 7, 8, 6, 8, 10, 10, 8, 9, 90 | -1, 0, 0, 0, 9, 10, 6, 9, 6, 0, 0, 6, 7, 0, 7, 11, 0, 11, 2, -1, 3, 11, 2, 91 | 0, 7, 8, 0, 1, 7, 7, 1, 6, 6, 1, 10, -1, 6, 7, 10, 7, 1, 10, 7, 2, 1, 7, 92 | 11, 2, -1, 0, 0, 0, 1, 3, 11, 1, 11, 9, 9, 11, 6, 9, 6, 7, 9, 7, 8, -1, 6, 93 | 7, 11, 9, 1, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 7, 0, 6, 7, 0, 11, 94 | 6, 0, 3, 11, -1, 0, 0, 0, 6, 7, 11, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95 | 0, 6, 11, 7, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 8, 11, 7, 6, 96 | -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 11, 7, 9, 0, 1, -1, 0, 0, 0, 0, 0, 0, 97 | 0, 0, 0, 1, 8, 3, 1, 9, 8, 7, 6, 11, -1, 0, 0, 0, 0, 0, 0, 11, 7, 6, 2, 98 | 10, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 10, 0, 8, 3, 11, 7, 6, -1, 0, 99 | 0, 0, 0, 0, 0, 9, 2, 10, 9, 0, 2, 11, 7, 6, -1, 0, 0, 0, 0, 0, 0, 11, 7, 100 | 6, 3, 2, 10, 3, 10, 8, 8, 10, 9, -1, 0, 0, 0, 2, 7, 6, 2, 3, 7, -1, 0, 0, 101 | 0, 0, 0, 0, 0, 0, 0, 8, 7, 6, 8, 6, 0, 0, 6, 2, -1, 0, 0, 0, 0, 0, 0, 102 | 7, 2, 3, 7, 6, 2, 1, 9, 0, -1, 0, 0, 0, 0, 0, 0, 8, 7, 9, 9, 2, 1, 9, 103 | 7, 2, 2, 7, 6, -1, 0, 0, 0, 6, 10, 1, 6, 1, 7, 7, 1, 3, -1, 0, 0, 0, 0, 104 | 0, 0, 6, 10, 1, 6, 1, 0, 6, 0, 7, 7, 0, 8, -1, 0, 0, 0, 7, 6, 3, 3, 9, 105 | 0, 6, 9, 3, 6, 10, 9, -1, 0, 0, 0, 6, 8, 7, 6, 10, 8, 8, 10, 9, -1, 0, 0, 106 | 0, 0, 0, 0, 8, 6, 11, 8, 4, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 3, 0, 107 | 11, 0, 6, 6, 0, 4, -1, 0, 0, 0, 0, 0, 0, 6, 8, 4, 6, 11, 8, 0, 1, 9, -1, 108 | 0, 0, 0, 0, 0, 0, 1, 9, 3, 3, 6, 11, 9, 6, 3, 9, 4, 6, -1, 0, 0, 0, 8, 109 | 6, 11, 8, 4, 6, 10, 1, 2, -1, 0, 0, 0, 0, 0, 0, 2, 10, 1, 11, 3, 0, 11, 0, 110 | 6, 6, 0, 4, -1, 0, 0, 0, 11, 4, 6, 11, 8, 4, 2, 10, 9, 2, 9, 0, -1, 0, 0, 111 | 0, 4, 6, 11, 4, 11, 9, 9, 11, 3, 9, 3, 2, 9, 2, 10, -1, 3, 8, 4, 3, 4, 2, 112 | 2, 4, 6, -1, 0, 0, 0, 0, 0, 0, 2, 0, 4, 2, 4, 6, -1, 0, 0, 0, 0, 0, 0, 113 | 0, 0, 0, 0, 1, 9, 3, 8, 4, 3, 4, 2, 2, 4, 6, -1, 0, 0, 0, 9, 2, 1, 9, 114 | 4, 2, 2, 4, 6, -1, 0, 0, 0, 0, 0, 0, 6, 10, 4, 4, 3, 8, 4, 10, 3, 3, 10, 115 | 1, -1, 0, 0, 0, 1, 6, 10, 1, 0, 6, 6, 0, 4, -1, 0, 0, 0, 0, 0, 0, 10, 9, 116 | 0, 10, 0, 6, 6, 0, 3, 6, 3, 8, 6, 8, 4, -1, 10, 9, 4, 10, 4, 6, -1, 0, 0, 117 | 0, 0, 0, 0, 0, 0, 0, 6, 11, 7, 5, 4, 9, -1, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 118 | 0, 8, 3, 9, 5, 4, 7, 6, 11, -1, 0, 0, 0, 0, 0, 0, 0, 5, 4, 0, 1, 5, 6, 119 | 11, 7, -1, 0, 0, 0, 0, 0, 0, 7, 6, 11, 4, 8, 3, 4, 3, 5, 5, 3, 1, -1, 0, 120 | 0, 0, 2, 10, 1, 11, 7, 6, 5, 4, 9, -1, 0, 0, 0, 0, 0, 0, 0, 8, 3, 1, 2, 121 | 10, 4, 9, 5, 11, 7, 6, -1, 0, 0, 0, 6, 11, 7, 10, 5, 4, 10, 4, 2, 2, 4, 0, 122 | -1, 0, 0, 0, 6, 11, 7, 5, 2, 10, 5, 4, 2, 2, 4, 3, 3, 4, 8, -1, 2, 7, 6, 123 | 2, 3, 7, 4, 9, 5, -1, 0, 0, 0, 0, 0, 0, 4, 9, 5, 8, 7, 6, 8, 6, 0, 0, 124 | 6, 2, -1, 0, 0, 0, 3, 6, 2, 3, 7, 6, 0, 1, 5, 0, 5, 4, -1, 0, 0, 0, 1, 125 | 5, 4, 1, 4, 2, 2, 4, 8, 2, 8, 7, 2, 7, 6, -1, 5, 4, 9, 6, 10, 1, 6, 1, 126 | 7, 7, 1, 3, -1, 0, 0, 0, 4, 9, 5, 7, 0, 8, 7, 6, 0, 0, 6, 1, 1, 6, 10, 127 | -1, 3, 7, 6, 3, 6, 0, 0, 6, 10, 0, 10, 5, 0, 5, 4, -1, 4, 8, 5, 8, 10, 5, 128 | 8, 6, 10, 8, 7, 6, -1, 0, 0, 0, 5, 6, 11, 5, 11, 9, 9, 11, 8, -1, 0, 0, 0, 129 | 0, 0, 0, 0, 9, 5, 0, 5, 6, 0, 6, 3, 3, 6, 11, -1, 0, 0, 0, 8, 0, 11, 11, 130 | 5, 6, 11, 0, 5, 5, 0, 1, -1, 0, 0, 0, 11, 5, 6, 11, 3, 5, 5, 3, 1, -1, 0, 131 | 0, 0, 0, 0, 0, 10, 1, 2, 5, 6, 11, 5, 11, 9, 9, 11, 8, -1, 0, 0, 0, 2, 10, 132 | 1, 3, 6, 11, 3, 0, 6, 6, 0, 5, 5, 0, 9, -1, 0, 2, 10, 0, 10, 8, 8, 10, 5, 133 | 8, 5, 6, 8, 6, 11, -1, 11, 3, 6, 3, 5, 6, 3, 10, 5, 3, 2, 10, -1, 0, 0, 0, 134 | 2, 3, 6, 6, 9, 5, 3, 9, 6, 3, 8, 9, -1, 0, 0, 0, 5, 0, 9, 5, 6, 0, 0, 135 | 6, 2, -1, 0, 0, 0, 0, 0, 0, 6, 2, 3, 6, 3, 5, 5, 3, 8, 5, 8, 0, 5, 0, 136 | 1, -1, 6, 2, 1, 6, 1, 5, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 5, 8, 5, 137 | 3, 3, 5, 6, 3, 6, 10, 3, 10, 1, -1, 1, 0, 10, 0, 6, 10, 0, 5, 6, 0, 9, 5, 138 | -1, 0, 0, 0, 0, 3, 8, 10, 5, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 5, 6, 139 | -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 5, 10, 11, 7, 5, -1, 0, 0, 0, 140 | 0, 0, 0, 0, 0, 0, 5, 11, 7, 5, 10, 11, 3, 0, 8, -1, 0, 0, 0, 0, 0, 0, 11, 141 | 5, 10, 11, 7, 5, 9, 0, 1, -1, 0, 0, 0, 0, 0, 0, 9, 3, 1, 9, 8, 3, 5, 10, 142 | 11, 5, 11, 7, -1, 0, 0, 0, 2, 11, 7, 2, 7, 1, 1, 7, 5, -1, 0, 0, 0, 0, 0, 143 | 0, 3, 0, 8, 2, 11, 7, 2, 7, 1, 1, 7, 5, -1, 0, 0, 0, 2, 11, 0, 0, 5, 9, 144 | 0, 11, 5, 5, 11, 7, -1, 0, 0, 0, 9, 8, 3, 9, 3, 5, 5, 3, 2, 5, 2, 11, 5, 145 | 11, 7, -1, 10, 2, 3, 10, 3, 5, 5, 3, 7, -1, 0, 0, 0, 0, 0, 0, 5, 10, 7, 7, 146 | 0, 8, 10, 0, 7, 10, 2, 0, -1, 0, 0, 0, 1, 9, 0, 10, 2, 3, 10, 3, 5, 5, 3, 147 | 7, -1, 0, 0, 0, 7, 5, 10, 7, 10, 8, 8, 10, 2, 8, 2, 1, 8, 1, 9, -1, 7, 5, 148 | 1, 7, 1, 3, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 1, 0, 8, 7, 1, 1, 7, 5, 149 | -1, 0, 0, 0, 0, 0, 0, 0, 5, 9, 0, 3, 5, 5, 3, 7, -1, 0, 0, 0, 0, 0, 0, 150 | 7, 5, 9, 7, 9, 8, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 10, 4, 10, 8, 8, 151 | 10, 11, -1, 0, 0, 0, 0, 0, 0, 11, 3, 10, 10, 4, 5, 10, 3, 4, 4, 3, 0, -1, 0, 152 | 0, 0, 9, 0, 1, 4, 5, 10, 4, 10, 8, 8, 10, 11, -1, 0, 0, 0, 3, 1, 9, 3, 9, 153 | 11, 11, 9, 4, 11, 4, 5, 11, 5, 10, -1, 8, 4, 11, 11, 1, 2, 4, 1, 11, 4, 5, 1, 154 | -1, 0, 0, 0, 5, 1, 2, 5, 2, 4, 4, 2, 11, 4, 11, 3, 4, 3, 0, -1, 11, 8, 4, 155 | 11, 4, 2, 2, 4, 5, 2, 5, 9, 2, 9, 0, -1, 2, 11, 3, 5, 9, 4, -1, 0, 0, 0, 156 | 0, 0, 0, 0, 0, 0, 4, 5, 10, 4, 10, 2, 4, 2, 8, 8, 2, 3, -1, 0, 0, 0, 10, 157 | 4, 5, 10, 2, 4, 4, 2, 0, -1, 0, 0, 0, 0, 0, 0, 0, 1, 9, 8, 2, 3, 8, 4, 158 | 2, 2, 4, 10, 10, 4, 5, -1, 10, 2, 5, 2, 4, 5, 2, 9, 4, 2, 1, 9, -1, 0, 0, 159 | 0, 4, 3, 8, 4, 5, 3, 3, 5, 1, -1, 0, 0, 0, 0, 0, 0, 0, 4, 5, 0, 5, 1, 160 | -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 9, 3, 5, 9, 3, 4, 5, 3, 8, 4, -1, 161 | 0, 0, 0, 4, 5, 9, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 4, 9, 7, 162 | 9, 11, 11, 9, 10, -1, 0, 0, 0, 0, 0, 0, 8, 3, 0, 7, 4, 9, 
7, 9, 11, 11, 9, 163 | 10, -1, 0, 0, 0, 0, 1, 4, 4, 11, 7, 1, 11, 4, 1, 10, 11, -1, 0, 0, 0, 10, 11, 164 | 7, 10, 7, 1, 1, 7, 4, 1, 4, 8, 1, 8, 3, -1, 2, 11, 7, 2, 7, 4, 2, 4, 1, 165 | 1, 4, 9, -1, 0, 0, 0, 0, 8, 3, 1, 4, 9, 1, 2, 4, 4, 2, 7, 7, 2, 11, -1, 166 | 7, 2, 11, 7, 4, 2, 2, 4, 0, -1, 0, 0, 0, 0, 0, 0, 7, 4, 11, 4, 2, 11, 4, 167 | 3, 2, 4, 8, 3, -1, 0, 0, 0, 7, 4, 3, 3, 10, 2, 3, 4, 10, 10, 4, 9, -1, 0, 168 | 0, 0, 2, 0, 8, 2, 8, 10, 10, 8, 7, 10, 7, 4, 10, 4, 9, -1, 4, 0, 1, 4, 1, 169 | 7, 7, 1, 10, 7, 10, 2, 7, 2, 3, -1, 4, 8, 7, 1, 10, 2, -1, 0, 0, 0, 0, 0, 170 | 0, 0, 0, 0, 9, 7, 4, 9, 1, 7, 7, 1, 3, -1, 0, 0, 0, 0, 0, 0, 8, 7, 0, 171 | 7, 1, 0, 7, 9, 1, 7, 4, 9, -1, 0, 0, 0, 4, 0, 3, 4, 3, 7, -1, 0, 0, 0, 172 | 0, 0, 0, 0, 0, 0, 4, 8, 7, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 173 | 9, 10, 8, 10, 11, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 3, 0, 9, 11, 11, 9, 174 | 10, -1, 0, 0, 0, 0, 0, 0, 1, 8, 0, 1, 10, 8, 8, 10, 11, -1, 0, 0, 0, 0, 0, 175 | 0, 3, 1, 10, 3, 10, 11, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 9, 1, 2, 11, 9, 176 | 9, 11, 8, -1, 0, 0, 0, 0, 0, 0, 0, 9, 3, 9, 11, 3, 9, 2, 11, 9, 1, 2, -1, 177 | 0, 0, 0, 11, 8, 0, 11, 0, 2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 11, 3, -1, 178 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 10, 2, 3, 8, 10, 10, 8, 9, -1, 0, 179 | 0, 0, 0, 0, 0, 9, 10, 2, 9, 2, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 8, 180 | 2, 8, 10, 2, 8, 1, 10, 8, 0, 1, -1, 0, 0, 0, 2, 1, 10, -1, 0, 0, 0, 0, 0, 181 | 0, 0, 0, 0, 0, 0, 0, 8, 9, 1, 8, 1, 3, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 182 | 1, 0, 9, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 8, -1, 0, 0, 0, 183 | 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 184 | 0, 0}; 185 | -------------------------------------------------------------------------------- /src/util.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #ifdef _WIN32 4 | #include 5 | #else 6 | #include 7 | #endif 8 | #include "util.h" 9 | #include 10 | 11 | std::string pretty_print_count(const double count) 12 | { 13 | const double giga = 1000000000; 14 | const double mega = 1000000; 15 | const double kilo = 1000; 16 | if (count > giga) { 17 | return std::to_string(count / giga) + " G"; 18 | } else if (count > mega) { 19 | return std::to_string(count / mega) + " M"; 20 | } else if (count > kilo) { 21 | return std::to_string(count / kilo) + " K"; 22 | } 23 | return std::to_string(count); 24 | } 25 | 26 | uint64_t align_to(uint64_t val, uint64_t align) 27 | { 28 | return ((val + align - 1) / align) * align; 29 | } 30 | 31 | void ortho_basis(glm::vec3 &v_x, glm::vec3 &v_y, const glm::vec3 &n) 32 | { 33 | v_y = glm::vec3(0); 34 | 35 | if (n.x < 0.6f && n.x > -0.6f) { 36 | v_y.x = 1.f; 37 | } else if (n.y < 0.6f && n.y > -0.6f) { 38 | v_y.y = 1.f; 39 | } else if (n.z < 0.6f && n.z > -0.6f) { 40 | v_y.z = 1.f; 41 | } else { 42 | v_y.x = 1.f; 43 | } 44 | v_x = glm::normalize(glm::cross(v_y, n)); 45 | v_y = glm::normalize(glm::cross(n, v_x)); 46 | } 47 | 48 | void canonicalize_path(std::string &path) 49 | { 50 | std::replace(path.begin(), path.end(), '\\', '/'); 51 | } 52 | 53 | std::string get_file_extension(const std::string &fname) 54 | { 55 | const size_t fnd = fname.find_last_of('.'); 56 | if (fnd == std::string::npos) { 57 | return ""; 58 | } 59 | return fname.substr(fnd + 1); 60 | } 61 | 62 | std::string get_cpu_brand() 63 | { 64 | std::string brand = "Unspecified"; 65 | std::array regs; 66 | #ifdef _WIN32 67 | __cpuid(regs.data(), 0x80000000); 68 | #else 69 | 
__cpuid(0x80000000, regs[0], regs[1], regs[2], regs[3]); 70 | #endif 71 | if (regs[0] >= 0x80000004) { 72 | char b[64] = {0}; 73 | for (int i = 0; i < 3; ++i) { 74 | #ifdef _WIN32 75 | __cpuid(regs.data(), 0x80000000 + i + 2); 76 | #else 77 | __cpuid(0x80000000 + i + 2, regs[0], regs[1], regs[2], regs[3]); 78 | #endif 79 | std::memcpy(b + i * sizeof(regs), regs.data(), sizeof(regs)); 80 | } 81 | brand = b; 82 | } 83 | return brand; 84 | } 85 | 86 | float srgb_to_linear(float x) 87 | { 88 | if (x <= 0.04045f) { 89 | return x / 12.92f; 90 | } 91 | return std::pow((x + 0.055f) / 1.055f, 2.4f); 92 | } 93 | 94 | float linear_to_srgb(float x) 95 | { 96 | if (x <= 0.0031308f) { 97 | return 12.92f * x; 98 | } 99 | return 1.055f * pow(x, 1.f / 2.4f) - 0.055f; 100 | } 101 | 102 | float luminance(const glm::vec3 &c) 103 | { 104 | return 0.2126f * c.x + 0.7152f * c.y + 0.0722f * c.z; 105 | } 106 | -------------------------------------------------------------------------------- /src/util.glsl: -------------------------------------------------------------------------------- 1 | #ifndef UTIL_GLSL 2 | #define UTIL_GLSL 3 | 4 | #extension GL_EXT_ray_tracing : require 5 | #extension GL_EXT_scalar_block_layout : require 6 | #extension GL_EXT_nonuniform_qualifier : enable 7 | #extension GL_EXT_buffer_reference2 : enable 8 | 9 | #define M_PI 3.14159265358979323846f 10 | #define M_1_PI 0.318309886183790671538f 11 | #define EPSILON 0.0001f 12 | #define uint32_t uint 13 | 14 | #define PRIMARY_RAY 0 15 | #define OCCLUSION_RAY 1 16 | #define MAX_PATH_DEPTH 5 17 | 18 | struct RayPayload { 19 | vec3 normal; 20 | float dist; 21 | vec2 uv; 22 | uint material_id; 23 | float pad; 24 | }; 25 | 26 | float linear_to_srgb(float x) { 27 | if (x <= 0.0031308f) { 28 | return 12.92f * x; 29 | } 30 | return 1.055f * pow(x, 1.f / 2.4f) - 0.055f; 31 | } 32 | 33 | void ortho_basis(out vec3 v_x, out vec3 v_y, const vec3 n) { 34 | v_y = vec3(0, 0, 0); 35 | 36 | if (n.x < 0.6f && n.x > -0.6f) { 37 | v_y.x = 1.f; 38 | } else if (n.y < 0.6f && n.y > -0.6f) { 39 | v_y.y = 1.f; 40 | } else if (n.z < 0.6f && n.z > -0.6f) { 41 | v_y.z = 1.f; 42 | } else { 43 | v_y.x = 1.f; 44 | } 45 | v_x = normalize(cross(v_y, n)); 46 | v_y = normalize(cross(n, v_x)); 47 | } 48 | 49 | float luminance(in const vec3 c) { 50 | return 0.2126f * c.r + 0.7152f * c.g + 0.0722f * c.b; 51 | } 52 | 53 | float pow2(float x) { 54 | return x * x; 55 | } 56 | 57 | #endif 58 | 59 | -------------------------------------------------------------------------------- /src/util.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | // Format the count as #G, #M, #K, depending on its magnitude 7 | std::string pretty_print_count(const double count); 8 | 9 | uint64_t align_to(uint64_t val, uint64_t align); 10 | 11 | void ortho_basis(glm::vec3 &v_x, glm::vec3 &v_y, const glm::vec3 &n); 12 | 13 | void canonicalize_path(std::string &path); 14 | 15 | std::string get_file_extension(const std::string &fname); 16 | 17 | std::string get_cpu_brand(); 18 | 19 | float srgb_to_linear(const float x); 20 | 21 | float linear_to_srgb(const float x); 22 | 23 | float luminance(const glm::vec3 &c); 24 | -------------------------------------------------------------------------------- /src/vulkan_utils.cpp: -------------------------------------------------------------------------------- 1 | #include "vulkan_utils.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | namespace vkrt { 9 | 10 | void 
make_basic_compute_pipeline(const uint32_t *code, 11 | const size_t code_size, 12 | const std::vector &desc_set_layouts, 13 | Device &device, 14 | VkPipelineLayout &layout, 15 | VkPipeline &pipeline) 16 | { 17 | if (layout == VK_NULL_HANDLE) { 18 | VkPipelineLayoutCreateInfo pipeline_create_info = {}; 19 | pipeline_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; 20 | pipeline_create_info.setLayoutCount = desc_set_layouts.size(); 21 | pipeline_create_info.pSetLayouts = desc_set_layouts.data(); 22 | CHECK_VULKAN(vkCreatePipelineLayout( 23 | device.logical_device(), &pipeline_create_info, nullptr, &layout)); 24 | } 25 | 26 | VkShaderModule shader_module = VK_NULL_HANDLE; 27 | VkShaderModuleCreateInfo create_info = {}; 28 | create_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; 29 | create_info.codeSize = code_size; 30 | create_info.pCode = code; 31 | CHECK_VULKAN( 32 | vkCreateShaderModule(device.logical_device(), &create_info, nullptr, &shader_module)); 33 | 34 | VkPipelineShaderStageCreateInfo scan_blocks_stage = {}; 35 | scan_blocks_stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; 36 | scan_blocks_stage.stage = VK_SHADER_STAGE_COMPUTE_BIT; 37 | scan_blocks_stage.module = shader_module; 38 | scan_blocks_stage.pName = "main"; 39 | 40 | VkComputePipelineCreateInfo pipeline_ci = {}; 41 | pipeline_ci.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; 42 | pipeline_ci.stage = scan_blocks_stage; 43 | pipeline_ci.layout = layout; 44 | CHECK_VULKAN(vkCreateComputePipelines( 45 | device.logical_device(), VK_NULL_HANDLE, 1, &pipeline_ci, nullptr, &pipeline)); 46 | } 47 | 48 | static const std::array validation_layers = {"VK_LAYER_KHRONOS_validation"}; 49 | 50 | Device::Device(const std::vector &instance_extensions, 51 | const std::vector &logical_device_extensions) 52 | { 53 | make_instance(instance_extensions); 54 | select_physical_device(); 55 | make_logical_device(logical_device_extensions); 56 | 57 | // Query the properties we'll use frequently 58 | vkGetPhysicalDeviceMemoryProperties(vk_physical_device, &mem_props); 59 | } 60 | 61 | Device::~Device() 62 | { 63 | if (vk_instance != VK_NULL_HANDLE) { 64 | vkDestroyDevice(device, nullptr); 65 | vkDestroyInstance(vk_instance, nullptr); 66 | } 67 | } 68 | 69 | Device::Device(Device &&d) 70 | : vk_instance(d.vk_instance), 71 | vk_physical_device(d.vk_physical_device), 72 | device(d.device), 73 | queue(d.queue), 74 | mem_props(d.mem_props) 75 | { 76 | d.vk_instance = VK_NULL_HANDLE; 77 | d.vk_physical_device = VK_NULL_HANDLE; 78 | d.device = VK_NULL_HANDLE; 79 | d.queue = VK_NULL_HANDLE; 80 | } 81 | 82 | Device &Device::operator=(Device &&d) 83 | { 84 | if (vk_instance != VK_NULL_HANDLE) { 85 | vkDestroyDevice(device, nullptr); 86 | vkDestroyInstance(vk_instance, nullptr); 87 | } 88 | vk_instance = d.vk_instance; 89 | vk_physical_device = d.vk_physical_device; 90 | device = d.device; 91 | queue = d.queue; 92 | mem_props = d.mem_props; 93 | 94 | d.vk_instance = VK_NULL_HANDLE; 95 | d.vk_physical_device = VK_NULL_HANDLE; 96 | d.device = VK_NULL_HANDLE; 97 | d.queue = VK_NULL_HANDLE; 98 | 99 | return *this; 100 | } 101 | 102 | VkDevice Device::logical_device() 103 | { 104 | return device; 105 | } 106 | 107 | VkPhysicalDevice Device::physical_device() 108 | { 109 | return vk_physical_device; 110 | } 111 | 112 | VkInstance Device::instance() 113 | { 114 | return vk_instance; 115 | } 116 | 117 | VkQueue Device::graphics_queue() 118 | { 119 | return queue; 120 | } 121 | 122 | uint32_t Device::queue_index() 
const 123 | { 124 | return graphics_queue_index; 125 | } 126 | 127 | VkCommandPool Device::make_command_pool(VkCommandPoolCreateFlagBits flags) 128 | { 129 | VkCommandPool pool = VK_NULL_HANDLE; 130 | VkCommandPoolCreateInfo create_info = {}; 131 | create_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; 132 | create_info.flags = flags; 133 | create_info.queueFamilyIndex = graphics_queue_index; 134 | CHECK_VULKAN(vkCreateCommandPool(device, &create_info, nullptr, &pool)); 135 | return pool; 136 | } 137 | 138 | uint32_t Device::memory_type_index(uint32_t type_filter, VkMemoryPropertyFlags props) const 139 | { 140 | for (uint32_t i = 0; i < mem_props.memoryTypeCount; ++i) { 141 | if (type_filter & (1 << i) && 142 | (mem_props.memoryTypes[i].propertyFlags & props) == props) { 143 | return i; 144 | } 145 | } 146 | throw std::runtime_error("failed to find appropriate memory"); 147 | } 148 | 149 | VkDeviceMemory Device::alloc(size_t nbytes, uint32_t type_filter, VkMemoryPropertyFlags props) 150 | { 151 | VkMemoryAllocateInfo info = {}; 152 | info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; 153 | info.allocationSize = nbytes; 154 | info.memoryTypeIndex = memory_type_index(type_filter, props); 155 | 156 | VkMemoryAllocateFlagsInfo flags = {}; 157 | flags.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO; 158 | /* 159 | * not used here, not supported on intel 160 | if (props & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) { 161 | flags.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT; 162 | info.pNext = &flags; 163 | } 164 | */ 165 | 166 | VkDeviceMemory mem = VK_NULL_HANDLE; 167 | CHECK_VULKAN(vkAllocateMemory(device, &info, nullptr, &mem)); 168 | return mem; 169 | } 170 | 171 | const VkPhysicalDeviceMemoryProperties &Device::memory_properties() const 172 | { 173 | return mem_props; 174 | } 175 | 176 | void Device::make_instance(const std::vector &extensions) 177 | { 178 | VkApplicationInfo app_info = {}; 179 | app_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; 180 | app_info.pApplicationName = "Marching Cubes"; 181 | app_info.applicationVersion = VK_MAKE_VERSION(1, 0, 0); 182 | app_info.pEngineName = "None"; 183 | app_info.engineVersion = VK_MAKE_VERSION(1, 0, 0); 184 | app_info.apiVersion = VK_API_VERSION_1_2; 185 | 186 | std::vector extension_names; 187 | for (const auto &ext : extensions) { 188 | extension_names.push_back(ext.c_str()); 189 | } 190 | 191 | VkInstanceCreateInfo create_info = {}; 192 | create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; 193 | create_info.pApplicationInfo = &app_info; 194 | create_info.enabledExtensionCount = extension_names.size(); 195 | create_info.ppEnabledExtensionNames = 196 | extension_names.empty() ? 
nullptr : extension_names.data(); 197 | #ifdef _DEBUG 198 | create_info.enabledLayerCount = validation_layers.size(); 199 | create_info.ppEnabledLayerNames = validation_layers.data(); 200 | #else 201 | create_info.enabledLayerCount = 0; 202 | create_info.ppEnabledLayerNames = nullptr; 203 | #endif 204 | 205 | CHECK_VULKAN(vkCreateInstance(&create_info, nullptr, &vk_instance)); 206 | } 207 | 208 | void Device::select_physical_device() 209 | { 210 | uint32_t device_count = 0; 211 | vkEnumeratePhysicalDevices(vk_instance, &device_count, nullptr); 212 | std::vector devices(device_count, VkPhysicalDevice{}); 213 | vkEnumeratePhysicalDevices(vk_instance, &device_count, devices.data()); 214 | 215 | for (const auto &d : devices) { 216 | VkPhysicalDeviceProperties properties; 217 | VkPhysicalDeviceFeatures features; 218 | vkGetPhysicalDeviceProperties(d, &properties); 219 | vkGetPhysicalDeviceFeatures(d, &features); 220 | 221 | if (properties.deviceType == 222 | VkPhysicalDeviceType::VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU) { 223 | vk_physical_device = d; 224 | break; 225 | } 226 | } 227 | 228 | if (vk_physical_device == VK_NULL_HANDLE) { 229 | std::cout << "Failed to find discrete GPU, falling back to first/only device\n"; 230 | vk_physical_device = devices[0]; 231 | } 232 | } 233 | 234 | void Device::make_logical_device(const std::vector &extensions) 235 | { 236 | uint32_t num_queue_families = 0; 237 | vkGetPhysicalDeviceQueueFamilyProperties(vk_physical_device, &num_queue_families, nullptr); 238 | std::vector family_props(num_queue_families, 239 | VkQueueFamilyProperties{}); 240 | vkGetPhysicalDeviceQueueFamilyProperties( 241 | vk_physical_device, &num_queue_families, family_props.data()); 242 | for (uint32_t i = 0; i < num_queue_families; ++i) { 243 | if (family_props[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) { 244 | graphics_queue_index = i; 245 | break; 246 | } 247 | } 248 | 249 | const float queue_priority = 1.f; 250 | 251 | VkDeviceQueueCreateInfo queue_create_info = {}; 252 | queue_create_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; 253 | queue_create_info.queueFamilyIndex = graphics_queue_index; 254 | queue_create_info.queueCount = 1; 255 | queue_create_info.pQueuePriorities = &queue_priority; 256 | 257 | VkPhysicalDeviceDescriptorIndexingFeatures device_desc_features = {}; 258 | device_desc_features.sType = 259 | VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES; 260 | device_desc_features.shaderStorageBufferArrayNonUniformIndexing = true; 261 | device_desc_features.runtimeDescriptorArray = true; 262 | device_desc_features.descriptorBindingVariableDescriptorCount = true; 263 | device_desc_features.shaderSampledImageArrayNonUniformIndexing = true; 264 | 265 | VkPhysicalDeviceFeatures2 device_features = {}; 266 | device_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; 267 | device_features.features.shaderInt64 = true; 268 | device_features.pNext = &device_desc_features; 269 | 270 | VkDeviceCreateInfo create_info = {}; 271 | create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; 272 | create_info.queueCreateInfoCount = 1; 273 | create_info.pQueueCreateInfos = &queue_create_info; 274 | #ifdef _DEBUG 275 | create_info.enabledLayerCount = validation_layers.size(); 276 | create_info.ppEnabledLayerNames = validation_layers.data(); 277 | #else 278 | create_info.enabledLayerCount = 0; 279 | create_info.ppEnabledLayerNames = nullptr; 280 | #endif 281 | create_info.enabledExtensionCount = 0; 282 | create_info.ppEnabledExtensionNames = nullptr; 283 | 
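// Note: the enabled features are passed via the VkPhysicalDeviceFeatures2 struct chained
// through create_info.pNext below, so the legacy pEnabledFeatures pointer is left null.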
create_info.pEnabledFeatures = nullptr; 284 | create_info.pNext = &device_features; 285 | CHECK_VULKAN(vkCreateDevice(vk_physical_device, &create_info, nullptr, &device)); 286 | 287 | vkGetDeviceQueue(device, graphics_queue_index, 0, &queue); 288 | } 289 | 290 | VkBufferCreateInfo Buffer::create_info(size_t nbytes, VkBufferUsageFlags usage) 291 | { 292 | VkBufferCreateInfo info = {}; 293 | info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; 294 | info.size = nbytes; 295 | info.usage = usage; 296 | info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; 297 | return info; 298 | } 299 | 300 | std::shared_ptr Buffer::make_buffer(Device &device, 301 | size_t nbytes, 302 | VkBufferUsageFlags usage, 303 | VkMemoryPropertyFlags mem_props) 304 | { 305 | auto buf = std::make_shared(); 306 | buf->vkdevice = &device; 307 | buf->buf_size = nbytes; 308 | buf->host_visible = mem_props & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; 309 | 310 | auto create_info = Buffer::create_info(nbytes, usage); 311 | CHECK_VULKAN(vkCreateBuffer(device.logical_device(), &create_info, nullptr, &buf->buf)); 312 | 313 | VkMemoryRequirements mem_reqs = {}; 314 | vkGetBufferMemoryRequirements(device.logical_device(), buf->buf, &mem_reqs); 315 | buf->mem = device.alloc(mem_reqs.size, mem_reqs.memoryTypeBits, mem_props); 316 | 317 | vkBindBufferMemory(device.logical_device(), buf->buf, buf->mem, 0); 318 | 319 | return buf; 320 | } 321 | 322 | Buffer::~Buffer() 323 | { 324 | if (buf != VK_NULL_HANDLE) { 325 | vkDestroyBuffer(vkdevice->logical_device(), buf, nullptr); 326 | vkFreeMemory(vkdevice->logical_device(), mem, nullptr); 327 | } 328 | } 329 | 330 | Buffer::Buffer(Buffer &&b) 331 | : buf_size(b.buf_size), 332 | buf(b.buf), 333 | mem(b.mem), 334 | vkdevice(b.vkdevice), 335 | host_visible(b.host_visible) 336 | { 337 | b.buf_size = 0; 338 | b.buf = VK_NULL_HANDLE; 339 | b.mem = VK_NULL_HANDLE; 340 | b.vkdevice = nullptr; 341 | } 342 | 343 | Buffer &Buffer::operator=(Buffer &&b) 344 | { 345 | if (buf != VK_NULL_HANDLE) { 346 | vkDestroyBuffer(vkdevice->logical_device(), buf, nullptr); 347 | vkFreeMemory(vkdevice->logical_device(), mem, nullptr); 348 | } 349 | buf_size = b.buf_size; 350 | buf = b.buf; 351 | mem = b.mem; 352 | vkdevice = b.vkdevice; 353 | host_visible = b.host_visible; 354 | 355 | b.buf_size = 0; 356 | b.buf = VK_NULL_HANDLE; 357 | b.mem = VK_NULL_HANDLE; 358 | b.vkdevice = nullptr; 359 | return *this; 360 | } 361 | 362 | std::shared_ptr Buffer::host(Device &device, 363 | size_t nbytes, 364 | VkBufferUsageFlags usage, 365 | VkMemoryPropertyFlagBits extra_mem_props) 366 | { 367 | return make_buffer(device, 368 | nbytes, 369 | usage, 370 | VK_MEMORY_PROPERTY_HOST_CACHED_BIT | 371 | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | extra_mem_props); 372 | } 373 | 374 | std::shared_ptr Buffer::device(Device &device, 375 | size_t nbytes, 376 | VkBufferUsageFlags usage, 377 | VkMemoryPropertyFlagBits extra_mem_props) 378 | { 379 | return make_buffer( 380 | device, nbytes, usage, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | extra_mem_props); 381 | } 382 | 383 | void *Buffer::map() 384 | { 385 | assert(host_visible); 386 | void *mapping = nullptr; 387 | CHECK_VULKAN(vkMapMemory(vkdevice->logical_device(), mem, 0, buf_size, 0, &mapping)); 388 | return mapping; 389 | } 390 | 391 | void *Buffer::map(size_t offset, size_t size) 392 | { 393 | assert(host_visible); 394 | assert(offset + size < buf_size); 395 | void *mapping = nullptr; 396 | CHECK_VULKAN(vkMapMemory(vkdevice->logical_device(), mem, offset, size, 0, &mapping)); 397 | return mapping; 398 | } 399 | 
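// Typical staging pattern built from these buffer helpers, sketched after
// ExclusiveScanner::prepare_input (the names device, nbytes, src_data are illustrative):
//
//     auto upload = vkrt::Buffer::host(device, nbytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT);
//     std::memcpy(upload->map(), src_data, nbytes);
//     upload->unmap();
//     auto gpu = vkrt::Buffer::device(device, nbytes,
//                                     VK_BUFFER_USAGE_TRANSFER_DST_BIT |
//                                         VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
//     // ...then record a vkCmdCopyBuffer from upload->handle() to gpu->handle()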
400 | void Buffer::unmap() 401 | { 402 | assert(host_visible); 403 | vkUnmapMemory(vkdevice->logical_device(), mem); 404 | } 405 | 406 | size_t Buffer::size() const 407 | { 408 | return buf_size; 409 | } 410 | 411 | VkBuffer Buffer::handle() const 412 | { 413 | return buf; 414 | } 415 | 416 | Texture2D::~Texture2D() 417 | { 418 | if (image != VK_NULL_HANDLE) { 419 | vkDestroyImageView(vkdevice->logical_device(), view, nullptr); 420 | vkDestroyImage(vkdevice->logical_device(), image, nullptr); 421 | vkFreeMemory(vkdevice->logical_device(), mem, nullptr); 422 | } 423 | } 424 | 425 | Texture2D::Texture2D(Texture2D &&t) 426 | : tdims(t.tdims), 427 | img_format(t.img_format), 428 | img_layout(t.img_layout), 429 | image(t.image), 430 | mem(t.mem), 431 | view(t.view), 432 | vkdevice(t.vkdevice) 433 | { 434 | t.image = VK_NULL_HANDLE; 435 | t.mem = VK_NULL_HANDLE; 436 | t.view = VK_NULL_HANDLE; 437 | t.vkdevice = nullptr; 438 | } 439 | 440 | Texture2D &Texture2D::operator=(Texture2D &&t) 441 | { 442 | if (image != VK_NULL_HANDLE) { 443 | vkDestroyImageView(vkdevice->logical_device(), view, nullptr); 444 | vkDestroyImage(vkdevice->logical_device(), image, nullptr); 445 | vkFreeMemory(vkdevice->logical_device(), mem, nullptr); 446 | } 447 | tdims = t.tdims; 448 | img_format = t.img_format; 449 | img_layout = t.img_layout; 450 | image = t.image; 451 | mem = t.mem; 452 | view = t.view; 453 | vkdevice = t.vkdevice; 454 | 455 | t.image = VK_NULL_HANDLE; 456 | t.view = VK_NULL_HANDLE; 457 | t.vkdevice = nullptr; 458 | return *this; 459 | } 460 | 461 | std::shared_ptr Texture2D::device(Device &device, 462 | glm::uvec2 dims, 463 | VkFormat img_format, 464 | VkImageUsageFlags usage) 465 | { 466 | auto texture = std::make_shared(); 467 | texture->img_format = img_format; 468 | texture->tdims = dims; 469 | texture->vkdevice = &device; 470 | 471 | VkImageCreateInfo create_info = {}; 472 | create_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; 473 | create_info.imageType = VK_IMAGE_TYPE_2D; 474 | create_info.format = texture->img_format; 475 | create_info.extent.width = texture->tdims.x; 476 | create_info.extent.height = texture->tdims.y; 477 | create_info.extent.depth = 1; 478 | create_info.mipLevels = 1; 479 | create_info.arrayLayers = 1; 480 | create_info.samples = VK_SAMPLE_COUNT_1_BIT; 481 | create_info.tiling = VK_IMAGE_TILING_OPTIMAL; 482 | create_info.usage = usage; 483 | create_info.initialLayout = texture->img_layout; 484 | CHECK_VULKAN( 485 | vkCreateImage(device.logical_device(), &create_info, nullptr, &texture->image)); 486 | 487 | VkMemoryRequirements mem_reqs = {}; 488 | vkGetImageMemoryRequirements(device.logical_device(), texture->image, &mem_reqs); 489 | texture->mem = device.alloc( 490 | mem_reqs.size, mem_reqs.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); 491 | 492 | CHECK_VULKAN(vkBindImageMemory(device.logical_device(), texture->image, texture->mem, 0)); 493 | 494 | // An ImageView is only valid for certain image types, so check that the image being made 495 | // is one of those 496 | const bool make_view = (usage & VK_IMAGE_USAGE_SAMPLED_BIT) || 497 | (usage & VK_IMAGE_USAGE_STORAGE_BIT) || 498 | (usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) || 499 | (usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) || 500 | (usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT); 501 | if (make_view) { 502 | VkImageViewCreateInfo view_create_info = {}; 503 | view_create_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; 504 | view_create_info.image = texture->image; 505 | 
view_create_info.viewType = VK_IMAGE_VIEW_TYPE_2D; 506 | view_create_info.format = texture->img_format; 507 | 508 | view_create_info.components.r = VK_COMPONENT_SWIZZLE_IDENTITY; 509 | view_create_info.components.g = VK_COMPONENT_SWIZZLE_IDENTITY; 510 | view_create_info.components.b = VK_COMPONENT_SWIZZLE_IDENTITY; 511 | view_create_info.components.a = VK_COMPONENT_SWIZZLE_IDENTITY; 512 | 513 | view_create_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; 514 | view_create_info.subresourceRange.baseMipLevel = 0; 515 | view_create_info.subresourceRange.levelCount = 1; 516 | view_create_info.subresourceRange.baseArrayLayer = 0; 517 | view_create_info.subresourceRange.layerCount = 1; 518 | 519 | CHECK_VULKAN(vkCreateImageView( 520 | device.logical_device(), &view_create_info, nullptr, &texture->view)); 521 | } 522 | return texture; 523 | } 524 | 525 | size_t Texture2D::pixel_size() const 526 | { 527 | switch (img_format) { 528 | case VK_FORMAT_R16_UINT: 529 | return 2; 530 | case VK_FORMAT_R32_UINT: 531 | case VK_FORMAT_R32_SFLOAT: 532 | case VK_FORMAT_R8G8B8A8_UNORM: 533 | case VK_FORMAT_B8G8R8A8_UNORM: 534 | case VK_FORMAT_R8G8B8A8_SRGB: 535 | return 4; 536 | case VK_FORMAT_R32G32B32A32_SFLOAT: 537 | return 16; 538 | default: 539 | throw std::runtime_error("Unhandled image format!"); 540 | } 541 | } 542 | 543 | VkFormat Texture2D::pixel_format() const 544 | { 545 | return img_format; 546 | } 547 | 548 | glm::uvec2 Texture2D::dims() const 549 | { 550 | return tdims; 551 | } 552 | 553 | VkImage Texture2D::image_handle() const 554 | { 555 | return image; 556 | } 557 | 558 | VkImageView Texture2D::view_handle() const 559 | { 560 | return view; 561 | } 562 | 563 | ShaderModule::ShaderModule(Device &vkdevice, const uint32_t *code, size_t code_size) 564 | : device(&vkdevice) 565 | { 566 | VkShaderModuleCreateInfo info = {}; 567 | info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; 568 | info.codeSize = code_size; 569 | info.pCode = code; 570 | CHECK_VULKAN(vkCreateShaderModule(vkdevice.logical_device(), &info, nullptr, &module)); 571 | } 572 | 573 | ShaderModule::~ShaderModule() 574 | { 575 | if (module != VK_NULL_HANDLE) { 576 | vkDestroyShaderModule(device->logical_device(), module, nullptr); 577 | } 578 | } 579 | 580 | ShaderModule::ShaderModule(ShaderModule &&sm) : device(sm.device), module(sm.module) 581 | { 582 | sm.device = nullptr; 583 | sm.module = VK_NULL_HANDLE; 584 | } 585 | ShaderModule &ShaderModule::operator=(ShaderModule &&sm) 586 | { 587 | if (module != VK_NULL_HANDLE) { 588 | vkDestroyShaderModule(device->logical_device(), module, nullptr); 589 | } 590 | device = sm.device; 591 | module = sm.module; 592 | 593 | sm.device = nullptr; 594 | sm.module = VK_NULL_HANDLE; 595 | return *this; 596 | } 597 | 598 | CombinedImageSampler::CombinedImageSampler(const std::shared_ptr &t, 599 | VkSampler sampler) 600 | : texture(t), sampler(sampler) 601 | { 602 | } 603 | 604 | DescriptorSetLayoutBuilder &DescriptorSetLayoutBuilder::add_binding(uint32_t binding, 605 | uint32_t count, 606 | VkDescriptorType type, 607 | uint32_t stage_flags, 608 | uint32_t ext_flags) 609 | { 610 | VkDescriptorSetLayoutBinding desc = {}; 611 | desc.binding = binding; 612 | desc.descriptorCount = count; 613 | desc.descriptorType = type; 614 | desc.stageFlags = stage_flags; 615 | bindings.push_back(desc); 616 | binding_ext_flags.push_back(ext_flags); 617 | return *this; 618 | } 619 | 620 | VkDescriptorSetLayout DescriptorSetLayoutBuilder::build(Device &device) 621 | { 622 | 
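// Per-binding extension flags collected by add_binding() are chained through
// VkDescriptorSetLayoutBindingFlagsCreateInfo below. Usage follows the builder
// pattern, e.g. (a sketch based on the ExclusiveScanner constructor):
//
//     auto layout = vkrt::DescriptorSetLayoutBuilder()
//                       .add_binding(0, 1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
//                                    VK_SHADER_STAGE_COMPUTE_BIT)
//                       .build(device);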
VkDescriptorSetLayoutBindingFlagsCreateInfo ext_flags = {}; 623 | ext_flags.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO; 624 | ext_flags.bindingCount = binding_ext_flags.size(); 625 | ext_flags.pBindingFlags = binding_ext_flags.data(); 626 | 627 | VkDescriptorSetLayoutCreateInfo create_info = {}; 628 | create_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; 629 | create_info.bindingCount = bindings.size(); 630 | create_info.pBindings = bindings.data(); 631 | create_info.pNext = &ext_flags; 632 | 633 | VkDescriptorSetLayout layout = VK_NULL_HANDLE; 634 | CHECK_VULKAN( 635 | vkCreateDescriptorSetLayout(device.logical_device(), &create_info, nullptr, &layout)); 636 | return layout; 637 | } 638 | 639 | DescriptorSetUpdater &DescriptorSetUpdater::write_storage_image( 640 | VkDescriptorSet set, uint32_t binding, const std::shared_ptr &img) 641 | { 642 | VkDescriptorImageInfo img_desc = {}; 643 | img_desc.imageView = img->view_handle(); 644 | img_desc.imageLayout = VK_IMAGE_LAYOUT_GENERAL; 645 | 646 | WriteDescriptorInfo write; 647 | write.dst_set = set; 648 | write.binding = binding; 649 | write.count = 1; 650 | write.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; 651 | write.img_index = images.size(); 652 | 653 | images.push_back(img_desc); 654 | writes.push_back(write); 655 | return *this; 656 | } 657 | 658 | DescriptorSetUpdater &DescriptorSetUpdater::write_ubo(VkDescriptorSet set, 659 | uint32_t binding, 660 | const std::shared_ptr &buf) 661 | { 662 | VkDescriptorBufferInfo buf_desc = {}; 663 | buf_desc.buffer = buf->handle(); 664 | buf_desc.offset = 0; 665 | buf_desc.range = buf->size(); 666 | 667 | WriteDescriptorInfo write; 668 | write.dst_set = set; 669 | write.binding = binding; 670 | write.count = 1; 671 | write.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; 672 | write.buf_index = buffers.size(); 673 | 674 | buffers.push_back(buf_desc); 675 | writes.push_back(write); 676 | return *this; 677 | } 678 | 679 | DescriptorSetUpdater &DescriptorSetUpdater::write_ubo_dynamic( 680 | VkDescriptorSet set, 681 | uint32_t binding, 682 | const std::shared_ptr &buf, 683 | uint32_t offset, 684 | uint32_t range) 685 | { 686 | VkDescriptorBufferInfo buf_desc = {}; 687 | buf_desc.buffer = buf->handle(); 688 | buf_desc.offset = offset; 689 | buf_desc.range = range; 690 | 691 | WriteDescriptorInfo write; 692 | write.dst_set = set; 693 | write.binding = binding; 694 | write.count = 1; 695 | write.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; 696 | write.buf_index = buffers.size(); 697 | 698 | buffers.push_back(buf_desc); 699 | writes.push_back(write); 700 | return *this; 701 | } 702 | 703 | DescriptorSetUpdater &DescriptorSetUpdater::write_ssbo(VkDescriptorSet set, 704 | uint32_t binding, 705 | const std::shared_ptr &buf) 706 | { 707 | VkDescriptorBufferInfo buf_desc = {}; 708 | buf_desc.buffer = buf->handle(); 709 | buf_desc.offset = 0; 710 | buf_desc.range = buf->size(); 711 | 712 | WriteDescriptorInfo write; 713 | write.dst_set = set; 714 | write.binding = binding; 715 | write.count = 1; 716 | write.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; 717 | write.buf_index = buffers.size(); 718 | 719 | buffers.push_back(buf_desc); 720 | writes.push_back(write); 721 | return *this; 722 | } 723 | 724 | DescriptorSetUpdater &DescriptorSetUpdater::write_ssbo_dynamic( 725 | VkDescriptorSet set, 726 | uint32_t binding, 727 | const std::shared_ptr &buf, 728 | uint32_t offset, 729 | uint32_t range) 730 | { 731 | VkDescriptorBufferInfo buf_desc = {}; 732 | buf_desc.buffer = 
        buf->handle();
733 |     buf_desc.offset = offset;
734 |     buf_desc.range = range;
735 | 
736 |     WriteDescriptorInfo write;
737 |     write.dst_set = set;
738 |     write.binding = binding;
739 |     write.count = 1;
740 |     write.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC;
741 |     write.buf_index = buffers.size();
742 | 
743 |     buffers.push_back(buf_desc);
744 |     writes.push_back(write);
745 |     return *this;
746 | }
747 | 
748 | DescriptorSetUpdater &DescriptorSetUpdater::write_ssbo_array(
749 |     VkDescriptorSet set, uint32_t binding, const std::vector<std::shared_ptr<Buffer>> &bufs)
750 | {
751 |     WriteDescriptorInfo write;
752 |     write.dst_set = set;
753 |     write.binding = binding;
754 |     write.count = bufs.size();
755 |     write.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
756 |     write.buf_index = buffers.size();
757 | 
758 |     std::transform(bufs.begin(),
759 |                    bufs.end(),
760 |                    std::back_inserter(buffers),
761 |                    [](const std::shared_ptr<Buffer> &b) {
762 |                        VkDescriptorBufferInfo buf_desc = {};
763 |                        buf_desc.buffer = b->handle();
764 |                        buf_desc.offset = 0;
765 |                        buf_desc.range = b->size();
766 |                        return buf_desc;
767 |                    });
768 | 
769 |     writes.push_back(write);
770 |     return *this;
771 | }
772 | 
773 | DescriptorSetUpdater &DescriptorSetUpdater::write_combined_sampler_array(
774 |     VkDescriptorSet set,
775 |     uint32_t binding,
776 |     const std::vector<CombinedImageSampler> &combined_samplers)
777 | {
778 |     WriteDescriptorInfo write;
779 |     write.dst_set = set;
780 |     write.binding = binding;
781 |     write.count = combined_samplers.size();
782 |     write.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
783 |     write.img_index = images.size();
784 | 
785 |     std::transform(combined_samplers.begin(),
786 |                    combined_samplers.end(),
787 |                    std::back_inserter(images),
788 |                    [](const CombinedImageSampler &cs) {
789 |                        VkDescriptorImageInfo desc = {};
790 |                        desc.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
791 |                        desc.imageView = cs.texture->view_handle();
792 |                        desc.sampler = cs.sampler;
793 |                        return desc;
794 |                    });
795 | 
796 |     writes.push_back(write);
797 |     return *this;
798 | }
799 | 
800 | void DescriptorSetUpdater::update(Device &device)
801 | {
802 |     std::vector<VkWriteDescriptorSet> desc_writes;
803 |     std::transform(
804 |         writes.begin(),
805 |         writes.end(),
806 |         std::back_inserter(desc_writes),
807 |         [&](const WriteDescriptorInfo &w) {
808 |             VkWriteDescriptorSet wd = {};
809 |             wd.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
810 |             wd.dstSet = w.dst_set;
811 |             wd.dstBinding = w.binding;
812 |             wd.descriptorCount = w.count;
813 |             wd.descriptorType = w.type;
814 | 
815 |             if (wd.descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
816 |                 wd.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ||
817 |                 wd.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC ||
818 |                 wd.descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) {
819 |                 wd.pBufferInfo = &buffers[w.buf_index];
820 |             } else if (wd.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE ||
821 |                        wd.descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
822 |                 wd.pImageInfo = &images[w.img_index];
823 |             }
824 |             return wd;
825 |         });
826 |     vkUpdateDescriptorSets(
827 |         device.logical_device(), desc_writes.size(), desc_writes.data(), 0, nullptr);
828 | }
829 | }
830 | 
--------------------------------------------------------------------------------
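The DescriptorSetLayoutBuilder and DescriptorSetUpdater implementations above are easiest to read next to their intended call pattern, so here is a minimal sketch of one. It is not code from this repository: the helper name make_compute_descriptor_set, the buffers params_buf and output_buf (assumed to be std::shared_ptr<vkrt::Buffer> objects created elsewhere), and the pool sizes are illustrative assumptions; only the vkrt calls and the raw Vulkan descriptor-pool calls mirror the APIs shown in these files.

#include <memory>
#include <vector>
#include "vulkan_utils.h"

// Sketch only (not part of the repository): build a layout with one UBO and one SSBO
// binding, allocate a set for it from a plain Vulkan descriptor pool, and point the
// bindings at two existing vkrt::Buffer objects.
VkDescriptorSet make_compute_descriptor_set(vkrt::Device &device,
                                            const std::shared_ptr<vkrt::Buffer> &params_buf,
                                            const std::shared_ptr<vkrt::Buffer> &output_buf,
                                            VkDescriptorSetLayout &layout,
                                            VkDescriptorPool &pool)
{
    layout = vkrt::DescriptorSetLayoutBuilder()
                 .add_binding(
                     0, 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
                 .add_binding(
                     1, 1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
                 .build(device);

    // Pool sizes are placeholders, just big enough for this single set
    const std::vector<VkDescriptorPoolSize> pool_sizes = {
        {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1}, {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1}};
    VkDescriptorPoolCreateInfo pool_info = {};
    pool_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
    pool_info.maxSets = 1;
    pool_info.poolSizeCount = pool_sizes.size();
    pool_info.pPoolSizes = pool_sizes.data();
    CHECK_VULKAN(vkCreateDescriptorPool(device.logical_device(), &pool_info, nullptr, &pool));

    VkDescriptorSetAllocateInfo alloc_info = {};
    alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
    alloc_info.descriptorPool = pool;
    alloc_info.descriptorSetCount = 1;
    alloc_info.pSetLayouts = &layout;
    VkDescriptorSet desc_set = VK_NULL_HANDLE;
    CHECK_VULKAN(vkAllocateDescriptorSets(device.logical_device(), &alloc_info, &desc_set));

    // Record all the writes, then commit them with a single vkUpdateDescriptorSets call
    vkrt::DescriptorSetUpdater()
        .write_ubo(desc_set, 0, params_buf)
        .write_ssbo(desc_set, 1, output_buf)
        .update(device);
    return desc_set;
}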
/src/vulkan_utils.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #include <iostream>
4 | #include <memory>
5 | #include <stdexcept>
6 | #include <string>
7 | #include <vector>
8 | #include <vulkan/vulkan.h>
9 | #include <glm/glm.hpp>
10 | 
11 | #define CHECK_VULKAN(FN)                                       \
12 |     {                                                          \
13 |         VkResult r = FN;                                       \
14 |         if (r != VK_SUCCESS) {                                 \
15 |             std::cout << #FN << " failed\n" << std::flush;     \
16 |             throw std::runtime_error(#FN " failed!");          \
17 |         }                                                      \
18 |     }
19 | 
20 | namespace vkrt {
21 | 
22 | class Device {
23 |     VkInstance vk_instance = VK_NULL_HANDLE;
24 |     VkPhysicalDevice vk_physical_device = VK_NULL_HANDLE;
25 |     VkDevice device = VK_NULL_HANDLE;
26 |     VkQueue queue = VK_NULL_HANDLE;
27 | 
28 |     uint32_t graphics_queue_index = -1;
29 | 
30 |     VkPhysicalDeviceMemoryProperties mem_props = {};
31 | 
32 | public:
33 |     Device(const std::vector<std::string> &instance_extensions = std::vector<std::string>{},
34 |            const std::vector<std::string> &logical_device_extensions =
35 |                std::vector<std::string>{});
36 |     ~Device();
37 | 
38 |     Device(Device &&d);
39 |     Device &operator=(Device &&d);
40 | 
41 |     Device(const Device &) = delete;
42 |     Device &operator=(const Device &) = delete;
43 | 
44 |     VkDevice logical_device();
45 | 
46 |     VkPhysicalDevice physical_device();
47 | 
48 |     VkInstance instance();
49 | 
50 |     VkQueue graphics_queue();
51 |     uint32_t queue_index() const;
52 | 
53 |     VkCommandPool make_command_pool(
54 |         VkCommandPoolCreateFlagBits flags = (VkCommandPoolCreateFlagBits)0);
55 | 
56 |     uint32_t memory_type_index(uint32_t type_filter, VkMemoryPropertyFlags props) const;
57 |     VkDeviceMemory alloc(size_t nbytes, uint32_t type_filter, VkMemoryPropertyFlags props);
58 | 
59 |     const VkPhysicalDeviceMemoryProperties &memory_properties() const;
60 | 
61 | private:
62 |     void make_instance(const std::vector<std::string> &extensions);
63 |     void select_physical_device();
64 |     void make_logical_device(const std::vector<std::string> &extensions);
65 | };
66 | 
67 | void make_basic_compute_pipeline(const uint32_t *code,
68 |                                  const size_t code_size,
69 |                                  const std::vector<VkDescriptorSetLayout> &desc_set_layouts,
70 |                                  Device &device,
71 |                                  VkPipelineLayout &layout,
72 |                                  VkPipeline &pipeline);
73 | 
74 | // TODO: Maybe a base resource class which tracks the queue and access flags
75 | 
76 | class Buffer {
77 |     size_t buf_size = 0;
78 |     VkBuffer buf = VK_NULL_HANDLE;
79 |     VkDeviceMemory mem = VK_NULL_HANDLE;
80 |     Device *vkdevice = nullptr;
81 |     bool host_visible = false;
82 | 
83 |     static VkBufferCreateInfo create_info(size_t nbytes, VkBufferUsageFlags usage);
84 | 
85 |     static std::shared_ptr<Buffer> make_buffer(Device &device,
86 |                                                size_t nbytes,
87 |                                                VkBufferUsageFlags usage,
88 |                                                VkMemoryPropertyFlags mem_props);
89 | 
90 | public:
91 |     Buffer() = default;
92 |     ~Buffer();
93 |     Buffer(Buffer &&b);
94 |     Buffer &operator=(Buffer &&b);
95 | 
96 |     Buffer(const Buffer &) = delete;
97 |     Buffer &operator=(const Buffer &) = delete;
98 | 
99 |     static std::shared_ptr<Buffer> host(
100 |         Device &device,
101 |         size_t nbytes,
102 |         VkBufferUsageFlags usage,
103 |         VkMemoryPropertyFlagBits extra_mem_props = (VkMemoryPropertyFlagBits)0);
104 |     static std::shared_ptr<Buffer> device(
105 |         Device &device,
106 |         size_t nbytes,
107 |         VkBufferUsageFlags usage,
108 |         VkMemoryPropertyFlagBits extra_mem_props = (VkMemoryPropertyFlagBits)0);
109 | 
110 |     // Map the entire range of the buffer
111 |     void *map();
112 |     // Map size bytes of the buffer, starting at offset
113 |     void *map(size_t offset, size_t size);
114 | 
115 |     void unmap();
116 | 
117 |     size_t size() const;
118 | 
119 |     VkBuffer handle() const;
120 | };
121 | 
122 | class Texture2D {
123 |     glm::uvec2 tdims = glm::uvec2(0);
124 |     VkFormat img_format;
125 |     VkImageLayout img_layout = VK_IMAGE_LAYOUT_UNDEFINED;
126 |     VkImage image = VK_NULL_HANDLE;
127 |     VkDeviceMemory mem = VK_NULL_HANDLE;
128 |     VkImageView view = VK_NULL_HANDLE;
129 |     Device *vkdevice = nullptr;
130 | 
131 | public:
132 |     Texture2D() = default;
133 |     ~Texture2D();
134 |     Texture2D(Texture2D &&t);
135 |     Texture2D &operator=(Texture2D &&t);
136 | 
137 |     Texture2D(Texture2D &t) = delete;
138 |     Texture2D &operator=(Texture2D &t) = delete;
139 | 
140 |     // Note: after creation the image will be in the VK_IMAGE_LAYOUT_UNDEFINED layout
141 |     static std::shared_ptr<Texture2D> device(Device &device,
142 |                                              glm::uvec2 dims,
143 |                                              VkFormat img_format,
144 |                                              VkImageUsageFlags usage);
145 | 
146 |     // Size of one pixel, in bytes
147 |     size_t pixel_size() const;
148 |     VkFormat pixel_format() const;
149 |     glm::uvec2 dims() const;
150 | 
151 |     VkImage image_handle() const;
152 |     VkImageView view_handle() const;
153 | };
154 | 
155 | struct ShaderModule {
156 |     Device *device = nullptr;
157 |     VkShaderModule module = VK_NULL_HANDLE;
158 | 
159 |     ShaderModule() = default;
160 |     ShaderModule(Device &device, const uint32_t *code, size_t code_size);
161 |     ~ShaderModule();
162 | 
163 |     ShaderModule(ShaderModule &&sm);
164 |     ShaderModule &operator=(ShaderModule &&sm);
165 | 
166 |     ShaderModule(ShaderModule &) = delete;
167 |     ShaderModule &operator=(ShaderModule &) = delete;
168 | };
169 | 
170 | class DescriptorSetLayoutBuilder {
171 |     std::vector<VkDescriptorSetLayoutBinding> bindings;
172 |     std::vector<VkDescriptorBindingFlags> binding_ext_flags;
173 | 
174 | public:
175 |     DescriptorSetLayoutBuilder &add_binding(uint32_t binding,
176 |                                             uint32_t count,
177 |                                             VkDescriptorType type,
178 |                                             uint32_t stage_flags,
179 |                                             uint32_t ext_flags = 0);
180 | 
181 |     VkDescriptorSetLayout build(Device &device);
182 | };
183 | 
184 | class TopLevelBVH;
185 | 
186 | struct WriteDescriptorInfo {
187 |     VkDescriptorSet dst_set = VK_NULL_HANDLE;
188 |     uint32_t binding = 0;
189 |     uint32_t count = 0;
190 |     VkDescriptorType type;
191 |     size_t as_index = -1;
192 |     size_t img_index = -1;
193 |     size_t buf_index = -1;
194 | };
195 | 
196 | struct CombinedImageSampler {
197 |     const std::shared_ptr<Texture2D> texture;
198 |     VkSampler sampler;
199 | 
200 |     CombinedImageSampler(const std::shared_ptr<Texture2D> &t, VkSampler sampler);
201 | };
202 | 
203 | class DescriptorSetUpdater {
204 |     std::vector<WriteDescriptorInfo> writes;
205 |     std::vector<VkDescriptorImageInfo> images;
206 |     std::vector<VkDescriptorBufferInfo> buffers;
207 | 
208 | public:
209 |     DescriptorSetUpdater &write_storage_image(VkDescriptorSet set,
210 |                                               uint32_t binding,
211 |                                               const std::shared_ptr<Texture2D> &img);
212 | 
213 |     DescriptorSetUpdater &write_ubo(VkDescriptorSet set,
214 |                                     uint32_t binding,
215 |                                     const std::shared_ptr<Buffer> &buf);
216 | 
217 |     DescriptorSetUpdater &write_ubo_dynamic(VkDescriptorSet set,
218 |                                             uint32_t binding,
219 |                                             const std::shared_ptr<Buffer> &buf,
220 |                                             uint32_t offset,
221 |                                             uint32_t range);
222 | 
223 |     DescriptorSetUpdater &write_ssbo(VkDescriptorSet set,
224 |                                      uint32_t binding,
225 |                                      const std::shared_ptr<Buffer> &buf);
226 | 
227 |     DescriptorSetUpdater &write_ssbo_dynamic(VkDescriptorSet set,
228 |                                              uint32_t binding,
229 |                                              const std::shared_ptr<Buffer> &buf,
230 |                                              uint32_t offset,
231 |                                              uint32_t range);
232 | 
233 |     DescriptorSetUpdater &write_ssbo_array(VkDescriptorSet set,
234 |                                            uint32_t binding,
235 |                                            const std::vector<std::shared_ptr<Buffer>> &bufs);
236 | 
237 |     DescriptorSetUpdater &write_combined_sampler_array(
238 |         VkDescriptorSet set,
239 |         uint32_t binding,
240 |         const std::vector<CombinedImageSampler> &combined_samplers);
241 | 
242 |     // Commit the writes to the descriptor sets
243 |     void update(Device &device);
244 | };
245 | 
246 | }
247 | 
--------------------------------------------------------------------------------
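To tie the declarations in vulkan_utils.h together, here is a hedged sketch of one plausible usage: create a Device, stage data through a host-visible Buffer alongside a device-local one, and build a compute pipeline from an embedded SPIR-V blob. It is not taken from marching_cubes.cpp or compute_marching_cubes.cpp; kernel_spv and kernel_spv_size are hypothetical stand-ins for one of the shader arrays generated by cmake/SPIRV2C.cmake, and the command-buffer recording for the upload copy and dispatch is omitted.

#include <cstring>
#include <memory>
#include <vector>
#include "vulkan_utils.h"

// Hypothetical symbols standing in for an embedded shader produced by SPIRV2C.cmake;
// kernel_spv_size is assumed to be the code size in bytes, matching how
// make_basic_compute_pipeline forwards it to VkShaderModuleCreateInfo::codeSize.
extern const uint32_t kernel_spv[];
extern const size_t kernel_spv_size;

int main()
{
    vkrt::Device device;

    // Host-visible staging buffer and a device-local buffer the compute shader would use
    auto upload = vkrt::Buffer::host(device, 1024, VK_BUFFER_USAGE_TRANSFER_SRC_BIT);
    auto storage = vkrt::Buffer::device(
        device,
        1024,
        VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);

    // Fill the staging buffer through a persistent host mapping
    std::vector<uint32_t> values(256, 1);
    std::memcpy(upload->map(), values.data(), upload->size());
    upload->unmap();

    // One set layout with a single SSBO binding, then the compute pipeline around it
    VkDescriptorSetLayout layout =
        vkrt::DescriptorSetLayoutBuilder()
            .add_binding(
                0, 1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
            .build(device);

    VkPipelineLayout pipeline_layout = VK_NULL_HANDLE;
    VkPipeline pipeline = VK_NULL_HANDLE;
    vkrt::make_basic_compute_pipeline(
        kernel_spv, kernel_spv_size, {layout}, device, pipeline_layout, pipeline);

    // Recording the staging copy and vkCmdDispatch into a command buffer allocated from
    // device.make_command_pool() would follow here.
    return 0;
}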