├── test ├── CMakeLists.txt ├── performance │ ├── CMakeLists.txt │ └── saxpy_b.cpp └── correctness │ ├── saxpy_t.cpp │ ├── CMakeLists.txt │ └── approx.hpp ├── CMakeLists.txt ├── src ├── CMakeLists.txt ├── main.cpp ├── shaders │ └── saxpy.comp ├── vulkan_helpers.h ├── example_filter.h ├── vulkan_helpers.hpp ├── vulkan_helpers.cpp └── example_filter.cpp ├── config └── CompileShader.cmake ├── LICENSE └── readme.md /test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(correctness) 2 | add_subdirectory(performance) 3 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required (VERSION 3.8) 2 | project(vulkan_compute_example) 3 | 4 | option(VULKAN_COMPUTE_EXAMPLE_BUILD_TESTS "Build tests for vulkan compute example" ON) 5 | option(VULKAN_COMPUTE_EXAMPLE_BUILD_BENCHMARKS "Build benchmarks for vulkan compute example" ON) 6 | 7 | set(CMAKE_CXX_STANDARD 14) 8 | list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/config) 9 | enable_testing() 10 | 11 | add_subdirectory(src) 12 | add_subdirectory(test) 13 | -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | find_package(Vulkan REQUIRED) 2 | 3 | include(CompileShader) 4 | compile_shader(saxpy_shader 5 | SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/shaders/saxpy.comp 6 | TARGET ${CMAKE_CURRENT_BINARY_DIR}/shaders/saxpy.spv 7 | ) 8 | 9 | add_library(example_filter STATIC example_filter.cpp vulkan_helpers.cpp) 10 | target_link_libraries(example_filter PUBLIC Vulkan::Vulkan) 11 | target_include_directories(example_filter PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) 12 | add_dependencies(example_filter saxpy_shader) 13 | 14 | add_executable(vulkan_example main.cpp) 15 | target_link_libraries(vulkan_example PRIVATE example_filter) 16 | -------------------------------------------------------------------------------- /test/performance/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | if(NOT VULKAN_COMPUTE_EXAMPLE_BUILD_BENCHMARKS) 2 | return() 3 | endif() 4 | 5 | add_custom_command( 6 | OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/shaders 7 | COMMAND ${CMAKE_COMMAND} 8 | ARGS -E create_symlink $/shaders ${CMAKE_CURRENT_BINARY_DIR}/shaders 9 | DEPENDS example_filter 10 | COMMENT "link shaders to build tests folder" 11 | ) 12 | add_custom_target(link_shaders_bench DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/shaders) 13 | 14 | find_package(sltbench REQUIRED) 15 | 16 | add_executable(bench_saxpy saxpy_b.cpp) 17 | target_link_libraries(bench_saxpy PRIVATE sltbench example_filter) 18 | add_dependencies(bench_saxpy link_shaders_bench) 19 | -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | #include "example_filter.h" 2 | #include "vulkan_helpers.hpp" 3 | 4 | auto main(int argc, char* argv[])-> int { 5 | const auto width = 90; 6 | const auto height = 60; 7 | const auto a = 2.0f; // saxpy scaling factor 8 | 9 | auto y = std::vector(width*height, 0.71f); 10 | auto x = std::vector(width*height, 0.65f); 11 | 12 | ExampleFilter f("shaders/saxpy.spv"); 13 | auto d_y = vuh::Array::fromHost(y, f.device, f.physDevice); 14 | auto d_x = vuh::Array::fromHost(x, f.device, f.physDevice); 15 | 16 | f(d_y, d_x, {width, height, a}); 17 | 18 | auto out_tst = std::vector{}; 19 | d_y.to_host(out_tst); // and now out_tst should contain the result of saxpy (y = y + ax) 20 | 21 | return 0; 22 | } 23 | -------------------------------------------------------------------------------- /config/CompileShader.cmake: -------------------------------------------------------------------------------- 1 | 2 | find_program(GlslangValidator NAMES glslangValidator DOC "glsl to SPIR-V compiler") 3 | if(NOT GlslangValidator) 4 | message(FATAL_ERROR "failed to find glslangValidator") 5 | endif() 6 | 7 | function(compile_shader) 8 | set(OneValueArgs SOURCE TARGET) 9 | cmake_parse_arguments(COMPILE_SHADER "" "${OneValueArgs}" "" ${ARGN}) 10 | 11 | get_filename_component(TargetDir ${COMPILE_SHADER_TARGET} DIRECTORY) 12 | add_custom_command( 13 | COMMAND ${CMAKE_COMMAND} ARGS -E make_directory ${TargetDir} 14 | COMMAND ${GlslangValidator} ARGS -V ${COMPILE_SHADER_SOURCE} -o ${COMPILE_SHADER_TARGET} 15 | DEPENDS ${COMPILE_SHADER_SOURCE} 16 | OUTPUT ${COMPILE_SHADER_TARGET} 17 | ) 18 | add_custom_target(${ARGV0} DEPENDS ${COMPILE_SHADER_TARGET}) 19 | endfunction() 20 | -------------------------------------------------------------------------------- /test/correctness/saxpy_t.cpp: -------------------------------------------------------------------------------- 1 | #define CATCH_CONFIG_MAIN 2 | #include 3 | 4 | #include "approx.hpp" 5 | 6 | #include 7 | #include 8 | 9 | using test::approx; 10 | 11 | TEST_CASE("saxpy", "[correctness]"){ 12 | const auto width = 90; 13 | const auto height = 60; 14 | const auto a = 2.0f; // saxpy scaling factor 15 | 16 | auto y = std::vector(width*height, 0.71f); 17 | auto x = std::vector(width*height, 0.65f); 18 | 19 | ExampleFilter f("shaders/saxpy.spv"); 20 | auto d_y = vuh::Array::fromHost(y, f.device, f.physDevice); 21 | auto d_x = vuh::Array::fromHost(x, f.device, f.physDevice); 22 | 23 | f(d_y, d_x, {width, height, a}); 24 | 25 | auto out_tst = std::vector{}; 26 | d_y.to_host(out_tst); 27 | 28 | auto out_ref = y; 29 | for(size_t i = 0; i < y.size(); ++i){ 30 | out_ref[i] += a*x[i]; 31 | } 32 | 33 | REQUIRE(out_tst == approx(out_ref).eps(1.e-5).verbose()); 34 | } 35 | -------------------------------------------------------------------------------- /src/shaders/saxpy.comp: -------------------------------------------------------------------------------- 1 | #version 440 2 | 3 | layout(local_size_x_id = 0, local_size_y_id = 1) in; // workgroup size defined with specialization constants. On cpp side there is associated SpecializationInfo entry in PipelineShaderStageCreateInfo 4 | layout(push_constant) uniform Parameters { // specify push constants. on cpp side its layout is fixed at PipelineLayout, and values are provided via vk::CommandBuffer::pushConstants() 5 | uint Width; 6 | uint Height; 7 | float a; 8 | } params; 9 | 10 | layout(std430, binding = 0) buffer lay0 { float arr_y[]; }; 11 | layout(std430, binding = 1) buffer lay1 { float arr_x[]; }; 12 | 13 | void main(){ 14 | // drop threads outside the buffer dimensions. 15 | if(params.Width <= gl_GlobalInvocationID.x || params.Height <= gl_GlobalInvocationID.y){ 16 | return; 17 | } 18 | const uint id = params.Width*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x; // current offset 19 | 20 | arr_y[id] += params.a*arr_x[id]; // saxpy 21 | } 22 | -------------------------------------------------------------------------------- /test/correctness/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # /test cmake file 2 | 3 | if(NOT VULKAN_COMPUTE_EXAMPLE_BUILD_TESTS) 4 | return() 5 | endif() 6 | 7 | add_custom_command( 8 | OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/shaders 9 | COMMAND ${CMAKE_COMMAND} 10 | ARGS -E create_symlink $/shaders ${CMAKE_CURRENT_BINARY_DIR}/shaders 11 | DEPENDS example_filter 12 | COMMENT "link shaders to build tests folder" 13 | ) 14 | add_custom_target(link_shaders_dir DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/shaders) 15 | 16 | find_package(Catch2 REQUIRED) 17 | function(add_catch_test arg_test_name arg_test_src) 18 | add_executable(${arg_test_name} ${arg_test_src}) 19 | target_link_libraries(${arg_test_name} PRIVATE Catch2::Catch) 20 | target_include_directories( ${arg_test_name} PRIVATE ${PROJECT_SOURCE_DIR}/src ) 21 | add_test(NAME ${arg_test_name} COMMAND ${arg_test_name} ) 22 | add_dependencies(${arg_test_name} link_shaders_dir) 23 | endfunction() 24 | 25 | add_catch_test(test_saxpy saxpy_t.cpp) 26 | target_link_libraries(test_saxpy PRIVATE example_filter) 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Slava Savenko 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Vulkan Compute Example 2 | 3 | Simple (but complete) example of Vulkan use for GPGPU computing. 4 | Saxpy kernel computation on 2d arrays. 5 | 6 | Features covered: 7 | - Vulkan boilerplate setup using vulkan-hpp 8 | - data copy between host and device-local memory 9 | - passing array parameters to shader (layout bindings) 10 | - passing non-array parameters to shader (push constants) 11 | - define workgroup dimensions (specialization constants) 12 | - very simple glsl shader (saxpy) 13 | - glsl to spir-v compilation (build time) 14 | 15 | This was an attempt to structure the Vulkan compute code in a way that would be easy to modify for each particular use case. 16 | I think I failed here so this example still sucks. But I learned while doing this and as a result there is a [vuh](https://github.com/Glavnokoman/vuh) Vulkan compute library which enables you to do the same but in (literally) 10 lines of code. You're cordially invited to use that instead. 17 | 18 | ## Dependencies 19 | - c++14 compatible compiler 20 | - cmake 21 | - [vulkan-headers](https://github.com/KhronosGroup/Vulkan-Docs) 22 | - [vulkan-hpp](https://github.com/KhronosGroup/Vulkan-Hpp) 23 | - [glslang](https://github.com/KhronosGroup/glslang) 24 | - [catch2](https://github.com/catchorg/Catch2) (optional) 25 | - [sltbench](https://github.com/ivafanas/sltbench) (optional) 26 | -------------------------------------------------------------------------------- /src/vulkan_helpers.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | 7 | namespace vuh { 8 | 9 | inline auto div_up(uint32_t x, uint32_t y){ return (x + y - 1u)/y; } 10 | 11 | VKAPI_ATTR VkBool32 VKAPI_CALL debugReporter( 12 | VkDebugReportFlagsEXT, VkDebugReportObjectTypeEXT, uint64_t, size_t, int32_t 13 | , const char* pLayerPrefix 14 | , const char* pMessage 15 | , void* /*pUserData*/ 16 | ); 17 | 18 | auto readShaderSrc(const char* filename)-> std::vector; 19 | 20 | auto loadShader(const vk::Device& device, const char* filename 21 | , vk::ShaderModuleCreateFlags flags = vk::ShaderModuleCreateFlags() 22 | )-> vk::ShaderModule; 23 | 24 | auto enabledExtensions(const std::vector& extensions)-> std::vector; 25 | 26 | auto enabledLayers(const std::vector& layers)-> std::vector; 27 | 28 | auto registerValidationReporter(const vk::Instance& instance, PFN_vkDebugReportCallbackEXT reporter 29 | )-> VkDebugReportCallbackEXT; 30 | 31 | auto getComputeQueueFamilyId(const vk::PhysicalDevice& physicalDevice)-> uint32_t; 32 | 33 | auto createDevice(const vk::PhysicalDevice& physicalDevice, const std::vector& layers 34 | , uint32_t queueFamilyID)-> vk::Device; 35 | 36 | auto createBuffer(const vk::Device& device 37 | , uint32_t bufSize 38 | , vk::BufferUsageFlags usage=vk::BufferUsageFlagBits::eStorageBuffer 39 | )-> vk::Buffer; 40 | 41 | auto selectMemory(const vk::PhysicalDevice& physDev 42 | , const vk::Device& device 43 | , const vk::Buffer& buf 44 | , const vk::MemoryPropertyFlags properties ///< desired memory properties 45 | )-> uint32_t; 46 | 47 | auto allocMemory(const vk::PhysicalDevice& physDev, const vk::Device& device 48 | , const vk::Buffer& buf 49 | , uint32_t memory_id 50 | )-> vk::DeviceMemory; 51 | 52 | auto copyBuf(const vk::Buffer& src, vk::Buffer& dst, const uint32_t size 53 | , const vk::Device& device, const vk::PhysicalDevice& physDev)-> void; 54 | 55 | } // namespace vuh 56 | -------------------------------------------------------------------------------- /test/performance/saxpy_b.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | namespace { 10 | 11 | struct Params{ 12 | uint32_t width; 13 | uint32_t height; 14 | float a; 15 | 16 | auto operator== (const Params& other) const-> bool { 17 | return width == other.width && height == other.height && a == other.a; 18 | } 19 | auto operator!= (const Params& other) const-> bool { return !(*this == other); } 20 | 21 | friend auto operator<< (std::ostream& s, const Params& p)-> std::ostream& { 22 | s << "{" << p.width << ", " << p.height << ", " << p.a << "}"; 23 | return s; 24 | } 25 | }; 26 | 27 | struct DataFixFull { 28 | ExampleFilter f{"shaders/saxpy.spv"}; 29 | Params p; 30 | std::vector y; 31 | std::vector x; 32 | }; 33 | 34 | struct FixSaxpyFull: private DataFixFull { 35 | using Type = DataFixFull; 36 | 37 | auto SetUp(const Params& p)-> Type& { 38 | if(p != this->p) { 39 | this->p = p; 40 | y = std::vector(p.width*p.height, 3.1f); 41 | x = std::vector(p.width*p.height, 1.9f); 42 | } 43 | return *this; 44 | } 45 | 46 | auto TearDown()-> void {} 47 | }; // class FixSaxpyFull 48 | 49 | struct FixShaderOnly: private DataFixFull { 50 | using Type = ExampleFilter; 51 | 52 | struct DeviceData{ 53 | explicit DeviceData(const DataFixFull& d) 54 | : d_y{vuh::Array::fromHost(d.y, d.f.device, d.f.physDevice)} 55 | , d_x{vuh::Array::fromHost(d.x, d.f.device, d.f.physDevice)} 56 | {} 57 | 58 | vuh::Array d_y; 59 | vuh::Array d_x; 60 | }; 61 | 62 | 63 | auto SetUp(const Params& p)-> Type& { 64 | if(p != this->p){ 65 | this->p = p; 66 | y = std::vector(p.width*p.height, 3.1f); 67 | x = std::vector(p.width*p.height, 1.9f); 68 | 69 | f.unbindParameters(); 70 | _dev_data = std::make_unique(static_cast(*this)); 71 | f.bindParameters(_dev_data->d_y, _dev_data->d_x, {p.width, p.height, p.a}); 72 | } 73 | return f; 74 | } 75 | 76 | auto TearDown()-> void {} 77 | 78 | private: 79 | std::unique_ptr _dev_data; 80 | }; // struct FixShaderOnly 81 | 82 | /// Copy arrays data to gpu device, setup the kernel and run it. 83 | auto saxpy(DataFixFull& fix, const Params& p)-> void { 84 | auto d_y = vuh::Array::fromHost(fix.y, fix.f.device, fix.f.physDevice); 85 | auto d_x = vuh::Array::fromHost(fix.x, fix.f.device, fix.f.physDevice); 86 | 87 | fix.f(d_y, d_x, {fix.p.width, fix.p.height, fix.p.a}); 88 | } 89 | 90 | /// Just run the kernel, assumes the data has been copied and shader is all set up. 91 | auto saxpy(ExampleFilter& f, const Params& p)-> void { 92 | f.run(); 93 | } 94 | 95 | static const auto params = std::vector({{32u, 32u, 2.f}, {128, 128, 2.f}, {1024, 1024, 3.f}}); 96 | 97 | } // namespace 98 | 99 | 100 | SLTBENCH_FUNCTION_WITH_FIXTURE_AND_ARGS(saxpy, FixSaxpyFull, params); 101 | SLTBENCH_FUNCTION_WITH_FIXTURE_AND_ARGS(saxpy, FixShaderOnly, params); 102 | 103 | SLTBENCH_MAIN(); 104 | -------------------------------------------------------------------------------- /src/example_filter.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "vulkan_helpers.h" 4 | 5 | /// doc me 6 | struct ExampleFilter { 7 | static constexpr auto NumDescriptors = uint32_t(2); ///< number of binding descriptors (array input-output parameters) 8 | 9 | /// C++ mirror of the shader push constants interface 10 | struct PushParams { 11 | uint32_t width; ///< frame width 12 | uint32_t height; ///< frame height 13 | float a; ///< saxpy (\$ y = y + ax \$) scaling factor 14 | }; 15 | 16 | public: // data 17 | vk::Instance instance; ///< Vulkan instance 18 | VkDebugReportCallbackEXT debugReportCallback; // 19 | vk::PhysicalDevice physDevice; ///< physical device 20 | vk::Device device; ///< logical device providing access to a physical one 21 | vk::ShaderModule shader; ///< compute shader 22 | vk::DescriptorSetLayout dscLayout; ///< c++ definition of the shader binding interface 23 | mutable vk::DescriptorPool dscPool; ///< descriptors pool 24 | vk::CommandPool cmdPool; ///< used to allocate command buffers 25 | vk::PipelineCache pipeCache; ///< pipeline cache 26 | vk::PipelineLayout pipeLayout; ///< defines shader interface as a set of layout bindings and push constants 27 | 28 | vk::Pipeline pipe; ///< pipeline to submit compute commands 29 | mutable vk::CommandBuffer cmdBuffer; ///< commands recorded here, once command buffer is submitted to a queue those commands get executed 30 | 31 | uint32_t compute_queue_familly_id; ///< index of the queue family supporting compute loads 32 | public: 33 | explicit ExampleFilter(const std::string& shaderPath); 34 | ~ExampleFilter() noexcept; 35 | 36 | auto bindParameters(vk::Buffer& out, const vk::Buffer& in, const PushParams& p) const-> void; 37 | auto unbindParameters() const-> void; 38 | auto run() const-> void; 39 | auto operator()(vk::Buffer& out, const vk::Buffer& in, const PushParams& p ) const-> void; 40 | private: // helpers 41 | static auto createInstance(const std::vector layers 42 | , const std::vector extensions 43 | )-> vk::Instance; 44 | 45 | static auto createDescriptorSetLayout(const vk::Device& device)-> vk::DescriptorSetLayout; 46 | static auto allocDescriptorPool(const vk::Device& device)-> vk::DescriptorPool; 47 | 48 | static auto createPipelineLayout(const vk::Device& device 49 | , const vk::DescriptorSetLayout& dscLayout 50 | )-> vk::PipelineLayout; 51 | 52 | static auto createComputePipeline(const vk::Device& device, const vk::ShaderModule& shader 53 | , const vk::PipelineLayout& pipeLayout 54 | , const vk::PipelineCache& cache 55 | )-> vk::Pipeline; 56 | 57 | static auto createDescriptorSet(const vk::Device& device, const vk::DescriptorPool& pool 58 | , const vk::DescriptorSetLayout& layout 59 | , vk::Buffer& out 60 | , const vk::Buffer& in 61 | , uint32_t size 62 | )-> vk::DescriptorSet; 63 | 64 | static auto createCommandBuffer(const vk::Device& device, const vk::CommandPool& cmdPool 65 | , const vk::Pipeline& pipeline, const vk::PipelineLayout& pipeLayout 66 | , const vk::DescriptorSet& dscSet 67 | , const PushParams& p 68 | )-> vk::CommandBuffer; 69 | }; // struct MixpixFilter 70 | -------------------------------------------------------------------------------- /test/correctness/approx.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | namespace test{ 9 | 10 | using std::begin; using std::end; 11 | 12 | namespace traits { 13 | namespace detail { 14 | template 15 | auto is_iterable_impl(int) 16 | -> decltype( begin(std::declval()) != end(std::declval()) // begin/end and operator != 17 | , void() // Handle evil operator , 18 | , ++std::declval()))&>() // operator ++ 19 | , void(*begin(std::declval())) // operator* 20 | , std::true_type{} 21 | ); 22 | 23 | template auto is_iterable_impl(...)-> std::false_type; 24 | } // namespace detail 25 | 26 | template using is_iterable = decltype(detail::is_iterable_impl(0)); 27 | } // namespace traits 28 | 29 | namespace detail { 30 | template 31 | auto is_close(const T& t1, const T& t2, const T& eps)-> bool { 32 | auto ret = std::abs(2.0*(t1 - t2)) <= std::abs(eps*(t1 + t2)); //eps*std::max(std::abs(t1), std::abs(t2))); 33 | return ret; 34 | } 35 | 36 | template 37 | struct ApproxIterable { 38 | using value_t = std::decay_t()))>; 39 | 40 | explicit ApproxIterable(const T& values 41 | , value_t eps=std::numeric_limits::epsilon()*100 42 | ) 43 | : _values{values}, _eps{eps} 44 | {} 45 | 46 | auto eps(double e)-> ApproxIterable& { _eps = e; return *this; } 47 | 48 | auto verbose()-> ApproxIterable& { _verbose = true; return *this; } 49 | 50 | template::value>* = nullptr> 51 | friend auto operator== (const U& v1, const ApproxIterable& app)-> bool { 52 | if(v1.size() != app._values.size()){ // size() strictly speaking not required by iterable trait 53 | if(app._verbose){ 54 | std::cerr << "approximate comparison failed: different iterables size" << "\n"; 55 | } 56 | return false; 57 | } 58 | 59 | auto it_v1 = begin(v1); 60 | auto it_v2 = begin(app._values); 61 | for(size_t i = 0; it_v1 != end(v1); ++it_v1, ++it_v2, ++i){ 62 | if(!is_close(*it_v1, *it_v2, app._eps)){ 63 | if(app._verbose){ 64 | std::cerr << "approximate compare failed at offset " << i 65 | << ": " << *it_v1 << " ~= " << *it_v2 << std::endl; 66 | } 67 | return false; 68 | } 69 | } 70 | if(it_v2 != end(app._values)){ 71 | if(app._verbose){ 72 | std::cerr << "approximate compare failed: different iterables size" << std::endl; 73 | } 74 | return false; 75 | } 76 | return true; 77 | } 78 | 79 | friend auto operator<< (std::ostream& out, const ApproxIterable& app)-> std::ostream& { 80 | if(app._values.size() < 16) { 81 | for(const auto& v: app._values){ 82 | out << v << ","; 83 | } 84 | } else { 85 | out << app._values.front() << "\n...\n" << app._values.back(); 86 | } 87 | return out; 88 | } 89 | private: 90 | const T& _values; 91 | value_t _eps; 92 | bool _verbose = false; 93 | }; // struct ApproxIterable 94 | 95 | template 96 | struct ApproxScalar { 97 | explicit ApproxScalar(const T& value 98 | , T eps=std::numeric_limits::epsilon()*100 99 | ) 100 | : _val{value}, _eps{eps} 101 | {} 102 | 103 | auto eps(double e)-> ApproxScalar& { _eps = e; return *this; } 104 | 105 | template 106 | friend auto operator== (const U& v1, const ApproxScalar& app)-> bool { 107 | return is_close(v1, app._val, app._eps); 108 | } 109 | 110 | friend auto operator<< (std::ostream& out, const ApproxScalar& app)-> std::ostream& { 111 | out << app._val; 112 | return out; 113 | } 114 | private: 115 | const T& _val; 116 | double _eps; 117 | }; // struct ApproxScalar 118 | 119 | } // namespace detail 120 | 121 | /// factory function for iterable approximators 122 | template 123 | auto approx(const T& values 124 | , typename std::enable_if_t::value>* = nullptr 125 | ) 126 | { 127 | return detail::ApproxIterable(values); 128 | } 129 | 130 | /// factory function for scalar approximators 131 | template 132 | auto approx(const T& val 133 | , typename std::enable_if_t::value>* = nullptr 134 | ) 135 | { 136 | return detail::ApproxScalar(val); 137 | } 138 | 139 | } //namespace test 140 | -------------------------------------------------------------------------------- /src/vulkan_helpers.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "vulkan_helpers.h" 4 | 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | namespace vuh { 12 | 13 | /// Device buffer owning its chunk of memory. 14 | template 15 | class Array { 16 | // Helper class to access to (host-visible!!!) device memory from the host. 17 | // Unmapping memory is not necessary. 18 | struct BufferHostView { 19 | using ptr_type = T*; 20 | 21 | const vk::Device device; 22 | const vk::DeviceMemory devMemory; 23 | const ptr_type data; ///< points to the first element 24 | const size_t size; ///< number of elements 25 | 26 | /// Constructor 27 | explicit BufferHostView(vk::Device device, vk::DeviceMemory devMem 28 | , size_t nelements ///< number of elements 29 | ) 30 | : device(device), devMemory(devMem) 31 | , data(ptr_type(device.mapMemory(devMem, 0, nelements*sizeof(T)))) 32 | , size(nelements) 33 | {} 34 | 35 | auto begin()-> ptr_type { return data; } 36 | auto end()-> ptr_type { return data + size; } 37 | }; // BufferHostView 38 | 39 | private: 40 | vk::Buffer _buf; ///< device buffer 41 | vk::DeviceMemory _mem; ///< associated chunk of device memorys 42 | vk::PhysicalDevice _physdev; ///< physical device owning the memory 43 | std::unique_ptr _dev; ///< pointer to logical device. no real ownership, just to provide value semantics to the class. 44 | vk::MemoryPropertyFlags _flags; ///< Actual flags of allocated memory. Can be a superset of requested flags. 45 | size_t _size; ///< number of elements. actual allocated memory may be a bit bigger than necessary. 46 | public: 47 | using value_type = T; 48 | 49 | Array(Array&&) = default; 50 | auto operator=(Array&&)-> Array& = default; 51 | 52 | /// Constructor 53 | explicit Array(const vk::Device& device, const vk::PhysicalDevice& physDevice 54 | , uint32_t n_elements ///< number of elements of corresponding type 55 | , vk::MemoryPropertyFlags properties=vk::MemoryPropertyFlagBits::eDeviceLocal 56 | , vk::BufferUsageFlags usage=vk::BufferUsageFlagBits::eStorageBuffer 57 | ) 58 | : Array(device, physDevice 59 | , createBuffer(device, n_elements*sizeof(T), update_usage(physDevice, properties, usage)) 60 | , properties, n_elements) 61 | {} 62 | 63 | /// Destructor 64 | ~Array() noexcept { 65 | if(_dev){ 66 | _dev->freeMemory(_mem); 67 | _dev->destroyBuffer(_buf); 68 | _dev.release(); 69 | } 70 | } 71 | 72 | template 73 | static auto fromHost(C&& c, const vk::Device& device, const vk::PhysicalDevice& physDev 74 | , vk::MemoryPropertyFlags properties=vk::MemoryPropertyFlagBits::eDeviceLocal 75 | , vk::BufferUsageFlags usage=vk::BufferUsageFlagBits::eStorageBuffer 76 | )-> Array 77 | { 78 | auto r = Array(device, physDev, uint32_t(c.size()), properties, usage); 79 | if(r._flags & vk::MemoryPropertyFlagBits::eHostVisible){ // memory is host-visible 80 | std::copy(begin(c), end(c), r.host_view().data); 81 | } else { // memory is not host visible, use staging buffer 82 | auto stage_buf = fromHost(std::forward(c), device, physDev 83 | , vk::MemoryPropertyFlagBits::eHostVisible 84 | , vk::BufferUsageFlagBits::eTransferSrc); 85 | copyBuf(stage_buf, r, stage_buf.size()*sizeof(T), device, physDev); 86 | } 87 | return r; 88 | } 89 | 90 | operator vk::Buffer& () { return *reinterpret_cast(this + offsetof(Array, _buf)); } 91 | operator const vk::Buffer& () const { return *reinterpret_cast(this + offsetof(Array, _buf)); } 92 | 93 | /// @return number of items in the buffer 94 | auto size() const-> size_t { 95 | return _size; 96 | } 97 | 98 | template 99 | auto to_host(C& c)-> void { 100 | if(_flags & vk::MemoryPropertyFlagBits::eHostVisible){ // memory IS host visible 101 | auto hv = host_view(); 102 | c.resize(size()); 103 | std::copy(std::begin(hv), std::end(hv), c.data()); 104 | } else { // memory is not host visible, use staging buffer 105 | // copy device memory to staging buffer 106 | auto stage_buf = Array(*_dev, _physdev, size() 107 | , vk::MemoryPropertyFlagBits::eHostVisible 108 | , vk::BufferUsageFlagBits::eTransferDst); 109 | copyBuf(_buf, stage_buf, size()*sizeof(T), *_dev, _physdev); 110 | stage_buf.to_host(c); // copy from staging buffer to host 111 | } 112 | } 113 | 114 | private: // helpers 115 | /// 116 | auto host_view()-> BufferHostView { return BufferHostView(*_dev, _mem, size()); } 117 | 118 | /// Helper constructor 119 | explicit Array(const vk::Device& device, const vk::PhysicalDevice& physDevice 120 | , vk::Buffer buffer 121 | , vk::MemoryPropertyFlags properties 122 | , size_t size 123 | ) 124 | : Array(device, physDevice, buffer, size 125 | , selectMemory(physDevice, device, buffer, properties)) 126 | {} 127 | 128 | /// Helper constructor. This one does the actual construction and binding. 129 | explicit Array(const vk::Device& device, const vk::PhysicalDevice& physDevice 130 | , vk::Buffer buf, size_t size 131 | , uint32_t memory_id) 132 | : _buf(buf) 133 | , _mem(allocMemory(physDevice, device, buf, memory_id)) 134 | , _physdev(physDevice) 135 | , _dev(&device) 136 | , _flags(physDevice.getMemoryProperties().memoryTypes[memory_id].propertyFlags) 137 | , _size(size) 138 | { 139 | device.bindBufferMemory(buf, _mem, 0); 140 | } 141 | 142 | /// crutch to modify buffer usage 143 | auto update_usage(const vk::PhysicalDevice& physDevice 144 | , vk::MemoryPropertyFlags properties 145 | , vk::BufferUsageFlags usage 146 | )-> vk::BufferUsageFlags 147 | { 148 | if(physDevice.getProperties().deviceType == vk::PhysicalDeviceType::eDiscreteGpu 149 | && properties == vk::MemoryPropertyFlagBits::eDeviceLocal 150 | && usage == vk::BufferUsageFlagBits::eStorageBuffer) 151 | { 152 | usage |= vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst; 153 | } 154 | return usage; 155 | } 156 | }; // Array 157 | 158 | } // namespace vuh 159 | -------------------------------------------------------------------------------- /src/vulkan_helpers.cpp: -------------------------------------------------------------------------------- 1 | #include "vulkan_helpers.h" 2 | #include "vulkan_helpers.hpp" 3 | 4 | #include 5 | #include 6 | 7 | using std::begin; 8 | using std::end; 9 | #define ALL(x) begin(x), end(x) 10 | #define ARR_VIEW(x) uint32_t(x.size()), x.data() 11 | 12 | namespace vuh { 13 | 14 | VKAPI_ATTR VkBool32 VKAPI_CALL debugReporter( 15 | VkDebugReportFlagsEXT , VkDebugReportObjectTypeEXT, uint64_t, size_t, int32_t 16 | , const char* pLayerPrefix 17 | , const char* pMessage 18 | , void* /*pUserData*/ 19 | ){ 20 | std::cerr << "[WARNING]: Vulkan says: " << pLayerPrefix << ": " << pMessage << "\n"; 21 | return VK_FALSE; 22 | } 23 | 24 | /// Read binary shader file into array of uint32_t. little endian assumed. 25 | /// Padded by 0s to a boundary of 4. 26 | auto readShaderSrc(const char* filename)-> std::vector { 27 | auto fin = std::ifstream(filename, std::ios::binary); 28 | if(!fin.is_open()){ 29 | throw std::runtime_error(std::string("could not open file ") + filename); 30 | } 31 | auto ret = std::vector(std::istreambuf_iterator(fin), std::istreambuf_iterator()); 32 | 33 | ret.resize(4*div_up(ret.size(), size_t(4))); 34 | return ret; 35 | } 36 | 37 | /// create shader module, reading spir-v from a file 38 | auto loadShader(const vk::Device& device, const char* filename 39 | , vk::ShaderModuleCreateFlags flags 40 | )-> vk::ShaderModule 41 | { 42 | auto code = readShaderSrc(filename); 43 | auto shaderCI = vk::ShaderModuleCreateInfo(flags, code.size() 44 | , reinterpret_cast(code.data())); 45 | return device.createShaderModule(shaderCI); 46 | } 47 | 48 | /// filter list of desired extensions to include only those supported by current Vulkan instance 49 | auto enabledExtensions(const std::vector& extensions)-> std::vector { 50 | auto ret = std::vector{}; 51 | auto instanceExtensions = vk::enumerateInstanceExtensionProperties(); 52 | for(auto e: extensions){ 53 | auto it = std::find_if(ALL(instanceExtensions) 54 | , [=](auto& p){ return strcmp(p.extensionName, e);}); 55 | if(it != end(instanceExtensions)){ 56 | ret.push_back(e); 57 | } else { 58 | std::cerr << "[WARNING]: extension " << e << " is not found" "\n"; 59 | } 60 | } 61 | return ret; 62 | } 63 | 64 | /// filter list of desired extensions to include only those supported by current Vulkan instance 65 | auto enabledLayers(const std::vector& layers)-> std::vector { 66 | auto ret = std::vector{}; 67 | auto instanceLayers = vk::enumerateInstanceLayerProperties(); 68 | for(auto l: layers){ 69 | auto it = std::find_if(ALL(instanceLayers) 70 | , [=](auto& p){ return strcmp(p.layerName, l);}); 71 | if(it != end(instanceLayers)){ 72 | ret.push_back(l); 73 | } else { 74 | std::cerr << "[WARNING] layer " << l << " is not found" "\n"; 75 | } 76 | } 77 | return ret; 78 | } 79 | 80 | /// Register a callback function for the extension VK_EXT_DEBUG_REPORT_EXTENSION_NAME, 81 | /// so that warnings emitted from the validation layer are actually printed. 82 | auto registerValidationReporter(const vk::Instance& instance, PFN_vkDebugReportCallbackEXT reporter 83 | )-> VkDebugReportCallbackEXT 84 | { 85 | auto ret = VkDebugReportCallbackEXT(nullptr); 86 | auto createInfo = VkDebugReportCallbackCreateInfoEXT{}; 87 | createInfo.sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT; 88 | createInfo.flags = VK_DEBUG_REPORT_ERROR_BIT_EXT | VK_DEBUG_REPORT_WARNING_BIT_EXT 89 | | VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT; 90 | createInfo.pfnCallback = reporter; 91 | 92 | // We have to explicitly load this function 93 | auto createFN = PFN_vkCreateDebugReportCallbackEXT( 94 | instance.getProcAddr("vkCreateDebugReportCallbackEXT")); 95 | if(createFN){ 96 | createFN(instance, &createInfo, nullptr, &ret); 97 | } else { 98 | std::cerr << "Could not load vkCreateDebugReportCallbackEXT\n"; 99 | } 100 | return ret; 101 | } 102 | 103 | /// create logical device to interact with the physical one 104 | auto createDevice(const vk::PhysicalDevice& physicalDevice, const std::vector& layers 105 | , uint32_t queueFamilyID 106 | )-> vk::Device 107 | { 108 | // When creating the device specify what queues it has 109 | auto p = float(1.0); // queue priority 110 | auto queueCI = vk::DeviceQueueCreateInfo(vk::DeviceQueueCreateFlags(), queueFamilyID, 1, &p); 111 | auto devCI = vk::DeviceCreateInfo(vk::DeviceCreateFlags(), 1, &queueCI, ARR_VIEW(layers)); 112 | 113 | return physicalDevice.createDevice(devCI, nullptr); 114 | } 115 | 116 | /// Create buffer on a device. Does NOT allocate memory. 117 | auto createBuffer(const vk::Device& device, uint32_t bufSize 118 | , vk::BufferUsageFlags usage 119 | )-> vk::Buffer 120 | { 121 | auto bufferCI = vk::BufferCreateInfo(vk::BufferCreateFlags(), bufSize, usage); 122 | return device.createBuffer(bufferCI); 123 | } 124 | 125 | /// @return the index of a queue family that supports compute operations. 126 | /// Groups of queues that have the same capabilities (for instance, they all supports graphics 127 | /// and computer operations), are grouped into queue families. 128 | /// When submitting a command buffer, you must specify to which queue in the family you are submitting to. 129 | auto getComputeQueueFamilyId(const vk::PhysicalDevice& physicalDevice)-> uint32_t { 130 | auto queueFamilies = physicalDevice.getQueueFamilyProperties(); 131 | 132 | // prefer using compute-only queue 133 | auto queue_it = std::find_if(ALL(queueFamilies), [](auto& f){ 134 | auto maskedFlags = ~vk::QueueFlagBits::eSparseBinding & f.queueFlags; // ignore sparse binding flag 135 | return 0 < f.queueCount // queue family does have some queues in it 136 | && (vk::QueueFlagBits::eCompute & maskedFlags) 137 | && !(vk::QueueFlagBits::eGraphics & maskedFlags); 138 | }); 139 | if(queue_it != end(queueFamilies)){ 140 | return uint32_t(std::distance(begin(queueFamilies), queue_it)); 141 | } 142 | 143 | // otherwise use any queue that has compute flag set 144 | queue_it = std::find_if(ALL(queueFamilies), [](auto& f){ 145 | auto maskedFlags = ~vk::QueueFlagBits::eSparseBinding & f.queueFlags; 146 | return 0 < f.queueCount && (vk::QueueFlagBits::eCompute & maskedFlags); 147 | }); 148 | if(queue_it != end(queueFamilies)){ 149 | return uint32_t(std::distance(begin(queueFamilies), queue_it)); 150 | } 151 | 152 | throw std::runtime_error("could not find a queue family that supports compute operations"); 153 | } 154 | 155 | /// Select memory with desired properties. 156 | /// @return id of the suitable memory, -1 if no suitable memory found. 157 | auto selectMemory(const vk::PhysicalDevice& physDev 158 | , const vk::Device& device 159 | , const vk::Buffer& buf 160 | , const vk::MemoryPropertyFlags properties ///< desired memory properties 161 | )-> uint32_t 162 | { 163 | auto memProperties = physDev.getMemoryProperties(); 164 | auto memoryReqs = device.getBufferMemoryRequirements(buf); 165 | for(uint32_t i = 0; i < memProperties.memoryTypeCount; ++i){ 166 | if( (memoryReqs.memoryTypeBits & (1u << i)) 167 | && ((properties & memProperties.memoryTypes[i].propertyFlags) == properties)) 168 | { 169 | return i; 170 | } 171 | } 172 | throw std::runtime_error("failed to select memory with required properties"); 173 | } 174 | 175 | auto allocMemory(const vk::PhysicalDevice& physDev, const vk::Device& device 176 | , const vk::Buffer& buf 177 | , uint32_t memory_id 178 | )-> vk::DeviceMemory 179 | { 180 | auto memoryReqs = device.getBufferMemoryRequirements(buf); 181 | auto allocInfo = vk::MemoryAllocateInfo(memoryReqs.size, memory_id); 182 | return device.allocateMemory(allocInfo); 183 | } 184 | 185 | /// Copy device buffers using the transient command pool. 186 | /// Fully sync, no latency hiding whatsoever. 187 | auto copyBuf(const vk::Buffer& src, vk::Buffer& dst, const uint32_t size 188 | , const vk::Device& device, const vk::PhysicalDevice& physDev)-> void 189 | { 190 | const auto qf_id = getComputeQueueFamilyId(physDev); // queue family id, TODO: use transfer queue 191 | auto cmd_pool = device.createCommandPool({vk::CommandPoolCreateFlagBits::eTransient, qf_id}); 192 | auto cmd_buf = device.allocateCommandBuffers({cmd_pool, vk::CommandBufferLevel::ePrimary, 1})[0]; 193 | cmd_buf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}); 194 | auto region = vk::BufferCopy(0, 0, size); 195 | cmd_buf.copyBuffer(src, dst, 1, ®ion); 196 | cmd_buf.end(); 197 | auto queue = device.getQueue(qf_id, 0); 198 | auto submit_info = vk::SubmitInfo(0, nullptr, nullptr, 1, &cmd_buf); 199 | queue.submit({submit_info}, nullptr); 200 | queue.waitIdle(); 201 | device.freeCommandBuffers(cmd_pool, 1, &cmd_buf); 202 | device.destroyCommandPool(cmd_pool); 203 | } 204 | 205 | } // namespace vuh 206 | -------------------------------------------------------------------------------- /src/example_filter.cpp: -------------------------------------------------------------------------------- 1 | #include "example_filter.h" 2 | 3 | #include "vulkan_helpers.hpp" 4 | 5 | #include 6 | 7 | #define ARR_VIEW(x) uint32_t(x.size()), x.data() 8 | #define ST_VIEW(s) uint32_t(sizeof(s)), &s 9 | 10 | using namespace vuh; 11 | namespace { 12 | constexpr uint32_t WORKGROUP_SIZE = 16; ///< compute shader workgroup dimension is WORKGROUP_SIZE x WORKGROUP_SIZE 13 | 14 | #ifdef NDEBUG 15 | constexpr bool enableValidation = false; 16 | #else 17 | constexpr bool enableValidation = true; 18 | #endif 19 | } // namespace 20 | 21 | 22 | /// Constructor 23 | ExampleFilter::ExampleFilter(const std::string& shaderPath){ 24 | auto layers = enableValidation ? enabledLayers({"VK_LAYER_LUNARG_standard_validation"}) 25 | : std::vector{}; 26 | auto extensions = enableValidation ? enabledExtensions({VK_EXT_DEBUG_REPORT_EXTENSION_NAME}) 27 | : std::vector{}; 28 | instance = createInstance(layers, extensions); 29 | debugReportCallback = enableValidation ? registerValidationReporter(instance, debugReporter) 30 | : nullptr; 31 | physDevice = instance.enumeratePhysicalDevices()[0]; // just use the first device 32 | compute_queue_familly_id = getComputeQueueFamilyId(physDevice); 33 | device = createDevice(physDevice, layers, compute_queue_familly_id); // TODO: when physical device is a discrete gpu, transfer queue needs to be included 34 | shader = loadShader(device, shaderPath.c_str()); 35 | 36 | dscLayout = createDescriptorSetLayout(device); 37 | dscPool = allocDescriptorPool(device); 38 | auto commandPoolCI = vk::CommandPoolCreateInfo(vk::CommandPoolCreateFlags(), compute_queue_familly_id); 39 | cmdPool = device.createCommandPool(commandPoolCI); 40 | pipeCache = device.createPipelineCache(vk::PipelineCacheCreateInfo()); 41 | pipeLayout = createPipelineLayout(device, dscLayout); 42 | 43 | pipe = createComputePipeline(device, shader, pipeLayout, pipeCache); 44 | cmdBuffer = vk::CommandBuffer{}; 45 | } 46 | 47 | /// Destructor 48 | ExampleFilter::~ExampleFilter() noexcept { 49 | device.destroyPipeline(pipe); 50 | device.destroyPipelineLayout(pipeLayout); 51 | device.destroyPipelineCache(pipeCache); 52 | device.destroyCommandPool(cmdPool); 53 | device.destroyDescriptorPool(dscPool); 54 | device.destroyDescriptorSetLayout(dscLayout); 55 | device.destroyShaderModule(shader); 56 | device.destroy(); 57 | 58 | if(debugReportCallback){ 59 | // unregister callback. 60 | auto destroyFn = PFN_vkDestroyDebugReportCallbackEXT( 61 | vkGetInstanceProcAddr(instance, "vkDestroyDebugReportCallbackEXT")); 62 | if(destroyFn){ 63 | destroyFn(instance, debugReportCallback, nullptr); 64 | } else { 65 | std::cerr << "Could not load vkDestroyDebugReportCallbackEXT\n"; 66 | } 67 | } 68 | 69 | instance.destroy(); 70 | } 71 | 72 | /// 73 | auto ExampleFilter::bindParameters(vk::Buffer& out, const vk::Buffer& in 74 | , const ExampleFilter::PushParams& p 75 | ) const-> void 76 | { 77 | auto dscSet = createDescriptorSet(device, dscPool, dscLayout, out, in, p.width*p.height); 78 | cmdBuffer = createCommandBuffer(device, cmdPool, pipe, pipeLayout, dscSet, p); 79 | } 80 | 81 | /// 82 | auto ExampleFilter::unbindParameters() const-> void 83 | { 84 | device.destroyDescriptorPool(dscPool); 85 | device.resetCommandPool(cmdPool, vk::CommandPoolResetFlags()); 86 | dscPool = allocDescriptorPool(device); 87 | } 88 | 89 | /// run (sync) the filter on previously bound parameters 90 | auto ExampleFilter::run() const-> void { 91 | assert(cmdBuffer != vk::CommandBuffer{}); // TODO: this should be a check for a valid command buffer 92 | auto submitInfo = vk::SubmitInfo(0, nullptr, nullptr, 1, &cmdBuffer); // submit a single command buffer 93 | 94 | // submit the command buffer to the queue and set up a fence. 95 | auto queue = device.getQueue(compute_queue_familly_id, 0); // 0 is the queue index in the family, by default just the first one is used 96 | auto fence = device.createFence(vk::FenceCreateInfo()); // fence makes sure the control is not returned to CPU till command buffer is depleted 97 | queue.submit({submitInfo}, fence); 98 | device.waitForFences({fence}, true, uint64_t(-1)); // wait for the fence indefinitely 99 | device.destroyFence(fence); 100 | } 101 | 102 | /// run (sync) the filter 103 | auto ExampleFilter::operator()(vk::Buffer& out, const vk::Buffer& in 104 | , const ExampleFilter::PushParams& p 105 | ) const-> void 106 | { 107 | bindParameters(out, in, p); 108 | run(); 109 | unbindParameters(); 110 | } 111 | 112 | /// Create vulkan Instance with app specific parameters. 113 | auto ExampleFilter::createInstance(const std::vector layers 114 | , const std::vector extensions 115 | )-> vk::Instance 116 | { 117 | auto appInfo = vk::ApplicationInfo("Example Filter", 0, "no_engine" 118 | , 0, VK_API_VERSION_1_0); // The only important field here is apiVersion 119 | auto createInfo = vk::InstanceCreateInfo(vk::InstanceCreateFlags(), &appInfo 120 | , ARR_VIEW(layers), ARR_VIEW(extensions)); 121 | return vk::createInstance(createInfo); 122 | } 123 | 124 | /// Specify a descriptor set layout (number and types of descriptors). 125 | auto ExampleFilter::createDescriptorSetLayout(const vk::Device& device)-> vk::DescriptorSetLayout { 126 | auto bindLayout = std::array{{ 127 | {0, vk::DescriptorType::eStorageBuffer, 1, vk::ShaderStageFlagBits::eCompute} 128 | ,{1, vk::DescriptorType::eStorageBuffer, 1, vk::ShaderStageFlagBits::eCompute} 129 | }}; 130 | auto layoutCI = vk::DescriptorSetLayoutCreateInfo(vk::DescriptorSetLayoutCreateFlags() 131 | , ARR_VIEW(bindLayout)); 132 | return device.createDescriptorSetLayout(layoutCI); 133 | } 134 | 135 | /// Allocate descriptor pool for a descriptors to all storage buffer in use 136 | auto ExampleFilter::allocDescriptorPool(const vk::Device& device)-> vk::DescriptorPool { 137 | auto descriptorPoolSize = vk::DescriptorPoolSize(vk::DescriptorType::eStorageBuffer, NumDescriptors); 138 | auto descriptorPoolCI = vk::DescriptorPoolCreateInfo(vk::DescriptorPoolCreateFlags(), 1 139 | , 1, &descriptorPoolSize); 140 | return device.createDescriptorPool(descriptorPoolCI); 141 | } 142 | 143 | /// Pipeline layout defines shader interface as a set of layout bindings and push constants. 144 | auto ExampleFilter::createPipelineLayout(const vk::Device& device 145 | , const vk::DescriptorSetLayout& dscLayout 146 | )-> vk::PipelineLayout 147 | { 148 | auto pushConstantsRange = vk::PushConstantRange(vk::ShaderStageFlagBits::eCompute 149 | , 0, sizeof(PushParams)); 150 | auto pipelineLayoutCI = vk::PipelineLayoutCreateInfo(vk::PipelineLayoutCreateFlags() 151 | , 1, &dscLayout, 1, &pushConstantsRange); 152 | return device.createPipelineLayout(pipelineLayoutCI); 153 | } 154 | 155 | /// Create compute pipeline consisting of a single stage with compute shader. 156 | /// Specialization constants specialized here. 157 | auto ExampleFilter::createComputePipeline(const vk::Device& device, const vk::ShaderModule& shader 158 | , const vk::PipelineLayout& pipeLayout 159 | , const vk::PipelineCache& cache 160 | )-> vk::Pipeline 161 | { 162 | // specialize constants of the shader 163 | auto specEntries = std::array{ 164 | {{0, 0, sizeof(int)}, {1, 1*sizeof(int), sizeof(int)}} 165 | }; 166 | auto specValues = std::array{WORKGROUP_SIZE, WORKGROUP_SIZE}; 167 | auto specInfo = vk::SpecializationInfo(ARR_VIEW(specEntries) 168 | , specValues.size()*sizeof(int), specValues.data()); 169 | 170 | // Specify the compute shader stage, and it's entry point (main), and specializations 171 | auto stageCI = vk::PipelineShaderStageCreateInfo(vk::PipelineShaderStageCreateFlags() 172 | , vk::ShaderStageFlagBits::eCompute 173 | , shader, "main", &specInfo); 174 | auto pipelineCI = vk::ComputePipelineCreateInfo(vk::PipelineCreateFlags() 175 | , stageCI, pipeLayout); 176 | return device.createComputePipeline(cache, pipelineCI, nullptr); 177 | } 178 | 179 | /// Create descriptor set. Actually associate buffers to binding points in bindLayout. 180 | /// Buffer sizes are specified here as well. 181 | auto ExampleFilter::createDescriptorSet(const vk::Device& device, const vk::DescriptorPool& pool 182 | , const vk::DescriptorSetLayout& layout 183 | , vk::Buffer& out, const vk::Buffer& in, uint32_t size 184 | )-> vk::DescriptorSet 185 | { 186 | auto descriptorSetAI = vk::DescriptorSetAllocateInfo(pool, 1, &layout); 187 | auto descriptorSet = device.allocateDescriptorSets(descriptorSetAI)[0]; 188 | 189 | auto outInfo = vk::DescriptorBufferInfo(out, 0, sizeof(float)*size); 190 | auto inInfo = vk::DescriptorBufferInfo(in, 0, sizeof(float)*size); 191 | 192 | auto writeDsSets = std::array{{ 193 | {descriptorSet, 0, 0, 1, vk::DescriptorType::eStorageBuffer, nullptr, &outInfo} 194 | ,{descriptorSet, 1, 0, 1, vk::DescriptorType::eStorageBuffer, nullptr, &inInfo} 195 | }}; 196 | 197 | device.updateDescriptorSets(writeDsSets, {}); 198 | return descriptorSet; 199 | } 200 | 201 | /// Create command buffer, push the push constants, bind descriptors and define the work batch size. 202 | /// All command buffers allocated from given command pool must be submitted to queues of corresponding 203 | /// family ONLY. 204 | auto ExampleFilter::createCommandBuffer(const vk::Device& device, const vk::CommandPool& cmdPool 205 | , const vk::Pipeline& pipeline 206 | , const vk::PipelineLayout& pipeLayout 207 | , const vk::DescriptorSet& dscSet 208 | , const ExampleFilter::PushParams& p 209 | )-> vk::CommandBuffer 210 | { 211 | // allocate a command buffer from the command pool. 212 | auto commandBufferAI = vk::CommandBufferAllocateInfo(cmdPool, vk::CommandBufferLevel::ePrimary, 1); 213 | auto commandBuffer = device.allocateCommandBuffers(commandBufferAI)[0]; 214 | 215 | // Start recording commands into the newly allocated command buffer. 216 | // auto beginInfo = vk::CommandBufferBeginInfo(vk::CommandBufferUsageFlagBits::eOneTimeSubmit); // buffer is only submitted and used once 217 | auto beginInfo = vk::CommandBufferBeginInfo(); 218 | commandBuffer.begin(beginInfo); 219 | 220 | // Before dispatch bind a pipeline, AND a descriptor set. 221 | // The validation layer will NOT give warnings if you forget those. 222 | commandBuffer.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline); 223 | commandBuffer.bindDescriptorSets(vk::PipelineBindPoint::eCompute, pipeLayout 224 | , 0, {dscSet}, {}); 225 | 226 | commandBuffer.pushConstants(pipeLayout, vk::ShaderStageFlagBits::eCompute, 0, ST_VIEW(p)); 227 | 228 | // Start the compute pipeline, and execute the compute shader. 229 | // The number of workgroups is specified in the arguments. 230 | commandBuffer.dispatch(div_up(p.width, WORKGROUP_SIZE), div_up(p.height, WORKGROUP_SIZE), 1); 231 | commandBuffer.end(); // end recording commands 232 | return commandBuffer; 233 | } 234 | --------------------------------------------------------------------------------