├── .clang-format ├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── imported ├── CMakeLists.txt └── nuget.cmake ├── include ├── Application.h ├── Device.h ├── ShaderCompiler.h ├── Swapchain.h ├── Window.h └── WorkGraph.h ├── license.txt ├── readme.md ├── src ├── Application.cpp ├── Device.cpp ├── ShaderCompiler.cpp ├── Swapchain.cpp ├── Window.cpp ├── WorkGraph.cpp └── main.cpp └── tutorials ├── Common.h ├── tutorial-0 ├── HelloWorkGraphs.hlsl └── screenshot.png ├── tutorial-1 ├── Records.hlsl ├── RecordsSolution.hlsl └── screenshot.png ├── tutorial-2 ├── NodeLaunches.hlsl ├── NodeLaunchesSolution.hlsl └── screenshot.png ├── tutorial-3 ├── MaterialShading.hlsl ├── MaterialShadingSolution.hlsl ├── Scene.h └── screenshot.png ├── tutorial-4 ├── Recursion.hlsl ├── RecursionSolution.hlsl └── screenshot.png ├── tutorial-5 ├── Synchronization.hlsl ├── SynchronizationSolution.hlsl └── screenshot.png └── tutorial-6 ├── Mandelbrot.h ├── RecursiveGrid.hlsl ├── RecursiveGridSolution.hlsl └── screenshot.png /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | BasedOnStyle: Google 3 | ColumnLimit: 120 4 | IndentWidth: 4 5 | AlignAfterOpenBracket: Align 6 | BreakBeforeBraces: Custom 7 | BraceWrapping: 8 | AfterCaseLabel: false 9 | AfterUnion: false 10 | AfterStruct: false 11 | SplitEmptyFunction: true 12 | AfterNamespace: false 13 | AfterClass: false 14 | AfterFunction: true 15 | AfterControlStatement: MultiLine 16 | BeforeElse: false 17 | AllowShortIfStatementsOnASingleLine: false 18 | IndentCaseLabels: false 19 | DerivePointerAlignment: false 20 | PointerAlignment: Left 21 | AlignTrailingComments: true 22 | AllowShortBlocksOnASingleLine: false 23 | AlignConsecutiveAssignments: AcrossComments 24 | AlignConsecutiveDeclarations: AcrossComments 25 | AlignConsecutiveBitFields: AcrossComments 26 | AlignOperands: true 27 | AllowAllParametersOfDeclarationOnNextLine: false 28 | AllowAllArgumentsOnNextLine: true 29 | 
AllowShortCaseLabelsOnASingleLine: false 30 | BinPackArguments: false 31 | BinPackParameters: false 32 | AccessModifierOffset: -4 33 | ReflowComments: true 34 | SortIncludes: true 35 | TabWidth: 4 36 | AlwaysBreakTemplateDeclarations: Yes 37 | AllowShortFunctionsOnASingleLine: Empty 38 | UseTab: Never 39 | NamespaceIndentation: All 40 | --- 41 | Language: Cpp -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # ignore thumbnails created by Windows 2 | Thumbs.db 3 | 4 | # build folder 5 | build/ 6 | 7 | # vscode related files 8 | .vscode/ 9 | *.code-workspace 10 | 11 | # UI window placements 12 | **/imgui.ini* 13 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "imported/imgui"] 2 | path = imported/imgui 3 | url = https://github.com/ocornut/imgui 4 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # This file is part of the AMD & HSC Work Graph Playground. 2 | # 3 | # Copyright (C) 2024 Advanced Micro Devices, Inc. and Coburg University of Applied Sciences and Arts. 4 | # All rights reserved. 
5 | # 6 | # Permission is hereby granted, free of charge, to any person obtaining a copy 7 | # of this software and associated documentation files(the "Software"), to deal 8 | # in the Software without restriction, including without limitation the rights 9 | # to use, copy, modify, merge, publish, distribute, sublicense, and /or sell 10 | # copies of the Software, and to permit persons to whom the Software is 11 | # furnished to do so, subject to the following conditions : 12 | # 13 | # The above copyright notice and this permission notice shall be included in 14 | # all copies or substantial portions of the Software. 15 | # 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 19 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | # THE SOFTWARE. 

# Minimum version is dictated by features this project actually uses:
#   - file(ARCHIVE_EXTRACT)                                  (imported/nuget.cmake) -> 3.18
#   - "cmake -E create_hardlink"                             (shader links below)   -> 3.19
#   - generator expressions in add_custom_command(OUTPUT)    (imported/nuget.cmake) -> 3.20
cmake_minimum_required(VERSION 3.20)
project(WorkGraphPlayground VERSION 0.1)

set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)

# Executables and runtime DLLs are placed below <build>/bin.
# NOTE(review): CMAKE_EXECUTABLE_OUTPUT_DIRECTORY is not a documented CMake variable;
# it is kept only in case something outside this file reads it.
set(CMAKE_EXECUTABLE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)

# Per-configuration runtime folder (bin/Debug, bin/Release, ...).
# Evaluated at generate time, so it is only usable in properties and custom commands.
set(PROJECT_RUNTIME_DIRECTORY "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/$<CONFIG>")

# Directory-scoped on purpose: also applies to the targets created in imported/.
add_compile_definitions(UNICODE _UNICODE)

set_property(GLOBAL PROPERTY USE_FOLDERS ON)

# Defines the NuGet interface targets (Microsoft.Direct3D.*) and the imgui library.
add_subdirectory(imported)

# CONFIGURE_DEPENDS re-runs the glob at build time, so newly added sources and
# tutorials are picked up without a manual re-configure.
file(GLOB PROJECT_SOURCE_FILES CONFIGURE_DEPENDS
    ${CMAKE_CURRENT_SOURCE_DIR}/include/*.h
    ${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp)
file(GLOB_RECURSE PROJECT_SHADER_FILES CONFIGURE_DEPENDS
    ${CMAKE_CURRENT_SOURCE_DIR}/tutorials/*.md
    ${CMAKE_CURRENT_SOURCE_DIR}/tutorials/*.png
    ${CMAKE_CURRENT_SOURCE_DIR}/tutorials/*.h
    ${CMAKE_CURRENT_SOURCE_DIR}/tutorials/*.hlsl)
# Tutorial files are listed in the IDE but must not be processed by any build tool.
set_source_files_properties(${PROJECT_SHADER_FILES} PROPERTIES VS_TOOL_OVERRIDE "Text")

add_executable(${PROJECT_NAME} ${PROJECT_SOURCE_FILES} ${PROJECT_SHADER_FILES})
target_include_directories(${PROJECT_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include)
target_link_libraries(${PROJECT_NAME} PRIVATE
    Microsoft.Direct3D.D3D12
    Microsoft.Direct3D.DXC
    Microsoft.Direct3D.WARP
    d3d12
    dxcompiler
    dxgi
    dxguid
    imgui)

# Start the debugger in the folder that holds the executable, the copied DLLs
# and the linked tutorials/ directory.
set_target_properties(${PROJECT_NAME} PROPERTIES
    VS_DPI_AWARE "PerMonitor"
    VS_DEBUGGER_WORKING_DIRECTORY "${PROJECT_RUNTIME_DIRECTORY}")
set_property(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" PROPERTY VS_STARTUP_PROJECT ${PROJECT_NAME})

# set source group for shader files & link to bin folder
set(SHADER_BASE_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/tutorials)
foreach(SHADER IN LISTS PROJECT_SHADER_FILES)
    get_filename_component(SHADER_FILE_DIRECTORY "${SHADER}" DIRECTORY)
    get_filename_component(SHADER_FILE_NAME "${SHADER}" NAME)
    file(RELATIVE_PATH SHADER_FILE_DIRECTORY_RELATIVE_PATH "${SHADER_BASE_DIRECTORY}" "${SHADER_FILE_DIRECTORY}")

    # Files directly inside tutorials/ have an empty relative path; handle them
    # separately so neither the source group nor the output path gets a stray "/".
    if ("${SHADER_FILE_DIRECTORY_RELATIVE_PATH}" STREQUAL "")
        source_group("Shader Source Files" FILES "${SHADER}")
        set(SHADER_OUTPUT_DIRECTORY "${PROJECT_RUNTIME_DIRECTORY}/tutorials")
    else()
        source_group("Shader Source Files/${SHADER_FILE_DIRECTORY_RELATIVE_PATH}" FILES "${SHADER}")
        set(SHADER_OUTPUT_DIRECTORY "${PROJECT_RUNTIME_DIRECTORY}/tutorials/${SHADER_FILE_DIRECTORY_RELATIVE_PATH}")
    endif()

    # to enable shader hot-reloading, instead of copying the shaders to the bin output folder at the end of the build,
    # we create hardlinks between a file in the bin folder and the shader source file.
    # This way, updates to the shader source file are automatically propagated to the bin folder,
    # and - unlike symlinks - the hardlinks allow copying/moving/compressing the bin folder without having broken links.
    add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD
        # create parent folder
        COMMAND ${CMAKE_COMMAND} -E make_directory "${SHADER_OUTPUT_DIRECTORY}"
        # create hardlink
        COMMAND ${CMAKE_COMMAND} -E create_hardlink "${SHADER}" "${SHADER_OUTPUT_DIRECTORY}/${SHADER_FILE_NAME}"
        VERBATIM)
endforeach()
-------------------------------------------------------------------------------- /imported/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # This file is part of the AMD & HSC Work Graph Playground. 2 | # 3 | # Copyright (C) 2024 Advanced Micro Devices, Inc. 
and Coburg University of Applied Sciences and Arts. 4 | # All rights reserved. 5 | # 6 | # Permission is hereby granted, free of charge, to any person obtaining a copy 7 | # of this software and associated documentation files(the "Software"), to deal 8 | # in the Software without restriction, including without limitation the rights 9 | # to use, copy, modify, merge, publish, distribute, sublicense, and /or sell 10 | # copies of the Software, and to permit persons to whom the Software is 11 | # furnished to do so, subject to the following conditions : 12 | # 13 | # The above copyright notice and this permission notice shall be included in 14 | # all copies or substantial portions of the Software. 15 | # 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 19 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | # THE SOFTWARE. 

# Provides fetch_nuget_package().
include(${CMAKE_CURRENT_LIST_DIR}/nuget.cmake)

# Each call downloads the package into the build tree and defines an INTERFACE
# target named after the package, which the main executable links against.
fetch_nuget_package(
    PACKAGE Microsoft.Direct3D.D3D12
    VERSION 1.613.3
)
fetch_nuget_package(
    PACKAGE Microsoft.Direct3D.DXC
    VERSION 1.8.2403.18
)
fetch_nuget_package(
    PACKAGE Microsoft.Direct3D.WARP
    VERSION 1.0.13
)

# imgui is a git submodule (see .gitmodules). Without this check an uninitialised
# submodule produces an empty glob and a confusing "no sources" error from add_library.
if (NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/imgui/imgui.h")
    message(FATAL_ERROR
        "imgui sources not found in \"${CMAKE_CURRENT_SOURCE_DIR}/imgui\". "
        "Run \"git submodule update --init\" and configure again.")
endif()

# Core imgui sources plus the D3D12 and Win32 backends only.
# CONFIGURE_DEPENDS re-runs the glob at build time, so a submodule update that
# adds or removes files is picked up without a manual re-configure.
file(GLOB IMGUI_SOURCES CONFIGURE_DEPENDS
    ${CMAKE_CURRENT_SOURCE_DIR}/imgui/*.h
    ${CMAKE_CURRENT_SOURCE_DIR}/imgui/*.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/imgui/backends/*_dx12.h
    ${CMAKE_CURRENT_SOURCE_DIR}/imgui/backends/*_dx12.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/imgui/backends/*_win32.h
    ${CMAKE_CURRENT_SOURCE_DIR}/imgui/backends/*_win32.cpp)
add_library(imgui STATIC ${IMGUI_SOURCES})
# PUBLIC: consumers include imgui headers relative to the imgui root.
target_include_directories(imgui PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/imgui)
-------------------------------------------------------------------------------- /imported/nuget.cmake: -------------------------------------------------------------------------------- 1 | # This file is part of the AMD & HSC Work Graph Playground. 2 | # 3 | # Copyright (C) 2024 Advanced Micro Devices, Inc. and Coburg University of Applied Sciences and Arts. 4 | # All rights reserved. 5 | # 6 | # Permission is hereby granted, free of charge, to any person obtaining a copy 7 | # of this software and associated documentation files(the "Software"), to deal 8 | # in the Software without restriction, including without limitation the rights 9 | # to use, copy, modify, merge, publish, distribute, sublicense, and /or sell 10 | # copies of the Software, and to permit persons to whom the Software is 11 | # furnished to do so, subject to the following conditions : 12 | # 13 | # The above copyright notice and this permission notice shall be included in 14 | # all copies or substantial portions of the Software. 
15 | # 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 19 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | # THE SOFTWARE. 23 | 

# fetch_nuget_package(PACKAGE <id> VERSION <version>)
#
# Downloads the given NuGet package from nuget.org into
# ${CMAKE_CURRENT_BINARY_DIR}/nuget, extracts it and defines:
#   - an INTERFACE library named <id> that exposes the package's
#     build/native/include directory, and
#   - a custom target <id>_copy that copies the package's x64 .exe/.dll/.pdb files
#     into ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/<config>. <id> depends on <id>_copy,
#     so linking against <id> also schedules the copy.
#
# Both arguments are required; a missing one aborts configuration.
# CMAKE_RUNTIME_OUTPUT_DIRECTORY is expected to be set by the caller.
#
# Example:
#   fetch_nuget_package(PACKAGE Microsoft.Direct3D.DXC VERSION 1.8.2403.18)
function(fetch_nuget_package)
    # The copy rules below use $<CONFIG> inside add_custom_command(OUTPUT ...),
    # which CMake only accepts from version 3.20 on.
    if (CMAKE_VERSION VERSION_LESS 3.20)
        message(FATAL_ERROR "fetch_nuget_package requires CMake 3.20 or newer (found ${CMAKE_VERSION})")
    endif()

    set(options)
    set(oneValueArgs PACKAGE VERSION)
    set(multiValueArgs)
    cmake_parse_arguments(FETCH_NUGET_PACKAGE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

    if (NOT DEFINED FETCH_NUGET_PACKAGE_PACKAGE)
        message(FATAL_ERROR "Missing PACKAGE argument")
    endif()
    if (NOT DEFINED FETCH_NUGET_PACKAGE_VERSION)
        message(FATAL_ERROR "Missing VERSION argument")
    endif()

    # The version is part of the cached file/folder name: otherwise bumping VERSION
    # would silently keep using the previously downloaded archive.
    set(PACKAGE_ID "${FETCH_NUGET_PACKAGE_PACKAGE}.${FETCH_NUGET_PACKAGE_VERSION}")
    set(DOWNLOAD_URL "https://www.nuget.org/api/v2/package/${FETCH_NUGET_PACKAGE_PACKAGE}/${FETCH_NUGET_PACKAGE_VERSION}")
    set(DOWNLOAD_FILE "${CMAKE_CURRENT_BINARY_DIR}/nuget/${PACKAGE_ID}.zip")
    set(PACKAGE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/nuget/${PACKAGE_ID}")

    if (NOT EXISTS "${DOWNLOAD_FILE}")
        message(STATUS "Downloading NuGet package \"${FETCH_NUGET_PACKAGE_PACKAGE}\" from \"${DOWNLOAD_URL}\".")

        file(DOWNLOAD "${DOWNLOAD_URL}" "${DOWNLOAD_FILE}" STATUS DOWNLOAD_RESULT)

        # STATUS is a two-element list: numeric code and message.
        list(GET DOWNLOAD_RESULT 0 DOWNLOAD_RESULT_CODE)
        if(NOT DOWNLOAD_RESULT_CODE EQUAL 0)
            # Remove the partial/empty file, otherwise the EXISTS check above would
            # skip the download on the next configure and extraction would fail.
            file(REMOVE "${DOWNLOAD_FILE}")
            message(FATAL_ERROR "Failed to download NuGet package \"${FETCH_NUGET_PACKAGE_PACKAGE}\" from \"${DOWNLOAD_URL}\". Error: ${DOWNLOAD_RESULT}.")
        endif()
    endif()

    file(ARCHIVE_EXTRACT
        INPUT "${DOWNLOAD_FILE}"
        DESTINATION "${PACKAGE_DIRECTORY}")

    message(STATUS "Adding NuGet package \"${FETCH_NUGET_PACKAGE_PACKAGE}\".")

    add_library(${FETCH_NUGET_PACKAGE_PACKAGE} INTERFACE)
    target_include_directories(${FETCH_NUGET_PACKAGE_PACKAGE} INTERFACE "${PACKAGE_DIRECTORY}/build/native/include")

    file(GLOB PACKAGE_BIN_FILES
        "${PACKAGE_DIRECTORY}/build/native/bin/x64/*.exe"
        "${PACKAGE_DIRECTORY}/build/native/bin/x64/*.dll"
        "${PACKAGE_DIRECTORY}/build/native/bin/x64/*.pdb")

    # Per-configuration runtime folder (bin/Debug, bin/Release, ...).
    set(RUNTIME_DIRECTORY "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/$<CONFIG>")

    # Start from an empty list so a COPY_FILES variable in the caller's scope
    # cannot leak into this package's copy target.
    set(COPY_FILES)
    foreach(PACKAGE_BIN_FILE IN LISTS PACKAGE_BIN_FILES)
        get_filename_component(PACKAGE_BIN_FILE_NAME "${PACKAGE_BIN_FILE}" NAME)
        message(STATUS "Generating custom command for ${PACKAGE_BIN_FILE_NAME}")
        # PRE_BUILD is only valid for the add_custom_command(TARGET ...) signature
        # and has therefore been dropped from this OUTPUT rule.
        add_custom_command(
            OUTPUT "${RUNTIME_DIRECTORY}/${PACKAGE_BIN_FILE_NAME}"
            COMMAND ${CMAKE_COMMAND} -E make_directory "${RUNTIME_DIRECTORY}"
            COMMAND ${CMAKE_COMMAND} -E copy_if_different "${PACKAGE_BIN_FILE}" "${RUNTIME_DIRECTORY}"
            MAIN_DEPENDENCY "${PACKAGE_BIN_FILE}"
            COMMENT "Updating ${PACKAGE_BIN_FILE} into bin folder"
            VERBATIM
        )
        list(APPEND COPY_FILES "${RUNTIME_DIRECTORY}/${PACKAGE_BIN_FILE_NAME}")
    endforeach()

    # Unquoted so every copied file is passed as its own dependency.
    add_custom_target(${FETCH_NUGET_PACKAGE_PACKAGE}_copy DEPENDS ${COPY_FILES})
    set_target_properties(${FETCH_NUGET_PACKAGE_PACKAGE}_copy PROPERTIES FOLDER CopyTargets)

    add_dependencies(${FETCH_NUGET_PACKAGE_PACKAGE} ${FETCH_NUGET_PACKAGE_PACKAGE}_copy)

endfunction()
-------------------------------------------------------------------------------- /include/Application.h: -------------------------------------------------------------------------------- 1 | // This file is part of the AMD & HSC Work Graph Playground. 2 | // 3 | // Copyright (C) 2024 Advanced Micro Devices, Inc. 
and Coburg University of Applied Sciences and Arts. 4 | // All rights reserved. 5 | // 6 | // Permission is hereby granted, free of charge, to any person obtaining a copy 7 | // of this software and associated documentation files(the "Software"), to deal 8 | // in the Software without restriction, including without limitation the rights 9 | // to use, copy, modify, merge, publish, distribute, sublicense, and /or sell 10 | // copies of the Software, and to permit persons to whom the Software is 11 | // furnished to do so, subject to the following conditions : 12 | // 13 | // The above copyright notice and this permission notice shall be included in 14 | // all copies or substantial portions of the Software. 15 | // 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 19 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | // THE SOFTWARE. 

// NOTE(review): in this listing the <...> part of system includes and all template
// arguments (std::unique_ptr, std::span, ComPtr) appear to have been stripped.
// Restore them from the repository before compiling.
#pragma once

#include

#include "Device.h"
#include "ShaderCompiler.h"
#include "Swapchain.h"
#include "Window.h"
#include "WorkGraph.h"

// Top-level application object. Holds the window, device and swapchain members
// declared below together with all resources used to run the tutorial work graphs.
class Application {
public:
    // Start-up options. Defaults: a 1280x720 window titled "Work Graph Playground"
    // with WARP forcing, the debug layer and the GPU validation layer all disabled.
    struct Options {
        std::wstring title = L"Work Graph Playground";
        std::uint32_t windowWidth = 1280;
        std::uint32_t windowHeight = 720;

        bool forceWarpAdapter = false;
        bool enableDebugLayer = false;
        bool enableGpuValidationLayer = false;
    };

    Application(const Options& options);
    ~Application();

    // Presumably runs the main loop until the window is closed - confirm in Application.cpp.
    void Run();

    // List of available tutorials.
    // NOTE(review): the element type of the span is missing in this listing.
    static std::span GetTutorials();

private:
    // Per-frame callbacks; each receives the frame's command list and render target.
    void OnRender(ID3D12GraphicsCommandList10* commandList, const Swapchain::RenderTarget& renderTarget);
    void OnRenderUserInterface(ID3D12GraphicsCommandList10* commandList, const Swapchain::RenderTarget& renderTarget);
    // Called with the new window size.
    void OnResize(std::uint32_t width, std::uint32_t height);

    void CreateImGuiContext();
    void DestroyImGuiContext();

    void CreateWorkGraphRootSignature();
    // Creates work graph. Returns whether creation was successful.
    bool CreateWorkGraph();

    // Util methods for shader resources
    void CreateResourceDescriptorHeaps();
    void CreateWritableBackbuffer(std::uint32_t width, std::uint32_t height);
    void CreateScratchBuffer();
    void CreatePersistentScratchBuffer();
    void ClearShaderResources(ID3D12GraphicsCommandList10* commandList);

    void CreateFontBuffer();

    std::unique_ptr window_;
    std::unique_ptr device_;
    std::unique_ptr swapchain_;

    // Passed on to presentation; enabled by default.
    bool vsync_ = true;

    // Descriptor heap for ImGui
    ComPtr uiDescriptorHeap_;

    // Descriptor heaps for shader resources
    ComPtr clearDescriptorHeap_;
    ComPtr resourceDescriptorHeap_;

    // Shader resources
    ComPtr writableBackbuffer_;
    ComPtr scratchBuffer_;
    ComPtr persistentScratchBuffer_;

    // Buffer resource containing font atlas
    ComPtr fontBuffer_;

    // Clear persistent scratch buffer after work graph switch
    bool clearPersistentScratchBuffer_ = true;

    // Timeout to show compilation error message
    std::chrono::high_resolution_clock::time_point errorMessageEndTime_ = std::chrono::high_resolution_clock::now();
    // Start time of current tutorial. Delta to current time is available in the shader as "Time"
    std::chrono::high_resolution_clock::time_point startTime_ = std::chrono::high_resolution_clock::now();

    // Work Graph resources
    ShaderCompiler shaderCompiler_;
    ComPtr workGraphRootSignature_;
    // Index of the selected tutorial and whether its sample solution is used instead
    // of the exercise shader; both default to the first tutorial's exercise.
    std::uint32_t workGraphTutorialIndex_ = 0;
    bool workGraphUseSampleSolution_ = false;
    std::unique_ptr workGraph_;
};
-------------------------------------------------------------------------------- /include/Device.h: -------------------------------------------------------------------------------- 1 | // This file is part of the AMD & HSC Work Graph Playground. 
2 | // 3 | // Copyright (C) 2024 Advanced Micro Devices, Inc. and Coburg University of Applied Sciences and Arts. 4 | // All rights reserved. 5 | // 6 | // Permission is hereby granted, free of charge, to any person obtaining a copy 7 | // of this software and associated documentation files(the "Software"), to deal 8 | // in the Software without restriction, including without limitation the rights 9 | // to use, copy, modify, merge, publish, distribute, sublicense, and /or sell 10 | // copies of the Software, and to permit persons to whom the Software is 11 | // furnished to do so, subject to the following conditions : 12 | // 13 | // The above copyright notice and this permission notice shall be included in 14 | // all copies or substantial portions of the Software. 15 | // 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 19 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | // THE SOFTWARE. 

// NOTE(review): in this listing the <...> part of system includes and all template
// arguments (ComPtr, std::array) appear to have been stripped.
// Restore them from the repository before compiling.
#pragma once

#include
#include

// Device.h is also the common header for all D3D12 & WRL headers
#include
#include
#include
#include

// Macros defined by Windows.h that collide with ordinary identifiers;
// undefined here so every file including Device.h is free of them.
#undef min
#undef max
#undef CreateWindow
#undef CreateFile
#undef GetMessage
#undef RGB
#undef Yield

// Helper for ComPtr
using Microsoft::WRL::ComPtr;

// Helpers for D3D12 methods
// Presumably throws when hr is a failure code, as the name says - confirm in Device.cpp.
void ThrowIfFailed(HRESULT hr);

// Wraps the DXGI factory, the D3D12 device, its command queue and the
// per-frame command allocators/lists declared below.
class Device {
public:
    // Number of frame contexts kept in frameContexts_.
    static constexpr std::uint32_t BufferedFramesCount = 3;

    Device(bool forceWarpAdapter, bool enableDebugLayer, bool enableGpuValidationLayer);

    void WaitForDevice();

    // Frame recording interface: obtain the command list for the next frame,
    // then submit it with ExecuteCurrentFrameCommandList().
    ID3D12GraphicsCommandList10* GetNextFrameCommandList();
    void ExecuteCurrentFrameCommandList();

    // Non-owning accessors to the underlying API objects.
    IDXGIFactory4* GetDXGIFactory() const;
    ID3D12Device9* GetDevice() const;
    ID3D12CommandQueue* GetCommandQueue() const;

    // Description of the selected adapter; "Unknown Adapter" until one has been set.
    const std::string& GetAdapterDescription() const;

private:
    void CreateDXGIFactory(bool enableDebugLayer, bool enableGpuValidationLayer);
    ComPtr CreateDevice(IDXGIAdapter1* adapter) const;
    bool CheckDeviceFeatures(ID3D12Device9* device) const;
    void CreateDeviceResources();

    void RegisterDebugMessageCallback();

    ComPtr dxgiFactory_;

    std::string adapterDescription_ = "Unknown Adapter";

    ComPtr device_;
    ComPtr commandQueue_;

    // Resources owned by one buffered frame.
    struct FrameContext {
        ComPtr commandAllocator;
        ComPtr commandList;
        // Fence value associated with this frame; 0 until assigned.
        std::uint64_t waitFenceValue = 0;
    };

    std::array frameContexts_;
    // NOTE(review): frameIndex_ and fenceEvent_ have no default member initializer -
    // presumably set by the constructor; confirm in Device.cpp.
    std::uint32_t frameIndex_;

    ComPtr fence_;
    HANDLE fenceEvent_;
    std::uint64_t signaledFenceValue_ = 0;
};
-------------------------------------------------------------------------------- /include/ShaderCompiler.h: -------------------------------------------------------------------------------- 1 | // 
This file is part of the AMD & HSC Work Graph Playground. 2 | // 3 | // Copyright (C) 2024 Advanced Micro Devices, Inc. and Coburg University of Applied Sciences and Arts. 4 | // All rights reserved. 5 | // 6 | // Permission is hereby granted, free of charge, to any person obtaining a copy 7 | // of this software and associated documentation files(the "Software"), to deal 8 | // in the Software without restriction, including without limitation the rights 9 | // to use, copy, modify, merge, publish, distribute, sublicense, and /or sell 10 | // copies of the Software, and to permit persons to whom the Software is 11 | // furnished to do so, subject to the following conditions : 12 | // 13 | // The above copyright notice and this permission notice shall be included in 14 | // all copies or substantial portions of the Software. 15 | // 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 19 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | // THE SOFTWARE. 

// NOTE(review): in this listing the <...> part of system includes and all template
// arguments (ComPtr, std::unordered_map) appear to have been stripped.
// Restore them from the repository before compiling.
#pragma once

#include "Device.h"

//
#include

#include
#include

// Compiles shader source files and keeps track of the files it has read so that
// changes on disk can be detected (see CheckShaderSourceFiles).
class ShaderCompiler {
public:
    ShaderCompiler();

    // Compiles `shaderFile` for the given target profile and entry point.
    // NOTE(review): the ComPtr element type of the result is missing in this listing.
    ComPtr CompileShader(const std::string& shaderFile, const wchar_t* target, const wchar_t* entryPoint);

    // Checks shader source files for updates/changes
    bool CheckShaderSourceFiles();

private:
    // The include handler is granted access to the private members below.
    friend class FileTrackingIncludeHandler;

    // Overloads for narrow and wide file names; both return a filesystem path.
    std::filesystem::path GetShaderSourceFilePath(const std::string& shaderFile);
    std::filesystem::path GetShaderSourceFilePath(const std::wstring& shaderFile);

    ComPtr utils_;
    ComPtr compiler_;
    ComPtr includeHandler_;

    std::filesystem::path shaderFolderPath_;

    // NOTE(review): key/value types are missing in this listing.
    std::unordered_map trackedFiles_;
};
-------------------------------------------------------------------------------- /include/Swapchain.h: -------------------------------------------------------------------------------- 1 | // This file is part of the AMD & HSC Work Graph Playground. 2 | // 3 | // Copyright (C) 2024 Advanced Micro Devices, Inc. and Coburg University of Applied Sciences and Arts. 4 | // All rights reserved. 5 | // 6 | // Permission is hereby granted, free of charge, to any person obtaining a copy 7 | // of this software and associated documentation files(the "Software"), to deal 8 | // in the Software without restriction, including without limitation the rights 9 | // to use, copy, modify, merge, publish, distribute, sublicense, and /or sell 10 | // copies of the Software, and to permit persons to whom the Software is 11 | // furnished to do so, subject to the following conditions : 12 | // 13 | // The above copyright notice and this permission notice shall be included in 14 | // all copies or substantial portions of the Software. 
15 | // 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 19 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | // THE SOFTWARE. 23 | 

// NOTE(review): template arguments (ComPtr, std::array) appear to have been
// stripped from this listing. Restore them from the repository before compiling.
#pragma once

#include "Device.h"
#include "Window.h"

// Swapchain for a window, together with the color and depth render targets
// and the descriptor heaps declared below.
class Swapchain {
public:
    // Number of backbuffers and the formats of the color/depth targets.
    static constexpr std::uint32_t BackbufferCount = 3;
    static constexpr auto ColorTargetFormat = DXGI_FORMAT_R8G8B8A8_UNORM;
    static constexpr auto DepthTargetFormat = DXGI_FORMAT_D32_FLOAT;

    // Color and depth resource of one frame with their CPU descriptor handles.
    struct RenderTarget {
        ComPtr colorResource;
        D3D12_CPU_DESCRIPTOR_HANDLE colorDescriptorHandle;
        ComPtr depthResource;
        D3D12_CPU_DESCRIPTOR_HANDLE depthDescriptorHandle;
    };

    // Both pointers are non-owning; device is stored in device_.
    Swapchain(const Device* device, const Window* window);

    RenderTarget GetNextRenderTarget();
    // vsync defaults to true.
    void Present(bool vsync = true);

    void Resize(std::uint32_t width, std::uint32_t height);

    std::uint32_t GetWidth() const;
    std::uint32_t GetHeight() const;

private:
    void PrepareRenderTargets();

    // NOTE(review): width_, height_, swapchainWaitableObject_ and
    // depthDescriptorHandle_ have no default member initializer - presumably
    // set by the constructor; confirm in Swapchain.cpp.
    std::uint32_t width_;
    std::uint32_t height_;

    const Device* device_;

    ComPtr swapchain_;
    HANDLE swapchainWaitableObject_;

    // One color target: the resource and its descriptor handle.
    struct FrameResources {
        ComPtr resource;
        D3D12_CPU_DESCRIPTOR_HANDLE descriptorHandle;
    };

    ComPtr rtvDescriptorHeap_;
    std::array colorTargets_;

    ComPtr dsvDescriptorHeap_;
    ComPtr depthResource_;
    D3D12_CPU_DESCRIPTOR_HANDLE depthDescriptorHandle_;
};
-------------------------------------------------------------------------------- /include/Window.h: 
-------------------------------------------------------------------------------- 1 | // This file is part of the AMD & HSC Work Graph Playground. 2 | // 3 | // Copyright (C) 2024 Advanced Micro Devices, Inc. and Coburg University of Applied Sciences and Arts. 4 | // All rights reserved. 5 | // 6 | // Permission is hereby granted, free of charge, to any person obtaining a copy 7 | // of this software and associated documentation files(the "Software"), to deal 8 | // in the Software without restriction, including without limitation the rights 9 | // to use, copy, modify, merge, publish, distribute, sublicense, and /or sell 10 | // copies of the Software, and to permit persons to whom the Software is 11 | // furnished to do so, subject to the following conditions : 12 | // 13 | // The above copyright notice and this permission notice shall be included in 14 | // all copies or substantial portions of the Software. 15 | // 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 19 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | // THE SOFTWARE. 

#pragma once

// Device.h includes relevant headers for Window creation
#include "Device.h"

// Thin wrapper around a native window handle (HWND) and its size.
class Window {
public:
    // Takes the window title and the requested width/height.
    Window(const std::wstring& title, std::uint32_t width, std::uint32_t height);
    ~Window();

    void Close();
    // NOTE(review): the meaning of the returned bool is not visible here
    // (presumably "keep running") - confirm in Window.cpp.
    bool HandleEvents();

    // Native handle; NULL until a window has been created.
    HWND GetHandle() const;

    std::uint32_t GetWidth() const;
    std::uint32_t GetHeight() const;

private:
    HWND hwnd_ = NULL;

    // NOTE(review): no default member initializer - presumably set by the
    // constructor; confirm in Window.cpp.
    std::uint32_t width_;
    std::uint32_t height_;

    // Window procedure with the standard Win32 signature; static so it can be
    // passed as a plain function pointer.
    static LRESULT WINAPI MessageProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam);
};

-------------------------------------------------------------------------------- /include/WorkGraph.h: -------------------------------------------------------------------------------- 1 | // This file is part of the AMD & HSC Work Graph Playground. 2 | // 3 | // Copyright (C) 2024 Advanced Micro Devices, Inc. and Coburg University of Applied Sciences and Arts. 4 | // All rights reserved. 5 | // 6 | // Permission is hereby granted, free of charge, to any person obtaining a copy 7 | // of this software and associated documentation files(the "Software"), to deal 8 | // in the Software without restriction, including without limitation the rights 9 | // to use, copy, modify, merge, publish, distribute, sublicense, and /or sell 10 | // copies of the Software, and to permit persons to whom the Software is 11 | // furnished to do so, subject to the following conditions : 12 | // 13 | // The above copyright notice and this permission notice shall be included in 14 | // all copies or substantial portions of the Software. 
15 | // 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 19 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | // THE SOFTWARE. 23 | 

// NOTE(review): the <...> part of the system include and the ComPtr template
// arguments appear to have been stripped from this listing.
// Restore them from the repository before compiling.
#pragma once

#include

#include "Device.h"
#include "ShaderCompiler.h"

// A work graph built for one tutorial (or its sample solution) that can be
// dispatched on a command list.
class WorkGraph {
public:
    // Description of one tutorial entry.
    struct WorkGraphTutorial {
        std::string name;
        std::string shaderFileName;
        // Filename for sample solution. Empty string means no solution is available.
        std::string solutionShaderFileName = "";
    };

    // tutorialIndex selects the tutorial; sampleSolution selects its solution
    // shader instead of the exercise shader. Both values are retrievable through
    // the getters below.
    WorkGraph(const Device* device,
              ShaderCompiler& shaderCompiler,
              ID3D12RootSignature* rootSignature,
              std::uint32_t tutorialIndex,
              bool sampleSolution);

    void Dispatch(ID3D12GraphicsCommandList10* commandList);

    std::uint32_t GetTutorialIndex() const;
    bool IsSampleSolution() const;

private:
    std::uint32_t tutorialIndex_;
    bool sampleSolution_;

    ComPtr stateObject_;
    ComPtr backingMemory_;
    // Value-initialized so all fields start zeroed.
    D3D12_SET_PROGRAM_DESC programDesc_ = {};
    // NOTE(review): no default member initializer - presumably set by the
    // constructor; confirm in WorkGraph.cpp.
    std::uint32_t entryPointIndex_;
};
-------------------------------------------------------------------------------- /license.txt: -------------------------------------------------------------------------------- 1 | Copyright (C) 2024 Advanced Micro Devices, Inc. and Coburg University of Applied Sciences and Arts. All rights reserved. 
2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files(the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and /or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions : 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. -------------------------------------------------------------------------------- /src/Device.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of the AMD & HSC Work Graph Playground. 2 | // 3 | // Copyright (C) 2024 Advanced Micro Devices, Inc. and Coburg University of Applied Sciences and Arts. 4 | // All rights reserved. 
5 | // 6 | // Permission is hereby granted, free of charge, to any person obtaining a copy 7 | // of this software and associated documentation files(the "Software"), to deal 8 | // in the Software without restriction, including without limitation the rights 9 | // to use, copy, modify, merge, publish, distribute, sublicense, and /or sell 10 | // copies of the Software, and to permit persons to whom the Software is 11 | // furnished to do so, subject to the following conditions : 12 | // 13 | // The above copyright notice and this permission notice shall be included in 14 | // all copies or substantial portions of the Software. 15 | // 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 19 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | // THE SOFTWARE. 23 | 24 | #include "Device.h" 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | // Declarations for Microsoft.D3D.D3D12 Agility SDK NuGet package. 33 | // D3D12SDKVersion needs to be updated if newer NuGet package is used. 
34 | extern "C" { 35 | __declspec(dllexport) extern const unsigned int D3D12SDKVersion = 613; 36 | } 37 | extern "C" { 38 | __declspec(dllexport) extern const char* D3D12SDKPath = ".\\"; 39 | } 40 | 41 | void ThrowIfFailed(const HRESULT hr) 42 | { 43 | if (FAILED(hr)) { 44 | const auto cond = std::system_category().default_error_condition(hr); 45 | 46 | std::stringstream stream; 47 | stream << "Operation Failed: " << cond.category().name() << // 48 | " (" << cond.value() << ") (" << std::hex << hr << "): " << // 49 | cond.message(); 50 | 51 | throw std::runtime_error(stream.str()); 52 | } 53 | } 54 | 55 | Device::Device(const bool forceWarpAdapter, const bool enableDebugLayer, const bool enableGpuValidationLayer) 56 | { 57 | CreateDXGIFactory(enableDebugLayer, enableGpuValidationLayer); 58 | 59 | if (forceWarpAdapter) { 60 | ComPtr adapter; 61 | dxgiFactory_->EnumWarpAdapter(IID_PPV_ARGS(&adapter)); 62 | 63 | device_ = CreateDevice(adapter.Get()); 64 | } else { 65 | // Try to find suitable adapter, fallback to WARP 66 | for (std::uint32_t adapterId = 0; true; ++adapterId) { 67 | ComPtr adapter; 68 | 69 | if (dxgiFactory_->EnumAdapters1(adapterId, &adapter) == DXGI_ERROR_NOT_FOUND) { 70 | // No more adapters to check 71 | break; 72 | } 73 | 74 | device_ = CreateDevice(adapter.Get()); 75 | 76 | // End search if adapter creation was successful. 
77 | if (device_) { 78 | break; 79 | } 80 | } 81 | } 82 | 83 | // Check if an adapter was found 84 | if (!device_) { 85 | throw std::runtime_error("No device with work graphs support was found."); 86 | } 87 | 88 | do { 89 | // Query adapter via LUID 90 | ComPtr adapter; 91 | if (FAILED(dxgiFactory_->EnumAdapterByLuid(device_->GetAdapterLuid(), IID_PPV_ARGS(&adapter)))) { 92 | continue; 93 | } 94 | 95 | // Query DXGI_ADAPTER_DESC1 96 | DXGI_ADAPTER_DESC1 desc; 97 | if (FAILED(adapter->GetDesc1(&desc))) { 98 | continue; 99 | } 100 | 101 | // Convert adapter description to std::string 102 | std::wstring_convert> converter; 103 | adapterDescription_ = converter.to_bytes(desc.Description); 104 | } while (false); 105 | 106 | if (enableDebugLayer) { 107 | // Register callback to print D3D12 debug messages to std::cout 108 | RegisterDebugMessageCallback(); 109 | } 110 | 111 | // Create D3D12 resources (queue, command lists) 112 | CreateDeviceResources(); 113 | } 114 | 115 | void Device::WaitForDevice() 116 | { 117 | // Increment signaled value and set fence 118 | signaledFenceValue_++; 119 | commandQueue_->Signal(fence_.Get(), signaledFenceValue_); 120 | 121 | // Fence is already signaled 122 | if (fence_->GetCompletedValue() >= signaledFenceValue_) { 123 | return; 124 | } 125 | 126 | fence_->SetEventOnCompletion(signaledFenceValue_, fenceEvent_); 127 | WaitForSingleObject(fenceEvent_, INFINITE); 128 | } 129 | 130 | ID3D12GraphicsCommandList10* Device::GetNextFrameCommandList() 131 | { 132 | // Increment frame index to next frame 133 | frameIndex_ = (frameIndex_ + 1) % BufferedFramesCount; 134 | 135 | const auto& frameContext = frameContexts_[frameIndex_]; 136 | 137 | // Only wait if frame context has been signaled and 138 | // if fence does not have the signaled value yet. 
139 | if ((frameContext.waitFenceValue != 0) && // 140 | (fence_->GetCompletedValue() < frameContext.waitFenceValue)) 141 | { 142 | fence_->SetEventOnCompletion(frameContext.waitFenceValue, fenceEvent_); 143 | WaitForSingleObject(fenceEvent_, INFINITE); 144 | } 145 | 146 | ThrowIfFailed(frameContext.commandAllocator->Reset()); 147 | ThrowIfFailed(frameContext.commandList->Reset(frameContext.commandAllocator.Get(), nullptr)); 148 | 149 | return frameContext.commandList.Get(); 150 | } 151 | 152 | void Device::ExecuteCurrentFrameCommandList() 153 | { 154 | auto& frameContext = frameContexts_[frameIndex_]; 155 | 156 | // Close command list 157 | ThrowIfFailed(frameContext.commandList->Close()); 158 | 159 | // Submit command list 160 | commandQueue_->ExecuteCommandLists( 161 | 1, reinterpret_cast(frameContext.commandList.GetAddressOf())); 162 | 163 | // Incrment signaled fence value & signale fence 164 | signaledFenceValue_++; 165 | commandQueue_->Signal(fence_.Get(), signaledFenceValue_); 166 | 167 | // Store fence value to frame context 168 | frameContext.waitFenceValue = signaledFenceValue_; 169 | } 170 | 171 | IDXGIFactory4* Device::GetDXGIFactory() const 172 | { 173 | return dxgiFactory_.Get(); 174 | } 175 | 176 | ID3D12Device9* Device::GetDevice() const 177 | { 178 | return device_.Get(); 179 | } 180 | 181 | ID3D12CommandQueue* Device::GetCommandQueue() const 182 | { 183 | return commandQueue_.Get(); 184 | } 185 | 186 | const std::string& Device::GetAdapterDescription() const 187 | { 188 | return adapterDescription_; 189 | } 190 | 191 | void Device::CreateDXGIFactory(bool enableDebugLayer, bool enableGpuValidationLayer) 192 | { 193 | if (enableDebugLayer) { 194 | // Enable the debug layer (requires the Graphics Tools "optional feature"). 195 | // NOTE: Enabling the debug layer after device creation will invalidate the active device. 
196 | 197 | ComPtr debugController; 198 | if (SUCCEEDED(D3D12GetDebugInterface(IID_PPV_ARGS(&debugController)))) { 199 | debugController->EnableDebugLayer(); 200 | } else { 201 | OutputDebugString(TEXT("WARNING: Direct3D Debug Device is not available\n")); 202 | } 203 | 204 | ComPtr dxgiInfoQueue; 205 | if (SUCCEEDED(DXGIGetDebugInterface1(0, IID_PPV_ARGS(&dxgiInfoQueue)))) { 206 | ThrowIfFailed(CreateDXGIFactory2(DXGI_CREATE_FACTORY_DEBUG, IID_PPV_ARGS(&dxgiFactory_))); 207 | 208 | dxgiInfoQueue->SetBreakOnSeverity(DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_ERROR, true); 209 | dxgiInfoQueue->SetBreakOnSeverity(DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_CORRUPTION, true); 210 | } 211 | 212 | if (enableGpuValidationLayer) { 213 | ComPtr debugController1; 214 | if (SUCCEEDED(debugController->QueryInterface(IID_PPV_ARGS(&debugController1)))) { 215 | debugController1->SetEnableGPUBasedValidation(true); 216 | } else { 217 | OutputDebugString(TEXT("WARNING: Direct3D Debug Device for GPU based validation is not available\n")); 218 | } 219 | } 220 | } 221 | 222 | if (!dxgiFactory_) { 223 | // Fallback if enabling debug layer did not work, or debug layer is disabled 224 | ThrowIfFailed(CreateDXGIFactory1(IID_PPV_ARGS(&dxgiFactory_))); 225 | } 226 | } 227 | 228 | ComPtr Device::CreateDevice(IDXGIAdapter1* adapter) const 229 | { 230 | DXGI_ADAPTER_DESC1 desc; 231 | if (FAILED(adapter->GetDesc1(&desc))) { 232 | std::cout << "Could not get adapter description for adapter." << std::endl; 233 | return {}; 234 | } 235 | 236 | std::wcout << "Testing adapter \"" << desc.Description << "\": "; 237 | 238 | ComPtr device; 239 | 240 | if (FAILED(D3D12CreateDevice(adapter, D3D_FEATURE_LEVEL_12_2, IID_PPV_ARGS(&device)))) { 241 | std::cout << "Failed to create D3D12 device." 
<< std::endl; 242 | 243 | if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) { 244 | std::cout 245 | << "WARP adapter does not support D3D feature level 12.2 and work graphs.\n" 246 | " See readme.md#running-on-gpus-without-work-graphs-support for instructions on installing latest " 247 | "WARP adapter." 248 | << std::endl; 249 | } 250 | 251 | return {}; 252 | } 253 | 254 | if (!CheckDeviceFeatures(device.Get())) { 255 | std::cout << "Device does not support work graphs." << std::endl; 256 | 257 | if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) { 258 | std::cout 259 | << "WARP adapter does not support work graphs.\n" 260 | " See readme.md#running-on-gpus-without-work-graphs-support for instructions on installing latest " 261 | "WARP adapter." 262 | << std::endl; 263 | } 264 | 265 | return {}; 266 | } 267 | 268 | // Adapter does support work graphs. 269 | std::cout << "Device supports work graphs." << std::endl; 270 | 271 | return device; 272 | } 273 | 274 | bool Device::CheckDeviceFeatures(ID3D12Device9* device) const 275 | { 276 | D3D12_FEATURE_DATA_D3D12_OPTIONS21 options = {}; 277 | if (FAILED(device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS21, &options, sizeof(options)))) { 278 | return false; 279 | } 280 | 281 | return options.WorkGraphsTier != D3D12_WORK_GRAPHS_TIER_NOT_SUPPORTED; 282 | } 283 | 284 | void Device::CreateDeviceResources() 285 | { 286 | // Create graphics command queue 287 | D3D12_COMMAND_QUEUE_DESC queueDesc = {}; 288 | queueDesc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE; 289 | queueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; 290 | 291 | ThrowIfFailed(device_->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(&commandQueue_))); 292 | 293 | for (auto& frameContext : frameContexts_) { 294 | ThrowIfFailed(device_->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, 295 | IID_PPV_ARGS(&frameContext.commandAllocator))); 296 | ThrowIfFailed(device_->CreateCommandList(0, 297 | D3D12_COMMAND_LIST_TYPE_DIRECT, 298 | frameContext.commandAllocator.Get(), 299 | 
nullptr, 300 | IID_PPV_ARGS(&frameContext.commandList))); 301 | 302 | // Close all created command lists 303 | ThrowIfFailed(frameContext.commandList->Close()); 304 | } 305 | 306 | // Create wait fence & event 307 | ThrowIfFailed(device_->CreateFence(0, D3D12_FENCE_FLAG_SHARED, IID_PPV_ARGS(&fence_))); 308 | fenceEvent_ = CreateEventA(nullptr, false, false, nullptr); 309 | } 310 | 311 | void Device::RegisterDebugMessageCallback() 312 | { 313 | static const auto callback = [](D3D12_MESSAGE_CATEGORY category, 314 | D3D12_MESSAGE_SEVERITY severity, 315 | D3D12_MESSAGE_ID, 316 | LPCSTR description, 317 | void* context) { 318 | if (severity == D3D12_MESSAGE_SEVERITY_CORRUPTION || severity == D3D12_MESSAGE_SEVERITY_ERROR) { 319 | std::cout << "[D3D12] " << description << std::endl; 320 | } 321 | }; 322 | 323 | ComPtr infoQueue; 324 | if (SUCCEEDED(device_.As(&infoQueue))) { 325 | DWORD callbackCookie; 326 | infoQueue->RegisterMessageCallback(callback, D3D12_MESSAGE_CALLBACK_IGNORE_FILTERS, nullptr, &callbackCookie); 327 | } 328 | } 329 | -------------------------------------------------------------------------------- /src/ShaderCompiler.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of the AMD & HSC Work Graph Playground. 2 | // 3 | // Copyright (C) 2024 Advanced Micro Devices, Inc. and Coburg University of Applied Sciences and Arts. 4 | // All rights reserved. 
5 | // 6 | // Permission is hereby granted, free of charge, to any person obtaining a copy 7 | // of this software and associated documentation files(the "Software"), to deal 8 | // in the Software without restriction, including without limitation the rights 9 | // to use, copy, modify, merge, publish, distribute, sublicense, and /or sell 10 | // copies of the Software, and to permit persons to whom the Software is 11 | // furnished to do so, subject to the following conditions : 12 | // 13 | // The above copyright notice and this permission notice shall be included in 14 | // all copies or substantial portions of the Software. 15 | // 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 19 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | // THE SOFTWARE. 
23 | 24 | #include "ShaderCompiler.h" 25 | 26 | #include 27 | 28 | // Include handler library to collect all included files for tracking 29 | class FileTrackingIncludeHandler : public IDxcIncludeHandler { 30 | public: 31 | FileTrackingIncludeHandler(ShaderCompiler& parent) : parent_(parent) {} 32 | 33 | HRESULT STDMETHODCALLTYPE LoadSource(_In_ LPCWSTR pFilename, 34 | _COM_Outptr_result_maybenull_ IDxcBlob** ppIncludeSource) override 35 | { 36 | if (pFilename == nullptr) { 37 | return E_FAIL; 38 | } 39 | if (ppIncludeSource == nullptr) { 40 | return E_FAIL; 41 | } 42 | 43 | const auto shaderSourceFilePath = parent_.GetShaderSourceFilePath(pFilename); 44 | 45 | IDxcBlobEncoding* includeSource; 46 | const auto result = parent_.utils_->LoadFile(shaderSourceFilePath.wstring().c_str(), nullptr, &includeSource); 47 | 48 | *ppIncludeSource = includeSource; 49 | 50 | if (SUCCEEDED(result)) { 51 | // Update/insert last file write time for hot-reloading 52 | parent_.trackedFiles_[shaderSourceFilePath] = std::filesystem::last_write_time(shaderSourceFilePath); 53 | } 54 | 55 | return result; 56 | } 57 | 58 | HRESULT STDMETHODCALLTYPE QueryInterface(REFIID riid, _COM_Outptr_ void __RPC_FAR* __RPC_FAR* ppvObject) override 59 | { 60 | return E_FAIL; 61 | } 62 | 63 | ULONG STDMETHODCALLTYPE AddRef(void) override 64 | { 65 | return 0; 66 | } 67 | 68 | ULONG STDMETHODCALLTYPE Release(void) override 69 | { 70 | return 0; 71 | } 72 | 73 | private: 74 | ShaderCompiler& parent_; 75 | }; 76 | 77 | ShaderCompiler::ShaderCompiler() 78 | { 79 | HMODULE dxcompilerModule = LoadLibraryW(L"dxcompiler.dll"); 80 | 81 | if (!dxcompilerModule) { 82 | throw std::runtime_error("Failed to load dxcompiler.dll"); 83 | } 84 | 85 | DxcCreateInstanceProc pfnDxcCreateInstance = 86 | DxcCreateInstanceProc(GetProcAddress(dxcompilerModule, "DxcCreateInstance")); 87 | 88 | if (pfnDxcCreateInstance == nullptr) { 89 | throw std::runtime_error("Failed to load DxcCreateInstance from dxcompiler.dll"); 90 | } 91 | 92 
| ThrowIfFailed(pfnDxcCreateInstance(CLSID_DxcUtils, IID_PPV_ARGS(&utils_))); 93 | ThrowIfFailed(pfnDxcCreateInstance(CLSID_DxcCompiler, IID_PPV_ARGS(&compiler_))); 94 | ThrowIfFailed(utils_->CreateDefaultIncludeHandler(&includeHandler_)); 95 | 96 | shaderFolderPath_ = std::filesystem::current_path() / L"tutorials"; 97 | } 98 | 99 | ComPtr ShaderCompiler::CompileShader(const std::string& shaderFile, 100 | const wchar_t* target, 101 | const wchar_t* entryPoint) 102 | { 103 | const auto shaderSourceFilePath = GetShaderSourceFilePath(shaderFile); 104 | 105 | HRESULT loadSourceResult; 106 | ComPtr source; 107 | 108 | loadSourceResult = utils_->LoadFile(shaderSourceFilePath.wstring().c_str(), nullptr, &source); 109 | 110 | if (FAILED(loadSourceResult) || (source == nullptr)) { 111 | // try load source again. Sometimes loading the file for hot-reloading will fail if the 112 | // file is still being written to. 113 | loadSourceResult = utils_->LoadFile(shaderSourceFilePath.wstring().c_str(), nullptr, &source); 114 | } 115 | 116 | if (FAILED(loadSourceResult) || (source == nullptr)) { 117 | // Second attempt failed as well. 
Throw error 118 | throw std::runtime_error("Failed to load shader file \"" + shaderFile + "\""); 119 | } 120 | 121 | const auto shaderIncludeArgument = std::wstring(L"-I") + shaderFolderPath_.wstring(); 122 | 123 | std::vector arguments = { 124 | L"-enable-16bit-types", 125 | // use HLSL 2021 126 | L"-HV", 127 | L"2021", 128 | // column major matrices 129 | DXC_ARG_PACK_MATRIX_COLUMN_MAJOR, 130 | // include path for "tutorials" folder 131 | shaderIncludeArgument.c_str(), 132 | }; 133 | 134 | FileTrackingIncludeHandler includeHandler(*this); 135 | 136 | ComPtr result = nullptr; 137 | ThrowIfFailed(compiler_->Compile(source.Get(), 138 | shaderSourceFilePath.wstring().c_str(), 139 | entryPoint, 140 | target, 141 | arguments.data(), 142 | static_cast(arguments.size()), 143 | nullptr, 144 | 0, 145 | &includeHandler, 146 | &result)); 147 | 148 | HRESULT compileStatus; 149 | ThrowIfFailed(result->GetStatus(&compileStatus)); 150 | 151 | std::string errorString = ""; 152 | 153 | // try get error string from DXC result 154 | { 155 | ComPtr errorStringBlob = nullptr; 156 | if (SUCCEEDED(result->GetErrorBuffer(&errorStringBlob)) && (errorStringBlob != nullptr)) { 157 | ComPtr errorStringBlob8 = nullptr; 158 | utils_->GetBlobAsUtf8(errorStringBlob.Get(), &errorStringBlob8); 159 | 160 | errorString = std::string(errorStringBlob8->GetStringPointer(), errorStringBlob8->GetStringLength()); 161 | } 162 | } 163 | 164 | if (FAILED(compileStatus)) { 165 | std::stringstream stream; 166 | stream << "Failed to compile shader \"" << shaderFile << "\":\n" << errorString; 167 | 168 | throw std::runtime_error(stream.str()); 169 | } 170 | 171 | ComPtr outputBlob; 172 | ThrowIfFailed(result->GetResult(&outputBlob)); 173 | 174 | // Update/insert last file write time for hot-reloading 175 | trackedFiles_[shaderSourceFilePath] = std::filesystem::last_write_time(shaderSourceFilePath); 176 | 177 | return outputBlob; 178 | } 179 | 180 | bool ShaderCompiler::CheckShaderSourceFiles() 181 | { 182 | bool 
result = false; 183 | 184 | for (auto& [file, writeTime] : trackedFiles_) { 185 | try { 186 | const auto newFileWriteTime = std::filesystem::last_write_time(file); 187 | 188 | // Return true if any file was modified 189 | result |= (writeTime != newFileWriteTime); 190 | 191 | // Update file timestamp to only trigger update once 192 | writeTime = newFileWriteTime; 193 | } catch (const std::filesystem::filesystem_error& e) { 194 | // last_write_time can throw an error if the file is currently being written to 195 | continue; 196 | } 197 | } 198 | 199 | return result; 200 | } 201 | 202 | std::filesystem::path ShaderCompiler::GetShaderSourceFilePath(const std::string& shaderFile) 203 | { 204 | return std::filesystem::absolute(shaderFolderPath_ / shaderFile).generic_string(); 205 | } 206 | 207 | std::filesystem::path ShaderCompiler::GetShaderSourceFilePath(const std::wstring& shaderFile) 208 | { 209 | return std::filesystem::absolute(shaderFolderPath_ / shaderFile).generic_string(); 210 | } 211 | -------------------------------------------------------------------------------- /src/Swapchain.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of the AMD & HSC Work Graph Playground. 2 | // 3 | // Copyright (C) 2024 Advanced Micro Devices, Inc. and Coburg University of Applied Sciences and Arts. 4 | // All rights reserved. 
5 | // 6 | // Permission is hereby granted, free of charge, to any person obtaining a copy 7 | // of this software and associated documentation files(the "Software"), to deal 8 | // in the Software without restriction, including without limitation the rights 9 | // to use, copy, modify, merge, publish, distribute, sublicense, and /or sell 10 | // copies of the Software, and to permit persons to whom the Software is 11 | // furnished to do so, subject to the following conditions : 12 | // 13 | // The above copyright notice and this permission notice shall be included in 14 | // all copies or substantial portions of the Software. 15 | // 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 19 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | // THE SOFTWARE. 
23 | 24 | #include "Swapchain.h" 25 | 26 | Swapchain::Swapchain(const Device* device, const Window* window) : device_(device) 27 | { 28 | width_ = window->GetWidth(); 29 | height_ = window->GetHeight(); 30 | 31 | DXGI_SWAP_CHAIN_DESC1 swapchainDesc = {}; 32 | 33 | swapchainDesc.Width = width_; 34 | swapchainDesc.Height = height_; 35 | swapchainDesc.Format = ColorTargetFormat; 36 | swapchainDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; 37 | swapchainDesc.BufferCount = BackbufferCount; 38 | swapchainDesc.SampleDesc.Count = 1; 39 | swapchainDesc.SampleDesc.Quality = 0; 40 | swapchainDesc.Scaling = DXGI_SCALING_STRETCH; 41 | swapchainDesc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; 42 | swapchainDesc.AlphaMode = DXGI_ALPHA_MODE_IGNORE; 43 | swapchainDesc.Flags = DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING | DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT; 44 | 45 | DXGI_SWAP_CHAIN_FULLSCREEN_DESC fsSwapchainDesc = {0}; 46 | fsSwapchainDesc.Windowed = true; 47 | 48 | auto* const factory = device->GetDXGIFactory(); 49 | auto* const commandQueue = device->GetCommandQueue(); 50 | const auto windowHandle = window->GetHandle(); 51 | 52 | ComPtr swapchain1; 53 | ThrowIfFailed(factory->CreateSwapChainForHwnd( 54 | commandQueue, windowHandle, &swapchainDesc, &fsSwapchainDesc, nullptr, &swapchain1)); 55 | 56 | // Query Swapchain3 interface 57 | ThrowIfFailed(swapchain1->QueryInterface(IID_PPV_ARGS(&swapchain_))); 58 | 59 | swapchain_->SetMaximumFrameLatency(BackbufferCount); 60 | swapchainWaitableObject_ = swapchain_->GetFrameLatencyWaitableObject(); 61 | 62 | factory->MakeWindowAssociation(windowHandle, DXGI_MWA_NO_ALT_ENTER); 63 | 64 | auto* const d3dDevice = device->GetDevice(); 65 | 66 | // Create RTV descriptor heap 67 | { 68 | D3D12_DESCRIPTOR_HEAP_DESC desc = {}; 69 | desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV; 70 | desc.NumDescriptors = BackbufferCount; 71 | desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; 72 | desc.NodeMask = 1; 73 | 
ThrowIfFailed(device->GetDevice()->CreateDescriptorHeap(&desc, IID_PPV_ARGS(&rtvDescriptorHeap_))); 74 | 75 | const auto descriptorSize = 76 | device->GetDevice()->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV); 77 | 78 | for (std::uint32_t index = 0; index < BackbufferCount; ++index) { 79 | colorTargets_[index].descriptorHandle = CD3DX12_CPU_DESCRIPTOR_HANDLE( 80 | rtvDescriptorHeap_->GetCPUDescriptorHandleForHeapStart(), index, descriptorSize); 81 | } 82 | } 83 | 84 | // Create DSV descriptor heap 85 | { 86 | D3D12_DESCRIPTOR_HEAP_DESC desc = {}; 87 | desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_DSV; 88 | desc.NumDescriptors = 1; 89 | desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; 90 | desc.NodeMask = 1; 91 | ThrowIfFailed(device->GetDevice()->CreateDescriptorHeap(&desc, IID_PPV_ARGS(&dsvDescriptorHeap_))); 92 | 93 | depthDescriptorHandle_ = dsvDescriptorHeap_->GetCPUDescriptorHandleForHeapStart(); 94 | } 95 | 96 | PrepareRenderTargets(); 97 | } 98 | 99 | Swapchain::RenderTarget Swapchain::GetNextRenderTarget() 100 | { 101 | // Wait for swapchain biffer 102 | WaitForSingleObject(swapchainWaitableObject_, INFINITE); 103 | 104 | const auto backbufferIndex = swapchain_->GetCurrentBackBufferIndex(); 105 | const auto& colorTarget = colorTargets_[backbufferIndex]; 106 | 107 | return RenderTarget{ 108 | .colorResource = colorTarget.resource.Get(), 109 | .colorDescriptorHandle = colorTarget.descriptorHandle, 110 | .depthResource = depthResource_.Get(), 111 | .depthDescriptorHandle = depthDescriptorHandle_, 112 | }; 113 | } 114 | 115 | void Swapchain::Present(const bool vsync) 116 | { 117 | if (vsync) { 118 | ThrowIfFailed(swapchain_->Present(1, 0)); 119 | } else { 120 | ThrowIfFailed(swapchain_->Present(0, DXGI_PRESENT_ALLOW_TEARING)); 121 | } 122 | } 123 | 124 | void Swapchain::Resize(std::uint32_t width, std::uint32_t height) 125 | { 126 | // Release current resources 127 | for (std::uint32_t index = 0; index < BackbufferCount; ++index) { 128 | 
colorTargets_[index].resource.Reset(); 129 | } 130 | depthResource_.Reset(); 131 | 132 | // Update width & height 133 | width_ = width; 134 | height_ = height; 135 | 136 | // Resize swapchain 137 | swapchain_->ResizeBuffers(BackbufferCount, 138 | width, 139 | height, 140 | ColorTargetFormat, 141 | DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING | DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT); 142 | 143 | PrepareRenderTargets(); 144 | } 145 | 146 | std::uint32_t Swapchain::GetWidth() const 147 | { 148 | return width_; 149 | } 150 | 151 | std::uint32_t Swapchain::GetHeight() const 152 | { 153 | return height_; 154 | } 155 | 156 | void Swapchain::PrepareRenderTargets() 157 | { 158 | // Fetch color targets & create color render target views 159 | for (std::uint32_t index = 0; index < BackbufferCount; ++index) { 160 | ComPtr resource; 161 | 162 | ThrowIfFailed(swapchain_->GetBuffer(index, IID_PPV_ARGS(&resource))); 163 | device_->GetDevice()->CreateRenderTargetView(resource.Get(), nullptr, colorTargets_[index].descriptorHandle); 164 | 165 | colorTargets_[index].resource = resource; 166 | } 167 | 168 | // Create depth buffer & create depth-stencil view 169 | { 170 | D3D12_CLEAR_VALUE clearValue = {}; 171 | clearValue.Format = DepthTargetFormat; 172 | clearValue.DepthStencil.Depth = 1.0f; 173 | clearValue.DepthStencil.Stencil = 0; 174 | 175 | CD3DX12_HEAP_PROPERTIES heapProperties(D3D12_HEAP_TYPE_DEFAULT); 176 | CD3DX12_RESOURCE_DESC resourceDesc = CD3DX12_RESOURCE_DESC::Tex2D( 177 | DXGI_FORMAT_D32_FLOAT, width_, height_, 1, 0, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL); 178 | ThrowIfFailed(device_->GetDevice()->CreateCommittedResource(&heapProperties, 179 | D3D12_HEAP_FLAG_NONE, 180 | &resourceDesc, 181 | D3D12_RESOURCE_STATE_DEPTH_WRITE, 182 | &clearValue, 183 | IID_PPV_ARGS(&depthResource_))); 184 | 185 | D3D12_DEPTH_STENCIL_VIEW_DESC dsvDesc = {}; 186 | dsvDesc.Format = DXGI_FORMAT_D32_FLOAT; 187 | dsvDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D; 188 | 
dsvDesc.Flags = D3D12_DSV_FLAG_NONE; 189 | device_->GetDevice()->CreateDepthStencilView( 190 | depthResource_.Get(), &dsvDesc, dsvDescriptorHeap_->GetCPUDescriptorHandleForHeapStart()); 191 | } 192 | } 193 | -------------------------------------------------------------------------------- /src/Window.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of the AMD & HSC Work Graph Playground. 2 | // 3 | // Copyright (C) 2024 Advanced Micro Devices, Inc. and Coburg University of Applied Sciences and Arts. 4 | // All rights reserved. 5 | // 6 | // Permission is hereby granted, free of charge, to any person obtaining a copy 7 | // of this software and associated documentation files(the "Software"), to deal 8 | // in the Software without restriction, including without limitation the rights 9 | // to use, copy, modify, merge, publish, distribute, sublicense, and /or sell 10 | // copies of the Software, and to permit persons to whom the Software is 11 | // furnished to do so, subject to the following conditions : 12 | // 13 | // The above copyright notice and this permission notice shall be included in 14 | // all copies or substantial portions of the Software. 15 | // 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 19 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | // THE SOFTWARE. 
23 | 24 | #include "Window.h" 25 | 26 | #include 27 | 28 | constexpr static const wchar_t* WindowClassName = L"SampleWindowClass"; 29 | 30 | Window::Window(const std::wstring& title, std::uint32_t width, std::uint32_t height) : width_(width), height_(height) 31 | { 32 | const auto hInstance = GetModuleHandleW(NULL); 33 | 34 | WNDCLASSEXW windowClass = {0}; 35 | windowClass.cbSize = sizeof(WNDCLASSEXW); 36 | windowClass.style = CS_HREDRAW | CS_VREDRAW; 37 | windowClass.lpfnWndProc = MessageProc; 38 | windowClass.hInstance = hInstance; 39 | windowClass.hCursor = LoadCursor(NULL, IDC_ARROW); 40 | windowClass.lpszClassName = WindowClassName; 41 | RegisterClassExW(&windowClass); 42 | 43 | // Create the window and store a handle to it. 44 | hwnd_ = CreateWindowW(windowClass.lpszClassName, 45 | title.c_str(), 46 | WS_OVERLAPPEDWINDOW, 47 | 100, 48 | 100, 49 | width, 50 | height, 51 | NULL, // We have no parent window. 52 | NULL, // We aren't using menus. 53 | hInstance, 54 | this); 55 | 56 | // Show window 57 | ShowWindow(hwnd_, SW_NORMAL); 58 | UpdateWindow(hwnd_); 59 | } 60 | 61 | Window::~Window() 62 | { 63 | DestroyWindow(hwnd_); 64 | 65 | const auto hInstance = GetModuleHandleW(NULL); 66 | UnregisterClassW(WindowClassName, hInstance); 67 | } 68 | 69 | void Window::Close() 70 | { 71 | PostMessageA(hwnd_, WM_CLOSE, 0, 0); 72 | } 73 | 74 | bool Window::HandleEvents() 75 | { 76 | MSG msg = {}; 77 | bool quit = false; 78 | 79 | while (PeekMessage(&msg, NULL, 0, 0, PM_REMOVE)) { 80 | TranslateMessage(&msg); 81 | DispatchMessage(&msg); 82 | 83 | if (msg.message == WM_QUIT) { 84 | quit = true; 85 | } 86 | } 87 | 88 | return !quit; 89 | } 90 | 91 | HWND Window::GetHandle() const 92 | { 93 | return hwnd_; 94 | } 95 | 96 | std::uint32_t Window::GetWidth() const 97 | { 98 | return width_; 99 | } 100 | 101 | std::uint32_t Window::GetHeight() const 102 | { 103 | return height_; 104 | } 105 | 106 | // Forward-declaration of ImGui Message Proc Handler 107 | extern IMGUI_IMPL_API 
LRESULT ImGui_ImplWin32_WndProcHandler(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam); 108 | 109 | // Window Message Proc Handler 110 | LRESULT WINAPI Window::MessageProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam) 111 | { 112 | if (ImGui_ImplWin32_WndProcHandler(hWnd, msg, wParam, lParam)) { 113 | return true; 114 | } 115 | 116 | switch (msg) { 117 | case WM_CREATE: { 118 | // Save the Window instance pointer passed in to CreateWindow. 119 | LPCREATESTRUCT pCreateStruct = reinterpret_cast(lParam); 120 | SetWindowLongPtr(hWnd, GWLP_USERDATA, reinterpret_cast(pCreateStruct->lpCreateParams)); 121 | } 122 | return 0; 123 | case WM_SIZE: { 124 | Window* window = reinterpret_cast(GetWindowLongPtr(hWnd, GWLP_USERDATA)); 125 | 126 | // Update width & height in window instance. Swapchain resizing will be handled in main loop. 127 | window->width_ = LOWORD(lParam); 128 | window->height_ = HIWORD(lParam); 129 | } 130 | return 0; 131 | case WM_SYSCOMMAND: 132 | if ((wParam & 0xfff0) == SC_KEYMENU) // Disable ALT application menu 133 | return 0; 134 | break; 135 | case WM_DESTROY: 136 | ::PostQuitMessage(0); 137 | return 0; 138 | } 139 | return ::DefWindowProcW(hWnd, msg, wParam, lParam); 140 | } -------------------------------------------------------------------------------- /src/WorkGraph.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of the AMD & HSC Work Graph Playground. 2 | // 3 | // Copyright (C) 2024 Advanced Micro Devices, Inc. and Coburg University of Applied Sciences and Arts. 4 | // All rights reserved. 
5 | // 6 | // Permission is hereby granted, free of charge, to any person obtaining a copy 7 | // of this software and associated documentation files(the "Software"), to deal 8 | // in the Software without restriction, including without limitation the rights 9 | // to use, copy, modify, merge, publish, distribute, sublicense, and /or sell 10 | // copies of the Software, and to permit persons to whom the Software is 11 | // furnished to do so, subject to the following conditions : 12 | // 13 | // The above copyright notice and this permission notice shall be included in 14 | // all copies or substantial portions of the Software. 15 | // 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 19 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | // THE SOFTWARE. 
23 | 24 | #include "WorkGraph.h" 25 | 26 | #include "Application.h" 27 | #include "Swapchain.h" 28 | 29 | WorkGraph::WorkGraph(const Device* device, 30 | ShaderCompiler& shaderCompiler, 31 | ID3D12RootSignature* rootSignature, 32 | const std::uint32_t tutorialIndex, 33 | const bool sampleSolution) 34 | : tutorialIndex_(tutorialIndex), sampleSolution_(sampleSolution) 35 | { 36 | // Name for work graph program inside the state object 37 | static const wchar_t* WorkGraphProgramName = L"WorkGraph"; 38 | 39 | // Create work graph 40 | CD3DX12_STATE_OBJECT_DESC stateObjectDesc(D3D12_STATE_OBJECT_TYPE_EXECUTABLE); 41 | 42 | // set root signature for work graph 43 | auto rootSignatureSubobject = stateObjectDesc.CreateSubobject(); 44 | rootSignatureSubobject->SetRootSignature(rootSignature); 45 | 46 | auto workgraphSubobject = stateObjectDesc.CreateSubobject(); 47 | workgraphSubobject->IncludeAllAvailableNodes(); 48 | workgraphSubobject->SetProgramName(WorkGraphProgramName); 49 | 50 | // list of compiled shaders to be released once the work graph is created 51 | std::vector> compiledShaders; 52 | 53 | // Helper function for adding a shader library to the work graph state object 54 | const auto AddShaderLibrary = [&](const std::string& shaderFileName) { 55 | // compile shader as library 56 | auto blob = shaderCompiler.CompileShader(shaderFileName, L"lib_6_8", nullptr); 57 | auto shaderBytecode = CD3DX12_SHADER_BYTECODE(blob->GetBufferPointer(), blob->GetBufferSize()); 58 | 59 | // add blob to state object 60 | auto librarySubobject = stateObjectDesc.CreateSubobject(); 61 | librarySubobject->SetDXILLibrary(&shaderBytecode); 62 | 63 | // add shader blob to be released later 64 | compiledShaders.emplace_back(std::move(blob)); 65 | }; 66 | 67 | // =================================== 68 | // Add shader libraries 69 | const auto tutorials = Application::GetTutorials(); 70 | const auto& tutorial = tutorials[tutorialIndex_]; 71 | 72 | if (sampleSolution_) { 73 | if 
(tutorial.solutionShaderFileName.empty()) { 74 | throw std::runtime_error("selected tutorial does not provide a sample solution."); 75 | } 76 | AddShaderLibrary(tutorial.solutionShaderFileName); 77 | } else { 78 | AddShaderLibrary(tutorial.shaderFileName); 79 | } 80 | 81 | // Create work graph state object 82 | ThrowIfFailed(device->GetDevice()->CreateStateObject(stateObjectDesc, IID_PPV_ARGS(&stateObject_))); 83 | 84 | // release all compiled shaders 85 | compiledShaders.clear(); 86 | 87 | // Get work graph properties 88 | ComPtr<ID3D12StateObjectProperties1> stateObjectProperties; 89 | ComPtr<ID3D12WorkGraphProperties> workGraphProperties; 90 | 91 | ThrowIfFailed(stateObject_->QueryInterface(IID_PPV_ARGS(&stateObjectProperties))); 92 | ThrowIfFailed(stateObject_->QueryInterface(IID_PPV_ARGS(&workGraphProperties))); 93 | 94 | // Get the index of our work graph inside the state object (state object can contain multiple work graphs) 95 | const auto workGraphIndex = workGraphProperties->GetWorkGraphIndex(WorkGraphProgramName); 96 | 97 | // Create backing memory buffer 98 | // See https://microsoft.github.io/DirectX-Specs/d3d/WorkGraphs.html#getworkgraphmemoryrequirements 99 | D3D12_WORK_GRAPH_MEMORY_REQUIREMENTS memoryRequirements = {}; 100 | workGraphProperties->GetWorkGraphMemoryRequirements(workGraphIndex, &memoryRequirements); 101 | 102 | // Work graphs can also request no backing memory (i.e., MaxSizeInBytes = 0) 103 | if (memoryRequirements.MaxSizeInBytes > 0) { 104 | CD3DX12_HEAP_PROPERTIES heapProperties(D3D12_HEAP_TYPE_DEFAULT); 105 | CD3DX12_RESOURCE_DESC resourceDesc = CD3DX12_RESOURCE_DESC::Buffer(memoryRequirements.MaxSizeInBytes, 106 | D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); 107 | ThrowIfFailed(device->GetDevice()->CreateCommittedResource(&heapProperties, 108 | D3D12_HEAP_FLAG_NONE, 109 | &resourceDesc, 110 | D3D12_RESOURCE_STATE_COMMON, 111 | NULL, 112 | IID_PPV_ARGS(&backingMemory_))); 113 | } 114 | 115 | // Prepare work graph desc 116 | // See
https://microsoft.github.io/DirectX-Specs/d3d/WorkGraphs.html#d3d12_set_program_desc 117 | programDesc_.Type = D3D12_PROGRAM_TYPE_WORK_GRAPH; 118 | programDesc_.WorkGraph.ProgramIdentifier = stateObjectProperties->GetProgramIdentifier(WorkGraphProgramName); 119 | // Set flag to initialize backing memory. 120 | // We'll clear this flag once we've run the work graph for the first time. 121 | programDesc_.WorkGraph.Flags = D3D12_SET_WORK_GRAPH_FLAG_INITIALIZE; 122 | // Set backing memory 123 | if (backingMemory_) { 124 | programDesc_.WorkGraph.BackingMemory.StartAddress = backingMemory_->GetGPUVirtualAddress(); 125 | programDesc_.WorkGraph.BackingMemory.SizeInBytes = backingMemory_->GetDesc().Width; 126 | } 127 | 128 | // All tutorial work graphs must declare a node named "Entry" with an empty record (i.e., no input record). 129 | // The D3D12_DISPATCH_GRAPH_DESC uses entrypoint indices instead of string-based node IDs to reference the entry node. 130 | // GetEntrypointIndex allows us to translate from a node ID (i.e., node name and node array index) 131 | // to an entrypoint index. 132 | // See https://microsoft.github.io/DirectX-Specs/d3d/WorkGraphs.html#getentrypointindex 133 | entryPointIndex_ = workGraphProperties->GetEntrypointIndex(workGraphIndex, {L"Entry", 0}); 134 | 135 | // Check if entrypoint was found.
136 | if (entryPointIndex_ == 0xFFFFFFFFU) { 137 | throw std::runtime_error("work graph does not contain an entry node with [NodeId(\"Entry\", 0)]."); 138 | } 139 | } 140 | 141 | void WorkGraph::Dispatch(ID3D12GraphicsCommandList10* commandList) 142 | { 143 | D3D12_DISPATCH_GRAPH_DESC dispatchDesc = {}; 144 | dispatchDesc.Mode = D3D12_DISPATCH_MODE_NODE_CPU_INPUT; 145 | dispatchDesc.NodeCPUInput = {}; 146 | dispatchDesc.NodeCPUInput.EntrypointIndex = entryPointIndex_; 147 | // Launch graph with one record 148 | dispatchDesc.NodeCPUInput.NumRecords = 1; 149 | // Record does not contain any data 150 | dispatchDesc.NodeCPUInput.RecordStrideInBytes = 0; 151 | dispatchDesc.NodeCPUInput.pRecords = nullptr; 152 | 153 | // Set program and dispatch the work graphs. 154 | // See 155 | // https://microsoft.github.io/DirectX-Specs/d3d/WorkGraphs.html#setprogram 156 | // https://microsoft.github.io/DirectX-Specs/d3d/WorkGraphs.html#dispatchgraph 157 | 158 | commandList->SetProgram(&programDesc_); 159 | commandList->DispatchGraph(&dispatchDesc); 160 | 161 | // Clear backing memory initialization flag, as the graph has run at least once now 162 | // See https://microsoft.github.io/DirectX-Specs/d3d/WorkGraphs.html#d3d12_set_work_graph_flags 163 | programDesc_.WorkGraph.Flags &= ~D3D12_SET_WORK_GRAPH_FLAG_INITIALIZE; 164 | } 165 | 166 | std::uint32_t WorkGraph::GetTutorialIndex() const 167 | { 168 | return tutorialIndex_; 169 | } 170 | 171 | bool WorkGraph::IsSampleSolution() const 172 | { 173 | return sampleSolution_; 174 | } 175 | -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of the AMD & HSC Work Graph Playground. 2 | // 3 | // Copyright (C) 2024 Advanced Micro Devices, Inc. and Coburg University of Applied Sciences and Arts. 4 | // All rights reserved. 
5 | // 6 | // Permission is hereby granted, free of charge, to any person obtaining a copy 7 | // of this software and associated documentation files(the "Software"), to deal 8 | // in the Software without restriction, including without limitation the rights 9 | // to use, copy, modify, merge, publish, distribute, sublicense, and /or sell 10 | // copies of the Software, and to permit persons to whom the Software is 11 | // furnished to do so, subject to the following conditions : 12 | // 13 | // The above copyright notice and this permission notice shall be included in 14 | // all copies or substantial portions of the Software. 15 | // 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 19 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | // THE SOFTWARE. 
23 | 24 | #include <iostream> 25 | 26 | #include "Application.h" 27 | 28 | int main(int argc, char* argv[]) 29 | { 30 | Application::Options options = {}; 31 | 32 | // Simple arg parsing for flags 33 | for (int argIdx = 1; argIdx < argc; ++argIdx) { 34 | using namespace std::string_literals; 35 | 36 | const auto arg = argv[argIdx]; 37 | 38 | options.forceWarpAdapter /* */ |= (arg == "--forceWarpAdapter"s); 39 | options.enableDebugLayer /* */ |= (arg == "--enableDebugLayer"s); 40 | options.enableGpuValidationLayer |= (arg == "--enableGpuValidationLayer"s); 41 | } 42 | 43 | try { 44 | Application app(options); 45 | app.Run(); 46 | } catch (const std::exception& e) { 47 | std::cerr << e.what() << std::endl; 48 | return 1; 49 | } 50 | 51 | return 0; 52 | } 53 | -------------------------------------------------------------------------------- /tutorials/tutorial-0/HelloWorkGraphs.hlsl: -------------------------------------------------------------------------------- 1 | // This file is part of the AMD & HSC Work Graph Playground. 2 | // 3 | // Copyright (C) 2024 Advanced Micro Devices, Inc. and Coburg University of Applied Sciences and Arts. 4 | // All rights reserved. 5 | // 6 | // Permission is hereby granted, free of charge, to any person obtaining a copy 7 | // of this software and associated documentation files(the "Software"), to deal 8 | // in the Software without restriction, including without limitation the rights 9 | // to use, copy, modify, merge, publish, distribute, sublicense, and /or sell 10 | // copies of the Software, and to permit persons to whom the Software is 11 | // furnished to do so, subject to the following conditions : 12 | // 13 | // The above copyright notice and this permission notice shall be included in 14 | // all copies or substantial portions of the Software.
15 | // 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 19 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | // THE SOFTWARE. 23 | 24 | #include "Common.h" 25 | 26 | // Welcome to the first Work Graphs tutorial: Hello Work Graphs! 27 | // In this tutorial, you will familiarize yourself with the "Work Graph Playground" application 28 | // and see your first work graph in action. 29 | // 30 | // The Work Graph Playground app supports "hot-reloading". That means 31 | // whenever you save any of the tutorial shader files, the playground app automatically recompiles the shaders and rebuilds the work graph. 32 | // This will accelerate you on your Work Graphs learning curve! 33 | // 34 | // Now, follow the tutorial below to see this in action. 35 | 36 | // This attribute lets us turn any void function into a Work Graphs node. 37 | [Shader("node")] 38 | // Each tutorial uses one work graph. In all our tutorials, we call the work graph entry nodes consistently "Entry". 39 | // The CPU-side of the Work Graph Playground invokes the "Entry" node one time each frame. 40 | // In all our tutorials, the CPU always passes a single empty input record to "Entry". 41 | // Peek into WorkGraph::Dispatch in WorkGraph.cpp for more details on launching Work Graphs. 42 | // Mark the node as entry node by "NodeIsProgramEntry" so it can be launched. 43 | [NodeIsProgramEntry] 44 | // We only need a single thread for now, so we use the "thread" launch mode. 45 | // Other launch mode types are discussed in more detail in tutorial-2.
46 | [NodeLaunch("thread")] 47 | // In Work Graphs, nodes are identified by "node ids". A "Node id" consists of a node name and an optional array index. 48 | // If you skip the array index, it is set to zero. If you skip the NodeId attribute, it defaults to the function name. 49 | [NodeId("Entry", 0)] 50 | // The NodeId-attribute, however, unfolds its full potential in the context of node-arrays, as detailed in tutorial-3. 51 | void EntryFunction( 52 | // Here in tutorial-0, the Entry node may invoke a second node and thus declares 53 | // an output record to the "Worker" node. 54 | [MaxRecords(1)] // How many outputs? (here 1 output) 55 | [NodeId("Worker")] // To which node? (Here, "Worker" is the id of the node we wish to launch. It is implemented by the function WorkerFunction below.) 56 | EmptyNodeOutput nodeOutput // What record do we send to the "Worker" node? (Here it is empty, therefore we use the EmptyNodeOutput object.) 57 | // More on records in tutorial-1. 58 | ) 59 | { 60 | // To give visual feedback on what our work graph here in tutorial-0 is doing, we provide a small set of utility functions 61 | // for printing text onscreen. 62 | // See Common.h for more details. 63 | // For now, we print a small welcome message to the center of the screen. 64 | 65 | // Position cursor in center of screen. 66 | Cursor cursor = Cursor(RenderSize / 2, 2, float3(0,.5,1)); 67 | cursor.Up(3); 68 | 69 | // Print welcome message 70 | PrintCentered(cursor, "Hello Work Graphs!"); 71 | cursor.Newline(); 72 | cursor.Newline(); 73 | PrintCentered(cursor, "Open"); 74 | cursor.Newline(); 75 | cursor.SetSize(3); 76 | cursor.SetColor(float3(1,.5,0)); 77 | PrintCentered(cursor, "tutorials/tutorial-0/HelloWorkGraphs.hlsl"); 78 | cursor.Newline(); 79 | cursor.SetSize(2); 80 | cursor.SetColor(float3(0,.5,1)); 81 | PrintCentered(cursor, "to start this tutorial"); 82 | 83 | // [Task 1] 84 | // With the playground application running, uncomment the following line and save this file.
85 | 86 | //nodeOutput.ThreadIncrementOutputCount(1); /* <-- uncomment me */ 87 | 88 | // This line invokes the "Worker" node below one single time and you should see a second message appearing on screen. 89 | // Edit the "Worker" function below to print a personalized hello-world message. 90 | } 91 | 92 | // This function contains the code of the Worker node. 93 | [Shader("node")] 94 | [NodeId("Worker", 0)] 95 | [NodeLaunch("thread")] 96 | void WorkerFunction() 97 | { 98 | // Position cursor in center of screen. 99 | Cursor cursor = Cursor(RenderSize / 2, 2, float3(0, .5, 1)); 100 | // Move cursor underneath first message 101 | cursor.Down(6); 102 | 103 | // [Task 2] 104 | // Edit the hello-world message here. 105 | // Save the file again and see the updated text onscreen. 106 | PrintCentered(cursor, "Hello from the \"Worker\" node!"); 107 | 108 | // Congratulations, you've successfully completed tutorial-0! 109 | // To move on to the next tutorial, open the "Tutorials" menu on the top-left of the playground application and select "Tutorial 1: Records". 110 | } 111 | -------------------------------------------------------------------------------- /tutorials/tutorial-0/screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GPUOpen-LibrariesAndSDKs/WorkGraphPlayground/616e78bad82a583e116a4db3f4f7ac6a84cd197e/tutorials/tutorial-0/screenshot.png -------------------------------------------------------------------------------- /tutorials/tutorial-1/Records.hlsl: -------------------------------------------------------------------------------- 1 | // This file is part of the AMD & HSC Work Graph Playground. 2 | // 3 | // Copyright (C) 2024 Advanced Micro Devices, Inc. and Coburg University of Applied Sciences and Arts. 4 | // All rights reserved.
5 | // 6 | // Permission is hereby granted, free of charge, to any person obtaining a copy 7 | // of this software and associated documentation files(the "Software"), to deal 8 | // in the Software without restriction, including without limitation the rights 9 | // to use, copy, modify, merge, publish, distribute, sublicense, and /or sell 10 | // copies of the Software, and to permit persons to whom the Software is 11 | // furnished to do so, subject to the following conditions : 12 | // 13 | // The above copyright notice and this permission notice shall be included in 14 | // all copies or substantial portions of the Software. 15 | // 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 19 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | // THE SOFTWARE. 23 | 24 | #include "Common.h" 25 | 26 | // In this tutorial, we're going to take a look at the data-flow aspect of work graphs. 27 | // In particular, we're going to see how you can pass data (i.e., records) from a producer node to a consumer node. 28 | // 29 | // Our goal is to draw boxes with text in them on screen. 30 | // The "PrintBox" node is already printing the box content. All you have to do is draw a rectangle around the text. 31 | // Additionally, we're going to enclose all boxes with another, larger rectangle. 
32 | // 33 | // Eventually, you should create a work graph which looks like this: 34 | // 35 | // +-------+ 36 | // | Entry | 37 | // +-------+ 38 | // | 39 | // +-----------------+-----------------+ 40 | // v v v 41 | // +-----------------+ +----------+ +---------------+ 42 | // | PrintHelloWorld | | PrintBox | | DrawRectangle | 43 | // +-----------------+ +----------+ +---------------+ 44 | // 45 | // 46 | // Task 1: Take a look at the "Entry" node below, and see how it's currently emitting records to the "PrintBox" node. 47 | // See how the "PrintBox" node is then reading such a record to print text on screen. 48 | // Follow the instructions below to also emit an empty record to the "PrintHelloWorld" node. 49 | // Task 2: Create the record struct to draw a rectangle around all boxes. 50 | // Take a look at the prepared stub for the "DrawRectangle" node to see what data needs to be passed in the record. 51 | // Task 3: Add your record struct as an input to the "DrawRectangle" node below and 52 | // complete the code in the node to draw a rectangle on screen. 53 | // Task 4: Add a node output to the "Entry" node for the "DrawRectangle" node with your newly created record struct. 54 | // For now, we only care about the boxes around the already existing text, thus each thread will emit a single record. 55 | // Set the [MaxRecords(...)] attribute for your node output accordingly. 56 | // Task 5: Emit the record to the "DrawRectangle" node from the "Entry" node. 57 | // Task 6: Additionally, we now want to draw another rectangle around all of these boxes. 58 | // Update the [MaxRecords(...)] attribute of your node output and follow the instructions below 59 | // to emit a per-thread-group record. 60 | 61 | // Constants that define layout and positioning of boxes.
62 | static const int BoxMargin = 10; 63 | static const int2 BoxSize = int2(165, 20); 64 | static const int2 BoxCursorOffset = int2(5, 3); 65 | static const int2 InitialBoxPosition = int2(BoxMargin * 2, 60); 66 | 67 | // Record struct for the "PrintBox" node. 68 | struct PrintBoxRecord { 69 | // Top-left pixel coordinate for a box. 70 | int2 topLeft; 71 | // Index to print inside the box. See "PrintBox" implementation below. 72 | int2 index; 73 | }; 74 | 75 | // [Task 2]: Define a struct for the "DrawRectangle" node here! 76 | 77 | [Shader("node")] 78 | [NodeIsProgramEntry] 79 | [NodeLaunch("broadcasting")] 80 | [NodeDispatchGrid(1, 1, 1)] 81 | [NumThreads(4, 1, 1)] 82 | void Entry( 83 | // For this tutorial, the entry node uses a "broadcasting" node launch, 84 | // which in this case is equivalent to calling a compute shader with Dispatch(1, 1, 1). 85 | // We'll cover the different node launches in more detail in the next tutorial. 86 | // As this behaves like a compute shader, we can also use the SV_DispatchThreadID and 87 | // SV_GroupThreadID semantics to get the position of our thread in the dispatch/thread group. 88 | // In broadcasting mode, a node launches NodeDispatchGrid.x * NodeDispatchGrid.y * NodeDispatchGrid.z thread 89 | // groups. Here we have 1 thread-group. Each thread group has NumThreads.x * NumThreads.y * NumThreads.z threads. Here each thread group has 4 threads. 90 | uint2 dispatchThreadId : SV_DispatchThreadID, 91 | uint2 groupThreadId : SV_GroupThreadID, 92 | uint2 groupId : SV_GroupID, 93 | 94 | // The [MaxRecords(1)] attribute specifies the maximum number of records that a thread-group 95 | // will emit/send to a specific target node. 96 | // In this case, the entire thread group will only emit a single record. See [Task 1] for more details. 97 | [MaxRecords(1)] 98 | // As we have not specified a [NodeId(...)] attribute for the PrintHelloWorld node (see function definition below), its NodeId defaults to [NodeId("PrintHelloWorld", 0)]. 
99 | // 100 | // "PrintHelloWorld" function does not have any input records, thus we must use the "EmptyNodeOutput" to 101 | // declare the output. 102 | EmptyNodeOutput PrintHelloWorld, 103 | 104 | // In this tutorial, every thread of our thread group can emit a record to the "PrintBox" node. 105 | // As we have 4 threads in our thread group, we set this limit to 4. 106 | // We know that dispatch-thread (1, 0) does not actually emit a record and we only launch one group. 107 | // Therefore, we could also set this to 3. 108 | // But imagine you want to increase the [NodeDispatchGrid(...)] dimensions above, to have more thread-groups. 109 | // Then all threads of those thread-groups might want to emit a record. 110 | [MaxRecords(4)] 111 | // Here we use the [NodeId(...)] attribute to explicitly set the target node ID. 112 | // Using an explicit target attribute allows us to name the output parameter however we like. 113 | // Here we call it boxOutput. 114 | [NodeId("PrintBox")] 115 | // "PrintBox" declares an input record of type "PrintBoxRecord" (see node declaration below), 116 | // thus we must specify a "NodeOutput" with a record of the same type. 117 | NodeOutput<PrintBoxRecord> boxOutput 118 | 119 | // [Task 4]: Declare a new "NodeOutput" to the "DrawRectangle" node here using your newly created record struct from Task 2. 120 | // Similar to the "boxOutput", we want every thread to be able to request a per-thread output record. 121 | // Set the [MaxRecords(...)] attribute accordingly. 122 | ) 123 | { 124 | // [Task 1]: Emit a single empty record to the "PrintHelloWorld" node. 125 | // For non-empty records, you've seen how "GetThreadNodeOutputRecords" is used to request 126 | // one or more records below. 127 | // For empty outputs, the equivalent here is "ThreadIncrementOutputCount(in int recordCount)", 128 | // which emits a set number of empty records. 129 | // Keep in mind, that we only want to emit a single record per thread-group.
130 | // Thus only one thread would have to increment the output count. 131 | // Alternatively, you can use "GroupIncrementOutputCount", to increment the output count for 132 | // the entire thread group. 133 | 134 | // Question: Have a look at the implementation of "PrintHelloWorld". What would happen if we incremented the 135 | // output count multiple times? 136 | 137 | // Box position for each thread. 138 | const int2 threadBoxPosition = InitialBoxPosition + dispatchThreadId * (BoxSize + BoxMargin); 139 | 140 | // For demonstration purposes, we skip the second box. 141 | const bool hasBoxOutput = !all(dispatchThreadId == int2(1, 0)); 142 | 143 | // Here we request a single output record per thread (if hasBoxOutput is true) to the "PrintBox" node. 144 | // As these calls for requesting output records (or incrementing the output count for empty node outputs) 145 | // must be thread-group uniform, i.e., all threads in the thread group must call this function at the same time, 146 | // we cannot use normal control flow like 147 | // if (hasBoxOutput) { 148 | // ThreadNodeOutputRecords<PrintBoxRecord> boxOutputRecord = 149 | // boxOutput.GetThreadNodeOutputRecords(1); 150 | // ... 151 | // } 152 | // if the condition is non-uniform across all threads to skip requesting outputs for some threads. 153 | // Instead, these threads can just request zero output records. 154 | ThreadNodeOutputRecords<PrintBoxRecord> boxOutputRecord = 155 | boxOutput.GetThreadNodeOutputRecords(hasBoxOutput ? 1 : 0); 156 | 157 | // Threads that did not request any outputs must not write to the "boxOutputRecord" object. 158 | if (hasBoxOutput) { 159 | // Here we get the 0-th output record in the "boxOutputRecord" object and store our data to it. 160 | // If we called GetThreadNodeOutputRecords with more than one record (e.g., GetThreadNodeOutputRecords(2)), 161 | // we can then call "Get(...)" with different indices to write all these records.
162 | // If we only have a single record, we can also call "Get()" without any arguments to always get the 0-th record. 163 | boxOutputRecord.Get(0).topLeft = threadBoxPosition; 164 | // Alternatively, we can also use the []-operator to access the records. 165 | // Future HLSL versions may also support a "->" operator for accessing records. 166 | boxOutputRecord[0].index = dispatchThreadId; 167 | } 168 | 169 | // We are done writing our records and thus can send them off to be processed using the "OutputComplete" method. 170 | // Calls to this method must again be thread-group-uniform, thus must also be called by threads that did not 171 | // request any output records. 172 | boxOutputRecord.OutputComplete(); 173 | 174 | // [Task 5]: Similar to the "boxOutputRecord" above, request a single per-thread record from your NodeOutput<...>. 175 | // Again, we want to skip the second box, thus you can again use "hasBoxOutput" to selectively request 176 | // zero records for the second thread. 177 | // Write all required data to your record. You can use the "BoxSize" constant above to correctly size your rectangle. 178 | // Don't forget to call "OutputComplete()" after writing the data to your record. 179 | 180 | // [Task 6]: Now we want to emit another record to "DrawRectangle", that draws a rectangle around all of our boxes. 181 | // Start by adjusting the "[MaxRecords(...)]" attribute for the output to "DrawRectangle". 182 | // As we need this rectangle to enclose all of the boxes emitted above, multiple threads must work together 183 | // to create the record for this new rectangle. 184 | // In particular, we know: 185 | // - Thread (0, 0) emitted the record for the most top-left box. 186 | // - Thread (3, 0) emitted the record for the most bottom-right box. 187 | // Thus, we need a record that all threads of this thread group can access together. 188 | // Use "GetGroupNodeOutputRecords" to get such a shared GroupNodeOutputRecords<...> object.
189 | // Write the data to this record and don't forget to call "OutputComplete()" at the end. 190 | } 191 | 192 | [Shader("node")] 193 | [NodeLaunch("thread")] 194 | void PrintHelloWorld( 195 | // This node does not declare any input record, thus there's nothing to see here. 196 | ) 197 | { 198 | // Print a "Hello World!" message above all the boxes. 199 | Cursor cursor = Cursor(InitialBoxPosition); 200 | cursor.Up(2); 201 | Print(cursor, "Hello World!"); 202 | } 203 | 204 | [Shader("node")] 205 | [NodeLaunch("thread")] 206 | void PrintBox( 207 | // "PrintBox" uses the "thread" node launch (more on these in the next tutorial), thus, if we want to declare 208 | // an input record to this node, we must use the "ThreadNodeInputRecord" type with our desired record struct. 209 | ThreadNodeInputRecord<PrintBoxRecord> inputRecord 210 | 211 | // If our node were to also output any records to other nodes, then we could declare them here 212 | // in the same way as we've seen with the "Entry" node. 213 | ) 214 | { 215 | // For easier access to members of the input record struct, we fetch the input record 216 | // using the ".Get()" function and store it to a local variable. 217 | const PrintBoxRecord record = inputRecord.Get(); 218 | 219 | // Offset the cursor inside the box & print "Box(x, y)" 220 | Cursor cursor = Cursor(record.topLeft + BoxCursorOffset); 221 | Print(cursor, "Box ("); 222 | // As we stored the input record to "record", we can directly access members of the 223 | // PrintBoxRecord from it. 224 | // Alternatively, we can also write "inputRecord.Get().index.x". 225 | // Future HLSL versions might also support a "->" operator, thus we can then write "inputRecord->index.x".
226 | PrintInt(cursor, record.index.x); 227 | Print(cursor, ", "); 228 | PrintInt(cursor, record.index.y); 229 | Print(cursor, ")"); 230 | } 231 | 232 | [Shader("node")] 233 | [NodeLaunch("thread")] 234 | void DrawRectangle( 235 | // [Task 3]: Declare a node input for the "DrawRectangle" node with your new struct defined in Task 2 here. 236 | // Similar to "PrintBox", "DrawRectangle" also uses the "thread" node launch, 237 | // thus you must declare your input with "ThreadNodeInputRecord". 238 | ) 239 | { 240 | // [Task 3]: Use the DrawRect function provided in Common.h to draw a rectangle on screen. 241 | // Pass the data of your input record as arguments to the DrawRect function. 242 | // DrawRect(...); 243 | } 244 | 245 | -------------------------------------------------------------------------------- /tutorials/tutorial-1/RecordsSolution.hlsl: -------------------------------------------------------------------------------- 1 | // This file is part of the AMD & HSC Work Graph Playground. 2 | // 3 | // Copyright (C) 2024 Advanced Micro Devices, Inc. and Coburg University of Applied Sciences and Arts. 4 | // All rights reserved. 5 | // 6 | // Permission is hereby granted, free of charge, to any person obtaining a copy 7 | // of this software and associated documentation files(the "Software"), to deal 8 | // in the Software without restriction, including without limitation the rights 9 | // to use, copy, modify, merge, publish, distribute, sublicense, and /or sell 10 | // copies of the Software, and to permit persons to whom the Software is 11 | // furnished to do so, subject to the following conditions : 12 | // 13 | // The above copyright notice and this permission notice shall be included in 14 | // all copies or substantial portions of the Software.
15 | // 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 19 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | // THE SOFTWARE. 23 | 24 | #include "Common.h" 25 | 26 | 27 | // +-------+ 28 | // | Entry | 29 | // +-------+ 30 | // | 31 | // +-----------------+-----------------+ 32 | // v v v 33 | // +-----------------+ +----------+ +---------------+ 34 | // | PrintHelloWorld | | PrintBox | | DrawRectangle | 35 | // +-----------------+ +----------+ +---------------+ 36 | 37 | // Constants that define layout and positioning of boxes. 38 | static const int BoxMargin = 10; 39 | static const int2 BoxSize = int2(165, 20); 40 | static const int2 BoxCursorOffset = int2(5, 3); 41 | static const int2 InitialBoxPosition = int2(BoxMargin * 2, 60); 42 | 43 | // Record struct for the "PrintBox" node. 44 | struct PrintBoxRecord { 45 | // Top-left pixel coordinate for a box. 46 | int2 topLeft; 47 | // Index to print inside the box. See "PrintBox" implementation below. 48 | int2 index; 49 | }; 50 | 51 | // [Task 2 Solution]: Record struct to draw a rectangle on screen. 52 | // See "DrawRectangle" node implementation below. 53 | struct DrawRectangleRecord { 54 | // Pixel coordinate of top-left corner of rectangle. 55 | int2 topLeft; 56 | // Pixel coordinate of bottom-right corner of rectangle. 57 | int2 bottomRight; 58 | // Color of the rectangle. 
59 | float3 color; 60 | }; 61 | 62 | [Shader("node")] 63 | [NodeIsProgramEntry] 64 | [NodeLaunch("broadcasting")] 65 | [NodeDispatchGrid(1, 1, 1)] 66 | [NumThreads(4, 1, 1)] 67 | void Entry( 68 | uint2 dispatchThreadId : SV_DispatchThreadID, 69 | uint2 groupThreadId : SV_GroupThreadID, 70 | uint2 groupId : SV_GroupID, 71 | [MaxRecords(1)] 72 | EmptyNodeOutput PrintHelloWorld, 73 | 74 | [MaxRecords(4)] 75 | [NodeId("PrintBox")] 76 | NodeOutput boxOutput, 77 | 78 | // [Task 4 Solution]: 5 records: 4 records (one per box) and one record enclosing all boxes. 79 | [MaxRecords(5)] 80 | [NodeId("DrawRectangle")] 81 | NodeOutput rectangleOutput 82 | ) 83 | { 84 | // [Task 1 Solution]: 85 | PrintHelloWorld.GroupIncrementOutputCount(1); 86 | 87 | // Box position for each thread. 88 | const int2 threadBoxPosition = InitialBoxPosition + dispatchThreadId * (BoxSize + BoxMargin); 89 | 90 | // For demonstration purposes, we skip the second box. 91 | const bool hasBoxOutput = !all(dispatchThreadId == int2(1, 0)); 92 | 93 | ThreadNodeOutputRecords boxOutputRecord = 94 | boxOutput.GetThreadNodeOutputRecords(hasBoxOutput ? 1 : 0); 95 | 96 | if (hasBoxOutput) { 97 | boxOutputRecord.Get(0).topLeft = threadBoxPosition; 98 | boxOutputRecord[0].index = dispatchThreadId; 99 | } 100 | 101 | boxOutputRecord.OutputComplete(); 102 | 103 | // [Task 5 Solution]: 104 | ThreadNodeOutputRecords threadRectangleRecord = 105 | rectangleOutput.GetThreadNodeOutputRecords(hasBoxOutput ? 
1 : 0); 106 | 107 | if (hasBoxOutput) { 108 | threadRectangleRecord.Get().topLeft = threadBoxPosition; 109 | threadRectangleRecord.Get().bottomRight = threadBoxPosition + BoxSize; 110 | threadRectangleRecord.Get().color = float3(0, 0, 0); 111 | } 112 | 113 | threadRectangleRecord.OutputComplete(); 114 | 115 | // [Task 6 Solution]: 116 | GroupNodeOutputRecords groupRectangleRecord = 117 | rectangleOutput.GetGroupNodeOutputRecords(1); 118 | 119 | // The first thread in the group wrote the record for the most top-left box, 120 | // thus only this thread must write the topLeft position of the shared rectangle record. 121 | if (groupThreadId.x == 0 && groupThreadId.y == 0) { 122 | groupRectangleRecord.Get().topLeft = threadBoxPosition - BoxMargin; 123 | } 124 | // Similarly, the last thread wrote the most bottom-right box record, 125 | // thus only this thread must write the bottomRight position of the shared rectangle record. 126 | if (groupThreadId.x == 3 && groupThreadId.y == 0) { 127 | groupRectangleRecord.Get().bottomRight = threadBoxPosition + BoxSize + BoxMargin; 128 | } 129 | 130 | // All threads jointly write the same color to the shared record. 131 | groupRectangleRecord.Get().color = float3(1, 0, 0); 132 | 133 | // All data has been written, send the shared group record. 134 | groupRectangleRecord.OutputComplete(); 135 | } 136 | 137 | [Shader("node")] 138 | [NodeLaunch("thread")] 139 | void PrintHelloWorld() 140 | { 141 | // Print a "Hello World!" message above all the boxes. 
142 | Cursor cursor = Cursor(InitialBoxPosition); 143 | cursor.Up(2); 144 | Print(cursor, "Hello World!"); 145 | } 146 | 147 | [Shader("node")] 148 | [NodeLaunch("thread")] 149 | void PrintBox(ThreadNodeInputRecord inputRecord) 150 | { 151 | const PrintBoxRecord record = inputRecord.Get(); 152 | 153 | // Offset the cursor inside the box & print "Box(x, y)" 154 | Cursor cursor = Cursor(record.topLeft + BoxCursorOffset); 155 | Print(cursor, "Box ("); 156 | PrintInt(cursor, record.index.x); 157 | Print(cursor, ", "); 158 | PrintInt(cursor, record.index.y); 159 | Print(cursor, ")"); 160 | } 161 | 162 | [Shader("node")] 163 | [NodeLaunch("thread")] 164 | void DrawRectangle( 165 | // [Task 3 Solution]: 166 | ThreadNodeInputRecord inputRecord 167 | ) 168 | { 169 | // [Task 3 Solution]: 170 | // We again store the input record to a local variable first... 171 | const DrawRectangleRecord record = inputRecord.Get(); 172 | 173 | // ... and use the data contained in the record to draw a rectangle on screen. 174 | DrawRect(record.topLeft, record.bottomRight, 1, record.color); 175 | } 176 | 177 | -------------------------------------------------------------------------------- /tutorials/tutorial-1/screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GPUOpen-LibrariesAndSDKs/WorkGraphPlayground/616e78bad82a583e116a4db3f4f7ac6a84cd197e/tutorials/tutorial-1/screenshot.png -------------------------------------------------------------------------------- /tutorials/tutorial-2/NodeLaunches.hlsl: -------------------------------------------------------------------------------- 1 | // This file is part of the AMD & HSC Work Graph Playground. 2 | // 3 | // Copyright (C) 2024 Advanced Micro Devices, Inc. and Coburg University of Applied Sciences and Arts. 4 | // All rights reserved. 
5 | // 6 | // Permission is hereby granted, free of charge, to any person obtaining a copy 7 | // of this software and associated documentation files(the "Software"), to deal 8 | // in the Software without restriction, including without limitation the rights 9 | // to use, copy, modify, merge, publish, distribute, sublicense, and /or sell 10 | // copies of the Software, and to permit persons to whom the Software is 11 | // furnished to do so, subject to the following conditions : 12 | // 13 | // The above copyright notice and this permission notice shall be included in 14 | // all copies or substantial portions of the Software. 15 | // 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 19 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | // THE SOFTWARE. 23 | 24 | #include "Common.h" 25 | 26 | // In this tutorial, we're going to take a look at all the different options for launching nodes in a work graph. 27 | // Work graphs replace the concepts of draws (e.g., DrawInstanced, DrawIndexedInstanced) and 28 | // dispatches (e.g., Dispatch, DispatchRays) with records and node launches. 29 | // Each record invokes a node and the node can choose from one of three launch modes: 30 | // 31 | // "broadcasting": 32 | // A broadcasting node is - on the surface - very similar to a compute shader: 33 | // Each record invokes a 3D grid of thread groups, with every thread group consisting of a 3D grid of threads. 34 | // You specify the size of the thread group with the [NumThreads(x, y, z)] attribute.
35 | // You can set the dispatch size (i.e., size of the thread-group grid) either statically using the 36 | // [NodeDispatchGrid(x, y, z)] attribute, or dynamically as part of the node input record. 37 | // We'll take a look at the latter part in Task 1. 38 | // 39 | // "thread": 40 | // Each record invokes a single thread, but unlike compute shaders or broadcasting nodes with [NumThreads(1, 1, 1)], 41 | // the work graphs runtime can combine multiple records and process them in a single thread group. Thus, the work 42 | // graph runtime can then better leverage the available GPU resources. 43 | // Thread launches are ideal for single-threaded workloads (e.g., our Print functions). 44 | // You have already seen thread node launches in action in tutorial-0 and tutorial-1. 45 | // 46 | // "coalescing": 47 | // Unlike "broadcasting" or "thread" nodes, "coalescing" nodes can accept more than one input record: 48 | // A set of one or more input records invokes a single thread group. The size of the thread group is again denoted 49 | // by the [NumThreads(x, y, z)] attribute. 50 | // The maximum number of input records is declared using the [MaxRecords(...)] attribute. This happens in the same way as 51 | // we declare the output limits of a node. We've done this already in tutorial-1. 52 | // That said, a limit of, say, [MaxRecords(5)] only guarantees that the coalescing node is invoked with one to five records. 53 | // However, it does not mean that the input will always contain five records. 54 | // The actual number of input records present can be queried with the "Count()" method of the "GroupNodeInputRecords" object. 55 | // In this tutorial, we'll use a coalescing node to combine two rectangles into a single one if they share a vertical edge. 56 | // 57 | // See https://microsoft.github.io/DirectX-Specs/d3d/WorkGraphs.html#node-types for more details on all the launch modes. 58 | // In our Work Graph Playground, we skip the experimental "mesh" launch mode. 
If you're interested, 59 | // you can find more details on mesh nodes in Work Graphs here: https://gpuopen.com/learn/work_graphs_mesh_nodes. 60 | // 61 | // In tutorial-1, we declared inputs to our thread-launch nodes using the "ThreadNodeInputRecord" object. 62 | // Nodes with the "broadcasting" and "coalescing" launch modes use "DispatchNodeInputRecord" and "GroupNodeInputRecords", respectively. 63 | // See https://microsoft.github.io/DirectX-Specs/d3d/WorkGraphs.html#node-input-declaration for details. 64 | // 65 | // In this assignment, we are again going to draw rectangles, but this time, instead of just drawing the outline, 66 | // we are also going to fill the rectangle. This is an ideal use-case for broadcasting nodes: we require as many threads as pixels 67 | // to efficiently fill large areas. 68 | // Additionally, we're going to demonstrate the capabilities of coalescing nodes, by combining two neighboring rectangles 69 | // into a single one, if they share a vertical edge. 70 | // The resulting graph for this tutorial will be as follows: 71 | // 72 | // +----------------------------+ 73 | // | NodeLaunch("broadcasting") | 74 | // | Entry | 75 | // +----------------------------+ 76 | // | 77 | // +--------------+--------------+ 78 | // v v 79 | // +----------------------+ +--------------------------+ 80 | // | NodeLaunch("thread") | | NodeLaunch("coalescing") | 81 | // | PrintLabel | | MergeRectangle | 82 | // +----------------------+ +--------------------------+ 83 | // | 84 | // v 85 | // +----------------------------+ 86 | // | NodeLaunch("broadcasting") | 87 | // | FillRectangle | 88 | // +----------------------------+ 89 | // 90 | // Task 1: Have a look at the "FillRectangle" node below. It is currently using a fixed dispatch grid set by the 91 | // [NodeDispatchGrid(...)] attribute, thus all rectangles have the same size.
92 | // As the GetRectanglePositionAndSize helper computes an individual position and size for every rectangle, 93 | // we need to change this to a dynamic dispatch grid set by the input record. 94 | // Start by adding variables for the dispatch grid and rectangle size in the "RectangleRecord" struct. 95 | // Next, change the [NodeDispatchGrid(...)] attribute of the "FillRectangle" to a [NodeMaxDispatchGrid(...)] 96 | // and update the dispatch size limit in the x dimension. 97 | // Lastly, set the dispatch grid and rectangle size for the rectangle records in the "Entry" node. 98 | // Once you're done, the rectangles should now cover a continuous horizontal rectangle. 99 | // Task 2: Change the "rectangleOutput" of the "Entry" node to target the "MergeRectangle" coalescing node. 100 | // The "MergeRectangle" takes one to two rectangles and we'll later use this functionality to combine 101 | // rectangles if they share an edge. In this task, you are going to implement the fallback path and 102 | // pass through all incoming records to the "FillRectangle" node. 103 | // Once you're done, everything should still look the same. 104 | // Task 3: Complete the implementation of the "MergeRectangle" node. 105 | // Complete the sub-call to the "ComputeCombinedRect" helper method. 106 | // If this helper returns "true", then you must emit a single record to the "FillRectangle" node. 107 | // Position and size of this rectangle are given by the "ComputeCombinedRect" helper. 108 | // For the color of this rectangle, you can re-use the color from any of the input records (e.g., record[0]). 109 | // Once you're done, you should now see the same area being filled, but this time with just three instead of five rectangles. 110 | // As five is not divisible by two, there's also one rectangle which could not be merged and is passed through as-is from 111 | // the "MergeRectangle" node to the "FillRectangle" node.
112 | // Task 4: Increase the dispatch grid of the "Entry" node in x dimension to emit more rectangles. 113 | // You should now see the merged rectangles flickering, as the input to the coalescer node is non-deterministic 114 | // and depends on the timing of the different thread groups of the "Entry" node. 115 | // This step is omitted from the sample solution. 116 | 117 | // Constants that define the layout and positioning of rectangles. 118 | static const int RectangleSize = 48; 119 | // Size increase with every rectangle. 120 | static const int RectangleSizeStep = 4; 121 | static const int2 RectangleCursorOffset = int2(-8, -20); 122 | static const int2 InitialRectanglePosition = int2(20, 60); 123 | 124 | struct PrintLabelRecord { 125 | int2 topLeft; 126 | uint index; 127 | }; 128 | 129 | // [Task 1]: 130 | // Add a dispatch size and rectangle size member to the "RectangleRecord" struct. 131 | // The rectangle size will be in pixels, while the dispatch size will control how many thread 132 | // groups are dispatched. Each thread group will then cover an 8x8 pixel area. 133 | // Dispatch size (or dispatch grid) of a broadcasting node is specified in the record with the 134 | // "SV_DispatchGrid" semantic. The dispatch grid can be of type uint, uint2, uint3, uint16_t, uint16_t2 or uint16_t3. 135 | // See https://microsoft.github.io/DirectX-Specs/d3d/WorkGraphs.html#sv_dispatchgrid for more details. 136 | // In our case, we only need two dimensions, thus we recommend using uint2. 137 | struct RectangleRecord { 138 | int2 topLeft; 139 | float4 color; 140 | }; 141 | 142 | // Helper function to compute the "position" and "size" for the rectangles from an "index". 143 | void GetRectanglePositionAndSize(in uint index, out int2 position, out int2 size); 144 | 145 | [Shader("node")] 146 | [NodeIsProgramEntry] 147 | [NodeLaunch("broadcasting")] 148 | // [Task 4]: Increment the x dimension of the dispatch grid and observe the changes to the rectangle merging. 
149 | [NodeDispatchGrid(1, 1, 1)] 150 | [NumThreads(5, 1, 1)] 151 | void Entry( 152 | uint dispatchThreadId : SV_DispatchThreadID, 153 | 154 | [MaxRecords(5)] 155 | [NodeId("PrintLabel")] 156 | NodeOutput printLabelOutput, 157 | 158 | [MaxRecords(5)] 159 | // [Task 2]: Change this output to target the "MergeRectangle" node. 160 | // Hint: As "FillRectangle" and "MergeRectangle" share the same input node, your change should be rather small. 161 | [NodeId("FillRectangle")] 162 | NodeOutput rectangleOutput) 163 | { 164 | // Rectangle position and size for each thread 165 | int2 threadRectanglePositon; 166 | int2 threadRectangleSize; 167 | GetRectanglePositionAndSize(dispatchThreadId, threadRectanglePositon, threadRectangleSize); 168 | 169 | ThreadNodeOutputRecords printLabelRecord = 170 | printLabelOutput.GetThreadNodeOutputRecords(1); 171 | 172 | printLabelRecord.Get().topLeft = threadRectanglePositon; 173 | printLabelRecord.Get().index = dispatchThreadId; 174 | 175 | printLabelRecord.OutputComplete(); 176 | 177 | ThreadNodeOutputRecords rectangleOutputRecord = 178 | rectangleOutput.GetThreadNodeOutputRecords(1); 179 | 180 | // [Task 1]: 181 | // Set the newly created dispatch size and rectangle size in the record. 182 | // Rectangle size should be set to "threadRectangleSize" above. 183 | // The unit of "threadRectangleSize" is in pixels, but the dispatch size is in thread groups. 184 | // As each thread group covers an 8x8 pixel area, we need to divide the rectangle size by 8 185 | // and round up to get the required dispatch size. 186 | // You can use the "DivideAndRoundUp(int2 dividend, int2 divisor)" function in Common.h to perform this calculation. 
187 | rectangleOutputRecord.Get().topLeft = threadRectanglePositon; 188 | rectangleOutputRecord.Get().color = UintToColor(dispatchThreadId); 189 | 190 | rectangleOutputRecord.OutputComplete(); 191 | } 192 | 193 | [Shader("node")] 194 | [NodeLaunch("broadcasting")] 195 | // [Task 1]: 196 | // Change this from [NodeDispatchGrid(...)] to [NodeMaxDispatchGrid(...)] to allow for a dynamic grid size. 197 | // Together with the SV_DispatchGrid semantic in the "RectangleRecord" struct, this will 198 | // enable the "FillRectangle" to read its dispatch grid dynamically from the input record. 199 | // The width of the rectangles increases linearly by "RectangleSizeStep" with each one. 200 | // We currently have 5 rectangles, but we want to increase this number later on. 201 | // Set the maximum dispatch grid to allow for at least 20 rectangles 202 | // (i.e., a rectangle which has a width of RectangleSize + 20 * RectangleSizeStep). 203 | [NodeDispatchGrid(6, 6, 1)] 204 | [NumThreads(8, 8, 1)] 205 | [NodeId("FillRectangle")] 206 | void FillRectangleNode( 207 | DispatchNodeInputRecord inputRecord, 208 | 209 | uint2 dispatchThreadId : SV_DispatchThreadID 210 | ) 211 | { 212 | const RectangleRecord record = inputRecord.Get(); 213 | 214 | const int2 pixel = record.topLeft + dispatchThreadId; 215 | // [Task 1]: 216 | // Each thread group can fill up to 8x8 pixels. If the rectangle size is not divisible by 8, 217 | // we have to round up to ensure we launch enough thread groups. 218 | // Thus, some thread groups may extend past the size of the rectangle. 219 | // Add a check to test if "dispatchThreadId" is within the rectangle size (supplied by the input record). 220 | if (// Check if pixel is within bounds of render target.
221 | all(pixel >= 0) && all(pixel < RenderSize)) { 222 | RenderTarget[pixel] = record.color; 223 | } 224 | } 225 | 226 | [Shader("node")] 227 | [NodeLaunch("thread")] 228 | [NodeId("PrintLabel")] 229 | void PrintLabelNode(ThreadNodeInputRecord inputRecord) 230 | { 231 | const PrintLabelRecord record = inputRecord.Get(); 232 | 233 | Cursor cursor = Cursor(record.topLeft + RectangleCursorOffset); 234 | Print(cursor, "|"); 235 | PrintUint(cursor, record.index); 236 | } 237 | 238 | // Helper function to check if two rectangles share a vertical edge. 239 | // Rectangles are defined by the position of their top-left corner and their size. 240 | // If rectangles share a vertical edge, "topLeft" and "size" will contain the position and 241 | // size of a rectangle covering both input rectangles. 242 | bool ComputeCombinedRect(in int2 topLeft0, in int2 size0, in int2 topLeft1, in int2 size1, out int2 topLeft, out int2 size); 243 | 244 | [Shader("node")] 245 | [NodeLaunch("coalescing")] 246 | [NumThreads(1, 1, 1)] 247 | [NodeId("MergeRectangle")] 248 | void MergeRectangleNode( 249 | [MaxRecords(2)] 250 | GroupNodeInputRecords inputRecords, 251 | 252 | [MaxRecords(2)] 253 | [NodeId("FillRectangle")] 254 | NodeOutput output 255 | ) 256 | { 257 | // Only test if rectangles can be merged if two rectangles were passed. 258 | if (inputRecords.Count() == 2) { 259 | int2 topLeft, size; 260 | 261 | // [Task 3]: 262 | // Replace the parameters with the data from the input records. 263 | // Use "inputRecords.Get(uint index)" or "inputRecords[uint index]" to access a specific input record. 264 | if (ComputeCombinedRect(/* in topLeft0: replace me! */ 0, 265 | /* in size0 : replace me! */ 0, 266 | /* in topLeft1: replace me! */ 1, 267 | /* in size1 : replace me! */ 0, 268 | /* out */ topLeft, 269 | /* out */ size)) 270 | { 271 | // [Task 3]: 272 | // Emit a single record to the "FillRectangle" node here. 273 | // Use "topLeft" and "size" from above for this rectangle.
274 | // Compute and set the dispatch size in the same way as you did in the "Entry" node. 275 | // You can re-use the color from any of the input records, or compute a new color for 276 | // the merged rectangle here. 277 | 278 | 279 | // If we found two rectangles to merge, we can end the node here and thus 280 | // skip passing the input records through to the "FillRectangle" node. 281 | // Note: as we only have a single thread in our thread-group, such control flow 282 | // is allowed, since all calls to output records are still thread-group uniform. 283 | return; 284 | } 285 | } 286 | // [Task 2]: 287 | // Pass through all incoming records to the "FillRectangle" output. 288 | // Use "inputRecords.Count()" to get the number of input records, 289 | // and thus also the number of required output records. 290 | // Use ".Get(uint index)" or the "[]"-operator to get/set an input/output record. 291 | } 292 | 293 | // ================= Helper Functions ================= 294 | 295 | // Helper function to compute position and size for the rectangles. 296 | void GetRectanglePositionAndSize(in uint index, out int2 position, out int2 size) { 297 | position = InitialRectanglePosition + 298 | int2(index, 0) * RectangleSize + 299 | int2(index * (index - 1) / 2, 0) * RectangleSizeStep; 300 | size = RectangleSize.xx + int2(index, 0) * RectangleSizeStep; 301 | } 302 | 303 | // Helper function to check if two rectangles share a vertical edge. 304 | bool ComputeCombinedRect(in int2 topLeft0, in int2 size0, in int2 topLeft1, in int2 size1, out int2 topLeft, out int2 size) 305 | { 306 | const int2 topRight0 = topLeft0 + int2(size0.x, 0); 307 | const int2 topRight1 = topLeft1 + int2(size1.x, 0); 308 | 309 | // Compute top-left edge of combined rectangle. 310 | topLeft = min(topLeft0, topLeft1); 311 | // Compute size of combined rectangle.
312 | const int width = max(topRight0, topRight1).x - topLeft.x; 313 | size = int2(width, size0.y); 314 | 315 | return 316 | // check if rectangles have same height. 317 | size0.y == size1.y && 318 | // check if rectangles share a vertical edge. 319 | (width <= (size0.x + size1.x)); 320 | } 321 | -------------------------------------------------------------------------------- /tutorials/tutorial-2/NodeLaunchesSolution.hlsl: -------------------------------------------------------------------------------- 1 | // This file is part of the AMD & HSC Work Graph Playground. 2 | // 3 | // Copyright (C) 2024 Advanced Micro Devices, Inc. and Coburg University of Applied Sciences and Arts. 4 | // All rights reserved. 5 | // 6 | // Permission is hereby granted, free of charge, to any person obtaining a copy 7 | // of this software and associated documentation files(the "Software"), to deal 8 | // in the Software without restriction, including without limitation the rights 9 | // to use, copy, modify, merge, publish, distribute, sublicense, and /or sell 10 | // copies of the Software, and to permit persons to whom the Software is 11 | // furnished to do so, subject to the following conditions : 12 | // 13 | // The above copyright notice and this permission notice shall be included in 14 | // all copies or substantial portions of the Software. 15 | // 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 19 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | // THE SOFTWARE. 23 | 24 | #include "Common.h" 25 | 26 | // Constants that define the layout and positioning of rectangles. 
27 | static const int RectangleSize = 48; 28 | // Size increase with every rectangle. 29 | static const int RectangleSizeStep = 4; 30 | static const int2 RectangleCursorOffset = int2(-8, -20); 31 | static const int2 InitialRectanglePosition = int2(20, 60); 32 | 33 | struct PrintLabelRecord { 34 | int2 topLeft; 35 | uint index; 36 | }; 37 | 38 | struct RectangleRecord { 39 | // [Task 1 Solution]: SV_DispatchGrid denotes the size of the dispatch grid for the "FillRectangle" node. 40 | // When this record is used with other non-broadcasting nodes (e.g. "MergeRectangle"), 41 | // this semantic has no effect. 42 | uint2 dispatchGrid : SV_DispatchGrid; 43 | int2 topLeft; 44 | // [Task 1 Solution]: As "dispatchGrid" has thread-group granularity, i.e. in our case 8x8 pixels, we also need to 45 | // pass the actual rectangle size, in-case it does not evenly divide by 8. 46 | // See implementation of "FillRectangle" node below. 47 | int2 size; 48 | float4 color; 49 | }; 50 | 51 | // Helper function to compute the "position" and "size" for the rectangles from an "index". 52 | void GetRectanglePositionAndSize(in uint index, out int2 position, out int2 size); 53 | 54 | [Shader("node")] 55 | [NodeIsProgramEntry] 56 | [NodeLaunch("broadcasting")] 57 | // [Task 4]: Increment the x dimension of the dispatch grid and observe the changes to the rectangle merging. 58 | // This step is omitted from the sample solution to show a state closer to the start of the tutorial. 
59 | [NodeDispatchGrid(1, 1, 1)] 60 | [NumThreads(5, 1, 1)] 61 | void Entry( 62 | uint dispatchThreadId : SV_DispatchThreadID, 63 | 64 | [MaxRecords(5)] 65 | [NodeId("PrintLabel")] 66 | NodeOutput printLabelOutput, 67 | 68 | [MaxRecords(5)] 69 | // [Task 2 Solution]: 70 | [NodeId("MergeRectangle")] 71 | NodeOutput rectangleOutput) 72 | { 73 | // Rectangle position and size for each thread 74 | int2 threadRectanglePositon; 75 | int2 threadRectangleSize; 76 | GetRectanglePositionAndSize(dispatchThreadId, threadRectanglePositon, threadRectangleSize); 77 | 78 | ThreadNodeOutputRecords printLabelRecord = 79 | printLabelOutput.GetThreadNodeOutputRecords(1); 80 | 81 | printLabelRecord.Get().topLeft = threadRectanglePositon; 82 | printLabelRecord.Get().index = dispatchThreadId; 83 | 84 | printLabelRecord.OutputComplete(); 85 | 86 | ThreadNodeOutputRecords rectangleOutputRecord = 87 | rectangleOutput.GetThreadNodeOutputRecords(1); 88 | 89 | // [Task 1 Solution]: 90 | // Each thread group of the "FillRectangle" node covers an 8x8 pixel area. 91 | // Thus, we need to divide "threadRectangleSize" by 8 to get the number of thread groups required 92 | // to fill the rectangle. As "threadRectangleSize" might not evenly divide by 8, we need to round up 93 | // to ensure we dispatch enough thread groups. 
94 | rectangleOutputRecord.Get().dispatchGrid = DivideAndRoundUp(threadRectangleSize, 8); 95 | rectangleOutputRecord.Get().topLeft = threadRectanglePositon; 96 | rectangleOutputRecord.Get().size = threadRectangleSize; 97 | rectangleOutputRecord.Get().color = UintToColor(dispatchThreadId); 98 | 99 | rectangleOutputRecord.OutputComplete(); 100 | } 101 | 102 | [Shader("node")] 103 | [NodeLaunch("broadcasting")] 104 | // [Task 1 Solution]: 105 | // 6 thread groups for base-size rectangle (48x48) 106 | // + 10 thread groups (x8 = 80 pixels) to cover the width of the 20th rectangle (48 + 19 * 4) 107 | [NodeMaxDispatchGrid(6 + 10, 6, 1)] 108 | [NumThreads(8, 8, 1)] 109 | [NodeId("FillRectangle")] 110 | void FillRectangleNode( 111 | DispatchNodeInputRecord inputRecord, 112 | 113 | uint2 dispatchThreadId : SV_DispatchThreadID 114 | ) 115 | { 116 | const RectangleRecord record = inputRecord.Get(); 117 | 118 | const int2 pixel = record.topLeft + dispatchThreadId; 119 | 120 | // [Task 1 Solution]: 121 | // Add "all(dispatchThreadId < record.size)" to check if pixel is within the rectangle. 122 | if (// Check if pixel is still within rectangle. 123 | all(dispatchThreadId < record.size) && 124 | // Check if pixel is within bounds of render target. 125 | all(pixel >= 0) && all(pixel < RenderSize)) { 126 | RenderTarget[pixel] = record.color; 127 | } 128 | } 129 | 130 | [Shader("node")] 131 | [NodeLaunch("thread")] 132 | [NodeId("PrintLabel")] 133 | void PrintLabelNode(ThreadNodeInputRecord inputRecord) 134 | { 135 | const PrintLabelRecord record = inputRecord.Get(); 136 | 137 | Cursor cursor = Cursor(record.topLeft + RectangleCursorOffset); 138 | Print(cursor, "|"); 139 | PrintUint(cursor, record.index); 140 | } 141 | 142 | // Helper function to check if two rectangles share a vertical edge. 143 | // Rectangles are defined by the position of their top-left corner and their size.
144 | // If rectangles share a vertical edge, "topLeft" and "size" will contain the position and 145 | // size of a rectangle covering both input rectangles. 146 | bool ComputeCombinedRect(in int2 topLeft0, in int2 size0, in int2 topLeft1, in int2 size1, out int2 topLeft, out int2 size); 147 | 148 | [Shader("node")] 149 | [NodeLaunch("coalescing")] 150 | [NumThreads(1, 1, 1)] 151 | [NodeId("MergeRectangle")] 152 | void MergeRectangleNode( 153 | [MaxRecords(2)] 154 | GroupNodeInputRecords inputRecords, 155 | 156 | [MaxRecords(2)] 157 | [NodeId("FillRectangle")] 158 | NodeOutput output 159 | ) 160 | { 161 | if (inputRecords.Count() == 2) { 162 | int2 topLeft, size; 163 | 164 | // [Task 3 Solution]: 165 | if (ComputeCombinedRect(inputRecords.Get(0).topLeft, 166 | inputRecords.Get(0).size, 167 | inputRecords.Get(1).topLeft, 168 | inputRecords.Get(1).size, 169 | /* out */ topLeft, 170 | /* out */ size)) 171 | { 172 | // Emit a single "RectangleRecord" to "FillRectangle". 173 | ThreadNodeOutputRecords outputRecord = 174 | output.GetThreadNodeOutputRecords(1); 175 | // Similar to "Entry" node, divide rectangle size by 8 and round up 176 | // to get the number of required thread groups to fill the rectangle. 177 | outputRecord.Get().dispatchGrid = DivideAndRoundUp(size, 8); 178 | outputRecord.Get().topLeft = topLeft; 179 | outputRecord.Get().size = size; 180 | // Passthrough color from input record [0] here. 181 | // This allows us to see which rectangle was passed to "MergeRectangle" 182 | // as input record [0], and which rectangle was passed as input record [1]. 183 | outputRecord.Get().color = inputRecords.Get(0).color; 184 | outputRecord.OutputComplete(); 185 | return; 186 | } 187 | } 188 | 189 | // [Task 2 Solution]: 190 | // Request one output record for each input record (inputRecords.Count()) 191 | // As the entire record, including the dispatch grid, has already been written 192 | // in the "Entry" node, there's nothing to change/modify here. 
193 | ThreadNodeOutputRecords outputRecords = 194 | output.GetThreadNodeOutputRecords(inputRecords.Count()); 195 | 196 | // Iterate over input records and pass them through to the output records. 197 | // As the entire record, including the dispatch grid, has already been written 198 | // in the "Entry" node, there's nothing to change/modify here. 199 | for (int i = 0; i < inputRecords.Count(); ++i) { 200 | outputRecords.Get(i) = inputRecords.Get(i); 201 | } 202 | 203 | outputRecords.OutputComplete(); 204 | } 205 | 206 | // ================= Helper Functions ================= 207 | 208 | // Helper function to compute position and size for the rectangles. 209 | void GetRectanglePositionAndSize(in uint index, out int2 position, out int2 size) { 210 | position = InitialRectanglePosition + 211 | int2(index, 0) * RectangleSize + 212 | int2(index * (index - 1) / 2, 0) * RectangleSizeStep; 213 | size = RectangleSize.xx + int2(index, 0) * RectangleSizeStep; 214 | } 215 | 216 | // Helper function to check if two rectangles share a vertical edge. 217 | bool ComputeCombinedRect(in int2 topLeft0, in int2 size0, in int2 topLeft1, in int2 size1, out int2 topLeft, out int2 size) 218 | { 219 | const int2 topRight0 = topLeft0 + int2(size0.x, 0); 220 | const int2 topRight1 = topLeft1 + int2(size1.x, 0); 221 | 222 | // Compute top-left edge of combined rectangle. 223 | topLeft = min(topLeft0, topLeft1); 224 | // Compute size of combined rectangle. 225 | const int width = max(topRight0, topRight1).x - topLeft.x; 226 | size = int2(width, size0.y); 227 | 228 | return 229 | // check if rectangles have same height. 230 | size0.y == size1.y && 231 | // check if rectangles share a vertical edge. 
232 | (width <= (size0.x + size1.x)); 233 | } 234 | -------------------------------------------------------------------------------- /tutorials/tutorial-2/screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GPUOpen-LibrariesAndSDKs/WorkGraphPlayground/616e78bad82a583e116a4db3f4f7ac6a84cd197e/tutorials/tutorial-2/screenshot.png -------------------------------------------------------------------------------- /tutorials/tutorial-3/MaterialShading.hlsl: -------------------------------------------------------------------------------- 1 | // This file is part of the AMD & HSC Work Graph Playground. 2 | // 3 | // Copyright (C) 2024 Advanced Micro Devices, Inc. and Coburg University of Applied Sciences and Arts. 4 | // All rights reserved. 5 | // 6 | // Permission is hereby granted, free of charge, to any person obtaining a copy 7 | // of this software and associated documentation files(the "Software"), to deal 8 | // in the Software without restriction, including without limitation the rights 9 | // to use, copy, modify, merge, publish, distribute, sublicense, and /or sell 10 | // copies of the Software, and to permit persons to whom the Software is 11 | // furnished to do so, subject to the following conditions : 12 | // 13 | // The above copyright notice and this permission notice shall be included in 14 | // all copies or substantial portions of the Software. 15 | // 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 19 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | // THE SOFTWARE. 
23 | 24 | #include "Common.h" 25 | 26 | // Scene.h contains functionality for tracing rays into the scene and also 27 | // contains the material shading functions. 28 | // !! IMPORTANT: if you are using the WARP software adapter, 29 | // please consider increasing SHADING_RATE in Scene.h !! 30 | #include "Scene.h" 31 | 32 | // Record for broadcasting nodes to launch one thread per sample (samples are scaled by SHADING_RATE in Scene.h). 33 | struct RenderSceneRecord { 34 | uint2 dispatchGrid : SV_DispatchGrid; 35 | }; 36 | 37 | // ================ Start of tutorial ================= 38 | 39 | // In this tutorial, we'll look at node arrays, using per-pixel material shading as an example. 40 | // Material shading is commonly used with visibility buffers. A prominent example for this would be Unreal Engine's Nanite. 41 | // The input is a (different) material ID for every pixel and a set of shading functions, which calculate the pixel color 42 | // based on the material ID and some other parameters. 43 | // For this tutorial, we use raytracing to render a small scene with three different materials: the sky, a sphere, and a plane. 44 | // You can find the raytracing code and material shading functions in "Scene.h". 45 | 46 | // Task 0: Start by familiarizing yourself with the existing code. 47 | // Have a look at the "RenderScene" node and see how it is launched from the "Entry" node at the bottom of the file. 48 | // All of this should be familiar by now - if not, maybe have a look at the previous tutorials again. 49 | // Task 1: Have a look at the data required to shade a pixel (or sample) (e.g., pixel coordinate) 50 | // Start by declaring a record for shading a single pixel below. 51 | // Task 2: Declare a node for each of the three different materials. See details below. 52 | // All of these nodes must use the same [NodeLaunch(...)] parameter and use the same input record. 53 | // Task 3: Declare an output to your node array in the "RenderScene" node.
54 | // Task 4: For each thread, create and send a record to your node array with the correct node array index based on the ray tracing result. 55 | // Fill your record with all the data needed to shade the material. 56 | // 57 | // The resulting graph will then look like this: 58 | // 59 | // +-------+ +-------------+ +------------------------+ 60 | // | Entry |-------->| RenderScene |-------->| ShadePixel[3] | 61 | // +-------+ +-------------+ |========================| 62 | // | Nodes in NodeArray: | 63 | // | [0]: ShadePixel_Sky | 64 | // | [1]: ShadePixel_Sphere | 65 | // | [2]: ShadePixel_Plane | 66 | // +------------------------+ 67 | 68 | // [Task 1]: Define your record struct to shade each pixel here. 69 | 70 | // [Task 2]: Declare your material shading nodes here, using the record you just defined as an input. 71 | // Revisit tutorial-2 and choose a fitting node launch mode for shading a pixel. 72 | // You'll need three different nodes, one for each of the different materials. 73 | // Use the [NodeId(...)] attribute to tie all of these nodes together to a single node array. 74 | // Tip: You can use [NodeId("...", (uint)RayHit::Material::...)] to use the "Material" enum instead of hard-coded values. 75 | 76 | [Shader("node")] 77 | [NodeLaunch("broadcasting")] 78 | [NodeMaxDispatchGrid(512, 512, 1)] 79 | [NumThreads(8, 8, 1)] 80 | void RenderScene( 81 | uint2 dispatchThreadId : SV_DispatchThreadID, 82 | 83 | DispatchNodeInputRecord inputRecord 84 | 85 | // [Task 3]: Declare the output to your newly created node array here. 86 | // Revisit tutorial-1 for a refresher on node outputs and determine the correct [MaxRecords(...)] attribute. 87 | // As you're now using a node array, you'll need to use NodeOutputArray<...> instead of NodeOutput<...>. 88 | // Additionally, you need to specify the node array size using the [NodeArraySize(...)] attribute. 89 | // This is for the runtime to check that the entire node array is actually present in the graph.
90 | // See https://microsoft.github.io/DirectX-Specs/d3d/WorkGraphs.html#node-output-declaration for more details. 91 | ) 92 | { 93 | // Scale dispatchThreadId by shading rate, 94 | // as every sample (i.e., every thread) can cover multiple pixels. 95 | const uint2 pixel = dispatchThreadId * SHADING_RATE; 96 | 97 | if (any(pixel >= RenderSize)) { 98 | // Early exit if pixel is outside the render target. 99 | // !! Keep in mind that any calls to GetThreadNodeOutputRecords or GetGroupNodeOutputRecords 100 | // need to be thread-group uniform, i.e., reached by every thread in the threadgroup !! 101 | return; 102 | } 103 | 104 | // GetCameraRay computes ray origin and direction for a primary ray. 105 | const Ray ray = GetCameraRay(pixel); 106 | // TraceRay computes ray intersections with the scene and returns 107 | // hit distance and material ID in the "RayHit" struct 108 | const RayHit hit = TraceRay(ray); 109 | 110 | float4 color = float4(0, 0, 0, 1); 111 | 112 | // Here we call different shading functions based on the raytracing results. 113 | // This will lead to divergent code flow on the GPU and can slow down shading if 114 | // lots of different materials, with varying resource requirements, are used. 115 | // In this tutorial, we want to replace this switch-case statement with a 116 | // work graph node array, such that every material is processed by a different shader. 117 | // Each node (material shader) then only allocates the resources it actually requires. 118 | // 119 | // Here we're only shading three different materials: the sky, a sphere and the plane. 120 | switch(hit.material) { 121 | case RayHit::Sky: 122 | color = ShadeSky(ray); 123 | break; 124 | case RayHit::Sphere: 125 | color = ShadeSphere(ray, hit.distance); 126 | break; 127 | case RayHit::Plane: 128 | color = ShadePlane(ray, hit.distance); 129 | break; 130 | default: 131 | break; 132 | } 133 | 134 | // WritePixel stores the color to all pixels in a sample (see SHADING_RATE above).
135 | // As Work Graphs does not offer a return-path to the producer node, your task is to move it 136 | // to each of the material shading nodes. 137 | WritePixel(pixel, color); 138 | 139 | // [Task 4]: Emit the output to your node array here. 140 | // NodeOutputArray provides a []-operator to select the array index for each output request with 141 | // output[index].Get{Thread|Group}NodeOutputRecords(...) 142 | // Make sure to call this in a thread-group uniform way and write your data to the record. 143 | // You can then remove the existing material shading code above. 144 | } 145 | 146 | // ==================== Entry Node ==================== 147 | // The entry node below is invoked when dispatching the graph and launches 148 | // the "RenderScene" node with one thread per sample. 149 | 150 | [Shader("node")] 151 | [NodeIsProgramEntry] 152 | [NodeLaunch("thread")] 153 | void Entry( 154 | [MaxRecords(1)] 155 | [NodeId("RenderScene")] 156 | NodeOutput output) 157 | { 158 | ThreadNodeOutputRecords outputRecord = 159 | output.GetThreadNodeOutputRecords(1); 160 | 161 | // RenderScene uses an 8x8 thread group with one sample per thread. 162 | // Samples can cover multiple pixels (see note on SHADING_RATE above). 163 | const uint pixelsPerThreadGroup = 8 * SHADING_RATE; 164 | outputRecord.Get().dispatchGrid = (uint2(RenderSize) + pixelsPerThreadGroup - 1) / pixelsPerThreadGroup; 165 | 166 | outputRecord.OutputComplete(); 167 | } -------------------------------------------------------------------------------- /tutorials/tutorial-3/MaterialShadingSolution.hlsl: -------------------------------------------------------------------------------- 1 | // This file is part of the AMD & HSC Work Graph Playground. 2 | // 3 | // Copyright (C) 2024 Advanced Micro Devices, Inc. and Coburg University of Applied Sciences and Arts. 4 | // All rights reserved.
5 | // 6 | // Permission is hereby granted, free of charge, to any person obtaining a copy 7 | // of this software and associated documentation files(the "Software"), to deal 8 | // in the Software without restriction, including without limitation the rights 9 | // to use, copy, modify, merge, publish, distribute, sublicense, and /or sell 10 | // copies of the Software, and to permit persons to whom the Software is 11 | // furnished to do so, subject to the following conditions : 12 | // 13 | // The above copyright notice and this permission notice shall be included in 14 | // all copies or substantial portions of the Software. 15 | // 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 19 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | // THE SOFTWARE. 23 | 24 | #include "Common.h" 25 | 26 | // Scene.h contains functionality for tracing rays into the scene and also 27 | // contains the material shading functions. 28 | // !! IMPORTANT: if you are using the WARP software adapter, 29 | // please consider increasing SHADING_RATE in Scene.h !! 30 | #include "Scene.h" 31 | 32 | // Record for broadcasting nodes to launch one thread per sample (samples are scaled by SHADING_RATE in Scene.h). 33 | struct RenderSceneRecord { 34 | uint2 dispatchGrid : SV_DispatchGrid; 35 | }; 36 | 37 | // In this sample solution, we use the following work graph 38 | // to shade each pixel on screen with a different thread-launch node. 
39 | // 40 | // +-------+ +-------------+ +------------------------+ 41 | // | Entry |-------->| RenderScene |-------->| ShadePixel[3] | 42 | // +-------+ +-------------+ |========================| 43 | // | Nodes in NodeArray: | 44 | // | [0]: ShadePixel_Sky | 45 | // | [1]: ShadePixel_Sphere | 46 | // | [2]: ShadePixel_Plane | 47 | // +------------------------+ 48 | 49 | // [Task 1 Solution]: 50 | // Record to shade a single sample (pixel) 51 | // Produced by "RenderScene", consumed by "ShadePixel" node array below. 52 | struct PixelRecord { 53 | // Pixel position of sample/pixel to shade 54 | uint2 pixel; 55 | // Ray information (required for shading functions) 56 | Ray ray; 57 | // Ray hit distance (required for shading functions) 58 | float hitDistance; 59 | }; 60 | 61 | // ============== "ShadePixel" Node Array ============= 62 | // [Task 2 Solution]: The "ShadePixel" node array below provides a dedicated thread-launch node 63 | // for each of the three different materials. 64 | // These nodes are joined to a node array using the [NodeId("ShadePixel", (uint)RayHit::XXX)] attribute. 65 | 66 | [Shader("node")] 67 | // NodeId attribute has to be used when specifying node array to set the node array index (second parameter). 68 | // The node array index has to be of type uint, but enums such as RayHit::Material can also be used, when cast to uint. 69 | [NodeId("ShadePixel", (uint)RayHit::Sky)] 70 | // Each sample only requires a single thread to compute the output, thus we use thread launch here. 
71 | [NodeLaunch("thread")] 72 | void ShadePixel_Sky(ThreadNodeInputRecord input) 73 | { 74 | // Read input record 75 | const PixelRecord record = input.Get(); 76 | 77 | // Compute color based on material shading function 78 | const float4 color = ShadeSky(record.ray); 79 | 80 | // Write color to output pixel(s) 81 | WritePixel(record.pixel, color); 82 | } 83 | 84 | // ShadePixel_Sphere and ShadePixel_Plane are created in the same way as ShadePixel_Sky 85 | 86 | [Shader("node")] 87 | [NodeId("ShadePixel", (uint)RayHit::Sphere)] 88 | [NodeLaunch("thread")] 89 | void ShadePixel_Sphere(ThreadNodeInputRecord input) 90 | { 91 | const PixelRecord record = input.Get(); 92 | 93 | WritePixel(record.pixel, ShadeSphere(record.ray, record.hitDistance)); 94 | } 95 | 96 | [Shader("node")] 97 | [NodeId("ShadePixel", (uint)RayHit::Plane)] 98 | [NodeLaunch("thread")] 99 | void ShadePixel_Plane(ThreadNodeInputRecord input) 100 | { 101 | const PixelRecord record = input.Get(); 102 | 103 | WritePixel(record.pixel, ShadePlane(record.ray, record.hitDistance)); 104 | } 105 | 106 | // ================ "RenderScene" Node ================ 107 | 108 | [Shader("node")] 109 | [NodeLaunch("broadcasting")] 110 | [NodeMaxDispatchGrid(512, 512, 1)] 111 | [NumThreads(8, 8, 1)] 112 | void RenderScene( 113 | uint2 dispatchThreadId : SV_DispatchThreadID, 114 | 115 | DispatchNodeInputRecord inputRecord, 116 | 117 | // [Task 3 Solution]: Output declaration to the "ShadePixel" node array: 118 | // 119 | // RenderScene uses 8x8 threads and every thread can emit a "PixelRecord", 120 | // thus we need to declare a maximum of 8 * 8 = 64 outputs. 121 | [MaxRecords(8 * 8)] 122 | // NodeArraySize is required when using node array with fixed size. 123 | // If not all the nodes in the node array are populated, you can use [AllowSparseNodes] 124 | // to allow "gaps" in your node array. 
125 | // If the maximum size of such a sparse node array is not known, you can use [UnboundedSparseNodes] 126 | // instead of [NodeArraySize(...)]. 127 | // See https://microsoft.github.io/DirectX-Specs/d3d/WorkGraphs.html#node-output-declaration for more details. 128 | [NodeArraySize(3)] 129 | [NodeId("ShadePixel")] 130 | // As we are targeting an array of nodes, we have to use NodeOutputArray instead of NodeOutput. 131 | // NodeOutputArray provides an []-operator, with which we can select the 132 | // node array index for each record allocation (see below). 133 | // Similarly, for EmptyNodeOutput, there is also EmptyNodeOutputArray, if you do not want to pass any record data. 134 | NodeOutputArray output) 135 | { 136 | // Scale dispatchThreadId by shading rate, 137 | // as every sample (i.e., every thread) can cover multiple pixels. 138 | const uint2 pixel = dispatchThreadId * SHADING_RATE; 139 | 140 | // Check if pixel is still within the output texture region. 141 | const bool hasOutput = all(pixel < RenderSize); 142 | 143 | Ray ray; 144 | RayHit hit; 145 | 146 | // Trace ray into scene (if required). 147 | if (hasOutput) { 148 | ray = GetCameraRay(pixel); 149 | hit = TraceRay(ray); 150 | } 151 | 152 | // [Task 4 Solution]: Output a record to the "ShadePixel" node array with 153 | // hit.material being used as the index into this array: 154 | // 155 | // Request a per-thread record (if pixel is still on screen). 156 | // The material index of the hit object (or sky) is used as the index into 157 | // the "ShadePixel" node array (see nodes above). 158 | ThreadNodeOutputRecords outputRecord = 159 | output[(uint)hit.material].GetThreadNodeOutputRecords(hasOutput); 160 | 161 | if (hasOutput) { 162 | // Store all information required for shading the pixel into the record. 163 | outputRecord.Get().pixel = pixel; 164 | outputRecord.Get().ray = ray; 165 | outputRecord.Get().hitDistance = hit.distance; 166 | } 167 | 168 | // Mark the record as complete and send it off.
169 | outputRecord.OutputComplete(); 170 | } 171 | 172 | // ==================== Entry Node ==================== 173 | // The entry node below is invoked when dispatching the graph and launches 174 | // the "RenderScene" node with one thread per sample. 175 | 176 | [Shader("node")] 177 | [NodeIsProgramEntry] 178 | [NodeLaunch("thread")] 179 | void Entry( 180 | [MaxRecords(1)] 181 | [NodeId("RenderScene")] 182 | NodeOutput output) 183 | { 184 | ThreadNodeOutputRecords outputRecord = 185 | output.GetThreadNodeOutputRecords(1); 186 | 187 | // RenderScene uses an 8x8 thread group with one sample per thread. 188 | // Samples can cover multiple pixels (see note on SHADING_RATE above). 189 | const uint pixelsPerThreadGroup = 8 * SHADING_RATE; 190 | outputRecord.Get().dispatchGrid = (uint2(RenderSize) + pixelsPerThreadGroup - 1) / pixelsPerThreadGroup; 191 | 192 | outputRecord.OutputComplete(); 193 | } -------------------------------------------------------------------------------- /tutorials/tutorial-3/Scene.h: -------------------------------------------------------------------------------- 1 | // This file is part of the AMD & HSC Work Graph Playground. 2 | // 3 | // Copyright (C) 2024 Advanced Micro Devices, Inc. and Coburg University of Applied Sciences and Arts. 4 | // All rights reserved. 5 | // 6 | // Permission is hereby granted, free of charge, to any person obtaining a copy 7 | // of this software and associated documentation files(the "Software"), to deal 8 | // in the Software without restriction, including without limitation the rights 9 | // to use, copy, modify, merge, publish, distribute, sublicense, and /or sell 10 | // copies of the Software, and to permit persons to whom the Software is 11 | // furnished to do so, subject to the following conditions : 12 | // 13 | // The above copyright notice and this permission notice shall be included in 14 | // all copies or substantial portions of the Software.
15 | // 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 19 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | // THE SOFTWARE. 23 | 24 | #pragma once 25 | 26 | // ====================== Config ====================== 27 | 28 | // Emitting one record per pixel may cause low frame rates when using the 29 | // WARP software adapter. 30 | // SHADING_RATE defines the size of a sample in pixels. 31 | // e.g., SHADING_RATE=2 means every sample will cover a 2x2 pixel area. 32 | // If you're using WARP, we recommend a shading rate of 4 or higher. 33 | #define SHADING_RATE 1 34 | 35 | // Enable/disable animation of camera rotating around the scene 36 | #define ANIMATION 1 37 | 38 | // ================= Data Structs ================ 39 | 40 | // A struct that defines a ray for raytracing. 41 | struct Ray { 42 | // Origin of the ray. 43 | float3 origin; 44 | // Unit length direction of the ray. 45 | // It is the user's responsibility to write normalized vectors to this member. 46 | float3 direction; 47 | }; 48 | 49 | // A struct that describes the result when a ray hits a surface. 50 | struct RayHit { 51 | // Enum with the supported materials. 52 | enum Material : uint { Sky = 0, Sphere = 1, Plane = 2 }; 53 | 54 | // Tells the material of the surface that the ray intersected. 55 | Material material; 56 | 57 | // Holds the distance from ray origin to the intersection. 58 | float distance; 59 | }; 60 | 61 | // =============== Scene Definition ============== 62 | // Sphere setup.
63 | static const float3 SphereCenter = float3(0.0, 1.0, 0.0); 64 | static const float SphereRadius = 1.0; 65 | // Plane setup. 66 | static const float3 PlaneNormal = float3(0.0, 1.0, 0.0); 67 | static const float PlaneD = 0.0; 68 | static const float PlaneSize = 5; 69 | 70 | // ============== Raytracing Methods ============= 71 | 72 | // Returns a camera ray to a given dispatchThreadId. 73 | Ray GetCameraRay(in uint2 dispatchThreadId) 74 | { 75 | #if ANIMATION 76 | const float rotationAngle = Time; 77 | #else 78 | const float rotationAngle = radians(45); 79 | #endif 80 | const float3 cameraPosition = float3(sin(rotationAngle) * 5, 2, cos(rotationAngle) * 5); 81 | // Camera looks at origin 82 | const float3 cameraDirection = normalize(-cameraPosition); 83 | 84 | const float3 forward = cameraDirection; 85 | const float3 right = normalize(cross(forward, float3(0, 1, 0))); 86 | const float3 up = normalize(cross(cameraDirection, right)); 87 | 88 | // Normalize pixel position to [-1; 1]. 89 | const float2 pixelPosition = ((dispatchThreadId / float2(RenderSize)) - 0.5f) * 2.f; 90 | const float aspectRatio = RenderSize.x / float(RenderSize.y); 91 | 92 | // Create camera ray. 93 | Ray ray; 94 | ray.origin = cameraPosition; 95 | ray.direction = normalize(forward + right * pixelPosition.x * aspectRatio + up * pixelPosition.y); 96 | 97 | return ray; 98 | } 99 | 100 | RayHit TraceRay(in const Ray ray) 101 | { 102 | // Hit distances for sphere and plane. 103 | float tSphere = 1.#INF; 104 | float tPlane = 1.#INF; 105 | 106 | // Ray-Sphere intersection. 
107 | { 108 | const float3 oc = ray.origin - SphereCenter; 109 | const float a = dot(ray.direction, ray.direction); 110 | const float b = 2.0 * dot(oc, ray.direction); 111 | const float c = dot(oc, oc) - SphereRadius * SphereRadius; 112 | const float d = b * b - 4.0 * a * c; 113 | 114 | if (d > 0.0) { 115 | const float t0 = (-b - sqrt(d)) / (2.0 * a); 116 | const float t1 = (-b + sqrt(d)) / (2.0 * a); 117 | tSphere = min(t0, t1); 118 | } 119 | } 120 | 121 | // Ray-Plane intersection. 122 | { 123 | const float denom = dot(PlaneNormal, ray.direction); 124 | if (abs(denom) > 1e-6) { 125 | tPlane = -(dot(ray.origin, PlaneNormal) + PlaneD) / denom; 126 | 127 | const float3 hitPosition = ray.origin + ray.direction * tPlane; 128 | 129 | // Limit plane to [-PlaneSize / 2; PlaneSize / 2] in xz-plane 130 | if (any(abs(hitPosition.xz) > (PlaneSize / 2.f))) { 131 | // Outside of plane dimensions 132 | tPlane = 1.#INF; 133 | } 134 | } 135 | } 136 | 137 | // Initialize fallback hit status. 138 | RayHit hit; 139 | hit.material = RayHit::Sky; 140 | hit.distance = 1.#INF; 141 | 142 | // Check if sphere distance is valid and closer than plane 143 | if (!isinf(tSphere) && (tSphere < tPlane)) { 144 | hit.material = RayHit::Sphere; 145 | hit.distance = tSphere; 146 | } else 147 | // Check plane is valid 148 | if (!isinf(tPlane)) { 149 | hit.material = RayHit::Plane; 150 | hit.distance = tPlane; 151 | } 152 | // else the ray "hits" the sky. We already initialized that above. 153 | 154 | // Return the hit. 155 | return hit; 156 | } 157 | 158 | // ========== Material Shading Functions ========= 159 | 160 | // Sets the "color" of a "pixel" given in zero-based integer coordinates. 161 | // Covers SHADING_RATE pixels in x and y direction. 162 | // Pixel must be a valid pixel, otherwise the behaviour is undefined.
163 | void WritePixel(in const uint2 pixel, in const float4 color) 164 | { 165 | for (uint y = 0; y < SHADING_RATE; ++y) { 166 | for (uint x = 0; x < SHADING_RATE; ++x) { 167 | const uint2 outputPixel = pixel + uint2(x, y); 168 | 169 | if (all(outputPixel < RenderSize)) { 170 | RenderTarget[outputPixel] = color; 171 | } 172 | } 173 | } 174 | } 175 | 176 | // Returns the color of the sky. 177 | float4 ShadeSky(in const Ray ray) 178 | { 179 | const float f = (ray.direction.y + 1.5) * 0.5; 180 | return float4(f * 0.4, f * 0.7, f * 1, 1); 181 | } 182 | 183 | // Returns the color of the sphere. 184 | // Uses the "ray" and its "hitDistance" to determine the color. 185 | float4 ShadeSphere(in const Ray ray, in const float hitDistance) 186 | { 187 | const float3 hitPosition = ray.origin + ray.direction * hitDistance; 188 | const float3 normal = normalize(hitPosition - SphereCenter); 189 | 190 | return float4(0.5 + normal * 0.5, 1); 191 | } 192 | 193 | // Returns the color of the plane. 194 | // Uses the "ray" and its "hitDistance" to determine the color. 195 | float4 ShadePlane(in const Ray ray, in const float hitDistance) 196 | { 197 | const float3 hitPosition = ray.origin + ray.direction * hitDistance; 198 | 199 | const bool checkerboard = (int(hitPosition.x - PlaneSize) % 2) ^ (int(hitPosition.z - PlaneSize) % 2); 200 | 201 | return checkerboard ? 
float4(0.2, 0.2, 0.2, 1.0) : float4(0.8, 0.8, 0.8, 1.0); 202 | } -------------------------------------------------------------------------------- /tutorials/tutorial-3/screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GPUOpen-LibrariesAndSDKs/WorkGraphPlayground/616e78bad82a583e116a4db3f4f7ac6a84cd197e/tutorials/tutorial-3/screenshot.png -------------------------------------------------------------------------------- /tutorials/tutorial-4/Recursion.hlsl: -------------------------------------------------------------------------------- 1 | // This file is part of the AMD & HSC Work Graph Playground. 2 | // 3 | // Copyright (C) 2024 Advanced Micro Devices, Inc. and Coburg University of Applied Sciences and Arts. 4 | // All rights reserved. 5 | // 6 | // Permission is hereby granted, free of charge, to any person obtaining a copy 7 | // of this software and associated documentation files(the "Software"), to deal 8 | // in the Software without restriction, including without limitation the rights 9 | // to use, copy, modify, merge, publish, distribute, sublicense, and /or sell 10 | // copies of the Software, and to permit persons to whom the Software is 11 | // furnished to do so, subject to the following conditions : 12 | // 13 | // The above copyright notice and this permission notice shall be included in 14 | // all copies or substantial portions of the Software. 15 | // 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 19 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | // THE SOFTWARE. 
23 | 24 | #include "Common.h" 25 | 26 | // This tutorial teaches about Node Recursion, using fractals as an example. 27 | // For the official recursion spec, see https://microsoft.github.io/DirectX-Specs/d3d/WorkGraphs.html#recursion 28 | // Remember: The only recursion (or cycles) allowed in Work Graphs is a Node creating records for itself. 29 | // Thus, a configuration where a Node "A" creates work for a node "B" and "B" is creating work for "A" is not allowed. 30 | // When a node has a NodeOutput to itself, the [NodeMaxRecursionDepth(n)] attribute must be declared. 31 | // Calling GetRemainingRecursionLevels() in the node returns the remaining available levels. 32 | 33 | // To provide an example to learn and copy from, this tutorial provides code for drawing the Koch Snowflake fractal. 34 | // The node "Snowflake" receives a "Line" record as input and splits it into 4 lines. When the recursion limit is reached, the line is drawn instead. 35 | // 36 | // Task 1: Try playing around with the maximum number of recursion levels of the "Snowflake" node. 37 | 38 | // Task 2: Now, create a second fractal which works very similarly to the given example: the Menger Sponge. 39 | // Use a square as input, which splits into 8 smaller new squares, leaving the center of the square empty. 40 | 41 | struct Line 42 | { 43 | float2 a, b; 44 | }; 45 | 46 | // [Task 2]: Declare a structure for the box. 47 | 48 | [Shader("node")] 49 | [NodeIsProgramEntry] 50 | [NodeLaunch("thread")] 51 | [NodeId("Entry")] 52 | void EntryNode( 53 | [MaxRecords(3)] 54 | [NodeId("Snowflake")] 55 | NodeOutput snowflakeOutput 56 | // [Task 2]: Declare output for Menger Sponge 57 | ){ 58 | const bool stackVertical = RenderSize.x > RenderSize.y; 59 | const float scale = stackVertical? min(RenderSize.x * .225, RenderSize.y * .45) : 60 | min(RenderSize.x * .45, RenderSize.y * .225); 61 | // Draw Snowflake fractal. 62 | { 63 | const float2 snowflakeCenter = RenderSize * (stackVertical?
float2(.25, .5) : float2(.5, .25)); 64 | 65 | // Request three output records for the three sides of the initial equilateral triangle. 66 | ThreadNodeOutputRecords outputRecords = 67 | snowflakeOutput.GetThreadNodeOutputRecords(3); 68 | 69 | // Compute three vertices of the initial equilateral triangle. 70 | const float2 v0 = snowflakeCenter + scale * float2(0., -1.); 71 | const float2 v1 = snowflakeCenter + scale * float2(-sqrt(3) * .5, .5); 72 | const float2 v2 = snowflakeCenter + scale * float2(+sqrt(3) * .5, .5); 73 | 74 | // Create the initial equilateral triangle. 75 | outputRecords.Get(0).a = v0; 76 | outputRecords.Get(0).b = v1; 77 | 78 | outputRecords.Get(1).a = v1; 79 | outputRecords.Get(2).a = v2; 80 | 81 | outputRecords.Get(2).b = v0; 82 | outputRecords.Get(1).b = v2; 83 | 84 | outputRecords.OutputComplete(); 85 | } 86 | 87 | // Draw Sponge fractal. 88 | { 89 | // The center of your fractal should be at spongeCenter 90 | const float2 spongeCenter = RenderSize * (stackVertical? float2(.75, .5) : float2(.5, .75)); 91 | 92 | // [Task 2]: Emit your initial record(s) here to draw the Menger Sponge fractal. 93 | } 94 | } 95 | 96 | [Shader("node")] 97 | [NodeLaunch("thread")] 98 | // If a node declares a recursive output to itself (see "recursiveOutput" below), 99 | // a "[NodeMaxRecursionDepth(...)]" is required to specify the maximum number of recursion levels. 100 | // This is required, as each recursion level counts towards the total graph depth, 101 | // and the runtime has to ensure that this depth does not exceed the limit of 32 nodes. 102 | // We can use "GetRemainingRecursionLevels()" in the node function to query the remaining 103 | // recursion levels, i.e., determine whether we can still request recursive output records. 
104 | [NodeMaxRecursionDepth(4)] 105 | [NodeId("Snowflake")] 106 | void SnowflakeNode( 107 | ThreadNodeInputRecord inputRecord, 108 | 109 | [MaxRecords(4)] 110 | [NodeId("Snowflake")] 111 | NodeOutput recursiveOutput 112 | ) 113 | { 114 | const float2 a = inputRecord.Get().a; 115 | const float2 b = inputRecord.Get().b; 116 | 117 | // Check if we have reached the recursion limit. 118 | const bool hasOutput = GetRemainingRecursionLevels() != 0; 119 | 120 | // Each recursion level has a 4x amplification factor, as each line 121 | // splits into four new lines. 122 | ThreadNodeOutputRecords outputRecords = 123 | recursiveOutput.GetThreadNodeOutputRecords(hasOutput * 4); 124 | 125 | if (hasOutput) { 126 | // Perpendicular vector to current line segment. 127 | const float2 perp = float2(a.y - b.y, b.x - a.x) * sqrt(3) / 6; 128 | 129 | // Compute vertices for the four new line segments: 130 | // 131 | // v2 132 | // / \ 133 | // / \ 134 | // v0 ---- v1 v3 ---- v4 135 | const float2 v0 = a; 136 | const float2 v1 = lerp(a, b, 1./3.); 137 | const float2 v2 = lerp(a, b, .5) + perp; 138 | const float2 v3 = lerp(a, b, 2./3.); 139 | const float2 v4 = b; 140 | 141 | outputRecords.Get(0).a = v0; 142 | outputRecords.Get(0).b = v1; 143 | 144 | outputRecords.Get(1).a = v1; 145 | outputRecords.Get(1).b = v2; 146 | 147 | outputRecords.Get(2).a = v2; 148 | outputRecords.Get(2).b = v3; 149 | 150 | outputRecords.Get(3).a = v3; 151 | outputRecords.Get(3).b = v4; 152 | } else { 153 | // We've reached the recursion limit, thus we draw the current line segment 154 | // to the output. 155 | DrawLine(a, b); 156 | } 157 | 158 | outputRecords.OutputComplete(); 159 | } 160 | 161 | // [Task 2]: Create a node that either subdivides an input Box into eight boxes or 162 | // draws the input Box. Use FillRect from Common.h to draw the box.
-------------------------------------------------------------------------------- /tutorials/tutorial-4/RecursionSolution.hlsl: -------------------------------------------------------------------------------- 1 | // This file is part of the AMD & HSC Work Graph Playground. 2 | // 3 | // Copyright (C) 2024 Advanced Micro Devices, Inc. and Coburg University of Applied Sciences and Arts. 4 | // All rights reserved. 5 | // 6 | // Permission is hereby granted, free of charge, to any person obtaining a copy 7 | // of this software and associated documentation files(the "Software"), to deal 8 | // in the Software without restriction, including without limitation the rights 9 | // to use, copy, modify, merge, publish, distribute, sublicense, and /or sell 10 | // copies of the Software, and to permit persons to whom the Software is 11 | // furnished to do so, subject to the following conditions : 12 | // 13 | // The above copyright notice and this permission notice shall be included in 14 | // all copies or substantial portions of the Software. 15 | // 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 19 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | // THE SOFTWARE. 
23 | 24 | #include "Common.h" 25 | // Record for the "Snowflake" node: a line segment from a to b. 26 | struct Line 27 | { 28 | float2 a, b; 29 | }; 30 | 31 | // [Task 2 Solution]: Record for the "Sponge" node: a square box given by its top-left corner and edge length. 32 | struct Box 33 | { 34 | float2 topLeft; 35 | float size; 36 | }; 37 | // Entry node: emits the three sides of the initial triangle to "Snowflake" and the initial box to "Sponge". 38 | [Shader("node")] 39 | [NodeIsProgramEntry] 40 | [NodeLaunch("thread")] 41 | [NodeId("Entry")] 42 | void EntryNode( 43 | [MaxRecords(3)] 44 | [NodeId("Snowflake")] 45 | NodeOutput<Line> snowflakeOutput, 46 | 47 | // [Task 2 Solution]: Single record output to the "Sponge" node. 48 | [MaxRecords(1)] 49 | [NodeId("Sponge")] 50 | NodeOutput<Box> spongeOutput 51 | ){ 52 | const bool stackVertical = RenderSize.x > RenderSize.y; 53 | const float scale = stackVertical? min(RenderSize.x * .225, RenderSize.y * .45) : 54 | min(RenderSize.x * .45, RenderSize.y * .225); 55 | // Draw Snowflake fractal. 56 | { 57 | const float2 snowflakeCenter = RenderSize * (stackVertical? float2(.25, .5) : float2(.5, .25)); 58 | 59 | // Request three output records for the three sides of the initial equilateral triangle. 60 | ThreadNodeOutputRecords<Line> outputRecords = 61 | snowflakeOutput.GetThreadNodeOutputRecords(3); 62 | 63 | // Compute three vertices of the initial equilateral triangle. 64 | const float2 v0 = snowflakeCenter + scale * float2(0., -1.); 65 | const float2 v1 = snowflakeCenter + scale * float2(-sqrt(3) * .5, .5); 66 | const float2 v2 = snowflakeCenter + scale * float2(+sqrt(3) * .5, .5); 67 | 68 | // Create the initial equilateral triangle: v0 -> v1 -> v2 -> v0. 69 | outputRecords.Get(0).a = v0; 70 | outputRecords.Get(0).b = v1; 71 | 72 | outputRecords.Get(1).a = v1; 73 | outputRecords.Get(1).b = v2; 74 | 75 | outputRecords.Get(2).a = v2; 76 | outputRecords.Get(2).b = v0; 77 | 78 | outputRecords.OutputComplete(); 79 | } 80 | 81 | // Draw Sponge fractal. 82 | { 83 | const float2 spongeCenter = RenderSize * (stackVertical? float2(.75, .5) : float2(.5, .75)); 84 | 85 | // [Task 2 Solution]: Request a single record for the "Sponge" node and write the initial box 86 | // position and size to it. 
87 | ThreadNodeOutputRecords<Box> outputRecord = spongeOutput.GetThreadNodeOutputRecords(1); 88 | 89 | outputRecord.Get().topLeft = spongeCenter - scale; 90 | outputRecord.Get().size = 2 * scale; 91 | 92 | outputRecord.OutputComplete(); 93 | } 94 | } 95 | 96 | [Shader("node")] 97 | [NodeLaunch("thread")] 98 | // If a node declares a recursive output to itself (see "recursiveOutput" below), 99 | // a "[NodeMaxRecursionDepth(...)]" is required to specify the maximum number of recursion levels. 100 | // This is required, as each recursion level counts towards the total graph depth, 101 | // and the runtime has to ensure that this depth does not exceed the limit of 32 nodes. 102 | // We can use "GetRemainingRecursionLevels()" in the node function to query the remaining 103 | // recursion levels, i.e., determine whether we can still request recursive output records. 104 | [NodeMaxRecursionDepth(4)] 105 | [NodeId("Snowflake")] 106 | void SnowflakeNode( 107 | ThreadNodeInputRecord<Line> inputRecord, 108 | // Recursive output back to this same node: up to four new line segments per input line. 109 | [MaxRecords(4)] 110 | [NodeId("Snowflake")] 111 | NodeOutput<Line> recursiveOutput 112 | ) 113 | { 114 | const float2 a = inputRecord.Get().a; 115 | const float2 b = inputRecord.Get().b; 116 | 117 | // Check if we have reached the recursion limit; new records are only requested while levels remain. 118 | const bool hasOutput = GetRemainingRecursionLevels() != 0; 119 | 120 | // Each recursion level has a 4x amplification factor, as each line 121 | // splits into four new lines. At the recursion limit, zero records are requested. 122 | ThreadNodeOutputRecords<Line> outputRecords = 123 | recursiveOutput.GetThreadNodeOutputRecords(hasOutput * 4); 124 | 125 | if (hasOutput) { 126 | // Perpendicular vector to current line segment, scaled to sqrt(3) / 6 of the segment length. 
127 | const float2 perp = float2(a.y - b.y, b.x - a.x) * sqrt(3) / 6; 128 | 129 | // Compute vertices for the four new line segments: 130 | // 131 | // v2 132 | // / \ 133 | // / \ 134 | // v0 ---- v1 v3 ---- v4 135 | const float2 v0 = a; 136 | const float2 v1 = lerp(a, b, 1./3.); 137 | const float2 v2 = lerp(a, b, .5) + perp; 138 | const float2 v3 = lerp(a, b, 2./3.); 139 | const float2 v4 = b; 140 | 141 | outputRecords.Get(0).a = v0; 142 | outputRecords.Get(0).b = v1; 143 | 144 | outputRecords.Get(1).a = v1; 145 | outputRecords.Get(1).b = v2; 146 | 147 | outputRecords.Get(2).a = v2; 148 | outputRecords.Get(2).b = v3; 149 | 150 | outputRecords.Get(3).a = v3; 151 | outputRecords.Get(3).b = v4; 152 | } else { 153 | // We've reached the recursion limit, thus we draw the current line segment 154 | // to the output. 155 | DrawLine(a, b); 156 | } 157 | 158 | outputRecords.OutputComplete(); 159 | } 160 | 161 | // [Task 2 Solution]: Recursive node that splits its input box into eight smaller boxes or, at the recursion limit, draws it. 162 | [Shader("node")] 163 | [NodeLaunch("thread")] 164 | [NodeMaxRecursionDepth(4)] 165 | [NodeId("Sponge")] 166 | void SpongeNode( 167 | ThreadNodeInputRecord<Box> inputRecord, 168 | 169 | [MaxRecords(8)] 170 | [NodeId("Sponge")] 171 | NodeOutput<Box> recursiveOutput 172 | ) 173 | { 174 | const float2 topLeft = inputRecord.Get().topLeft; 175 | const float size = inputRecord.Get().size; 176 | 177 | // Check if we have reached the recursion limit. 
178 | const bool hasOutput = GetRemainingRecursionLevels() != 0; 179 | 180 | // Split each box into eight boxes: 181 | // +---+---+---+ 182 | // | 0 | 1 | 2 | 183 | // +---+---+---+ 184 | // | 3 | | 4 | 185 | // +---+---+---+ 186 | // | 5 | 6 | 7 | 187 | // +---+---+---+ 188 | ThreadNodeOutputRecords<Box> outputRecords 189 | = recursiveOutput.GetThreadNodeOutputRecords(hasOutput * 8); 190 | 191 | if (hasOutput) { 192 | const float newSize = size / 3.; 193 | 194 | uint outputRecordIndex = 0; 195 | 196 | for(uint row = 0; row < 3; ++row){ 197 | for(uint col = 0; col < 3; ++col){ 198 | // Skip the center (see visualization above). 199 | if(row == 1 && col == 1) continue; 200 | 201 | outputRecords.Get(outputRecordIndex).size = newSize; 202 | outputRecords.Get(outputRecordIndex).topLeft = topLeft + float2(col * newSize, row * newSize); 203 | 204 | // Advance to next output record. 205 | outputRecordIndex++; 206 | } 207 | } 208 | } else { 209 | // We've reached the recursion limit, thus we draw the current box to the output. 210 | FillRect(topLeft, topLeft + size); 211 | } 212 | 213 | outputRecords.OutputComplete(); 214 | } 215 | -------------------------------------------------------------------------------- /tutorials/tutorial-4/screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GPUOpen-LibrariesAndSDKs/WorkGraphPlayground/616e78bad82a583e116a4db3f4f7ac6a84cd197e/tutorials/tutorial-4/screenshot.png -------------------------------------------------------------------------------- /tutorials/tutorial-5/Synchronization.hlsl: -------------------------------------------------------------------------------- 1 | // This file is part of the AMD & HSC Work Graph Playground. 2 | // 3 | // Copyright (C) 2024 Advanced Micro Devices, Inc. and Coburg University of Applied Sciences and Arts. 4 | // All rights reserved. 
5 | // 6 | // Permission is hereby granted, free of charge, to any person obtaining a copy 7 | // of this software and associated documentation files(the "Software"), to deal 8 | // in the Software without restriction, including without limitation the rights 9 | // to use, copy, modify, merge, publish, distribute, sublicense, and /or sell 10 | // copies of the Software, and to permit persons to whom the Software is 11 | // furnished to do so, subject to the following conditions : 12 | // 13 | // The above copyright notice and this permission notice shall be included in 14 | // all copies or substantial portions of the Software. 15 | // 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 19 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | // THE SOFTWARE. 23 | 24 | #include "Common.h" 25 | 26 | // This tutorial teaches about input scratch storage and synchronization with 27 | // Read/Write records. An input record to a broadcasting node can be defined as 28 | // read/write. This enables the nodes to use it as scratch memory. Furthermore, 29 | // RW records have a function FinishedCrossGroupSharing() which returns true only for 30 | // the last thread group to call it. This allows us to launch a grid of groups, wait for 31 | // all of them to finish, and then perform some extra work on the last group to 32 | // finish. 33 | 34 | // Your goal is to draw the axis-aligned bounding box of the sketch animation using 35 | // input record sharing. 36 | 37 | // [Task 3]: Add the "[NodeTrackRWInputSharing]" attribute to this record 38 | // struct. 
This allows the runtime/driver to add hidden fields to this 39 | // struct, which will enable the usage of "FinishedCrossGroupSharing" 40 | // in the node shader. 41 | struct ComputeBoundingBoxRecord { 42 | int2 aabbmin; 43 | int2 aabbmax; 44 | }; 45 | // Entry node: emits the single record shared by all thread groups of "ComputeBoundingBox". 46 | [Shader("node")] 47 | [NodeIsProgramEntry] 48 | [NodeLaunch("thread")] 49 | [NodeId("Entry")] 50 | void EntryNode( 51 | [MaxRecords(1)] 52 | [NodeId("ComputeBoundingBox")] 53 | NodeOutput<ComputeBoundingBoxRecord> output 54 | ) 55 | { 56 | GroupNodeOutputRecords<ComputeBoundingBoxRecord> outputRecord = 57 | output.GetGroupNodeOutputRecords(1); 58 | 59 | // Initialize min and max values so that any point shrinks the minimum and grows the maximum. 60 | outputRecord.Get().aabbmin = RenderSize; 61 | outputRecord.Get().aabbmax = int2(0, 0); 62 | 63 | outputRecord.OutputComplete(); 64 | } 65 | 66 | static const int numPoints = 1024; // Number of circles; each thread draws one. 67 | static const int groupSize = 32; // Threads per thread group. 68 | static const int numGroups = (numPoints + groupSize - 1) / groupSize; // Thread groups needed to cover all points (rounded up). 69 | 70 | [Shader("node")] 71 | [NodeLaunch("broadcasting")] 72 | [NodeDispatchGrid(numGroups, 1, 1)] 73 | [NodeId("ComputeBoundingBox")] 74 | [NumThreads(groupSize, 1, 1)] 75 | void ComputeBoundingBoxNode( 76 | uint gtid : SV_GroupThreadID, 77 | uint dtid : SV_DispatchThreadID, 78 | 79 | // [Task 1]: Change "inputRecord" to be read/write (RW). 80 | // Why do you need the globallycoherent attribute? 81 | DispatchNodeInputRecord<ComputeBoundingBoxRecord> inputRecord 82 | ) 83 | { 84 | // Timestamp offset of the current circle 85 | const float t = float(dtid) / numPoints; 86 | const int2 pixel = RenderSize * .5 + 0.9 * RenderSize * float2( 87 | random::PerlinNoise2D(float2('x', 2 * Time + t * 2)), 88 | random::PerlinNoise2D(float2('y', 2 * Time + t * 2))); 89 | // Radius of the circle to draw. This will also be important for the bounding box computation. 90 | // The radius will slowly get smaller over time. 91 | const float radius = pow(t, 2) * 15; 92 | // Draw a circle around the sampled pixel. The color slowly fades out over time. 
93 | FillCircle(pixel, radius, lerp(float3(1, 1, 1), float3(0, 0, 1), pow(t, 2))); 94 | 95 | // [Task 2]: Use InterlockedMin and InterlockedMax to add the pixel 96 | // coordinate to the aabbmin/max variables. 97 | 98 | 99 | 100 | 101 | 102 | // Make this thread group's writes to the input record visible device-wide and synchronize the threads of the group. 103 | Barrier(NODE_INPUT_MEMORY, DEVICE_SCOPE | GROUP_SYNC); 104 | 105 | // [Task 3]: Observe what's happening now: Each thread group draws the 106 | // current bounding box that was present when it was executed. Use the 107 | // record.FinishedCrossGroupSharing() function to only draw the 108 | // finalized bounding box. See 109 | // https://microsoft.github.io/DirectX-Specs/d3d/WorkGraphs.html#finishedcrossgroupsharing 110 | // for details. This will not compile at first, because you also need to 111 | // add the "struct [NodeTrackRWInputSharing] Foo {" attribute to the 112 | // record struct. This attribute allows the runtime to potentially add 113 | // hidden fields to the struct for the FinishedCrossGroupSharing() 114 | // functionality. 115 | 116 | 117 | // Draw the bounding box with the first thread in the thread group. 118 | if(gtid == 0) { 119 | DrawRect(inputRecord.Get().aabbmin, inputRecord.Get().aabbmax, 1); 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /tutorials/tutorial-5/SynchronizationSolution.hlsl: -------------------------------------------------------------------------------- 1 | // This file is part of the AMD & HSC Work Graph Playground. 2 | // 3 | // Copyright (C) 2024 Advanced Micro Devices, Inc. and Coburg University of Applied Sciences and Arts. 4 | // All rights reserved. 
5 | // 6 | // Permission is hereby granted, free of charge, to any person obtaining a copy 7 | // of this software and associated documentation files(the "Software"), to deal 8 | // in the Software without restriction, including without limitation the rights 9 | // to use, copy, modify, merge, publish, distribute, sublicense, and /or sell 10 | // copies of the Software, and to permit persons to whom the Software is 11 | // furnished to do so, subject to the following conditions : 12 | // 13 | // The above copyright notice and this permission notice shall be included in 14 | // all copies or substantial portions of the Software. 15 | // 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 19 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | // THE SOFTWARE. 23 | 24 | #include "Common.h" 25 | 26 | // This tutorial teaches about input scratch storage and synchronization with 27 | // Read/Write records. An input record to a broadcasting node can be defined as 28 | // read/write. This enables the nodes to use it as scratch memory. Furthermore, 29 | // RW records have a function FinishedCrossGroupSharing() which returns true only for 30 | // the last thread group to call it. This allows us to launch a grid of groups, wait for 31 | // all of them to finish, and then perform some extra work on the last group to 32 | // finish. 33 | 34 | // Your goal is to draw the axis-aligned bounding box of the sketch animation using 35 | // input record sharing. 
36 | 37 | // [Task 3 Solution]: The attribute enables "FinishedCrossGroupSharing" on input records of this type. 38 | struct [NodeTrackRWInputSharing] ComputeBoundingBoxRecord { 39 | int2 aabbmin; 40 | int2 aabbmax; 41 | }; 42 | // Entry node: emits the single record shared by all thread groups of "ComputeBoundingBox". 43 | [Shader("node")] 44 | [NodeIsProgramEntry] 45 | [NodeLaunch("thread")] 46 | [NodeId("Entry")] 47 | void EntryNode( 48 | [MaxRecords(1)] 49 | [NodeId("ComputeBoundingBox")] 50 | NodeOutput<ComputeBoundingBoxRecord> output 51 | ) 52 | { 53 | GroupNodeOutputRecords<ComputeBoundingBoxRecord> outputRecord = 54 | output.GetGroupNodeOutputRecords(1); 55 | 56 | // Initialize min and max values so that any point shrinks the minimum and grows the maximum. 57 | outputRecord.Get().aabbmin = RenderSize; 58 | outputRecord.Get().aabbmax = int2(0, 0); 59 | 60 | outputRecord.OutputComplete(); 61 | } 62 | 63 | static const int numPoints = 1024; // Number of circles; each thread draws one. 64 | static const int groupSize = 32; // Threads per thread group. 65 | static const int numGroups = (numPoints + groupSize - 1) / groupSize; // Thread groups needed to cover all points (rounded up). 66 | 67 | [Shader("node")] 68 | [NodeLaunch("broadcasting")] 69 | [NodeDispatchGrid(numGroups, 1, 1)] 70 | [NodeId("ComputeBoundingBox")] 71 | [NumThreads(groupSize, 1, 1)] 72 | void ComputeBoundingBoxNode( 73 | uint gtid : SV_GroupThreadID, 74 | uint dtid : SV_DispatchThreadID, 75 | 76 | // [Task 1 Solution]: We need the globallycoherent attribute so that the 77 | // data-reads and data-writes bypass the caches. 78 | globallycoherent RWDispatchNodeInputRecord<ComputeBoundingBoxRecord> inputRecord 79 | ) 80 | { 81 | // Timestamp offset of the current circle 82 | const float t = float(dtid) / numPoints; 83 | const int2 pixel = RenderSize * .5 + 0.9 * RenderSize * float2( 84 | random::PerlinNoise2D(float2('x', 2 * Time + t * 2)), 85 | random::PerlinNoise2D(float2('y', 2 * Time + t * 2))); 86 | // Radius of the circle to draw. This will also be important for the bounding box computation. 87 | // The radius will slowly get smaller over time. 88 | const float radius = pow(t, 2) * 15; 89 | // Draw a circle around the sampled pixel. The color slowly fades out over time. 
90 | FillCircle(pixel, radius, lerp(float3(1, 1, 1), float3(0, 0, 1), pow(t, 2))); 91 | 92 | // [Task 2 Solution]: We use atomic min/max operation on the "aabbmin" and 93 | // "aabbmax" members of the input record. We padded the pixel position 94 | // with the radius of the current circle, and as this computation is 95 | // done with floating-point numbers, we round towards the outside of the 96 | // bounding box (i.e., floor for minimum and ceil for maximum). The rounded value is converted to int explicitly. 97 | InterlockedMin(inputRecord.Get().aabbmin.x, int(floor(pixel.x - radius))); 98 | InterlockedMin(inputRecord.Get().aabbmin.y, int(floor(pixel.y - radius))); 99 | InterlockedMax(inputRecord.Get().aabbmax.x, int(ceil(pixel.x + radius))); 100 | InterlockedMax(inputRecord.Get().aabbmax.y, int(ceil(pixel.y + radius))); 101 | 102 | // Make this thread group's writes to the input record visible device-wide and synchronize the threads of the group. 103 | // Required, because atomic ops do not act as barriers and return immediately after calling. 104 | Barrier(NODE_INPUT_MEMORY, DEVICE_SCOPE | GROUP_SYNC); 105 | 106 | // [Task 3 Solution]: "FinishedCrossGroupSharing" only returns true for the 107 | // last thread group that calls this function. Thus, once 108 | // "FinishedCrossGroupSharing" returns true, we know that all other 109 | // thread groups have finished and thus "inputRecord.Get().aabbmin" and 110 | // "inputRecord.Get().aabbmax" now contain the computed bounding box. 111 | 112 | if(!inputRecord.FinishedCrossGroupSharing()) return; 113 | 114 | // Only one, i.e., the last, thread group arrives here. 115 | // Thus, only the last thread group gets to draw the bounding box. 116 | // Draw the bounding box with the first thread in the thread group. 
117 | if(gtid == 0) { 118 | DrawRect(inputRecord.Get().aabbmin, inputRecord.Get().aabbmax, 1); 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /tutorials/tutorial-5/screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GPUOpen-LibrariesAndSDKs/WorkGraphPlayground/616e78bad82a583e116a4db3f4f7ac6a84cd197e/tutorials/tutorial-5/screenshot.png -------------------------------------------------------------------------------- /tutorials/tutorial-6/Mandelbrot.h: -------------------------------------------------------------------------------- 1 | // This file is part of the AMD & HSC Work Graph Playground. 2 | // 3 | // Copyright (C) 2024 Advanced Micro Devices, Inc. and Coburg University of Applied Sciences and Arts. 4 | // All rights reserved. 5 | // 6 | // Permission is hereby granted, free of charge, to any person obtaining a copy 7 | // of this software and associated documentation files(the "Software"), to deal 8 | // in the Software without restriction, including without limitation the rights 9 | // to use, copy, modify, merge, publish, distribute, sublicense, and /or sell 10 | // copies of the Software, and to permit persons to whom the Software is 11 | // furnished to do so, subject to the following conditions : 12 | // 13 | // The above copyright notice and this permission notice shall be included in 14 | // all copies or substantial portions of the Software. 
15 | // 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 19 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | // THE SOFTWARE. 23 | 24 | #pragma once 25 | 26 | // ====================== Config ====================== 27 | 28 | // Enable/disable zoom animation to pointOfInterest (see below). 29 | #define ANIMATION 1 30 | 31 | // Length of zoom animation in seconds. 32 | static const float animationLength = 4; 33 | // Depth of zoom animation, i.e., how far to zoom into the fractal (the zoom factor is 2^-animationDepth at full depth). 34 | static const float animationDepth = 12; 35 | // Point of interest for zoom animation. 36 | static const float2 pointOfInterest = float2(-0.6512, 0.4795); 37 | 38 | // Maximum number of Mandelbrot iterations to carry out. 39 | static const int maxIteration = 256; 40 | 41 | // Maximum area of Mandelbrot to draw. 42 | static const float2 mandelbrotMin = float2(-2.00, -1.12); 43 | static const float2 mandelbrotMax = float2(0.47, 1.12); 44 | 45 | // ===================== Mandelbrot ==================== 46 | // Returns the dwell of a pixel, i.e., the number of Mandelbrot iterations carried out for it (at most maxIteration). 47 | int GetPixelDwell(in const float2 pixel) 48 | { 49 | #if ANIMATION 50 | float t = (Time % (2 * animationLength)) / animationLength; 51 | t = smoothstep(0, 1, (t > 1) ? 
2 - t : t); 52 | const float zoomFactor = pow(2.0, t * -animationDepth); 53 | #else 54 | const float zoomFactor = 1.f; 55 | #endif 56 | // Shrink the drawn area towards pointOfInterest by zoomFactor. 57 | float2 mandelMin = pointOfInterest + (mandelbrotMin - pointOfInterest) * zoomFactor; 58 | float2 mandelMax = pointOfInterest + (mandelbrotMax - pointOfInterest) * zoomFactor; 59 | float2 mandelDelta = mandelMax - mandelMin; 60 | float mandelRatio = mandelDelta.x / mandelDelta.y; 61 | 62 | float screenRatio = float(RenderSize.x) / RenderSize.y; 63 | float2 pos = pixel; 64 | if (screenRatio > mandelRatio) { // Screen is wider than Mandelbrot, adjust X (horizontal) center 65 | pos.x = pixel.x - (RenderSize.x - RenderSize.x * mandelRatio / screenRatio) * .5; 66 | pos *= mandelDelta.y / RenderSize.y; 67 | } else { // Screen is taller than Mandelbrot, adjust Y (vertical) center 68 | pos.y = pixel.y - (RenderSize.y - RenderSize.y * screenRatio / mandelRatio) * .5; 69 | pos *= mandelDelta.x / RenderSize.x; 70 | } 71 | pos += mandelMin; 72 | // Iterate c -> c^2 + pos (complex arithmetic) starting at 0 until |c|^2 exceeds 4 or maxIteration is reached. 73 | float2 c = float2(0, 0); 74 | int i = 0; 75 | for (; i < maxIteration && dot(c, c) <= 4; ++i) { 76 | c = pos + float2(c.x * c.x - c.y * c.y, 2 * c.x * c.y); 77 | } 78 | return i; 79 | } 80 | // Maps x (clamped to [0, 1]) to an RGB color; each channel is a degree-5 polynomial in x. 81 | float3 Heatmap(float x) 82 | { 83 | x = clamp(x, 0.0f, 1.0f); 84 | float4 x1 = float4(1.0, x, x * x, x * x * x); // 1 x x2 x3 85 | float2 x2 = x1.xy * x1.w * x; // x4 x5 86 | return float3(dot(x1, float4(+0.063861086f, +1.992659096f, -1.023901152f, -0.490832805f)) + 87 | dot(x2, float2(+1.308442123f, -0.914547012f)), 88 | dot(x1, float4(+0.049718590f, -0.791144343f, +2.892305078f, +0.811726816f)) + 89 | dot(x2, float2(-4.686502417f, +2.717794514f)), 90 | dot(x1, float4(+0.513275779f, +1.580255060f, -5.164414457f, +4.559573646f)) + 91 | dot(x2, float2(-1.916810682f, +0.570638854f))); 92 | } 93 | // Converts a dwell to a color: normalizes by maxIteration, applies a 0.35 power curve, then the heatmap. 94 | float3 DwellToColor(int dwell) 95 | { 96 | return Heatmap(pow(dwell / float(maxIteration), .35)); 97 | } -------------------------------------------------------------------------------- 
/tutorials/tutorial-6/RecursiveGrid.hlsl: -------------------------------------------------------------------------------- 1 | // This file is part of the AMD & HSC Work Graph Playground. 2 | // 3 | // Copyright (C) 2024 Advanced Micro Devices, Inc. and Coburg University of Applied Sciences and Arts. 4 | // All rights reserved. 5 | // 6 | // Permission is hereby granted, free of charge, to any person obtaining a copy 7 | // of this software and associated documentation files(the "Software"), to deal 8 | // in the Software without restriction, including without limitation the rights 9 | // to use, copy, modify, merge, publish, distribute, sublicense, and /or sell 10 | // copies of the Software, and to permit persons to whom the Software is 11 | // furnished to do so, subject to the following conditions : 12 | // 13 | // The above copyright notice and this permission notice shall be included in 14 | // all copies or substantial portions of the Software. 15 | // 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 19 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | // THE SOFTWARE. 23 | 24 | #include "Common.h" 25 | 26 | // Mandelbrot.h contains functionality for computing and drawing the Mandelbrot fractal 27 | #include "Mandelbrot.h" 28 | 29 | // When you have reached this tutorial, you have learned all the Work Graphs 30 | // concepts that we consider important for beginners! The point of this last 31 | // tutorial is to give you a tougher Work Graph problem to think about, possibly 32 | // even to take home. 
33 | 34 | // The provided code renders a small zoom animation into the Mandelbrot set. 35 | // In the implementation provided, each pixel's "dwell", i.e., the number of 36 | // iterations, is computed individually. With Work Graphs, we can optimize 37 | // this using the Mariani-Silver algorithm: 38 | // All "structures" in the Mandelbrot set are known to be connected. Therefore, 39 | // regions that have the same dwell around their boundary are guaranteed to have 40 | // the same dwell for all the pixels inside. With Work Graphs, we can subdivide 41 | // the screen into a coarse grid. Next, each grid cell's boundary is evaluated. 42 | // If not all pixels of the boundary have the same dwell, the cell is subdivided 43 | // and the grid recurses. If all dwell values are equal, the cell is filled with 44 | // the corresponding color. Your task: implement this algorithm in work graphs! 45 | 46 | // Note that the solution provided is far from optimal and intentionally kept 47 | // simple; feel free to share your own faster solution! 
48 | // Input record of the "NaiveMandelbrot" node; carries the dispatch grid size. 49 | struct NaiveMandelbrotRecord { 50 | uint2 dispatchGrid : SV_DispatchGrid; 51 | }; 52 | // Entry node: launches one 8x8 thread group per 8x8 pixel block of the render target (rounded up). 53 | [Shader("node")] 54 | [NodeIsProgramEntry] 55 | [NodeLaunch("thread")] 56 | [NodeId("Entry")] 57 | void EntryNode( 58 | [MaxRecords(1)] 59 | [NodeId("NaiveMandelbrot")] 60 | NodeOutput<NaiveMandelbrotRecord> mandelbrotOutput 61 | ) 62 | { 63 | ThreadNodeOutputRecords<NaiveMandelbrotRecord> outputRecord = 64 | mandelbrotOutput.GetThreadNodeOutputRecords(1); 65 | 66 | outputRecord.Get().dispatchGrid = (RenderSize + 7) / 8; 67 | 68 | outputRecord.OutputComplete(); 69 | } 70 | // Computes the dwell of every pixel individually and writes its color to the render target. 71 | [Shader("node")] 72 | [NodeLaunch("broadcasting")] 73 | [NodeMaxDispatchGrid(1024, 1024, 1)] 74 | [NumThreads(8, 8, 1)] 75 | [NodeId("NaiveMandelbrot")] 76 | void NaiveMandelbrotNode( 77 | uint2 dtid : SV_DispatchThreadID, 78 | DispatchNodeInputRecord<NaiveMandelbrotRecord> inputRecord 79 | ) 80 | { 81 | if (all(dtid < RenderSize)) { 82 | const int dwell = GetPixelDwell(dtid); 83 | 84 | RenderTarget[dtid] = float4(DwellToColor(dwell), 1); 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /tutorials/tutorial-6/RecursiveGridSolution.hlsl: -------------------------------------------------------------------------------- 1 | // This file is part of the AMD & HSC Work Graph Playground. 2 | // 3 | // Copyright (C) 2024 Advanced Micro Devices, Inc. and Coburg University of Applied Sciences and Arts. 4 | // All rights reserved. 
5 | // 6 | // Permission is hereby granted, free of charge, to any person obtaining a copy 7 | // of this software and associated documentation files(the "Software"), to deal 8 | // in the Software without restriction, including without limitation the rights 9 | // to use, copy, modify, merge, publish, distribute, sublicense, and /or sell 10 | // copies of the Software, and to permit persons to whom the Software is 11 | // furnished to do so, subject to the following conditions : 12 | // 13 | // The above copyright notice and this permission notice shall be included in 14 | // all copies or substantial portions of the Software. 15 | // 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 19 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | // THE SOFTWARE. 23 | 24 | #include "Common.h" 25 | 26 | // Mandelbrot.h contains functionality for computing and drawing the Mandelbrot fractal 27 | #include "Mandelbrot.h" 28 | 29 | // Enable/disable visualization of grid cells with same dwell values. 30 | #define VISUALIZE_GRID_CELLS 1 31 | // Input record of the "MandelbrotGrid" node; carries the dispatch grid size. 32 | struct MandelbrotGridRecord { 33 | uint2 dispatchGrid : SV_DispatchGrid; 34 | }; 35 | // Input record of the "MandelbrotNaive" node: square region whose pixels are evaluated individually. 36 | struct NaiveMandelbrotRecord { 37 | int2 topLeft; 38 | int size; 39 | }; 40 | // Input record of the "MandelbrotMarianiSilver" node: square grid cell whose boundary is tested. 41 | struct [NodeTrackRWInputSharing] MarianiSilverRecord { 42 | uint dispatchSize : SV_DispatchGrid; 43 | int2 topLeft; 44 | int size; 45 | // Shared min and max dwell values for grid cell. 46 | // If minDwell = maxDwell, then all tested pixels have the same dwell. 47 | // That is why we need the NodeTrackRWInputSharing above. 
48 | int minDwell; 49 | int maxDwell; 50 | }; 51 | // Input record of the "MandelbrotFill" node: square region to fill with a single color. 52 | struct MandelbrotFillRecord { 53 | uint2 dispatchGrid : SV_DispatchGrid; 54 | int2 topLeft; 55 | int size; 56 | float3 color; 57 | }; 58 | 59 | 60 | static const int maxResolution = 4096; 61 | static const int tilePow = 8; 62 | static const int tileSize = 3 * (1l << tilePow) - 2; // Edge length in pixels of a top-level grid tile. 63 | static const int maxTilesPerAxis = min(32, maxResolution / tileSize); 64 | 65 | static const int minSize = 16; // Thread-group edge length of the "MandelbrotNaive" node. 66 | // Entry node: launches the coarse tile grid; each 8x8 thread group covers 8x8 tiles. 67 | [Shader("node")] 68 | [NodeIsProgramEntry] 69 | [NodeLaunch("thread")] 70 | [NodeId("Entry")] 71 | void EntryNode( 72 | [MaxRecords(1)] 73 | [NodeId("MandelbrotGrid")] 74 | NodeOutput<MandelbrotGridRecord> gridOutput 75 | ) 76 | { 77 | ThreadNodeOutputRecords<MandelbrotGridRecord> outputRecord = 78 | gridOutput.GetThreadNodeOutputRecords(1); 79 | 80 | outputRecord.Get().dispatchGrid = DivideAndRoundUp(RenderSize, 8 * tileSize); 81 | 82 | outputRecord.OutputComplete(); 83 | } 84 | // Each thread emits one top-level tile to "MandelbrotMarianiSilver" if the tile starts inside the render target. 85 | [Shader("node")] 86 | [NodeLaunch("broadcasting")] 87 | [NodeIsProgramEntry] 88 | [NodeMaxDispatchGrid(maxTilesPerAxis, maxTilesPerAxis, 1)] 89 | [NodeId("MandelbrotGrid")] 90 | [NumThreads(8, 8, 1)] 91 | void MandelbrotGridNode( 92 | uint2 dtid : SV_DispatchThreadID, 93 | 94 | DispatchNodeInputRecord<MandelbrotGridRecord> inputRecord, 95 | 96 | [MaxRecords(8 * 8)] 97 | [NodeId("MandelbrotMarianiSilver")] 98 | NodeOutput<MarianiSilverRecord> mandelbrotOutput 99 | ) 100 | { 101 | const int2 topLeft = dtid * tileSize; 102 | const bool hasOutput = all(topLeft < RenderSize); 103 | 104 | ThreadNodeOutputRecords<MarianiSilverRecord> outputRecord = 105 | mandelbrotOutput.GetThreadNodeOutputRecords(hasOutput); 106 | 107 | if(hasOutput){ 108 | outputRecord.Get().dispatchSize = DivideAndRoundUp(tileSize, 8); 109 | outputRecord.Get().topLeft = topLeft; 110 | outputRecord.Get().size = tileSize; 111 | outputRecord.Get().minDwell = maxIteration; 112 | outputRecord.Get().maxDwell = 0; 113 | } 114 | 115 | outputRecord.OutputComplete(); 116 | } 117 | // Fills the square region described by the input record with the record's color. 118 | [Shader("node")] 119 | [NodeLaunch("broadcasting")] 120 | [NodeMaxDispatchGrid((tileSize + 7) / 8, 
(tileSize + 7) / 8, 1)] 121 | [NodeId("MandelbrotFill")] 122 | [NumThreads(8, 8, 1)] 123 | void MandelbrotFillNode( 124 | uint2 dtid : SV_DispatchThreadID, 125 | 126 | DispatchNodeInputRecord<MandelbrotFillRecord> inputRecord) 127 | { 128 | const MandelbrotFillRecord record = inputRecord.Get(); 129 | 130 | #if VISUALIZE_GRID_CELLS 131 | // check if dtid is within record size and not along the outer edge. 132 | // The outer edge is left blank to visualize grid cells. 133 | if (all(dtid > 0) && all(dtid < (record.size - 1))) { 134 | #else 135 | // check if dtid is within record size 136 | if (all(dtid < record.size)) { 137 | #endif 138 | const int2 pixel = record.topLeft + dtid; 139 | // Skip pixels outside of the render target. 140 | if (all(pixel >= 0) && all(pixel < RenderSize)) { 141 | RenderTarget[pixel] = float4(record.color, 1); 142 | } 143 | } 144 | } 145 | // Computes the dwell of every pixel of the square region described by the input record individually. 146 | [Shader("node")] 147 | [NodeLaunch("broadcasting")] 148 | [NodeDispatchGrid(1, 1, 1)] 149 | [NodeId("MandelbrotNaive")] 150 | [NumThreads(minSize, minSize, 1)] 151 | void MandelbrotNaiveNode( 152 | uint2 gtid : SV_GroupThreadID, 153 | 154 | DispatchNodeInputRecord<NaiveMandelbrotRecord> inputRecord 155 | ) 156 | { 157 | const NaiveMandelbrotRecord record = inputRecord.Get(); 158 | 159 | if (all(gtid < record.size)) { 160 | const int2 pixel = record.topLeft + gtid; 161 | 162 | if (all(pixel >= 0) && all(pixel < RenderSize)) { 163 | const int dwell = GetPixelDwell(pixel); 164 | 165 | RenderTarget[pixel] = float4(DwellToColor(dwell), 1); 166 | } 167 | } 168 | } 169 | 170 | [Shader("node")] 171 | [NodeLaunch("broadcasting")] 172 | [NodeMaxRecursionDepth(12)] 173 | [NodeMaxDispatchGrid((tileSize + 7) / 8, 1, 1)] 174 | [NodeId("MandelbrotMarianiSilver")] 175 | [NumThreads(8, 4, 1)] 176 | void MandelbrotMarianiSilverNode( 177 | uint2 gtid : SV_GroupThreadID, 178 | uint2 dtid : SV_DispatchThreadID, 179 | 180 | globallycoherent RWDispatchNodeInputRecord inputRecord, 181 | 182 | [MaxRecords(4)] 183 | [NodeId("MandelbrotMarianiSilver")] 184 | NodeOutput recursiveOutput, 185 | 186 | 
[MaxRecordsSharedWith(recursiveOutput)] 187 | [NodeId("MandelbrotNaive")] 188 | NodeOutput naiveOutput, 189 | 190 | [MaxRecordsSharedWith(recursiveOutput)] 191 | [NodeId("MandelbrotFill")] 192 | NodeOutput fillOutput 193 | ) 194 | { 195 | int size = inputRecord.Get().size; 196 | int2 topLeft = inputRecord.Get().topLeft; 197 | 198 | // Number of pixels along one edge to check 199 | const int pixelsOnEdge = size - 1; 200 | // Required number of matching pixels (=votes) in order to determine area as single color. 201 | const int requiredVotes = pixelsOnEdge * 4; 202 | 203 | if (dtid.x < pixelsOnEdge) { 204 | // Lookup for side of the tile to test 205 | const int4 lookup = int4(0, dtid.x, size-1, size - 1 - dtid.x); 206 | // Compute pixel position to test 207 | const int2 pixel = topLeft + int2(lookup[(gtid.y + 1) % 4], lookup[gtid.y]); 208 | 209 | const int dwell = GetPixelDwell(pixel); 210 | 211 | // Update min/max to check if all threads have the same dwell 212 | InterlockedMin(inputRecord.Get().minDwell, dwell); 213 | InterlockedMax(inputRecord.Get().maxDwell, dwell); 214 | 215 | RenderTarget[pixel] = float4(DwellToColor(dwell), 1); 216 | } 217 | 218 | topLeft += int2(1, 1); 219 | size -= 2; 220 | 221 | // Ensure all groups and threads have finished writing 222 | Barrier(NODE_INPUT_MEMORY, DEVICE_SCOPE | GROUP_SYNC); 223 | 224 | if (!inputRecord.FinishedCrossGroupSharing()) { 225 | return; 226 | } 227 | 228 | const bool allEqual = inputRecord.Get().minDwell == inputRecord.Get().maxDwell; 229 | const bool hasFillOutput = allEqual; 230 | const bool hasNaiveOutput = !allEqual && 231 | ((GetRemainingRecursionLevels() == 0) || (size < minSize)); 232 | const bool hasRecursiveOutput = !allEqual && !hasNaiveOutput; 233 | 234 | GroupNodeOutputRecords fillOutputRecord = 235 | fillOutput.GetGroupNodeOutputRecords(hasFillOutput); 236 | 237 | if (hasFillOutput) { 238 | fillOutputRecord.Get().dispatchGrid = DivideAndRoundUp(size, 8); 239 | fillOutputRecord.Get().topLeft = 
topLeft; 240 | fillOutputRecord.Get().size = size; 241 | fillOutputRecord.Get().color = DwellToColor(inputRecord.Get().minDwell); 242 | } 243 | 244 | fillOutputRecord.OutputComplete(); 245 | 246 | GroupNodeOutputRecords naiveOutputRecord = 247 | naiveOutput.GetGroupNodeOutputRecords(hasNaiveOutput); 248 | 249 | if (hasNaiveOutput) { 250 | naiveOutputRecord.Get().topLeft = topLeft; 251 | naiveOutputRecord.Get().size = size; 252 | } 253 | 254 | naiveOutputRecord.OutputComplete(); 255 | 256 | GroupNodeOutputRecords recursiveOutputRecord = 257 | recursiveOutput.GetGroupNodeOutputRecords(hasRecursiveOutput? 4 : 0); 258 | 259 | if (hasRecursiveOutput) { 260 | // Only first threads in x dimensions write outputs. 261 | if (gtid.x == 0) { 262 | // Coordinate in 2x2 grid of records to emit. 263 | // +---+---+ 264 | // | 0 | 1 | 265 | // +---+---+ 266 | // | 2 | 3 | 267 | // +---+---+ 268 | const int2 coord = int2(gtid.y % 2, gtid.y / 2); 269 | // Size is always multiple of 2, thus we can split the current grid cell into 270 | // 2x2 grid cells with equal size. 
271 | const int nextSize = size / 2; 272 | const int2 nextTopLeft = topLeft + coord * nextSize; 273 | 274 | recursiveOutputRecord.Get(gtid.y).dispatchSize = DivideAndRoundUp(nextSize, 8); 275 | recursiveOutputRecord.Get(gtid.y).topLeft = nextTopLeft; 276 | recursiveOutputRecord.Get(gtid.y).size = nextSize; 277 | recursiveOutputRecord.Get(gtid.y).minDwell = maxIteration; 278 | recursiveOutputRecord.Get(gtid.y).maxDwell = 0; 279 | } 280 | } 281 | 282 | recursiveOutputRecord.OutputComplete(); 283 | } 284 | 285 | -------------------------------------------------------------------------------- /tutorials/tutorial-6/screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GPUOpen-LibrariesAndSDKs/WorkGraphPlayground/616e78bad82a583e116a4db3f4f7ac6a84cd197e/tutorials/tutorial-6/screenshot.png --------------------------------------------------------------------------------