├── .gitignore ├── .gitlab-ci.yml ├── .gitmodules ├── LICENSE.txt ├── README.md ├── ffx-parallelsort └── FFX_ParallelSort.h └── sample ├── CMakeLists.txt ├── build ├── .gitignore └── GenerateSolutions.bat ├── common.cmake └── src ├── Common ├── FFXParallelSort.json ├── Validate1080p.png ├── Validate2K.png ├── Validate4K.png └── shaders │ ├── ParallelSortCS.hlsl │ └── ParallelSortVerify.hlsl ├── DX12 ├── CMakeLists.txt ├── ParallelSort.cpp ├── ParallelSort.h ├── UI.cpp ├── UI.h ├── dpiawarescaling.manifest ├── renderer.cpp ├── renderer.h ├── sample.cpp ├── sample.h ├── stdafx.cpp └── stdafx.h └── VK ├── CMakeLists.txt ├── ParallelSort.cpp ├── ParallelSort.h ├── UI.cpp ├── UI.h ├── dpiawarescaling.manifest ├── renderer.cpp ├── renderer.h ├── sample.cpp ├── sample.h ├── stdafx.cpp └── stdafx.h /.gitignore: -------------------------------------------------------------------------------- 1 | bin/ 2 | -------------------------------------------------------------------------------- /.gitlab-ci.yml: -------------------------------------------------------------------------------- 1 | variables: 2 | SampleName: FFX_ParallelSort 3 | GIT_SUBMODULE_STRATEGY: normal 4 | 5 | stages: 6 | - build 7 | - deploy 8 | 9 | build_dx12: 10 | tags: 11 | - windows 12 | - amd64 13 | stage: build 14 | script: 15 | - 'cmake -S sample -B sample/build/DX12 -G "Visual Studio 16 2019" -A x64 -DGFX_API=DX12 -DBUILD_INSTALLER=ON' 16 | - 'cmake --build sample/build/DX12 --config Release' 17 | artifacts: 18 | paths: 19 | - sample/bin/ 20 | 21 | build_vk: 22 | tags: 23 | - windows 24 | - amd64 25 | stage: build 26 | script: 27 | - 'cmake -S sample -B sample/build/VK -G "Visual Studio 16 2019" -A x64 -DGFX_API=VK -DBUILD_INSTALLER=ON' 28 | - 'cmake --build sample/build/VK --config Release' 29 | artifacts: 30 | paths: 31 | - sample/bin/ 32 | 33 | package_sample: 34 | tags: 35 | - windows 36 | - amd64 37 | stage: deploy 38 | dependencies: 39 | - build_dx12 40 | - build_vk 41 | script: 42 | - echo 
"Packaging build" 43 | - copy %VULKAN_SDK%\Bin\glslc.exe .\sample\bin 44 | - echo cd .\sample\bin\ > %SampleName%_DX12.bat 45 | - echo start %SampleName%_DX12.exe >> %SampleName%_DX12.bat 46 | - echo cd .\sample\bin\ > %SampleName%_VK.bat 47 | - echo start %SampleName%_VK.exe >> %SampleName%_VK.bat 48 | artifacts: 49 | name: "%SampleName%-%CI_COMMIT_TAG%-%CI_COMMIT_REF_NAME%-%CI_COMMIT_SHORT_SHA%" 50 | paths: 51 | - "sample/bin/" 52 | - "sample/media/" 53 | - "docs/" 54 | - "readme.md" 55 | - "license.txt" 56 | - "%SampleName%_DX12.bat" 57 | - "%SampleName%_VK.bat" 58 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "libs/cauldron"] 2 | path = sample/libs/cauldron 3 | url = ../../GPUOpen-LibrariesAndSDKs/Cauldron.git 4 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2020-2021 Advanced Micro Devices, Inc. All rights reserved. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # FidelityFX Parallel Sort 2 | 3 | Copyright (c) 2020-2021 Advanced Micro Devices, Inc. All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | 23 | ## Parallel Sort 24 | 25 | The AMD FidelityFX Parallel Sort provides an open source header implementation to easily integrate a highly optimized compute-based radix sort into your game. 
26 | 27 | Features of the implementation: 28 | 29 | - Direct and indirect execution support 30 | - RDNA+ optimized algorithm 31 | - Support for the Vulkan and Direct3D 12 APIs 32 | - Shaders written in HLSL utilizing SM 6.0 wave-level operations 33 | - A sample application is provided for both Direct3D 12 and Vulkan 34 | 35 | ## Resources 36 | 37 | [Introduction to GPU Radix Sort](http://www.heterogeneouscompute.org/wordpress/wp-content/uploads/2011/06/RadixSort.pdf) 38 | -------------------------------------------------------------------------------- /ffx-parallelsort/FFX_ParallelSort.h: -------------------------------------------------------------------------------- 1 | // FFX_ParallelSort.h 2 | // 3 | // Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 4 | // Permission is hereby granted, free of charge, to any person obtaining a copy 5 | // of this software and associated documentation files (the "Software"), to deal 6 | // in the Software without restriction, including without limitation the rights 7 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | // copies of the Software, and to permit persons to whom the Software is 9 | // furnished to do so, subject to the following conditions: 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 18 | // THE SOFTWARE. 
19 | 20 | #define FFX_PARALLELSORT_SORT_BITS_PER_PASS 4 21 | #define FFX_PARALLELSORT_SORT_BIN_COUNT (1 << FFX_PARALLELSORT_SORT_BITS_PER_PASS) 22 | #define FFX_PARALLELSORT_ELEMENTS_PER_THREAD 4 23 | #define FFX_PARALLELSORT_THREADGROUP_SIZE 128 24 | 25 | ////////////////////////////////////////////////////////////////////////// 26 | // ParallelSort constant buffer parameters: 27 | // 28 | // NumKeys The number of keys to sort 29 | // Shift How many bits to shift for this sort pass (we sort 4 bits at a time) 30 | // NumBlocksPerThreadGroup How many blocks of keys each thread group needs to process 31 | // NumThreadGroups How many thread groups are being run concurrently for sort 32 | // NumThreadGroupsWithAdditionalBlocks How many thread groups need to process additional block data 33 | // NumReduceThreadgroupPerBin How many thread groups are summed together for each reduced bin entry 34 | // NumScanValues How many values to perform scan prefix (+ add) on 35 | ////////////////////////////////////////////////////////////////////////// 36 | 37 | #ifdef FFX_CPP 38 | struct FFX_ParallelSortCB 39 | { 40 | uint32_t NumKeys; 41 | int32_t NumBlocksPerThreadGroup; 42 | uint32_t NumThreadGroups; 43 | uint32_t NumThreadGroupsWithAdditionalBlocks; 44 | uint32_t NumReduceThreadgroupPerBin; 45 | uint32_t NumScanValues; 46 | }; 47 | 48 | void FFX_ParallelSort_CalculateScratchResourceSize(uint32_t MaxNumKeys, uint32_t& ScratchBufferSize, uint32_t& ReduceScratchBufferSize) 49 | { 50 | uint32_t BlockSize = FFX_PARALLELSORT_ELEMENTS_PER_THREAD * FFX_PARALLELSORT_THREADGROUP_SIZE; 51 | uint32_t NumBlocks = (MaxNumKeys + BlockSize - 1) / BlockSize; 52 | uint32_t NumReducedBlocks = (NumBlocks + BlockSize - 1) / BlockSize; 53 | 54 | ScratchBufferSize = FFX_PARALLELSORT_SORT_BIN_COUNT * NumBlocks * sizeof(uint32_t); 55 | ReduceScratchBufferSize = FFX_PARALLELSORT_SORT_BIN_COUNT * NumReducedBlocks * sizeof(uint32_t); 56 | } 57 | 58 | void 
FFX_ParallelSort_SetConstantAndDispatchData(uint32_t NumKeys, uint32_t MaxThreadGroups, FFX_ParallelSortCB& ConstantBuffer, uint32_t& NumThreadGroupsToRun, uint32_t& NumReducedThreadGroupsToRun) 59 | { 60 | ConstantBuffer.NumKeys = NumKeys; 61 | 62 | uint32_t BlockSize = FFX_PARALLELSORT_ELEMENTS_PER_THREAD * FFX_PARALLELSORT_THREADGROUP_SIZE; 63 | uint32_t NumBlocks = (NumKeys + BlockSize - 1) / BlockSize; 64 | 65 | // Figure out data distribution 66 | NumThreadGroupsToRun = MaxThreadGroups; 67 | uint32_t BlocksPerThreadGroup = (NumBlocks / NumThreadGroupsToRun); 68 | ConstantBuffer.NumThreadGroupsWithAdditionalBlocks = NumBlocks % NumThreadGroupsToRun; 69 | 70 | if (NumBlocks < NumThreadGroupsToRun) 71 | { 72 | BlocksPerThreadGroup = 1; 73 | NumThreadGroupsToRun = NumBlocks; 74 | ConstantBuffer.NumThreadGroupsWithAdditionalBlocks = 0; 75 | } 76 | 77 | ConstantBuffer.NumThreadGroups = NumThreadGroupsToRun; 78 | ConstantBuffer.NumBlocksPerThreadGroup = BlocksPerThreadGroup; 79 | 80 | // Calculate the number of thread groups to run for reduction (each thread group can process BlockSize number of entries) 81 | NumReducedThreadGroupsToRun = FFX_PARALLELSORT_SORT_BIN_COUNT * ((BlockSize > NumThreadGroupsToRun) ? 1 : (NumThreadGroupsToRun + BlockSize - 1) / BlockSize); 82 | ConstantBuffer.NumReduceThreadgroupPerBin = NumReducedThreadGroupsToRun / FFX_PARALLELSORT_SORT_BIN_COUNT; 83 | ConstantBuffer.NumScanValues = NumReducedThreadGroupsToRun; // The number of reduce thread groups becomes our scan count (as each thread group writes out 1 value that needs scan prefix) 84 | } 85 | 86 | // We are using some optimizations to hide buffer load latency, so make sure anyone changing this define is made aware of that fact. 87 | static_assert(FFX_PARALLELSORT_ELEMENTS_PER_THREAD == 4, "FFX_ParallelSort Shaders currently explicitly rely on FFX_PARALLELSORT_ELEMENTS_PER_THREAD being set to 4 in order to optimize buffer loads. 
Please adjust the optimization to factor in the new define value."); 88 | #elif defined(FFX_HLSL) 89 | 90 | struct FFX_ParallelSortCB 91 | { 92 | uint NumKeys; 93 | int NumBlocksPerThreadGroup; 94 | uint NumThreadGroups; 95 | uint NumThreadGroupsWithAdditionalBlocks; 96 | uint NumReduceThreadgroupPerBin; 97 | uint NumScanValues; 98 | }; 99 | 100 | groupshared uint gs_FFX_PARALLELSORT_Histogram[FFX_PARALLELSORT_THREADGROUP_SIZE * FFX_PARALLELSORT_SORT_BIN_COUNT]; 101 | void FFX_ParallelSort_Count_uint(uint localID, uint groupID, FFX_ParallelSortCB CBuffer, uint ShiftBit, RWStructuredBuffer SrcBuffer, RWStructuredBuffer SumTable) 102 | { 103 | // Start by clearing our local counts in LDS 104 | for (int i = 0; i < FFX_PARALLELSORT_SORT_BIN_COUNT; i++) 105 | gs_FFX_PARALLELSORT_Histogram[(i * FFX_PARALLELSORT_THREADGROUP_SIZE) + localID] = 0; 106 | 107 | // Wait for everyone to catch up 108 | GroupMemoryBarrierWithGroupSync(); 109 | 110 | // Data is processed in blocks, and how many we process can changed based on how much data we are processing 111 | // versus how many thread groups we are processing with 112 | int BlockSize = FFX_PARALLELSORT_ELEMENTS_PER_THREAD * FFX_PARALLELSORT_THREADGROUP_SIZE; 113 | 114 | // Figure out this thread group's index into the block data (taking into account thread groups that need to do extra reads) 115 | uint ThreadgroupBlockStart = (BlockSize * CBuffer.NumBlocksPerThreadGroup * groupID); 116 | uint NumBlocksToProcess = CBuffer.NumBlocksPerThreadGroup; 117 | 118 | if (groupID >= CBuffer.NumThreadGroups - CBuffer.NumThreadGroupsWithAdditionalBlocks) 119 | { 120 | ThreadgroupBlockStart += (groupID - (CBuffer.NumThreadGroups - CBuffer.NumThreadGroupsWithAdditionalBlocks)) * BlockSize; 121 | NumBlocksToProcess++; 122 | } 123 | 124 | // Get the block start index for this thread 125 | uint BlockIndex = ThreadgroupBlockStart + localID; 126 | 127 | // Count value occurrence 128 | for (uint BlockCount = 0; BlockCount < NumBlocksToProcess; 
BlockCount++, BlockIndex += BlockSize) 129 | { 130 | uint DataIndex = BlockIndex; 131 | 132 | // Pre-load the key values in order to hide some of the read latency 133 | uint srcKeys[FFX_PARALLELSORT_ELEMENTS_PER_THREAD]; 134 | srcKeys[0] = SrcBuffer[DataIndex]; 135 | srcKeys[1] = SrcBuffer[DataIndex + FFX_PARALLELSORT_THREADGROUP_SIZE]; 136 | srcKeys[2] = SrcBuffer[DataIndex + (FFX_PARALLELSORT_THREADGROUP_SIZE * 2)]; 137 | srcKeys[3] = SrcBuffer[DataIndex + (FFX_PARALLELSORT_THREADGROUP_SIZE * 3)]; 138 | 139 | for (uint i = 0; i < FFX_PARALLELSORT_ELEMENTS_PER_THREAD; i++) 140 | { 141 | if (DataIndex < CBuffer.NumKeys) 142 | { 143 | uint localKey = (srcKeys[i] >> ShiftBit) & 0xf; 144 | InterlockedAdd(gs_FFX_PARALLELSORT_Histogram[(localKey * FFX_PARALLELSORT_THREADGROUP_SIZE) + localID], 1); 145 | DataIndex += FFX_PARALLELSORT_THREADGROUP_SIZE; 146 | } 147 | } 148 | } 149 | 150 | // Even though our LDS layout guarantees no collisions, our thread group size is greater than a wave 151 | // so we need to make sure all thread groups are done counting before we start tallying up the results 152 | GroupMemoryBarrierWithGroupSync(); 153 | 154 | if (localID < FFX_PARALLELSORT_SORT_BIN_COUNT) 155 | { 156 | uint sum = 0; 157 | for (int i = 0; i < FFX_PARALLELSORT_THREADGROUP_SIZE; i++) 158 | { 159 | sum += gs_FFX_PARALLELSORT_Histogram[localID * FFX_PARALLELSORT_THREADGROUP_SIZE + i]; 160 | } 161 | SumTable[localID * CBuffer.NumThreadGroups + groupID] = sum; 162 | } 163 | } 164 | 165 | groupshared uint gs_FFX_PARALLELSORT_LDSSums[FFX_PARALLELSORT_THREADGROUP_SIZE]; 166 | uint FFX_ParallelSort_ThreadgroupReduce(uint localSum, uint localID) 167 | { 168 | // Do wave local reduce 169 | uint waveReduced = WaveActiveSum(localSum); 170 | 171 | // First lane in a wave writes out wave reduction to LDS (this accounts for num waves per group greater than HW wave size) 172 | // Note that some hardware with very small HW wave sizes (i.e. 
<= 8) may exhibit issues with this algorithm, and have not been tested. 173 | uint waveID = localID / WaveGetLaneCount(); 174 | if (WaveIsFirstLane()) 175 | gs_FFX_PARALLELSORT_LDSSums[waveID] = waveReduced; 176 | 177 | // Wait for everyone to catch up 178 | GroupMemoryBarrierWithGroupSync(); 179 | 180 | // First wave worth of threads sum up wave reductions 181 | if (!waveID) 182 | waveReduced = WaveActiveSum( (localID < FFX_PARALLELSORT_THREADGROUP_SIZE / WaveGetLaneCount()) ? gs_FFX_PARALLELSORT_LDSSums[localID] : 0); 183 | 184 | // Returned the reduced sum 185 | return waveReduced; 186 | } 187 | 188 | uint FFX_ParallelSort_BlockScanPrefix(uint localSum, uint localID) 189 | { 190 | // Do wave local scan-prefix 191 | uint wavePrefixed = WavePrefixSum(localSum); 192 | 193 | // Since we are dealing with thread group sizes greater than HW wave size, we need to account for what wave we are in. 194 | uint waveID = localID / WaveGetLaneCount(); 195 | uint laneID = WaveGetLaneIndex(); 196 | 197 | // Last element in a wave writes out partial sum to LDS 198 | if (laneID == WaveGetLaneCount() - 1) 199 | gs_FFX_PARALLELSORT_LDSSums[waveID] = wavePrefixed + localSum; 200 | 201 | // Wait for everyone to catch up 202 | GroupMemoryBarrierWithGroupSync(); 203 | 204 | // First wave prefixes partial sums 205 | if (!waveID) 206 | gs_FFX_PARALLELSORT_LDSSums[localID] = WavePrefixSum(gs_FFX_PARALLELSORT_LDSSums[localID]); 207 | 208 | // Wait for everyone to catch up 209 | GroupMemoryBarrierWithGroupSync(); 210 | 211 | // Add the partial sums back to each wave prefix 212 | wavePrefixed += gs_FFX_PARALLELSORT_LDSSums[waveID]; 213 | 214 | return wavePrefixed; 215 | } 216 | 217 | void FFX_ParallelSort_ReduceCount(uint localID, uint groupID, FFX_ParallelSortCB CBuffer, RWStructuredBuffer SumTable, RWStructuredBuffer ReduceTable) 218 | { 219 | // Figure out what bin data we are reducing 220 | uint BinID = groupID / CBuffer.NumReduceThreadgroupPerBin; 221 | uint BinOffset = BinID * 
CBuffer.NumThreadGroups; 222 | 223 | // Get the base index for this thread group 224 | uint BaseIndex = (groupID % CBuffer.NumReduceThreadgroupPerBin) * FFX_PARALLELSORT_ELEMENTS_PER_THREAD * FFX_PARALLELSORT_THREADGROUP_SIZE; 225 | 226 | // Calculate partial sums for entries this thread reads in 227 | uint threadgroupSum = 0; 228 | for (uint i = 0; i < FFX_PARALLELSORT_ELEMENTS_PER_THREAD; ++i) 229 | { 230 | uint DataIndex = BaseIndex + (i * FFX_PARALLELSORT_THREADGROUP_SIZE) + localID; 231 | threadgroupSum += (DataIndex < CBuffer.NumThreadGroups) ? SumTable[BinOffset + DataIndex] : 0; 232 | } 233 | 234 | // Reduce across the entirety of the thread group 235 | threadgroupSum = FFX_ParallelSort_ThreadgroupReduce(threadgroupSum, localID); 236 | 237 | // First thread of the group writes out the reduced sum for the bin 238 | if (!localID) 239 | ReduceTable[groupID] = threadgroupSum; 240 | 241 | // What this will look like in the reduced table is: 242 | // [ [bin0 ... bin0] [bin1 ... bin1] ... 
] 243 | } 244 | 245 | // This is to transform uncoalesced loads into coalesced loads and 246 | // then scattered loads from LDS 247 | groupshared int gs_FFX_PARALLELSORT_LDS[FFX_PARALLELSORT_ELEMENTS_PER_THREAD][FFX_PARALLELSORT_THREADGROUP_SIZE]; 248 | void FFX_ParallelSort_ScanPrefix(uint numValuesToScan, uint localID, uint groupID, uint BinOffset, uint BaseIndex, bool AddPartialSums, 249 | FFX_ParallelSortCB CBuffer, RWStructuredBuffer ScanSrc, RWStructuredBuffer ScanDst, RWStructuredBuffer ScanScratch) 250 | { 251 | uint i; 252 | // Perform coalesced loads into LDS 253 | for (i = 0; i < FFX_PARALLELSORT_ELEMENTS_PER_THREAD; i++) 254 | { 255 | uint DataIndex = BaseIndex + (i * FFX_PARALLELSORT_THREADGROUP_SIZE) + localID; 256 | 257 | uint col = ((i * FFX_PARALLELSORT_THREADGROUP_SIZE) + localID) / FFX_PARALLELSORT_ELEMENTS_PER_THREAD; 258 | uint row = ((i * FFX_PARALLELSORT_THREADGROUP_SIZE) + localID) % FFX_PARALLELSORT_ELEMENTS_PER_THREAD; 259 | gs_FFX_PARALLELSORT_LDS[row][col] = (DataIndex < numValuesToScan) ? 
ScanSrc[BinOffset + DataIndex] : 0; 260 | } 261 | 262 | // Wait for everyone to catch up 263 | GroupMemoryBarrierWithGroupSync(); 264 | 265 | uint threadgroupSum = 0; 266 | // Calculate the local scan-prefix for current thread 267 | for (i = 0; i < FFX_PARALLELSORT_ELEMENTS_PER_THREAD; i++) 268 | { 269 | uint tmp = gs_FFX_PARALLELSORT_LDS[i][localID]; 270 | gs_FFX_PARALLELSORT_LDS[i][localID] = threadgroupSum; 271 | threadgroupSum += tmp; 272 | } 273 | 274 | // Scan prefix partial sums 275 | threadgroupSum = FFX_ParallelSort_BlockScanPrefix(threadgroupSum, localID); 276 | 277 | // Add reduced partial sums if requested 278 | uint partialSum = 0; 279 | if (AddPartialSums) 280 | { 281 | // Partial sum additions are a little special as they are tailored to the optimal number of 282 | // thread groups we ran in the beginning, so need to take that into account 283 | partialSum = ScanScratch[groupID]; 284 | } 285 | 286 | // Add the block scanned-prefixes back in 287 | for (i = 0; i < FFX_PARALLELSORT_ELEMENTS_PER_THREAD; i++) 288 | gs_FFX_PARALLELSORT_LDS[i][localID] += threadgroupSum; 289 | 290 | // Wait for everyone to catch up 291 | GroupMemoryBarrierWithGroupSync(); 292 | 293 | // Perform coalesced writes to scan dst 294 | for (i = 0; i < FFX_PARALLELSORT_ELEMENTS_PER_THREAD; i++) 295 | { 296 | uint DataIndex = BaseIndex + (i * FFX_PARALLELSORT_THREADGROUP_SIZE) + localID; 297 | 298 | uint col = ((i * FFX_PARALLELSORT_THREADGROUP_SIZE) + localID) / FFX_PARALLELSORT_ELEMENTS_PER_THREAD; 299 | uint row = ((i * FFX_PARALLELSORT_THREADGROUP_SIZE) + localID) % FFX_PARALLELSORT_ELEMENTS_PER_THREAD; 300 | 301 | if (DataIndex < numValuesToScan) 302 | ScanDst[BinOffset + DataIndex] = gs_FFX_PARALLELSORT_LDS[row][col] + partialSum; 303 | } 304 | } 305 | 306 | // Offset cache to avoid loading the offsets all the time 307 | groupshared uint gs_FFX_PARALLELSORT_BinOffsetCache[FFX_PARALLELSORT_THREADGROUP_SIZE]; 308 | // Local histogram for offset calculations 309 | groupshared 
uint gs_FFX_PARALLELSORT_LocalHistogram[FFX_PARALLELSORT_SORT_BIN_COUNT]; 310 | // Scratch area for algorithm 311 | groupshared uint gs_FFX_PARALLELSORT_LDSScratch[FFX_PARALLELSORT_THREADGROUP_SIZE]; 312 | void FFX_ParallelSort_Scatter_uint(uint localID, uint groupID, FFX_ParallelSortCB CBuffer, uint ShiftBit, RWStructuredBuffer SrcBuffer, RWStructuredBuffer DstBuffer, RWStructuredBuffer SumTable 313 | #ifdef kRS_ValueCopy 314 | ,RWStructuredBuffer SrcPayload, RWStructuredBuffer DstPayload 315 | #endif // kRS_ValueCopy 316 | ) 317 | { 318 | // Load the sort bin threadgroup offsets into LDS for faster referencing 319 | if (localID < FFX_PARALLELSORT_SORT_BIN_COUNT) 320 | gs_FFX_PARALLELSORT_BinOffsetCache[localID] = SumTable[localID * CBuffer.NumThreadGroups + groupID]; 321 | 322 | // Wait for everyone to catch up 323 | GroupMemoryBarrierWithGroupSync(); 324 | 325 | // Data is processed in blocks, and how many we process can changed based on how much data we are processing 326 | // versus how many thread groups we are processing with 327 | int BlockSize = FFX_PARALLELSORT_ELEMENTS_PER_THREAD * FFX_PARALLELSORT_THREADGROUP_SIZE; 328 | 329 | // Figure out this thread group's index into the block data (taking into account thread groups that need to do extra reads) 330 | uint ThreadgroupBlockStart = (BlockSize * CBuffer.NumBlocksPerThreadGroup * groupID); 331 | uint NumBlocksToProcess = CBuffer.NumBlocksPerThreadGroup; 332 | 333 | if (groupID >= CBuffer.NumThreadGroups - CBuffer.NumThreadGroupsWithAdditionalBlocks) 334 | { 335 | ThreadgroupBlockStart += (groupID - (CBuffer.NumThreadGroups - CBuffer.NumThreadGroupsWithAdditionalBlocks)) * BlockSize; 336 | NumBlocksToProcess++; 337 | } 338 | 339 | // Get the block start index for this thread 340 | uint BlockIndex = ThreadgroupBlockStart + localID; 341 | 342 | // Count value occurences 343 | uint newCount; 344 | for (int BlockCount = 0; BlockCount < NumBlocksToProcess; BlockCount++, BlockIndex += BlockSize) 345 | { 346 | 
uint DataIndex = BlockIndex; 347 | 348 | // Pre-load the key values in order to hide some of the read latency 349 | uint srcKeys[FFX_PARALLELSORT_ELEMENTS_PER_THREAD]; 350 | srcKeys[0] = SrcBuffer[DataIndex]; 351 | srcKeys[1] = SrcBuffer[DataIndex + FFX_PARALLELSORT_THREADGROUP_SIZE]; 352 | srcKeys[2] = SrcBuffer[DataIndex + (FFX_PARALLELSORT_THREADGROUP_SIZE * 2)]; 353 | srcKeys[3] = SrcBuffer[DataIndex + (FFX_PARALLELSORT_THREADGROUP_SIZE * 3)]; 354 | 355 | #ifdef kRS_ValueCopy 356 | uint srcValues[FFX_PARALLELSORT_ELEMENTS_PER_THREAD]; 357 | srcValues[0] = SrcPayload[DataIndex]; 358 | srcValues[1] = SrcPayload[DataIndex + FFX_PARALLELSORT_THREADGROUP_SIZE]; 359 | srcValues[2] = SrcPayload[DataIndex + (FFX_PARALLELSORT_THREADGROUP_SIZE * 2)]; 360 | srcValues[3] = SrcPayload[DataIndex + (FFX_PARALLELSORT_THREADGROUP_SIZE * 3)]; 361 | #endif // kRS_ValueCopy 362 | 363 | for (int i = 0; i < FFX_PARALLELSORT_ELEMENTS_PER_THREAD; i++) 364 | { 365 | // Clear the local histogram 366 | if (localID < FFX_PARALLELSORT_SORT_BIN_COUNT) 367 | gs_FFX_PARALLELSORT_LocalHistogram[localID] = 0; 368 | 369 | uint localKey = (DataIndex < CBuffer.NumKeys ? srcKeys[i] : 0xffffffff); 370 | #ifdef kRS_ValueCopy 371 | uint localValue = (DataIndex < CBuffer.NumKeys ? 
srcValues[i] : 0); 372 | #endif // kRS_ValueCopy 373 | 374 | // Sort the keys locally in LDS 375 | for (uint bitShift = 0; bitShift < FFX_PARALLELSORT_SORT_BITS_PER_PASS; bitShift += 2) 376 | { 377 | // Figure out the keyIndex 378 | uint keyIndex = (localKey >> ShiftBit) & 0xf; 379 | uint bitKey = (keyIndex >> bitShift) & 0x3; 380 | 381 | // Create a packed histogram 382 | uint packedHistogram = 1U << (bitKey * 8); 383 | 384 | // Sum up all the packed keys (generates counted offsets up to current thread group) 385 | uint localSum = FFX_ParallelSort_BlockScanPrefix(packedHistogram, localID); 386 | 387 | // Last thread stores the updated histogram counts for the thread group 388 | // Scratch = 0xsum3|sum2|sum1|sum0 for thread group 389 | if (localID == (FFX_PARALLELSORT_THREADGROUP_SIZE - 1)) 390 | gs_FFX_PARALLELSORT_LDSScratch[0] = localSum + packedHistogram; 391 | 392 | // Wait for everyone to catch up 393 | GroupMemoryBarrierWithGroupSync(); 394 | 395 | // Load the sums value for the thread group 396 | packedHistogram = gs_FFX_PARALLELSORT_LDSScratch[0]; 397 | 398 | // Add prefix offsets for all 4 bit "keys" (packedHistogram = 0xsum2_1_0|sum1_0|sum0|0) 399 | packedHistogram = (packedHistogram << 8) + (packedHistogram << 16) + (packedHistogram << 24); 400 | 401 | // Calculate the proper offset for this thread's value 402 | localSum += packedHistogram; 403 | 404 | // Calculate target offset 405 | uint keyOffset = (localSum >> (bitKey * 8)) & 0xff; 406 | 407 | // Re-arrange the keys (store, sync, load) 408 | gs_FFX_PARALLELSORT_LDSSums[keyOffset] = localKey; 409 | GroupMemoryBarrierWithGroupSync(); 410 | localKey = gs_FFX_PARALLELSORT_LDSSums[localID]; 411 | 412 | // Wait for everyone to catch up 413 | GroupMemoryBarrierWithGroupSync(); 414 | 415 | #ifdef kRS_ValueCopy 416 | // Re-arrange the values if we have them (store, sync, load) 417 | gs_FFX_PARALLELSORT_LDSSums[keyOffset] = localValue; 418 | GroupMemoryBarrierWithGroupSync(); 419 | localValue = 
gs_FFX_PARALLELSORT_LDSSums[localID]; 420 | 421 | // Wait for everyone to catch up 422 | GroupMemoryBarrierWithGroupSync(); 423 | #endif // kRS_ValueCopy 424 | } 425 | 426 | // Need to recalculate the keyIndex on this thread now that values have been copied around the thread group 427 | uint keyIndex = (localKey >> ShiftBit) & 0xf; 428 | 429 | // Reconstruct histogram 430 | InterlockedAdd(gs_FFX_PARALLELSORT_LocalHistogram[keyIndex], 1); 431 | 432 | // Wait for everyone to catch up 433 | GroupMemoryBarrierWithGroupSync(); 434 | 435 | // Prefix histogram 436 | uint histogramPrefixSum = WavePrefixSum(localID < FFX_PARALLELSORT_SORT_BIN_COUNT ? gs_FFX_PARALLELSORT_LocalHistogram[localID] : 0); 437 | 438 | // Broadcast prefix-sum via LDS 439 | if (localID < FFX_PARALLELSORT_SORT_BIN_COUNT) 440 | gs_FFX_PARALLELSORT_LDSScratch[localID] = histogramPrefixSum; 441 | 442 | // Get the global offset for this key out of the cache 443 | uint globalOffset = gs_FFX_PARALLELSORT_BinOffsetCache[keyIndex]; 444 | 445 | // Wait for everyone to catch up 446 | GroupMemoryBarrierWithGroupSync(); 447 | 448 | // Get the local offset (at this point the keys are all in increasing order from 0 -> num bins in localID 0 -> thread group size) 449 | uint localOffset = localID - gs_FFX_PARALLELSORT_LDSScratch[keyIndex]; 450 | 451 | // Write to destination 452 | uint totalOffset = globalOffset + localOffset; 453 | 454 | if (totalOffset < CBuffer.NumKeys) 455 | { 456 | DstBuffer[totalOffset] = localKey; 457 | 458 | #ifdef kRS_ValueCopy 459 | DstPayload[totalOffset] = localValue; 460 | #endif // kRS_ValueCopy 461 | } 462 | 463 | // Wait for everyone to catch up 464 | GroupMemoryBarrierWithGroupSync(); 465 | 466 | // Update the cached histogram for the next set of entries 467 | if (localID < FFX_PARALLELSORT_SORT_BIN_COUNT) 468 | gs_FFX_PARALLELSORT_BinOffsetCache[localID] += gs_FFX_PARALLELSORT_LocalHistogram[localID]; 469 | 470 | DataIndex += FFX_PARALLELSORT_THREADGROUP_SIZE; // Increase the data 
offset by thread group size 471 | } 472 | } 473 | } 474 | 475 | void FFX_ParallelSort_SetupIndirectParams(uint NumKeys, uint MaxThreadGroups, RWStructuredBuffer CBuffer, RWStructuredBuffer CountScatterArgs, RWStructuredBuffer ReduceScanArgs) 476 | { 477 | CBuffer[0].NumKeys = NumKeys; 478 | 479 | uint BlockSize = FFX_PARALLELSORT_ELEMENTS_PER_THREAD * FFX_PARALLELSORT_THREADGROUP_SIZE; 480 | uint NumBlocks = (NumKeys + BlockSize - 1) / BlockSize; 481 | 482 | // Figure out data distribution 483 | uint NumThreadGroupsToRun = MaxThreadGroups; 484 | uint BlocksPerThreadGroup = (NumBlocks / NumThreadGroupsToRun); 485 | CBuffer[0].NumThreadGroupsWithAdditionalBlocks = NumBlocks % NumThreadGroupsToRun; 486 | 487 | if (NumBlocks < NumThreadGroupsToRun) 488 | { 489 | BlocksPerThreadGroup = 1; 490 | NumThreadGroupsToRun = NumBlocks; 491 | CBuffer[0].NumThreadGroupsWithAdditionalBlocks = 0; 492 | } 493 | 494 | CBuffer[0].NumThreadGroups = NumThreadGroupsToRun; 495 | CBuffer[0].NumBlocksPerThreadGroup = BlocksPerThreadGroup; 496 | 497 | // Calculate the number of thread groups to run for reduction (each thread group can process BlockSize number of entries) 498 | uint NumReducedThreadGroupsToRun = FFX_PARALLELSORT_SORT_BIN_COUNT * ((BlockSize > NumThreadGroupsToRun) ? 
1 : (NumThreadGroupsToRun + BlockSize - 1) / BlockSize); 499 | CBuffer[0].NumReduceThreadgroupPerBin = NumReducedThreadGroupsToRun / FFX_PARALLELSORT_SORT_BIN_COUNT; 500 | CBuffer[0].NumScanValues = NumReducedThreadGroupsToRun; // The number of reduce thread groups becomes our scan count (as each thread group writes out 1 value that needs scan prefix) 501 | 502 | // Setup dispatch arguments 503 | CountScatterArgs[0] = NumThreadGroupsToRun; 504 | CountScatterArgs[1] = 1; 505 | CountScatterArgs[2] = 1; 506 | 507 | ReduceScanArgs[0] = NumReducedThreadGroupsToRun; 508 | ReduceScanArgs[1] = 1; 509 | ReduceScanArgs[2] = 1; 510 | } 511 | 512 | #endif // __cplusplus 513 | 514 | -------------------------------------------------------------------------------- /sample/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.6) 2 | 3 | option (GFX_API_DX12 "Build with DX12" ON) 4 | option (GFX_API_VK "Build with Vulkan" ON) 5 | 6 | if(NOT DEFINED GFX_API) 7 | project (FFX_ParallelSort_) 8 | else() 9 | project (FFX_ParallelSort_${GFX_API}) 10 | 11 | set_property(DIRECTORY ${CMAKE_PROJECT_DIR} PROPERTY VS_STARTUP_PROJECT ${PROJECT_NAME}) 12 | 13 | if(GFX_API STREQUAL DX12) 14 | set(GFX_API_DX12 ON) 15 | set(GFX_API_VK OFF) 16 | elseif(GFX_API STREQUAL VK) 17 | set(GFX_API_DX12 OFF) 18 | set(GFX_API_VK ON) 19 | else() 20 | message(STATUS "----------------------------------------------------------------------------------------") 21 | message(STATUS "") 22 | message(STATUS "** Almost there!!") 23 | message(STATUS "") 24 | message(STATUS " This framework supports DX12 and VULKAN, you need to invoke cmake in one of these ways:") 25 | message(STATUS "") 26 | message(STATUS " Examples:") 27 | message(STATUS " Generate selected one:") 28 | message(STATUS " cmake -DGFX_API=DX12") 29 | message(STATUS " cmake -DGFX_API=VK") 30 | message(STATUS " Generate with switches (Default is ON):") 31 | message(STATUS " cmake 
[-DGFX_API_DX12=ON|OFF] [-DGFX_API_VK=ON|OFF]") 32 | message(STATUS "") 33 | message(STATUS "----------------------------------------------------------------------------------------") 34 | message(FATAL_ERROR "") 35 | endif() 36 | endif() 37 | 38 | # Check MSVC toolset version, Visual Studio 2019 required 39 | if(MSVC_TOOLSET_VERSION VERSION_LESS 142) 40 | message(FATAL_ERROR "Cannot find MSVC toolset version 142 or greater. Please make sure Visual Studio 2019 or newer installed") 41 | endif() 42 | 43 | # output exe to bin directory 44 | SET(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_HOME_DIRECTORY}/bin) 45 | foreach( OUTPUTCONFIG ${CMAKE_CONFIGURATION_TYPES} ) 46 | string( TOUPPER ${OUTPUTCONFIG} OUTPUTCONFIG ) 47 | set( CMAKE_RUNTIME_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${CMAKE_HOME_DIRECTORY}/bin ) 48 | endforeach( OUTPUTCONFIG CMAKE_CONFIGURATION_TYPES ) 49 | 50 | add_compile_options(/MP) 51 | 52 | # reference libs used by both backends 53 | add_subdirectory(libs/cauldron) 54 | 55 | # application icon 56 | set(icon_src 57 | ${CMAKE_CURRENT_SOURCE_DIR}/libs/cauldron/src/common/Icon/GPUOpenChip.ico 58 | ${CMAKE_CURRENT_SOURCE_DIR}/libs/cauldron/src/common/Icon/resource.h 59 | ${CMAKE_CURRENT_SOURCE_DIR}/libs/cauldron/src/common/Icon/Cauldron_Common.rc 60 | ) 61 | 62 | if(GFX_API_VK) 63 | find_package(Vulkan REQUIRED) 64 | add_subdirectory(src/VK) 65 | endif() 66 | if(GFX_API_DX12) 67 | add_subdirectory(src/DX12) 68 | endif() 69 | 70 | set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/libs/cauldron/src/common/Icon/Cauldron_Common.rc PROPERTIES VS_TOOL_OVERRIDE "Resource compiler") 71 | set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/libs/cauldron/src/common/Icon/GPUOpenChip.ico PROPERTIES VS_TOOL_OVERRIDE "Image") 72 | -------------------------------------------------------------------------------- /sample/build/.gitignore: -------------------------------------------------------------------------------- 1 | DX12/ 2 | VK/ 
-------------------------------------------------------------------------------- /sample/build/GenerateSolutions.bat: -------------------------------------------------------------------------------- 1 | mkdir DX12 2 | cd DX12 3 | cmake ..\.. -DGFX_API=DX12 4 | cd .. 5 | 6 | mkdir VK 7 | cd VK 8 | cmake ..\.. -DGFX_API=VK 9 | cd .. -------------------------------------------------------------------------------- /sample/common.cmake: -------------------------------------------------------------------------------- 1 | # 2 | # enables multithreading compilation 3 | # 4 | 5 | add_compile_options(/MP) 6 | 7 | # 8 | # includes cauldron's helper cmakes 9 | # 10 | include(${CMAKE_CURRENT_SOURCE_DIR}/../../libs/cauldron/common.cmake) 11 | 12 | # 13 | # Add manifest so the app uses the right DPI settings (post-build step: mt.exe merges dpiawarescaling.manifest into resource #1 of the built target file) 14 | # 15 | function(addManifest PROJECT_NAME) 16 | IF (MSVC) 17 | IF (CMAKE_MAJOR_VERSION LESS 3) 18 | MESSAGE(WARNING "CMake version 3.0 or newer is required use build variable TARGET_FILE") 19 | ELSE() 20 | ADD_CUSTOM_COMMAND( 21 | TARGET ${PROJECT_NAME} 22 | POST_BUILD 23 | COMMAND "mt.exe" -manifest \"${CMAKE_CURRENT_SOURCE_DIR}\\dpiawarescaling.manifest\" -inputresource:\"$<TARGET_FILE:${PROJECT_NAME}>\"\;\#1 -outputresource:\"$<TARGET_FILE:${PROJECT_NAME}>\"\;\#1 24 | COMMENT "Adding display aware manifest..." 
25 | ) 26 | ENDIF() 27 | ENDIF(MSVC) 28 | endfunction() -------------------------------------------------------------------------------- /sample/src/Common/FFXParallelSort.json: -------------------------------------------------------------------------------- 1 | { 2 | "globals": { 3 | "CpuValidationLayerEnabled": false, 4 | "GpuValidationLayerEnabled": false, 5 | "presentationMode": 0, 6 | "width": 1920, 7 | "height": 1080, 8 | "activeScene": 0, 9 | "benchmark": false, 10 | "vsync": false, 11 | "stablePowerState": false, 12 | "fontsize": 13 13 | }, 14 | "BenchmarkSettings": { 15 | "timeStep": 1, 16 | "timeStart": 0, 17 | "timeEnd": 0, 18 | "exitWhenTimeEnds": true, 19 | "resultsFilename": "FFXParallelSort.csv", 20 | "warmUpFrames": 500 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /sample/src/Common/Validate1080p.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GPUOpen-Effects/FidelityFX-ParallelSort/0c539948c8d196ae338d91efbc8ca495f1ea0d1d/sample/src/Common/Validate1080p.png -------------------------------------------------------------------------------- /sample/src/Common/Validate2K.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GPUOpen-Effects/FidelityFX-ParallelSort/0c539948c8d196ae338d91efbc8ca495f1ea0d1d/sample/src/Common/Validate2K.png -------------------------------------------------------------------------------- /sample/src/Common/Validate4K.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GPUOpen-Effects/FidelityFX-ParallelSort/0c539948c8d196ae338d91efbc8ca495f1ea0d1d/sample/src/Common/Validate4K.png -------------------------------------------------------------------------------- /sample/src/Common/shaders/ParallelSortCS.hlsl: 
-------------------------------------------------------------------------------- 1 | // ParallelSortCS.hlsl 2 | // 3 | // Copyright(c) 2021 Advanced Micro Devices, Inc.All rights reserved. 4 | // Permission is hereby granted, free of charge, to any person obtaining a copy 5 | // of this software and associated documentation files(the "Software"), to deal 6 | // in the Software without restriction, including without limitation the rights 7 | // to use, copy, modify, merge, publish, distribute, sublicense, and / or sell 8 | // copies of the Software, and to permit persons to whom the Software is 9 | // furnished to do so, subject to the following conditions : 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 15 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 18 | // THE SOFTWARE. 
19 | 20 | 21 | //-------------------------------------------------------------------------------------- 22 | // ParallelSort Shaders/Includes 23 | //-------------------------------------------------------------------------------------- 24 | #define FFX_HLSL 25 | #include "FFX-ParallelSort/FFX_ParallelSort.h" 26 | 27 | [[vk::binding(0, 0)]] ConstantBuffer<FFX_ParallelSortCB> CBuffer : register(b0); // Constant buffer 28 | [[vk::binding(0, 1)]] cbuffer SetupIndirectCB : register(b1) // Setup Indirect Constant buffer 29 | { 30 | uint NumKeysIndex; 31 | uint MaxThreadGroups; 32 | }; 33 | 34 | struct RootConstantData { 35 | uint CShiftBit; 36 | }; 37 | 38 | #ifdef VK_Const 39 | [[vk::push_constant]] RootConstantData rootConstData; // Store the shift bit directly in the root signature 40 | #else 41 | ConstantBuffer<RootConstantData> rootConstData : register(b2); // Store the shift bit directly in the root signature 42 | #endif // VK_Const 43 | 44 | [[vk::binding(0, 2)]] RWStructuredBuffer<uint> SrcBuffer : register(u0, space0); // The unsorted keys or scan data 45 | [[vk::binding(2, 2)]] RWStructuredBuffer<uint> SrcPayload : register(u0, space1); // The payload data 46 | 47 | [[vk::binding(0, 4)]] RWStructuredBuffer<uint> SumTable : register(u0, space2); // The sum table we will write sums to 48 | [[vk::binding(1, 4)]] RWStructuredBuffer<uint> ReduceTable : register(u0, space3); // The reduced sum table we will write sums to 49 | 50 | [[vk::binding(1, 2)]] RWStructuredBuffer<uint> DstBuffer : register(u0, space4); // The sorted keys or prefixed data 51 | [[vk::binding(3, 2)]] RWStructuredBuffer<uint> DstPayload : register(u0, space5); // the sorted payload data 52 | 53 | [[vk::binding(0, 3)]] RWStructuredBuffer<uint> ScanSrc : register(u0, space6); // Source for Scan Data 54 | [[vk::binding(1, 3)]] RWStructuredBuffer<uint> ScanDst : register(u0, space7); // Destination for Scan Data 55 | [[vk::binding(2, 3)]] RWStructuredBuffer<uint> ScanScratch : register(u0, space8); // Scratch data for Scan 56 | 57 | [[vk::binding(0, 5)]] RWStructuredBuffer<uint> NumKeysBuffer : 
register(u0, space9); // Number of keys to sort for indirect execution 58 | [[vk::binding(1, 5)]] RWStructuredBuffer<FFX_ParallelSortCB> CBufferUAV : register(u0, space10); // UAV for constant buffer parameters for indirect execution 59 | [[vk::binding(2, 5)]] RWStructuredBuffer<uint> CountScatterArgs: register(u0, space11); // Count and Scatter Args for indirect execution 60 | [[vk::binding(3, 5)]] RWStructuredBuffer<uint> ReduceScanArgs : register(u0, space12); // Reduce and Scan Args for indirect execution 61 | 62 | // Entry points below are thin wrappers: each forwards the resources declared above to the matching FFX_ParallelSort_* routine from FFX_ParallelSort.h 63 | // FPS Count 64 | [numthreads(FFX_PARALLELSORT_THREADGROUP_SIZE, 1, 1)] 65 | void FPS_Count(uint localID : SV_GroupThreadID, uint groupID : SV_GroupID) 66 | { 67 | // Call the uint version of the count part of the algorithm 68 | FFX_ParallelSort_Count_uint(localID, groupID, CBuffer, rootConstData.CShiftBit, SrcBuffer, SumTable); 69 | } 70 | 71 | // FPS Reduce 72 | [numthreads(FFX_PARALLELSORT_THREADGROUP_SIZE, 1, 1)] 73 | void FPS_CountReduce(uint localID : SV_GroupThreadID, uint groupID : SV_GroupID) 74 | { 75 | // Call the reduce part of the algorithm 76 | FFX_ParallelSort_ReduceCount(localID, groupID, CBuffer, SumTable, ReduceTable); 77 | } 78 | 79 | // FPS Scan 80 | [numthreads(FFX_PARALLELSORT_THREADGROUP_SIZE, 1, 1)] 81 | void FPS_Scan(uint localID : SV_GroupThreadID, uint groupID : SV_GroupID) 82 | { 83 | uint BaseIndex = FFX_PARALLELSORT_ELEMENTS_PER_THREAD * FFX_PARALLELSORT_THREADGROUP_SIZE * groupID; 84 | FFX_ParallelSort_ScanPrefix(CBuffer.NumScanValues, localID, groupID, 0, BaseIndex, false, 85 | CBuffer, ScanSrc, ScanDst, ScanScratch); 86 | } 87 | 88 | // FPS ScanAdd 89 | [numthreads(FFX_PARALLELSORT_THREADGROUP_SIZE, 1, 1)] 90 | void FPS_ScanAdd(uint localID : SV_GroupThreadID, uint groupID : SV_GroupID) 91 | { 92 | // When doing adds, we need to access data differently because reduce 93 | // has a more specialized access pattern to match optimized count 94 | // Access needs to be done similarly to reduce 95 | // Figure out what bin data we are reducing 96 | 
uint BinID = groupID / CBuffer.NumReduceThreadgroupPerBin; 97 | uint BinOffset = BinID * CBuffer.NumThreadGroups; 98 | 99 | // Get the base index for this thread group 100 | uint BaseIndex = (groupID % CBuffer.NumReduceThreadgroupPerBin) * FFX_PARALLELSORT_ELEMENTS_PER_THREAD * FFX_PARALLELSORT_THREADGROUP_SIZE; 101 | 102 | FFX_ParallelSort_ScanPrefix(CBuffer.NumThreadGroups, localID, groupID, BinOffset, BaseIndex, true, 103 | CBuffer, ScanSrc, ScanDst, ScanScratch); 104 | } 105 | 106 | // FPS Scatter 107 | [numthreads(FFX_PARALLELSORT_THREADGROUP_SIZE, 1, 1)] 108 | void FPS_Scatter(uint localID : SV_GroupThreadID, uint groupID : SV_GroupID) 109 | { 110 | FFX_ParallelSort_Scatter_uint(localID, groupID, CBuffer, rootConstData.CShiftBit, SrcBuffer, DstBuffer, SumTable 111 | #ifdef kRS_ValueCopy 112 | ,SrcPayload, DstPayload 113 | #endif // kRS_ValueCopy 114 | ); 115 | } 116 | // FPS SetupIndirectParameters: single-thread dispatch that passes the key count read from NumKeysBuffer[NumKeysIndex] to the indirect-parameter setup routine 117 | [numthreads(1, 1, 1)] 118 | void FPS_SetupIndirectParameters(uint localID : SV_GroupThreadID) 119 | { 120 | FFX_ParallelSort_SetupIndirectParams(NumKeysBuffer[NumKeysIndex], MaxThreadGroups, CBufferUAV, CountScatterArgs, ReduceScanArgs); 121 | } 122 | -------------------------------------------------------------------------------- /sample/src/Common/shaders/ParallelSortVerify.hlsl: -------------------------------------------------------------------------------- 1 | // ParallelSortVerify.hlsl 2 | // 3 | // Copyright(c) 2021 Advanced Micro Devices, Inc.All rights reserved. 
4 | // Permission is hereby granted, free of charge, to any person obtaining a copy 5 | // of this software and associated documentation files(the "Software"), to deal 6 | // in the Software without restriction, including without limitation the rights 7 | // to use, copy, modify, merge, publish, distribute, sublicense, and / or sell 8 | // copies of the Software, and to permit persons to whom the Software is 9 | // furnished to do so, subject to the following conditions : 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 15 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 18 | // THE SOFTWARE. 
19 | 20 | //-------------------------------------------------------------------------------------- 21 | // Render Verification Shaders/Constant buffers 22 | //-------------------------------------------------------------------------------------- 23 | [[vk::binding(0, 0)]] cbuffer ParallelSortRenderCB : register(b0) // If you change this, also change struct ParallelSortRenderCB in ParallelSort.h 24 | { 25 | int CB_Width; 26 | int CB_Height; 27 | int CB_SortWidth; 28 | int CB_SortHeight; 29 | }; 30 | 31 | [[vk::binding(0, 1)]] RWStructuredBuffer<int> SortBuffer : register(u0, space0); 32 | [[vk::binding(0, 2)]] Texture2D ValidationTexture : register(t0, space0); 33 | 34 | struct VertexOut 35 | { 36 | float4 PosOut : SV_POSITION; 37 | float2 UVOut : TEXCOORD; 38 | }; 39 | 40 | static const float4 FullScreenVertsPos[3] = { float4(-1, 1, 1, 1), float4(3, 1, 1, 1), float4(-1, -3, 1, 1) }; 41 | static const float2 FullScreenVertsUVs[3] = { float2(0, 0), float2(2, 0), float2(0, 2) }; 42 | // Vertex shader: picks one of the three full-screen triangle vertices (position + UV) by SV_VertexID 43 | VertexOut FullscreenVS(uint vertexId : SV_VertexID) 44 | { 45 | VertexOut outVert; 46 | outVert.PosOut = FullScreenVertsPos[vertexId]; 47 | outVert.UVOut = FullScreenVertsUVs[vertexId]; 48 | return outVert; 49 | } 50 | // Pixel shader: reads the value stored in SortBuffer for this pixel and outputs the ValidationTexture texel that value indexes; pixels outside the centered sort area keep the zero color 51 | float4 RenderSortValidationPS(VertexOut vertexIn) : SV_Target 52 | { 53 | float4 outColor = float4(0, 0, 0, 0); 54 | 55 | // When calculating the coordinates to use to lookup sort data, 56 | // always aim to keep the results centered on screen (to account for users 57 | // resizing the window, or dealing with sort size bigger than our current window) 58 | int2 uvCoord = vertexIn.UVOut * int2(CB_Width, CB_Height); 59 | 60 | // xRes > sort width 61 | int xStart, yStart; 62 | xStart = (CB_Width - CB_SortWidth) / 2; // Will be positive when screen width is larger than our key source, and negative when smaller 63 | yStart = (CB_Height - CB_SortHeight) / 2; // Will be positive when screen height is larger than our key source, and negative when smaller 64 | 65 | int2 
lookupCoord = uvCoord.xy - int2(xStart, yStart); 66 | 67 | if (lookupCoord.x >= 0 && lookupCoord.y >= 0 && lookupCoord.x < CB_SortWidth && lookupCoord.y < CB_SortHeight) 68 | { 69 | int value = SortBuffer[lookupCoord.y * CB_SortWidth + lookupCoord.x]; 70 | 71 | int height = value / CB_SortWidth; 72 | int2 uv = int2(value - (height * CB_SortWidth), height); 73 | outColor = ValidationTexture[uv]; 74 | } 75 | 76 | return pow(outColor, 2.2); 77 | } -------------------------------------------------------------------------------- /sample/src/DX12/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project (${PROJECT_NAME}) 2 | 3 | include(${CMAKE_CURRENT_SOURCE_DIR}/../../common.cmake) 4 | 5 | add_compile_options(/MP) 6 | add_compile_definitions(FFX_CPP) 7 | 8 | set(sources 9 | sample.cpp 10 | sample.h 11 | stdafx.cpp 12 | stdafx.h 13 | Renderer.cpp 14 | Renderer.h 15 | UI.cpp 16 | UI.h 17 | ParallelSort.cpp 18 | ParallelSort.h 19 | dpiawarescaling.manifest) 20 | 21 | set(shader_sources 22 | ${CMAKE_CURRENT_SOURCE_DIR}/../Common/shaders/ParallelSortCS.hlsl 23 | ${CMAKE_CURRENT_SOURCE_DIR}/../Common/shaders/ParallelSortVerify.hlsl) 24 | 25 | set(fidelityfx_sources 26 | ${CMAKE_CURRENT_SOURCE_DIR}/../../../FFX-ParallelSort/FFX_ParallelSort.h) 27 | 28 | set(common_sources 29 | ${CMAKE_CURRENT_SOURCE_DIR}/../Common/FFXParallelSort.json 30 | ${CMAKE_CURRENT_SOURCE_DIR}/../Common/Validate4K.png 31 | ${CMAKE_CURRENT_SOURCE_DIR}/../Common/Validate2K.png 32 | ${CMAKE_CURRENT_SOURCE_DIR}/../Common/Validate1080p.png) 33 | 34 | copyCommand("${shader_sources}" ${CMAKE_HOME_DIRECTORY}/bin/ShaderLibDX) 35 | copyCommand("${fidelityfx_sources}" ${CMAKE_HOME_DIRECTORY}/bin/ShaderLibDX/FFX-ParallelSort) 36 | copyCommand("${common_sources}" ${CMAKE_HOME_DIRECTORY}/bin) 37 | 38 | source_group("Common" FILES ${common_sources}) 39 | source_group("Shaders" FILES ${shader_sources}) 40 | source_group("FidelityFX" FILES ${fidelityfx_sources}) 41 | 
source_group("Sources" FILES ${sources}) 42 | source_group("Icon" FILES ${icon_src}) # defined in top-level CMakeLists.txt 43 | 44 | # prevent VS from processing/compiling these files (the shader list in this file is named shader_sources) 45 | set_source_files_properties(${shader_sources} PROPERTIES VS_TOOL_OVERRIDE "Text") 46 | 47 | add_executable(${PROJECT_NAME} WIN32 ${common_sources} ${shader_sources} ${sources} ${fidelityfx_sources} ${icon_src}) 48 | target_link_libraries(${PROJECT_NAME} LINK_PUBLIC Cauldron_DX12 ImGUI amd_ags DXC d3dcompiler D3D12 DXGI) 49 | set_target_properties(${PROJECT_NAME} PROPERTIES VS_DEBUGGER_WORKING_DIRECTORY "${CMAKE_HOME_DIRECTORY}/bin" DEBUG_POSTFIX "d") 50 | 51 | addManifest(${PROJECT_NAME}) 52 | -------------------------------------------------------------------------------- /sample/src/DX12/ParallelSort.cpp: -------------------------------------------------------------------------------- 1 | // ParallelSort.cpp 2 | // 3 | // Copyright(c) 2021 Advanced Micro Devices, Inc.All rights reserved. 4 | // Permission is hereby granted, free of charge, to any person obtaining a copy 5 | // of this software and associated documentation files(the "Software"), to deal 6 | // in the Software without restriction, including without limitation the rights 7 | // to use, copy, modify, merge, publish, distribute, sublicense, and / or sell 8 | // copies of the Software, and to permit persons to whom the Software is 9 | // furnished to do so, subject to the following conditions : 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 
12 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 15 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 18 | // THE SOFTWARE. 19 | 20 | #include "stdafx.h" 21 | #include "../../../FFX-ParallelSort/FFX_ParallelSort.h" 22 | 23 | #include <numeric> 24 | #include <random> 25 | #include <vector> 26 | 27 | static const uint32_t NumKeys[] = { 1920 * 1080, 2560 * 1440, 3840 * 2160 }; 28 | 29 | ////////////////////////////////////////////////////////////////////////// 30 | 31 | ////////////////////////////////////////////////////////////////////////// 32 | // For doing command-line based benchmark runs 33 | int FFXParallelSort::KeySetOverride = -1; 34 | void FFXParallelSort::OverrideKeySet(int ResolutionOverride) 35 | { 36 | KeySetOverride = ResolutionOverride; 37 | } 38 | bool FFXParallelSort::PayloadOverride = false; 39 | void FFXParallelSort::OverridePayload() 40 | { 41 | PayloadOverride = true; 42 | } 43 | ////////////////////////////////////////////////////////////////////////// 44 | 45 | // Create all of the sort data for the sample: shuffled 32-bit keys for the three key-set sizes in NumKeys, one payload buffer, and two destination key/payload buffer pairs with their UAVs 46 | void FFXParallelSort::CreateKeyPayloadBuffers() 47 | { 48 | std::vector<uint32_t> KeyData1080(NumKeys[0]); 49 | std::vector<uint32_t> KeyData2K(NumKeys[1]); 50 | std::vector<uint32_t> KeyData4K(NumKeys[2]); 51 | 52 | // Populate the buffers with linear access index 53 | std::iota(KeyData1080.begin(), KeyData1080.end(), 0); 54 | std::iota(KeyData2K.begin(), KeyData2K.end(), 0); 55 | std::iota(KeyData4K.begin(), KeyData4K.end(), 0); 56 | 57 | // Shuffle the data 58 | std::shuffle(KeyData1080.begin(), KeyData1080.end(), std::mt19937{ std::random_device{}() }); 59 | std::shuffle(KeyData2K.begin(), 
KeyData2K.end(), std::mt19937{ std::random_device{}() }); 60 | std::shuffle(KeyData4K.begin(), KeyData4K.end(), std::mt19937{ std::random_device{}() }); 61 | 62 | // 1080p 63 | CD3DX12_RESOURCE_DESC ResourceDesc = CD3DX12_RESOURCE_DESC::Buffer(sizeof(uint32_t) * NumKeys[0], D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); 64 | m_SrcKeyBuffers[0].InitBuffer(m_pDevice, "SrcKeys1080", &ResourceDesc, sizeof(uint32_t), D3D12_RESOURCE_STATE_COPY_DEST); 65 | // 2K 66 | ResourceDesc = CD3DX12_RESOURCE_DESC::Buffer(sizeof(uint32_t) * NumKeys[1], D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); 67 | m_SrcKeyBuffers[1].InitBuffer(m_pDevice, "SrcKeys2K", &ResourceDesc, sizeof(uint32_t), D3D12_RESOURCE_STATE_COPY_DEST); 68 | // 4K 69 | ResourceDesc = CD3DX12_RESOURCE_DESC::Buffer(sizeof(uint32_t) * NumKeys[2], D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); 70 | m_SrcKeyBuffers[2].InitBuffer(m_pDevice, "SrcKeys4K", &ResourceDesc, sizeof(uint32_t), D3D12_RESOURCE_STATE_COPY_DEST); 71 | m_SrcPayloadBuffers.InitBuffer(m_pDevice, "SrcPayloadBuffer", &ResourceDesc, sizeof(uint32_t), D3D12_RESOURCE_STATE_COPY_DEST); 72 | 73 | // The DstKey and DstPayload buffers will be used as src/dst when sorting. 
A copy of the 74 | // source key/payload will be copied into them before hand so we can keep our original values 75 | ResourceDesc = CD3DX12_RESOURCE_DESC::Buffer(sizeof(uint32_t) * NumKeys[2], D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); 76 | m_DstKeyBuffers[0].InitBuffer(m_pDevice, "DstKeyBuf0", &ResourceDesc, sizeof(uint32_t), D3D12_RESOURCE_STATE_UNORDERED_ACCESS); 77 | m_DstKeyBuffers[1].InitBuffer(m_pDevice, "DstKeyBuf1", &ResourceDesc, sizeof(uint32_t), D3D12_RESOURCE_STATE_UNORDERED_ACCESS); 78 | m_DstPayloadBuffers[0].InitBuffer(m_pDevice, "DstPayloadBuf0", &ResourceDesc, sizeof(uint32_t), D3D12_RESOURCE_STATE_UNORDERED_ACCESS); 79 | m_DstPayloadBuffers[1].InitBuffer(m_pDevice, "DstPayloadBuf1", &ResourceDesc, sizeof(uint32_t), D3D12_RESOURCE_STATE_UNORDERED_ACCESS); 80 | 81 | // Copy data in 82 | 83 | // 1080 84 | uint8_t* pKeyDataBuffer = m_pUploadHeap->Suballocate(NumKeys[0] * sizeof(uint32_t), sizeof(uint32_t)); 85 | memcpy(pKeyDataBuffer, KeyData1080.data() , sizeof(uint32_t) * NumKeys[0]); 86 | m_pUploadHeap->GetCommandList()->CopyBufferRegion(m_SrcKeyBuffers[0].GetResource(), 0, m_pUploadHeap->GetResource(), pKeyDataBuffer - m_pUploadHeap->BasePtr(), sizeof(uint32_t) * NumKeys[0]); 87 | 88 | // 2K 89 | pKeyDataBuffer = m_pUploadHeap->Suballocate(NumKeys[1] * sizeof(uint32_t), sizeof(uint32_t)); 90 | memcpy(pKeyDataBuffer, KeyData2K.data(), sizeof(uint32_t) * NumKeys[1]); 91 | m_pUploadHeap->GetCommandList()->CopyBufferRegion(m_SrcKeyBuffers[1].GetResource(), 0, m_pUploadHeap->GetResource(), pKeyDataBuffer - m_pUploadHeap->BasePtr(), sizeof(uint32_t) * NumKeys[1]); 92 | 93 | // 4K 94 | pKeyDataBuffer = m_pUploadHeap->Suballocate(NumKeys[2] * sizeof(uint32_t), sizeof(uint32_t)); 95 | memcpy(pKeyDataBuffer, KeyData4K.data(), sizeof(uint32_t) * NumKeys[2]); 96 | m_pUploadHeap->GetCommandList()->CopyBufferRegion(m_SrcKeyBuffers[2].GetResource(), 0, m_pUploadHeap->GetResource(), pKeyDataBuffer - m_pUploadHeap->BasePtr(), sizeof(uint32_t) * NumKeys[2]); 97 
| uint8_t* pPayloadDataBuffer = m_pUploadHeap->Suballocate(NumKeys[2] * sizeof(uint32_t), sizeof(uint32_t)); 98 | memcpy(pPayloadDataBuffer, KeyData4K.data(), sizeof(uint32_t) * NumKeys[2]); // Copy the 4k source data for payload (it doesn't matter what the payload is as we really only want it to measure cost of copying/sorting) 99 | m_pUploadHeap->GetCommandList()->CopyBufferRegion(m_SrcPayloadBuffers.GetResource(), 0, m_pUploadHeap->GetResource(), pPayloadDataBuffer - m_pUploadHeap->BasePtr(), sizeof(uint32_t) * NumKeys[2]); 100 | 101 | 102 | // Once we are done copying the data, put in barriers to transition the source resources to 103 | // copy source (which is what they will stay for the duration of app runtime) 104 | CD3DX12_RESOURCE_BARRIER Barriers[6] = { CD3DX12_RESOURCE_BARRIER::Transition(m_SrcKeyBuffers[2].GetResource(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_COPY_SOURCE), 105 | CD3DX12_RESOURCE_BARRIER::Transition(m_SrcPayloadBuffers.GetResource(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_COPY_SOURCE), 106 | CD3DX12_RESOURCE_BARRIER::Transition(m_SrcKeyBuffers[1].GetResource(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_COPY_SOURCE), 107 | CD3DX12_RESOURCE_BARRIER::Transition(m_SrcKeyBuffers[0].GetResource(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_COPY_SOURCE), 108 | 109 | // Copy the data into the dst[0] buffers for use on first frame 110 | CD3DX12_RESOURCE_BARRIER::Transition(m_DstKeyBuffers[0].GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_DEST), 111 | CD3DX12_RESOURCE_BARRIER::Transition(m_DstPayloadBuffers[0].GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_DEST) }; 112 | m_pUploadHeap->GetCommandList()->ResourceBarrier(6, Barriers); 113 | 114 | m_pUploadHeap->GetCommandList()->CopyBufferRegion(m_DstKeyBuffers[0].GetResource(), 0, m_SrcKeyBuffers[m_UIResolutionSize].GetResource(), 0, sizeof(uint32_t) * NumKeys[m_UIResolutionSize]); 115 
| m_pUploadHeap->GetCommandList()->CopyBufferRegion(m_DstPayloadBuffers[0].GetResource(), 0, m_SrcPayloadBuffers.GetResource(), 0, sizeof(uint32_t) * NumKeys[m_UIResolutionSize]); 116 | 117 | // Put the dst buffers back to UAVs for sort usage 118 | Barriers[0] = CD3DX12_RESOURCE_BARRIER::Transition(m_DstKeyBuffers[0].GetResource(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); 119 | Barriers[1] = CD3DX12_RESOURCE_BARRIER::Transition(m_DstPayloadBuffers[0].GetResource(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); 120 | m_pUploadHeap->GetCommandList()->ResourceBarrier(2, Barriers); 121 | 122 | // Create UAVs 123 | m_SrcKeyBuffers[2].CreateBufferUAV(2, nullptr, &m_SrcKeyUAVTable); 124 | m_SrcKeyBuffers[1].CreateBufferUAV(1, nullptr, &m_SrcKeyUAVTable); 125 | m_SrcKeyBuffers[0].CreateBufferUAV(0, nullptr, &m_SrcKeyUAVTable); 126 | m_SrcPayloadBuffers.CreateBufferUAV(0, nullptr, &m_SrcPayloadUAV); 127 | m_DstKeyBuffers[0].CreateBufferUAV(0, nullptr, &m_DstKeyUAVTable); 128 | m_DstKeyBuffers[1].CreateBufferUAV(1, nullptr, &m_DstKeyUAVTable); 129 | m_DstPayloadBuffers[0].CreateBufferUAV(0, nullptr, &m_DstPayloadUAVTable); 130 | m_DstPayloadBuffers[1].CreateBufferUAV(1, nullptr, &m_DstPayloadUAVTable); 131 | } 132 | 133 | // Compile specified radix sort shader and create pipeline 134 | void FFXParallelSort::CompileRadixPipeline(const char* shaderFile, const DefineList* defines, const char* entryPoint, ID3D12PipelineState*& pPipeline) 135 | { 136 | std::string CompileFlags("-T cs_6_0"); 137 | #ifdef _DEBUG 138 | CompileFlags += " -Zi -Od"; 139 | #endif // _DEBUG 140 | 141 | D3D12_SHADER_BYTECODE shaderByteCode = {}; 142 | CompileShaderFromFile(shaderFile, defines, entryPoint, CompileFlags.c_str(), &shaderByteCode); 143 | 144 | D3D12_COMPUTE_PIPELINE_STATE_DESC descPso = {}; 145 | descPso.CS = shaderByteCode; 146 | descPso.Flags = D3D12_PIPELINE_STATE_FLAG_NONE; 147 | descPso.pRootSignature = m_pFPSRootSignature; 148 | 
descPso.NodeMask = 0; 149 | 150 | ThrowIfFailed(m_pDevice->GetDevice()->CreateComputePipelineState(&descPso, IID_PPV_ARGS(&pPipeline))); 151 | SetName(pPipeline, entryPoint); 152 | } 153 | 154 | // Parallel Sort initialization 155 | void FFXParallelSort::OnCreate(Device* pDevice, ResourceViewHeaps* pResourceViewHeaps, DynamicBufferRing* pConstantBufferRing, UploadHeap* pUploadHeap, SwapChain* pSwapChain) 156 | { 157 | m_pDevice = pDevice; 158 | m_pUploadHeap = pUploadHeap; 159 | m_pResourceViewHeaps = pResourceViewHeaps; 160 | m_pConstantBufferRing = pConstantBufferRing; 161 | m_MaxNumThreadgroups = 800; 162 | 163 | // Overrides for testing 164 | if (KeySetOverride >= 0) 165 | m_UIResolutionSize = KeySetOverride; 166 | if (PayloadOverride) 167 | m_UISortPayload = true; 168 | 169 | // Allocate UAVs to use for data 170 | m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(3, &m_SrcKeyUAVTable); 171 | m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(1, &m_SrcPayloadUAV); 172 | m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(2, &m_DstKeyUAVTable); 173 | m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(2, &m_DstPayloadUAVTable); 174 | m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(1, &m_FPSScratchUAV); 175 | m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(1, &m_FPSReducedScratchUAV); 176 | m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(1, &m_IndirectKeyCountsUAV); 177 | m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(1, &m_IndirectConstantBufferUAV); 178 | m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(1, &m_IndirectCountScatterArgsUAV); 179 | m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(1, &m_IndirectReduceScanArgsUAV); 180 | m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(3, &m_ValidateTextureSRV); 181 | 182 | // Create resources to test with. 
Sorts will be done for 1080p, 2K, and 4K resolution data sets 183 | CreateKeyPayloadBuffers(); 184 | 185 | // We are just going to fudge the indirect execution parameters for each resolution 186 | CD3DX12_RESOURCE_DESC ResourceDesc = CD3DX12_RESOURCE_DESC::Buffer(sizeof(uint32_t) * 3, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); 187 | m_IndirectKeyCounts.InitBuffer(m_pDevice, "IndirectKeyCounts", &ResourceDesc, sizeof(uint32_t), D3D12_RESOURCE_STATE_COPY_DEST); 188 | m_IndirectKeyCounts.CreateBufferUAV(0, nullptr, &m_IndirectKeyCountsUAV); 189 | uint8_t* pNumKeysBuffer = m_pUploadHeap->Suballocate(sizeof(uint32_t) * 3, D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT); 190 | memcpy(pNumKeysBuffer, NumKeys, sizeof(uint32_t) * 3); 191 | m_pUploadHeap->GetCommandList()->CopyBufferRegion(m_IndirectKeyCounts.GetResource(), 0, m_pUploadHeap->GetResource(), pNumKeysBuffer - m_pUploadHeap->BasePtr(), sizeof(uint32_t) * 3); 192 | CD3DX12_RESOURCE_BARRIER Barrier = CD3DX12_RESOURCE_BARRIER::Transition(m_IndirectKeyCounts.GetResource(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); 193 | m_pUploadHeap->GetCommandList()->ResourceBarrier(1, &Barrier); 194 | 195 | // Create resources for sort validation (image that goes from shuffled to sorted) 196 | m_Validate1080pTexture.InitFromFile(m_pDevice, m_pUploadHeap, "Validate1080p.png", false, 1.f, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS ); 197 | m_Validate1080pTexture.CreateSRV(0, &m_ValidateTextureSRV, 0); 198 | m_Validate2KTexture.InitFromFile(m_pDevice, m_pUploadHeap, "Validate2K.png", false, 1.f, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); 199 | m_Validate2KTexture.CreateSRV(1, &m_ValidateTextureSRV, 0); 200 | m_Validate4KTexture.InitFromFile(m_pDevice, m_pUploadHeap, "Validate4K.png", false, 1.f, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); 201 | m_Validate4KTexture.CreateSRV(2, &m_ValidateTextureSRV, 0); 202 | 203 | // Finish up 204 | m_pUploadHeap->FlushAndFinish(); 205 | 206 | // Allocate the scratch 
buffers needed for radix sort 207 | uint32_t scratchBufferSize; 208 | uint32_t reducedScratchBufferSize; 209 | FFX_ParallelSort_CalculateScratchResourceSize(NumKeys[2], scratchBufferSize, reducedScratchBufferSize); 210 | 211 | ResourceDesc = CD3DX12_RESOURCE_DESC::Buffer(scratchBufferSize, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); 212 | m_FPSScratchBuffer.InitBuffer(m_pDevice, "Scratch", &ResourceDesc, sizeof(uint32_t), D3D12_RESOURCE_STATE_UNORDERED_ACCESS); 213 | m_FPSScratchBuffer.CreateBufferUAV(0, nullptr, &m_FPSScratchUAV); 214 | 215 | ResourceDesc = CD3DX12_RESOURCE_DESC::Buffer(reducedScratchBufferSize, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); 216 | m_FPSReducedScratchBuffer.InitBuffer(m_pDevice, "ReducedScratch", &ResourceDesc, sizeof(uint32_t), D3D12_RESOURCE_STATE_UNORDERED_ACCESS); 217 | m_FPSReducedScratchBuffer.CreateBufferUAV(0, nullptr, &m_FPSReducedScratchUAV); 218 | 219 | // Allocate the buffers for indirect execution of the algorithm 220 | ResourceDesc = CD3DX12_RESOURCE_DESC::Buffer(sizeof(FFX_ParallelSortCB), D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); 221 | m_IndirectConstantBuffer.InitBuffer(m_pDevice, "IndirectConstantBuffer", &ResourceDesc, sizeof(FFX_ParallelSortCB), D3D12_RESOURCE_STATE_UNORDERED_ACCESS); 222 | m_IndirectConstantBuffer.CreateBufferUAV(0, nullptr, &m_IndirectConstantBufferUAV); 223 | 224 | ResourceDesc = CD3DX12_RESOURCE_DESC::Buffer(sizeof(uint32_t) * 3, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); 225 | m_IndirectCountScatterArgs.InitBuffer(m_pDevice, "IndirectCount_Scatter_DispatchArgs", &ResourceDesc, sizeof(uint32_t), D3D12_RESOURCE_STATE_UNORDERED_ACCESS); 226 | m_IndirectCountScatterArgs.CreateBufferUAV(0, nullptr, &m_IndirectCountScatterArgsUAV); 227 | m_IndirectReduceScanArgs.InitBuffer(m_pDevice, "IndirectReduceScanArgs", &ResourceDesc, sizeof(uint32_t), D3D12_RESOURCE_STATE_UNORDERED_ACCESS); 228 | m_IndirectReduceScanArgs.CreateBufferUAV(0, nullptr, &m_IndirectReduceScanArgsUAV); 229 | 230 | // Create 
root signature for Radix sort passes 231 | { 232 | D3D12_DESCRIPTOR_RANGE descRange[15]; 233 | D3D12_ROOT_PARAMETER rootParams[16]; 234 | 235 | // Constant buffer table (always have 1) 236 | descRange[0] = { D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 1, 0, 0, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND }; 237 | rootParams[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; rootParams[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; 238 | rootParams[0].Descriptor = { descRange[0].BaseShaderRegister, descRange[0].RegisterSpace }; 239 | 240 | // Constant buffer to setup indirect params (indirect) 241 | descRange[1] = { D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 1, 1, 0, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND }; 242 | rootParams[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; rootParams[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; 243 | rootParams[1].Descriptor = { descRange[1].BaseShaderRegister, descRange[1].RegisterSpace }; 244 | 245 | rootParams[2].ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; rootParams[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; 246 | rootParams[2].Constants = { 2, 0, 1 }; 247 | 248 | // SrcBuffer (sort or scan) 249 | descRange[2] = { D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, 0, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND }; 250 | rootParams[3].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; rootParams[3].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; 251 | rootParams[3].DescriptorTable = { 1, &descRange[2] }; 252 | 253 | // ScrPayload (sort only) 254 | descRange[3] = { D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, 1, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND }; 255 | rootParams[4].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; rootParams[4].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; 256 | rootParams[4].DescriptorTable = { 1, &descRange[3] }; 257 | 258 | // Scratch (sort only) 259 | descRange[4] = { D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, 2, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND }; 260 | rootParams[5].ParameterType = 
D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; rootParams[5].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; 261 | rootParams[5].DescriptorTable = { 1, &descRange[4] }; 262 | 263 | // Scratch (reduced) 264 | descRange[5] = { D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, 3, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND }; 265 | rootParams[6].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; rootParams[6].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; 266 | rootParams[6].DescriptorTable = { 1, &descRange[5] }; 267 | 268 | // DstBuffer (sort or scan) 269 | descRange[6] = { D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, 4, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND }; 270 | rootParams[7].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; rootParams[7].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; 271 | rootParams[7].DescriptorTable = { 1, &descRange[6] }; 272 | 273 | // DstPayload (sort only) 274 | descRange[7] = { D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, 5, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND }; 275 | rootParams[8].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; rootParams[8].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; 276 | rootParams[8].DescriptorTable = { 1, &descRange[7] }; 277 | 278 | // ScanSrc 279 | descRange[8] = { D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, 6, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND }; 280 | rootParams[9].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; rootParams[9].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; 281 | rootParams[9].DescriptorTable = { 1, &descRange[8] }; 282 | 283 | // ScanDst 284 | descRange[9] = { D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, 7, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND }; 285 | rootParams[10].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; rootParams[10].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; 286 | rootParams[10].DescriptorTable = { 1, &descRange[9] }; 287 | 288 | // ScanScratch 289 | descRange[10] = { D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, 8, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND }; 290 | 
rootParams[11].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; rootParams[11].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; 291 | rootParams[11].DescriptorTable = { 1, &descRange[10] }; 292 | 293 | // NumKeys (indirect) 294 | descRange[11] = { D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, 9, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND }; 295 | rootParams[12].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; rootParams[12].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; 296 | rootParams[12].DescriptorTable = { 1, &descRange[11] }; 297 | 298 | // CBufferUAV (indirect) 299 | descRange[12] = { D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, 10, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND }; 300 | rootParams[13].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; rootParams[13].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; 301 | rootParams[13].DescriptorTable = { 1, &descRange[12] }; 302 | 303 | // CountScatterArgs (indirect) 304 | descRange[13] = { D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, 11, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND }; 305 | rootParams[14].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; rootParams[14].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; 306 | rootParams[14].DescriptorTable = { 1, &descRange[13] }; 307 | 308 | // ReduceScanArgs (indirect) 309 | descRange[14] = { D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, 12, D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND }; 310 | rootParams[15].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; rootParams[15].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; 311 | rootParams[15].DescriptorTable = { 1, &descRange[14] }; 312 | 313 | D3D12_ROOT_SIGNATURE_DESC rootSigDesc = {}; 314 | rootSigDesc.NumParameters = 16; 315 | rootSigDesc.pParameters = rootParams; 316 | rootSigDesc.NumStaticSamplers = 0; 317 | rootSigDesc.pStaticSamplers = nullptr; 318 | rootSigDesc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE; 319 | 320 | ID3DBlob* pOutBlob, * pErrorBlob = nullptr; 321 | 
ThrowIfFailed(D3D12SerializeRootSignature(&rootSigDesc, D3D_ROOT_SIGNATURE_VERSION_1, &pOutBlob, &pErrorBlob)); 322 | ThrowIfFailed(pDevice->GetDevice()->CreateRootSignature(0, pOutBlob->GetBufferPointer(), pOutBlob->GetBufferSize(), IID_PPV_ARGS(&m_pFPSRootSignature))); 323 | SetName(m_pFPSRootSignature, "FPS_Signature"); 324 | 325 | pOutBlob->Release(); 326 | if (pErrorBlob) 327 | pErrorBlob->Release(); 328 | 329 | // Also create the command signature for the indirect version 330 | D3D12_INDIRECT_ARGUMENT_DESC dispatch = {}; 331 | dispatch.Type = D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH; 332 | D3D12_COMMAND_SIGNATURE_DESC desc = {}; 333 | desc.ByteStride = sizeof(D3D12_DISPATCH_ARGUMENTS); 334 | desc.NodeMask = 0; 335 | desc.NumArgumentDescs = 1; 336 | desc.pArgumentDescs = &dispatch; 337 | 338 | ThrowIfFailed(pDevice->GetDevice()->CreateCommandSignature(&desc, nullptr, IID_PPV_ARGS(&m_pFPSCommandSignature))); 339 | m_pFPSCommandSignature->SetName(L"FPS_CommandSignature"); 340 | } 341 | 342 | // Create root signature for Render of RadixBuffer info 343 | { 344 | CD3DX12_DESCRIPTOR_RANGE DescRange[3]; 345 | CD3DX12_ROOT_PARAMETER RTSlot[3]; 346 | 347 | // Constant buffer 348 | DescRange[0].Init(D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 1, 0, 0); 349 | RTSlot[0].InitAsConstantBufferView(0, 0, D3D12_SHADER_VISIBILITY_ALL); 350 | 351 | // UAV for RadixBufer 352 | DescRange[1].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, 0); 353 | RTSlot[1].InitAsDescriptorTable(1, &DescRange[1], D3D12_SHADER_VISIBILITY_ALL); 354 | 355 | // SRV for Validation texture 356 | DescRange[2].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 0, 0); 357 | RTSlot[2].InitAsDescriptorTable(1, &DescRange[2], D3D12_SHADER_VISIBILITY_ALL); 358 | 359 | CD3DX12_ROOT_SIGNATURE_DESC descRootSignature = CD3DX12_ROOT_SIGNATURE_DESC(); 360 | descRootSignature.NumParameters = 3; 361 | descRootSignature.pParameters = RTSlot; 362 | descRootSignature.NumStaticSamplers = 0; 363 | descRootSignature.pStaticSamplers = nullptr; 364 | 
descRootSignature.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE; 365 | 366 | ID3DBlob* pOutBlob, * pErrorBlob = nullptr; 367 | ThrowIfFailed(D3D12SerializeRootSignature(&descRootSignature, D3D_ROOT_SIGNATURE_VERSION_1, &pOutBlob, &pErrorBlob)); 368 | ThrowIfFailed(pDevice->GetDevice()->CreateRootSignature(0, pOutBlob->GetBufferPointer(), pOutBlob->GetBufferSize(), IID_PPV_ARGS(&m_pRenderRootSignature))); 369 | SetName(m_pRenderRootSignature, "FPS_RenderResults_Signature"); 370 | 371 | pOutBlob->Release(); 372 | if (pErrorBlob) 373 | pErrorBlob->Release(); 374 | } 375 | 376 | ////////////////////////////////////////////////////////////////////////// 377 | // Create pipelines for radix sort 378 | { 379 | // Create all of the necessary pipelines for Sort and Scan 380 | 381 | // SetupIndirectParams (indirect only) 382 | CompileRadixPipeline("ParallelSortCS.hlsl", nullptr, "FPS_SetupIndirectParameters", m_pFPSIndirectSetupParametersPipeline); 383 | 384 | // Radix count (sum table generation) 385 | CompileRadixPipeline("ParallelSortCS.hlsl", nullptr, "FPS_Count", m_pFPSCountPipeline); 386 | // Radix count reduce (sum table reduction for offset prescan) 387 | CompileRadixPipeline("ParallelSortCS.hlsl", nullptr, "FPS_CountReduce", m_pFPSCountReducePipeline); 388 | // Radix scan (prefix scan) 389 | CompileRadixPipeline("ParallelSortCS.hlsl", nullptr, "FPS_Scan", m_pFPSScanPipeline); 390 | // Radix scan add (prefix scan + reduced prefix scan addition) 391 | CompileRadixPipeline("ParallelSortCS.hlsl", nullptr, "FPS_ScanAdd", m_pFPSScanAddPipeline); 392 | // Radix scatter (key redistribution) 393 | CompileRadixPipeline("ParallelSortCS.hlsl", nullptr, "FPS_Scatter", m_pFPSScatterPipeline); 394 | 395 | // Radix scatter with payload (key and payload redistribution) 396 | DefineList defines; 397 | defines["kRS_ValueCopy"] = std::to_string(1); 398 | CompileRadixPipeline("ParallelSortCS.hlsl", &defines, "FPS_Scatter", m_pFPSScatterPayloadPipeline); 399 | } 400 | 401 | 
////////////////////////////////////////////////////////////////////////// 402 | // Create pipelines for render pass 403 | { 404 | #ifdef _DEBUG 405 | std::string CompileFlagsVS("-T vs_6_0 -Zi -Od"); 406 | std::string CompileFlagsPS("-T ps_6_0 -Zi -Od"); 407 | #else 408 | std::string CompileFlagsVS("-T vs_6_0"); 409 | std::string CompileFlagsPS("-T ps_6_0"); 410 | #endif // _DEBUG 411 | 412 | D3D12_SHADER_BYTECODE shaderByteCodeVS = {}; 413 | CompileShaderFromFile("ParallelSortVerify.hlsl", nullptr, "FullscreenVS", CompileFlagsVS.c_str(), &shaderByteCodeVS); 414 | 415 | D3D12_SHADER_BYTECODE shaderByteCodePS = {}; 416 | CompileShaderFromFile("ParallelSortVerify.hlsl", nullptr, "RenderSortValidationPS", CompileFlagsPS.c_str(), &shaderByteCodePS); 417 | 418 | D3D12_GRAPHICS_PIPELINE_STATE_DESC descPso = {}; 419 | descPso.InputLayout = { nullptr, 0 }; 420 | descPso.pRootSignature = m_pRenderRootSignature; 421 | descPso.VS = shaderByteCodeVS; 422 | descPso.PS = shaderByteCodePS; 423 | descPso.RasterizerState = CD3DX12_RASTERIZER_DESC(D3D12_DEFAULT); 424 | descPso.RasterizerState.CullMode = D3D12_CULL_MODE_NONE; 425 | descPso.BlendState = CD3DX12_BLEND_DESC(D3D12_DEFAULT); 426 | descPso.BlendState.RenderTarget[0].BlendEnable = FALSE; 427 | descPso.DepthStencilState = CD3DX12_DEPTH_STENCIL_DESC(D3D12_DEFAULT); 428 | descPso.DepthStencilState.DepthEnable = FALSE; 429 | descPso.SampleMask = UINT_MAX; 430 | descPso.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; 431 | descPso.NumRenderTargets = 1; 432 | descPso.RTVFormats[0] = pSwapChain->GetFormat(); 433 | descPso.DSVFormat = DXGI_FORMAT_D32_FLOAT; 434 | descPso.SampleDesc.Count = 1; 435 | descPso.NodeMask = 0; 436 | ThrowIfFailed(m_pDevice->GetDevice()->CreateGraphicsPipelineState(&descPso, IID_PPV_ARGS(&m_pRenderResultVerificationPipeline))); 437 | SetName(m_pRenderResultVerificationPipeline, "RenderFPSResults_Pipeline"); 438 | } 439 | } 440 | 441 | // Parallel Sort termination 442 | void 
FFXParallelSort::OnDestroy()
{
    // Tears down everything owned by this class: the verification render objects, the
    // indirect-execution resources, the radix sort pipelines/root signature and the
    // key/payload buffers.
    //
    // Raw COM pointers go through a null-checking helper that also resets the pointer, so an
    // object that is already null (for example because OnDestroy already ran) is skipped
    // instead of being dereferenced, and no dangling pointer is left behind.
    auto releaseAndReset = [](auto*& pObject)
    {
        if (pObject != nullptr)
        {
            pObject->Release();
            pObject = nullptr;
        }
    };

    // Release verification render resources
    releaseAndReset(m_pRenderResultVerificationPipeline);
    releaseAndReset(m_pRenderRootSignature);
    m_Validate4KTexture.OnDestroy();
    m_Validate2KTexture.OnDestroy();
    m_Validate1080pTexture.OnDestroy();

    // Release radix sort indirect resources
    m_IndirectKeyCounts.OnDestroy();
    m_IndirectConstantBuffer.OnDestroy();
    m_IndirectCountScatterArgs.OnDestroy();
    m_IndirectReduceScanArgs.OnDestroy();
    releaseAndReset(m_pFPSCommandSignature);
    releaseAndReset(m_pFPSIndirectSetupParametersPipeline);

    // Release radix sort algorithm resources
    m_FPSScratchBuffer.OnDestroy();
    m_FPSReducedScratchBuffer.OnDestroy();
    releaseAndReset(m_pFPSRootSignature);
    releaseAndReset(m_pFPSCountPipeline);
    releaseAndReset(m_pFPSCountReducePipeline);
    releaseAndReset(m_pFPSScanPipeline);
    releaseAndReset(m_pFPSScanAddPipeline);
    releaseAndReset(m_pFPSScatterPipeline);
    releaseAndReset(m_pFPSScatterPayloadPipeline);

    // Release all of our key/payload buffers
    m_SrcKeyBuffers[0].OnDestroy();
    m_SrcKeyBuffers[1].OnDestroy();
    m_SrcKeyBuffers[2].OnDestroy();
    m_SrcPayloadBuffers.OnDestroy();
    m_DstKeyBuffers[0].OnDestroy();
    m_DstKeyBuffers[1].OnDestroy();
    m_DstPayloadBuffers[0].OnDestroy();
    m_DstPayloadBuffers[1].OnDestroy();
}

// This allows us to validate that the sorted data is actually in ascending order. Only used when doing algorithm changes.
482 | #ifdef DEVELOPERMODE 483 | void FFXParallelSort::CreateValidationResources(ID3D12GraphicsCommandList* pCommandList, RdxDX12ResourceInfo* pKeyDstInfo) 484 | { 485 | // Create the read-back resource 486 | CD3DX12_HEAP_PROPERTIES readBackHeapProperties(D3D12_HEAP_TYPE_READBACK); 487 | CD3DX12_RESOURCE_DESC bufferDesc = CD3DX12_RESOURCE_DESC::Buffer(sizeof(uint32_t) * NumKeys[m_UIResolutionSize], D3D12_RESOURCE_FLAG_NONE); 488 | ThrowIfFailed(m_pDevice->GetDevice()->CreateCommittedResource(&readBackHeapProperties, D3D12_HEAP_FLAG_NONE, &bufferDesc, D3D12_RESOURCE_STATE_COPY_DEST, 489 | nullptr, IID_PPV_ARGS(&m_ReadBackBufferResource))); 490 | m_ReadBackBufferResource->SetName(L"Validation Read-back Buffer"); 491 | 492 | // And the fence for us to wait on 493 | ThrowIfFailed(m_pDevice->GetDevice()->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&m_ReadBackFence))); 494 | m_ReadBackFence->SetName(L"Validation Read-back Fence"); 495 | 496 | // Transition, copy, and transition back 497 | pCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(pKeyDstInfo->pResource, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE)); 498 | pCommandList->CopyBufferRegion(m_ReadBackBufferResource, 0, pKeyDstInfo->pResource, 0, sizeof(uint32_t) * NumKeys[m_UIResolutionSize]); 499 | pCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(pKeyDstInfo->pResource, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS)); 500 | } 501 | 502 | void FFXParallelSort::WaitForValidationResults() 503 | { 504 | if (!m_ReadBackFence && !m_ReadBackBufferResource) 505 | return; 506 | 507 | // Insert the fence to wait on and create the event to trigger when it's been processed 508 | ThrowIfFailed(m_pDevice->GetGraphicsQueue()->Signal(m_ReadBackFence, 1)); 509 | m_ReadBackFenceEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr); 510 | m_ReadBackFence->SetEventOnCompletion(1, m_ReadBackFenceEvent); 511 | 512 | // Wait for fence to 
have been processed 513 | WaitForSingleObject(m_ReadBackFenceEvent, INFINITE); 514 | CloseHandle(m_ReadBackFenceEvent); 515 | 516 | // Validate data ... 517 | Trace("Validating Data"); 518 | 519 | D3D12_RANGE range; 520 | range.Begin = 0; 521 | range.End = sizeof(uint32_t) * NumKeys[m_UIResolutionSize]; 522 | void* pData; 523 | m_ReadBackBufferResource->Map(0, &range, &pData); 524 | 525 | uint32_t* SortedData = (uint32_t*)pData; 526 | 527 | // Do the validation 528 | uint32_t keysToValidate = NumKeys[m_UIResolutionSize]; 529 | bool dataValid = true; 530 | 531 | for (uint32_t i = 0; i < keysToValidate - 1; i++) 532 | { 533 | if (SortedData[i] > SortedData[i + 1]) 534 | { 535 | std::string message = "Sort invalidated. Entry "; 536 | message += std::to_string(i); 537 | message += " is larger next entry.\n"; 538 | Trace(message); 539 | dataValid = false; 540 | } 541 | } 542 | 543 | m_ReadBackBufferResource->Unmap(0, nullptr); 544 | 545 | if (dataValid) 546 | Trace("Data Valid"); 547 | 548 | // We are done with the fence and the read-back buffer 549 | m_ReadBackBufferResource->Release(); 550 | m_ReadBackBufferResource = nullptr; 551 | m_ReadBackFence->Release(); 552 | m_ReadBackFence = nullptr; 553 | } 554 | #endif // DEVELOPERMODE 555 | 556 | // Because we are sorting the data every frame, need to reset to unsorted version of data before running sort 557 | void FFXParallelSort::CopySourceDataForFrame(ID3D12GraphicsCommandList* pCommandList) 558 | { 559 | // Copy the contents the source buffer to the dstBuffer[0] each frame in order to not 560 | // lose our original data 561 | 562 | // Copy the data into the dst[0] buffers for use on first frame 563 | CD3DX12_RESOURCE_BARRIER Barriers[2] = { CD3DX12_RESOURCE_BARRIER::Transition(m_DstKeyBuffers[0].GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_DEST), 564 | CD3DX12_RESOURCE_BARRIER::Transition(m_DstPayloadBuffers[0].GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, 
D3D12_RESOURCE_STATE_COPY_DEST) }; 565 | pCommandList->ResourceBarrier(2, Barriers); 566 | 567 | pCommandList->CopyBufferRegion(m_DstKeyBuffers[0].GetResource(), 0, m_SrcKeyBuffers[m_UIResolutionSize].GetResource(), 0, sizeof(uint32_t) * NumKeys[m_UIResolutionSize]); 568 | pCommandList->CopyBufferRegion(m_DstPayloadBuffers[0].GetResource(), 0, m_SrcPayloadBuffers.GetResource(), 0, sizeof(uint32_t) * NumKeys[m_UIResolutionSize]); 569 | 570 | // Put the dst buffers back to UAVs for sort usage 571 | Barriers[0] = CD3DX12_RESOURCE_BARRIER::Transition(m_DstKeyBuffers[0].GetResource(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); 572 | Barriers[1] = CD3DX12_RESOURCE_BARRIER::Transition(m_DstPayloadBuffers[0].GetResource(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); 573 | pCommandList->ResourceBarrier(2, Barriers); 574 | } 575 | 576 | // Perform Parallel Sort (radix-based sort) 577 | void FFXParallelSort::Sort(ID3D12GraphicsCommandList* pCommandList, bool isBenchmarking, float benchmarkTime) 578 | { 579 | bool bIndirectDispatch = m_UIIndirectSort; 580 | 581 | std::string markerText = "FFXParallelSort"; 582 | if (bIndirectDispatch) markerText += " Indirect"; 583 | UserMarker marker(pCommandList, markerText.c_str()); 584 | 585 | FFX_ParallelSortCB constantBufferData = { 0 }; 586 | 587 | // Bind the descriptor heaps 588 | ID3D12DescriptorHeap* pDescriptorHeap = m_pResourceViewHeaps->GetCBV_SRV_UAVHeap(); 589 | pCommandList->SetDescriptorHeaps(1, &pDescriptorHeap); 590 | 591 | // Bind the root signature 592 | pCommandList->SetComputeRootSignature(m_pFPSRootSignature); 593 | 594 | // Fill in the constant buffer data structure (this will be done by a shader in the indirect version) 595 | uint32_t NumThreadgroupsToRun; 596 | uint32_t NumReducedThreadgroupsToRun; 597 | if (!bIndirectDispatch) 598 | { 599 | uint32_t NumberOfKeys = NumKeys[m_UIResolutionSize]; 600 | FFX_ParallelSort_SetConstantAndDispatchData(NumberOfKeys, 
m_MaxNumThreadgroups, constantBufferData, NumThreadgroupsToRun, NumReducedThreadgroupsToRun); 601 | } 602 | else 603 | { 604 | struct SetupIndirectCB 605 | { 606 | uint32_t NumKeysIndex; 607 | uint32_t MaxThreadGroups; 608 | }; 609 | SetupIndirectCB IndirectSetupCB; 610 | IndirectSetupCB.NumKeysIndex = m_UIResolutionSize; 611 | IndirectSetupCB.MaxThreadGroups = m_MaxNumThreadgroups; 612 | 613 | // Copy the data into the constant buffer 614 | D3D12_GPU_VIRTUAL_ADDRESS constantBuffer = m_pConstantBufferRing->AllocConstantBuffer(sizeof(SetupIndirectCB), &IndirectSetupCB); 615 | pCommandList->SetComputeRootConstantBufferView(1, constantBuffer); // SetupIndirect Constant buffer 616 | 617 | // Bind other buffer 618 | pCommandList->SetComputeRootDescriptorTable(12, m_IndirectKeyCountsUAV.GetGPU()); // Key counts 619 | pCommandList->SetComputeRootDescriptorTable(13, m_IndirectConstantBufferUAV.GetGPU()); // Indirect Sort Constant Buffer 620 | pCommandList->SetComputeRootDescriptorTable(14, m_IndirectCountScatterArgsUAV.GetGPU()); // Indirect Sort Count/Scatter Args 621 | pCommandList->SetComputeRootDescriptorTable(15, m_IndirectReduceScanArgsUAV.GetGPU()); // Indirect Sort Reduce/Scan Args 622 | 623 | // Dispatch 624 | pCommandList->SetPipelineState(m_pFPSIndirectSetupParametersPipeline); 625 | pCommandList->Dispatch(1, 1, 1); 626 | 627 | // When done, transition the args buffers to INDIRECT_ARGUMENT, and the constant buffer UAV to Constant buffer 628 | CD3DX12_RESOURCE_BARRIER barriers[5]; 629 | barriers[0] = CD3DX12_RESOURCE_BARRIER::UAV(m_IndirectCountScatterArgs.GetResource()); 630 | barriers[1] = CD3DX12_RESOURCE_BARRIER::UAV(m_IndirectReduceScanArgs.GetResource()); 631 | barriers[2] = CD3DX12_RESOURCE_BARRIER::Transition(m_IndirectConstantBuffer.GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER); 632 | barriers[3] = CD3DX12_RESOURCE_BARRIER::Transition(m_IndirectCountScatterArgs.GetResource(), 
D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); 633 | barriers[4] = CD3DX12_RESOURCE_BARRIER::Transition(m_IndirectReduceScanArgs.GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT); 634 | pCommandList->ResourceBarrier(5, barriers); 635 | } 636 | 637 | // Setup resource/UAV pairs to use during sort 638 | RdxDX12ResourceInfo KeySrcInfo = { m_DstKeyBuffers[0].GetResource(), m_DstKeyUAVTable.GetGPU(0) }; 639 | RdxDX12ResourceInfo PayloadSrcInfo = { m_DstPayloadBuffers[0].GetResource(), m_DstPayloadUAVTable.GetGPU(0) }; 640 | RdxDX12ResourceInfo KeyTmpInfo = { m_DstKeyBuffers[1].GetResource(), m_DstKeyUAVTable.GetGPU(1) }; 641 | RdxDX12ResourceInfo PayloadTmpInfo = { m_DstPayloadBuffers[1].GetResource(), m_DstPayloadUAVTable.GetGPU(1) }; 642 | RdxDX12ResourceInfo ScratchBufferInfo = { m_FPSScratchBuffer.GetResource(), m_FPSScratchUAV.GetGPU() }; 643 | RdxDX12ResourceInfo ReducedScratchBufferInfo = { m_FPSReducedScratchBuffer.GetResource(), m_FPSReducedScratchUAV.GetGPU() }; 644 | 645 | // Buffers to ping-pong between when writing out sorted values 646 | const RdxDX12ResourceInfo* ReadBufferInfo(&KeySrcInfo), * WriteBufferInfo(&KeyTmpInfo); 647 | const RdxDX12ResourceInfo* ReadPayloadBufferInfo(&PayloadSrcInfo), * WritePayloadBufferInfo(&PayloadTmpInfo); 648 | bool bHasPayload = m_UISortPayload; 649 | 650 | // Setup barriers for the run 651 | CD3DX12_RESOURCE_BARRIER barriers[3]; 652 | 653 | // Perform Radix Sort (currently only support 32-bit key/payload sorting 654 | for (uint32_t Shift = 0; Shift < 32u; Shift += FFX_PARALLELSORT_SORT_BITS_PER_PASS) 655 | { 656 | // Update the bit shift 657 | pCommandList->SetComputeRoot32BitConstant(2, Shift, 0); 658 | 659 | // Copy the data into the constant buffer 660 | D3D12_GPU_VIRTUAL_ADDRESS constantBuffer; 661 | if (bIndirectDispatch) 662 | constantBuffer = m_IndirectConstantBuffer.GetResource()->GetGPUVirtualAddress(); 663 | else 664 | constantBuffer = 
m_pConstantBufferRing->AllocConstantBuffer(sizeof(FFX_ParallelSortCB), &constantBufferData); 665 | 666 | // Bind to root signature 667 | pCommandList->SetComputeRootConstantBufferView(0, constantBuffer); // Constant buffer 668 | pCommandList->SetComputeRootDescriptorTable(3, ReadBufferInfo->resourceGPUHandle); // SrcBuffer 669 | pCommandList->SetComputeRootDescriptorTable(5, ScratchBufferInfo.resourceGPUHandle); // Scratch buffer 670 | 671 | // Sort Count 672 | { 673 | pCommandList->SetPipelineState(m_pFPSCountPipeline); 674 | 675 | if (bIndirectDispatch) 676 | { 677 | pCommandList->ExecuteIndirect(m_pFPSCommandSignature, 1, m_IndirectCountScatterArgs.GetResource(), 0, nullptr, 0); 678 | } 679 | else 680 | { 681 | pCommandList->Dispatch(NumThreadgroupsToRun, 1, 1); 682 | } 683 | } 684 | 685 | // UAV barrier on the sum table 686 | barriers[0] = CD3DX12_RESOURCE_BARRIER::UAV(ScratchBufferInfo.pResource); 687 | pCommandList->ResourceBarrier(1, barriers); 688 | 689 | pCommandList->SetComputeRootDescriptorTable(6, ReducedScratchBufferInfo.resourceGPUHandle); // Scratch reduce buffer 690 | 691 | // Sort Reduce 692 | { 693 | pCommandList->SetPipelineState(m_pFPSCountReducePipeline); 694 | 695 | if (bIndirectDispatch) 696 | { 697 | pCommandList->ExecuteIndirect(m_pFPSCommandSignature, 1, m_IndirectReduceScanArgs.GetResource(), 0, nullptr, 0); 698 | } 699 | else 700 | { 701 | pCommandList->Dispatch(NumReducedThreadgroupsToRun, 1, 1); 702 | } 703 | 704 | // UAV barrier on the reduced sum table 705 | barriers[0] = CD3DX12_RESOURCE_BARRIER::UAV(ReducedScratchBufferInfo.pResource); 706 | pCommandList->ResourceBarrier(1, barriers); 707 | } 708 | 709 | // Sort Scan 710 | { 711 | // First do scan prefix of reduced values 712 | pCommandList->SetComputeRootDescriptorTable(9, ReducedScratchBufferInfo.resourceGPUHandle); 713 | pCommandList->SetComputeRootDescriptorTable(10, ReducedScratchBufferInfo.resourceGPUHandle); 714 | 715 | pCommandList->SetPipelineState(m_pFPSScanPipeline); 716 
| if (!bIndirectDispatch) 717 | { 718 | assert(NumReducedThreadgroupsToRun < FFX_PARALLELSORT_ELEMENTS_PER_THREAD * FFX_PARALLELSORT_THREADGROUP_SIZE && "Need to account for bigger reduced histogram scan"); 719 | } 720 | pCommandList->Dispatch(1, 1, 1); 721 | 722 | // UAV barrier on the reduced sum table 723 | barriers[0] = CD3DX12_RESOURCE_BARRIER::UAV(ReducedScratchBufferInfo.pResource); 724 | pCommandList->ResourceBarrier(1, barriers); 725 | 726 | // Next do scan prefix on the histogram with partial sums that we just did 727 | pCommandList->SetComputeRootDescriptorTable(9, ScratchBufferInfo.resourceGPUHandle); 728 | pCommandList->SetComputeRootDescriptorTable(10, ScratchBufferInfo.resourceGPUHandle); 729 | pCommandList->SetComputeRootDescriptorTable(11, ReducedScratchBufferInfo.resourceGPUHandle); 730 | 731 | pCommandList->SetPipelineState(m_pFPSScanAddPipeline); 732 | if (bIndirectDispatch) 733 | { 734 | pCommandList->ExecuteIndirect(m_pFPSCommandSignature, 1, m_IndirectReduceScanArgs.GetResource(), 0, nullptr, 0); 735 | } 736 | else 737 | { 738 | pCommandList->Dispatch(NumReducedThreadgroupsToRun, 1, 1); 739 | } 740 | } 741 | 742 | // UAV barrier on the sum table 743 | barriers[0] = CD3DX12_RESOURCE_BARRIER::UAV(ScratchBufferInfo.pResource); 744 | pCommandList->ResourceBarrier(1, barriers); 745 | 746 | if (bHasPayload) 747 | { 748 | pCommandList->SetComputeRootDescriptorTable(4, ReadPayloadBufferInfo->resourceGPUHandle); // ScrPayload 749 | pCommandList->SetComputeRootDescriptorTable(8, WritePayloadBufferInfo->resourceGPUHandle); // DstPayload 750 | } 751 | 752 | pCommandList->SetComputeRootDescriptorTable(7, WriteBufferInfo->resourceGPUHandle); // DstBuffer 753 | 754 | // Sort Scatter 755 | { 756 | pCommandList->SetPipelineState(bHasPayload ? 
m_pFPSScatterPayloadPipeline : m_pFPSScatterPipeline); 757 | 758 | if (bIndirectDispatch) 759 | { 760 | pCommandList->ExecuteIndirect(m_pFPSCommandSignature, 1, m_IndirectCountScatterArgs.GetResource(), 0, nullptr, 0); 761 | } 762 | else 763 | { 764 | pCommandList->Dispatch(NumThreadgroupsToRun, 1, 1); 765 | } 766 | } 767 | 768 | // Finish doing everything and barrier for the next pass 769 | int numBarriers = 0; 770 | barriers[numBarriers++] = CD3DX12_RESOURCE_BARRIER::UAV(WriteBufferInfo->pResource); 771 | if (bHasPayload) 772 | barriers[numBarriers++] = CD3DX12_RESOURCE_BARRIER::UAV(WritePayloadBufferInfo->pResource); 773 | pCommandList->ResourceBarrier(numBarriers, barriers); 774 | 775 | // Swap read/write sources 776 | std::swap(ReadBufferInfo, WriteBufferInfo); 777 | if (bHasPayload) 778 | std::swap(ReadPayloadBufferInfo, WritePayloadBufferInfo); 779 | } 780 | 781 | // When we are all done, transition indirect buffers back to UAV for the next frame (if doing indirect dispatch) 782 | if (bIndirectDispatch) 783 | { 784 | barriers[0] = CD3DX12_RESOURCE_BARRIER::Transition(m_IndirectCountScatterArgs.GetResource(), D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); 785 | barriers[1] = CD3DX12_RESOURCE_BARRIER::Transition(m_IndirectReduceScanArgs.GetResource(), D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); 786 | barriers[2] = CD3DX12_RESOURCE_BARRIER::Transition(m_IndirectConstantBuffer.GetResource(), D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); 787 | pCommandList->ResourceBarrier(3, barriers); 788 | } 789 | 790 | // Do we need to validate the results? 
If so, create a read back buffer to use for this frame 791 | #ifdef DEVELOPERMODE 792 | if (m_UIValidateSortResults && !isBenchmarking) 793 | { 794 | CreateValidationResources(pCommandList, &KeySrcInfo); 795 | // Only do this for 1 frame 796 | m_UIValidateSortResults = false; 797 | } 798 | #endif // DEVELOPERMODE 799 | } 800 | 801 | // Render Parallel Sort related GUI 802 | void FFXParallelSort::DrawGui() 803 | { 804 | if (ImGui::CollapsingHeader("FFX Parallel Sort", ImGuiTreeNodeFlags_DefaultOpen)) 805 | { 806 | static const char* ResolutionSizeStrings[] = { "1920x1080", "2560x1440", "3840x2160" }; 807 | 808 | ImVec2 textSize = ImGui::CalcTextSize("3840x2160"); 809 | if (KeySetOverride < 0) 810 | { 811 | ImGui::PushItemWidth(textSize.x * 2); 812 | ImGui::Combo("Sort Buffer Resolution", &m_UIResolutionSize, ResolutionSizeStrings, _countof(ResolutionSizeStrings)); 813 | ImGui::PopItemWidth(); 814 | } 815 | 816 | ImGui::Checkbox("Sort Payload", &m_UISortPayload); 817 | ImGui::Checkbox("Use Indirect Execution", &m_UIIndirectSort); 818 | #ifdef DEVELOPERMODE 819 | if (ImGui::Button("Validate Sort Results")) 820 | m_UIValidateSortResults = true; 821 | #endif // DEVELOPERMODE 822 | 823 | ImGui::RadioButton("Render Unsorted Keys", &m_UIVisualOutput, 0); 824 | ImGui::RadioButton("Render Sorted Keys", &m_UIVisualOutput, 1); 825 | } 826 | } 827 | 828 | // Renders the image with the sorted/unsorted indicies for visual representation 829 | void FFXParallelSort::DrawVisualization(ID3D12GraphicsCommandList* pCommandList, uint32_t RTWidth, uint32_t RTHeight) 830 | { 831 | // Setup the constant buffer 832 | ParallelSortRenderCB ConstantBuffer; 833 | ConstantBuffer.Width = RTWidth; 834 | ConstantBuffer.Height = RTHeight; 835 | static const uint32_t SortWidths[] = { 1920, 2560, 3840 }; 836 | static const uint32_t SortHeights[] = { 1080, 1440, 2160 }; 837 | ConstantBuffer.SortWidth = SortWidths[m_UIResolutionSize]; 838 | ConstantBuffer.SortHeight = SortHeights[m_UIResolutionSize]; 
839 | 840 | // Bind root signature and descriptor heaps 841 | ID3D12DescriptorHeap* pDescriptorHeap = m_pResourceViewHeaps->GetCBV_SRV_UAVHeap(); 842 | pCommandList->SetDescriptorHeaps(1, &pDescriptorHeap); 843 | pCommandList->SetGraphicsRootSignature(m_pRenderRootSignature); 844 | 845 | // Bind constant buffer 846 | D3D12_GPU_VIRTUAL_ADDRESS GPUCB = m_pConstantBufferRing->AllocConstantBuffer(sizeof(ParallelSortRenderCB), &ConstantBuffer); 847 | pCommandList->SetGraphicsRootConstantBufferView(0, GPUCB); 848 | 849 | // If we are showing unsorted values, need to transition the source data buffer from copy source to UAV and back 850 | if (!m_UIVisualOutput) 851 | { 852 | CD3DX12_RESOURCE_BARRIER barrier = CD3DX12_RESOURCE_BARRIER::Transition(m_SrcKeyBuffers[m_UIResolutionSize].GetResource(), D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); 853 | pCommandList->ResourceBarrier(1, &barrier); 854 | pCommandList->SetGraphicsRootDescriptorTable(1, m_SrcKeyUAVTable.GetGPU(m_UIResolutionSize)); 855 | } 856 | else 857 | pCommandList->SetGraphicsRootDescriptorTable(1, m_DstKeyUAVTable.GetGPU(0)); 858 | 859 | // Bind validation texture 860 | pCommandList->SetGraphicsRootDescriptorTable(2, m_ValidateTextureSRV.GetGPU(m_UIResolutionSize)); 861 | 862 | D3D12_VIEWPORT vp = {}; 863 | vp.Width = (float)RTWidth; 864 | vp.Height = (float)RTHeight; 865 | vp.MinDepth = 0.0f; 866 | vp.MaxDepth = 1.0f; 867 | vp.TopLeftX = vp.TopLeftY = 0.0f; 868 | pCommandList->RSSetViewports(1, &vp); 869 | 870 | // Set the shader and dispatch 871 | pCommandList->IASetVertexBuffers(0, 0, nullptr); 872 | pCommandList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); 873 | pCommandList->SetPipelineState(m_pRenderResultVerificationPipeline); 874 | pCommandList->DrawInstanced(3, 1, 0, 0); 875 | 876 | // If we are showing unsorted values, need to transition the source data buffer from copy source to UAV and back 877 | if (!m_UIVisualOutput) 878 | { 879 | 
CD3DX12_RESOURCE_BARRIER barrier = CD3DX12_RESOURCE_BARRIER::Transition(m_SrcKeyBuffers[m_UIResolutionSize].GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE); 880 | pCommandList->ResourceBarrier(1, &barrier); 881 | } 882 | } 883 | -------------------------------------------------------------------------------- /sample/src/DX12/ParallelSort.h: -------------------------------------------------------------------------------- 1 | // ParallelSort.h 2 | // 3 | // Copyright(c) 2021 Advanced Micro Devices, Inc.All rights reserved. 4 | // Permission is hereby granted, free of charge, to any person obtaining a copy 5 | // of this software and associated documentation files(the "Software"), to deal 6 | // in the Software without restriction, including without limitation the rights 7 | // to use, copy, modify, merge, publish, distribute, sublicense, and / or sell 8 | // copies of the Software, and to permit persons to whom the Software is 9 | // furnished to do so, subject to the following conditions : 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 15 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 18 | // THE SOFTWARE. 
19 | 20 | #pragma once 21 | #include 22 | 23 | using namespace CAULDRON_DX12; 24 | 25 | // Uncomment the following line to enable developer mode which compiles in data verification mechanism 26 | //#define DEVELOPERMODE 27 | 28 | struct ParallelSortRenderCB // If you change this, also change struct ParallelSortRenderCB in ParallelSortVerify.hlsl 29 | { 30 | int32_t Width; 31 | int32_t Height; 32 | int32_t SortWidth; 33 | int32_t SortHeight; 34 | }; 35 | 36 | // Convenience struct for passing resource/UAV pairs around 37 | typedef struct RdxDX12ResourceInfo 38 | { 39 | ID3D12Resource* pResource; ///< Pointer to the resource -- used for barriers and syncs (must NOT be nullptr) 40 | D3D12_GPU_DESCRIPTOR_HANDLE resourceGPUHandle; ///< The GPU Descriptor Handle to use for binding the resource 41 | } RdxDX12ResourceInfo; 42 | 43 | namespace CAULDRON_DX12 44 | { 45 | class Device; 46 | class ResourceViewHeaps; 47 | class DynamicBufferRing; 48 | class StaticBufferPool; 49 | } 50 | 51 | class FFXParallelSort 52 | { 53 | public: 54 | void OnCreate(Device* pDevice, ResourceViewHeaps* pResourceViewHeaps, DynamicBufferRing* pConstantBufferRing, UploadHeap* pUploadHeap, SwapChain* pSwapChain); 55 | void OnDestroy(); 56 | 57 | void Sort(ID3D12GraphicsCommandList* pCommandList, bool isBenchmarking, float benchmarkTime); 58 | #ifdef DEVELOPERMODE 59 | void WaitForValidationResults(); 60 | #endif // DEVELOPERMODE 61 | void CopySourceDataForFrame(ID3D12GraphicsCommandList* pCommandList); 62 | void DrawGui(); 63 | void DrawVisualization(ID3D12GraphicsCommandList* pCommandList, uint32_t RTWidth, uint32_t RTHeight); 64 | 65 | // Temp -- For command line overrides 66 | static void OverrideKeySet(int ResolutionOverride); 67 | static void OverridePayload(); 68 | // Temp -- For command line overrides 69 | 70 | private: 71 | void CreateKeyPayloadBuffers(); 72 | void CompileRadixPipeline(const char* shaderFile, const DefineList* defines, const char* entryPoint, ID3D12PipelineState*& 
pPipeline); 73 | #ifdef DEVELOPERMODE 74 | void CreateValidationResources(ID3D12GraphicsCommandList* pCommandList, RdxDX12ResourceInfo* pKeyDstInfo); 75 | #endif // DEVELOPERMODE 76 | 77 | // Temp -- For command line overrides 78 | static int KeySetOverride; 79 | static bool PayloadOverride; 80 | // Temp -- For command line overrides 81 | 82 | Device* m_pDevice = nullptr; 83 | UploadHeap* m_pUploadHeap = nullptr; 84 | ResourceViewHeaps* m_pResourceViewHeaps = nullptr; 85 | DynamicBufferRing* m_pConstantBufferRing = nullptr; 86 | uint32_t m_MaxNumThreadgroups = 320; // Use a generic thread group size when not on AMD hardware (taken from experiments to determine best performance threshold) 87 | 88 | // Sample resources 89 | Texture m_SrcKeyBuffers[3]; // 32 bit source key buffers (for 1080, 2K, 4K resolution) 90 | CBV_SRV_UAV m_SrcKeyUAVTable; // 32 bit source key UAVs (for 1080, 2K, 4K resolution) 91 | 92 | Texture m_SrcPayloadBuffers; // 32 bit source payload buffers 93 | CBV_SRV_UAV m_SrcPayloadUAV; // 32 bit source payload UAVs 94 | 95 | Texture m_DstKeyBuffers[2]; // 32 bit destination key buffers (when not doing in place writes) 96 | CBV_SRV_UAV m_DstKeyUAVTable; // 32 bit destination key UAVs 97 | 98 | Texture m_DstPayloadBuffers[2]; // 32 bit destination payload buffers (when not doing in place writes) 99 | CBV_SRV_UAV m_DstPayloadUAVTable; // 32 bit destination payload UAVs 100 | 101 | // Resources for parallel sort algorithm 102 | Texture m_FPSScratchBuffer; // Sort scratch buffer 103 | CBV_SRV_UAV m_FPSScratchUAV; // UAV needed for sort scratch buffer 104 | Texture m_FPSReducedScratchBuffer; // Sort reduced scratch buffer 105 | CBV_SRV_UAV m_FPSReducedScratchUAV; // UAV needed for sort reduced scratch buffer 106 | 107 | ID3D12RootSignature* m_pFPSRootSignature = nullptr; 108 | ID3D12PipelineState* m_pFPSCountPipeline = nullptr; 109 | ID3D12PipelineState* m_pFPSCountReducePipeline = nullptr; 110 | ID3D12PipelineState* m_pFPSScanPipeline = nullptr; 111 | 
ID3D12PipelineState* m_pFPSScanAddPipeline = nullptr; 112 | ID3D12PipelineState* m_pFPSScatterPipeline = nullptr; 113 | ID3D12PipelineState* m_pFPSScatterPayloadPipeline = nullptr; 114 | 115 | // Resources for indirect execution of algorithm 116 | Texture m_IndirectKeyCounts; // Buffer to hold num keys for indirect dispatch 117 | CBV_SRV_UAV m_IndirectKeyCountsUAV; // UAV needed for num keys buffer 118 | Texture m_IndirectConstantBuffer; // Buffer to hold radix sort constant buffer data for indirect dispatch 119 | CBV_SRV_UAV m_IndirectConstantBufferUAV; // UAV needed for indirect constant buffer 120 | Texture m_IndirectCountScatterArgs; // Buffer to hold dispatch arguments used for Count/Scatter parts of the algorithm 121 | CBV_SRV_UAV m_IndirectCountScatterArgsUAV; // UAV needed for count/scatter args buffer 122 | Texture m_IndirectReduceScanArgs; // Buffer to hold dispatch arguments used for Reduce/Scan parts of the algorithm 123 | CBV_SRV_UAV m_IndirectReduceScanArgsUAV; // UAV needed for reduce/scan args buffer 124 | 125 | ID3D12CommandSignature* m_pFPSCommandSignature; 126 | ID3D12PipelineState* m_pFPSIndirectSetupParametersPipeline = nullptr; 127 | 128 | // Resources for verification render 129 | ID3D12RootSignature* m_pRenderRootSignature = nullptr; 130 | ID3D12PipelineState* m_pRenderResultVerificationPipeline = nullptr; 131 | Texture m_Validate4KTexture; 132 | Texture m_Validate2KTexture; 133 | Texture m_Validate1080pTexture; 134 | CBV_SRV_UAV m_ValidateTextureSRV; 135 | 136 | // For correctness validation 137 | ID3D12Resource* m_ReadBackBufferResource; // For sort validation 138 | ID3D12Fence* m_ReadBackFence; // To know when we can check sort results 139 | HANDLE m_ReadBackFenceEvent; 140 | #ifdef DEVELOPERMODE 141 | bool m_UIValidateSortResults = false; // Validate the results 142 | #endif // DEVELOPERMODE 143 | 144 | // Options for UI and test to run 145 | int m_UIResolutionSize = 0; 146 | bool m_UISortPayload = false; 147 | bool m_UIIndirectSort = 
false; 148 | int m_UIVisualOutput = 0; 149 | }; -------------------------------------------------------------------------------- /sample/src/DX12/UI.cpp: -------------------------------------------------------------------------------- 1 | // AMD SampleDX12 sample code 2 | // 3 | // Copyright(c) 2021 Advanced Micro Devices, Inc.All rights reserved. 4 | // Permission is hereby granted, free of charge, to any person obtaining a copy 5 | // of this software and associated documentation files(the "Software"), to deal 6 | // in the Software without restriction, including without limitation the rights 7 | // to use, copy, modify, merge, publish, distribute, sublicense, and / or sell 8 | // copies of the Software, and to permit persons to whom the Software is 9 | // furnished to do so, subject to the following conditions : 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 15 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 18 | // THE SOFTWARE. 
19 | 20 | #include "UI.h" 21 | #include "Sample.h" 22 | #include "imgui.h" 23 | #include "base/FrameworkWindows.h" 24 | 25 | // To use the 'disabled UI state' functionality (ImGuiItemFlags_Disabled), include internal header 26 | // https://github.com/ocornut/imgui/issues/211#issuecomment-339241929 27 | #include "imgui_internal.h" 28 | static void DisableUIStateBegin(const bool& bEnable) 29 | { 30 | if (!bEnable) 31 | { 32 | ImGui::PushItemFlag(ImGuiItemFlags_Disabled, true); 33 | ImGui::PushStyleVar(ImGuiStyleVar_Alpha, ImGui::GetStyle().Alpha * 0.5f); 34 | } 35 | }; 36 | static void DisableUIStateEnd(const bool& bEnable) 37 | { 38 | if (!bEnable) 39 | { 40 | ImGui::PopItemFlag(); 41 | ImGui::PopStyleVar(); 42 | } 43 | }; 44 | 45 | void Sample::BuildUI() 46 | { 47 | ImGuiIO& io = ImGui::GetIO(); 48 | ImGuiStyle& style = ImGui::GetStyle(); 49 | style.FrameBorderSize = 1.0f; 50 | 51 | const uint32_t W = this->GetWidth(); 52 | const uint32_t H = this->GetHeight(); 53 | 54 | const uint32_t PROFILER_WINDOW_PADDING_X = 10; 55 | const uint32_t PROFILER_WINDOW_PADDING_Y = 10; 56 | const uint32_t PROFILER_WINDOW_SIZE_X = 330; 57 | const uint32_t PROFILER_WINDOW_SIZE_Y = 450; 58 | const uint32_t PROFILER_WINDOW_POS_X = W - PROFILER_WINDOW_PADDING_X - PROFILER_WINDOW_SIZE_X; 59 | const uint32_t PROFILER_WINDOW_POS_Y = PROFILER_WINDOW_PADDING_Y; 60 | 61 | const uint32_t CONTROLS_WINDOW_POS_X = 10; 62 | const uint32_t CONTROLS_WINDOW_POS_Y = 10; 63 | const uint32_t CONTROLW_WINDOW_SIZE_X = 350; 64 | const uint32_t CONTROLW_WINDOW_SIZE_Y = 780; // assuming > 720p 65 | 66 | // Render CONTROLS window 67 | // 68 | ImGui::SetNextWindowPos(ImVec2(CONTROLS_WINDOW_POS_X, CONTROLS_WINDOW_POS_Y), ImGuiCond_FirstUseEver); 69 | ImGui::SetNextWindowSize(ImVec2(CONTROLW_WINDOW_SIZE_X, CONTROLW_WINDOW_SIZE_Y), ImGuiCond_FirstUseEver); 70 | 71 | if (m_UIState.bShowControlsWindow) 72 | { 73 | ImGui::Begin("CONTROLS (F1)", &m_UIState.bShowControlsWindow); 74 | 75 | // Render UI for Radix Sort 76 
| m_pRenderer->RenderParallelSortUI(); 77 | 78 | ImGui::Spacing(); 79 | ImGui::Spacing(); 80 | 81 | if (ImGui::CollapsingHeader("Presentation Mode", ImGuiTreeNodeFlags_DefaultOpen)) 82 | { 83 | const char* fullscreenModes[] = { "Windowed", "BorderlessFullscreen", "ExclusiveFullscreen" }; 84 | if (ImGui::Combo("Fullscreen Mode", (int*)&m_fullscreenMode, fullscreenModes, _countof(fullscreenModes))) 85 | { 86 | if (m_previousFullscreenMode != m_fullscreenMode) 87 | { 88 | HandleFullScreen(); 89 | m_previousFullscreenMode = m_fullscreenMode; 90 | } 91 | } 92 | } 93 | 94 | ImGui::End(); // CONTROLS 95 | } 96 | 97 | // Render PROFILER window 98 | // 99 | if (m_UIState.bShowProfilerWindow) 100 | { 101 | constexpr size_t NUM_FRAMES = 128; 102 | static float FRAME_TIME_ARRAY[NUM_FRAMES] = { 0 }; 103 | 104 | // track highest frame rate and determine the max value of the graph based on the measured highest value 105 | static float RECENT_HIGHEST_FRAME_TIME = 0.0f; 106 | constexpr int FRAME_TIME_GRAPH_MAX_FPS[] = { 800, 240, 120, 90, 60, 45, 30, 15, 10, 5, 4, 3, 2, 1 }; 107 | static float FRAME_TIME_GRAPH_MAX_VALUES[_countof(FRAME_TIME_GRAPH_MAX_FPS)] = { 0 }; // us 108 | for (int i = 0; i < _countof(FRAME_TIME_GRAPH_MAX_FPS); ++i) { FRAME_TIME_GRAPH_MAX_VALUES[i] = 1000000.f / FRAME_TIME_GRAPH_MAX_FPS[i]; } 109 | 110 | //scrolling data and average FPS computing 111 | const std::vector& timeStamps = m_pRenderer->GetTimingValues(); 112 | const bool bTimeStampsAvailable = timeStamps.size() > 0; 113 | if (bTimeStampsAvailable) 114 | { 115 | RECENT_HIGHEST_FRAME_TIME = 0; 116 | FRAME_TIME_ARRAY[NUM_FRAMES - 1] = timeStamps.back().m_microseconds; 117 | for (uint32_t i = 0; i < NUM_FRAMES - 1; i++) 118 | { 119 | FRAME_TIME_ARRAY[i] = FRAME_TIME_ARRAY[i + 1]; 120 | } 121 | RECENT_HIGHEST_FRAME_TIME = max(RECENT_HIGHEST_FRAME_TIME, FRAME_TIME_ARRAY[NUM_FRAMES - 1]); 122 | } 123 | const float& frameTime_us = FRAME_TIME_ARRAY[NUM_FRAMES - 1]; 124 | const float frameTime_ms = 
frameTime_us * 0.001f; 125 | const int fps = bTimeStampsAvailable ? static_cast(1000000.0f / frameTime_us) : 0; 126 | 127 | // UI 128 | ImGui::SetNextWindowPos(ImVec2((float)PROFILER_WINDOW_POS_X, (float)PROFILER_WINDOW_POS_Y), ImGuiCond_FirstUseEver); 129 | ImGui::SetNextWindowSize(ImVec2(PROFILER_WINDOW_SIZE_X, PROFILER_WINDOW_SIZE_Y), ImGuiCond_FirstUseEver); 130 | ImGui::Begin("PROFILER (F2)", &m_UIState.bShowProfilerWindow); 131 | 132 | ImGui::Text("Resolution : %ix%i", m_Width, m_Height); 133 | ImGui::Text("API : %s", m_systemInfo.mGfxAPI.c_str()); 134 | ImGui::Text("GPU : %s", m_systemInfo.mGPUName.c_str()); 135 | ImGui::Text("CPU : %s", m_systemInfo.mCPUName.c_str()); 136 | ImGui::Text("FPS : %d (%.2f ms)", fps, frameTime_ms); 137 | 138 | if (ImGui::CollapsingHeader("GPU Timings", ImGuiTreeNodeFlags_DefaultOpen)) 139 | { 140 | std::string msOrUsButtonText = m_UIState.bShowMilliseconds ? "Switch to microseconds" : "Switch to milliseconds"; 141 | if (ImGui::Button(msOrUsButtonText.c_str())) { 142 | m_UIState.bShowMilliseconds = !m_UIState.bShowMilliseconds; 143 | } 144 | ImGui::Spacing(); 145 | 146 | // find the index of the FrameTimeGraphMaxValue as the next higher-than-recent-highest-frame-time in the pre-determined value list 147 | size_t iFrameTimeGraphMaxValue = 0; 148 | for (int i = 0; i < _countof(FRAME_TIME_GRAPH_MAX_VALUES); ++i) 149 | { 150 | if (RECENT_HIGHEST_FRAME_TIME < FRAME_TIME_GRAPH_MAX_VALUES[i]) // FRAME_TIME_GRAPH_MAX_VALUES are in increasing order 151 | { 152 | iFrameTimeGraphMaxValue = min(_countof(FRAME_TIME_GRAPH_MAX_VALUES) - 1, i + 1); 153 | break; 154 | } 155 | } 156 | ImGui::PlotLines("", FRAME_TIME_ARRAY, NUM_FRAMES, 0, "GPU frame time (us)", 0.0f, FRAME_TIME_GRAPH_MAX_VALUES[iFrameTimeGraphMaxValue], ImVec2(0, 80)); 157 | 158 | for (uint32_t i = 0; i < timeStamps.size(); i++) 159 | { 160 | float value = m_UIState.bShowMilliseconds ? 
timeStamps[i].m_microseconds / 1000.0f : timeStamps[i].m_microseconds; 161 | const char* pStrUnit = m_UIState.bShowMilliseconds ? "ms" : "us"; 162 | ImGui::Text("%-18s: %7.2f %s", timeStamps[i].m_label.c_str(), value, pStrUnit); 163 | } 164 | } 165 | ImGui::End(); // PROFILER 166 | } 167 | } 168 | 169 | void UIState::Initialize() 170 | { 171 | // init GUI state 172 | this->bShowControlsWindow = true; 173 | this->bShowProfilerWindow = true; 174 | } 175 | -------------------------------------------------------------------------------- /sample/src/DX12/UI.h: -------------------------------------------------------------------------------- 1 | // AMD SampleDX12 sample code 2 | // 3 | // Copyright(c) 2021 Advanced Micro Devices, Inc.All rights reserved. 4 | // Permission is hereby granted, free of charge, to any person obtaining a copy 5 | // of this software and associated documentation files(the "Software"), to deal 6 | // in the Software without restriction, including without limitation the rights 7 | // to use, copy, modify, merge, publish, distribute, sublicense, and / or sell 8 | // copies of the Software, and to permit persons to whom the Software is 9 | // furnished to do so, subject to the following conditions : 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 15 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 18 | // THE SOFTWARE. 
19 | 20 | #pragma once 21 | 22 | #include "PostProc/MagnifierPS.h" 23 | #include 24 | 25 | struct UIState 26 | { 27 | // 28 | // WINDOW MANAGEMENT 29 | // 30 | bool bShowControlsWindow; 31 | bool bShowProfilerWindow; 32 | 33 | // 34 | // PROFILER CONTROLS 35 | // 36 | bool bShowMilliseconds; 37 | 38 | // ----------------------------------------------- 39 | 40 | void Initialize(); 41 | }; -------------------------------------------------------------------------------- /sample/src/DX12/dpiawarescaling.manifest: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | true/PM 6 | 7 | 8 | -------------------------------------------------------------------------------- /sample/src/DX12/renderer.cpp: -------------------------------------------------------------------------------- 1 | // samplerenderer.cpp 2 | // 3 | // Copyright(c) 2021 Advanced Micro Devices, Inc.All rights reserved. 4 | // Permission is hereby granted, free of charge, to any person obtaining a copy 5 | // of this software and associated documentation files(the "Software"), to deal 6 | // in the Software without restriction, including without limitation the rights 7 | // to use, copy, modify, merge, publish, distribute, sublicense, and / or sell 8 | // copies of the Software, and to permit persons to whom the Software is 9 | // furnished to do so, subject to the following conditions : 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 
12 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 15 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 18 | // THE SOFTWARE. 19 | 20 | #include "Renderer.h" 21 | #include "UI.h" 22 | 23 | 24 | //-------------------------------------------------------------------------------------- 25 | // 26 | // OnCreate 27 | // 28 | //-------------------------------------------------------------------------------------- 29 | void Renderer::OnCreate(Device* pDevice, SwapChain* pSwapChain, float FontSize) 30 | { 31 | m_pDevice = pDevice; 32 | 33 | // Initialize helpers 34 | 35 | // Create all the heaps for the resources views 36 | const uint32_t cbvDescriptorCount = 4000; 37 | const uint32_t srvDescriptorCount = 8000; 38 | const uint32_t uavDescriptorCount = 10; 39 | const uint32_t dsvDescriptorCount = 10; 40 | const uint32_t rtvDescriptorCount = 60; 41 | const uint32_t samplerDescriptorCount = 20; 42 | m_ResourceViewHeaps.OnCreate(pDevice, cbvDescriptorCount, srvDescriptorCount, uavDescriptorCount, dsvDescriptorCount, rtvDescriptorCount, samplerDescriptorCount); 43 | 44 | // Create a commandlist ring for the Direct queue 45 | uint32_t commandListsPerBackBuffer = 8; 46 | m_CommandListRing.OnCreate(pDevice, BackBufferCount, commandListsPerBackBuffer, pDevice->GetGraphicsQueue()->GetDesc()); 47 | 48 | // Create a 'dynamic' constant buffer 49 | const uint32_t constantBuffersMemSize = 20 * 1024 * 1024; 50 | m_ConstantBufferRing.OnCreate(pDevice, BackBufferCount, constantBuffersMemSize, &m_ResourceViewHeaps); 51 | 52 | // Create a 'static' pool for vertices, indices and constant buffers 53 | const 
uint32_t staticGeometryMemSize = (2 * 128) * 1024 * 1024; 54 | m_VidMemBufferPool.OnCreate(pDevice, staticGeometryMemSize, true, "StaticGeom"); 55 | 56 | // initialize the GPU time stamps module 57 | m_GPUTimer.OnCreate(pDevice, BackBufferCount); 58 | 59 | // Quick helper to upload resources, it has it's own commandList and uses sub-allocation. 60 | const uint32_t uploadHeapMemSize = 100 * 1024 * 1024; 61 | m_UploadHeap.OnCreate(pDevice, uploadHeapMemSize); // initialize an upload heap (uses sub-allocation for faster results) 62 | 63 | // Initialize UI rendering resources 64 | m_ImGUI.OnCreate(pDevice, &m_UploadHeap, &m_ResourceViewHeaps, &m_ConstantBufferRing, pSwapChain->GetFormat(), FontSize); 65 | 66 | // Create FFX Parallel Sort pass 67 | m_ParallelSort.OnCreate(pDevice, &m_ResourceViewHeaps, &m_ConstantBufferRing, &m_UploadHeap, pSwapChain); 68 | 69 | // Make sure upload heap has finished uploading before continuing 70 | m_VidMemBufferPool.UploadData(m_UploadHeap.GetCommandList()); 71 | m_UploadHeap.FlushAndFinish(); 72 | } 73 | 74 | //-------------------------------------------------------------------------------------- 75 | // 76 | // OnDestroy 77 | // 78 | //-------------------------------------------------------------------------------------- 79 | void Renderer::OnDestroy() 80 | { 81 | m_ParallelSort.OnDestroy(); 82 | m_ImGUI.OnDestroy(); 83 | 84 | m_UploadHeap.OnDestroy(); 85 | m_GPUTimer.OnDestroy(); 86 | m_VidMemBufferPool.OnDestroy(); 87 | m_ConstantBufferRing.OnDestroy(); 88 | m_ResourceViewHeaps.OnDestroy(); 89 | m_CommandListRing.OnDestroy(); 90 | } 91 | 92 | //-------------------------------------------------------------------------------------- 93 | // 94 | // OnCreateWindowSizeDependentResources 95 | // 96 | //-------------------------------------------------------------------------------------- 97 | void Renderer::OnCreateWindowSizeDependentResources(SwapChain *pSwapChain, uint32_t Width, uint32_t Height) 98 | { 99 | m_Width = Width; 100 | 
m_Height = Height; 101 | 102 | // Set the viewport & scissors rect 103 | m_Viewport = { 0.0f, 0.0f, static_cast(Width), static_cast(Height), 0.0f, 1.0f }; 104 | m_RectScissor = { 0, 0, (LONG)Width, (LONG)Height }; 105 | } 106 | 107 | //-------------------------------------------------------------------------------------- 108 | // 109 | // OnDestroyWindowSizeDependentResources 110 | // 111 | //-------------------------------------------------------------------------------------- 112 | void Renderer::OnDestroyWindowSizeDependentResources() 113 | { 114 | } 115 | 116 | void Renderer::OnUpdateDisplayDependentResources(SwapChain* pSwapChain) 117 | { 118 | // Update pipelines in case the format of the RTs changed (this happens when going HDR) 119 | m_ImGUI.UpdatePipeline(pSwapChain->GetFormat()); 120 | } 121 | 122 | //-------------------------------------------------------------------------------------- 123 | // 124 | // OnRender 125 | // 126 | //-------------------------------------------------------------------------------------- 127 | void Renderer::OnRender(const UIState* pState, SwapChain* pSwapChain, float Time, bool bIsBenchmarking) 128 | { 129 | // Timing values 130 | UINT64 gpuTicksPerSecond; 131 | m_pDevice->GetGraphicsQueue()->GetTimestampFrequency(&gpuTicksPerSecond); 132 | 133 | // Let our resource managers do some house keeping 134 | m_CommandListRing.OnBeginFrame(); 135 | m_ConstantBufferRing.OnBeginFrame(); 136 | m_GPUTimer.OnBeginFrame(gpuTicksPerSecond, &m_TimeStamps); 137 | 138 | // command buffer calls 139 | ID3D12GraphicsCommandList* pCmdLst1 = m_CommandListRing.GetNewCommandList(); 140 | pCmdLst1->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(pSwapChain->GetCurrentBackBufferResource(), D3D12_RESOURCE_STATE_PRESENT, D3D12_RESOURCE_STATE_RENDER_TARGET)); 141 | 142 | // Copy the data to sort for the frame (don't time this -- external to process) 143 | m_ParallelSort.CopySourceDataForFrame(pCmdLst1); 144 | m_GPUTimer.GetTimeStamp(pCmdLst1, 
"Begin Frame"); 145 | 146 | // Do sort tests ----------------------------------------------------------------------- 147 | m_ParallelSort.Sort(pCmdLst1, bIsBenchmarking, Time); 148 | m_GPUTimer.GetTimeStamp(pCmdLst1, "FFX Parallel Sort"); 149 | 150 | // submit command buffer #1 151 | ThrowIfFailed(pCmdLst1->Close()); 152 | ID3D12CommandList* CmdListList1[] = { pCmdLst1 }; 153 | m_pDevice->GetGraphicsQueue()->ExecuteCommandLists(1, CmdListList1); 154 | 155 | // Check against parallel sort validation if needed (just returns if not needed) 156 | #ifdef DEVELOPERMODE 157 | m_ParallelSort.WaitForValidationResults(); 158 | #endif // DEVELOPERMODE 159 | 160 | // Wait for swapchain (we are going to render to it) ----------------------------------- 161 | pSwapChain->WaitForSwapChain(); 162 | 163 | ID3D12GraphicsCommandList* pCmdLst2 = m_CommandListRing.GetNewCommandList(); 164 | pCmdLst2->RSSetViewports(1, &m_Viewport); 165 | pCmdLst2->RSSetScissorRects(1, &m_RectScissor); 166 | pCmdLst2->OMSetRenderTargets(1, pSwapChain->GetCurrentBackBufferRTV(), true, nullptr); 167 | float clearColor[4] = { 0, 0, 0, 0 }; 168 | pCmdLst2->ClearRenderTargetView(*pSwapChain->GetCurrentBackBufferRTV(), clearColor, 0, nullptr); 169 | 170 | // Render sort source/results over everything except the HUD -------------------------- 171 | m_ParallelSort.DrawVisualization(pCmdLst2, m_Width, m_Height); 172 | 173 | // Render HUD ------------------------------------------------------------------------ 174 | { 175 | m_ImGUI.Draw(pCmdLst2); 176 | m_GPUTimer.GetTimeStamp(pCmdLst2, "ImGUI Rendering"); 177 | } 178 | 179 | if (!m_pScreenShotName.empty()) 180 | { 181 | m_SaveTexture.CopyRenderTargetIntoStagingTexture(m_pDevice->GetDevice(), pCmdLst2, pSwapChain->GetCurrentBackBufferResource(), D3D12_RESOURCE_STATE_RENDER_TARGET); 182 | } 183 | 184 | // Transition swap chain into present mode 185 | pCmdLst2->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(pSwapChain->GetCurrentBackBufferResource(), 
D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_PRESENT)); 186 | m_GPUTimer.OnEndFrame(); 187 | m_GPUTimer.CollectTimings(pCmdLst2); 188 | 189 | // Close & Submit the command list #2 ------------------------------------------------- 190 | ThrowIfFailed(pCmdLst2->Close()); 191 | 192 | ID3D12CommandList* CmdListList2[] = { pCmdLst2 }; 193 | m_pDevice->GetGraphicsQueue()->ExecuteCommandLists(1, CmdListList2); 194 | 195 | // Handle screenshot request 196 | if (!m_pScreenShotName.empty()) 197 | { 198 | m_SaveTexture.SaveStagingTextureAsJpeg(m_pDevice->GetDevice(), m_pDevice->GetGraphicsQueue(), m_pScreenShotName.c_str()); 199 | m_pScreenShotName.clear(); 200 | } 201 | } 202 | -------------------------------------------------------------------------------- /sample/src/DX12/renderer.h: -------------------------------------------------------------------------------- 1 | // samplerenderer.h 2 | // 3 | // Copyright(c) 2021 Advanced Micro Devices, Inc.All rights reserved. 4 | // Permission is hereby granted, free of charge, to any person obtaining a copy 5 | // of this software and associated documentation files(the "Software"), to deal 6 | // in the Software without restriction, including without limitation the rights 7 | // to use, copy, modify, merge, publish, distribute, sublicense, and / or sell 8 | // copies of the Software, and to permit persons to whom the Software is 9 | // furnished to do so, subject to the following conditions : 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 
12 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 15 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 18 | // THE SOFTWARE. 19 | 20 | #pragma once 21 | 22 | #include "stdafx.h" 23 | #include "PostProc/MagnifierPS.h" 24 | 25 | struct UIState; 26 | 27 | // We are queuing (BackBufferCount + 0.5) frames, so we need to triple buffer the resources that get modified each frame 28 | static const int BackBufferCount = 3; 29 | 30 | using namespace CAULDRON_DX12; 31 | 32 | // 33 | // This class deals with the GPU side of the sample. 34 | class Renderer 35 | { 36 | public: 37 | 38 | void OnCreate(Device* pDevice, SwapChain* pSwapChain, float FontSize); 39 | void OnDestroy(); 40 | 41 | void OnCreateWindowSizeDependentResources(SwapChain* pSwapChain, uint32_t Width, uint32_t Height); 42 | void OnDestroyWindowSizeDependentResources(); 43 | 44 | void OnUpdateDisplayDependentResources(SwapChain* pSwapChain); 45 | 46 | const std::vector& GetTimingValues() const { return m_TimeStamps; } 47 | std::string& GetScreenshotFileName() { return m_pScreenShotName; } 48 | 49 | void OnRender(const UIState *pState, SwapChain *pSwapChain, float Time, bool bIsBenchmarking); 50 | 51 | void RenderParallelSortUI() { m_ParallelSort.DrawGui(); } 52 | 53 | private: 54 | Device* m_pDevice; 55 | 56 | uint32_t m_Width; 57 | uint32_t m_Height; 58 | D3D12_VIEWPORT m_Viewport; 59 | D3D12_RECT m_RectScissor; 60 | 61 | // Initialize helper classes 62 | ResourceViewHeaps m_ResourceViewHeaps; 63 | UploadHeap m_UploadHeap; 64 | DynamicBufferRing m_ConstantBufferRing; 65 | StaticBufferPool m_VidMemBufferPool; 66 | CommandListRing 
m_CommandListRing; 67 | GPUTimestamps m_GPUTimer; 68 | 69 | FFXParallelSort m_ParallelSort; 70 | 71 | // GUI 72 | ImGUI m_ImGUI; 73 | 74 | // For benchmarking 75 | std::vector m_TimeStamps; 76 | 77 | // screen shot 78 | std::string m_pScreenShotName = ""; 79 | SaveTexture m_SaveTexture; 80 | }; 81 | -------------------------------------------------------------------------------- /sample/src/DX12/sample.cpp: -------------------------------------------------------------------------------- 1 | // sample.cpp 2 | // 3 | // Copyright(c) 2021 Advanced Micro Devices, Inc.All rights reserved. 4 | // Permission is hereby granted, free of charge, to any person obtaining a copy 5 | // of this software and associated documentation files(the "Software"), to deal 6 | // in the Software without restriction, including without limitation the rights 7 | // to use, copy, modify, merge, publish, distribute, sublicense, and / or sell 8 | // copies of the Software, and to permit persons to whom the Software is 9 | // furnished to do so, subject to the following conditions : 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 15 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 18 | // THE SOFTWARE. 
19 | 20 | #include "stdafx.h" 21 | #include "Sample.h" 22 | 23 | #include 24 | #include 25 | 26 | //-------------------------------------------------------------------------------------- 27 | // 28 | // OnParseCommandLine 29 | // 30 | //-------------------------------------------------------------------------------------- 31 | void Sample::OnParseCommandLine(LPSTR lpCmdLine, uint32_t* pWidth, uint32_t* pHeight) 32 | { 33 | // set some default values 34 | *pWidth = 1920; 35 | *pHeight = 1080; 36 | m_VsyncEnabled = false; 37 | m_bIsBenchmarking = false; 38 | m_fontSize = 13.f; // default value overridden by a json file if available 39 | m_isCpuValidationLayerEnabled = false; 40 | m_isGpuValidationLayerEnabled = false; 41 | m_stablePowerState = false; 42 | 43 | // Read globals 44 | auto process = [&](json jData) 45 | { 46 | *pWidth = jData.value("width", *pWidth); 47 | *pHeight = jData.value("height", *pHeight); 48 | m_fullscreenMode = jData.value("presentationMode", m_fullscreenMode); 49 | m_isCpuValidationLayerEnabled = jData.value("CpuValidationLayerEnabled", m_isCpuValidationLayerEnabled); 50 | m_isGpuValidationLayerEnabled = jData.value("GpuValidationLayerEnabled", m_isGpuValidationLayerEnabled); 51 | m_VsyncEnabled = jData.value("vsync", m_VsyncEnabled); 52 | m_bIsBenchmarking = jData.value("benchmark", m_bIsBenchmarking); 53 | m_stablePowerState = jData.value("stablePowerState", m_stablePowerState); 54 | m_fontSize = jData.value("fontsize", m_fontSize); 55 | }; 56 | 57 | // Read config file (and override values from commandline if so) 58 | { 59 | std::ifstream f("FFXParallelSort.json"); 60 | if (!f) 61 | { 62 | MessageBox(nullptr, "Config file not found!\n", "Cauldron Panic!", MB_ICONERROR); 63 | exit(0); 64 | } 65 | 66 | try 67 | { 68 | f >> m_jsonConfigFile; 69 | } 70 | catch (json::parse_error) 71 | { 72 | MessageBox(nullptr, "Error parsing FFXParallelSort.json!\n", "Cauldron Panic!", MB_ICONERROR); 73 | exit(0); 74 | } 75 | } 76 | 77 | json globals = 
m_jsonConfigFile["globals"]; 78 | process(globals); 79 | 80 | // Process the command line to see if we need to do anything for the sample (i.e. benchmarking, setup certain settings, etc.) 81 | std::string charString = lpCmdLine; 82 | if (!charString.compare("")) 83 | return; // No parameters 84 | 85 | // Need to first convert the char string to a wide character set 86 | std::wstring wideString; 87 | wideString.assign(charString.begin(), charString.end()); 88 | 89 | LPWSTR* ArgList; 90 | int ArgCount, CurrentArg(0); 91 | ArgList = CommandLineToArgvW(wideString.c_str(), &ArgCount); 92 | while (CurrentArg < ArgCount) 93 | { 94 | wideString = ArgList[CurrentArg]; 95 | 96 | // Enable benchmarking 97 | if (!wideString.compare(L"-benchmark")) 98 | { 99 | m_bIsBenchmarking = true; 100 | ++CurrentArg; 101 | } 102 | 103 | // Set num keys to sort 104 | else if (!wideString.compare(L"-keyset")) 105 | { 106 | assert(ArgCount > CurrentArg + 1 && "Incorrect usage of -keyset <0-2>"); 107 | // Get the parameter 108 | int keySet = std::stoi(ArgList[CurrentArg + 1]); 109 | assert(keySet >= 0 && keySet < 3 && "Incorrect usage of -keyset <0-2>"); 110 | FFXParallelSort::OverrideKeySet(keySet); 111 | CurrentArg += 2; 112 | } 113 | 114 | // Set payload sort 115 | else if (!wideString.compare(L"-payload")) 116 | { 117 | FFXParallelSort::OverridePayload(); 118 | ++CurrentArg; 119 | } 120 | 121 | else 122 | { 123 | assert(false && "Unsupported command line parameter"); 124 | exit(0); 125 | } 126 | } 127 | } 128 | 129 | //-------------------------------------------------------------------------------------- 130 | // 131 | // OnCreate 132 | // 133 | //-------------------------------------------------------------------------------------- 134 | void Sample::OnCreate() 135 | { 136 | // Init the shader compiler 137 | InitDirectXCompiler(); 138 | CreateShaderCache(); 139 | 140 | // Create a instance of the renderer and initialize it, we need to do that for each GPU 141 | m_pRenderer = new 
Renderer(); 142 | m_pRenderer->OnCreate(&m_device, &m_swapChain, m_fontSize); 143 | 144 | // set benchmarking state if enabled 145 | if (m_bIsBenchmarking) 146 | { 147 | std::string deviceName; 148 | std::string driverVersion; 149 | m_device.GetDeviceInfo(&deviceName, &driverVersion); 150 | BenchmarkConfig(m_jsonConfigFile["BenchmarkSettings"], -1, nullptr, deviceName, driverVersion); 151 | } 152 | 153 | // Init GUI (non gfx stuff) 154 | ImGUI_Init((void*)m_windowHwnd); 155 | m_UIState.Initialize(); 156 | 157 | OnResize(); 158 | OnUpdateDisplay(); 159 | } 160 | 161 | //-------------------------------------------------------------------------------------- 162 | // 163 | // OnDestroy 164 | // 165 | //-------------------------------------------------------------------------------------- 166 | void Sample::OnDestroy() 167 | { 168 | ImGUI_Shutdown(); 169 | 170 | m_device.GPUFlush(); 171 | 172 | m_pRenderer->OnDestroyWindowSizeDependentResources(); 173 | m_pRenderer->OnDestroy(); 174 | 175 | delete m_pRenderer; 176 | 177 | //shut down the shader compiler 178 | DestroyShaderCache(&m_device); 179 | } 180 | 181 | //-------------------------------------------------------------------------------------- 182 | // 183 | // OnEvent, win32 sends us events and we forward them to ImGUI 184 | // 185 | //-------------------------------------------------------------------------------------- 186 | static void ToggleBool(bool& b) { b = !b; } 187 | bool Sample::OnEvent(MSG msg) 188 | { 189 | if (ImGUI_WndProcHandler(msg.hwnd, msg.message, msg.wParam, msg.lParam)) 190 | return true; 191 | 192 | // handle function keys (F1, F2...) 
here, rest of the input is handled 193 | // by imGUI later in HandleInput() function 194 | const WPARAM& KeyPressed = msg.wParam; 195 | switch (msg.message) 196 | { 197 | case WM_KEYUP: 198 | case WM_SYSKEYUP: 199 | /* WINDOW TOGGLES */ 200 | if (KeyPressed == VK_F1) m_UIState.bShowControlsWindow ^= 1; 201 | if (KeyPressed == VK_F2) m_UIState.bShowProfilerWindow ^= 1; 202 | break; 203 | } 204 | 205 | return true; 206 | } 207 | 208 | //-------------------------------------------------------------------------------------- 209 | // 210 | // OnResize 211 | // 212 | //-------------------------------------------------------------------------------------- 213 | void Sample::OnResize() 214 | { 215 | // Destroy resources (if we are not minimized) 216 | if (m_Width && m_Height && m_pRenderer) 217 | { 218 | m_pRenderer->OnDestroyWindowSizeDependentResources(); 219 | m_pRenderer->OnCreateWindowSizeDependentResources(&m_swapChain, m_Width, m_Height); 220 | } 221 | } 222 | 223 | //-------------------------------------------------------------------------------------- 224 | // 225 | // UpdateDisplay 226 | // 227 | //-------------------------------------------------------------------------------------- 228 | void Sample::OnUpdateDisplay() 229 | { 230 | // Destroy resources (if we are not minimized) 231 | if (m_pRenderer) 232 | { 233 | m_pRenderer->OnUpdateDisplayDependentResources(&m_swapChain); 234 | } 235 | } 236 | 237 | //-------------------------------------------------------------------------------------- 238 | // 239 | // OnUpdate 240 | // 241 | //-------------------------------------------------------------------------------------- 242 | void Sample::OnUpdate() 243 | { 244 | ImGuiIO& io = ImGui::GetIO(); 245 | 246 | //If the mouse was not used by the GUI then it's for the camera 247 | if (io.WantCaptureMouse) 248 | { 249 | io.MouseDelta.x = 0; 250 | io.MouseDelta.y = 0; 251 | io.MouseWheel = 0; 252 | } 253 | 254 | // Keyboard & Mouse 255 | HandleInput(io); 256 | 257 | // 
Increase time 258 | m_time += (float)m_deltaTime / 1000.0f; // time in seconds 259 | } 260 | 261 | void Sample::HandleInput(const ImGuiIO& io) 262 | { 263 | auto fnIsKeyTriggered = [&io](char key) { return io.KeysDown[key] && io.KeysDownDuration[key] == 0.0f; }; 264 | 265 | // Handle Keyboard/Mouse input here 266 | } 267 | 268 | //-------------------------------------------------------------------------------------- 269 | // 270 | // OnRender, updates the state from the UI, animates, transforms and renders the scene 271 | // 272 | //-------------------------------------------------------------------------------------- 273 | void Sample::OnRender() 274 | { 275 | // Do any start of frame necessities 276 | BeginFrame(); 277 | 278 | ImGUI_UpdateIO(); 279 | ImGui::NewFrame(); 280 | 281 | if (m_bIsBenchmarking) 282 | { 283 | // Benchmarking takes control of the time, and exits the app when the animation is done 284 | std::vector timeStamps = m_pRenderer->GetTimingValues(); 285 | m_time = BenchmarkLoop(timeStamps, nullptr, m_pRenderer->GetScreenshotFileName()); 286 | } 287 | else 288 | { 289 | // Build the UI. Note that the rendering of the UI happens later. 
290 | BuildUI(); 291 | OnUpdate(); 292 | } 293 | 294 | // Do Render frame using AFR 295 | m_pRenderer->OnRender(&m_UIState, &m_swapChain, m_time, m_bIsBenchmarking); 296 | 297 | // Framework will handle Present and some other end of frame logic 298 | EndFrame(); 299 | } 300 | 301 | 302 | //-------------------------------------------------------------------------------------- 303 | // 304 | // WinMain 305 | // 306 | //-------------------------------------------------------------------------------------- 307 | int WINAPI WinMain(HINSTANCE hInstance, 308 | HINSTANCE hPrevInstance, 309 | LPSTR lpCmdLine, 310 | int nCmdShow) 311 | { 312 | LPCSTR Name = "FidelityFX Parallel Sort DX12 v1.1"; 313 | 314 | // create new DX sample 315 | return RunFramework(hInstance, lpCmdLine, nCmdShow, new Sample(Name)); 316 | } 317 | -------------------------------------------------------------------------------- /sample/src/DX12/sample.h: -------------------------------------------------------------------------------- 1 | // sample.h 2 | // 3 | // Copyright(c) 2021 Advanced Micro Devices, Inc.All rights reserved. 4 | // Permission is hereby granted, free of charge, to any person obtaining a copy 5 | // of this software and associated documentation files(the "Software"), to deal 6 | // in the Software without restriction, including without limitation the rights 7 | // to use, copy, modify, merge, publish, distribute, sublicense, and / or sell 8 | // copies of the Software, and to permit persons to whom the Software is 9 | // furnished to do so, subject to the following conditions : 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 
12 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 15 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 18 | // THE SOFTWARE. 19 | 20 | #pragma once 21 | 22 | #include "base/FrameworkWindows.h" 23 | #include "Renderer.h" 24 | #include "UI.h" 25 | 
// This class encapsulates the 'application' and is responsible for handling window events and scene updates (simulation)
// Rendering and rendering resource management is done by the Renderer class

class Sample : public FrameworkWindows
{
public:
    // explicit: a window title must never convert silently into a Sample
    explicit Sample(LPCSTR name) : FrameworkWindows(name) {}

    // Framework callbacks
    void OnParseCommandLine(LPSTR lpCmdLine, uint32_t* pWidth, uint32_t* pHeight) override;
    void OnCreate() override;
    void OnDestroy() override;
    void OnRender() override;
    bool OnEvent(MSG msg) override;
    void OnResize() override;
    void OnUpdateDisplay() override;

    // Sample specific per-frame work
    void BuildUI();
    void OnUpdate();
    void HandleInput(const ImGuiIO& io);

private:
    // Every member has an in-class initializer so that no state is indeterminate if a
    // callback reads it before OnParseCommandLine/OnCreate have assigned it. The values
    // are the same defaults OnParseCommandLine establishes.

    // Benchmarking support
    bool        m_bIsBenchmarking = false;
    float       m_time = 0.f;

    Renderer*   m_pRenderer = nullptr;
    UIState     m_UIState = {};
    float       m_fontSize = 13.f;

    // json config file
    json        m_jsonConfigFile;
};
57 | -------------------------------------------------------------------------------- /sample/src/DX12/stdafx.cpp: -------------------------------------------------------------------------------- 1 | // stdafx.cpp 2 | // 3 | // Copyright(c) 2021 Advanced Micro Devices, Inc.All rights reserved.
4 | // Permission is hereby granted, free of charge, to any person obtaining a copy 5 | // of this software and associated documentation files(the "Software"), to deal 6 | // in the Software without restriction, including without limitation the rights 7 | // to use, copy, modify, merge, publish, distribute, sublicense, and / or sell 8 | // copies of the Software, and to permit persons to whom the Software is 9 | // furnished to do so, subject to the following conditions : 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 15 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 18 | // THE SOFTWARE. 19 | 20 | #include "stdafx.h" 21 | 22 | // TODO: reference any additional headers you need in STDAFX.H 23 | // and not in this file 24 | -------------------------------------------------------------------------------- /sample/src/DX12/stdafx.h: -------------------------------------------------------------------------------- 1 | // stdafx.h 2 | // 3 | // Copyright(c) 2021 Advanced Micro Devices, Inc.All rights reserved. 
4 | // Permission is hereby granted, free of charge, to any person obtaining a copy 5 | // of this software and associated documentation files(the "Software"), to deal 6 | // in the Software without restriction, including without limitation the rights 7 | // to use, copy, modify, merge, publish, distribute, sublicense, and / or sell 8 | // copies of the Software, and to permit persons to whom the Software is 9 | // furnished to do so, subject to the following conditions : 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 15 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 18 | // THE SOFTWARE. 
19 | 20 | #pragma once 21 | 22 | #define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers 23 | // Windows Header Files: 24 | #include 25 | #include 26 | 27 | // C RunTime Header Files 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | 34 | #include "../../libs/d3d12x/d3dx12.h" 35 | 36 | // we are using DirectXMath 37 | #include 38 | using namespace DirectX; 39 | 40 | // TODO: reference additional headers your program requires here 41 | #include "Base/Imgui.h" 42 | #include "Base/ImguiHelper.h" 43 | #include "Base/Fence.h" 44 | #include "Base/FrameworkWindows.h" 45 | #include "Base/Helper.h" 46 | #include "Base/Device.h" 47 | #include "Base/Texture.h" 48 | #include "Base/SwapChain.h" 49 | #include "Base/UploadHeap.h" 50 | #include "Base/UserMarkers.h" 51 | #include "Base/GPUTimestamps.h" 52 | #include "Base/CommandListRing.h" 53 | #include "Base/StaticBufferPool.h" 54 | #include "Base/DynamicBufferRing.h" 55 | #include "Base/ResourceViewHeaps.h" 56 | #include "Base/SaveTexture.h" 57 | #include "Base/ShaderCompilerHelper.h" 58 | #include "Base/StaticConstantBufferPool.h" 59 | 60 | #include "GLTF/GltfPbrPass.h" 61 | #include "GLTF/GltfBBoxPass.h" 62 | #include "GLTF/GltfDepthPass.h" 63 | #include "GLTF/GltfMotionVectorsPass.h" 64 | 65 | #include "Misc/Misc.h" 66 | #include "Misc/Error.h" 67 | #include "Misc/Camera.h" 68 | 69 | #include "PostProc/TAA.h" 70 | #include "PostProc/Bloom.h" 71 | #include "PostProc/BlurPS.h" 72 | #include "PostProc/SkyDome.h" 73 | #include "PostProc/SkyDomeProc.h" 74 | #include "PostProc/PostProcCS.h" 75 | #include "PostProc/ToneMapping.h" 76 | #include "PostProc/ToneMappingCS.h" 77 | #include "PostProc/ColorConversionPS.h" 78 | #include "PostProc/DownSamplePS.h" 79 | #include "PostProc/ShadowResolvePass.h" 80 | 81 | #include "ParallelSort.h" 82 | #include "UI.h" 83 | 84 | #include "Widgets/wireframe.h" 85 | 86 | 87 | using namespace CAULDRON_DX12; 88 | 
-------------------------------------------------------------------------------- /sample/src/VK/CMakeLists.txt: --------------------------------------------------------------------------------
# Vulkan flavour of the FidelityFX Parallel Sort sample.
project (${PROJECT_NAME})

include(${CMAKE_CURRENT_SOURCE_DIR}/../../common.cmake)

add_compile_options(/MP)
add_compile_definitions(FFX_CPP)

set(sources
    sample.cpp
    sample.h
    stdafx.cpp
    stdafx.h
    Renderer.cpp
    Renderer.h
    UI.cpp
    UI.h
    ParallelSort.cpp
    ParallelSort.h
    dpiawarescaling.manifest)

set(shader_sources
    ${CMAKE_CURRENT_SOURCE_DIR}/../Common/shaders/ParallelSortCS.hlsl
    ${CMAKE_CURRENT_SOURCE_DIR}/../Common/shaders/ParallelSortVerify.hlsl)

set(fidelityfx_sources
    ${CMAKE_CURRENT_SOURCE_DIR}/../../../FFX-ParallelSort/FFX_ParallelSort.h)

set(common_sources
    ${CMAKE_CURRENT_SOURCE_DIR}/../Common/FFXParallelSort.json
    ${CMAKE_CURRENT_SOURCE_DIR}/../Common/Validate4K.png
    ${CMAKE_CURRENT_SOURCE_DIR}/../Common/Validate2K.png
    ${CMAKE_CURRENT_SOURCE_DIR}/../Common/Validate1080p.png)

# Copy runtime assets next to the executable
copyCommand("${shader_sources}" ${CMAKE_HOME_DIRECTORY}/bin/ShaderLibVK)
copyCommand("${fidelityfx_sources}" ${CMAKE_HOME_DIRECTORY}/bin/ShaderLibVK/FFX-ParallelSort)
copyCommand("${common_sources}" ${CMAKE_HOME_DIRECTORY}/bin)

source_group("Common"     FILES ${common_sources})
source_group("Shaders"    FILES ${shader_sources})
source_group("FidelityFX" FILES ${fidelityfx_sources})
source_group("Sources"    FILES ${sources})
source_group("Icon"       FILES ${icon_src}) # defined in top-level CMakeLists.txt

# prevent VS from processing/compiling these files
# (the shader list in this script is 'shader_sources'; the previously referenced
#  'Shaders_src' is never set here, so the override was applied to no files)
set_source_files_properties(${shader_sources} PROPERTIES VS_TOOL_OVERRIDE "Text")

add_executable(${PROJECT_NAME} WIN32 ${common_sources} ${shader_sources} ${sources} ${fidelityfx_sources} ${icon_src})
target_link_libraries(${PROJECT_NAME} LINK_PUBLIC Cauldron_VK ImGUI Vulkan::Vulkan)
set_target_properties(${PROJECT_NAME} PROPERTIES VS_DEBUGGER_WORKING_DIRECTORY "${CMAKE_HOME_DIRECTORY}/bin" DEBUG_POSTFIX "d")

addManifest(${PROJECT_NAME})
-------------------------------------------------------------------------------- /sample/src/VK/ParallelSort.h: -------------------------------------------------------------------------------- 1 | // ParallelSort.h 2 | // 3 | // Copyright(c) 2021 Advanced Micro Devices, Inc.All rights reserved. 4 | // Permission is hereby granted, free of charge, to any person obtaining a copy 5 | // of this software and associated documentation files(the "Software"), to deal 6 | // in the Software without restriction, including without limitation the rights 7 | // to use, copy, modify, merge, publish, distribute, sublicense, and / or sell 8 | // copies of the Software, and to permit persons to whom the Software is 9 | // furnished to do so, subject to the following conditions : 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 15 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 18 | // THE SOFTWARE.
19 | 20 | #pragma once 21 | #include "vulkan/vulkan.h" 22 | 23 | using namespace CAULDRON_VK; 24 | 25 | struct ParallelSortRenderCB // If you change this, also change struct ParallelSortRenderCB in ParallelSortVerify.hlsl 26 | { 27 | int32_t Width; 28 | int32_t Height; 29 | int32_t SortWidth; 30 | int32_t SortHeight; 31 | }; 32 | 33 | namespace CAULDRON_VK 34 | { 35 | class Device; 36 | class ResourceViewHeaps; 37 | class DynamicBufferRing; 38 | class StaticBufferPool; 39 | } 40 | 41 | class FFXParallelSort 42 | { 43 | public: 44 | void OnCreate(Device* pDevice, ResourceViewHeaps* pResourceViewHeaps, DynamicBufferRing* pConstantBufferRing, UploadHeap* pUploadHeap, SwapChain* pSwapChain); 45 | void OnDestroy(); 46 | 47 | void Sort(VkCommandBuffer commandList, bool isBenchmarking, float benchmarkTime); 48 | void CopySourceDataForFrame(VkCommandBuffer commandList); 49 | void DrawGui(); 50 | void DrawVisualization(VkCommandBuffer commandList, uint32_t RTWidth, uint32_t RTHeight); 51 | 52 | // Temp -- For command line overrides 53 | static void OverrideKeySet(int ResolutionOverride); 54 | static void OverridePayload(); 55 | // Temp -- For command line overrides 56 | 57 | private: 58 | void CreateKeyPayloadBuffers(); 59 | void CompileRadixPipeline(const char* shaderFile, const DefineList* defines, const char* entryPoint, VkPipeline& pPipeline); 60 | void BindConstantBuffer(VkDescriptorBufferInfo& GPUCB, VkDescriptorSet& DescriptorSet, uint32_t Binding = 0, uint32_t Count = 1); 61 | void BindUAVBuffer(VkBuffer* pBuffer, VkDescriptorSet& DescriptorSet, uint32_t Binding = 0, uint32_t Count = 1); 62 | 63 | // Temp -- For command line overrides 64 | static int KeySetOverride; 65 | static bool PayloadOverride; 66 | // Temp -- For command line overrides 67 | 68 | Device* m_pDevice = nullptr; 69 | UploadHeap* m_pUploadHeap = nullptr; 70 | ResourceViewHeaps* m_pResourceViewHeaps = nullptr; 71 | DynamicBufferRing* m_pConstantBufferRing = nullptr; 72 | uint32_t m_MaxNumThreadgroups 
= 800; 73 | 74 | uint32_t m_ScratchBufferSize; 75 | uint32_t m_ReducedScratchBufferSize; 76 | 77 | // Sample resources 78 | VkBuffer m_SrcKeyBuffers[3]; // 32 bit source key buffers (for 1080, 2K, 4K resolution) 79 | VmaAllocation m_SrcKeyBufferAllocations[3]; 80 | 81 | VkBuffer m_SrcPayloadBuffers; // 32 bit source payload buffers 82 | VmaAllocation m_SrcPayloadBufferAllocation; 83 | 84 | VkBuffer m_DstKeyBuffers[2]; // 32 bit destination key buffers (when not doing in place writes) 85 | VmaAllocation m_DstKeyBufferAllocations[2]; 86 | 87 | VkBuffer m_DstPayloadBuffers[2]; // 32 bit destination payload buffers (when not doing in place writes) 88 | VmaAllocation m_DstPayloadBufferAllocations[2]; 89 | 90 | VkBuffer m_FPSScratchBuffer; // Sort scratch buffer 91 | VmaAllocation m_FPSScratchBufferAllocation; 92 | 93 | VkBuffer m_FPSReducedScratchBuffer; // Sort reduced scratch buffer 94 | VmaAllocation m_FPSReducedScratchBufferAllocation; 95 | 96 | VkDescriptorSetLayout m_SortDescriptorSetLayoutConstants; 97 | VkDescriptorSet m_SortDescriptorSetConstants[3]; 98 | VkDescriptorSetLayout m_SortDescriptorSetLayoutConstantsIndirect; 99 | VkDescriptorSet m_SortDescriptorSetConstantsIndirect[3]; 100 | 101 | VkDescriptorSetLayout m_SortDescriptorSetLayoutInputOutputs; 102 | VkDescriptorSetLayout m_SortDescriptorSetLayoutScan; 103 | VkDescriptorSetLayout m_SortDescriptorSetLayoutScratch; 104 | VkDescriptorSetLayout m_SortDescriptorSetLayoutIndirect; 105 | 106 | VkDescriptorSet m_SortDescriptorSetInputOutput[2]; 107 | VkDescriptorSet m_SortDescriptorSetScanSets[2]; 108 | VkDescriptorSet m_SortDescriptorSetScratch; 109 | VkDescriptorSet m_SortDescriptorSetIndirect; 110 | VkPipelineLayout m_SortPipelineLayout; 111 | 112 | VkPipeline m_FPSCountPipeline; 113 | VkPipeline m_FPSCountReducePipeline; 114 | VkPipeline m_FPSScanPipeline; 115 | VkPipeline m_FPSScanAddPipeline; 116 | VkPipeline m_FPSScatterPipeline; 117 | VkPipeline m_FPSScatterPayloadPipeline; 118 | 119 | // Resources for 
indirect execution of algorithm 120 | VkBuffer m_IndirectKeyCounts; // Buffer to hold num keys for indirect dispatch 121 | VmaAllocation m_IndirectKeyCountsAllocation; 122 | VkBuffer m_IndirectConstantBuffer; // Buffer to hold radix sort constant buffer data for indirect dispatch 123 | VmaAllocation m_IndirectConstantBufferAllocation; 124 | VkBuffer m_IndirectCountScatterArgs; // Buffer to hold dispatch arguments used for Count/Scatter parts of the algorithm 125 | VmaAllocation m_IndirectCountScatterArgsAllocation; 126 | VkBuffer m_IndirectReduceScanArgs; // Buffer to hold dispatch arguments used for Reduce/Scan parts of the algorithm 127 | VmaAllocation m_IndirectReduceScanArgsAllocation; 128 | 129 | VkPipeline m_FPSIndirectSetupParametersPipeline; 130 | 131 | // Resources for verification render 132 | Texture m_Validate4KTexture; 133 | Texture m_Validate2KTexture; 134 | Texture m_Validate1080pTexture; 135 | VkImageView m_ValidationImageViews[3]; 136 | 137 | VkDescriptorSetLayout m_RenderDescriptorSetLayout0; 138 | VkDescriptorSet m_RenderDescriptorSet0; 139 | VkDescriptorSetLayout m_RenderDescriptorSetLayout1; 140 | VkDescriptorSet m_RenderDescriptorSet1[4]; 141 | VkDescriptorSetLayout m_RenderDescriptorSetLayout2; 142 | VkDescriptorSet m_RenderDescriptorSet2[3]; 143 | VkPipelineLayout m_RenderPipelineLayout; 144 | 145 | VkPipeline m_RenderResultVerificationPipeline; 146 | 147 | // Options for UI and test to run 148 | int m_UIResolutionSize = 0; 149 | bool m_UISortPayload = false; 150 | bool m_UIIndirectSort = false; 151 | int m_UIVisualOutput = 0; 152 | }; -------------------------------------------------------------------------------- /sample/src/VK/UI.cpp: -------------------------------------------------------------------------------- 1 | // AMD SampleVK sample code 2 | // 3 | // Copyright(c) 2021 Advanced Micro Devices, Inc.All rights reserved. 
4 | // Permission is hereby granted, free of charge, to any person obtaining a copy 5 | // of this software and associated documentation files(the "Software"), to deal 6 | // in the Software without restriction, including without limitation the rights 7 | // to use, copy, modify, merge, publish, distribute, sublicense, and / or sell 8 | // copies of the Software, and to permit persons to whom the Software is 9 | // furnished to do so, subject to the following conditions : 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 15 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 18 | // THE SOFTWARE. 
19 | 20 | #include "stdafx.h" 21 | 22 | #include "UI.h" 23 | #include "Sample.h" 24 | #include "imgui.h" 25 | 26 | #include "base/FrameworkWindows.h" 27 | 28 | // To use the 'disabled UI state' functionality (ImGuiItemFlags_Disabled), include internal header 29 | // https://github.com/ocornut/imgui/issues/211#issuecomment-339241929 30 | #include "imgui_internal.h" 31 | static void DisableUIStateBegin(const bool& bEnable) 32 | { 33 | if (!bEnable) 34 | { 35 | ImGui::PushItemFlag(ImGuiItemFlags_Disabled, true); 36 | ImGui::PushStyleVar(ImGuiStyleVar_Alpha, ImGui::GetStyle().Alpha * 0.5f); 37 | } 38 | }; 39 | static void DisableUIStateEnd(const bool& bEnable) 40 | { 41 | if (!bEnable) 42 | { 43 | ImGui::PopItemFlag(); 44 | ImGui::PopStyleVar(); 45 | } 46 | }; 47 | 48 | void Sample::BuildUI() 49 | { 50 | ImGuiIO& io = ImGui::GetIO(); 51 | ImGuiStyle& style = ImGui::GetStyle(); 52 | style.FrameBorderSize = 1.0f; 53 | 54 | const uint32_t W = this->GetWidth(); 55 | const uint32_t H = this->GetHeight(); 56 | 57 | const uint32_t PROFILER_WINDOW_PADDING_X = 10; 58 | const uint32_t PROFILER_WINDOW_PADDING_Y = 10; 59 | const uint32_t PROFILER_WINDOW_SIZE_X = 330; 60 | const uint32_t PROFILER_WINDOW_SIZE_Y = 450; 61 | const uint32_t PROFILER_WINDOW_POS_X = W - PROFILER_WINDOW_PADDING_X - PROFILER_WINDOW_SIZE_X; 62 | const uint32_t PROFILER_WINDOW_POS_Y = PROFILER_WINDOW_PADDING_Y; 63 | 64 | const uint32_t CONTROLS_WINDOW_POS_X = 10; 65 | const uint32_t CONTROLS_WINDOW_POS_Y = 10; 66 | const uint32_t CONTROLW_WINDOW_SIZE_X = 350; 67 | const uint32_t CONTROLW_WINDOW_SIZE_Y = 780; // assuming > 720p 68 | 69 | // Render CONTROLS window 70 | // 71 | ImGui::SetNextWindowPos(ImVec2(CONTROLS_WINDOW_POS_X, CONTROLS_WINDOW_POS_Y), ImGuiCond_FirstUseEver); 72 | ImGui::SetNextWindowSize(ImVec2(CONTROLW_WINDOW_SIZE_X, CONTROLW_WINDOW_SIZE_Y), ImGuiCond_FirstUseEver); 73 | 74 | if (m_UIState.bShowControlsWindow) 75 | { 76 | ImGui::Begin("CONTROLS (F1)", &m_UIState.bShowControlsWindow); 77 | 
78 | // Render UI for Radix Sort 79 | m_pRenderer->RenderParallelSortUI(); 80 | 81 | ImGui::Spacing(); 82 | ImGui::Spacing(); 83 | 84 | if (ImGui::CollapsingHeader("Presentation Mode", ImGuiTreeNodeFlags_DefaultOpen)) 85 | { 86 | const char* fullscreenModes[] = { "Windowed", "BorderlessFullscreen", "ExclusiveFullscreen" }; 87 | if (ImGui::Combo("Fullscreen Mode", (int*)&m_fullscreenMode, fullscreenModes, _countof(fullscreenModes))) 88 | { 89 | if (m_previousFullscreenMode != m_fullscreenMode) 90 | { 91 | HandleFullScreen(); 92 | m_previousFullscreenMode = m_fullscreenMode; 93 | } 94 | } 95 | } 96 | 97 | ImGui::End(); // CONTROLS 98 | } 99 | 100 | 101 | // Render PROFILER window 102 | // 103 | if (m_UIState.bShowProfilerWindow) 104 | { 105 | constexpr size_t NUM_FRAMES = 128; 106 | static float FRAME_TIME_ARRAY[NUM_FRAMES] = { 0 }; 107 | 108 | // track highest frame rate and determine the max value of the graph based on the measured highest value 109 | static float RECENT_HIGHEST_FRAME_TIME = 0.0f; 110 | constexpr int FRAME_TIME_GRAPH_MAX_FPS[] = { 800, 240, 120, 90, 60, 45, 30, 15, 10, 5, 4, 3, 2, 1 }; 111 | static float FRAME_TIME_GRAPH_MAX_VALUES[_countof(FRAME_TIME_GRAPH_MAX_FPS)] = { 0 }; // us 112 | for (int i = 0; i < _countof(FRAME_TIME_GRAPH_MAX_FPS); ++i) { FRAME_TIME_GRAPH_MAX_VALUES[i] = 1000000.f / FRAME_TIME_GRAPH_MAX_FPS[i]; } 113 | 114 | //scrolling data and average FPS computing 115 | const std::vector& timeStamps = m_pRenderer->GetTimingValues(); 116 | const bool bTimeStampsAvailable = timeStamps.size() > 0; 117 | if (bTimeStampsAvailable) 118 | { 119 | RECENT_HIGHEST_FRAME_TIME = 0; 120 | FRAME_TIME_ARRAY[NUM_FRAMES - 1] = timeStamps.back().m_microseconds; 121 | for (uint32_t i = 0; i < NUM_FRAMES - 1; i++) 122 | { 123 | FRAME_TIME_ARRAY[i] = FRAME_TIME_ARRAY[i + 1]; 124 | } 125 | RECENT_HIGHEST_FRAME_TIME = max(RECENT_HIGHEST_FRAME_TIME, FRAME_TIME_ARRAY[NUM_FRAMES - 1]); 126 | } 127 | const float& frameTime_us = FRAME_TIME_ARRAY[NUM_FRAMES - 1]; 
128 | const float frameTime_ms = frameTime_us * 0.001f; 129 | const int fps = bTimeStampsAvailable ? static_cast(1000000.0f / frameTime_us) : 0; 130 | 131 | // UI 132 | ImGui::SetNextWindowPos(ImVec2((float)PROFILER_WINDOW_POS_X, (float)PROFILER_WINDOW_POS_Y), ImGuiCond_FirstUseEver); 133 | ImGui::SetNextWindowSize(ImVec2(PROFILER_WINDOW_SIZE_X, PROFILER_WINDOW_SIZE_Y), ImGuiCond_FirstUseEver); 134 | ImGui::Begin("PROFILER (F2)", &m_UIState.bShowProfilerWindow); 135 | 136 | ImGui::Text("Resolution : %ix%i", m_Width, m_Height); 137 | ImGui::Text("API : %s", m_systemInfo.mGfxAPI.c_str()); 138 | ImGui::Text("GPU : %s", m_systemInfo.mGPUName.c_str()); 139 | ImGui::Text("CPU : %s", m_systemInfo.mCPUName.c_str()); 140 | ImGui::Text("FPS : %d (%.2f ms)", fps, frameTime_ms); 141 | 142 | if (ImGui::CollapsingHeader("GPU Timings", ImGuiTreeNodeFlags_DefaultOpen)) 143 | { 144 | std::string msOrUsButtonText = m_UIState.bShowMilliseconds ? "Switch to microseconds(us)" : "Switch to milliseconds(ms)"; 145 | if (ImGui::Button(msOrUsButtonText.c_str())) { 146 | m_UIState.bShowMilliseconds = !m_UIState.bShowMilliseconds; 147 | } 148 | ImGui::Spacing(); 149 | 150 | // find the index of the FrameTimeGraphMaxValue as the next higher-than-recent-highest-frame-time in the pre-determined value list 151 | size_t iFrameTimeGraphMaxValue = 0; 152 | for (int i = 0; i < _countof(FRAME_TIME_GRAPH_MAX_VALUES); ++i) 153 | { 154 | if (RECENT_HIGHEST_FRAME_TIME < FRAME_TIME_GRAPH_MAX_VALUES[i]) // FRAME_TIME_GRAPH_MAX_VALUES are in increasing order 155 | { 156 | iFrameTimeGraphMaxValue = min(_countof(FRAME_TIME_GRAPH_MAX_VALUES) - 1, i + 1); 157 | break; 158 | } 159 | } 160 | ImGui::PlotLines("", FRAME_TIME_ARRAY, NUM_FRAMES, 0, "GPU frame time (us)", 0.0f, FRAME_TIME_GRAPH_MAX_VALUES[iFrameTimeGraphMaxValue], ImVec2(0, 80)); 161 | 162 | for (uint32_t i = 0; i < timeStamps.size(); i++) 163 | { 164 | float value = m_UIState.bShowMilliseconds ? 
timeStamps[i].m_microseconds / 1000.0f : timeStamps[i].m_microseconds; 165 | const char* pStrUnit = m_UIState.bShowMilliseconds ? "ms" : "us"; 166 | ImGui::Text("%-18s: %7.2f %s", timeStamps[i].m_label.c_str(), value, pStrUnit); 167 | } 168 | } 169 | ImGui::End(); // PROFILER 170 | } 171 | } 172 | 173 | void UIState::Initialize() 174 | { 175 | // init GUI state 176 | this->bShowControlsWindow = true; 177 | this->bShowProfilerWindow = true; 178 | } 179 | -------------------------------------------------------------------------------- /sample/src/VK/UI.h: -------------------------------------------------------------------------------- 1 | // AMD SampleVK sample code 2 | // 3 | // Copyright(c) 2021 Advanced Micro Devices, Inc.All rights reserved. 4 | // Permission is hereby granted, free of charge, to any person obtaining a copy 5 | // of this software and associated documentation files(the "Software"), to deal 6 | // in the Software without restriction, including without limitation the rights 7 | // to use, copy, modify, merge, publish, distribute, sublicense, and / or sell 8 | // copies of the Software, and to permit persons to whom the Software is 9 | // furnished to do so, subject to the following conditions : 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 15 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 18 | // THE SOFTWARE. 

#pragma once

// NOTE(review): the header name of this include was lost when the file was extracted
// (angle-bracket text was stripped); <string> is assumed -- confirm against the repository.
#include <string>

// Flags shared between the application and its ImGui windows.
struct UIState
{
    //
    // WINDOW MANAGEMENT
    //
    bool bShowControlsWindow = true;   // toggled with F1 in Sample::OnEvent
    bool bShowProfilerWindow = true;   // toggled with F2 in Sample::OnEvent

    //
    // PROFILER CONTROLS
    //
    // Unit used when listing GPU timings: true = milliseconds, false = microseconds.
    // Initialize() does not assign this flag, so it must have a defined value from
    // construction; otherwise the profiler window reads an indeterminate bool.
    bool bShowMilliseconds = false;

    // -----------------------------------------------

    // Resets the window-visibility flags to their start-up state.
    void Initialize();
};
// --------------------------------------------------------------------------------
// /sample/src/VK/dpiawarescaling.manifest:
// --------------------------------------------------------------------------------
// NOTE(review): the XML markup of this 9-line manifest was stripped during extraction.
// The only text that survived is the value on its line 5:
//     true/PM
// --------------------------------------------------------------------------------
// /sample/src/VK/renderer.cpp:
// --------------------------------------------------------------------------------
// samplerenderer.cpp
//
// Copyright(c) 2021 Advanced Micro Devices, Inc.All rights reserved.
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files(the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions :
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
12 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 15 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 18 | // THE SOFTWARE. 19 | 20 | #include "Renderer.h" 21 | #include "UI.h" 22 | 23 | //-------------------------------------------------------------------------------------- 24 | // 25 | // OnCreate 26 | // 27 | //-------------------------------------------------------------------------------------- 28 | void Renderer::OnCreate(Device* pDevice, SwapChain *pSwapChain, float FontSize) 29 | { 30 | m_pDevice = pDevice; 31 | 32 | // Initialize helpers 33 | 34 | // Create all the heaps for the resources views 35 | const uint32_t cbvDescriptorCount = 4000; 36 | const uint32_t srvDescriptorCount = 8000; 37 | const uint32_t uavDescriptorCount = 10; 38 | const uint32_t samplerDescriptorCount = 20; 39 | m_ResourceViewHeaps.OnCreate(pDevice, cbvDescriptorCount, srvDescriptorCount, uavDescriptorCount, samplerDescriptorCount); 40 | 41 | // Create a commandlist ring for the Direct queue 42 | uint32_t commandListsPerBackBuffer = 8; 43 | m_CommandListRing.OnCreate(pDevice, BackBufferCount, commandListsPerBackBuffer); 44 | 45 | // Create a 'dynamic' constant buffer 46 | const uint32_t constantBuffersMemSize = 20 * 1024 * 1024; 47 | m_ConstantBufferRing.OnCreate(pDevice, BackBufferCount, constantBuffersMemSize, "Uniforms"); 48 | 49 | // Create a 'static' pool for vertices, indices and constant buffers 50 | const uint32_t staticGeometryMemSize = (2 * 128) * 1024 * 1024; 51 | m_VidMemBufferPool.OnCreate(pDevice, staticGeometryMemSize, true, "StaticGeom"); 52 | 53 | // initialize the GPU time stamps 
module 54 | m_GPUTimer.OnCreate(pDevice, BackBufferCount); 55 | 56 | // Quick helper to upload resources, it has it's own commandList and uses sub-allocation. 57 | const uint32_t uploadHeapMemSize = 100 * 1024 * 1024; 58 | m_UploadHeap.OnCreate(pDevice, uploadHeapMemSize); // initialize an upload heap (uses sub-allocation for faster results) 59 | 60 | // Initialize UI rendering resources 61 | m_ImGUI.OnCreate(m_pDevice, pSwapChain->GetRenderPass(), &m_UploadHeap, &m_ConstantBufferRing, FontSize); 62 | 63 | // Create FFX Parallel Sort pass 64 | m_ParallelSort.OnCreate(pDevice, &m_ResourceViewHeaps, &m_ConstantBufferRing, &m_UploadHeap, pSwapChain); 65 | 66 | // Make sure upload heap has finished uploading before continuing 67 | m_VidMemBufferPool.UploadData(m_UploadHeap.GetCommandList()); 68 | m_UploadHeap.FlushAndFinish(); 69 | } 70 | 71 | //-------------------------------------------------------------------------------------- 72 | // 73 | // OnDestroy 74 | // 75 | //-------------------------------------------------------------------------------------- 76 | void Renderer::OnDestroy() 77 | { 78 | m_ParallelSort.OnDestroy(); 79 | m_ImGUI.OnDestroy(); 80 | 81 | m_UploadHeap.OnDestroy(); 82 | m_GPUTimer.OnDestroy(); 83 | m_VidMemBufferPool.OnDestroy(); 84 | m_ConstantBufferRing.OnDestroy(); 85 | m_ResourceViewHeaps.OnDestroy(); 86 | m_CommandListRing.OnDestroy(); 87 | } 88 | 89 | //-------------------------------------------------------------------------------------- 90 | // 91 | // OnCreateWindowSizeDependentResources 92 | // 93 | //-------------------------------------------------------------------------------------- 94 | void Renderer::OnCreateWindowSizeDependentResources(SwapChain* pSwapChain, uint32_t Width, uint32_t Height) 95 | { 96 | m_Width = Width; 97 | m_Height = Height; 98 | 99 | // Set the viewport & scissors rect 100 | m_Viewport.x = 0; 101 | m_Viewport.y = (float)Height; 102 | m_Viewport.width = (float)Width; 103 | m_Viewport.height = -(float)(Height); 
104 | m_Viewport.minDepth = (float)0.0f; 105 | m_Viewport.maxDepth = (float)1.0f; 106 | m_RectScissor.extent.width = Width; 107 | m_RectScissor.extent.height = Height; 108 | m_RectScissor.offset.x = 0; 109 | m_RectScissor.offset.y = 0; 110 | } 111 | 112 | //-------------------------------------------------------------------------------------- 113 | // 114 | // OnDestroyWindowSizeDependentResources 115 | // 116 | //-------------------------------------------------------------------------------------- 117 | void Renderer::OnDestroyWindowSizeDependentResources() 118 | { 119 | } 120 | 121 | void Renderer::OnUpdateDisplayDependentResources(SwapChain* pSwapChain) 122 | { 123 | // Update pipelines in case the format of the RTs changed (this happens when going HDR) 124 | m_ImGUI.UpdatePipeline(pSwapChain->GetRenderPass()); 125 | } 126 | 127 | //-------------------------------------------------------------------------------------- 128 | // 129 | // OnRender 130 | // 131 | //-------------------------------------------------------------------------------------- 132 | void Renderer::OnRender(const UIState* pState, SwapChain* pSwapChain, float Time, bool bIsBenchmarking) 133 | { 134 | // Let our resource managers do some house keeping 135 | m_ConstantBufferRing.OnBeginFrame(); 136 | 137 | // command buffer calls 138 | VkCommandBuffer cmdBuf1 = m_CommandListRing.GetNewCommandList(); 139 | 140 | { 141 | VkCommandBufferBeginInfo cmd_buf_info; 142 | cmd_buf_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; 143 | cmd_buf_info.pNext = NULL; 144 | cmd_buf_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; 145 | cmd_buf_info.pInheritanceInfo = NULL; 146 | VkResult res = vkBeginCommandBuffer(cmdBuf1, &cmd_buf_info); 147 | assert(res == VK_SUCCESS); 148 | } 149 | 150 | m_GPUTimer.OnBeginFrame(cmdBuf1, &m_TimeStamps); 151 | 152 | // Copy the data to sort for the frame (don't time this -- external to process) 153 | m_ParallelSort.CopySourceDataForFrame(cmdBuf1); 154 | 
m_GPUTimer.GetTimeStamp(cmdBuf1, "Begin Frame"); 155 | 156 | // Do sort tests ----------------------------------------------------------------------- 157 | m_ParallelSort.Sort(cmdBuf1, bIsBenchmarking, Time); 158 | m_GPUTimer.GetTimeStamp(cmdBuf1, "FFX Parallel Sort"); 159 | 160 | // submit command buffer #1 161 | { 162 | VkResult res = vkEndCommandBuffer(cmdBuf1); 163 | assert(res == VK_SUCCESS); 164 | 165 | VkSubmitInfo submit_info; 166 | submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; 167 | submit_info.pNext = NULL; 168 | submit_info.waitSemaphoreCount = 0; 169 | submit_info.pWaitSemaphores = NULL; 170 | submit_info.pWaitDstStageMask = NULL; 171 | submit_info.commandBufferCount = 1; 172 | submit_info.pCommandBuffers = &cmdBuf1; 173 | submit_info.signalSemaphoreCount = 0; 174 | submit_info.pSignalSemaphores = NULL; 175 | res = vkQueueSubmit(m_pDevice->GetGraphicsQueue(), 1, &submit_info, VK_NULL_HANDLE); 176 | assert(res == VK_SUCCESS); 177 | } 178 | 179 | // Wait for swapchain (we are going to render to it) ----------------------------------- 180 | int imageIndex = pSwapChain->WaitForSwapChain(); 181 | 182 | m_CommandListRing.OnBeginFrame(); 183 | 184 | VkCommandBuffer cmdBuf2 = m_CommandListRing.GetNewCommandList(); 185 | 186 | { 187 | VkCommandBufferBeginInfo cmd_buf_info; 188 | cmd_buf_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; 189 | cmd_buf_info.pNext = NULL; 190 | cmd_buf_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; 191 | cmd_buf_info.pInheritanceInfo = NULL; 192 | VkResult res = vkBeginCommandBuffer(cmdBuf2, &cmd_buf_info); 193 | assert(res == VK_SUCCESS); 194 | } 195 | 196 | SetPerfMarkerBegin(cmdBuf2, "rendering to swap chain"); 197 | 198 | // prepare render pass 199 | { 200 | VkRenderPassBeginInfo rp_begin = {}; 201 | rp_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; 202 | rp_begin.pNext = NULL; 203 | rp_begin.renderPass = pSwapChain->GetRenderPass(); 204 | rp_begin.framebuffer = 
pSwapChain->GetFramebuffer(imageIndex); 205 | rp_begin.renderArea.offset.x = 0; 206 | rp_begin.renderArea.offset.y = 0; 207 | rp_begin.renderArea.extent.width = m_Width; 208 | rp_begin.renderArea.extent.height = m_Height; 209 | rp_begin.clearValueCount = 0; 210 | rp_begin.pClearValues = nullptr; 211 | vkCmdBeginRenderPass(cmdBuf2, &rp_begin, VK_SUBPASS_CONTENTS_INLINE); 212 | 213 | VkClearValue clearColor; 214 | clearColor.color = { 0.f, 0.f, 0.f, 0.f }; 215 | VkClearAttachment clearAttachment; 216 | clearAttachment.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; 217 | clearAttachment.colorAttachment = 0; 218 | clearAttachment.clearValue = clearColor; 219 | VkClearRect clearRect; 220 | clearRect.baseArrayLayer = 0; 221 | clearRect.layerCount = 1; 222 | clearRect.rect.offset = { 0, 0 }; 223 | clearRect.rect.extent.width = m_Width; 224 | clearRect.rect.extent.height = m_Height; 225 | vkCmdClearAttachments(cmdBuf2, 1, &clearAttachment, 1, &clearRect); 226 | } 227 | 228 | vkCmdSetScissor(cmdBuf2, 0, 1, &m_RectScissor); 229 | vkCmdSetViewport(cmdBuf2, 0, 1, &m_Viewport); 230 | 231 | // Render sort source/results over everything except the HUD -------------------------- 232 | m_ParallelSort.DrawVisualization(cmdBuf2, m_Width, m_Height); 233 | 234 | // Render HUD 235 | m_ImGUI.Draw(cmdBuf2); 236 | m_GPUTimer.GetTimeStamp(cmdBuf2, "ImGUI Rendering"); 237 | 238 | m_GPUTimer.OnEndFrame(); 239 | 240 | vkCmdEndRenderPass(cmdBuf2); 241 | 242 | SetPerfMarkerEnd(cmdBuf2); 243 | 244 | // Close & Submit the command list ---------------------------------------------------- 245 | { 246 | VkResult res = vkEndCommandBuffer(cmdBuf2); 247 | assert(res == VK_SUCCESS); 248 | 249 | VkSemaphore ImageAvailableSemaphore; 250 | VkSemaphore RenderFinishedSemaphores; 251 | VkFence CmdBufExecutedFences; 252 | pSwapChain->GetSemaphores(&ImageAvailableSemaphore, &RenderFinishedSemaphores, &CmdBufExecutedFences); 253 | 254 | VkPipelineStageFlags submitWaitStage = 
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; 255 | VkSubmitInfo submit_info2; 256 | submit_info2.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; 257 | submit_info2.pNext = NULL; 258 | submit_info2.waitSemaphoreCount = 1; 259 | submit_info2.pWaitSemaphores = &ImageAvailableSemaphore; 260 | submit_info2.pWaitDstStageMask = &submitWaitStage; 261 | submit_info2.commandBufferCount = 1; 262 | submit_info2.pCommandBuffers = &cmdBuf2; 263 | submit_info2.signalSemaphoreCount = 1; 264 | submit_info2.pSignalSemaphores = &RenderFinishedSemaphores; 265 | 266 | res = vkQueueSubmit(m_pDevice->GetGraphicsQueue(), 1, &submit_info2, CmdBufExecutedFences); 267 | assert(res == VK_SUCCESS); 268 | } 269 | } 270 | -------------------------------------------------------------------------------- /sample/src/VK/renderer.h: -------------------------------------------------------------------------------- 1 | // samplerenderer.h 2 | // 3 | // Copyright(c) 2021 Advanced Micro Devices, Inc.All rights reserved. 4 | // Permission is hereby granted, free of charge, to any person obtaining a copy 5 | // of this software and associated documentation files(the "Software"), to deal 6 | // in the Software without restriction, including without limitation the rights 7 | // to use, copy, modify, merge, publish, distribute, sublicense, and / or sell 8 | // copies of the Software, and to permit persons to whom the Software is 9 | // furnished to do so, subject to the following conditions : 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 
12 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 15 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 18 | // THE SOFTWARE. 19 | 20 | #pragma once 21 | 22 | #include "stdafx.h" 23 | #include "PostProc/MagnifierPS.h" 24 | 25 | struct UIState; 26 | 27 | // We are queuing (BackBufferCount + 0.5) frames, so we need to triple buffer the resources that get modified each frame 28 | static const int BackBufferCount = 3; 29 | 30 | #define USE_SHADOWMASK false 31 | 32 | using namespace CAULDRON_VK; 33 | 34 | // 35 | // This class deals with the GPU side of the sample. 36 | // 37 | class Renderer 38 | { 39 | public: 40 | 41 | void OnCreate(Device* pDevice, SwapChain* pSwapChain, float FontSize); 42 | void OnDestroy(); 43 | 44 | void OnCreateWindowSizeDependentResources(SwapChain* pSwapChain, uint32_t Width, uint32_t Height); 45 | void OnDestroyWindowSizeDependentResources(); 46 | 47 | void OnUpdateDisplayDependentResources(SwapChain* pSwapChain); 48 | 49 | const std::vector& GetTimingValues() const { return m_TimeStamps; } 50 | 51 | void OnRender(const UIState* pState, SwapChain* pSwapChain, float Time, bool bIsBenchmarking); 52 | 53 | void RenderParallelSortUI() { m_ParallelSort.DrawGui(); } 54 | 55 | private: 56 | Device* m_pDevice; 57 | 58 | uint32_t m_Width; 59 | uint32_t m_Height; 60 | VkRect2D m_RectScissor; 61 | VkViewport m_Viewport; 62 | 63 | // Initialize helper classes 64 | ResourceViewHeaps m_ResourceViewHeaps; 65 | UploadHeap m_UploadHeap; 66 | DynamicBufferRing m_ConstantBufferRing; 67 | StaticBufferPool m_VidMemBufferPool; 68 | CommandListRing m_CommandListRing; 69 | 
GPUTimestamps m_GPUTimer; 70 | 71 | FFXParallelSort m_ParallelSort; 72 | 73 | // GUI 74 | ImGUI m_ImGUI; 75 | 76 | // For benchmarking 77 | std::vector m_TimeStamps; 78 | }; 79 | -------------------------------------------------------------------------------- /sample/src/VK/sample.cpp: -------------------------------------------------------------------------------- 1 | // sample.cpp 2 | // 3 | // Copyright(c) 2021 Advanced Micro Devices, Inc.All rights reserved. 4 | // Permission is hereby granted, free of charge, to any person obtaining a copy 5 | // of this software and associated documentation files(the "Software"), to deal 6 | // in the Software without restriction, including without limitation the rights 7 | // to use, copy, modify, merge, publish, distribute, sublicense, and / or sell 8 | // copies of the Software, and to permit persons to whom the Software is 9 | // furnished to do so, subject to the following conditions : 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 15 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 18 | // THE SOFTWARE. 
19 | 20 | #include "stdafx.h" 21 | #include "Sample.h" 22 | 23 | #include 24 | #include 25 | 26 | //-------------------------------------------------------------------------------------- 27 | // 28 | // OnParseCommandLine 29 | // 30 | //-------------------------------------------------------------------------------------- 31 | void Sample::OnParseCommandLine(LPSTR lpCmdLine, uint32_t* pWidth, uint32_t* pHeight) 32 | { 33 | // set some default values 34 | *pWidth = 1920; 35 | *pHeight = 1080; 36 | m_VsyncEnabled = false; 37 | m_bIsBenchmarking = false; 38 | m_fontSize = 13.f; // default value overridden by a json file if available 39 | m_isCpuValidationLayerEnabled = false; 40 | m_isGpuValidationLayerEnabled = false; 41 | m_stablePowerState = false; 42 | 43 | // Read globals 44 | auto process = [&](json jData) 45 | { 46 | *pWidth = jData.value("width", *pWidth); 47 | *pHeight = jData.value("height", *pHeight); 48 | m_fullscreenMode = jData.value("presentationMode", m_fullscreenMode); 49 | m_isCpuValidationLayerEnabled = jData.value("CpuValidationLayerEnabled", m_isCpuValidationLayerEnabled); 50 | m_isGpuValidationLayerEnabled = jData.value("GpuValidationLayerEnabled", m_isGpuValidationLayerEnabled); 51 | m_VsyncEnabled = jData.value("vsync", m_VsyncEnabled); 52 | m_bIsBenchmarking = jData.value("benchmark", m_bIsBenchmarking); 53 | m_stablePowerState = jData.value("stablePowerState", m_stablePowerState); 54 | m_fontSize = jData.value("fontsize", m_fontSize); 55 | }; 56 | 57 | // Read config file (and override values from commandline if so) 58 | { 59 | std::ifstream f("FFXParallelSort.json"); 60 | if (!f) 61 | { 62 | MessageBox(nullptr, "Config file not found!\n", "Cauldron Panic!", MB_ICONERROR); 63 | exit(0); 64 | } 65 | 66 | try 67 | { 68 | f >> m_jsonConfigFile; 69 | } 70 | catch (json::parse_error) 71 | { 72 | MessageBox(nullptr, "Error parsing FFXParallelSort.json!\n", "Cauldron Panic!", MB_ICONERROR); 73 | exit(0); 74 | } 75 | } 76 | 77 | json globals = 
m_jsonConfigFile["globals"]; 78 | process(globals); 79 | 80 | // Process the command line to see if we need to do anything for the sample (i.e. benchmarking, setup certain settings, etc.) 81 | std::string charString = lpCmdLine; 82 | if (!charString.compare("")) 83 | return; // No parameters 84 | 85 | // Need to first convert the char string to a wide character set 86 | std::wstring wideString; 87 | wideString.assign(charString.begin(), charString.end()); 88 | 89 | LPWSTR* ArgList; 90 | int ArgCount, CurrentArg(0); 91 | ArgList = CommandLineToArgvW(wideString.c_str(), &ArgCount); 92 | while (CurrentArg < ArgCount) 93 | { 94 | wideString = ArgList[CurrentArg]; 95 | 96 | // Enable benchmarking 97 | if (!wideString.compare(L"-benchmark")) 98 | { 99 | m_bIsBenchmarking = true; 100 | ++CurrentArg; 101 | } 102 | 103 | // Set num keys to sort 104 | else if (!wideString.compare(L"-keyset")) 105 | { 106 | assert(ArgCount > CurrentArg + 1 && "Incorrect usage of -keyset <0-2>"); 107 | // Get the parameter 108 | int keySet = std::stoi(ArgList[CurrentArg + 1]); 109 | assert(keySet >= 0 && keySet < 3 && "Incorrect usage of -keyset <0-2>"); 110 | FFXParallelSort::OverrideKeySet(keySet); 111 | CurrentArg += 2; 112 | } 113 | 114 | // Set payload sort 115 | else if (!wideString.compare(L"-payload")) 116 | { 117 | FFXParallelSort::OverridePayload(); 118 | ++CurrentArg; 119 | } 120 | 121 | else 122 | { 123 | assert(false && "Unsupported command line parameter"); 124 | exit(0); 125 | } 126 | } 127 | } 128 | 129 | //-------------------------------------------------------------------------------------- 130 | // 131 | // OnCreate 132 | // 133 | //-------------------------------------------------------------------------------------- 134 | void Sample::OnCreate() 135 | { 136 | // Init the shader compiler 137 | InitDirectXCompiler(); 138 | CreateShaderCache(); 139 | 140 | // Create a instance of the renderer and initialize it, we need to do that for each GPU 141 | m_pRenderer = new 
Renderer(); 142 | m_pRenderer->OnCreate(&m_device, &m_swapChain, m_fontSize); 143 | 144 | // set benchmarking state if enabled 145 | if (m_bIsBenchmarking) 146 | { 147 | std::string deviceName; 148 | std::string driverVersion; 149 | m_device.GetDeviceInfo(&deviceName, &driverVersion); 150 | BenchmarkConfig(m_jsonConfigFile["BenchmarkSettings"], -1, nullptr, deviceName, driverVersion); 151 | } 152 | 153 | // Init GUI (non gfx stuff) 154 | ImGUI_Init((void*)m_windowHwnd); 155 | m_UIState.Initialize(); 156 | 157 | OnResize(); 158 | OnUpdateDisplay(); 159 | } 160 | 161 | //-------------------------------------------------------------------------------------- 162 | // 163 | // OnDestroy 164 | // 165 | //-------------------------------------------------------------------------------------- 166 | void Sample::OnDestroy() 167 | { 168 | ImGUI_Shutdown(); 169 | 170 | m_device.GPUFlush(); 171 | 172 | m_pRenderer->OnDestroyWindowSizeDependentResources(); 173 | m_pRenderer->OnDestroy(); 174 | 175 | delete m_pRenderer; 176 | 177 | //shut down the shader compiler 178 | DestroyShaderCache(&m_device); 179 | } 180 | 181 | //-------------------------------------------------------------------------------------- 182 | // 183 | // OnEvent, win32 sends us events and we forward them to ImGUI 184 | // 185 | //-------------------------------------------------------------------------------------- 186 | static void ToggleBool(bool& b) { b = !b; } 187 | bool Sample::OnEvent(MSG msg) 188 | { 189 | if (ImGUI_WndProcHandler(msg.hwnd, msg.message, msg.wParam, msg.lParam)) 190 | return true; 191 | 192 | // handle function keys (F1, F2...) 
here, rest of the input is handled 193 | // by imGUI later in HandleInput() function 194 | const WPARAM& KeyPressed = msg.wParam; 195 | switch (msg.message) 196 | { 197 | case WM_KEYUP: 198 | case WM_SYSKEYUP: 199 | /* WINDOW TOGGLES */ 200 | if (KeyPressed == VK_F1) m_UIState.bShowControlsWindow ^= 1; 201 | if (KeyPressed == VK_F2) m_UIState.bShowProfilerWindow ^= 1; 202 | break; 203 | } 204 | 205 | return true; 206 | } 207 | 208 | //-------------------------------------------------------------------------------------- 209 | // 210 | // OnResize 211 | // 212 | //-------------------------------------------------------------------------------------- 213 | void Sample::OnResize() 214 | { 215 | // Destroy resources (if we are not minimized) 216 | if (m_Width && m_Height && m_pRenderer) 217 | { 218 | m_pRenderer->OnDestroyWindowSizeDependentResources(); 219 | m_pRenderer->OnCreateWindowSizeDependentResources(&m_swapChain, m_Width, m_Height); 220 | } 221 | } 222 | 223 | //-------------------------------------------------------------------------------------- 224 | // 225 | // UpdateDisplay 226 | // 227 | //-------------------------------------------------------------------------------------- 228 | void Sample::OnUpdateDisplay() 229 | { 230 | // Destroy resources (if we are not minimized) 231 | if (m_pRenderer) 232 | { 233 | m_pRenderer->OnUpdateDisplayDependentResources(&m_swapChain); 234 | } 235 | } 236 | 237 | //-------------------------------------------------------------------------------------- 238 | // 239 | // OnUpdate 240 | // 241 | //-------------------------------------------------------------------------------------- 242 | void Sample::OnUpdate() 243 | { 244 | ImGuiIO& io = ImGui::GetIO(); 245 | 246 | //If the mouse was not used by the GUI then it's for the camera 247 | if (io.WantCaptureMouse) 248 | { 249 | io.MouseDelta.x = 0; 250 | io.MouseDelta.y = 0; 251 | io.MouseWheel = 0; 252 | } 253 | 254 | // Keyboard & Mouse 255 | HandleInput(io); 256 | 257 | // 
Increase time 258 | m_time += (float)m_deltaTime / 1000.0f; // time in seconds 259 | } 260 | 261 | void Sample::HandleInput(const ImGuiIO& io) 262 | { 263 | auto fnIsKeyTriggered = [&io](char key) { return io.KeysDown[key] && io.KeysDownDuration[key] == 0.0f; }; 264 | 265 | // Handle Keyboard/Mouse input here 266 | } 267 | 268 | //-------------------------------------------------------------------------------------- 269 | // 270 | // OnRender, updates the state from the UI, animates, transforms and renders the scene 271 | // 272 | //-------------------------------------------------------------------------------------- 273 | void Sample::OnRender() 274 | { 275 | // Do any start of frame necessities 276 | BeginFrame(); 277 | 278 | ImGUI_UpdateIO(); 279 | ImGui::NewFrame(); 280 | 281 | if (m_bIsBenchmarking) 282 | { 283 | // Benchmarking takes control of the time, and exits the app when the animation is done 284 | std::vector timeStamps = m_pRenderer->GetTimingValues(); 285 | std::string Filename; 286 | m_time = BenchmarkLoop(timeStamps, nullptr, Filename); 287 | } 288 | else 289 | { 290 | // Build the UI. Note that the rendering of the UI happens later. 
291 | BuildUI(); 292 | OnUpdate(); 293 | } 294 | 295 | // Do Render frame using AFR 296 | m_pRenderer->OnRender(&m_UIState, &m_swapChain, m_time, m_bIsBenchmarking); 297 | 298 | // Framework will handle Present and some other end of frame logic 299 | EndFrame(); 300 | } 301 | 302 | 303 | //-------------------------------------------------------------------------------------- 304 | // 305 | // WinMain 306 | // 307 | //-------------------------------------------------------------------------------------- 308 | int WINAPI WinMain(HINSTANCE hInstance, 309 | HINSTANCE hPrevInstance, 310 | LPSTR lpCmdLine, 311 | int nCmdShow) 312 | { 313 | LPCSTR Name = "FidelityFX Parallel Sort VK v1.1"; 314 | 315 | // create new DX sample 316 | return RunFramework(hInstance, lpCmdLine, nCmdShow, new Sample(Name)); 317 | } 318 | -------------------------------------------------------------------------------- /sample/src/VK/sample.h: -------------------------------------------------------------------------------- 1 | // sample.h 2 | // 3 | // Copyright(c) 2021 Advanced Micro Devices, Inc.All rights reserved. 4 | // Permission is hereby granted, free of charge, to any person obtaining a copy 5 | // of this software and associated documentation files(the "Software"), to deal 6 | // in the Software without restriction, including without limitation the rights 7 | // to use, copy, modify, merge, publish, distribute, sublicense, and / or sell 8 | // copies of the Software, and to permit persons to whom the Software is 9 | // furnished to do so, subject to the following conditions : 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 
12 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 15 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 18 | // THE SOFTWARE. 19 | 20 | #pragma once 21 | 22 | #include "base/FrameworkWindows.h" 23 | #include "Renderer.h" 24 | #include "UI.h" 25 | 26 | // This class encapsulates the 'application' and is responsible for handling window events and scene updates (simulation) 27 | // Rendering and rendering resource management is done by the Renderer class 28 | 29 | class Sample : public FrameworkWindows 30 | { 31 | public: 32 | Sample(LPCSTR name) : FrameworkWindows(name) { m_time = 0.f; } 33 | void OnParseCommandLine(LPSTR lpCmdLine, uint32_t* pWidth, uint32_t* pHeight) override; 34 | void OnCreate() override; 35 | void OnDestroy() override; 36 | void OnRender() override; 37 | bool OnEvent(MSG msg) override; 38 | void OnResize() override; 39 | void OnUpdateDisplay() override; 40 | 41 | void BuildUI(); 42 | void OnUpdate(); 43 | void HandleInput(const ImGuiIO& io); 44 | 45 | private: 46 | // Benchmarking support 47 | bool m_bIsBenchmarking; 48 | float m_time; 49 | 50 | Renderer* m_pRenderer = NULL; 51 | UIState m_UIState; 52 | float m_fontSize; 53 | 54 | // json config file 55 | json m_jsonConfigFile; 56 | }; 57 | -------------------------------------------------------------------------------- /sample/src/VK/stdafx.cpp: -------------------------------------------------------------------------------- 1 | // stdafx.cpp 2 | // 3 | // Copyright(c) 2021 Advanced Micro Devices, Inc.All rights reserved. 
4 | // Permission is hereby granted, free of charge, to any person obtaining a copy 5 | // of this software and associated documentation files(the "Software"), to deal 6 | // in the Software without restriction, including without limitation the rights 7 | // to use, copy, modify, merge, publish, distribute, sublicense, and / or sell 8 | // copies of the Software, and to permit persons to whom the Software is 9 | // furnished to do so, subject to the following conditions : 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 15 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 18 | // THE SOFTWARE. 19 | 20 | #include "stdafx.h" 21 | 22 | // TODO: reference any additional headers you need in STDAFX.H 23 | // and not in this file 24 | -------------------------------------------------------------------------------- /sample/src/VK/stdafx.h: -------------------------------------------------------------------------------- 1 | // stdafx.h 2 | // 3 | // Copyright(c) 2021 Advanced Micro Devices, Inc.All rights reserved. 
4 | // Permission is hereby granted, free of charge, to any person obtaining a copy 5 | // of this software and associated documentation files(the "Software"), to deal 6 | // in the Software without restriction, including without limitation the rights 7 | // to use, copy, modify, merge, publish, distribute, sublicense, and / or sell 8 | // copies of the Software, and to permit persons to whom the Software is 9 | // furnished to do so, subject to the following conditions : 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE 15 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 18 | // THE SOFTWARE. 
19 | 20 | #pragma once 21 | 22 | #define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers 23 | // Windows Header Files: 24 | #include 25 | #include 26 | 27 | // C RunTime Header Files 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | 34 | #include "vulkan/vulkan.h" 35 | 36 | // we are using DirectXMath 37 | #include 38 | using namespace DirectX; 39 | 40 | // TODO: reference additional headers your program requires here 41 | #include "Base/Imgui.h" 42 | #include "Base/ImguiHelper.h" 43 | #include "Base/Helper.h" 44 | #include "Base/Device.h" 45 | #include "Base/FrameworkWindows.h" 46 | #include "Base/Texture.h" 47 | #include "Base/SwapChain.h" 48 | #include "Base/UploadHeap.h" 49 | #include "Base/GPUTimestamps.h" 50 | #include "Base/CommandListRing.h" 51 | #include "Base/StaticBufferPool.h" 52 | #include "Base/DynamicBufferRing.h" 53 | #include "Base/ResourceViewHeaps.h" 54 | #include "Base/ShaderCompilerHelper.h" 55 | 56 | #include "GLTF/GltfPbrPass.h" 57 | #include "GLTF/GltfBBoxPass.h" 58 | #include "GLTF/GltfDepthPass.h" 59 | #include "GLTF/GltfMotionVectorsPass.h" 60 | 61 | #include "Misc/Misc.h" 62 | #include "Misc/Error.h" 63 | #include "Misc/Camera.h" 64 | 65 | #include "PostProc/TAA.h" 66 | #include "PostProc/Bloom.h" 67 | #include "PostProc/BlurPS.h" 68 | #include "PostProc/SkyDome.h" 69 | #include "PostProc/SkyDomeProc.h" 70 | #include "PostProc/PostProcCS.h" 71 | #include "PostProc/ToneMapping.h" 72 | #include "PostProc/ToneMappingCS.h" 73 | #include "PostProc/ColorConversionPS.h" 74 | #include "PostProc/DownSamplePS.h" 75 | 76 | #include "ParallelSort.h" 77 | 78 | #include "Widgets/wireframe.h" 79 | 80 | 81 | using namespace CAULDRON_VK; 82 | --------------------------------------------------------------------------------