├── .gitignore ├── Bin ├── CSDownSample.cso ├── CSInit.cso ├── CSJacobi.cso ├── CSPrefixSum1.cso ├── CSPrefixSum1_rw.cso ├── CSPrefixSum1i.cso ├── CSPrefixSum1i_rw.cso ├── CSPrefixSum1u.cso ├── CSPrefixSum1u_rw.cso ├── CSPrefixSum2.cso ├── CSPrefixSum2i.cso ├── CSPrefixSum2u.cso ├── CSUpSample.cso ├── CSpAp.cso ├── CSpUpdate.cso ├── CSxUpdate.cso ├── PoissonSolver.cmd └── PoissonSolver.exe ├── PoissonSolver.sln └── PoissonSolver ├── CHConjGrad.hlsli ├── CSAx.hlsli ├── CSConjGrad.hlsli ├── CSDownSample.hlsl ├── CSInit.hlsl ├── CSJacobi.hlsl ├── CSPrefixSum1.hlsl ├── CSPrefixSum1.hlsli ├── CSPrefixSum1_rw.hlsl ├── CSPrefixSum1i.hlsl ├── CSPrefixSum1i_rw.hlsl ├── CSPrefixSum1u.hlsl ├── CSPrefixSum1u_rw.hlsl ├── CSPrefixSum2.hlsl ├── CSPrefixSum2.hlsli ├── CSPrefixSum2i.hlsl ├── CSPrefixSum2u.hlsl ├── CSScanBlockBuffer.hlsli ├── CSUpSample.hlsl ├── CSpAp.hlsl ├── CSpUpdate.hlsl ├── CSxUpdate.hlsl ├── ConjGrad.cpp ├── ConjGrad.h ├── CreateBuffers.cpp ├── CreateBuffers.h ├── Jacobi.cpp ├── Jacobi.h ├── Multigrid.cpp ├── Multigrid.h ├── PoissonSolver.cpp ├── PoissonSolver.vcxproj ├── PoissonSolver.vcxproj.filters ├── PoissonSolver.vcxproj.user ├── PrefixSum.cpp ├── PrefixSum.h └── SharedConst.h /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 
3 | 4 | # User-specific files 5 | *.suo 6 | *.user 7 | *.userosscache 8 | *.sln.docstates 9 | 10 | # User-specific files (MonoDevelop/Xamarin Studio) 11 | *.userprefs 12 | 13 | # Build results 14 | [Dd]ebug/ 15 | [Dd]ebugPublic/ 16 | [Rr]elease/ 17 | [Rr]eleases/ 18 | x64/ 19 | x86/ 20 | bld/ 21 | [Oo]bj/ 22 | [Ll]og/ 23 | *.tlog/ 24 | 25 | # Visual Studio 2015 cache/options directory 26 | .vs/ 27 | # Uncomment if you have tasks that create the project's static files in wwwroot 28 | #wwwroot/ 29 | 30 | # MSTest test Results 31 | [Tt]est[Rr]esult*/ 32 | [Bb]uild[Ll]og.* 33 | 34 | # NUNIT 35 | *.VisualState.xml 36 | TestResult.xml 37 | 38 | # Build Results of an ATL Project 39 | [Dd]ebugPS/ 40 | [Rr]eleasePS/ 41 | dlldata.c 42 | 43 | # DNX 44 | project.lock.json 45 | artifacts/ 46 | 47 | *_i.c 48 | *_p.c 49 | *_i.h 50 | *.ilk 51 | *.iobj 52 | *.ipdb 53 | *.meta 54 | *.obj 55 | *.pch 56 | *.pdb 57 | *.pgc 58 | *.pgd 59 | *.res 60 | *.rsp 61 | *.sbr 62 | *.tlb 63 | *.tli 64 | *.tlh 65 | *.tmp 66 | *.tmp_proj 67 | *.log 68 | *.vspscc 69 | *.vssscc 70 | .builds 71 | *.pidb 72 | *.svclog 73 | *.scc 74 | 75 | # Chutzpah Test files 76 | _Chutzpah* 77 | 78 | # Visual C++ cache files 79 | ipch/ 80 | *.aps 81 | *.ncb 82 | *.opendb 83 | *.opensdf 84 | *.sdf 85 | *.cachefile 86 | *.VC.db 87 | *.VC.VC.opendb 88 | 89 | # Visual Studio profiler 90 | *.psess 91 | *.vsp 92 | *.vspx 93 | *.sap 94 | 95 | # TFS 2012 Local Workspace 96 | $tf/ 97 | 98 | # Guidance Automation Toolkit 99 | *.gpState 100 | 101 | # ReSharper is a .NET coding add-in 102 | _ReSharper*/ 103 | *.[Rr]e[Ss]harper 104 | *.DotSettings.user 105 | 106 | # JustCode is a .NET coding add-in 107 | .JustCode 108 | 109 | # TeamCity is a build add-in 110 | _TeamCity* 111 | 112 | # DotCover is a Code Coverage Tool 113 | *.dotCover 114 | 115 | # NCrunch 116 | _NCrunch_* 117 | .*crunch*.local.xml 118 | nCrunchTemp_* 119 | 120 | # MightyMoose 121 | *.mm.* 122 | AutoTest.Net/ 123 | 124 | # Web workbench (sass) 125 | .sass-cache/ 
126 | 127 | # Installshield output folder 128 | [Ee]xpress/ 129 | 130 | # DocProject is a documentation generator add-in 131 | DocProject/buildhelp/ 132 | DocProject/Help/*.HxT 133 | DocProject/Help/*.HxC 134 | DocProject/Help/*.hhc 135 | DocProject/Help/*.hhk 136 | DocProject/Help/*.hhp 137 | DocProject/Help/Html2 138 | DocProject/Help/html 139 | 140 | # Click-Once directory 141 | publish/ 142 | 143 | # Publish Web Output 144 | *.[Pp]ublish.xml 145 | *.azurePubxml 146 | # TODO: Comment the next line if you want to checkin your web deploy settings 147 | # but database connection strings (with potential passwords) will be unencrypted 148 | *.pubxml 149 | *.publishproj 150 | 151 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 152 | # checkin your Azure Web App publish settings, but sensitive information contained 153 | # in these scripts will be unencrypted 154 | PublishScripts/ 155 | 156 | # NuGet Packages 157 | *.nupkg 158 | # The packages folder can be ignored because of Package Restore 159 | **/packages/* 160 | # except build/, which is used as an MSBuild target. 
161 | !**/packages/build/ 162 | # Uncomment if necessary however generally it will be regenerated when needed 163 | #!**/packages/repositories.config 164 | # NuGet v3's project.json files produces more ignoreable files 165 | *.nuget.props 166 | *.nuget.targets 167 | 168 | # Microsoft Azure Build Output 169 | csx/ 170 | *.build.csdef 171 | 172 | # Microsoft Azure Emulator 173 | ecf/ 174 | rcf/ 175 | 176 | # Windows Store app package directories and files 177 | AppPackages/ 178 | BundleArtifacts/ 179 | Package.StoreAssociation.xml 180 | _pkginfo.txt 181 | 182 | # Visual Studio cache files 183 | # files ending in .cache can be ignored 184 | *.[Cc]ache 185 | # but keep track of directories ending in .cache 186 | !*.[Cc]ache/ 187 | 188 | # Others 189 | ClientBin/ 190 | ~$* 191 | *~ 192 | *.dbmdl 193 | *.dbproj.schemaview 194 | *.pfx 195 | *.publishsettings 196 | node_modules/ 197 | orleans.codegen.cs 198 | 199 | # Since there are multiple workflows, uncomment next line to ignore bower_components 200 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 201 | #bower_components/ 202 | 203 | # RIA/Silverlight projects 204 | Generated_Code/ 205 | 206 | # Backup & report files from converting an old project file 207 | # to a newer Visual Studio version. 
Backup files are not needed, 208 | # because we have git ;-) 209 | _UpgradeReport_Files/ 210 | Backup*/ 211 | UpgradeLog*.XML 212 | UpgradeLog*.htm 213 | 214 | # SQL Server files 215 | *.mdf 216 | *.ldf 217 | 218 | # Business Intelligence projects 219 | *.rdl.data 220 | *.bim.layout 221 | *.bim_*.settings 222 | 223 | # Microsoft Fakes 224 | FakesAssemblies/ 225 | 226 | # GhostDoc plugin setting file 227 | *.GhostDoc.xml 228 | 229 | # Node.js Tools for Visual Studio 230 | .ntvs_analysis.dat 231 | 232 | # Visual Studio 6 build log 233 | *.plg 234 | 235 | # Visual Studio 6 workspace options file 236 | *.opt 237 | 238 | # Visual Studio LightSwitch build output 239 | **/*.HTMLClient/GeneratedArtifacts 240 | **/*.DesktopClient/GeneratedArtifacts 241 | **/*.DesktopClient/ModelManifest.xml 242 | **/*.Server/GeneratedArtifacts 243 | **/*.Server/ModelManifest.xml 244 | _Pvt_Extensions 245 | 246 | # Paket dependency manager 247 | .paket/paket.exe 248 | paket-files/ 249 | 250 | # FAKE - F# Make 251 | .fake/ 252 | 253 | # JetBrains Rider 254 | .idea/ 255 | *.sln.iml 256 | 257 | # ========================= 258 | # Operating System Files 259 | # ========================= 260 | 261 | # OSX 262 | # ========================= 263 | 264 | .DS_Store 265 | .AppleDouble 266 | .LSOverride 267 | 268 | # Thumbnails 269 | ._* 270 | 271 | # Files that might appear in the root of a volume 272 | .DocumentRevisions-V100 273 | .fseventsd 274 | .Spotlight-V100 275 | .TemporaryItems 276 | .Trashes 277 | .VolumeIcon.icns 278 | 279 | # Directories potentially created on remote AFP share 280 | .AppleDB 281 | .AppleDesktop 282 | Network Trash Folder 283 | Temporary Items 284 | .apdisk 285 | 286 | # Windows 287 | # ========================= 288 | 289 | # Windows image file caches 290 | Thumbs.db 291 | ehthumbs.db 292 | 293 | # Folder config file 294 | Desktop.ini 295 | 296 | # Recycle Bin used on file shares 297 | $RECYCLE.BIN/ 298 | 299 | # Windows Installer files 300 | *.cab 301 | *.msi 302 | *.msm 
303 | *.msp 304 | 305 | # Windows shortcuts 306 | *.lnk 307 | -------------------------------------------------------------------------------- /Bin/CSDownSample.cso: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StarsX/PoissonSolver/e2ce8836e0d9fd737003e13bc26057cf52bdacc1/Bin/CSDownSample.cso -------------------------------------------------------------------------------- /Bin/CSInit.cso: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StarsX/PoissonSolver/e2ce8836e0d9fd737003e13bc26057cf52bdacc1/Bin/CSInit.cso -------------------------------------------------------------------------------- /Bin/CSJacobi.cso: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StarsX/PoissonSolver/e2ce8836e0d9fd737003e13bc26057cf52bdacc1/Bin/CSJacobi.cso -------------------------------------------------------------------------------- /Bin/CSPrefixSum1.cso: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StarsX/PoissonSolver/e2ce8836e0d9fd737003e13bc26057cf52bdacc1/Bin/CSPrefixSum1.cso -------------------------------------------------------------------------------- /Bin/CSPrefixSum1_rw.cso: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StarsX/PoissonSolver/e2ce8836e0d9fd737003e13bc26057cf52bdacc1/Bin/CSPrefixSum1_rw.cso -------------------------------------------------------------------------------- /Bin/CSPrefixSum1i.cso: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StarsX/PoissonSolver/e2ce8836e0d9fd737003e13bc26057cf52bdacc1/Bin/CSPrefixSum1i.cso -------------------------------------------------------------------------------- /Bin/CSPrefixSum1i_rw.cso: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/StarsX/PoissonSolver/e2ce8836e0d9fd737003e13bc26057cf52bdacc1/Bin/CSPrefixSum1i_rw.cso -------------------------------------------------------------------------------- /Bin/CSPrefixSum1u.cso: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StarsX/PoissonSolver/e2ce8836e0d9fd737003e13bc26057cf52bdacc1/Bin/CSPrefixSum1u.cso -------------------------------------------------------------------------------- /Bin/CSPrefixSum1u_rw.cso: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StarsX/PoissonSolver/e2ce8836e0d9fd737003e13bc26057cf52bdacc1/Bin/CSPrefixSum1u_rw.cso -------------------------------------------------------------------------------- /Bin/CSPrefixSum2.cso: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StarsX/PoissonSolver/e2ce8836e0d9fd737003e13bc26057cf52bdacc1/Bin/CSPrefixSum2.cso -------------------------------------------------------------------------------- /Bin/CSPrefixSum2i.cso: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StarsX/PoissonSolver/e2ce8836e0d9fd737003e13bc26057cf52bdacc1/Bin/CSPrefixSum2i.cso -------------------------------------------------------------------------------- /Bin/CSPrefixSum2u.cso: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StarsX/PoissonSolver/e2ce8836e0d9fd737003e13bc26057cf52bdacc1/Bin/CSPrefixSum2u.cso -------------------------------------------------------------------------------- /Bin/CSUpSample.cso: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/StarsX/PoissonSolver/e2ce8836e0d9fd737003e13bc26057cf52bdacc1/Bin/CSUpSample.cso -------------------------------------------------------------------------------- /Bin/CSpAp.cso: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StarsX/PoissonSolver/e2ce8836e0d9fd737003e13bc26057cf52bdacc1/Bin/CSpAp.cso -------------------------------------------------------------------------------- /Bin/CSpUpdate.cso: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StarsX/PoissonSolver/e2ce8836e0d9fd737003e13bc26057cf52bdacc1/Bin/CSpUpdate.cso -------------------------------------------------------------------------------- /Bin/CSxUpdate.cso: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StarsX/PoissonSolver/e2ce8836e0d9fd737003e13bc26057cf52bdacc1/Bin/CSxUpdate.cso -------------------------------------------------------------------------------- /Bin/PoissonSolver.cmd: -------------------------------------------------------------------------------- 1 | PoissonSolver.exe > Result.txt 2 | -------------------------------------------------------------------------------- /Bin/PoissonSolver.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StarsX/PoissonSolver/e2ce8836e0d9fd737003e13bc26057cf52bdacc1/Bin/PoissonSolver.exe -------------------------------------------------------------------------------- /PoissonSolver.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 14 4 | VisualStudioVersion = 14.0.23107.0 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "PoissonSolver", "PoissonSolver\PoissonSolver.vcxproj", 
"{D7D2A4AE-092A-46FC-A30E-8B90589946C9}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|x64 = Debug|x64 11 | Debug|x86 = Debug|x86 12 | Release|x64 = Release|x64 13 | Release|x86 = Release|x86 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {D7D2A4AE-092A-46FC-A30E-8B90589946C9}.Debug|x64.ActiveCfg = Debug|x64 17 | {D7D2A4AE-092A-46FC-A30E-8B90589946C9}.Debug|x64.Build.0 = Debug|x64 18 | {D7D2A4AE-092A-46FC-A30E-8B90589946C9}.Debug|x86.ActiveCfg = Debug|Win32 19 | {D7D2A4AE-092A-46FC-A30E-8B90589946C9}.Debug|x86.Build.0 = Debug|Win32 20 | {D7D2A4AE-092A-46FC-A30E-8B90589946C9}.Release|x64.ActiveCfg = Release|x64 21 | {D7D2A4AE-092A-46FC-A30E-8B90589946C9}.Release|x64.Build.0 = Release|x64 22 | {D7D2A4AE-092A-46FC-A30E-8B90589946C9}.Release|x86.ActiveCfg = Release|Win32 23 | {D7D2A4AE-092A-46FC-A30E-8B90589946C9}.Release|x86.Build.0 = Release|Win32 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | EndGlobal 29 | -------------------------------------------------------------------------------- /PoissonSolver/CHConjGrad.hlsli: -------------------------------------------------------------------------------- 1 | //-------------------------------------------------------------------------------------- 2 | // By XU, Tianchen 3 | //-------------------------------------------------------------------------------------- 4 | 5 | #include "SharedConst.h" 6 | 7 | #define uSlice (vDim.x * vDim.y) 8 | #define GETIDX(v) (v.z * uSlice + v.y * vDim.x + v.x) 9 | -------------------------------------------------------------------------------- /PoissonSolver/CSAx.hlsli: -------------------------------------------------------------------------------- 1 | //-------------------------------------------------------------------------------------- 2 | // By XU, Tianchen 3 | 
//--------------------------------------------------------------------------------------

// Scalar 3D grid sampled by computeAx(). TEXTURE3D must be #defined by the including
// shader before this header is included.
TEXTURE3D x;

//--------------------------------------------------------------------------------------
// Compute Ax
//
// Applies the 7-point stencil (sum of the six axis neighbours minus six times the
// centre value) to x at texel vTex.
//   vTex : integer texel coordinate of the centre cell.
//   q    : receives the centre value x[vTex]. It is only ever written here, so it is
//          declared `out`; callers may pass an uninitialised local.
// Returns the stencil result.
// NOTE(review): neighbours outside the grid are fetched without any bounds check;
// this presumably relies on out-of-range texel reads returning zero - confirm the
// intended boundary condition.
//--------------------------------------------------------------------------------------
float computeAx(int3 vTex, out float q)
{
    q = x[vTex];

    // Accumulate in a fixed order so the floating-point result is reproducible.
    float Ax = -6.0 * q;
    Ax += x[vTex + int3(-1, 0, 0)];
    Ax += x[vTex + int3(1, 0, 0)];
    Ax += x[vTex + int3(0, -1, 0)];
    Ax += x[vTex + int3(0, 1, 0)];
    Ax += x[vTex + int3(0, 0, -1)];
    Ax += x[vTex + int3(0, 0, 1)];

    return Ax;
}

--------------------------------------------------------------------------------
/PoissonSolver/CSConjGrad.hlsli:
--------------------------------------------------------------------------------
//--------------------------------------------------------------------------------------
// By XU, Tianchen
//--------------------------------------------------------------------------------------

#include "CHConjGrad.hlsli"

//--------------------------------------------------------------------------------------
// Update x
//--------------------------------------------------------------------------------------
// Every resource below holds scalar floats: all values read from or written to them
// in this file are used as `float`.
Texture3D<float>            p_RO;       // search direction p (read-only view)
Texture3D<float>            Ap;         // A applied to p
StructuredBuffer<float>     Acc_rr;     // accumulated r.r terms
StructuredBuffer<float>     Acc_pAp;    // accumulated p.Ap terms

RWTexture3D<float>          x;          // solution estimate, updated in place
RWTexture3D<float>          r;          // residual, updated in place
RWStructuredBuffer<float>   rr;         // per-texel r*r, flattened with GETIDX

// Performs, for the texel at DTid:
//   a = Acc_rr[iMax] / Acc_pAp[iMax]
//   x += a * p
//   r -= a * Ap
//   rr[flat index] = r * r
// where iMax is the total texel count of p_RO.
// NOTE(review): element iMax of each accumulated buffer is presumably the grand total
// of the corresponding per-texel products - confirm against the code that fills them.
[numthreads(THREAD_GROUP_SIZE, THREAD_GROUP_SIZE, THREAD_GROUP_SIZE)]
void update_x(uint3 DTid : SV_DispatchThreadID)
{
    // The local must be called vDim: the GETIDX / uSlice macros expand to
    // expressions over a variable of that name.
    uint3 vDim;
    p_RO.GetDimensions(vDim.x, vDim.y, vDim.z);
    const uint iMax = vDim.x * vDim.y * vDim.z;
    const float a = Acc_rr[iMax] / Acc_pAp[iMax];

    x[DTid] += a * p_RO[DTid];

    const float ri = r[DTid] - a * Ap[DTid];
    r[DTid] = ri;
    rr[GETIDX(DTid)] = ri * ri;
}

//--------------------------------------------------------------------------------------
// Update p
//--------------------------------------------------------------------------------------
Texture3D<float>        r_RO;           // residual r (read-only view)
StructuredBuffer<float> Acc_rr_new;     // accumulated r.r terms of the new residual

RWTexture3D<float>      p;              // search direction, updated in place

// Performs, for the texel at DTid:
//   b = Acc_rr_new[iMax] / Acc_rr[iMax]
//   p = r + b * p
// where iMax is the total texel count of r_RO.
[numthreads(THREAD_GROUP_SIZE, THREAD_GROUP_SIZE, THREAD_GROUP_SIZE)]
void update_p(uint3 DTid : SV_DispatchThreadID)
{
    uint3 vDim;
    r_RO.GetDimensions(vDim.x, vDim.y, vDim.z);
    const uint iMax = vDim.x * vDim.y * vDim.z;
    const float b = Acc_rr_new[iMax] / Acc_rr[iMax];

    p[DTid] = r_RO[DTid] + b * p[DTid];
}

--------------------------------------------------------------------------------
/PoissonSolver/CSDownSample.hlsl:
--------------------------------------------------------------------------------
//--------------------------------------------------------------------------------------
// By XU, Tianchen
//--------------------------------------------------------------------------------------

Texture3D<float>    x;      // fine grid (source)
RWTexture3D<float>  y;      // coarse grid (destination), one texel per thread group

// One staging cell per thread of the 2x2x2 group.
groupshared float g_Block[2][2][2];

//--------------------------------------------------------------------------------------
// Down sampling
//
// Each 2x2x2 thread group averages the eight fine-grid texels it covers and writes
// the mean to y[Gid]. The sum is built by folding the staged values along x, then y,
// then z, so that cell [1][1][1] ends up holding the total.
//--------------------------------------------------------------------------------------
[numthreads(2, 2, 2)]
void main(uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint3 Gid : SV_GroupID)
{
    g_Block[GTid.x][GTid.y][GTid.z] = x[DTid];

    // Every folding stage reads a cell written by a different thread in the
    // previous stage, so each stage must be separated by a group barrier.
    // All barriers sit outside the conditionals and are reached by every thread.
    GroupMemoryBarrierWithGroupSync();
    if (GTid.x > 0)
        g_Block[GTid.x][GTid.y][GTid.z] += g_Block[GTid.x - 1][GTid.y][GTid.z];

    GroupMemoryBarrierWithGroupSync();
    if (GTid.y > 0)
        g_Block[GTid.x][GTid.y][GTid.z] += g_Block[GTid.x][GTid.y - 1][GTid.z];

    GroupMemoryBarrierWithGroupSync();
    if (GTid.z > 0)
        g_Block[GTid.x][GTid.y][GTid.z] += g_Block[GTid.x][GTid.y][GTid.z - 1];

    // Only thread (1, 1, 1) holds the full sum; it reads back its own cell, so no
    // further barrier is needed.
    if (all(GTid)) y[Gid] = g_Block[GTid.x][GTid.y][GTid.z] / 8.0;
}

--------------------------------------------------------------------------------
/PoissonSolver/CSInit.hlsl:
--------------------------------------------------------------------------------
//--------------------------------------------------------------------------------------
// By XU, Tianchen
//--------------------------------------------------------------------------------------

// computeAx() reads the grid through TEXTURE3D; in this shader x is bound as a UAV.
#define TEXTURE3D RWTexture3D<float>
#include "CSAx.hlsli"
#include "CHConjGrad.hlsli"

Texture3D<float>            b;      // right-hand side

RWTexture3D<float>          r;      // residual
RWStructuredBuffer<float>   rr;     // per-texel r*r, flattened with GETIDX
RWTexture3D<float>          p;      // search direction

// Initialises the residual and search direction for the texel at DTid:
//   r = p = b - Ax,  rr[flat index] = r * r
[numthreads(THREAD_GROUP_SIZE, THREAD_GROUP_SIZE, THREAD_GROUP_SIZE)]
void main(uint3 DTid : SV_DispatchThreadID)
{
    // x0 only receives the centre value from computeAx(); it is not used further.
    float x0;
    const float Ax = computeAx(DTid, x0);

    // The local must be called vDim: GETIDX / uSlice expand to expressions over it.
    uint3 vDim;
    b.GetDimensions(vDim.x, vDim.y, vDim.z);

    const float r0 = b[DTid] - Ax;
    r[DTid] = r0;
    p[DTid] = r0;
    rr[GETIDX(DTid)] = r0 * r0;
}

--------------------------------------------------------------------------------
/PoissonSolver/CSJacobi.hlsl:
--------------------------------------------------------------------------------
//--------------------------------------------------------------------------------------
// By XU, Tianchen
//--------------------------------------------------------------------------------------

#include "SharedConst.h"

Texture3D<float>    b;      // right-hand side
RWTexture3D<float>  x;      // solution estimate, updated in place

//--------------------------------------------------------------------------------------
// Jacobi iteration
//
// Returns (vf.x * b[vTex] + sum of the six axis neighbours of x) / vf.y.
//   vf   : (scale applied to b, divisor of the whole sum).
//   vTex : integer texel coordinate of the cell being relaxed.
//--------------------------------------------------------------------------------------
float jacobi(half2 vf, int3 vTex)
{
    float q = vf.x * b[vTex];
    q += x[vTex + int3(-1, 0, 0)];
    q += x[vTex + int3(1, 0, 0)];
    q += x[vTex + int3(0, -1, 0)];
    q += x[vTex + int3(0, 1, 0)];
    q += x[vTex + int3(0, 0, -1)];
    q += x[vTex + int3(0, 0, 1)];

    return q / vf.y;
}

// Relaxes the texel at DTid with the factors (-1, 6).
// NOTE(review): x is read at the neighbours and written at DTid through the same UAV
// within one dispatch, so a neighbour read may observe either the old or the already
// updated value - confirm this is intended.
[numthreads(THREAD_GROUP_SIZE, THREAD_GROUP_SIZE, THREAD_GROUP_SIZE)]
void main(uint3 DTid : SV_DispatchThreadID)
{
    const half2 vf = { -1.0, 6.0 };
    x[DTid] = jacobi(vf, DTid);
}

--------------------------------------------------------------------------------
/PoissonSolver/CSPrefixSum1.hlsl:
--------------------------------------------------------------------------------
//--------------------------------------------------------------------------------------
// By XU, Tianchen
//--------------------------------------------------------------------------------------

typedef float typeless;
#include "CSPrefixSum1.hlsli"

--------------------------------------------------------------------------------
/PoissonSolver/CSPrefixSum1.hlsli:
--------------------------------------------------------------------------------
//--------------------------------------------------------------------------------------
// By XU, Tianchen
//--------------------------------------------------------------------------------------

#include "CSScanBlockBuffer.hlsli"

//--------------------------------------------------------------------------------------
// Prefix sum on g_RWSrc and saving the highest results in g_RWInc.
//--------------------------------------------------------------------------------------
// Pass 1 of the two-pass scan. Each group loads NUM_THREADS_PER_GROUP source elements
// into g_BlockBuffer, scans them with ScanBlockBuffer(), stores element i's result at
// g_RWDst[i + 1], and has its last thread record the group total in g_RWInc[Gid.x].
[numthreads(NUM_THREADS_PER_GROUP, 1, 1)]
void main(uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint3 Gid : SV_GroupID)
{
    // Split the thread index into (wave, lane) using the wave width from
    // CSScanBlockBuffer.hlsli.
    const uint uWave = GTid.x >> NUM_BITSHIFTS;
    const uint uLane = GTid.x & (NUM_THREADS_PER_WAVE - 1);

    // Stage this thread's source element.
    g_BlockBuffer[GTid.x] = g_RWSrc[DTid.x];
    GroupMemoryBarrierWithGroupSync();

    // Scan for each group
    ScanBlockBuffer(GTid.x, uLane, uWave);
    GroupMemoryBarrierWithGroupSync();

    // Write back group local prefix sums, shifted up by one slot.
    const typeless scanned = g_BlockBuffer[GTid.x];
    g_RWDst[DTid.x + 1] = scanned;

    // Save highest value of each group
    if (GTid.x == NUM_THREADS_PER_GROUP - 1)
        g_RWInc[Gid.x] = scanned;
}

--------------------------------------------------------------------------------
/PoissonSolver/CSPrefixSum1_rw.hlsl:
--------------------------------------------------------------------------------
//--------------------------------------------------------------------------------------
// By XU, Tianchen
//--------------------------------------------------------------------------------------

// In-place variant: the source buffer is aliased onto the destination buffer.
// NOTE(review): with this alias each group writes g_RWDst[DTid.x + 1], whose last slot
// is the first element the next group reads as its source; nothing visible here orders
// the two groups - confirm how the in-place variants are dispatched.
typedef float typeless;
#define g_RWSrc g_RWDst
#include "CSPrefixSum1.hlsli"

--------------------------------------------------------------------------------
/PoissonSolver/CSPrefixSum1i.hlsl:
--------------------------------------------------------------------------------
//--------------------------------------------------------------------------------------
// By XU, Tianchen
//--------------------------------------------------------------------------------------

typedef int typeless;
#include "CSPrefixSum1.hlsli"

--------------------------------------------------------------------------------
/PoissonSolver/CSPrefixSum1i_rw.hlsl:
--------------------------------------------------------------------------------
//-------------------------------------------------------------------------------------- 2 | // By XU, Tianchen 3 | //-------------------------------------------------------------------------------------- 4 | 5 | typedef int typeless; 6 | #define g_RWSrc g_RWDst 7 | #include "CSPrefixSum1.hlsli" 8 | -------------------------------------------------------------------------------- /PoissonSolver/CSPrefixSum1u.hlsl: -------------------------------------------------------------------------------- 1 | //-------------------------------------------------------------------------------------- 2 | // By XU, Tianchen 3 | //-------------------------------------------------------------------------------------- 4 | 5 | typedef uint typeless; 6 | #include "CSPrefixSum1.hlsli" 7 | -------------------------------------------------------------------------------- /PoissonSolver/CSPrefixSum1u_rw.hlsl: -------------------------------------------------------------------------------- 1 | //-------------------------------------------------------------------------------------- 2 | // By XU, Tianchen 3 | //-------------------------------------------------------------------------------------- 4 | 5 | typedef uint typeless; 6 | #define g_RWSrc g_RWDst 7 | #include "CSPrefixSum1.hlsli" 8 | -------------------------------------------------------------------------------- /PoissonSolver/CSPrefixSum2.hlsl: -------------------------------------------------------------------------------- 1 | //-------------------------------------------------------------------------------------- 2 | // By XU, Tianchen 3 | //-------------------------------------------------------------------------------------- 4 | 5 | typedef float typeless; 6 | #include "CSPrefixSum2.hlsli" 7 | -------------------------------------------------------------------------------- /PoissonSolver/CSPrefixSum2.hlsli: -------------------------------------------------------------------------------- 1 | 
//--------------------------------------------------------------------------------------
// By XU, Tianchen
//--------------------------------------------------------------------------------------

#include "CSScanBlockBuffer.hlsli"

//--------------------------------------------------------------------------------------
// Prefix sum on g_RWInc and writing the results back into g_RWDst.
//
// Pass 2 of the two-pass scan. Every group scans the per-group totals in g_RWInc and
// then adds the running total of all preceding groups to its own slice of g_RWDst.
//--------------------------------------------------------------------------------------
[numthreads(NUM_THREADS_PER_GROUP, 1, 1)]
void main(uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint3 Gid : SV_GroupID)
{
    // Each thread loads one per-group total, so at most NUM_THREADS_PER_GROUP groups
    // (1024 * 1024 elements in total) can be combined by this pass.
    g_BlockBuffer[GTid.x] = g_RWInc[GTid.x];

    const uint uWid = GTid.x >> NUM_BITSHIFTS;
    const uint uWTid = GTid.x - (uWid << NUM_BITSHIFTS);

    GroupMemoryBarrierWithGroupSync();
    ScanBlockBuffer(GTid.x, uWTid, uWid);

    GroupMemoryBarrierWithGroupSync();

    // Group 0 has no predecessor. Guarding with an `if` (rather than a ternary, whose
    // operands may both be evaluated) keeps the index Gid.x - 1 from wrapping around
    // and skips a pointless read-modify-write.
    if (Gid.x > 0)
        g_RWDst[DTid.x + 1] += g_BlockBuffer[Gid.x - 1];
}

--------------------------------------------------------------------------------
/PoissonSolver/CSPrefixSum2i.hlsl:
--------------------------------------------------------------------------------
//--------------------------------------------------------------------------------------
// By XU, Tianchen
//--------------------------------------------------------------------------------------

typedef int typeless;
#include "CSPrefixSum2.hlsli"

--------------------------------------------------------------------------------
/PoissonSolver/CSPrefixSum2u.hlsl:
--------------------------------------------------------------------------------
//--------------------------------------------------------------------------------------
// By XU, Tianchen
//--------------------------------------------------------------------------------------

typedef uint typeless;
#include "CSPrefixSum2.hlsli"

--------------------------------------------------------------------------------
/PoissonSolver/CSScanBlockBuffer.hlsli:
--------------------------------------------------------------------------------
//--------------------------------------------------------------------------------------
// By XU, Tianchen
//--------------------------------------------------------------------------------------

// The including shader must `typedef <scalar type> typeless;` before including this
// header; it may also #define g_RWSrc to alias the source onto another buffer.
#define NUM_THREADS_PER_GROUP   1024
#define NUM_BITSHIFTS           5                           // log2 of the wave width
#define NUM_THREADS_PER_WAVE    (1 << NUM_BITSHIFTS)
#define NUM_WAVES               (NUM_THREADS_PER_GROUP / NUM_THREADS_PER_WAVE)

//--------------------------------------------------------------------------------------
// Unordered Access Buffers
//--------------------------------------------------------------------------------------
// All buffers carry the element type chosen by the including shader.
RWStructuredBuffer<typeless> g_RWDst;   // scan output
RWStructuredBuffer<typeless> g_RWInc;   // one total per thread group
#ifndef g_RWSrc
RWStructuredBuffer<typeless> g_RWSrc;   // scan input
#endif

//--------------------------------------------------------------------------------------
// Groupshared memory.
//--------------------------------------------------------------------------------------
groupshared typeless g_BlockBuffer[NUM_THREADS_PER_GROUP];

//--------------------------------------------------------------------------------------
// Does an inclusive prefix sum on g_BlockBuffer.
//
//   uIdx  - index of the calling thread's element in g_BlockBuffer
//   uWTid - index of the thread inside its wave (0 .. NUM_THREADS_PER_WAVE - 1)
//   uWid  - index of the wave inside the group   (0 .. NUM_WAVES - 1)
//
// The function ends with a write to g_BlockBuffer and no barrier, so callers must
// synchronize before reading elements written by other threads.
//
// NOTE(review): step 1 has no barrier between strides, so it presumably relies on the
// NUM_THREADS_PER_WAVE threads of a wave executing in lockstep - confirm for hardware
// whose native wave size differs.
//--------------------------------------------------------------------------------------
inline void ScanBlockBuffer(uint uIdx, uint uWTid, uint uWid)
{
	// Step 1, prefix sum for each wavefront
	for (uint uStride = 1; uStride < NUM_THREADS_PER_WAVE; uStride <<= 1)
		g_BlockBuffer[uIdx] += uWTid >= uStride ? g_BlockBuffer[uIdx - uStride] : 0;

	// Step 2 + 3, cumulative result of the previous wave
	// (the last element of each preceding wave holds that wave's total)
	GroupMemoryBarrierWithGroupSync();
	typeless fGroupScanResult = 0;
	[unroll]
	for (uint i = 1; i < NUM_WAVES; ++i)
		fGroupScanResult += uWid >= i ? g_BlockBuffer[i * NUM_THREADS_PER_WAVE - 1] : 0;

	// Step 4, finish prefix sum of each group
	// (every thread must have read the wave totals before any of them is modified)
	GroupMemoryBarrierWithGroupSync();
	g_BlockBuffer[uIdx] += fGroupScanResult;
}

--------------------------------------------------------------------------------
/PoissonSolver/CSUpSample.hlsl:
--------------------------------------------------------------------------------
//--------------------------------------------------------------------------------------
// By XU, Tianchen
//--------------------------------------------------------------------------------------

#include "SharedConst.h"

// NOTE(review): the texel type of these resources was missing from the copy under
// review; <float> is an assumption based on the float-typed solver shaders - confirm.
Texture3D<float>	x;
RWTexture3D<float>	y;

//--------------------------------------------------------------------------------------
// Up sampling: every destination texel copies the source texel at half its coordinate.
//--------------------------------------------------------------------------------------
[numthreads(THREAD_GROUP_SIZE, THREAD_GROUP_SIZE, THREAD_GROUP_SIZE)]
void main(uint3 DTid : SV_DispatchThreadID)
{
	y[DTid] = x[DTid >> 1];
}

--------------------------------------------------------------------------------
/PoissonSolver/CSpAp.hlsl:
--------------------------------------------------------------------------------
//--------------------------------------------------------------------------------------
// By XU, Tianchen
//--------------------------------------------------------------------------------------

// NOTE(review): the template arguments below were missing from the copy under review;
// <float> is reconstructed from the float values main() stores - confirm.
#define TEXTURE3D	Texture3D<float>
#include "CSAx.hlsli"
#include "CHConjGrad.hlsli"

RWTexture3D<float>			Ap;
RWStructuredBuffer<float>	pAp;

//--------------------------------------------------------------------------------------
// Stores computeAx()'s result for this texel in Ap, and its product with the value
// computeAx() returns through p in pAp.
//--------------------------------------------------------------------------------------
[numthreads(THREAD_GROUP_SIZE, THREAD_GROUP_SIZE, THREAD_GROUP_SIZE)]
void main(uint3 DTid : SV_DispatchThreadID)
{
	float p;
	const float Ax = computeAx(DTid, p);

	// vDim is not referenced by name below; presumably GETIDX (defined in an included
	// header) expands to an expression that uses it - confirm before removing.
	uint3 vDim;
	x.GetDimensions(vDim.x, vDim.y, vDim.z);

	Ap[DTid] = Ax;
	pAp[GETIDX(DTid)] = p * Ax;
}

-------------------------------------------------------------------------------- /PoissonSolver/CSpUpdate.hlsl: -------------------------------------------------------------------------------- 1 | //-------------------------------------------------------------------------------------- 2 | // By XU, Tianchen 3 | //-------------------------------------------------------------------------------------- 4 | 5 | #include "CSConjGrad.hlsli" 6 | -------------------------------------------------------------------------------- /PoissonSolver/CSxUpdate.hlsl: -------------------------------------------------------------------------------- 1 | //-------------------------------------------------------------------------------------- 2 | // By XU, Tianchen 3 | //-------------------------------------------------------------------------------------- 4 | 5 | #include "CSConjGrad.hlsli" 6 | -------------------------------------------------------------------------------- /PoissonSolver/ConjGrad.cpp: -------------------------------------------------------------------------------- 1 | //-------------------------------------------------------------------------------------- 2 | // By XU, Tianchen 3 | //-------------------------------------------------------------------------------------- 4 | 5 | #include "ConjGrad.h" 6 | #include "SharedConst.h" 7 | 8 | #ifndef V_RETURN 9 | #define V_RETURN(x) { hr = x; if (FAILED(hr)) return hr; } 10 | #endif 11 | 12 | #if 0 13 | #define SCAN_DATA_TYPE(t) D3DX11_SCAN_DATA_TYPE_##t 14 | #else 15 | #define SCAN_DATA_TYPE(t) PrefixSum::SCAN_DATA_TYPE_##t 16 | #endif 17 | 18 | using namespace DirectX; 19 | using namespace std; 20 | 21 | ID3D11ShaderResourceView *const g_pNullSRV = nullptr; // Helper to Clear SRVs 22 | ID3D11UnorderedAccessView *const g_pNullUAV = nullptr; // Helper to Clear UAVs 23 | ID3D11Buffer *const g_pNullBuffer = nullptr; 24 | 25 | ConjGrad::ConjGrad(ID3D11DeviceContext *pDeviceContext) 26 | : m_pd3dContext(pDeviceContext), 
m_uRefCount(1) 27 | { 28 | m_pd3dContext->AddRef(); 29 | m_pd3dContext->GetDevice(&m_pd3dDevice); 30 | } 31 | 32 | ConjGrad::~ConjGrad() 33 | { 34 | { 35 | if (m_pUAVr) m_pUAVr->Release(); 36 | if (m_pUAVrr) m_pUAVrr->Release(); 37 | if (m_pUAVrr0) m_pUAVrr0->Release(); 38 | 39 | if (m_pUAVp) m_pUAVp->Release(); 40 | if (m_pUAVAp) m_pUAVAp->Release(); 41 | if (m_pUAVpAp) m_pUAVpAp->Release(); 42 | } 43 | 44 | { 45 | if (m_pSRVr) m_pSRVr->Release(); 46 | if (m_pSRVAcc_rr) m_pSRVAcc_rr->Release(); 47 | if (m_pSRVAcc_rr_new) m_pSRVAcc_rr_new->Release(); 48 | 49 | if (m_pSRVp) m_pSRVp->Release(); 50 | if (m_pSRVAp) m_pSRVAp->Release(); 51 | if (m_pSRVAcc_pAp) m_pSRVAcc_pAp->Release(); 52 | } 53 | 54 | { 55 | if (m_pr) m_pr->Release(); 56 | if (m_prr[0]) m_prr[0]->Release(); 57 | if (m_prr[1]) m_prr[1]->Release(); 58 | 59 | if (m_pp) m_pp->Release(); 60 | if (m_pAp) m_pAp->Release(); 61 | if (m_ppAp) m_ppAp->Release(); 62 | } 63 | 64 | if (m_pInitShader) m_pInitShader->Release(); 65 | if (m_pShader) m_pShader->Release(); 66 | if (m_pUpdateShader) m_pUpdateShader->Release(); 67 | if (m_pApShader) m_pApShader->Release(); 68 | 69 | if (m_pScan) m_pScan->Release(); 70 | 71 | if (m_pd3dContext) m_pd3dContext->Release(); 72 | if (m_pd3dDevice) m_pd3dDevice->Release(); 73 | } 74 | 75 | HRESULT ConjGrad::CreateSolver(ID3D11DeviceContext *const pDeviceContext, 76 | DXGI_FORMAT eFormat, const XMUINT3 &vSize, ConjGrad **ppSolver, 77 | ID3D11ComputeShader *const pInitShader, ID3D11ComputeShader *const pApShader) 78 | { 79 | auto &pSolver = *ppSolver; 80 | pSolver = new ConjGrad(pDeviceContext); 81 | return pSolver->Init(eFormat, vSize, pInitShader, pApShader); 82 | } 83 | 84 | HRESULT ConjGrad::Init(DXGI_FORMAT eFormat, const XMUINT3 &vSize, 85 | ID3D11ComputeShader *const pInitShader, ID3D11ComputeShader *const pApShader) 86 | { 87 | HRESULT hr; 88 | computeElementSize(eFormat); 89 | 90 | V_RETURN(initShaders(pInitShader, pApShader)); 91 | V_RETURN(initBuffers(eFormat, vSize)); 92 
| 93 | //V_RETURN(D3DX11CreateScan(m_pd3dContext, vSize.x * vSize.y * vSize.z + 1, 1, &m_pScan)); 94 | //V_RETURN(m_pScan->SetScanDirection(D3DX11_SCAN_DIRECTION_FORWARD)); 95 | V_RETURN(PrefixSum::CreateScan(m_pd3dContext, vSize.x * vSize.y * vSize.z + 1, &m_pScan)); 96 | 97 | return hr; 98 | } 99 | 100 | HRESULT ConjGrad::initBuffers(DXGI_FORMAT eFormat, const XMUINT3 &vSize) 101 | { 102 | HRESULT hr; 103 | const auto uScanSize = vSize.x * vSize.y * vSize.z + 1; 104 | 105 | // r related 106 | V_RETURN(CreateStructuredBuffer(m_pd3dDevice, m_uElementSize, uScanSize, nullptr, &m_prr[0])); 107 | V_RETURN(CreateBufferSRV(m_pd3dDevice, m_prr[0], &m_pSRVAcc_rr)); 108 | V_RETURN(CreateBufferUAV(m_pd3dDevice, m_prr[0], &m_pUAVrr0)); 109 | 110 | V_RETURN(CreateStructuredBuffer(m_pd3dDevice, m_uElementSize, uScanSize, nullptr, &m_prr[1])); 111 | V_RETURN(CreateBufferSRV(m_pd3dDevice, m_prr[1], &m_pSRVAcc_rr_new)); 112 | V_RETURN(CreateBufferUAV(m_pd3dDevice, m_prr[1], &m_pUAVrr)); 113 | 114 | V_RETURN(CreateTexture3D(m_pd3dDevice, eFormat, vSize, nullptr, &m_pr)); 115 | V_RETURN(CreateTexture3DSRV(m_pd3dDevice, m_pr, &m_pSRVr)); 116 | V_RETURN(CreateTexture3DUAV(m_pd3dDevice, m_pr, &m_pUAVr)); 117 | 118 | // p related 119 | V_RETURN(CreateTexture3D(m_pd3dDevice, eFormat, vSize, nullptr, &m_pAp)); 120 | V_RETURN(CreateTexture3DSRV(m_pd3dDevice, m_pAp, &m_pSRVAp)); 121 | V_RETURN(CreateTexture3DUAV(m_pd3dDevice, m_pAp, &m_pUAVAp)); 122 | 123 | V_RETURN(CreateStructuredBuffer(m_pd3dDevice, m_uElementSize, uScanSize, nullptr, &m_ppAp)); 124 | V_RETURN(CreateBufferSRV(m_pd3dDevice, m_ppAp, &m_pSRVAcc_pAp)); 125 | V_RETURN(CreateBufferUAV(m_pd3dDevice, m_ppAp, &m_pUAVpAp)); 126 | 127 | V_RETURN(CreateTexture3D(m_pd3dDevice, eFormat, vSize, nullptr, &m_pp)); 128 | V_RETURN(CreateTexture3DSRV(m_pd3dDevice, m_pp, &m_pSRVp)); 129 | V_RETURN(CreateTexture3DUAV(m_pd3dDevice, m_pp, &m_pUAVp)); 130 | 131 | return hr; 132 | } 133 | 134 | HRESULT ConjGrad::initShaders(ID3D11ComputeShader 
*const pInitShader, ID3D11ComputeShader *const pApShader) 135 | { 136 | HRESULT h, hr; 137 | 138 | ID3DBlob *shaderBuffer = nullptr; 139 | D3D11_SHADER_INPUT_BIND_DESC desc; 140 | 141 | if (pInitShader) 142 | { 143 | m_pInitShader = pInitShader; 144 | m_pInitShader->AddRef(); 145 | } 146 | else 147 | { 148 | V_RETURN(D3DReadFileToBlob(L"CSInit.cso", &shaderBuffer)); 149 | hr = m_pd3dDevice->CreateComputeShader(shaderBuffer->GetBufferPointer(), 150 | shaderBuffer->GetBufferSize(), nullptr, &m_pInitShader); 151 | if (SUCCEEDED(hr)) 152 | { 153 | ID3D11ShaderReflection *pReflector = nullptr; 154 | hr = D3DReflect(shaderBuffer->GetBufferPointer(), shaderBuffer->GetBufferSize(), 155 | IID_ID3D11ShaderReflection, (void**)&pReflector); 156 | if (SUCCEEDED(hr)) 157 | { 158 | h = pReflector->GetResourceBindingDescByName("b", &desc); 159 | if (SUCCEEDED(h)) m_uSRVSlot_b = desc.BindPoint; 160 | else hr = h; 161 | 162 | h = pReflector->GetResourceBindingDescByName("x", &desc); 163 | if (SUCCEEDED(h)) m_uUAVSlot_x0 = desc.BindPoint; 164 | else hr = h; 165 | h = pReflector->GetResourceBindingDescByName("r", &desc); 166 | if (SUCCEEDED(h)) m_uUAVSlot_r0 = desc.BindPoint; 167 | else hr = h; 168 | h = pReflector->GetResourceBindingDescByName("rr", &desc); 169 | if (SUCCEEDED(h)) m_uUAVSlot_rr0 = desc.BindPoint; 170 | else hr = h; 171 | h = pReflector->GetResourceBindingDescByName("p", &desc); 172 | if (SUCCEEDED(h)) m_uUAVSlot_p0 = desc.BindPoint; 173 | else hr = h; 174 | } 175 | if (pReflector) pReflector->Release(); 176 | } 177 | if (shaderBuffer) shaderBuffer->Release(); 178 | V_RETURN(hr); 179 | } 180 | 181 | shaderBuffer = nullptr; 182 | V_RETURN(D3DReadFileToBlob(L"CSxUpdate.cso", &shaderBuffer)); 183 | hr = m_pd3dDevice->CreateComputeShader(shaderBuffer->GetBufferPointer(), 184 | shaderBuffer->GetBufferSize(), nullptr, &m_pShader); 185 | if (SUCCEEDED(hr)) 186 | { 187 | ID3D11ShaderReflection *pReflector = nullptr; 188 | hr = D3DReflect(shaderBuffer->GetBufferPointer(), 
shaderBuffer->GetBufferSize(), 189 | IID_ID3D11ShaderReflection, (void**)&pReflector); 190 | if (SUCCEEDED(hr)) 191 | { 192 | h = pReflector->GetResourceBindingDescByName("p_RO", &desc); 193 | if (SUCCEEDED(h)) m_uSRVSlot_p = desc.BindPoint; 194 | else hr = h; 195 | h = pReflector->GetResourceBindingDescByName("Ap", &desc); 196 | if (SUCCEEDED(h)) m_uSRVSlot_Ap = desc.BindPoint; 197 | else hr = h; 198 | h = pReflector->GetResourceBindingDescByName("Acc_rr", &desc); 199 | if (SUCCEEDED(h)) m_uSRVSlot_Acc_rr = desc.BindPoint; 200 | else hr = h; 201 | h = pReflector->GetResourceBindingDescByName("Acc_pAp", &desc); 202 | if (SUCCEEDED(h)) m_uSRVSlot_Acc_pAp = desc.BindPoint; 203 | else hr = h; 204 | 205 | h = pReflector->GetResourceBindingDescByName("x", &desc); 206 | if (SUCCEEDED(h)) m_uUAVSlot_x = desc.BindPoint; 207 | else hr = h; 208 | h = pReflector->GetResourceBindingDescByName("r", &desc); 209 | if (SUCCEEDED(h)) m_uUAVSlot_r = desc.BindPoint; 210 | else hr = h; 211 | h = pReflector->GetResourceBindingDescByName("rr", &desc); 212 | if (SUCCEEDED(h)) m_uUAVSlot_rr = desc.BindPoint; 213 | else hr = h; 214 | } 215 | if (pReflector) pReflector->Release(); 216 | } 217 | if (shaderBuffer) shaderBuffer->Release(); 218 | V_RETURN(hr); 219 | 220 | shaderBuffer = nullptr; 221 | V_RETURN(D3DReadFileToBlob(L"CSpUpdate.cso", &shaderBuffer)); 222 | hr = m_pd3dDevice->CreateComputeShader(shaderBuffer->GetBufferPointer(), 223 | shaderBuffer->GetBufferSize(), nullptr, &m_pUpdateShader); 224 | if (SUCCEEDED(hr)) 225 | { 226 | ID3D11ShaderReflection *pReflector = nullptr; 227 | hr = D3DReflect(shaderBuffer->GetBufferPointer(), shaderBuffer->GetBufferSize(), 228 | IID_ID3D11ShaderReflection, (void**)&pReflector); 229 | if (SUCCEEDED(hr)) 230 | { 231 | h = pReflector->GetResourceBindingDescByName("r_RO", &desc); 232 | if (SUCCEEDED(h)) m_uSRVSlot_r = desc.BindPoint; 233 | else hr = h; 234 | h = pReflector->GetResourceBindingDescByName("Acc_rr", &desc); 235 | if (SUCCEEDED(h)) 
m_uSRVSlot_Acc_rr_prev = desc.BindPoint; 236 | else hr = h; 237 | h = pReflector->GetResourceBindingDescByName("Acc_rr_new", &desc); 238 | if (SUCCEEDED(h)) m_uSRVSlot_Acc_rr_new = desc.BindPoint; 239 | else hr = h; 240 | 241 | h = pReflector->GetResourceBindingDescByName("p", &desc); 242 | if (SUCCEEDED(h)) m_uUAVSlot_p = desc.BindPoint; 243 | else hr = h; 244 | } 245 | if (pReflector) pReflector->Release(); 246 | } 247 | if (shaderBuffer) shaderBuffer->Release(); 248 | V_RETURN(hr); 249 | 250 | shaderBuffer = nullptr; 251 | if (pApShader) 252 | { 253 | m_pApShader = pApShader; 254 | m_pApShader->AddRef(); 255 | } 256 | else { 257 | V_RETURN(D3DReadFileToBlob(L"CSpAp.cso", &shaderBuffer)); 258 | hr = m_pd3dDevice->CreateComputeShader(shaderBuffer->GetBufferPointer(), 259 | shaderBuffer->GetBufferSize(), nullptr, &m_pApShader); 260 | if (SUCCEEDED(hr)) 261 | { 262 | ID3D11ShaderReflection *pReflector = nullptr; 263 | hr = D3DReflect(shaderBuffer->GetBufferPointer(), shaderBuffer->GetBufferSize(), 264 | IID_ID3D11ShaderReflection, (void**)&pReflector); 265 | if (SUCCEEDED(hr)) 266 | { 267 | h = pReflector->GetResourceBindingDescByName("x", &desc); 268 | if (SUCCEEDED(h)) m_uSRVSlot_p_new = desc.BindPoint; 269 | else hr = h; 270 | 271 | h = pReflector->GetResourceBindingDescByName("Ap", &desc); 272 | if (SUCCEEDED(h)) m_uUAVSlot_Ap = desc.BindPoint; 273 | else hr = h; 274 | h = pReflector->GetResourceBindingDescByName("pAp", &desc); 275 | if (SUCCEEDED(h)) m_uUAVSlot_pAp = desc.BindPoint; 276 | else hr = h; 277 | } 278 | if (pReflector) pReflector->Release(); 279 | } 280 | if (shaderBuffer) shaderBuffer->Release(); 281 | V_RETURN(hr); 282 | } 283 | 284 | return hr; 285 | } 286 | 287 | void ConjGrad::Solve(const XMUINT3 &vSize, ID3D11ShaderResourceView *const pSrc, ID3D11UnorderedAccessView *const pDst, uint32_t iNumIt) 288 | { 289 | const auto UAVInitialCounts = 0u; 290 | const auto uScanSize = vSize.x * vSize.y * vSize.z + 1; 291 | 292 | // Initial solution 293 | 
init(vSize, pSrc, pDst); 294 | //m_pScan->Scan(D3DX11_SCAN_DATA_TYPE_FLOAT, D3DX11_SCAN_OPCODE_ADD, uScanSize, m_pUAVrr0, m_pUAVrr0); 295 | m_pScan->Scan(PrefixSum::SCAN_DATA_TYPE_FLOAT, uScanSize, m_pUAVrr0, m_pUAVrr0); 296 | m_pd3dContext->CSSetUnorderedAccessViews(0, 1, &g_pNullUAV, &UAVInitialCounts); 297 | 298 | // Iteration 299 | for (auto i = 0u; i < iNumIt; ++i) 300 | { 301 | compute_pAp(vSize); 302 | //m_pScan->Scan(D3DX11_SCAN_DATA_TYPE_FLOAT, D3DX11_SCAN_OPCODE_ADD, uScanSize, m_pUAVpAp, m_pUAVpAp); 303 | m_pScan->Scan(PrefixSum::SCAN_DATA_TYPE_FLOAT, uScanSize, m_pUAVpAp, m_pUAVpAp); 304 | m_pd3dContext->CSSetUnorderedAccessViews(0, 1, &g_pNullUAV, &UAVInitialCounts); 305 | update_x(vSize, pDst); 306 | 307 | //m_pScan->Scan(D3DX11_SCAN_DATA_TYPE_FLOAT, D3DX11_SCAN_OPCODE_ADD, uScanSize, m_pUAVrr, m_pUAVrr); 308 | m_pScan->Scan(PrefixSum::SCAN_DATA_TYPE_FLOAT, uScanSize, m_pUAVrr, m_pUAVrr); 309 | m_pd3dContext->CSSetUnorderedAccessViews(0, 1, &g_pNullUAV, &UAVInitialCounts); 310 | update_p(vSize); 311 | 312 | swapBuffers(); 313 | } 314 | } 315 | 316 | void ConjGrad::AddRef() 317 | { 318 | ++m_uRefCount; 319 | } 320 | 321 | void ConjGrad::Release() 322 | { 323 | if (--m_uRefCount < 1) delete this; 324 | } 325 | 326 | void ConjGrad::init(const XMUINT3 &vSize, ID3D11ShaderResourceView *const pSrc, ID3D11UnorderedAccessView *const pDst) 327 | { 328 | const auto UAVInitialCounts = 0u; 329 | 330 | // Setup 331 | m_pd3dContext->CSSetShaderResources(m_uSRVSlot_b, 1, &pSrc); 332 | m_pd3dContext->CSSetUnorderedAccessViews(m_uUAVSlot_x0, 1, &pDst, &UAVInitialCounts); 333 | m_pd3dContext->CSSetUnorderedAccessViews(m_uUAVSlot_r0, 1, &m_pUAVr, &UAVInitialCounts); 334 | m_pd3dContext->CSSetUnorderedAccessViews(m_uUAVSlot_rr0, 1, &m_pUAVrr0, &UAVInitialCounts); 335 | m_pd3dContext->CSSetUnorderedAccessViews(m_uUAVSlot_p0, 1, &m_pUAVp, &UAVInitialCounts); 336 | 337 | // initial solution 338 | m_pd3dContext->CSSetShader(m_pInitShader, nullptr, 0); 339 | 
m_pd3dContext->Dispatch(vSize.x / THREAD_GROUP_SIZE, vSize.y/ THREAD_GROUP_SIZE, vSize.z / THREAD_GROUP_SIZE); 340 | 341 | // Unset 342 | m_pd3dContext->CSSetUnorderedAccessViews(m_uUAVSlot_p0, 1, &g_pNullUAV, &UAVInitialCounts); 343 | m_pd3dContext->CSSetUnorderedAccessViews(m_uUAVSlot_rr0, 1, &g_pNullUAV, &UAVInitialCounts); 344 | m_pd3dContext->CSSetUnorderedAccessViews(m_uUAVSlot_r0, 1, &g_pNullUAV, &UAVInitialCounts); 345 | m_pd3dContext->CSSetUnorderedAccessViews(m_uUAVSlot_x0, 1, &g_pNullUAV, &UAVInitialCounts); 346 | m_pd3dContext->CSSetShaderResources(m_uSRVSlot_b, 1, &g_pNullSRV); 347 | } 348 | 349 | void ConjGrad::update_x(const XMUINT3 &vSize, ID3D11UnorderedAccessView *const pDst) 350 | { 351 | const auto UAVInitialCounts = 0u; 352 | 353 | // Setup 354 | m_pd3dContext->CSSetShaderResources(m_uSRVSlot_p, 1, &m_pSRVp); 355 | m_pd3dContext->CSSetShaderResources(m_uSRVSlot_Ap, 1, &m_pSRVAp); 356 | m_pd3dContext->CSSetShaderResources(m_uSRVSlot_Acc_rr, 1, &m_pSRVAcc_rr); 357 | m_pd3dContext->CSSetShaderResources(m_uSRVSlot_Acc_pAp, 1, &m_pSRVAcc_pAp); 358 | m_pd3dContext->CSSetUnorderedAccessViews(m_uUAVSlot_x, 1, &pDst, &UAVInitialCounts); 359 | m_pd3dContext->CSSetUnorderedAccessViews(m_uUAVSlot_r, 1, &m_pUAVr, &UAVInitialCounts); 360 | m_pd3dContext->CSSetUnorderedAccessViews(m_uUAVSlot_rr, 1, &m_pUAVrr, &UAVInitialCounts); 361 | 362 | // update solution 363 | m_pd3dContext->CSSetShader(m_pShader, nullptr, 0); 364 | m_pd3dContext->Dispatch(vSize.x / THREAD_GROUP_SIZE, vSize.y / THREAD_GROUP_SIZE, vSize.z / THREAD_GROUP_SIZE); 365 | 366 | // Unset 367 | m_pd3dContext->CSSetUnorderedAccessViews(m_uUAVSlot_rr, 1, &g_pNullUAV, &UAVInitialCounts); 368 | m_pd3dContext->CSSetUnorderedAccessViews(m_uUAVSlot_r, 1, &g_pNullUAV, &UAVInitialCounts); 369 | m_pd3dContext->CSSetUnorderedAccessViews(m_uUAVSlot_x, 1, &g_pNullUAV, &UAVInitialCounts); 370 | m_pd3dContext->CSSetShaderResources(m_uSRVSlot_Acc_pAp, 1, &g_pNullSRV); 371 | 
m_pd3dContext->CSSetShaderResources(m_uSRVSlot_Acc_rr, 1, &g_pNullSRV); 372 | m_pd3dContext->CSSetShaderResources(m_uSRVSlot_Ap, 1, &g_pNullSRV); 373 | m_pd3dContext->CSSetShaderResources(m_uSRVSlot_p, 1, &g_pNullSRV); 374 | } 375 | 376 | void ConjGrad::update_p(const XMUINT3 &vSize) 377 | { 378 | const auto UAVInitialCounts = 0u; 379 | 380 | // Setup 381 | m_pd3dContext->CSSetShaderResources(m_uSRVSlot_r, 1, &m_pSRVr); 382 | m_pd3dContext->CSSetShaderResources(m_uSRVSlot_Acc_rr_prev, 1, &m_pSRVAcc_rr); 383 | m_pd3dContext->CSSetShaderResources(m_uSRVSlot_Acc_rr_new, 1, &m_pSRVAcc_rr_new); 384 | m_pd3dContext->CSSetUnorderedAccessViews(m_uUAVSlot_p, 1, &m_pUAVp, &UAVInitialCounts); 385 | 386 | // update solution 387 | m_pd3dContext->CSSetShader(m_pUpdateShader, nullptr, 0); 388 | m_pd3dContext->Dispatch(vSize.x / THREAD_GROUP_SIZE, vSize.y / THREAD_GROUP_SIZE, vSize.z / THREAD_GROUP_SIZE); 389 | 390 | // Unset 391 | m_pd3dContext->CSSetUnorderedAccessViews(m_uUAVSlot_p, 1, &g_pNullUAV, &UAVInitialCounts); 392 | m_pd3dContext->CSSetShaderResources(m_uSRVSlot_Acc_rr_new, 1, &g_pNullSRV); 393 | m_pd3dContext->CSSetShaderResources(m_uSRVSlot_Acc_rr_prev, 1, &g_pNullSRV); 394 | m_pd3dContext->CSSetShaderResources(m_uSRVSlot_r, 1, &g_pNullSRV); 395 | } 396 | 397 | void ConjGrad::compute_pAp(const XMUINT3 &vSize) 398 | { 399 | const auto UAVInitialCounts = 0u; 400 | 401 | // Setup 402 | m_pd3dContext->CSSetShaderResources(m_uSRVSlot_p_new, 1, &m_pSRVp); 403 | m_pd3dContext->CSSetUnorderedAccessViews(m_uUAVSlot_Ap, 1, &m_pUAVAp, &UAVInitialCounts); 404 | m_pd3dContext->CSSetUnorderedAccessViews(m_uUAVSlot_pAp, 1, &m_pUAVpAp, &UAVInitialCounts); 405 | 406 | // update solution 407 | m_pd3dContext->CSSetShader(m_pApShader, nullptr, 0); 408 | m_pd3dContext->Dispatch(vSize.x / THREAD_GROUP_SIZE, vSize.y / THREAD_GROUP_SIZE, vSize.z / THREAD_GROUP_SIZE); 409 | 410 | // Unset 411 | m_pd3dContext->CSSetUnorderedAccessViews(m_uUAVSlot_pAp, 1, &g_pNullUAV, &UAVInitialCounts); 412 | 
m_pd3dContext->CSSetUnorderedAccessViews(m_uUAVSlot_Ap, 1, &g_pNullUAV, &UAVInitialCounts); 413 | m_pd3dContext->CSSetShaderResources(m_uSRVSlot_p_new, 1, &g_pNullSRV); 414 | } 415 | 416 | void ConjGrad::swapBuffers() 417 | { 418 | ID3D11ShaderResourceView* pSRV = m_pSRVAcc_rr_new; 419 | m_pSRVAcc_rr_new = m_pSRVAcc_rr; 420 | m_pSRVAcc_rr = pSRV; 421 | 422 | ID3D11UnorderedAccessView* pUAV = m_pUAVrr0; 423 | m_pUAVrr0 = m_pUAVrr; 424 | m_pUAVrr = pUAV; 425 | } 426 | 427 | void ConjGrad::computeElementSize(DXGI_FORMAT eFormat) 428 | { 429 | switch (eFormat) 430 | { 431 | case DXGI_FORMAT_R32_FLOAT: 432 | m_uElementSize = sizeof(float); 433 | m_eScanDataType = SCAN_DATA_TYPE(FLOAT); 434 | return; 435 | case DXGI_FORMAT_R32_UINT: 436 | m_uElementSize = sizeof(uint32_t); 437 | m_eScanDataType = SCAN_DATA_TYPE(UINT); 438 | return; 439 | case DXGI_FORMAT_R32_SINT: 440 | m_uElementSize = sizeof(int32_t); 441 | m_eScanDataType = SCAN_DATA_TYPE(INT); 442 | return; 443 | case DXGI_FORMAT_R32G32_FLOAT: 444 | m_uElementSize = sizeof(XMFLOAT2); 445 | m_eScanDataType = SCAN_DATA_TYPE(FLOAT); 446 | return; 447 | case DXGI_FORMAT_R32G32_UINT: 448 | m_uElementSize = sizeof(XMUINT2); 449 | m_eScanDataType = SCAN_DATA_TYPE(UINT); 450 | return; 451 | case DXGI_FORMAT_R32G32_SINT: 452 | m_uElementSize = sizeof(XMINT2); 453 | m_eScanDataType = SCAN_DATA_TYPE(INT); 454 | return; 455 | case DXGI_FORMAT_R32G32B32_FLOAT: 456 | m_uElementSize = sizeof(XMFLOAT3); 457 | m_eScanDataType = SCAN_DATA_TYPE(FLOAT); 458 | return; 459 | case DXGI_FORMAT_R32G32B32_UINT: 460 | m_uElementSize = sizeof(XMUINT3); 461 | m_eScanDataType = SCAN_DATA_TYPE(UINT); 462 | return; 463 | case DXGI_FORMAT_R32G32B32_SINT: 464 | m_uElementSize = sizeof(XMINT3); 465 | m_eScanDataType = SCAN_DATA_TYPE(INT); 466 | return; 467 | case DXGI_FORMAT_R32G32B32A32_FLOAT: 468 | m_uElementSize = sizeof(XMFLOAT4); 469 | m_eScanDataType = SCAN_DATA_TYPE(FLOAT); 470 | return; 471 | case DXGI_FORMAT_R32G32B32A32_UINT: 472 | 
m_uElementSize = sizeof(XMUINT4); 473 | m_eScanDataType = SCAN_DATA_TYPE(UINT); 474 | return; 475 | case DXGI_FORMAT_R32G32B32A32_SINT: 476 | m_uElementSize = sizeof(XMINT4); 477 | m_eScanDataType = SCAN_DATA_TYPE(INT); 478 | return; 479 | } 480 | } 481 | -------------------------------------------------------------------------------- /PoissonSolver/ConjGrad.h: -------------------------------------------------------------------------------- 1 | //-------------------------------------------------------------------------------------- 2 | // By XU, Tianchen 3 | //-------------------------------------------------------------------------------------- 4 | 5 | #pragma once 6 | 7 | #ifdef PRECOMPILED_HEADER 8 | #include PRECOMPILED_HEADER_FILE 9 | #else 10 | #include 11 | #include 12 | #include 13 | //#include 14 | #endif 15 | 16 | #include 17 | #include 18 | #include "PrefixSum.h" 19 | 20 | class ConjGrad 21 | { 22 | public: 23 | ConjGrad(ID3D11DeviceContext *const pDeviceContext); 24 | virtual ~ConjGrad(); 25 | HRESULT Init(DXGI_FORMAT eFormat, const DirectX::XMUINT3 &vSize, 26 | ID3D11ComputeShader *const pInitShader = nullptr, ID3D11ComputeShader *const pApShader = nullptr); 27 | 28 | void Solve(const DirectX::XMUINT3 &vSize, ID3D11ShaderResourceView *const pSrc, ID3D11UnorderedAccessView *const pDst, uint32_t iNumIt); 29 | void AddRef(); 30 | void Release(); 31 | 32 | static HRESULT CreateSolver(ID3D11DeviceContext *const pDeviceContext, 33 | DXGI_FORMAT eFormat, const DirectX::XMUINT3 &vSize, ConjGrad **ppSolver, 34 | ID3D11ComputeShader *const pInitShader = nullptr, ID3D11ComputeShader *const pApShader = nullptr); 35 | protected: 36 | HRESULT initShaders(ID3D11ComputeShader *const pInitShader, ID3D11ComputeShader *const pApShader); 37 | HRESULT initBuffers(DXGI_FORMAT eFormat, const DirectX::XMUINT3 &vSize); 38 | void init(const DirectX::XMUINT3 &vSize, ID3D11ShaderResourceView *const pSrc, ID3D11UnorderedAccessView *const pDst); 39 | void update_x(const 
DirectX::XMUINT3 &vSize, ID3D11UnorderedAccessView *const pDst); 40 | void update_p(const DirectX::XMUINT3 &vSize); 41 | void compute_pAp(const DirectX::XMUINT3 &vSize); 42 | void swapBuffers(); 43 | void computeElementSize(DXGI_FORMAT eFormat); 44 | 45 | //D3DX11_SCAN_DATA_TYPE m_eScanDataType; 46 | PrefixSum::SCAN_DATA_TYPE m_eScanDataType; 47 | 48 | uint32_t m_uElementSize; 49 | 50 | uint32_t m_uRefCount; 51 | uint32_t m_uSRVSlot_b; 52 | uint32_t m_uSRVSlot_r; 53 | uint32_t m_uSRVSlot_Acc_rr; 54 | uint32_t m_uSRVSlot_Acc_rr_prev; 55 | uint32_t m_uSRVSlot_Acc_rr_new; 56 | uint32_t m_uSRVSlot_p; 57 | uint32_t m_uSRVSlot_p_new; 58 | uint32_t m_uSRVSlot_Ap; 59 | uint32_t m_uSRVSlot_Acc_pAp; 60 | 61 | uint32_t m_uUAVSlot_x0; 62 | uint32_t m_uUAVSlot_x; 63 | uint32_t m_uUAVSlot_r0; 64 | uint32_t m_uUAVSlot_r; 65 | uint32_t m_uUAVSlot_rr0; 66 | uint32_t m_uUAVSlot_rr; 67 | uint32_t m_uUAVSlot_p0; 68 | uint32_t m_uUAVSlot_p; 69 | uint32_t m_uUAVSlot_Ap; 70 | uint32_t m_uUAVSlot_pAp; 71 | 72 | ID3D11Texture3D *m_pr; 73 | ID3D11Texture3D *m_pAp; 74 | ID3D11Texture3D *m_pp; 75 | 76 | ID3D11Buffer *m_prr[2]; 77 | ID3D11Buffer *m_ppAp; 78 | 79 | ID3D11ShaderResourceView *m_pSRVr; 80 | ID3D11ShaderResourceView *m_pSRVAcc_rr; 81 | ID3D11ShaderResourceView *m_pSRVAcc_rr_new; 82 | ID3D11ShaderResourceView *m_pSRVp; 83 | ID3D11ShaderResourceView *m_pSRVAp; 84 | ID3D11ShaderResourceView *m_pSRVAcc_pAp; 85 | 86 | ID3D11UnorderedAccessView *m_pUAVr; 87 | ID3D11UnorderedAccessView *m_pUAVrr; 88 | ID3D11UnorderedAccessView *m_pUAVrr0; 89 | ID3D11UnorderedAccessView *m_pUAVp; 90 | ID3D11UnorderedAccessView *m_pUAVAp; 91 | ID3D11UnorderedAccessView *m_pUAVpAp; 92 | 93 | ID3D11ComputeShader *m_pInitShader; 94 | ID3D11ComputeShader *m_pShader; 95 | ID3D11ComputeShader *m_pUpdateShader; 96 | ID3D11ComputeShader *m_pApShader; 97 | 98 | //ID3DX11Scan *m_pScan; 99 | PrefixSum *m_pScan; 100 | 101 | ID3D11Device *m_pd3dDevice; 102 | ID3D11DeviceContext *m_pd3dContext; 103 | }; 104 | 
-------------------------------------------------------------------------------- /PoissonSolver/CreateBuffers.cpp: -------------------------------------------------------------------------------- 1 | //-------------------------------------------------------------------------------------- 2 | // By XU, Tianchen 3 | //-------------------------------------------------------------------------------------- 4 | 5 | #include "CreateBuffers.h" 6 | 7 | using namespace DirectX; 8 | 9 | //-------------------------------------------------------------------------------------- 10 | // Create Structured Buffer 11 | //-------------------------------------------------------------------------------------- 12 | _Use_decl_annotations_ 13 | HRESULT CreateTypedBuffer(ID3D11Device* pDevice, UINT uElementSize, UINT uCount, void* pInitData, ID3D11Buffer** ppBufOut) 14 | { 15 | *ppBufOut = nullptr; 16 | 17 | const auto desc = CD3D11_BUFFER_DESC(uElementSize * uCount, D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE); 18 | 19 | if (pInitData) 20 | { 21 | D3D11_SUBRESOURCE_DATA InitData; 22 | InitData.pSysMem = pInitData; 23 | return pDevice->CreateBuffer(&desc, &InitData, ppBufOut); 24 | } 25 | else return pDevice->CreateBuffer(&desc, nullptr, ppBufOut); 26 | } 27 | 28 | //-------------------------------------------------------------------------------------- 29 | // Create Structured Buffer 30 | //-------------------------------------------------------------------------------------- 31 | _Use_decl_annotations_ 32 | HRESULT CreateStructuredBuffer(ID3D11Device* pDevice, UINT uElementSize, UINT uCount, void* pInitData, ID3D11Buffer** ppBufOut) 33 | { 34 | *ppBufOut = nullptr; 35 | 36 | D3D11_BUFFER_DESC desc; 37 | ZeroMemory(&desc, sizeof(desc)); 38 | desc.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE; 39 | desc.ByteWidth = uElementSize * uCount; 40 | desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED; 41 | desc.StructureByteStride = uElementSize; 42 | 43 | if 
(pInitData) 44 | { 45 | D3D11_SUBRESOURCE_DATA InitData; 46 | InitData.pSysMem = pInitData; 47 | return pDevice->CreateBuffer(&desc, &InitData, ppBufOut); 48 | } 49 | else 50 | return pDevice->CreateBuffer(&desc, nullptr, ppBufOut); 51 | } 52 | 53 | //-------------------------------------------------------------------------------------- 54 | // Create Raw Buffer 55 | //-------------------------------------------------------------------------------------- 56 | _Use_decl_annotations_ 57 | HRESULT CreateRawBuffer(ID3D11Device* pDevice, UINT uSize, void* pInitData, ID3D11Buffer** ppBufOut) 58 | { 59 | *ppBufOut = nullptr; 60 | 61 | D3D11_BUFFER_DESC desc; 62 | ZeroMemory(&desc, sizeof(desc)); 63 | desc.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_INDEX_BUFFER | D3D11_BIND_VERTEX_BUFFER; 64 | desc.ByteWidth = uSize; 65 | desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS; 66 | 67 | if (pInitData) 68 | { 69 | D3D11_SUBRESOURCE_DATA initData; 70 | initData.pSysMem = pInitData; 71 | return pDevice->CreateBuffer(&desc, &initData, ppBufOut); 72 | } 73 | else 74 | return pDevice->CreateBuffer(&desc, nullptr, ppBufOut); 75 | } 76 | 77 | //-------------------------------------------------------------------------------------- 78 | // Create Texture 3D 79 | //-------------------------------------------------------------------------------------- 80 | _Use_decl_annotations_ 81 | HRESULT CreateTexture3D(ID3D11Device* pDevice, DXGI_FORMAT eFormat, XMUINT3 vSize, void* pInitData, ID3D11Texture3D** ppTexOut, uint32_t uMips) 82 | { 83 | *ppTexOut = nullptr; 84 | 85 | auto desc = CD3D11_TEXTURE3D_DESC(eFormat, vSize.x, vSize.y, vSize.z, uMips); 86 | desc.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE; 87 | 88 | if (pInitData && uMips == 1) 89 | { 90 | D3D11_SUBRESOURCE_DATA initData; 91 | initData.pSysMem = pInitData; 92 | initData.SysMemPitch = sizeof(float) * vSize.x; 93 | initData.SysMemSlicePitch = sizeof(float) * 
vSize.x * vSize.y; 94 | return pDevice->CreateTexture3D(&desc, &initData, ppTexOut); 95 | } 96 | else 97 | { 98 | const auto hr = pDevice->CreateTexture3D(&desc, nullptr, ppTexOut); 99 | if (SUCCEEDED(hr) && pInitData) 100 | { 101 | ID3D11DeviceContext* pContext; 102 | pDevice->GetImmediateContext(&pContext); 103 | if (pContext) 104 | { 105 | pContext->UpdateSubresource(*ppTexOut, 0, nullptr, pInitData, sizeof(float) * vSize.x, sizeof(float) * vSize.x * vSize.y); 106 | pContext->Release(); 107 | } 108 | } 109 | 110 | return hr; 111 | } 112 | } 113 | 114 | //-------------------------------------------------------------------------------------- 115 | // Create Shader Resource View for Structured or Raw Buffers 116 | //-------------------------------------------------------------------------------------- 117 | _Use_decl_annotations_ 118 | HRESULT CreateBufferSRV(ID3D11Device* pDevice, ID3D11Buffer* pBuffer, ID3D11ShaderResourceView** ppSRVOut, DXGI_FORMAT eFormat) 119 | { 120 | D3D11_BUFFER_DESC descBuf; 121 | ZeroMemory(&descBuf, sizeof(descBuf)); 122 | pBuffer->GetDesc(&descBuf); 123 | 124 | D3D11_SHADER_RESOURCE_VIEW_DESC desc; 125 | ZeroMemory(&desc, sizeof(desc)); 126 | desc.ViewDimension = D3D11_SRV_DIMENSION_BUFFEREX; 127 | desc.BufferEx.FirstElement = 0; 128 | 129 | if (descBuf.MiscFlags & D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS) 130 | { 131 | // This is a Raw Buffer 132 | 133 | desc.Format = DXGI_FORMAT_R32_TYPELESS; 134 | desc.BufferEx.Flags = D3D11_BUFFEREX_SRV_FLAG_RAW; 135 | desc.BufferEx.NumElements = descBuf.ByteWidth / 4; 136 | } 137 | else if (descBuf.MiscFlags & D3D11_RESOURCE_MISC_BUFFER_STRUCTURED) 138 | { 139 | // This is a Structured Buffer 140 | 141 | desc.Format = DXGI_FORMAT_UNKNOWN; 142 | desc.BufferEx.NumElements = descBuf.ByteWidth / descBuf.StructureByteStride; 143 | } 144 | else if (eFormat != DXGI_FORMAT_UNKNOWN) 145 | { 146 | desc.Format = eFormat; 147 | desc.BufferEx.NumElements = descBuf.ByteWidth / 4; 148 | } 149 | else return 
E_INVALIDARG; 150 | 151 | return pDevice->CreateShaderResourceView(pBuffer, &desc, ppSRVOut); 152 | } 153 | 154 | //-------------------------------------------------------------------------------------- 155 | // Create Shader Resource View for Texture3D 156 | //-------------------------------------------------------------------------------------- 157 | _Use_decl_annotations_ 158 | HRESULT CreateTexture3DSRV(ID3D11Device* pDevice, ID3D11Texture3D* pTex, ID3D11ShaderResourceView** ppSRVOut, uint32_t uMDMip, uint32_t uMips) 159 | { 160 | // Setup the description of the shader resource view. 161 | const auto desc = CD3D11_SHADER_RESOURCE_VIEW_DESC(pTex, DXGI_FORMAT_UNKNOWN, uMDMip, uMips); 162 | // Create the shader resource view. 163 | return pDevice->CreateShaderResourceView(pTex, &desc, ppSRVOut); 164 | } 165 | 166 | //-------------------------------------------------------------------------------------- 167 | // Create Unordered Access View for Structured or Raw Buffers 168 | //-------------------------------------------------------------------------------------- 169 | _Use_decl_annotations_ 170 | HRESULT CreateBufferUAV(ID3D11Device* pDevice, ID3D11Buffer* pBuffer, ID3D11UnorderedAccessView** ppUAVOut, DXGI_FORMAT eFormat) 171 | { 172 | D3D11_BUFFER_DESC descBuf; 173 | ZeroMemory(&descBuf, sizeof(descBuf)); 174 | pBuffer->GetDesc(&descBuf); 175 | 176 | D3D11_UNORDERED_ACCESS_VIEW_DESC desc; 177 | ZeroMemory(&desc, sizeof(desc)); 178 | desc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER; 179 | desc.Buffer.FirstElement = 0; 180 | 181 | if (descBuf.MiscFlags & D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS) 182 | { 183 | // This is a Raw Buffer 184 | 185 | desc.Format = DXGI_FORMAT_R32_TYPELESS; // Format must be DXGI_FORMAT_R32_TYPELESS, when creating Raw Unordered Access View 186 | desc.Buffer.Flags = D3D11_BUFFER_UAV_FLAG_RAW; 187 | desc.Buffer.NumElements = descBuf.ByteWidth / 4; 188 | } 189 | else if (descBuf.MiscFlags & D3D11_RESOURCE_MISC_BUFFER_STRUCTURED) 190 | { 
191 | // This is a Structured Buffer 192 | 193 | desc.Format = DXGI_FORMAT_UNKNOWN; // Format must be must be DXGI_FORMAT_UNKNOWN, when creating a View of a Structured Buffer 194 | desc.Buffer.NumElements = descBuf.ByteWidth / descBuf.StructureByteStride; 195 | } 196 | else if (eFormat != DXGI_FORMAT_UNKNOWN) 197 | { 198 | desc.Format = eFormat; 199 | desc.Buffer.NumElements = descBuf.ByteWidth / 4; 200 | } 201 | else return E_INVALIDARG; 202 | 203 | return pDevice->CreateUnorderedAccessView(pBuffer, &desc, ppUAVOut); 204 | } 205 | 206 | //-------------------------------------------------------------------------------------- 207 | // Create Unordered Access View for Texture3D 208 | //-------------------------------------------------------------------------------------- 209 | _Use_decl_annotations_ 210 | HRESULT CreateTexture3DUAV(ID3D11Device* pDevice, ID3D11Texture3D* pTex, ID3D11UnorderedAccessView** ppUAVOut, uint32_t uMipSlice) 211 | { 212 | // Setup the description of the shader resource view. 213 | const auto desc = CD3D11_UNORDERED_ACCESS_VIEW_DESC(pTex, DXGI_FORMAT_UNKNOWN, uMipSlice); 214 | // Create the shader resource view. 
215 | return pDevice->CreateUnorderedAccessView(pTex, &desc, ppUAVOut); 216 | } 217 | -------------------------------------------------------------------------------- /PoissonSolver/CreateBuffers.h: -------------------------------------------------------------------------------- 1 | //-------------------------------------------------------------------------------------- 2 | // By XU, Tianchen 3 | //-------------------------------------------------------------------------------------- 4 | 5 | #pragma once 6 | 7 | #ifdef PRECOMPILED_HEADER 8 | #include PRECOMPILED_HEADER_FILE 9 | #else 10 | #include 11 | #include 12 | #endif 13 | 14 | HRESULT CreateTypedBuffer(_In_ ID3D11Device* pDevice, _In_ UINT uElementSize, _In_ UINT uCount, 15 | _In_reads_(uElementSize*uCount) void* pInitData, _Outptr_ ID3D11Buffer** ppBufOut); 16 | HRESULT CreateStructuredBuffer(_In_ ID3D11Device* pDevice, _In_ UINT uElementSize, _In_ UINT uCount, 17 | _In_reads_(uElementSize*uCount) void* pInitData, _Outptr_ ID3D11Buffer** ppBufOut); 18 | HRESULT CreateRawBuffer(_In_ ID3D11Device* pDevice, _In_ UINT uSize, _In_reads_(uSize) void* pInitData, _Outptr_ ID3D11Buffer** ppBufOut); 19 | HRESULT CreateTexture3D(_In_ ID3D11Device* pDevice, _In_ DXGI_FORMAT eFormat, _In_ DirectX::XMUINT3 vSize, 20 | _In_ void* pInitData, _Outptr_ ID3D11Texture3D** ppTexOut, _In_ uint32_t uMips = 1); 21 | HRESULT CreateBufferSRV(_In_ ID3D11Device* pDevice, _In_ ID3D11Buffer* pBuffer, _Outptr_ ID3D11ShaderResourceView** ppSRVOut, DXGI_FORMAT eFormat = DXGI_FORMAT_UNKNOWN); 22 | HRESULT CreateTexture3DSRV(_In_ ID3D11Device* pDevice, _In_ ID3D11Texture3D* pTex, _Outptr_ ID3D11ShaderResourceView** ppSRVOut, 23 | _In_ uint32_t uMDMip = 0, _In_ uint32_t uMips = 1); 24 | HRESULT CreateBufferUAV(_In_ ID3D11Device* pDevice, _In_ ID3D11Buffer* pBuffer, _Outptr_ ID3D11UnorderedAccessView** pUAVOut, DXGI_FORMAT eFormat = DXGI_FORMAT_UNKNOWN); 25 | HRESULT CreateTexture3DUAV(_In_ ID3D11Device* pDevice, _In_ ID3D11Texture3D* pTex, 
_Outptr_ ID3D11UnorderedAccessView** ppUAVOut, _In_ uint32_t uMipSlice = 0); 26 | -------------------------------------------------------------------------------- /PoissonSolver/Jacobi.cpp: -------------------------------------------------------------------------------- 1 | //-------------------------------------------------------------------------------------- 2 | // By XU, Tianchen 3 | //-------------------------------------------------------------------------------------- 4 | 5 | #include "Jacobi.h" 6 | #include "SharedConst.h" 7 | 8 | #ifndef V_RETURN 9 | #define V_RETURN(x) { hr = x; if (FAILED(hr)) return hr; } 10 | #endif 11 | 12 | using namespace DirectX; 13 | using namespace std; 14 | 15 | ID3D11ShaderResourceView *const g_pNullSRV = nullptr; // Helper to Clear SRVs 16 | ID3D11UnorderedAccessView *const g_pNullUAV = nullptr; // Helper to Clear UAVs 17 | 18 | Jacobi::Jacobi(ID3D11DeviceContext *const pDeviceContext) 19 | : m_pd3dContext(pDeviceContext), m_uRefCount(1) 20 | { 21 | m_pd3dContext->AddRef(); 22 | m_pd3dContext->GetDevice(&m_pd3dDevice); 23 | } 24 | 25 | Jacobi::~Jacobi() 26 | { 27 | if (m_pShader) m_pShader->Release(); 28 | if (m_pd3dContext) m_pd3dContext->Release(); 29 | if (m_pd3dDevice) m_pd3dDevice->Release(); 30 | } 31 | 32 | HRESULT Jacobi::CreateSolver(ID3D11DeviceContext *const pDeviceContext, Jacobi **ppSolver) 33 | { 34 | auto &pSolver = *ppSolver; 35 | pSolver = new Jacobi(pDeviceContext); 36 | return pSolver->Init(); 37 | } 38 | 39 | HRESULT Jacobi::Init() 40 | { 41 | HRESULT hr; 42 | ID3DBlob *shaderBuffer = nullptr; 43 | V_RETURN(D3DReadFileToBlob(L"CSJacobi.cso", &shaderBuffer)); 44 | 45 | hr = m_pd3dDevice->CreateComputeShader(shaderBuffer->GetBufferPointer(), 46 | shaderBuffer->GetBufferSize(), nullptr, &m_pShader); 47 | 48 | if (SUCCEEDED(hr)) 49 | { 50 | ID3D11ShaderReflection *pReflector = nullptr; 51 | auto h = D3DReflect(shaderBuffer->GetBufferPointer(), shaderBuffer->GetBufferSize(), 52 | IID_ID3D11ShaderReflection, 
(void**)&pReflector); 53 | 54 | if (SUCCEEDED(h)) 55 | { 56 | D3D11_SHADER_INPUT_BIND_DESC desc; 57 | h = pReflector->GetResourceBindingDescByName("b", &desc); 58 | if (SUCCEEDED(h)) m_uSRVSlot = desc.BindPoint; 59 | else hr = h; 60 | 61 | h = pReflector->GetResourceBindingDescByName("x", &desc); 62 | if (SUCCEEDED(h)) m_uUAVSlot = desc.BindPoint; 63 | else hr = h; 64 | } 65 | else hr = h; 66 | 67 | if (pReflector) pReflector->Release(); 68 | } 69 | 70 | if (shaderBuffer) shaderBuffer->Release(); 71 | 72 | return hr; 73 | } 74 | 75 | void Jacobi::Solve(const XMUINT3 &vSize, ID3D11ShaderResourceView *const pSrc, ID3D11UnorderedAccessView *const pDst, uint32_t uNumIt) 76 | { 77 | const auto UAVInitialCounts = 0u; 78 | 79 | // Setup 80 | m_pd3dContext->CSSetShaderResources(m_uSRVSlot, 1, &pSrc); 81 | 82 | // Jacobi iterations 83 | for (auto i = 0u; i < uNumIt; ++i) jacobi(vSize, pDst); 84 | 85 | // Unset 86 | m_pd3dContext->CSSetUnorderedAccessViews(m_uUAVSlot, 1, &g_pNullUAV, &UAVInitialCounts); 87 | m_pd3dContext->CSSetShaderResources(m_uSRVSlot, 1, &g_pNullSRV); 88 | } 89 | 90 | void Jacobi::AddRef() 91 | { 92 | ++m_uRefCount; 93 | } 94 | 95 | void Jacobi::Release() 96 | { 97 | if (--m_uRefCount < 1) delete this; 98 | } 99 | 100 | void Jacobi::jacobi(const XMUINT3 &vSize, ID3D11UnorderedAccessView *const pDst) 101 | { 102 | const auto UAVInitialCounts = 0u; 103 | 104 | // Setup 105 | m_pd3dContext->CSSetUnorderedAccessViews(m_uUAVSlot, 1, &pDst, &UAVInitialCounts); 106 | 107 | // Jacobi iteration 108 | m_pd3dContext->CSSetShader(m_pShader, nullptr, 0); 109 | m_pd3dContext->Dispatch(vSize.x / THREAD_GROUP_SIZE, vSize.y / THREAD_GROUP_SIZE, vSize.z / THREAD_GROUP_SIZE); 110 | } 111 | -------------------------------------------------------------------------------- /PoissonSolver/Jacobi.h: -------------------------------------------------------------------------------- 1 | //-------------------------------------------------------------------------------------- 2 | // 
By XU, Tianchen 3 | //-------------------------------------------------------------------------------------- 4 | 5 | #pragma once 6 | 7 | #ifdef PRECOMPILED_HEADER 8 | #include PRECOMPILED_HEADER_FILE 9 | #else 10 | #include 11 | #include 12 | #include 13 | #endif 14 | 15 | #include 16 | #include 17 | 18 | class Jacobi 19 | { 20 | public: 21 | Jacobi(ID3D11DeviceContext *const pDeviceContext); 22 | virtual ~Jacobi(); 23 | HRESULT Init(); 24 | 25 | void Solve(const DirectX::XMUINT3 &vSize, ID3D11ShaderResourceView *const pSrc, ID3D11UnorderedAccessView *const pDst, uint32_t uNumIt); 26 | void AddRef(); 27 | void Release(); 28 | 29 | static HRESULT CreateSolver(ID3D11DeviceContext *const pDeviceContext, Jacobi **ppSolver); 30 | protected: 31 | void jacobi(const DirectX::XMUINT3 &vSize, ID3D11UnorderedAccessView *const pDst); 32 | 33 | uint32_t m_uRefCount; 34 | uint32_t m_uSRVSlot; 35 | uint32_t m_uUAVSlot; 36 | 37 | ID3D11ComputeShader *m_pShader; 38 | 39 | ID3D11Device *m_pd3dDevice; 40 | ID3D11DeviceContext *m_pd3dContext; 41 | }; 42 | -------------------------------------------------------------------------------- /PoissonSolver/Multigrid.cpp: -------------------------------------------------------------------------------- 1 | //-------------------------------------------------------------------------------------- 2 | // By XU, Tianchen 3 | //-------------------------------------------------------------------------------------- 4 | 5 | #include "Multigrid.h" 6 | #include "SharedConst.h" 7 | 8 | #ifndef V_RETURN 9 | #define V_RETURN(x) { hr = x; if (FAILED(hr)) return hr; } 10 | #endif 11 | 12 | using namespace DirectX; 13 | //using namespace std; 14 | 15 | Multigrid::Multigrid(ID3D11DeviceContext *const pDeviceContext) : 16 | Jacobi(pDeviceContext) 17 | { 18 | } 19 | 20 | Multigrid::~Multigrid() 21 | { 22 | } 23 | 24 | HRESULT Multigrid::CreateSolver(ID3D11DeviceContext *const pDeviceContext, Multigrid **ppSolver) 25 | { 26 | auto &pSolver = *ppSolver; 27 | 
pSolver = new Multigrid(pDeviceContext); 28 | return pSolver->Init(); 29 | } 30 | 31 | HRESULT Multigrid::Init() 32 | { 33 | HRESULT h, hr = Jacobi::Init(); 34 | V_RETURN(hr); 35 | 36 | ID3DBlob *shaderBuffer = nullptr; 37 | D3D11_SHADER_INPUT_BIND_DESC desc; 38 | 39 | V_RETURN(D3DReadFileToBlob(L"CSDownSample.cso", &shaderBuffer)); 40 | hr = m_pd3dDevice->CreateComputeShader(shaderBuffer->GetBufferPointer(), 41 | shaderBuffer->GetBufferSize(), nullptr, &m_pDownSmpShader); 42 | if (SUCCEEDED(hr)) 43 | { 44 | ID3D11ShaderReflection *pReflector = nullptr; 45 | hr = D3DReflect(shaderBuffer->GetBufferPointer(), shaderBuffer->GetBufferSize(), 46 | IID_ID3D11ShaderReflection, (void**)&pReflector); 47 | if (SUCCEEDED(hr)) 48 | { 49 | h = pReflector->GetResourceBindingDescByName("x", &desc); 50 | if (SUCCEEDED(h)) m_uUAVSlot_xdown = desc.BindPoint; 51 | else hr = h; 52 | 53 | h = pReflector->GetResourceBindingDescByName("y", &desc); 54 | if (SUCCEEDED(h))m_uUAVSlot_ydown = desc.BindPoint; 55 | else hr = h; 56 | } 57 | if (pReflector) pReflector->Release(); 58 | } 59 | if (shaderBuffer) shaderBuffer->Release(); 60 | V_RETURN(hr); 61 | 62 | shaderBuffer = nullptr; 63 | V_RETURN(D3DReadFileToBlob(L"CSUpSample.cso", &shaderBuffer)); 64 | hr = m_pd3dDevice->CreateComputeShader(shaderBuffer->GetBufferPointer(), 65 | shaderBuffer->GetBufferSize(), nullptr, &m_pUpSmpShader); 66 | if (SUCCEEDED(hr)) 67 | { 68 | ID3D11ShaderReflection *pReflector = nullptr; 69 | hr = D3DReflect(shaderBuffer->GetBufferPointer(), shaderBuffer->GetBufferSize(), 70 | IID_ID3D11ShaderReflection, (void**)&pReflector); 71 | if (SUCCEEDED(hr)) 72 | { 73 | h = pReflector->GetResourceBindingDescByName("x", &desc); 74 | if (SUCCEEDED(h)) m_uUAVSlot_xup = desc.BindPoint; 75 | else hr = h; 76 | 77 | h = pReflector->GetResourceBindingDescByName("y", &desc); 78 | if (SUCCEEDED(h))m_uUAVSlot_yup = desc.BindPoint; 79 | else hr = h; 80 | } 81 | if (pReflector) pReflector->Release(); 82 | } 83 | if (shaderBuffer) 
shaderBuffer->Release(); 84 | 85 | return hr; 86 | } 87 | 88 | void Multigrid::Solve(const XMUINT3 & vSize, ID3D11ShaderResourceView* const* const ppSrcSRVs, 89 | ID3D11ShaderResourceView* const* const ppDstSRVs, ID3D11UnorderedAccessView* const* const ppSrcUAVs, 90 | ID3D11UnorderedAccessView* const* const ppDstUAVs, uint32_t uNumIt, uint32_t uMips) 91 | { 92 | const auto UAVInitialCounts = 0u; 93 | 94 | // Down sampling 95 | for (auto i = 0u; i < uMips - 1; ++i) 96 | { 97 | // Setup 98 | m_pd3dContext->CSSetShaderResources(m_uUAVSlot_xdown, 1, &ppSrcSRVs[i]); 99 | m_pd3dContext->CSSetUnorderedAccessViews(m_uUAVSlot_ydown, 1, &ppSrcUAVs[i + 1], &UAVInitialCounts); 100 | 101 | m_pd3dContext->CSSetShader(m_pDownSmpShader, nullptr, 0); 102 | m_pd3dContext->Dispatch(vSize.x / 2, vSize.y / 2, vSize.z / 2); 103 | } 104 | 105 | for (int i = uMips - 1; i > 0; --i) 106 | { 107 | // Setup b 108 | m_pd3dContext->CSSetShaderResources(m_uSRVSlot, 1, &ppSrcSRVs[i]); 109 | 110 | // Jacobi iterations 111 | for (auto j = 0u; j < uNumIt >> (i + 4); ++j) jacobi(vSize, ppDstUAVs[i]); 112 | 113 | // Up sampling 114 | m_pd3dContext->CSSetShaderResources(m_uUAVSlot_xup, 1, &ppDstSRVs[i]); 115 | m_pd3dContext->CSSetUnorderedAccessViews(m_uUAVSlot_yup, 1, &ppDstUAVs[i - 1], &UAVInitialCounts); 116 | m_pd3dContext->CSSetShader(m_pUpSmpShader, nullptr, 0); 117 | m_pd3dContext->Dispatch(vSize.x / THREAD_GROUP_SIZE, vSize.y / THREAD_GROUP_SIZE, vSize.z / THREAD_GROUP_SIZE); 118 | } 119 | 120 | Jacobi::Solve(vSize, *ppSrcSRVs, *ppDstUAVs, uNumIt); 121 | } 122 | -------------------------------------------------------------------------------- /PoissonSolver/Multigrid.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Jacobi.h" 4 | 5 | class Multigrid : 6 | public Jacobi 7 | { 8 | public: 9 | Multigrid(ID3D11DeviceContext *const pDeviceContext); 10 | virtual ~Multigrid(); 11 | HRESULT Init(); 12 | 13 | void Solve(const 
DirectX::XMUINT3 &vSize, ID3D11ShaderResourceView* const* const ppSrcSRVs, 14 | ID3D11ShaderResourceView* const* const ppDstSRVs, ID3D11UnorderedAccessView* const* const ppSrcUAVs, 15 | ID3D11UnorderedAccessView* const* const ppDstUAVs, uint32_t uNumIt, uint32_t uMips); 16 | 17 | static HRESULT CreateSolver(ID3D11DeviceContext *const pDeviceContext, Multigrid **ppSolver); 18 | 19 | protected: 20 | uint32_t m_uUAVSlot_xdown; 21 | uint32_t m_uUAVSlot_xup; 22 | uint32_t m_uUAVSlot_ydown; 23 | uint32_t m_uUAVSlot_yup; 24 | 25 | ID3D11ComputeShader *m_pDownSmpShader; 26 | ID3D11ComputeShader *m_pUpSmpShader; 27 | }; 28 | -------------------------------------------------------------------------------- /PoissonSolver/PoissonSolver.cpp: -------------------------------------------------------------------------------- 1 | //-------------------------------------------------------------------------------------- 2 | // By XU, Tianchen 3 | //-------------------------------------------------------------------------------------- 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | //#include 11 | 12 | #include "CreateBuffers.h" 13 | #include "Jacobi.h" 14 | #include "ConjGrad.h" 15 | #include "Multigrid.h" 16 | 17 | using namespace DirectX; 18 | using namespace std; 19 | 20 | #pragma comment(lib, "d3d11.lib") 21 | #pragma comment(lib, "d3dcompiler.lib") 22 | #pragma comment(lib, "dxguid.lib") 23 | //#pragma comment(lib, "d3dcsx.lib") 24 | 25 | #if D3D_COMPILER_VERSION < 46 26 | #include 27 | #endif 28 | 29 | #ifndef SAFE_RELEASE 30 | #define SAFE_RELEASE(p) { if (p) { (p)->Release(); (p)=nullptr; } } 31 | #endif 32 | 33 | // Comment out the following line to use raw buffers instead of structured buffers 34 | #define USE_STRUCTURED_BUFFERS 35 | 36 | // If defined, then the hardware/driver must report support for double-precision CS 5.0 shaders or the sample fails to run 37 | //#define TEST_DOUBLE 38 | 39 | #if defined(_MSC_VER) && (_MSC_VER<1610) && 
!defined(_In_reads_) 40 | #define _Outptr_ 41 | #define _Outptr_opt_ 42 | #define _In_reads_(exp) 43 | #define _In_reads_opt_(exp) 44 | #define _Out_writes_(exp) 45 | #endif 46 | 47 | #ifndef _Use_decl_annotations_ 48 | #define _Use_decl_annotations_ 49 | #endif 50 | 51 | //-------------------------------------------------------------------------------------- 52 | // Forward declarations 53 | //-------------------------------------------------------------------------------------- 54 | HRESULT CreateComputeDevice(_Outptr_ ID3D11Device** ppDeviceOut, _Outptr_ ID3D11DeviceContext** ppContextOut, _In_ bool bForceRef); 55 | ID3D11Texture3D* CreateAndCopyToDebugTex(_In_ ID3D11Device* pDevice, _In_ ID3D11DeviceContext* pd3dImmediateContext, _In_ ID3D11Texture3D* pTex); 56 | HRESULT FindDXSDKShaderFileCch(_Out_writes_(cchDest) WCHAR* strDestPath, 57 | _In_ int cchDest, 58 | _In_z_ LPCWSTR strFilename); 59 | 60 | //-------------------------------------------------------------------------------------- 61 | // Global variables 62 | //-------------------------------------------------------------------------------------- 63 | ID3D11Device* g_pDevice = nullptr; 64 | ID3D11DeviceContext* g_pContext = nullptr; 65 | 66 | Jacobi* g_pSolverJacobi = nullptr; 67 | ID3D11Texture3D* g_px = nullptr; 68 | ID3D11Texture3D* g_pb = nullptr; 69 | ID3D11ShaderResourceView** g_ppbSRVs = nullptr; 70 | ID3D11UnorderedAccessView** g_ppbUAVs = nullptr; 71 | ID3D11UnorderedAccessView* g_pxUAV = nullptr; 72 | 73 | ConjGrad* g_pSolverConjGrad = nullptr; 74 | ID3D11Texture3D* g_px_CG = nullptr; 75 | ID3D11UnorderedAccessView* g_pxUAV_CG = nullptr; 76 | 77 | Multigrid* g_pSolverMultigrid = nullptr; 78 | ID3D11Texture3D* g_px_MG = nullptr; 79 | ID3D11ShaderResourceView** g_ppxSRVs = nullptr; 80 | ID3D11UnorderedAccessView** g_ppxUAVs = nullptr; 81 | 82 | //-------------------------------------------------------------------------------------- 83 | // Entry point to the program 84 | 
//-------------------------------------------------------------------------------------- 85 | int __cdecl main() 86 | { 87 | // Enable run-time memory check for debug builds. 88 | #ifdef _DEBUG 89 | _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF); 90 | #endif 91 | 92 | printf("Creating device..."); 93 | if (FAILED(CreateComputeDevice(&g_pDevice, &g_pContext, false))) 94 | return 1; 95 | printf("done\n"); 96 | 97 | const auto uDim = 64u; 98 | const auto uMips = max(static_cast(log2(uDim)) - 1, 1); 99 | const XMUINT3 vSize(uDim, uDim, uDim); 100 | float *b = new float[vSize.x * vSize.y * vSize.z]; 101 | 102 | printf("Creating solvers..."); 103 | if (FAILED(Jacobi::CreateSolver(g_pContext, &g_pSolverJacobi))) return 1; 104 | if (FAILED(ConjGrad::CreateSolver(g_pContext, DXGI_FORMAT_R32_FLOAT, vSize, &g_pSolverConjGrad))) return 1; 105 | if (FAILED(Multigrid::CreateSolver(g_pContext, &g_pSolverMultigrid))) return 1; 106 | printf("done\n"); 107 | 108 | printf("Creating buffers and filling them with initial data..."); 109 | // The number of elements in a buffer to be tested 110 | for (auto i = 0u; i < vSize.z; ++i) 111 | { 112 | for (auto j = 0u; j < vSize.y; ++j) 113 | { 114 | for (auto k = 0u; k < vSize.x; ++k) 115 | { 116 | b[i * vSize.x * vSize.y + j * vSize.x + k] = (rand() % 256 - 255) / 255.0f; 117 | } 118 | } 119 | } 120 | 121 | CreateTexture3D(g_pDevice, DXGI_FORMAT_R32_FLOAT, vSize, b, &g_pb, uMips); 122 | CreateTexture3D(g_pDevice, DXGI_FORMAT_R32_FLOAT, vSize, nullptr, &g_px); 123 | CreateTexture3D(g_pDevice, DXGI_FORMAT_R32_FLOAT, vSize, nullptr, &g_px_CG); 124 | CreateTexture3D(g_pDevice, DXGI_FORMAT_R32_FLOAT, vSize, nullptr, &g_px_MG, uMips); 125 | 126 | #if defined(_DEBUG) || defined(PROFILE) 127 | if (g_pb) 128 | g_pb->SetPrivateData(WKPDID_D3DDebugObjectName, sizeof("b") - 1, "b"); 129 | if (g_px) 130 | g_px->SetPrivateData(WKPDID_D3DDebugObjectName, sizeof("x") - 1, "x"); 131 | if (g_px_CG) 132 | 
g_px_CG->SetPrivateData(WKPDID_D3DDebugObjectName, sizeof("x_CG") - 1, "x_CG"); 133 | if (g_px_MG) 134 | g_px_MG->SetPrivateData(WKPDID_D3DDebugObjectName, sizeof("x_MG") - 1, "x_MG"); 135 | #endif 136 | 137 | printf("done\n"); 138 | 139 | printf("Creating buffer views..."); 140 | CreateTexture3DUAV(g_pDevice, g_px, &g_pxUAV); 141 | CreateTexture3DUAV(g_pDevice, g_px_CG, &g_pxUAV_CG); 142 | g_ppbSRVs = new ID3D11ShaderResourceView*[uMips]; 143 | g_ppxSRVs = new ID3D11ShaderResourceView*[uMips]; 144 | g_ppbUAVs = new ID3D11UnorderedAccessView*[uMips]; 145 | g_ppxUAVs = new ID3D11UnorderedAccessView*[uMips]; 146 | for (auto i = 0u; i < uMips; ++i) 147 | { 148 | CreateTexture3DSRV(g_pDevice, g_pb, &g_ppbSRVs[i], i); 149 | CreateTexture3DSRV(g_pDevice, g_px, &g_ppxSRVs[i], i); 150 | CreateTexture3DUAV(g_pDevice, g_pb, &g_ppbUAVs[i], i); 151 | CreateTexture3DUAV(g_pDevice, g_px_MG, &g_ppxUAVs[i], i); 152 | } 153 | 154 | #if defined(_DEBUG) || defined(PROFILE) 155 | g_pbSRV->SetPrivateData(WKPDID_D3DDebugObjectName, sizeof("b SRV") - 1, "b SRV"); 156 | if (g_pxUAV) 157 | g_pxUAV->SetPrivateData(WKPDID_D3DDebugObjectName, sizeof("x UAV") - 1, "x UAV"); 158 | if (g_pxUAV_CG) 159 | g_pxUAV_CG->SetPrivateData(WKPDID_D3DDebugObjectName, sizeof("x UAV CG") - 1, "x UAV CG"); 160 | for (auto i = 0u; i < uMips; ++i) 161 | { 162 | if (g_pbUAVs[i]) g_pbUAVs[i]->SetPrivateData(WKPDID_D3DDebugObjectName, sizeof("b UAV") - 1, "b UAV"); 163 | if (g_pxUAVs[i]) g_pxUAVs[i]->SetPrivateData(WKPDID_D3DDebugObjectName, sizeof("x UAV") - 1, "x UAV"); 164 | } 165 | #endif 166 | 167 | printf("done\n"); 168 | 169 | ID3D11Query *pQueryDisjoint, *pQueryStart, *pQueryEnd; 170 | auto desc = CD3D11_QUERY_DESC(D3D11_QUERY_TIMESTAMP_DISJOINT); 171 | g_pDevice->CreateQuery(&desc, &pQueryDisjoint); 172 | desc.Query = D3D11_QUERY_TIMESTAMP; 173 | g_pDevice->CreateQuery(&desc, &pQueryStart); 174 | g_pDevice->CreateQuery(&desc, &pQueryEnd); 175 | 176 | printf("Solving by Jacobi iteration..."); 177 | 
g_pContext->Begin(pQueryDisjoint); 178 | g_pContext->End(pQueryStart); 179 | g_pSolverJacobi->Solve(vSize, *g_ppbSRVs, g_pxUAV, 150 * uDim); 180 | g_pContext->End(pQueryEnd); 181 | g_pContext->End(pQueryDisjoint); 182 | 183 | UINT64 uStartTime, uEndTime; 184 | D3D11_QUERY_DATA_TIMESTAMP_DISJOINT freq; 185 | while (S_OK != g_pContext->GetData(pQueryStart, &uStartTime, sizeof(UINT64), 0)); 186 | while (S_OK != g_pContext->GetData(pQueryEnd, &uEndTime, sizeof(UINT64), 0)); 187 | while (S_OK != g_pContext->GetData(pQueryDisjoint, &freq, sizeof(D3D11_QUERY_DATA_TIMESTAMP_DISJOINT), 0)); 188 | double fTimeElapse = (uEndTime - uStartTime) / static_cast(freq.Frequency) * 1000.0; 189 | printf("done (%.2fms)\n", fTimeElapse); 190 | 191 | printf("Solving by conjugate gradient..."); 192 | g_pContext->Begin(pQueryDisjoint); 193 | g_pContext->End(pQueryStart); 194 | g_pSolverConjGrad->Solve(vSize, *g_ppbSRVs, g_pxUAV_CG, 15 * max(uDim / 4, 1)); 195 | g_pContext->End(pQueryEnd); 196 | g_pContext->End(pQueryDisjoint); 197 | 198 | while (S_OK != g_pContext->GetData(pQueryStart, &uStartTime, sizeof(UINT64), 0)); 199 | while (S_OK != g_pContext->GetData(pQueryEnd, &uEndTime, sizeof(UINT64), 0)); 200 | while (S_OK != g_pContext->GetData(pQueryDisjoint, &freq, sizeof(D3D11_QUERY_DATA_TIMESTAMP_DISJOINT), 0)); 201 | fTimeElapse = (uEndTime - uStartTime) / static_cast(freq.Frequency) * 1000.0; 202 | printf("done (%.2fms)\n", fTimeElapse); 203 | 204 | printf("Solving by multigrid..."); 205 | g_pContext->Begin(pQueryDisjoint); 206 | g_pContext->End(pQueryStart); 207 | g_pSolverMultigrid->Solve(vSize, g_ppbSRVs, g_ppxSRVs, g_ppbUAVs, g_ppxUAVs, 120 * uDim, uMips); 208 | g_pContext->End(pQueryEnd); 209 | g_pContext->End(pQueryDisjoint); 210 | 211 | while (S_OK != g_pContext->GetData(pQueryStart, &uStartTime, sizeof(UINT64), 0)); 212 | while (S_OK != g_pContext->GetData(pQueryEnd, &uEndTime, sizeof(UINT64), 0)); 213 | while (S_OK != g_pContext->GetData(pQueryDisjoint, &freq, 
sizeof(D3D11_QUERY_DATA_TIMESTAMP_DISJOINT), 0)); 214 | fTimeElapse = (uEndTime - uStartTime) / static_cast(freq.Frequency) * 1000.0; 215 | printf("done (%.2fms)\n", fTimeElapse); 216 | 217 | SAFE_RELEASE(pQueryEnd); 218 | SAFE_RELEASE(pQueryStart); 219 | SAFE_RELEASE(pQueryDisjoint); 220 | 221 | // Read back the result from GPU, verify its correctness against result computed by CPU 222 | { 223 | auto pRead = CreateAndCopyToDebugTex(g_pDevice, g_pContext, g_px); 224 | D3D11_MAPPED_SUBRESOURCE MappedResource; 225 | float *p; 226 | g_pContext->Map(pRead, 0, D3D11_MAP_READ, 0, &MappedResource); 227 | 228 | // Set a break point here and put down the expression "p, 1024" in your watch window to see what has been written out by our CS 229 | // This is also a common trick to debug CS programs. 230 | p = (float*)MappedResource.pData; 231 | 232 | // Verify that if Compute Shader has done right 233 | printf("Print result (by Jacobi iteration)...\n"); 234 | auto uPitch = max(vSize.x, 4); 235 | for (auto i = 0u; i < vSize.z; ++i) 236 | { 237 | for (auto j = 0u; j < vSize.y; ++j) 238 | { 239 | for (auto k = 0u; k < vSize.x; ++k) 240 | { 241 | printf("%.4f ", p[i * uPitch * vSize.y + j * uPitch + k]); 242 | } 243 | printf("\n"); 244 | } 245 | printf("\n"); 246 | } 247 | 248 | g_pContext->Unmap(pRead, 0); 249 | 250 | SAFE_RELEASE(pRead); 251 | } 252 | 253 | { 254 | ID3D11Texture3D* pRead = CreateAndCopyToDebugTex(g_pDevice, g_pContext, g_px_CG); 255 | D3D11_MAPPED_SUBRESOURCE MappedResource; 256 | float *p; 257 | g_pContext->Map(pRead, 0, D3D11_MAP_READ, 0, &MappedResource); 258 | 259 | // Set a break point here and put down the expression "p, 1024" in your watch window to see what has been written out by our CS 260 | // This is also a common trick to debug CS programs. 
261 | p = (float*)MappedResource.pData; 262 | 263 | // Verify that if Compute Shader has done right 264 | printf("Print result (by conjugate gradient)...\n"); 265 | auto uPitch = max(vSize.x, 4); 266 | for (auto i = 0u; i < vSize.z; ++i) 267 | { 268 | for (auto j = 0u; j < vSize.y; ++j) 269 | { 270 | for (auto k = 0u; k < vSize.x; ++k) 271 | { 272 | printf("%.4f ", p[i * uPitch * vSize.y + j * uPitch + k]); 273 | } 274 | printf("\n"); 275 | } 276 | printf("\n"); 277 | } 278 | 279 | g_pContext->Unmap(pRead, 0); 280 | 281 | SAFE_RELEASE(pRead); 282 | } 283 | 284 | { 285 | ID3D11Texture3D* pRead = CreateAndCopyToDebugTex(g_pDevice, g_pContext, g_px_MG); 286 | D3D11_MAPPED_SUBRESOURCE MappedResource; 287 | float *p; 288 | g_pContext->Map(pRead, 0, D3D11_MAP_READ, 0, &MappedResource); 289 | 290 | // Set a break point here and put down the expression "p, 1024" in your watch window to see what has been written out by our CS 291 | // This is also a common trick to debug CS programs. 292 | p = (float*)MappedResource.pData; 293 | 294 | // Verify that if Compute Shader has done right 295 | printf("Print result (by multigrid)...\n"); 296 | auto uPitch = max(vSize.x, 4); 297 | for (auto i = 0u; i < vSize.z; ++i) 298 | { 299 | for (auto j = 0u; j < vSize.y; ++j) 300 | { 301 | for (auto k = 0u; k < vSize.x; ++k) 302 | { 303 | printf("%.4f ", p[i * uPitch * vSize.y + j * uPitch + k]); 304 | } 305 | printf("\n"); 306 | } 307 | printf("\n"); 308 | } 309 | 310 | g_pContext->Unmap(pRead, 0); 311 | 312 | SAFE_RELEASE(pRead); 313 | } 314 | 315 | printf("Cleaning up...\n"); 316 | for (auto i = 0u; i < uMips && g_ppxUAVs; ++i) SAFE_RELEASE(g_ppxUAVs[i]); 317 | SAFE_RELEASE(g_px_MG); 318 | SAFE_RELEASE(g_pSolverMultigrid); 319 | 320 | SAFE_RELEASE(g_pxUAV_CG); 321 | SAFE_RELEASE(g_px_CG); 322 | SAFE_RELEASE(g_pSolverConjGrad); 323 | 324 | SAFE_RELEASE(g_pxUAV); 325 | for (auto i = 0u; i < uMips && g_ppbUAVs; ++i) SAFE_RELEASE(g_ppbUAVs[i]); 326 | for (auto i = 0u; i < uMips && g_ppbSRVs; 
++i) SAFE_RELEASE(g_ppbSRVs[i]); 327 | SAFE_RELEASE(g_pb); 328 | SAFE_RELEASE(g_px); 329 | SAFE_RELEASE(g_pSolverJacobi); 330 | 331 | SAFE_RELEASE(g_pContext); 332 | SAFE_RELEASE(g_pDevice); 333 | 334 | if (g_ppbUAVs) delete[] g_ppbUAVs; 335 | if (g_ppbSRVs) delete[] g_ppbSRVs; 336 | if (b) delete[] b; 337 | 338 | return 0; 339 | } 340 | 341 | 342 | //-------------------------------------------------------------------------------------- 343 | // Create the D3D device and device context suitable for running Compute Shaders(CS) 344 | //-------------------------------------------------------------------------------------- 345 | _Use_decl_annotations_ 346 | HRESULT CreateComputeDevice(ID3D11Device** ppDeviceOut, ID3D11DeviceContext** ppContextOut, bool bForceRef) 347 | { 348 | *ppDeviceOut = nullptr; 349 | *ppContextOut = nullptr; 350 | 351 | HRESULT hr = S_OK; 352 | 353 | UINT uCreationFlags = D3D11_CREATE_DEVICE_SINGLETHREADED; 354 | #ifdef _DEBUG 355 | uCreationFlags |= D3D11_CREATE_DEVICE_DEBUG; 356 | #endif 357 | D3D_FEATURE_LEVEL flOut; 358 | static const D3D_FEATURE_LEVEL flvl[] = { D3D_FEATURE_LEVEL_11_0, D3D_FEATURE_LEVEL_10_1, D3D_FEATURE_LEVEL_10_0 }; 359 | 360 | bool bNeedRefDevice = false; 361 | if (!bForceRef) 362 | { 363 | hr = D3D11CreateDevice(nullptr, // Use default graphics card 364 | D3D_DRIVER_TYPE_HARDWARE, // Try to create a hardware accelerated device 365 | nullptr, // Do not use external software rasterizer module 366 | uCreationFlags, // Device creation flags 367 | flvl, 368 | sizeof(flvl) / sizeof(D3D_FEATURE_LEVEL), 369 | D3D11_SDK_VERSION, // SDK version 370 | ppDeviceOut, // Device out 371 | &flOut, // Actual feature level created 372 | ppContextOut); // Context out 373 | 374 | if (SUCCEEDED(hr)) 375 | { 376 | // A hardware accelerated device has been created, so check for Compute Shader support 377 | 378 | // If we have a device >= D3D_FEATURE_LEVEL_11_0 created, full CS5.0 support is guaranteed, no need for further checks 379 | if (flOut 
< D3D_FEATURE_LEVEL_11_0) 380 | { 381 | #ifdef TEST_DOUBLE 382 | bNeedRefDevice = true; 383 | printf("No hardware Compute Shader 5.0 capable device found (required for doubles), trying to create ref device.\n"); 384 | #else 385 | // Otherwise, we need further check whether this device support CS4.x (Compute on 10) 386 | D3D11_FEATURE_DATA_D3D10_X_HARDWARE_OPTIONS hwopts; 387 | (*ppDeviceOut)->CheckFeatureSupport(D3D11_FEATURE_D3D10_X_HARDWARE_OPTIONS, &hwopts, sizeof(hwopts)); 388 | if (!hwopts.ComputeShaders_Plus_RawAndStructuredBuffers_Via_Shader_4_x) 389 | { 390 | bNeedRefDevice = true; 391 | printf("No hardware Compute Shader capable device found, trying to create ref device.\n"); 392 | } 393 | #endif 394 | } 395 | 396 | #ifdef TEST_DOUBLE 397 | else 398 | { 399 | // Double-precision support is an optional feature of CS 5.0 400 | D3D11_FEATURE_DATA_DOUBLES hwopts; 401 | (*ppDeviceOut)->CheckFeatureSupport(D3D11_FEATURE_DOUBLES, &hwopts, sizeof(hwopts)); 402 | if (!hwopts.DoublePrecisionFloatShaderOps) 403 | { 404 | bNeedRefDevice = true; 405 | printf("No hardware double-precision capable device found, trying to create ref device.\n"); 406 | } 407 | } 408 | #endif 409 | } 410 | } 411 | 412 | if (bForceRef || FAILED(hr) || bNeedRefDevice) 413 | { 414 | // Either because of failure on creating a hardware device or hardware lacking CS capability, we create a ref device here 415 | 416 | SAFE_RELEASE(*ppDeviceOut); 417 | SAFE_RELEASE(*ppContextOut); 418 | 419 | hr = D3D11CreateDevice(nullptr, // Use default graphics card 420 | D3D_DRIVER_TYPE_REFERENCE, // Try to create a hardware accelerated device 421 | nullptr, // Do not use external software rasterizer module 422 | uCreationFlags, // Device creation flags 423 | flvl, 424 | sizeof(flvl) / sizeof(D3D_FEATURE_LEVEL), 425 | D3D11_SDK_VERSION, // SDK version 426 | ppDeviceOut, // Device out 427 | &flOut, // Actual feature level created 428 | ppContextOut); // Context out 429 | if (FAILED(hr)) 430 | { 431 | 
printf("Reference rasterizer device create failure\n"); 432 | return hr; 433 | } 434 | } 435 | 436 | return hr; 437 | } 438 | 439 | //-------------------------------------------------------------------------------------- 440 | // Compile and create the CS 441 | //-------------------------------------------------------------------------------------- 442 | _Use_decl_annotations_ 443 | HRESULT CreateComputeShader(LPCWSTR pSrcFile, LPCSTR pFunctionName, 444 | ID3D11Device* pDevice, ID3D11ComputeShader** ppShaderOut) 445 | { 446 | if (!pDevice || !ppShaderOut) 447 | return E_INVALIDARG; 448 | 449 | // Finds the correct path for the shader file. 450 | // This is only required for this sample to be run correctly from within the Sample Browser, 451 | // in your own projects, these lines could be removed safely 452 | WCHAR str[MAX_PATH]; 453 | HRESULT hr = FindDXSDKShaderFileCch(str, MAX_PATH, pSrcFile); 454 | if (FAILED(hr)) 455 | return hr; 456 | 457 | DWORD dwShaderFlags = D3DCOMPILE_ENABLE_STRICTNESS; 458 | #ifdef _DEBUG 459 | // Set the D3DCOMPILE_DEBUG flag to embed debug information in the shaders. 460 | // Setting this flag improves the shader debugging experience, but still allows 461 | // the shaders to be optimized and to run exactly the way they will run in 462 | // the release configuration of this program. 463 | dwShaderFlags |= D3DCOMPILE_DEBUG; 464 | 465 | // Disable optimizations to further improve shader debugging 466 | dwShaderFlags |= D3DCOMPILE_SKIP_OPTIMIZATION; 467 | #endif 468 | 469 | const D3D_SHADER_MACRO defines[] = 470 | { 471 | #ifdef USE_STRUCTURED_BUFFERS 472 | "USE_STRUCTURED_BUFFERS", "1", 473 | #endif 474 | 475 | #ifdef TEST_DOUBLE 476 | "TEST_DOUBLE", "1", 477 | #endif 478 | nullptr, nullptr 479 | }; 480 | 481 | // We generally prefer to use the higher CS shader profile when possible as CS 5.0 is better performance on 11-class hardware 482 | LPCSTR pProfile = (pDevice->GetFeatureLevel() >= D3D_FEATURE_LEVEL_11_0) ? 
"cs_5_0" : "cs_4_0"; 483 | 484 | ID3DBlob* pErrorBlob = nullptr; 485 | ID3DBlob* pBlob = nullptr; 486 | 487 | #if D3D_COMPILER_VERSION >= 46 488 | 489 | hr = D3DCompileFromFile(str, defines, D3D_COMPILE_STANDARD_FILE_INCLUDE, pFunctionName, pProfile, 490 | dwShaderFlags, 0, &pBlob, &pErrorBlob); 491 | 492 | #else 493 | 494 | hr = D3DX11CompileFromFile(str, defines, nullptr, pFunctionName, pProfile, 495 | dwShaderFlags, 0, nullptr, &pBlob, &pErrorBlob, nullptr); 496 | 497 | #endif 498 | 499 | if (FAILED(hr)) 500 | { 501 | if (pErrorBlob) 502 | OutputDebugStringA((char*)pErrorBlob->GetBufferPointer()); 503 | 504 | SAFE_RELEASE(pErrorBlob); 505 | SAFE_RELEASE(pBlob); 506 | 507 | return hr; 508 | } 509 | 510 | hr = pDevice->CreateComputeShader(pBlob->GetBufferPointer(), pBlob->GetBufferSize(), nullptr, ppShaderOut); 511 | 512 | SAFE_RELEASE(pErrorBlob); 513 | SAFE_RELEASE(pBlob); 514 | 515 | #if defined(_DEBUG) || defined(PROFILE) 516 | if (SUCCEEDED(hr)) 517 | { 518 | (*ppShaderOut)->SetPrivateData(WKPDID_D3DDebugObjectName, lstrlenA(pFunctionName), pFunctionName); 519 | } 520 | #endif 521 | 522 | return hr; 523 | } 524 | 525 | //-------------------------------------------------------------------------------------- 526 | // Create a CPU accessible buffer and download the content of a GPU buffer into it 527 | // This function is very useful for debugging CS programs 528 | //-------------------------------------------------------------------------------------- 529 | _Use_decl_annotations_ 530 | ID3D11Buffer* CreateAndCopyToDebugBuf(ID3D11Device* pDevice, ID3D11DeviceContext* pd3dImmediateContext, ID3D11Buffer* pBuffer) 531 | { 532 | ID3D11Buffer* debugbuf = nullptr; 533 | 534 | D3D11_BUFFER_DESC desc; 535 | ZeroMemory(&desc, sizeof(desc)); 536 | pBuffer->GetDesc(&desc); 537 | desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; 538 | desc.Usage = D3D11_USAGE_STAGING; 539 | desc.BindFlags = 0; 540 | desc.MiscFlags = 0; 541 | if (SUCCEEDED(pDevice->CreateBuffer(&desc, nullptr, 
&debugbuf))) 542 | { 543 | #if defined(_DEBUG) || defined(PROFILE) 544 | debugbuf->SetPrivateData(WKPDID_D3DDebugObjectName, sizeof("Debug") - 1, "Debug"); 545 | #endif 546 | 547 | pd3dImmediateContext->CopyResource(debugbuf, pBuffer); 548 | } 549 | 550 | return debugbuf; 551 | } 552 | 553 | //-------------------------------------------------------------------------------------- 554 | // Create a CPU accessible texture and download the content of a GPU buffer into it 555 | // This function is very useful for debugging CS programs 556 | //-------------------------------------------------------------------------------------- 557 | _Use_decl_annotations_ 558 | ID3D11Texture3D* CreateAndCopyToDebugTex(ID3D11Device* pDevice, ID3D11DeviceContext* pd3dImmediateContext, ID3D11Texture3D* pTex) 559 | { 560 | ID3D11Texture3D* debugTex = nullptr; 561 | 562 | D3D11_TEXTURE3D_DESC desc; 563 | pTex->GetDesc(&desc); 564 | desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; 565 | desc.Usage = D3D11_USAGE_STAGING; 566 | desc.BindFlags = 0; 567 | desc.MiscFlags = 0; 568 | if (SUCCEEDED(pDevice->CreateTexture3D(&desc, nullptr, &debugTex))) 569 | { 570 | #if defined(_DEBUG) || defined(PROFILE) 571 | debugTex->SetPrivateData(WKPDID_D3DDebugObjectName, sizeof("Debug") - 1, "Debug"); 572 | #endif 573 | 574 | pd3dImmediateContext->CopyResource(debugTex, pTex); 575 | } 576 | 577 | return debugTex; 578 | } 579 | 580 | //-------------------------------------------------------------------------------------- 581 | // Tries to find the location of the shader file 582 | // This is a trimmed down version of DXUTFindDXSDKMediaFileCch. 
583 | //-------------------------------------------------------------------------------------- 584 | _Use_decl_annotations_ 585 | HRESULT FindDXSDKShaderFileCch(WCHAR* strDestPath, 586 | int cchDest, 587 | LPCWSTR strFilename) 588 | { 589 | if (!strFilename || strFilename[0] == 0 || !strDestPath || cchDest < 10) 590 | return E_INVALIDARG; 591 | 592 | // Get the exe name, and exe path 593 | WCHAR strExePath[MAX_PATH] = 594 | { 595 | 0 596 | }; 597 | WCHAR strExeName[MAX_PATH] = 598 | { 599 | 0 600 | }; 601 | WCHAR* strLastSlash = nullptr; 602 | GetModuleFileName(nullptr, strExePath, MAX_PATH); 603 | strExePath[MAX_PATH - 1] = 0; 604 | strLastSlash = wcsrchr(strExePath, TEXT('\\')); 605 | if (strLastSlash) 606 | { 607 | wcscpy_s(strExeName, MAX_PATH, &strLastSlash[1]); 608 | 609 | // Chop the exe name from the exe path 610 | *strLastSlash = 0; 611 | 612 | // Chop the .exe from the exe name 613 | strLastSlash = wcsrchr(strExeName, TEXT('.')); 614 | if (strLastSlash) 615 | *strLastSlash = 0; 616 | } 617 | 618 | // Search in directories: 619 | // .\ 620 | // %EXE_DIR%\..\..\%EXE_NAME% 621 | 622 | wcscpy_s(strDestPath, cchDest, strFilename); 623 | if (GetFileAttributes(strDestPath) != 0xFFFFFFFF) 624 | return S_OK; 625 | 626 | swprintf_s(strDestPath, cchDest, L"%s\\..\\..\\%s\\%s", strExePath, strExeName, strFilename); 627 | if (GetFileAttributes(strDestPath) != 0xFFFFFFFF) 628 | return S_OK; 629 | 630 | // On failure, return the file as the path but also return an error code 631 | wcscpy_s(strDestPath, cchDest, strFilename); 632 | 633 | return E_FAIL; 634 | } 635 | -------------------------------------------------------------------------------- /PoissonSolver/PoissonSolver.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Release 10 | Win32 11 | 12 | 13 | Debug 14 | x64 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | {D7D2A4AE-092A-46FC-A30E-8B90589946C9} 23 | PoissonSolver 24 | 
10.0.17763.0 25 | 26 | 27 | 28 | Application 29 | true 30 | v141 31 | Unicode 32 | 33 | 34 | Application 35 | false 36 | v141 37 | true 38 | Unicode 39 | 40 | 41 | Application 42 | true 43 | v141 44 | Unicode 45 | 46 | 47 | Application 48 | false 49 | v141 50 | true 51 | Unicode 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | Level3 75 | Disabled 76 | true 77 | 78 | 79 | true 80 | 81 | 82 | COPY /Y "$(OutDir)*.cso" "$(ProjectDir)..\Bin\" 83 | COPY /Y "$(OutDir)*.exe" "$(ProjectDir)..\Bin\" 84 | 85 | 86 | 87 | 88 | Level3 89 | Disabled 90 | true 91 | 92 | 93 | true 94 | 95 | 96 | COPY /Y "$(OutDir)*.cso" "$(ProjectDir)..\Bin\" 97 | COPY /Y "$(OutDir)*.exe" "$(ProjectDir)..\Bin\" 98 | 99 | 100 | 101 | 102 | Level3 103 | MaxSpeed 104 | true 105 | true 106 | true 107 | 108 | 109 | true 110 | true 111 | true 112 | 113 | 114 | COPY /Y "$(OutDir)*.cso" "$(ProjectDir)..\Bin\" 115 | COPY /Y "$(OutDir)*.exe" "$(ProjectDir)..\Bin\" 116 | 117 | 118 | 119 | 120 | Level3 121 | MaxSpeed 122 | true 123 | true 124 | true 125 | 126 | 127 | true 128 | true 129 | true 130 | 131 | 132 | COPY /Y "$(OutDir)*.cso" "$(ProjectDir)..\Bin\" 133 | COPY /Y "$(OutDir)*.exe" "$(ProjectDir)..\Bin\" 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | Compute 155 | 5.0 156 | Compute 157 | 5.0 158 | Compute 159 | 5.0 160 | Compute 161 | 5.0 162 | 163 | 164 | 5.0 165 | 5.0 166 | 5.0 167 | 5.0 168 | Compute 169 | Compute 170 | Compute 171 | Compute 172 | 173 | 174 | Compute 175 | Compute 176 | Compute 177 | Compute 178 | 5.0 179 | 5.0 180 | 5.0 181 | 5.0 182 | 183 | 184 | Compute 185 | 5.0 186 | Compute 187 | 5.0 188 | Compute 189 | 5.0 190 | Compute 191 | 5.0 192 | 193 | 194 | Compute 195 | 5.0 196 | Compute 197 | 5.0 198 | Compute 199 | 5.0 200 | Compute 201 | 5.0 202 | 203 | 204 | Compute 205 | 5.0 206 | Compute 207 | 5.0 208 | Compute 209 | 5.0 210 | Compute 
211 | 5.0 212 | 213 | 214 | 5.0 215 | 5.0 216 | 5.0 217 | 5.0 218 | Compute 219 | Compute 220 | Compute 221 | Compute 222 | 223 | 224 | Compute 225 | 5.0 226 | Compute 227 | 5.0 228 | Compute 229 | 5.0 230 | Compute 231 | 5.0 232 | 233 | 234 | Compute 235 | 5.0 236 | Compute 237 | 5.0 238 | Compute 239 | 5.0 240 | Compute 241 | 5.0 242 | 243 | 244 | Compute 245 | 5.0 246 | Compute 247 | 5.0 248 | Compute 249 | 5.0 250 | Compute 251 | 5.0 252 | 253 | 254 | Compute 255 | 5.0 256 | Compute 257 | 5.0 258 | Compute 259 | 5.0 260 | Compute 261 | 5.0 262 | 263 | 264 | Compute 265 | 5.0 266 | Compute 267 | 5.0 268 | Compute 269 | 5.0 270 | Compute 271 | 5.0 272 | 273 | 274 | Compute 275 | 5.0 276 | Compute 277 | 5.0 278 | Compute 279 | 5.0 280 | Compute 281 | 5.0 282 | update_p 283 | update_p 284 | update_p 285 | update_p 286 | 287 | 288 | Compute 289 | 5.0 290 | Compute 291 | 5.0 292 | Compute 293 | 5.0 294 | Compute 295 | 5.0 296 | 297 | 298 | Compute 299 | 5.0 300 | Compute 301 | 5.0 302 | Compute 303 | 5.0 304 | Compute 305 | 5.0 306 | update_x 307 | update_x 308 | update_x 309 | update_x 310 | 311 | 312 | Compute 313 | 5.0 314 | Compute 315 | 5.0 316 | Compute 317 | 5.0 318 | Compute 319 | 5.0 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | -------------------------------------------------------------------------------- /PoissonSolver/PoissonSolver.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | {846a9373-7920-4810-b762-16ad4ed340de} 18 | 19 | 20 | 21 | 22 | Source Files 23 | 24 | 25 | Source Files 26 | 27 | 28 | Source Files 29 | 30 | 31 
| Source Files 32 | 33 | 34 | Source Files 35 | 36 | 37 | Source Files 38 | 39 | 40 | 41 | 42 | Header Files 43 | 44 | 45 | Header Files 46 | 47 | 48 | Header Files 49 | 50 | 51 | Header Files 52 | 53 | 54 | Header Files 55 | 56 | 57 | Header Files 58 | 59 | 60 | 61 | 62 | Shaders 63 | 64 | 65 | Shaders 66 | 67 | 68 | Shaders 69 | 70 | 71 | Shaders 72 | 73 | 74 | Shaders 75 | 76 | 77 | Shaders 78 | 79 | 80 | Shaders 81 | 82 | 83 | Shaders 84 | 85 | 86 | Shaders 87 | 88 | 89 | Shaders 90 | 91 | 92 | Shaders 93 | 94 | 95 | Shaders 96 | 97 | 98 | Shaders 99 | 100 | 101 | Shaders 102 | 103 | 104 | Shaders 105 | 106 | 107 | Shaders 108 | 109 | 110 | 111 | 112 | Shaders 113 | 114 | 115 | Shaders 116 | 117 | 118 | Shaders 119 | 120 | 121 | Shaders 122 | 123 | 124 | Shaders 125 | 126 | 127 | Shaders 128 | 129 | 130 | -------------------------------------------------------------------------------- /PoissonSolver/PoissonSolver.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | WindowsLocalDebugger 5 | $(ProjectDir)..\Bin\ 6 | 7 | 8 | WindowsLocalDebugger 9 | $(ProjectDir)..\Bin\ 10 | 11 | 12 | WindowsLocalDebugger 13 | $(ProjectDir)..\Bin\ 14 | 15 | 16 | WindowsLocalDebugger 17 | $(ProjectDir)..\Bin\ 18 | 19 | -------------------------------------------------------------------------------- /PoissonSolver/PrefixSum.cpp: -------------------------------------------------------------------------------- 1 | //-------------------------------------------------------------------------------------- 2 | // By XU, Tianchen 3 | //-------------------------------------------------------------------------------------- 4 | 5 | #include "PrefixSum.h" 6 | 7 | #ifndef V_RETURN 8 | #define V_RETURN(x) { hr = x; if (FAILED(hr)) return hr; } 9 | #endif 10 | 11 | ID3D11UnorderedAccessView *const g_pNullUAV = nullptr; // Helper to Clear UAVs 12 | 13 | PrefixSum::PrefixSum(ID3D11DeviceContext *pDeviceContext) 14 | : 
m_pd3dContext(pDeviceContext), m_uRefCount(1) 15 | { 16 | m_pd3dContext->AddRef(); 17 | m_pd3dContext->GetDevice(&m_pd3dDevice); 18 | } 19 | 20 | PrefixSum::~PrefixSum() 21 | { 22 | if (m_pd3dContext) m_pd3dContext->Release(); 23 | if (m_pd3dDevice) m_pd3dDevice->Release(); 24 | } 25 | 26 | HRESULT PrefixSum::Init(const uint32_t uSize) 27 | { 28 | HRESULT h, hr; 29 | ID3DBlob *shaderBuffer = nullptr; 30 | V_RETURN(D3DReadFileToBlob(L"CSPrefixSum1.cso", &shaderBuffer)); 31 | hr = m_pd3dDevice->CreateComputeShader(shaderBuffer->GetBufferPointer(), 32 | shaderBuffer->GetBufferSize(), nullptr, &m_pShaders[CS_PREFIXSUM1_FLOAT]); 33 | if (SUCCEEDED(hr)) 34 | { 35 | ID3D11ShaderReflection *pReflector = nullptr; 36 | hr = D3DReflect(shaderBuffer->GetBufferPointer(), shaderBuffer->GetBufferSize(), 37 | IID_ID3D11ShaderReflection, (void**)&pReflector); 38 | if (SUCCEEDED(hr)) 39 | { 40 | D3D11_SHADER_INPUT_BIND_DESC desc; 41 | h = pReflector->GetResourceBindingDescByName("g_RWDst", &desc); 42 | if (SUCCEEDED(h)) m_uUAVSlot_Dst = desc.BindPoint; 43 | else hr = h; 44 | 45 | h = pReflector->GetResourceBindingDescByName("g_RWInc", &desc); 46 | if (SUCCEEDED(h)) m_uUAVSlot_Inc = desc.BindPoint; 47 | else hr = h; 48 | 49 | h = pReflector->GetResourceBindingDescByName("g_RWSrc", &desc); 50 | if (SUCCEEDED(h)) m_uUAVSlot_Src = desc.BindPoint; 51 | else hr = h; 52 | } 53 | if (pReflector) pReflector->Release(); 54 | } 55 | if (shaderBuffer) shaderBuffer->Release(); 56 | V_RETURN(hr); 57 | 58 | shaderBuffer = nullptr; 59 | V_RETURN(D3DReadFileToBlob(L"CSPrefixSum1_rw.cso", &shaderBuffer)); 60 | hr = m_pd3dDevice->CreateComputeShader(shaderBuffer->GetBufferPointer(), 61 | shaderBuffer->GetBufferSize(), nullptr, &m_pShaders[CS_PREFIXSUM1_FLOAT_RW]); 62 | if (shaderBuffer) shaderBuffer->Release(); 63 | V_RETURN(hr); 64 | 65 | shaderBuffer = nullptr; 66 | V_RETURN(D3DReadFileToBlob(L"CSPrefixSum2.cso", &shaderBuffer)); 67 | hr = 
m_pd3dDevice->CreateComputeShader(shaderBuffer->GetBufferPointer(), 68 | shaderBuffer->GetBufferSize(), nullptr, &m_pShaders[CS_PREFIXSUM2_FLOAT]); 69 | if (shaderBuffer) shaderBuffer->Release(); 70 | V_RETURN(hr); 71 | 72 | shaderBuffer = nullptr; 73 | V_RETURN(D3DReadFileToBlob(L"CSPrefixSum1i.cso", &shaderBuffer)); 74 | hr = m_pd3dDevice->CreateComputeShader(shaderBuffer->GetBufferPointer(), 75 | shaderBuffer->GetBufferSize(), nullptr, &m_pShaders[CS_PREFIXSUM1_INT]); 76 | if (shaderBuffer) shaderBuffer->Release(); 77 | V_RETURN(hr); 78 | 79 | shaderBuffer = nullptr; 80 | V_RETURN(D3DReadFileToBlob(L"CSPrefixSum1i_rw.cso", &shaderBuffer)); 81 | hr = m_pd3dDevice->CreateComputeShader(shaderBuffer->GetBufferPointer(), 82 | shaderBuffer->GetBufferSize(), nullptr, &m_pShaders[CS_PREFIXSUM1_INT_RW]); 83 | if (shaderBuffer) shaderBuffer->Release(); 84 | V_RETURN(hr); 85 | 86 | shaderBuffer = nullptr; 87 | V_RETURN(D3DReadFileToBlob(L"CSPrefixSum2i.cso", &shaderBuffer)); 88 | hr = m_pd3dDevice->CreateComputeShader(shaderBuffer->GetBufferPointer(), 89 | shaderBuffer->GetBufferSize(), nullptr, &m_pShaders[CS_PREFIXSUM2_INT]); 90 | if (shaderBuffer) shaderBuffer->Release(); 91 | V_RETURN(hr); 92 | 93 | shaderBuffer = nullptr; 94 | V_RETURN(D3DReadFileToBlob(L"CSPrefixSum1u.cso", &shaderBuffer)); 95 | hr = m_pd3dDevice->CreateComputeShader(shaderBuffer->GetBufferPointer(), 96 | shaderBuffer->GetBufferSize(), nullptr, &m_pShaders[CS_PREFIXSUM1_UINT]); 97 | if (shaderBuffer) shaderBuffer->Release(); 98 | V_RETURN(hr); 99 | 100 | shaderBuffer = nullptr; 101 | V_RETURN(D3DReadFileToBlob(L"CSPrefixSum1u_rw.cso", &shaderBuffer)); 102 | hr = m_pd3dDevice->CreateComputeShader(shaderBuffer->GetBufferPointer(), 103 | shaderBuffer->GetBufferSize(), nullptr, &m_pShaders[CS_PREFIXSUM1_UINT_RW]); 104 | if (shaderBuffer) shaderBuffer->Release(); 105 | V_RETURN(hr); 106 | 107 | shaderBuffer = nullptr; 108 | V_RETURN(D3DReadFileToBlob(L"CSPrefixSum2u.cso", &shaderBuffer)); 109 | hr = 
m_pd3dDevice->CreateComputeShader(shaderBuffer->GetBufferPointer(), 110 | shaderBuffer->GetBufferSize(), nullptr, &m_pShaders[CS_PREFIXSUM2_UINT]); 111 | if (shaderBuffer) shaderBuffer->Release(); 112 | V_RETURN(hr); 113 | 114 | V_RETURN(CreateStructuredBuffer(m_pd3dDevice, sizeof(uint32_t), uSize, nullptr, &m_pInc)); 115 | V_RETURN(CreateBufferUAV(m_pd3dDevice, m_pInc, &m_pUAVInc)); 116 | 117 | return hr; 118 | } 119 | 120 | void PrefixSum::Scan(const SCAN_DATA_TYPE dataType, const uint32_t uSize, 121 | ID3D11UnorderedAccessView *const pUAVSrc, ID3D11UnorderedAccessView *const pUAVDst) 122 | { 123 | const auto UAVInitialCounts = 0u; 124 | uint32_t CS_PREFIXSUM1, CS_PREFIXSUM1_RW, CS_PREFIXSUM2; 125 | 126 | switch (dataType) 127 | { 128 | case SCAN_DATA_TYPE_FLOAT: 129 | CS_PREFIXSUM1 = CS_PREFIXSUM1_FLOAT; 130 | CS_PREFIXSUM1_RW = CS_PREFIXSUM1_FLOAT_RW; 131 | CS_PREFIXSUM2 = CS_PREFIXSUM2_FLOAT; 132 | break; 133 | case SCAN_DATA_TYPE_INT: 134 | CS_PREFIXSUM1 = CS_PREFIXSUM1_INT; 135 | CS_PREFIXSUM1_RW = CS_PREFIXSUM1_INT_RW; 136 | CS_PREFIXSUM2 = CS_PREFIXSUM2_INT; 137 | break; 138 | default: 139 | CS_PREFIXSUM1 = CS_PREFIXSUM1_UINT; 140 | CS_PREFIXSUM1_RW = CS_PREFIXSUM1_UINT_RW; 141 | CS_PREFIXSUM2 = CS_PREFIXSUM2_UINT; 142 | } 143 | 144 | // 1 Group for 1024 consecutive counters. 145 | const auto uNumGroups = static_cast(ceil(uSize / 1024.0f)); 146 | 147 | // Prefix sum 148 | m_pd3dContext->CSSetUnorderedAccessViews(m_uUAVSlot_Dst, 1, &pUAVDst, &UAVInitialCounts); 149 | m_pd3dContext->CSSetUnorderedAccessViews(m_uUAVSlot_Inc, 1, &m_pUAVInc, &UAVInitialCounts); 150 | if (pUAVSrc != pUAVDst) 151 | m_pd3dContext->CSSetUnorderedAccessViews(m_uUAVSlot_Src, 1, &pUAVSrc, &UAVInitialCounts); 152 | 153 | m_pd3dContext->CSSetShader(m_pShaders[pUAVSrc == pUAVDst ? 
CS_PREFIXSUM1_RW : CS_PREFIXSUM1], nullptr, 0); 154 | m_pd3dContext->Dispatch(uNumGroups, 1, 1); 155 | 156 | m_pd3dContext->CSSetShader(m_pShaders[CS_PREFIXSUM2], nullptr, 0); 157 | m_pd3dContext->Dispatch(uNumGroups, 1, 1); 158 | 159 | m_pd3dContext->CSSetUnorderedAccessViews(m_uUAVSlot_Src, 1, &g_pNullUAV, &UAVInitialCounts); 160 | m_pd3dContext->CSSetUnorderedAccessViews(m_uUAVSlot_Inc, 1, &g_pNullUAV, &UAVInitialCounts); 161 | m_pd3dContext->CSSetUnorderedAccessViews(m_uUAVSlot_Dst, 1, &g_pNullUAV, &UAVInitialCounts); 162 | } 163 | 164 | void PrefixSum::AddRef() 165 | { 166 | ++m_uRefCount; 167 | } 168 | 169 | void PrefixSum::Release() 170 | { 171 | if (--m_uRefCount < 1) delete this; 172 | } 173 | 174 | HRESULT PrefixSum::CreateScan(ID3D11DeviceContext *const pDeviceContext, const uint32_t uSize, PrefixSum **ppScan) 175 | { 176 | auto &pScan = *ppScan; 177 | pScan = new PrefixSum(pDeviceContext); 178 | 179 | return pScan->Init(uSize); 180 | } 181 | -------------------------------------------------------------------------------- /PoissonSolver/PrefixSum.h: -------------------------------------------------------------------------------- 1 | //-------------------------------------------------------------------------------------- 2 | // By XU, Tianchen 3 | //-------------------------------------------------------------------------------------- 4 | 5 | #pragma once 6 | 7 | #ifdef PRECOMPILED_HEADER 8 | #include PRECOMPILED_HEADER_FILE 9 | #else 10 | #include 11 | #include 12 | #include 13 | #include 14 | #endif 15 | 16 | #include "CreateBuffers.h" 17 | 18 | class PrefixSum 19 | { 20 | public: 21 | enum SCAN_DATA_TYPE 22 | { 23 | SCAN_DATA_TYPE_FLOAT = 1, 24 | SCAN_DATA_TYPE_INT, 25 | SCAN_DATA_TYPE_UINT 26 | }; 27 | 28 | PrefixSum(ID3D11DeviceContext *const pDeviceContext); 29 | virtual ~PrefixSum(); 30 | 31 | HRESULT Init(const uint32_t uSize); 32 | 33 | void Scan(const SCAN_DATA_TYPE dataType, const uint32_t uSize, ID3D11UnorderedAccessView *const pUAVSrc, 
ID3D11UnorderedAccessView *const pUAVDst); 34 | void AddRef(); 35 | void Release(); 36 | 37 | static HRESULT CreateScan(ID3D11DeviceContext *const pDeviceContext, const uint32_t uSize, PrefixSum **ppScan); 38 | 39 | protected: 40 | enum ComputeShaderID : uint32_t 41 | { 42 | CS_PREFIXSUM1_FLOAT, 43 | CS_PREFIXSUM1_FLOAT_RW, 44 | CS_PREFIXSUM2_FLOAT, 45 | CS_PREFIXSUM1_INT, 46 | CS_PREFIXSUM1_INT_RW, 47 | CS_PREFIXSUM2_INT, 48 | CS_PREFIXSUM1_UINT, 49 | CS_PREFIXSUM1_UINT_RW, 50 | CS_PREFIXSUM2_UINT, 51 | 52 | NUM_CS 53 | }; 54 | 55 | uint32_t m_uRefCount; 56 | 57 | uint32_t m_uUAVSlot_Dst; 58 | uint32_t m_uUAVSlot_Inc; 59 | uint32_t m_uUAVSlot_Src; 60 | 61 | ID3D11Buffer *m_pInc; 62 | ID3D11UnorderedAccessView *m_pUAVInc; 63 | 64 | ID3D11ComputeShader *m_pShaders[NUM_CS]; 65 | 66 | ID3D11Device *m_pd3dDevice; 67 | ID3D11DeviceContext *m_pd3dContext; 68 | }; 69 | -------------------------------------------------------------------------------- /PoissonSolver/SharedConst.h: -------------------------------------------------------------------------------- 1 | //-------------------------------------------------------------------------------------- 2 | // By XU, Tianchen 3 | //-------------------------------------------------------------------------------------- 4 | 5 | #define THREAD_GROUP_SIZE 8 6 | --------------------------------------------------------------------------------