├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── CMakeSettings.json ├── LICENSE.md ├── README.md ├── XenosRecomp ├── CMakeLists.txt ├── constant_table.h ├── dxc_compiler.cpp ├── dxc_compiler.h ├── main.cpp ├── pch.h ├── shader.h ├── shader_code.h ├── shader_common.h ├── shader_recompiler.cpp └── shader_recompiler.h └── thirdparty └── CMakeLists.txt /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.rsuser 8 | *.suo 9 | *.user 10 | *.userosscache 11 | *.sln.docstates 12 | 13 | # User-specific files (MonoDevelop/Xamarin Studio) 14 | *.userprefs 15 | 16 | # Mono auto generated files 17 | mono_crash.* 18 | 19 | # Build results 20 | [Dd]ebug/ 21 | [Dd]ebugPublic/ 22 | [Rr]elease/ 23 | [Rr]eleases/ 24 | x64/ 25 | x86/ 26 | [Ww][Ii][Nn]32/ 27 | [Aa][Rr][Mm]/ 28 | [Aa][Rr][Mm]64/ 29 | bld/ 30 | [Bb]in/ 31 | [Oo]bj/ 32 | [Ll]og/ 33 | [Ll]ogs/ 34 | [Oo]ut/ 35 | 36 | # Visual Studio 2015/2017 cache/options directory 37 | .vs/ 38 | # Uncomment if you have tasks that create the project's static files in wwwroot 39 | #wwwroot/ 40 | 41 | # Visual Studio 2017 auto generated files 42 | Generated\ Files/ 43 | 44 | # MSTest test Results 45 | [Tt]est[Rr]esult*/ 46 | [Bb]uild[Ll]og.* 47 | 48 | # NUnit 49 | *.VisualState.xml 50 | TestResult.xml 51 | nunit-*.xml 52 | 53 | # Build Results of an ATL Project 54 | [Dd]ebugPS/ 55 | [Rr]eleasePS/ 56 | dlldata.c 57 | 58 | # Benchmark Results 59 | BenchmarkDotNet.Artifacts/ 60 | 61 | # .NET Core 62 | project.lock.json 63 | project.fragment.lock.json 64 | artifacts/ 65 | 66 | # ASP.NET Scaffolding 67 | ScaffoldingReadMe.txt 68 | 69 | # StyleCop 70 | StyleCopReport.xml 71 | 72 | # Files built by Visual Studio 73 | *_i.c 74 | *_p.c 75 | *_h.h 76 | 
*.ilk 77 | *.meta 78 | *.obj 79 | *.iobj 80 | *.pch 81 | *.pdb 82 | *.ipdb 83 | *.pgc 84 | *.pgd 85 | *.rsp 86 | *.sbr 87 | *.tlb 88 | *.tli 89 | *.tlh 90 | *.tmp 91 | *.tmp_proj 92 | *_wpftmp.csproj 93 | *.log 94 | *.tlog 95 | *.vspscc 96 | *.vssscc 97 | .builds 98 | *.pidb 99 | *.svclog 100 | *.scc 101 | 102 | # Chutzpah Test files 103 | _Chutzpah* 104 | 105 | # Visual C++ cache files 106 | ipch/ 107 | *.aps 108 | *.ncb 109 | *.opendb 110 | *.opensdf 111 | *.sdf 112 | *.cachefile 113 | *.VC.db 114 | *.VC.VC.opendb 115 | 116 | # Visual Studio profiler 117 | *.psess 118 | *.vsp 119 | *.vspx 120 | *.sap 121 | 122 | # Visual Studio Trace Files 123 | *.e2e 124 | 125 | # TFS 2012 Local Workspace 126 | $tf/ 127 | 128 | # Guidance Automation Toolkit 129 | *.gpState 130 | 131 | # ReSharper is a .NET coding add-in 132 | _ReSharper*/ 133 | *.[Rr]e[Ss]harper 134 | *.DotSettings.user 135 | 136 | # TeamCity is a build add-in 137 | _TeamCity* 138 | 139 | # DotCover is a Code Coverage Tool 140 | *.dotCover 141 | 142 | # AxoCover is a Code Coverage Tool 143 | .axoCover/* 144 | !.axoCover/settings.json 145 | 146 | # Coverlet is a free, cross platform Code Coverage Tool 147 | coverage*.json 148 | coverage*.xml 149 | coverage*.info 150 | 151 | # Visual Studio code coverage results 152 | *.coverage 153 | *.coveragexml 154 | 155 | # NCrunch 156 | _NCrunch_* 157 | .*crunch*.local.xml 158 | nCrunchTemp_* 159 | 160 | # MightyMoose 161 | *.mm.* 162 | AutoTest.Net/ 163 | 164 | # Web workbench (sass) 165 | .sass-cache/ 166 | 167 | # Installshield output folder 168 | [Ee]xpress/ 169 | 170 | # DocProject is a documentation generator add-in 171 | DocProject/buildhelp/ 172 | DocProject/Help/*.HxT 173 | DocProject/Help/*.HxC 174 | DocProject/Help/*.hhc 175 | DocProject/Help/*.hhk 176 | DocProject/Help/*.hhp 177 | DocProject/Help/Html2 178 | DocProject/Help/html 179 | 180 | # Click-Once directory 181 | publish/ 182 | 183 | # Publish Web Output 184 | *.[Pp]ublish.xml 185 | *.azurePubxml 186 | # 
Note: Comment the next line if you want to checkin your web deploy settings, 187 | # but database connection strings (with potential passwords) will be unencrypted 188 | *.pubxml 189 | *.publishproj 190 | 191 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 192 | # checkin your Azure Web App publish settings, but sensitive information contained 193 | # in these scripts will be unencrypted 194 | PublishScripts/ 195 | 196 | # NuGet Packages 197 | *.nupkg 198 | # NuGet Symbol Packages 199 | *.snupkg 200 | # The packages folder can be ignored because of Package Restore 201 | **/[Pp]ackages/* 202 | # except build/, which is used as an MSBuild target. 203 | !**/[Pp]ackages/build/ 204 | # Uncomment if necessary however generally it will be regenerated when needed 205 | #!**/[Pp]ackages/repositories.config 206 | # NuGet v3's project.json files produces more ignorable files 207 | *.nuget.props 208 | *.nuget.targets 209 | 210 | # Microsoft Azure Build Output 211 | csx/ 212 | *.build.csdef 213 | 214 | # Microsoft Azure Emulator 215 | ecf/ 216 | rcf/ 217 | 218 | # Windows Store app package directories and files 219 | AppPackages/ 220 | BundleArtifacts/ 221 | Package.StoreAssociation.xml 222 | _pkginfo.txt 223 | *.appx 224 | *.appxbundle 225 | *.appxupload 226 | 227 | # Visual Studio cache files 228 | # files ending in .cache can be ignored 229 | *.[Cc]ache 230 | # but keep track of directories ending in .cache 231 | !?*.[Cc]ache/ 232 | 233 | # Others 234 | ClientBin/ 235 | ~$* 236 | *~ 237 | *.dbmdl 238 | *.dbproj.schemaview 239 | *.jfm 240 | *.pfx 241 | *.publishsettings 242 | orleans.codegen.cs 243 | 244 | # Including strong name files can present a security risk 245 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 246 | #*.snk 247 | 248 | # Since there are multiple workflows, uncomment next line to ignore bower_components 249 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 250 | 
#bower_components/ 251 | 252 | # RIA/Silverlight projects 253 | Generated_Code/ 254 | 255 | # Backup & report files from converting an old project file 256 | # to a newer Visual Studio version. Backup files are not needed, 257 | # because we have git ;-) 258 | _UpgradeReport_Files/ 259 | Backup*/ 260 | UpgradeLog*.XML 261 | UpgradeLog*.htm 262 | ServiceFabricBackup/ 263 | *.rptproj.bak 264 | 265 | # SQL Server files 266 | *.mdf 267 | *.ldf 268 | *.ndf 269 | 270 | # Business Intelligence projects 271 | *.rdl.data 272 | *.bim.layout 273 | *.bim_*.settings 274 | *.rptproj.rsuser 275 | *- [Bb]ackup.rdl 276 | *- [Bb]ackup ([0-9]).rdl 277 | *- [Bb]ackup ([0-9][0-9]).rdl 278 | 279 | # Microsoft Fakes 280 | FakesAssemblies/ 281 | 282 | # GhostDoc plugin setting file 283 | *.GhostDoc.xml 284 | 285 | # Node.js Tools for Visual Studio 286 | .ntvs_analysis.dat 287 | node_modules/ 288 | 289 | # Visual Studio 6 build log 290 | *.plg 291 | 292 | # Visual Studio 6 workspace options file 293 | *.opt 294 | 295 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 296 | *.vbw 297 | 298 | # Visual Studio 6 auto-generated project file (contains which files were open etc.) 
299 | *.vbp 300 | 301 | # Visual Studio 6 workspace and project file (working project files containing files to include in project) 302 | *.dsw 303 | *.dsp 304 | 305 | # Visual Studio 6 technical files 306 | *.ncb 307 | *.aps 308 | 309 | # Visual Studio LightSwitch build output 310 | **/*.HTMLClient/GeneratedArtifacts 311 | **/*.DesktopClient/GeneratedArtifacts 312 | **/*.DesktopClient/ModelManifest.xml 313 | **/*.Server/GeneratedArtifacts 314 | **/*.Server/ModelManifest.xml 315 | _Pvt_Extensions 316 | 317 | # Paket dependency manager 318 | .paket/paket.exe 319 | paket-files/ 320 | 321 | # FAKE - F# Make 322 | .fake/ 323 | 324 | # CodeRush personal settings 325 | .cr/personal 326 | 327 | # Python Tools for Visual Studio (PTVS) 328 | __pycache__/ 329 | *.pyc 330 | 331 | # Cake - Uncomment if you are using it 332 | # tools/** 333 | # !tools/packages.config 334 | 335 | # Tabs Studio 336 | *.tss 337 | 338 | # Telerik's JustMock configuration file 339 | *.jmconfig 340 | 341 | # BizTalk build output 342 | *.btp.cs 343 | *.btm.cs 344 | *.odx.cs 345 | *.xsd.cs 346 | 347 | # OpenCover UI analysis results 348 | OpenCover/ 349 | 350 | # Azure Stream Analytics local run output 351 | ASALocalRun/ 352 | 353 | # MSBuild Binary and Structured Log 354 | *.binlog 355 | 356 | # NVidia Nsight GPU debugger configuration file 357 | *.nvuser 358 | 359 | # MFractors (Xamarin productivity tool) working folder 360 | .mfractor/ 361 | 362 | # Local History for Visual Studio 363 | .localhistory/ 364 | 365 | # Visual Studio History (VSHistory) files 366 | .vshistory/ 367 | 368 | # BeatPulse healthcheck temp database 369 | healthchecksdb 370 | 371 | # Backup folder for Package Reference Convert tool in Visual Studio 2017 372 | MigrationBackup/ 373 | 374 | # Ionide (cross platform F# VS Code tools) working folder 375 | .ionide/ 376 | 377 | # Fody - auto-generated XML schema 378 | FodyWeavers.xsd 379 | 380 | # VS Code files for those working on multiple tools 381 | .vscode/* 382 | 
!.vscode/settings.json 383 | !.vscode/tasks.json 384 | !.vscode/launch.json 385 | !.vscode/extensions.json 386 | *.code-workspace 387 | 388 | # Local History for Visual Studio Code 389 | .history/ 390 | 391 | # Windows Installer files from build outputs 392 | *.cab 393 | *.msi 394 | *.msix 395 | *.msm 396 | *.msp 397 | 398 | # JetBrains Rider 399 | *.sln.iml 400 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "thirdparty/smol-v"] 2 | path = thirdparty/smol-v 3 | url = https://github.com/aras-p/smol-v 4 | [submodule "thirdparty/zstd"] 5 | path = thirdparty/zstd 6 | url = https://github.com/facebook/zstd.git 7 | [submodule "thirdparty/xxHash"] 8 | path = thirdparty/xxHash 9 | url = https://github.com/Cyan4973/xxHash.git 10 | [submodule "thirdparty/fmt"] 11 | path = thirdparty/fmt 12 | url = https://github.com/fmtlib/fmt.git 13 | [submodule "thirdparty/dxc-bin"] 14 | path = thirdparty/dxc-bin 15 | url = https://github.com/renderbag/dxc-bin.git 16 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.20) 2 | 3 | set(CMAKE_CXX_STANDARD 17) 4 | 5 | project("XenosRecomp-ALL") 6 | 7 | set(XENOS_RECOMP_THIRDPARTY_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/thirdparty") 8 | 9 | add_subdirectory(${XENOS_RECOMP_THIRDPARTY_ROOT}) 10 | add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/XenosRecomp") 11 | -------------------------------------------------------------------------------- /CMakeSettings.json: -------------------------------------------------------------------------------- 1 | { 2 | "configurations": [ 3 | { 4 | "name": "x64-Clang-Debug", 5 | "generator": "Ninja", 6 | "configurationType": "Debug", 7 | "buildRoot": "${projectDir}\\out\\build\\${name}", 8 | "installRoot": 
"${projectDir}\\out\\install\\${name}", 9 | "cmakeCommandArgs": "", 10 | "buildCommandArgs": "", 11 | "ctestCommandArgs": "", 12 | "inheritEnvironments": [ "clang_cl_x64_x64" ], 13 | "variables": [ 14 | { 15 | "name": "VCPKG_TARGET_TRIPLET", 16 | "value": "x64-windows", 17 | "type": "STRING" 18 | } 19 | ] 20 | }, 21 | { 22 | "name": "x64-Clang-Release", 23 | "generator": "Ninja", 24 | "configurationType": "RelWithDebInfo", 25 | "buildRoot": "${projectDir}\\out\\build\\${name}", 26 | "installRoot": "${projectDir}\\out\\install\\${name}", 27 | "cmakeCommandArgs": "", 28 | "buildCommandArgs": "", 29 | "ctestCommandArgs": "", 30 | "inheritEnvironments": [ "clang_cl_x64_x64" ], 31 | "variables": [ 32 | { 33 | "name": "VCPKG_TARGET_TRIPLET", 34 | "value": "x64-windows", 35 | "type": "STRING" 36 | } 37 | ] 38 | } 39 | ] 40 | } -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2025 hedge-dev and contributors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | - The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # XenosRecomp 2 | 3 | XenosRecomp is a tool that converts Xbox 360 shader binaries to HLSL. The resulting files can be recompiled to DXIL and SPIR-V using the DirectX Shader Compiler (DXC) for use in Direct3D 12 (D3D12) and Vulkan. 4 | 5 | The current implementation is designed around [Unleashed Recompiled](https://github.com/hedge-dev/UnleashedRecomp), a recompilation project that implements a translation layer for the renderer rather than emulating the Xbox 360 GPU. Unleashed Recompiled specific implementations are placed under the `UNLEASHED_RECOMP` preprocessor macro. 6 | 7 | Users are expected to modify the recompiler to fit their needs. **Do not expect the recompiler to work out of the box.** 8 | 9 | ## Implementation Details 10 | 11 | Several components of the recompiler are currently incomplete or missing. Unimplemented or inaccurate features exist mainly because they were either unnecessary for Unleashed Recompiled or did not cause visible issues. 12 | 13 | ### Shader Container 14 | 15 | Xbox 360 shaders are stored in a container that includes constant buffer reflection data, definitions, interpolators, vertex declarations, instructions, and more. It has been reverse-engineered just enough for use in Unleashed Recompiled, but additional research may be needed for other games. 16 | 17 | ### Instructions 18 | 19 | Vector/ALU instructions are converted directly and should work in most cases. 20 | 21 | Issues might happen when instructions perform dynamic constant indexing on multiple operands. 
22 | 23 | Instructions that result in `INF` or `NaN` might not be handled correctly. Most operations are clamped to `FLT_MAX`, but their behavior has not been verified in all scenarios. 24 | 25 | Dynamic register indexing is unimplemented. A possible solution is converting registers into an array that instructions dynamically index into, instead of treating them as separate local variables. 26 | 27 | ### Control Flow 28 | 29 | Since HLSL does not support `goto`, control flow instructions are implemented using a `while` loop with a `switch` statement, where a local `pc` variable determines the currently executing block. 30 | 31 | The current implementation has not been thoroughly tested, as Sonic Unleashed contains very few shaders with complex control flow. However, any issues should be relatively easy to fix if problematic cases can be found. 32 | 33 | For shaders with simple control flow, the recompiler may choose to flatten it, removing the while loop and switch statements. This allows DXC to optimize the shader more efficiently. 34 | 35 | ### Constants 36 | 37 | Both vertex and pixel shader stages use three constant buffers: 38 | 39 | * Vertex shader constants: 4096 bytes (256 `float4` registers) 40 | * Pixel shader constants: 3584 bytes (224 `float4` registers) 41 | * Shared constants: Used specifically by Unleashed Recompiled 42 | 43 | Vertex and pixel shader constants are copied directly from the guest render device, and shaders expect them in little-endian format. 44 | 45 | Constant buffer registers are populated using reflection data embedded in the shader binaries. If this data is missing, the recompiler will not function. However, support can be added by defining a `float4` array that covers the entire register range. 46 | 47 | Integer constants are unimplemented. If the target game requires them, you will need to make new constant buffer slots or append them to the existing ones. 
48 | 49 | Vertex and pixel shader boolean constants each contain 16 elements. These are packed into a 32-bit integer and stored in the shared constants buffer, where the Nth bit represents the value of the Nth boolean register. The Xbox 360 GPU supposedly supports up to 128 boolean registers, which may require increasing the size of the `g_Booleans` data type for other games. 50 | 51 | All constant buffers are implemented as root constant buffers in D3D12, making them easy to upload to the GPU using a linear allocator. In Vulkan, the GPU virtual addresses of constant buffers are passed as push constants. Constants are accessed via preprocessor macros that load values from the GPU virtual addresses using `vk::RawBufferLoad`. These macros ensure the shader function body remains the same for both DXIL and SPIR-V. 52 | 53 | Out-of-bounds dynamic constant accesses should return 0. However, since root constant buffers in D3D12 and raw buffer loads in Vulkan do not enforce this behavior, the shader developer must handle it. To solve this, each dynamic index access is clamped to the valid range, and out-of-bounds registers are forced to become 0. 54 | 55 | ### Vertex Fetch 56 | 57 | A common approach to vertex fetching is passing vertex data as a shader resource view and building special shaders depending on the vertex declaration. Instead, Unleashed Recompiled converts vertex declarations into native D3D12/Vulkan input declarations, allowing vertex shaders to receive data as inputs. While this has its limitations, it removes the need for runtime shader permutation compilation based on vertex declarations. 58 | 59 | Unleashed Recompiled endian swaps vertex data before uploading it to the GPU by treating buffers as arrays of 32-bit integers. This causes the element order for 8-bit and 16-bit vertex formats to be swizzled. While no visual errors have been observed for 8-bit formats, 16-bit formats get swizzled to YXWZ. 
This is corrected using a `g_SwappedTexcoords` variable in the shared constants buffer, where each bit indicates whether the corresponding `TEXCOORD` semantic requires re-swizzling. While this assumption holds for Sonic Unleashed, other games may require additional support for other semantics. 60 | 61 | Xbox 360 supports the `R11G11B10` vertex format, which is unsupported on desktop hardware. The recompiler implements this by using a specialization constant that manually unpacks this format for `NORMAL`, `TANGENT` and `BINORMAL` semantics in the vertex shader. Similar to `TEXCOORD` swizzling, this assumes the format is only used for these semantics. 62 | 63 | Certain semantics are forced to be `uint4` instead of `float4` for specific shaders in Sonic Unleashed. This is also something that needs to be handled manually for other games. 64 | 65 | Instanced geometry is handled completely manually on the Xbox 360. In Sonic Unleashed, the index buffer is passed as a vertex stream, and shaders use it to arbitrarily fetch vertex data, relying on a `g_IndexCount` constant to determine the index of the current instance. Unleashed Recompiled handles this by expecting instanced data to be in the second vertex stream and the index buffer to be in the `POSITION1` semantic. This behavior is completely game specific and must be manually implemented for other games. 66 | 67 | Vulkan vertex locations are currently hardcoded for Unleashed Recompiled, chosen based on Sonic Unleashed's shaders while taking the 16 location limit into account. A generic solution would assign unique locations per vertex shader and dynamically create vertex declarations at runtime. 68 | 69 | Mini vertex fetch instructions and vertex fetch bindings are unimplemented. 70 | 71 | ### Textures & Samplers 72 | 73 | Textures and samplers use a bindless approach. Descriptor indices are stored in the shared constant buffer, with separate indices for each texture type to prevent mismatches in the shader. 
1D textures are unimplemented but could be added easily. 74 | 75 | Several texture fetch features, such as specifying LOD levels or sampler filters, are unimplemented. Currently, only the pixel offset value is supported, which is primarily used for shadow mapping. 76 | 77 | Some Xbox 360 sampler types may be unsupported on desktop hardware. These cases are unhandled and require specialized implementations in the recompiler. 78 | 79 | Cube textures are normally sampled using the `cube` instruction, which computes the face index and 2D texture coordinates. This can be implemented on desktop hardware by sampling `Texture2DArray`; however, this lacks linear filtering across cube edges. The recompiler instead stores an array of cube map directions locally. Each `cube` instruction stores a direction in this array, and the output register holds the direction index. When the shader performs a texture fetch, the direction is dynamically retrieved from the array and used in `TextureCube` sampling. DXC optimizes this array away, ensuring the final DXIL/SPIR-V shader uses the direction directly. 80 | 81 | This approach works well for simple control flow but may cause issues with complex shaders where optimizations might fail, leading to the array actually being dynamically indexed. A proper solution could implement the `cube` instruction exactly as the hardware does, and then reverse this computation during texture sampling. I chose not to take this approach in the end, as DXC was unable to optimize away redundant computations due to the lossy nature of the calculation. 82 | 83 | ### Specialization Constants 84 | 85 | The recompiler implements several specialization constants, primarily as enhancements for Unleashed Recompiled. Currently, these are simple flags that enable or disable specific shader behaviors. 
The generic ones include: 86 | 87 | - A flag indicating that the `NORMAL`, `TANGENT`, and `BINORMAL` semantics use the `R11G11B10` vertex format, enabling manual unpacking in the vertex shader. 88 | - A flag indicating that the pixel shader performs alpha testing. Since modern desktop hardware lacks a fixed function pipeline for alpha testing, this flag inserts a "less than alpha threshold" check at the end of the pixel shader. Additional comparison types may need to be implemented depending on the target game. 89 | 90 | While specialization constants are straightforward to implement in SPIR-V, DXIL lacks native support for them. This is solved by compiling shaders as libraries with a declared, but unimplemented function that returns the specialization constant value. At runtime, Unleashed Recompiled generates an implementation of this function, compiles it into a library, and links it with the shader to produce a final specialized shader binary. For more details on this technique, [check out this article](https://therealmjp.github.io/posts/dxil-linking/). 91 | 92 | ### Other Unimplemented Features 93 | 94 | * Memory export. 95 | * Point size. 96 | * Possibly more that I am not aware of. 97 | 98 | ## Usage 99 | 100 | Shaders can be directly converted to HLSL by providing the input file path, output HLSL file path, and the path to the `shader_common.h` file located in the XenosRecomp project directory: 101 | 102 | ``` 103 | XenosRecomp [input shader file path] [output HLSL file path] [header file path] 104 | ``` 105 | 106 | ### Shader Cache 107 | 108 | Alternatively, the recompiler can process an entire directory by scanning for shader binaries within the specified path. In this mode, valid shaders are converted and recompiled into a DXIL/SPIR-V cache, formatted for use with Unleashed Recompiled. 
This cache is then exported as a .cpp file for direct embedding into the executable: 109 | 110 | ``` 111 | XenosRecomp [input directory path] [output .cpp file path] [header file path] 112 | ``` 113 | 114 | At runtime, shaders are mapped to their recompiled versions using a 64-bit XXH3 hash lookup. This scanning method is particularly useful for games that store embedded shaders within executables or uncompressed archive formats. 115 | 116 | SPIR-V shaders are compressed using smol-v to improve zstd compression efficiency, while DXIL shaders are compressed as-is. 117 | 118 | ## Building 119 | 120 | The project requires CMake 3.20 and a C++ compiler with C++17 support to build. While compilers other than Clang might work, they have not been tested. Since the repository includes submodules, ensure you clone it recursively. 121 | 122 | ## Special Thanks 123 | 124 | This recompiler would not have been possible without the [Xenia](https://github.com/xenia-project/xenia) emulator. Nearly every aspect of the development was guided by referencing Xenia's shader translator and research. 125 | 126 | ## Final Words 127 | 128 | I hope this recompiler proves useful in some way to help with your own recompilation efforts! While the implementation isn't as generic as I hoped it would be, the optimization opportunities from game specific implementations were too significant to ignore and paid off in the end. 129 | 130 | If you find and fix mistakes in the recompiler or successfully implement missing features in a generic way, contributions would be greatly appreciated. 
131 | -------------------------------------------------------------------------------- /XenosRecomp/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(XenosRecomp) 2 | 3 | if (WIN32) 4 | option(XENOS_RECOMP_DXIL "Generate DXIL shader cache" ON) 5 | endif() 6 | 7 | set(SMOLV_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../thirdparty/smol-v/source") 8 | 9 | add_executable(XenosRecomp 10 | constant_table.h 11 | dxc_compiler.cpp 12 | dxc_compiler.h 13 | main.cpp 14 | pch.h 15 | shader.h 16 | shader_code.h 17 | shader_recompiler.cpp 18 | shader_recompiler.h 19 | "${SMOLV_SOURCE_DIR}/smolv.cpp") 20 | 21 | target_link_libraries(XenosRecomp PRIVATE 22 | Microsoft::DirectXShaderCompiler 23 | xxHash::xxhash 24 | libzstd_static 25 | fmt::fmt) 26 | 27 | target_include_directories(XenosRecomp PRIVATE ${SMOLV_SOURCE_DIR}) 28 | 29 | target_precompile_headers(XenosRecomp PRIVATE pch.h) 30 | 31 | if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") 32 | target_compile_options(XenosRecomp PRIVATE -Wno-switch -Wno-unused-variable -Wno-null-arithmetic -fms-extensions) 33 | 34 | include(CheckCXXSymbolExists) 35 | check_cxx_symbol_exists(_LIBCPP_VERSION version LIBCPP) 36 | if(LIBCPP) 37 | # Allows using std::execution 38 | target_compile_options(XenosRecomp PRIVATE -fexperimental-library) 39 | endif() 40 | endif() 41 | 42 | if (WIN32) 43 | target_compile_definitions(XenosRecomp PRIVATE _CRT_SECURE_NO_WARNINGS) 44 | add_custom_command(TARGET XenosRecomp POST_BUILD 45 | COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_RUNTIME_DLLS:XenosRecomp> $<TARGET_FILE_DIR:XenosRecomp> 46 | COMMAND_EXPAND_LISTS 47 | ) 48 | endif() 49 | 50 | if (XENOS_RECOMP_DXIL) 51 | target_compile_definitions(XenosRecomp PRIVATE XENOS_RECOMP_DXIL) 52 | target_link_libraries(XenosRecomp PRIVATE Microsoft::DXIL) 53 | endif() 54 | -------------------------------------------------------------------------------- /XenosRecomp/constant_table.h: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | enum class ParameterClass : uint16_t // D3DXPARAMETER_CLASS 4 | { 5 | Scalar, 6 | Vector, 7 | MatrixRows, 8 | MatrixColumns, 9 | Object, 10 | Struct 11 | }; 12 | 13 | enum class ParameterType : uint16_t // D3DXPARAMETER_TYPE 14 | { 15 | Void, 16 | Bool, 17 | Int, 18 | Float, 19 | String, 20 | Texture, 21 | Texture1D, 22 | Texture2D, 23 | Texture3D, 24 | TextureCube, 25 | Sampler, 26 | Sampler1D, 27 | Sampler2D, 28 | Sampler3D, 29 | SamplerCube, 30 | PixelShader, 31 | VertexShader, 32 | PixelFragment, 33 | VertexFragment, 34 | Unsupported 35 | }; 36 | 37 | struct StructMemberInfo // D3DXSHADER_STRUCTMEMBERINFO 38 | { 39 | be name; 40 | be typeInfo; 41 | }; 42 | 43 | struct TypeInfo // D3DXSHADER_TYPEINFO 44 | { 45 | be parameterClass; 46 | be parameterType; 47 | be rows; 48 | be columns; 49 | be elements; 50 | be structMembers; 51 | be structMemberInfo; 52 | }; 53 | 54 | enum class RegisterSet : uint16_t // D3DXREGISTER_SET 55 | { 56 | Bool, 57 | Int4, 58 | Float4, 59 | Sampler 60 | }; 61 | 62 | struct ConstantInfo // D3DXSHADER_CONSTANTINFO 63 | { 64 | be name; 65 | be registerSet; 66 | be registerIndex; 67 | be registerCount; 68 | be reserved; 69 | be typeInfo; 70 | be defaultValue; 71 | }; 72 | 73 | struct ConstantTable // D3DXSHADER_CONSTANTTABLE 74 | { 75 | be size; 76 | be creator; 77 | be version; 78 | be constants; 79 | be constantInfo; 80 | be flags; 81 | be target; 82 | }; 83 | 84 | struct ConstantTableContainer 85 | { 86 | be size; 87 | ConstantTable constantTable; 88 | }; -------------------------------------------------------------------------------- /XenosRecomp/dxc_compiler.cpp: -------------------------------------------------------------------------------- 1 | #include "dxc_compiler.h" 2 | 3 | DxcCompiler::DxcCompiler() 4 | { 5 | HRESULT hr = DxcCreateInstance(CLSID_DxcCompiler, IID_PPV_ARGS(&dxcCompiler)); 6 | assert(SUCCEEDED(hr)); 7 | } 8 | 9 
| DxcCompiler::~DxcCompiler() 10 | { 11 | dxcCompiler->Release(); 12 | } 13 | 14 | IDxcBlob* DxcCompiler::compile(const std::string& shaderSource, bool compilePixelShader, bool compileLibrary, bool compileSpirv) 15 | { 16 | DxcBuffer source{}; 17 | source.Ptr = shaderSource.c_str(); 18 | source.Size = shaderSource.size(); 19 | 20 | const wchar_t* args[32]{}; 21 | uint32_t argCount = 0; 22 | 23 | const wchar_t* target = nullptr; 24 | if (compileLibrary) 25 | { 26 | assert(!compileSpirv); 27 | target = L"-T lib_6_3"; 28 | } 29 | else 30 | { 31 | if (compilePixelShader) 32 | target = L"-T ps_6_0"; 33 | else 34 | target = L"-T vs_6_0"; 35 | } 36 | 37 | args[argCount++] = target; 38 | args[argCount++] = L"-HV 2021"; 39 | args[argCount++] = L"-all-resources-bound"; 40 | 41 | if (compileSpirv) 42 | { 43 | args[argCount++] = L"-spirv"; 44 | args[argCount++] = L"-fvk-use-dx-layout"; 45 | 46 | if (!compilePixelShader) 47 | args[argCount++] = L"-fvk-invert-y"; 48 | } 49 | else 50 | { 51 | args[argCount++] = L"-Wno-ignored-attributes"; 52 | args[argCount++] = L"-Qstrip_reflect"; 53 | } 54 | 55 | args[argCount++] = L"-Qstrip_debug"; 56 | 57 | #ifdef UNLEASHED_RECOMP 58 | args[argCount++] = L"-DUNLEASHED_RECOMP"; 59 | #endif 60 | 61 | IDxcResult* result = nullptr; 62 | HRESULT hr = dxcCompiler->Compile(&source, args, argCount, nullptr, IID_PPV_ARGS(&result)); 63 | 64 | IDxcBlob* object = nullptr; 65 | if (SUCCEEDED(hr)) 66 | { 67 | assert(result != nullptr); 68 | 69 | HRESULT status; 70 | hr = result->GetStatus(&status); 71 | assert(SUCCEEDED(hr)); 72 | 73 | if (FAILED(status)) 74 | { 75 | if (result->HasOutput(DXC_OUT_ERRORS)) 76 | { 77 | IDxcBlobUtf8* errors = nullptr; 78 | hr = result->GetOutput(DXC_OUT_ERRORS, IID_PPV_ARGS(&errors), nullptr); 79 | assert(SUCCEEDED(hr) && errors != nullptr); 80 | 81 | fputs(errors->GetStringPointer(), stderr); 82 | 83 | errors->Release(); 84 | } 85 | } 86 | else 87 | { 88 | hr = result->GetOutput(DXC_OUT_OBJECT, IID_PPV_ARGS(&object), 
nullptr); 89 | assert(SUCCEEDED(hr) && object != nullptr); 90 | } 91 | 92 | result->Release(); 93 | } 94 | else 95 | { 96 | assert(result == nullptr); 97 | } 98 | 99 | return object; 100 | } 101 | -------------------------------------------------------------------------------- /XenosRecomp/dxc_compiler.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | struct DxcCompiler 4 | { 5 | IDxcCompiler3* dxcCompiler = nullptr; 6 | 7 | DxcCompiler(); 8 | ~DxcCompiler(); 9 | 10 | IDxcBlob* compile(const std::string& shaderSource, bool compilePixelShader, bool compileLibrary, bool compileSpirv); 11 | }; 12 | -------------------------------------------------------------------------------- /XenosRecomp/main.cpp: -------------------------------------------------------------------------------- 1 | #include "shader.h" 2 | #include "shader_recompiler.h" 3 | #include "dxc_compiler.h" 4 | 5 | static std::unique_ptr readAllBytes(const char* filePath, size_t& fileSize) 6 | { 7 | FILE* file = fopen(filePath, "rb"); 8 | fseek(file, 0, SEEK_END); 9 | fileSize = ftell(file); 10 | fseek(file, 0, SEEK_SET); 11 | auto data = std::make_unique(fileSize); 12 | fread(data.get(), 1, fileSize, file); 13 | fclose(file); 14 | return data; 15 | } 16 | 17 | static void writeAllBytes(const char* filePath, const void* data, size_t dataSize) 18 | { 19 | FILE* file = fopen(filePath, "wb"); 20 | fwrite(data, 1, dataSize, file); 21 | fclose(file); 22 | } 23 | 24 | struct RecompiledShader 25 | { 26 | uint8_t* data = nullptr; 27 | IDxcBlob* dxil = nullptr; 28 | std::vector spirv; 29 | uint32_t specConstantsMask = 0; 30 | }; 31 | 32 | int main(int argc, char** argv) 33 | { 34 | #ifndef XENOS_RECOMP_INPUT 35 | if (argc < 4) 36 | { 37 | printf("Usage: XenosRecomp [input path] [output path] [shader common header file path]"); 38 | return 0; 39 | } 40 | #endif 41 | 42 | const char* input = 43 | #ifdef XENOS_RECOMP_INPUT 44 | XENOS_RECOMP_INPUT 45 | #else 46 | 
argv[1] 47 | #endif 48 | ; 49 | 50 | const char* output = 51 | #ifdef XENOS_RECOMP_OUTPUT 52 | XENOS_RECOMP_OUTPUT 53 | #else 54 | argv[2] 55 | #endif 56 | ; 57 | 58 | const char* includeInput = 59 | #ifdef XENOS_RECOMP_INCLUDE_INPUT 60 | XENOS_RECOMP_INCLUDE_INPUT 61 | #else 62 | argv[3] 63 | #endif 64 | ; 65 | 66 | size_t includeSize = 0; 67 | auto includeData = readAllBytes(includeInput, includeSize); 68 | std::string_view include(reinterpret_cast(includeData.get()), includeSize); 69 | 70 | if (std::filesystem::is_directory(input)) 71 | { 72 | std::vector> files; 73 | std::map shaders; 74 | 75 | for (auto& file : std::filesystem::recursive_directory_iterator(input)) 76 | { 77 | if (std::filesystem::is_directory(file)) 78 | { 79 | continue; 80 | } 81 | 82 | size_t fileSize = 0; 83 | auto fileData = readAllBytes(file.path().string().c_str(), fileSize); 84 | bool foundAny = false; 85 | 86 | for (size_t i = 0; fileSize > sizeof(ShaderContainer) && i < fileSize - sizeof(ShaderContainer) - 1;) 87 | { 88 | auto shaderContainer = reinterpret_cast(fileData.get() + i); 89 | size_t dataSize = shaderContainer->virtualSize + shaderContainer->physicalSize; 90 | 91 | if ((shaderContainer->flags & 0xFFFFFF00) == 0x102A1100 && 92 | dataSize <= (fileSize - i) && 93 | shaderContainer->field1C == 0 && 94 | shaderContainer->field20 == 0) 95 | { 96 | XXH64_hash_t hash = XXH3_64bits(shaderContainer, dataSize); 97 | auto shader = shaders.try_emplace(hash); 98 | if (shader.second) 99 | { 100 | shader.first->second.data = fileData.get() + i; 101 | foundAny = true; 102 | } 103 | 104 | i += dataSize; 105 | } 106 | else 107 | { 108 | i += sizeof(uint32_t); 109 | } 110 | } 111 | 112 | if (foundAny) 113 | files.emplace_back(std::move(fileData)); 114 | } 115 | 116 | std::atomic progress = 0; 117 | 118 | std::for_each(std::execution::par_unseq, shaders.begin(), shaders.end(), [&](auto& hashShaderPair) 119 | { 120 | auto& shader = hashShaderPair.second; 121 | 122 | thread_local ShaderRecompiler 
recompiler; 123 | recompiler = {}; 124 | recompiler.recompile(shader.data, include); 125 | 126 | shader.specConstantsMask = recompiler.specConstantsMask; 127 | 128 | thread_local DxcCompiler dxcCompiler; 129 | 130 | #ifdef XENOS_RECOMP_DXIL 131 | shader.dxil = dxcCompiler.compile(recompiler.out, recompiler.isPixelShader, recompiler.specConstantsMask != 0, false); 132 | assert(shader.dxil != nullptr); 133 | assert(*(reinterpret_cast(shader.dxil->GetBufferPointer()) + 1) != 0 && "DXIL was not signed properly!"); 134 | #endif 135 | 136 | IDxcBlob* spirv = dxcCompiler.compile(recompiler.out, recompiler.isPixelShader, false, true); 137 | assert(spirv != nullptr); 138 | 139 | bool result = smolv::Encode(spirv->GetBufferPointer(), spirv->GetBufferSize(), shader.spirv, smolv::kEncodeFlagStripDebugInfo); 140 | assert(result); 141 | 142 | spirv->Release(); 143 | 144 | size_t currentProgress = ++progress; 145 | if ((currentProgress % 10) == 0 || (currentProgress == shaders.size() - 1)) 146 | fmt::println("Recompiling shaders... {}%", currentProgress / float(shaders.size()) * 100.0f); 147 | }); 148 | 149 | fmt::println("Creating shader cache..."); 150 | 151 | StringBuffer f; 152 | f.println("#include \"shader_cache.h\""); 153 | f.println("ShaderCacheEntry g_shaderCacheEntries[] = {{"); 154 | 155 | std::vector dxil; 156 | std::vector spirv; 157 | 158 | for (auto& [hash, shader] : shaders) 159 | { 160 | f.println("\t{{ 0x{:X}, {}, {}, {}, {}, {} }},", 161 | hash, dxil.size(), (shader.dxil != nullptr) ? 
shader.dxil->GetBufferSize() : 0, spirv.size(), shader.spirv.size(), shader.specConstantsMask); 162 | 163 | if (shader.dxil != nullptr) 164 | { 165 | dxil.insert(dxil.end(), reinterpret_cast(shader.dxil->GetBufferPointer()), 166 | reinterpret_cast(shader.dxil->GetBufferPointer()) + shader.dxil->GetBufferSize()); 167 | } 168 | 169 | spirv.insert(spirv.end(), shader.spirv.begin(), shader.spirv.end()); 170 | } 171 | 172 | f.println("}};"); 173 | 174 | fmt::println("Compressing DXIL cache..."); 175 | 176 | int level = ZSTD_maxCLevel(); 177 | 178 | #ifdef XENOS_RECOMP_DXIL 179 | std::vector dxilCompressed(ZSTD_compressBound(dxil.size())); 180 | dxilCompressed.resize(ZSTD_compress(dxilCompressed.data(), dxilCompressed.size(), dxil.data(), dxil.size(), level)); 181 | 182 | f.print("const uint8_t g_compressedDxilCache[] = {{"); 183 | 184 | for (auto data : dxilCompressed) 185 | f.print("{},", data); 186 | 187 | f.println("}};"); 188 | f.println("const size_t g_dxilCacheCompressedSize = {};", dxilCompressed.size()); 189 | f.println("const size_t g_dxilCacheDecompressedSize = {};", dxil.size()); 190 | #endif 191 | 192 | fmt::println("Compressing SPIRV cache..."); 193 | 194 | std::vector spirvCompressed(ZSTD_compressBound(spirv.size())); 195 | spirvCompressed.resize(ZSTD_compress(spirvCompressed.data(), spirvCompressed.size(), spirv.data(), spirv.size(), level)); 196 | 197 | f.print("const uint8_t g_compressedSpirvCache[] = {{"); 198 | 199 | for (auto data : spirvCompressed) 200 | f.print("{},", data); 201 | 202 | f.println("}};"); 203 | 204 | f.println("const size_t g_spirvCacheCompressedSize = {};", spirvCompressed.size()); 205 | f.println("const size_t g_spirvCacheDecompressedSize = {};", spirv.size()); 206 | f.println("const size_t g_shaderCacheEntryCount = {};", shaders.size()); 207 | 208 | writeAllBytes(output, f.out.data(), f.out.size()); 209 | } 210 | else 211 | { 212 | ShaderRecompiler recompiler; 213 | size_t fileSize; 214 | recompiler.recompile(readAllBytes(input, 
fileSize).get(), include); 215 | writeAllBytes(output, recompiler.out.data(), recompiler.out.size()); 216 | } 217 | 218 | return 0; 219 | } 220 | -------------------------------------------------------------------------------- /XenosRecomp/pch.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifdef _WIN32 4 | #include 5 | #endif 6 | 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | template 23 | static T byteSwap(T value) 24 | { 25 | if constexpr (sizeof(T) == 1) 26 | return value; 27 | else if constexpr (sizeof(T) == 2) 28 | return static_cast(__builtin_bswap16(static_cast(value))); 29 | else if constexpr (sizeof(T) == 4) 30 | return static_cast(__builtin_bswap32(static_cast(value))); 31 | else if constexpr (sizeof(T) == 8) 32 | return static_cast(__builtin_bswap64(static_cast(value))); 33 | 34 | assert(false && "Unexpected byte size."); 35 | return value; 36 | } 37 | 38 | template 39 | struct be 40 | { 41 | T value; 42 | 43 | T get() const 44 | { 45 | if constexpr (std::is_enum_v) 46 | return T(byteSwap(std::underlying_type_t(value))); 47 | else 48 | return byteSwap(value); 49 | } 50 | 51 | operator T() const 52 | { 53 | return get(); 54 | } 55 | }; 56 | -------------------------------------------------------------------------------- /XenosRecomp/shader.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "constant_table.h" 4 | 5 | struct Float4Definition 6 | { 7 | be registerIndex; 8 | be count; 9 | be physicalOffset; 10 | }; 11 | 12 | struct Int4Definition 13 | { 14 | be registerIndex; 15 | be count; 16 | be values[]; 17 | }; 18 | 19 | struct DefinitionTable 20 | { 21 | be field0; 22 | be field4; 23 | be field8; 24 | be fieldC; 25 | be size; 26 | be definitions[]; // float4, int4 and bool 
separated by null terminators 27 | }; 28 | 29 | struct Shader 30 | { 31 | be physicalOffset; 32 | be size; 33 | be field8; 34 | be fieldC; // svPos register: (fieldC >> 8) & 0xFF 35 | be field10; 36 | be interpolatorInfo; // interpolator count: (interpolatorInfo >> 5) & 0x1F 37 | }; 38 | 39 | enum class DeclUsage : uint32_t 40 | { 41 | Position = 0, 42 | BlendWeight = 1, 43 | BlendIndices = 2, 44 | Normal = 3, 45 | PointSize = 4, 46 | TexCoord = 5, 47 | Tangent = 6, 48 | Binormal = 7, 49 | TessFactor = 8, 50 | PositionT = 9, 51 | Color = 10, 52 | Fog = 11, 53 | Depth = 12, 54 | Sample = 13 55 | }; 56 | 57 | struct VertexElement 58 | { 59 | uint32_t address : 12; 60 | DeclUsage usage : 4; 61 | uint32_t usageIndex : 4; 62 | }; 63 | 64 | struct Interpolator 65 | { 66 | uint32_t usageIndex : 4; 67 | DeclUsage usage : 4; 68 | uint32_t reg : 4; 69 | uint32_t : 20; 70 | }; 71 | 72 | struct VertexShader : Shader 73 | { 74 | be field18; 75 | be vertexElementCount; 76 | be field20; 77 | be vertexElementsAndInterpolators[]; // field18 + vertex elements + interpolators 78 | }; 79 | 80 | enum PixelShaderOutputs : uint32_t 81 | { 82 | PIXEL_SHADER_OUTPUT_COLOR0 = 0x1, 83 | PIXEL_SHADER_OUTPUT_COLOR1 = 0x2, 84 | PIXEL_SHADER_OUTPUT_COLOR2 = 0x4, 85 | PIXEL_SHADER_OUTPUT_COLOR3 = 0x8, 86 | PIXEL_SHADER_OUTPUT_DEPTH = 0x10 87 | }; 88 | 89 | struct PixelShader : Shader 90 | { 91 | be field18; 92 | be outputs; 93 | be interpolators[]; 94 | }; 95 | 96 | struct ShaderContainer 97 | { 98 | be flags; 99 | be virtualSize; 100 | be physicalSize; 101 | be fieldC; 102 | be constantTableOffset; 103 | be definitionTableOffset; 104 | be shaderOffset; 105 | be field1C; 106 | be field20; 107 | }; -------------------------------------------------------------------------------- /XenosRecomp/shader_code.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | enum class ControlFlowOpcode : uint32_t 4 | { 5 | Nop = 0, 6 | Exec = 1, 7 | ExecEnd = 2, 
8 | CondExec = 3, 9 | CondExecEnd = 4, 10 | CondExecPred = 5, 11 | CondExecPredEnd = 6, 12 | LoopStart = 7, 13 | LoopEnd = 8, 14 | CondCall = 9, 15 | Return = 10, 16 | CondJmp = 11, 17 | Alloc = 12, 18 | CondExecPredClean = 13, 19 | CondExecPredCleanEnd = 14, 20 | MarkVsFetchDone = 15, 21 | }; 22 | 23 | struct ControlFlowExecInstruction 24 | { 25 | uint32_t address : 12; 26 | uint32_t count : 3; 27 | uint32_t isYield : 1; 28 | uint32_t sequence : 12; 29 | uint32_t vertexCacheHigh : 4; 30 | uint32_t vertexCacheLow : 2; 31 | uint32_t : 7; 32 | uint32_t isPredicateClean : 1; 33 | uint32_t : 1; 34 | uint32_t absoluteAddressing : 1; 35 | ControlFlowOpcode opcode : 4; 36 | }; 37 | 38 | struct ControlFlowExecPredInstruction 39 | { 40 | uint32_t address : 12; 41 | uint32_t count : 3; 42 | uint32_t isYield : 1; 43 | uint32_t sequence : 12; 44 | uint32_t vertexCacheHigh : 4; 45 | uint32_t vertexCacheLow : 2; 46 | uint32_t : 7; 47 | uint32_t isPredicateClean : 1; 48 | uint32_t condition : 1; 49 | uint32_t absoluteAddressing : 1; 50 | ControlFlowOpcode opcode : 4; 51 | }; 52 | 53 | struct ControlFlowCondExecInstruction 54 | { 55 | uint32_t address : 12; 56 | uint32_t count : 3; 57 | uint32_t isYield : 1; 58 | uint32_t sequence : 12; 59 | uint32_t vertexCacheHigh : 4; 60 | uint32_t vertexCacheLow : 2; 61 | uint32_t boolAddress : 8; 62 | uint32_t condition : 1; 63 | uint32_t absoluteAddressing : 1; 64 | ControlFlowOpcode opcode : 4; 65 | }; 66 | 67 | struct ControlFlowCondExecPredInstruction 68 | { 69 | uint32_t address : 12; 70 | uint32_t count : 3; 71 | uint32_t isYield : 1; 72 | uint32_t sequence : 12; 73 | uint32_t vertexCacheHigh : 4; 74 | uint32_t vertexCacheLow : 2; 75 | uint32_t : 7; 76 | uint32_t isPredicateClean : 1; 77 | uint32_t condition : 1; 78 | uint32_t absoluteAddressing : 1; 79 | ControlFlowOpcode opcode : 4; 80 | }; 81 | 82 | struct ControlFlowLoopStartInstruction 83 | { 84 | uint32_t address : 13; 85 | uint32_t isRepeat : 1; 86 | uint32_t : 2; 87 | uint32_t 
loopId : 5; 88 | uint32_t : 11; 89 | uint32_t : 11; 90 | uint32_t absoluteAddressing : 1; 91 | ControlFlowOpcode opcode : 4; 92 | }; 93 | 94 | struct ControlFlowLoopEndInstruction 95 | { 96 | uint32_t address : 13; 97 | uint32_t : 3; 98 | uint32_t loopId : 5; 99 | uint32_t isPredicatedBreak : 1; 100 | uint32_t : 10; 101 | uint32_t : 10; 102 | uint32_t condition : 1; 103 | uint32_t absoluteAddressing : 1; 104 | ControlFlowOpcode opcode : 4; 105 | }; 106 | 107 | struct ControlFlowCondCallInstruction 108 | { 109 | uint32_t address : 13; 110 | uint32_t isUnconditional : 1; 111 | uint32_t isPredicated : 1; 112 | uint32_t : 17; 113 | uint32_t : 2; 114 | uint32_t boolAddress : 8; 115 | uint32_t condition : 1; 116 | uint32_t absoluteAddressing : 1; 117 | ControlFlowOpcode opcode : 4; 118 | }; 119 | 120 | struct ControlFlowReturnInstruction 121 | { 122 | uint32_t : 32; 123 | uint32_t : 11; 124 | uint32_t absoluteAddressing : 1; 125 | ControlFlowOpcode opcode : 4; 126 | }; 127 | 128 | struct ControlFlowCondJmpInstruction 129 | { 130 | uint32_t address : 13; 131 | uint32_t isUnconditional : 1; 132 | uint32_t isPredicated : 1; 133 | uint32_t : 17; 134 | uint32_t : 1; 135 | uint32_t direction : 1; 136 | uint32_t boolAddress : 8; 137 | uint32_t condition : 1; 138 | uint32_t absoluteAddressing : 1; 139 | ControlFlowOpcode opcode : 4; 140 | }; 141 | 142 | struct ControlFlowAllocInstruction 143 | { 144 | uint32_t size : 3; 145 | uint32_t : 29; 146 | uint32_t : 8; 147 | uint32_t isUnserialized : 1; 148 | uint32_t allocType : 2; 149 | uint32_t : 1; 150 | ControlFlowOpcode opcode : 4; 151 | }; 152 | 153 | union ControlFlowInstruction 154 | { 155 | ControlFlowExecInstruction exec; 156 | ControlFlowCondExecInstruction condExec; 157 | ControlFlowCondExecPredInstruction condExecPred; 158 | ControlFlowLoopStartInstruction loopStart; 159 | ControlFlowLoopEndInstruction loopEnd; 160 | ControlFlowCondCallInstruction condCall; 161 | ControlFlowReturnInstruction ret; 162 | 
ControlFlowCondJmpInstruction condJmp; 163 | ControlFlowAllocInstruction alloc; 164 | 165 | struct 166 | { 167 | uint32_t : 32; 168 | uint32_t : 12; 169 | ControlFlowOpcode opcode : 4; 170 | }; 171 | }; 172 | 173 | enum class FetchOpcode : uint32_t 174 | { 175 | VertexFetch = 0, 176 | TextureFetch = 1, 177 | GetTextureBorderColorFrac = 16, 178 | GetTextureComputedLod = 17, 179 | GetTextureGradients = 18, 180 | GetTextureWeights = 19, 181 | SetTextureLod = 24, 182 | SetTextureGradientsHorz = 25, 183 | SetTextureGradientsVert = 26 184 | }; 185 | 186 | enum class FetchDestinationSwizzle : uint32_t 187 | { 188 | X = 0, 189 | Y = 1, 190 | Z = 2, 191 | W = 3, 192 | Zero = 4, 193 | One = 5, 194 | Keep = 7 195 | }; 196 | 197 | struct VertexFetchInstruction 198 | { 199 | struct 200 | { 201 | FetchOpcode opcode : 5; 202 | uint32_t srcRegister : 6; 203 | uint32_t srcRegisterAm : 1; 204 | uint32_t dstRegister : 6; 205 | uint32_t dstRegisterAam : 1; 206 | uint32_t mustBeOne : 1; 207 | uint32_t constIndex : 5; 208 | uint32_t constIndexSelect : 2; 209 | uint32_t prefetchCount : 3; 210 | uint32_t srcSwizzle : 2; 211 | }; 212 | struct 213 | { 214 | uint32_t dstSwizzle : 12; 215 | uint32_t formatCompAll : 1; 216 | uint32_t numFormatAll : 1; 217 | uint32_t signedRfModeAll : 1; 218 | uint32_t isIndexRounded : 1; 219 | uint32_t format : 6; 220 | uint32_t reserved2 : 2; 221 | int32_t expAdjust : 6; 222 | uint32_t isMiniFetch : 1; 223 | uint32_t isPredicated : 1; 224 | }; 225 | struct 226 | { 227 | uint32_t stride : 8; 228 | int32_t offset : 23; 229 | uint32_t predicateCondition : 1; 230 | }; 231 | }; 232 | 233 | enum class TextureDimension : uint32_t 234 | { 235 | Texture1D, 236 | Texture2D, 237 | Texture3D, 238 | TextureCube 239 | }; 240 | 241 | struct TextureFetchInstruction 242 | { 243 | struct 244 | { 245 | FetchOpcode opcode : 5; 246 | uint32_t srcRegister : 6; 247 | uint32_t srcRegisterAm : 1; 248 | uint32_t dstRegister : 6; 249 | uint32_t dstRegisterAm : 1; 250 | uint32_t 
fetchValidOnly : 1; 251 | uint32_t constIndex : 5; 252 | uint32_t texCoordDenorm : 1; 253 | uint32_t srcSwizzle : 6; 254 | }; 255 | struct 256 | { 257 | uint32_t dstSwizzle : 12; 258 | uint32_t magFilter : 2; 259 | uint32_t minFilter : 2; 260 | uint32_t mipFilter : 2; 261 | uint32_t anisoFilter : 3; 262 | uint32_t arbitraryFilter : 3; 263 | uint32_t volMagFilter : 2; 264 | uint32_t volMinFilter : 2; 265 | uint32_t useCompLod : 1; 266 | uint32_t useRegLod : 1; 267 | uint32_t : 1; 268 | uint32_t isPredicated : 1; 269 | }; 270 | struct 271 | { 272 | uint32_t useRegGradients : 1; 273 | uint32_t sampleLocation : 1; 274 | int32_t lodBias : 7; 275 | uint32_t : 5; 276 | TextureDimension dimension : 2; 277 | int32_t offsetX : 5; 278 | int32_t offsetY : 5; 279 | int32_t offsetZ : 5; 280 | uint32_t predCondition : 1; 281 | }; 282 | }; 283 | 284 | union FetchInstruction 285 | { 286 | VertexFetchInstruction vertexFetch; 287 | TextureFetchInstruction textureFetch; 288 | 289 | struct 290 | { 291 | FetchOpcode opcode : 5; 292 | uint32_t : 27; 293 | uint32_t : 32; 294 | }; 295 | }; 296 | 297 | enum class AluScalarOpcode : uint32_t 298 | { 299 | Adds = 0, 300 | AddsPrev = 1, 301 | Muls = 2, 302 | MulsPrev = 3, 303 | MulsPrev2 = 4, 304 | Maxs = 5, 305 | Mins = 6, 306 | Seqs = 7, 307 | Sgts = 8, 308 | Sges = 9, 309 | Snes = 10, 310 | Frcs = 11, 311 | Truncs = 12, 312 | Floors = 13, 313 | Exp = 14, 314 | Logc = 15, 315 | Log = 16, 316 | Rcpc = 17, 317 | Rcpf = 18, 318 | Rcp = 19, 319 | Rsqc = 20, 320 | Rsqf = 21, 321 | Rsq = 22, 322 | MaxAs = 23, 323 | MaxAsf = 24, 324 | Subs = 25, 325 | SubsPrev = 26, 326 | SetpEq = 27, 327 | SetpNe = 28, 328 | SetpGt = 29, 329 | SetpGe = 30, 330 | SetpInv = 31, 331 | SetpPop = 32, 332 | SetpClr = 33, 333 | SetpRstr = 34, 334 | KillsEq = 35, 335 | KillsGt = 36, 336 | KillsGe = 37, 337 | KillsNe = 38, 338 | KillsOne = 39, 339 | Sqrt = 40, 340 | Mulsc0 = 42, 341 | Mulsc1 = 43, 342 | Addsc0 = 44, 343 | Addsc1 = 45, 344 | Subsc0 = 46, 345 | Subsc1 = 47, 
346 | Sin = 48, 347 | Cos = 49, 348 | RetainPrev = 50 349 | }; 350 | 351 | enum class AluVectorOpcode : uint32_t 352 | { 353 | Add = 0, 354 | Mul = 1, 355 | Max = 2, 356 | Min = 3, 357 | Seq = 4, 358 | Sgt = 5, 359 | Sge = 6, 360 | Sne = 7, 361 | Frc = 8, 362 | Trunc = 9, 363 | Floor = 10, 364 | Mad = 11, 365 | CndEq = 12, 366 | CndGe = 13, 367 | CndGt = 14, 368 | Dp4 = 15, 369 | Dp3 = 16, 370 | Dp2Add = 17, 371 | Cube = 18, 372 | Max4 = 19, 373 | SetpEqPush = 20, 374 | SetpNePush = 21, 375 | SetpGtPush = 22, 376 | SetpGePush = 23, 377 | KillEq = 24, 378 | KillGt = 25, 379 | KillGe = 26, 380 | KillNe = 27, 381 | Dst = 28, 382 | MaxA = 29 383 | }; 384 | 385 | enum class ExportRegister : uint32_t 386 | { 387 | VSInterpolator0 = 0, 388 | VSInterpolator1, 389 | VSInterpolator2, 390 | VSInterpolator3, 391 | VSInterpolator4, 392 | VSInterpolator5, 393 | VSInterpolator6, 394 | VSInterpolator7, 395 | VSInterpolator8, 396 | VSInterpolator9, 397 | VSInterpolator10, 398 | VSInterpolator11, 399 | VSInterpolator12, 400 | VSInterpolator13, 401 | VSInterpolator14, 402 | VSInterpolator15, 403 | VSPosition = 62, 404 | VSPointSizeEdgeFlagKillVertex = 63, 405 | PSColor0 = 0, 406 | PSColor1, 407 | PSColor2, 408 | PSColor3, 409 | PSDepth = 61, 410 | ExportAddress = 32, 411 | ExportData0 = 33, 412 | ExportData1, 413 | ExportData2, 414 | ExportData3, 415 | ExportData4, 416 | }; 417 | 418 | struct AluInstruction 419 | { 420 | struct 421 | { 422 | uint32_t vectorDest : 6; 423 | uint32_t vectorDestRelative : 1; 424 | uint32_t absConstants : 1; 425 | uint32_t scalarDest : 6; 426 | uint32_t scalarDestRelative : 1; 427 | uint32_t exportData : 1; 428 | uint32_t vectorWriteMask : 4; 429 | uint32_t scalarWriteMask : 4; 430 | uint32_t vectorSaturate : 1; 431 | uint32_t scalarSaturate : 1; 432 | AluScalarOpcode scalarOpcode : 6; 433 | }; 434 | struct 435 | { 436 | uint32_t src3Swizzle : 8; 437 | uint32_t src2Swizzle : 8; 438 | uint32_t src1Swizzle : 8; 439 | uint32_t src3Negate : 1; 440 | uint32_t 
src2Negate : 1; 441 | uint32_t src1Negate : 1; 442 | uint32_t predicateCondition : 1; 443 | uint32_t isPredicated : 1; 444 | uint32_t constAddressRegisterRelative : 1; 445 | uint32_t const1Relative : 1; 446 | uint32_t const0Relative : 1; 447 | }; 448 | struct 449 | { 450 | uint32_t src3Register : 8; 451 | uint32_t src2Register : 8; 452 | uint32_t src1Register : 8; 453 | AluVectorOpcode vectorOpcode : 5; 454 | uint32_t src3Select : 1; 455 | uint32_t src2Select : 1; 456 | uint32_t src1Select : 1; 457 | }; 458 | }; -------------------------------------------------------------------------------- /XenosRecomp/shader_common.h: -------------------------------------------------------------------------------- 1 | #ifndef SHADER_COMMON_H_INCLUDED 2 | #define SHADER_COMMON_H_INCLUDED 3 | 4 | #define SPEC_CONSTANT_R11G11B10_NORMAL (1 << 0) 5 | #define SPEC_CONSTANT_ALPHA_TEST (1 << 1) 6 | 7 | #ifdef UNLEASHED_RECOMP 8 | #define SPEC_CONSTANT_BICUBIC_GI_FILTER (1 << 2) 9 | #define SPEC_CONSTANT_ALPHA_TO_COVERAGE (1 << 3) 10 | #define SPEC_CONSTANT_REVERSE_Z (1 << 4) 11 | #endif 12 | 13 | #if !defined(__cplusplus) || defined(__INTELLISENSE__) 14 | 15 | #define FLT_MIN asfloat(0xff7fffff) 16 | #define FLT_MAX asfloat(0x7f7fffff) 17 | 18 | #ifdef __spirv__ 19 | 20 | struct PushConstants 21 | { 22 | uint64_t VertexShaderConstants; 23 | uint64_t PixelShaderConstants; 24 | uint64_t SharedConstants; 25 | }; 26 | 27 | [[vk::push_constant]] ConstantBuffer g_PushConstants; 28 | 29 | #define g_Booleans vk::RawBufferLoad(g_PushConstants.SharedConstants + 256) 30 | #define g_SwappedTexcoords vk::RawBufferLoad(g_PushConstants.SharedConstants + 260) 31 | #define g_HalfPixelOffset vk::RawBufferLoad(g_PushConstants.SharedConstants + 264) 32 | #define g_AlphaThreshold vk::RawBufferLoad(g_PushConstants.SharedConstants + 272) 33 | 34 | [[vk::constant_id(0)]] const uint g_SpecConstants = 0; 35 | 36 | #define g_SpecConstants() g_SpecConstants 37 | 38 | #else 39 | 40 | #define 
DEFINE_SHARED_CONSTANTS() \ 41 | uint g_Booleans : packoffset(c16.x); \ 42 | uint g_SwappedTexcoords : packoffset(c16.y); \ 43 | float2 g_HalfPixelOffset : packoffset(c16.z); \ 44 | float g_AlphaThreshold : packoffset(c17.x); 45 | 46 | uint g_SpecConstants(); 47 | 48 | #endif 49 | 50 | Texture2D g_Texture2DDescriptorHeap[] : register(t0, space0); 51 | Texture3D g_Texture3DDescriptorHeap[] : register(t0, space1); 52 | TextureCube g_TextureCubeDescriptorHeap[] : register(t0, space2); 53 | SamplerState g_SamplerDescriptorHeap[] : register(s0, space3); 54 | 55 | uint2 getTexture2DDimensions(Texture2D texture) 56 | { 57 | uint2 dimensions; 58 | texture.GetDimensions(dimensions.x, dimensions.y); 59 | return dimensions; 60 | } 61 | 62 | float4 tfetch2D(uint resourceDescriptorIndex, uint samplerDescriptorIndex, float2 texCoord, float2 offset) 63 | { 64 | Texture2D texture = g_Texture2DDescriptorHeap[resourceDescriptorIndex]; 65 | return texture.Sample(g_SamplerDescriptorHeap[samplerDescriptorIndex], texCoord + offset / getTexture2DDimensions(texture)); 66 | } 67 | 68 | float2 getWeights2D(uint resourceDescriptorIndex, uint samplerDescriptorIndex, float2 texCoord, float2 offset) 69 | { 70 | Texture2D texture = g_Texture2DDescriptorHeap[resourceDescriptorIndex]; 71 | return select(isnan(texCoord), 0.0, frac(texCoord * getTexture2DDimensions(texture) + offset - 0.5)); 72 | } 73 | 74 | float w0(float a) 75 | { 76 | return (1.0f / 6.0f) * (a * (a * (-a + 3.0f) - 3.0f) + 1.0f); 77 | } 78 | 79 | float w1(float a) 80 | { 81 | return (1.0f / 6.0f) * (a * a * (3.0f * a - 6.0f) + 4.0f); 82 | } 83 | 84 | float w2(float a) 85 | { 86 | return (1.0f / 6.0f) * (a * (a * (-3.0f * a + 3.0f) + 3.0f) + 1.0f); 87 | } 88 | 89 | float w3(float a) 90 | { 91 | return (1.0f / 6.0f) * (a * a * a); 92 | } 93 | 94 | float g0(float a) 95 | { 96 | return w0(a) + w1(a); 97 | } 98 | 99 | float g1(float a) 100 | { 101 | return w2(a) + w3(a); 102 | } 103 | 104 | float h0(float a) 105 | { 106 | return -1.0f 
+ w1(a) / (w0(a) + w1(a)) + 0.5f; 107 | } 108 | 109 | float h1(float a) 110 | { 111 | return 1.0f + w3(a) / (w2(a) + w3(a)) + 0.5f; 112 | } 113 | 114 | float4 tfetch2DBicubic(uint resourceDescriptorIndex, uint samplerDescriptorIndex, float2 texCoord, float2 offset) 115 | { 116 | Texture2D texture = g_Texture2DDescriptorHeap[resourceDescriptorIndex]; 117 | SamplerState samplerState = g_SamplerDescriptorHeap[samplerDescriptorIndex]; 118 | uint2 dimensions = getTexture2DDimensions(texture); 119 | 120 | float x = texCoord.x * dimensions.x + offset.x; 121 | float y = texCoord.y * dimensions.y + offset.y; 122 | 123 | x -= 0.5f; 124 | y -= 0.5f; 125 | float px = floor(x); 126 | float py = floor(y); 127 | float fx = x - px; 128 | float fy = y - py; 129 | 130 | float g0x = g0(fx); 131 | float g1x = g1(fx); 132 | float h0x = h0(fx); 133 | float h1x = h1(fx); 134 | float h0y = h0(fy); 135 | float h1y = h1(fy); 136 | 137 | float4 r = 138 | g0(fy) * (g0x * texture.Sample(samplerState, float2(px + h0x, py + h0y) / float2(dimensions)) + 139 | g1x * texture.Sample(samplerState, float2(px + h1x, py + h0y) / float2(dimensions))) + 140 | g1(fy) * (g0x * texture.Sample(samplerState, float2(px + h0x, py + h1y) / float2(dimensions)) + 141 | g1x * texture.Sample(samplerState, float2(px + h1x, py + h1y) / float2(dimensions))); 142 | 143 | return r; 144 | } 145 | 146 | float4 tfetch3D(uint resourceDescriptorIndex, uint samplerDescriptorIndex, float3 texCoord) 147 | { 148 | return g_Texture3DDescriptorHeap[resourceDescriptorIndex].Sample(g_SamplerDescriptorHeap[samplerDescriptorIndex], texCoord); 149 | } 150 | 151 | struct CubeMapData 152 | { 153 | float3 cubeMapDirections[2]; 154 | uint cubeMapIndex; 155 | }; 156 | 157 | float4 tfetchCube(uint resourceDescriptorIndex, uint samplerDescriptorIndex, float3 texCoord, inout CubeMapData cubeMapData) 158 | { 159 | return g_TextureCubeDescriptorHeap[resourceDescriptorIndex].Sample(g_SamplerDescriptorHeap[samplerDescriptorIndex], 
cubeMapData.cubeMapDirections[texCoord.z]); 160 | } 161 | 162 | float4 tfetchR11G11B10(uint4 value) 163 | { 164 | if (g_SpecConstants() & SPEC_CONSTANT_R11G11B10_NORMAL) 165 | { 166 | return float4( 167 | (value.x & 0x00000400 ? -1.0 : 0.0) + ((value.x & 0x3FF) / 1024.0), 168 | (value.x & 0x00200000 ? -1.0 : 0.0) + (((value.x >> 11) & 0x3FF) / 1024.0), 169 | (value.x & 0x80000000 ? -1.0 : 0.0) + (((value.x >> 22) & 0x1FF) / 512.0), 170 | 0.0); 171 | } 172 | else 173 | { 174 | return asfloat(value); 175 | } 176 | } 177 | 178 | float4 tfetchTexcoord(uint swappedTexcoords, float4 value, uint semanticIndex) 179 | { 180 | return (swappedTexcoords & (1ull << semanticIndex)) != 0 ? value.yxwz : value; 181 | } 182 | 183 | float4 cube(float4 value, inout CubeMapData cubeMapData) 184 | { 185 | uint index = cubeMapData.cubeMapIndex; 186 | cubeMapData.cubeMapDirections[index] = value.xyz; 187 | ++cubeMapData.cubeMapIndex; 188 | 189 | return float4(0.0, 0.0, 0.0, index); 190 | } 191 | 192 | float4 dst(float4 src0, float4 src1) 193 | { 194 | float4 dest; 195 | dest.x = 1.0; 196 | dest.y = src0.y * src1.y; 197 | dest.z = src0.z; 198 | dest.w = src1.w; 199 | return dest; 200 | } 201 | 202 | float4 max4(float4 src0) 203 | { 204 | return max(max(src0.x, src0.y), max(src0.z, src0.w)); 205 | } 206 | 207 | float2 getPixelCoord(uint resourceDescriptorIndex, float2 texCoord) 208 | { 209 | return getTexture2DDimensions(g_Texture2DDescriptorHeap[resourceDescriptorIndex]) * texCoord; 210 | } 211 | 212 | float computeMipLevel(float2 pixelCoord) 213 | { 214 | float2 dx = ddx(pixelCoord); 215 | float2 dy = ddy(pixelCoord); 216 | float deltaMaxSqr = max(dot(dx, dx), dot(dy, dy)); 217 | return max(0.0, 0.5 * log2(deltaMaxSqr)); 218 | } 219 | 220 | #endif 221 | 222 | #endif 223 | -------------------------------------------------------------------------------- /XenosRecomp/shader_recompiler.cpp: -------------------------------------------------------------------------------- 1 | #include 
"shader_recompiler.h" 2 | #include "shader_common.h" 3 | 4 | static constexpr char SWIZZLES[] = 5 | { 6 | 'x', 7 | 'y', 8 | 'z', 9 | 'w', 10 | '0', 11 | '1', 12 | '_', 13 | '_' 14 | }; 15 | 16 | static constexpr const char* USAGE_TYPES[] = 17 | { 18 | "float4", // POSITION 19 | "float4", // BLENDWEIGHT 20 | "uint4", // BLENDINDICES 21 | "uint4", // NORMAL 22 | "float4", // PSIZE 23 | "float4", // TEXCOORD 24 | "uint4", // TANGENT 25 | "uint4", // BINORMAL 26 | "float4", // TESSFACTOR 27 | "float4", // POSITIONT 28 | "float4", // COLOR 29 | "float4", // FOG 30 | "float4", // DEPTH 31 | "float4", // SAMPLE 32 | }; 33 | 34 | static constexpr const char* USAGE_VARIABLES[] = 35 | { 36 | "Position", 37 | "BlendWeight", 38 | "BlendIndices", 39 | "Normal", 40 | "PointSize", 41 | "TexCoord", 42 | "Tangent", 43 | "Binormal", 44 | "TessFactor", 45 | "PositionT", 46 | "Color", 47 | "Fog", 48 | "Depth", 49 | "Sample" 50 | }; 51 | 52 | static constexpr const char* USAGE_SEMANTICS[] = 53 | { 54 | "POSITION", 55 | "BLENDWEIGHT", 56 | "BLENDINDICES", 57 | "NORMAL", 58 | "PSIZE", 59 | "TEXCOORD", 60 | "TANGENT", 61 | "BINORMAL", 62 | "TESSFACTOR", 63 | "POSITIONT", 64 | "COLOR", 65 | "FOG", 66 | "DEPTH", 67 | "SAMPLE" 68 | }; 69 | 70 | struct DeclUsageLocation 71 | { 72 | DeclUsage usage; 73 | uint32_t usageIndex; 74 | uint32_t location; 75 | }; 76 | 77 | // NOTE: These are specialized Vulkan locations for Unleashed Recompiled. Change as necessary. Likely not going to work with other games. 
78 | /* Maps a vertex declaration (usage, usageIndex) pair to the location number that is printed as [[vk::location(N)]] on the matching vertex shader input. Entries appear to be { usage, usageIndex, location }; the struct itself is declared outside this chunk. Note that TexCoord 7 and Position 1 both map to location 15. */ static constexpr DeclUsageLocation USAGE_LOCATIONS[] = 79 | { 80 | { DeclUsage::Position, 0, 0 }, 81 | { DeclUsage::Normal, 0, 1 }, 82 | { DeclUsage::Tangent, 0, 2 }, 83 | { DeclUsage::Binormal, 0, 3 }, 84 | { DeclUsage::TexCoord, 0, 4 }, 85 | { DeclUsage::TexCoord, 1, 5 }, 86 | { DeclUsage::TexCoord, 2, 6 }, 87 | { DeclUsage::TexCoord, 3, 7 }, 88 | { DeclUsage::Color, 0, 8 }, 89 | { DeclUsage::BlendIndices, 0, 9 }, 90 | { DeclUsage::BlendWeight, 0, 10 }, 91 | { DeclUsage::Color, 1, 11 }, 92 | { DeclUsage::TexCoord, 4, 12 }, 93 | { DeclUsage::TexCoord, 5, 13 }, 94 | { DeclUsage::TexCoord, 6, 14 }, 95 | { DeclUsage::TexCoord, 7, 15 }, 96 | { DeclUsage::Position, 1, 15 }, 97 | }; 98 | 99 | /* (usage, usageIndex) pairs for the interpolator variables: TexCoord 0-15 followed by Color 0-1. The shader entry point iterates this table to declare pixel shader inputs and vertex shader outputs. */ static constexpr std::pair INTERPOLATORS[] = 100 | { 101 | { DeclUsage::TexCoord, 0 }, 102 | { DeclUsage::TexCoord, 1 }, 103 | { DeclUsage::TexCoord, 2 }, 104 | { DeclUsage::TexCoord, 3 }, 105 | { DeclUsage::TexCoord, 4 }, 106 | { DeclUsage::TexCoord, 5 }, 107 | { DeclUsage::TexCoord, 6 }, 108 | { DeclUsage::TexCoord, 7 }, 109 | { DeclUsage::TexCoord, 8 }, 110 | { DeclUsage::TexCoord, 9 }, 111 | { DeclUsage::TexCoord, 10 }, 112 | { DeclUsage::TexCoord, 11 }, 113 | { DeclUsage::TexCoord, 12 }, 114 | { DeclUsage::TexCoord, 13 }, 115 | { DeclUsage::TexCoord, 14 }, 116 | { DeclUsage::TexCoord, 15 }, 117 | { DeclUsage::Color, 0 }, 118 | { DeclUsage::Color, 1 } 119 | }; 120 | 121 | /* Texture dimension suffixes. For every sampler constant one NAME_Texture<suffix>DescriptorIndex definition is generated per entry. */ static constexpr std::string_view TEXTURE_DIMENSIONS[] = 122 | { 123 | "2D", 124 | "3D", 125 | "Cube" 126 | }; 127 | 128 | /* Returns the 3-bit destination swizzle field for component index (0-3) of the packed dstSwizzle value. */ static FetchDestinationSwizzle getDestSwizzle(uint32_t dstSwizzle, uint32_t index) 129 | { 130 | return FetchDestinationSwizzle((dstSwizzle >> (index * 3)) & 0x7); 131 | } 132 | 133 | /* Appends one component letter to out for each of the four destination components whose swizzle lies in the X..W range. With operand false the letter is SWIZZLES[i] (the destination component); with operand true it is SWIZZLES[swizzle] (the selected source component). Components whose swizzle is outside X..W are skipped here. */ void ShaderRecompiler::printDstSwizzle(uint32_t dstSwizzle, bool operand) 134 | { 135 | for (size_t i = 0; i < 4; i++) 136 | { 137 | const auto swizzle = getDestSwizzle(dstSwizzle, i); 138 | if (swizzle >= FetchDestinationSwizzle::X && swizzle <= FetchDestinationSwizzle::W) 139 | out += SWIZZLES[operand
? uint32_t(swizzle) : i]; 140 | } 141 | } 142 | 143 | /* Emits rN.c = 0.0; or rN.c = 1.0; for every destination component whose swizzle is Zero or One. */ void ShaderRecompiler::printDstSwizzle01(uint32_t dstRegister, uint32_t dstSwizzle) 144 | { 145 | for (size_t i = 0; i < 4; i++) 146 | { 147 | const auto swizzle = getDestSwizzle(dstSwizzle, i); 148 | if (swizzle == FetchDestinationSwizzle::Zero) 149 | { 150 | indent(); 151 | println("r{}.{} = 0.0;", dstRegister, SWIZZLES[i]); 152 | } 153 | else if (swizzle == FetchDestinationSwizzle::One) 154 | { 155 | indent(); 156 | println("r{}.{} = 1.0;", dstRegister, SWIZZLES[i]); 157 | } 158 | } 159 | } 160 | 161 | /* Emits the statement for a vertex fetch instruction. The vertex element registered under address supplies the input variable (i<usage><index>) that is assigned to the destination register. Normal, Tangent and Binormal inputs are wrapped in tfetchR11G11B10(...) and set SPEC_CONSTANT_R11G11B10_NORMAL in specConstantsMask; TexCoord inputs are wrapped in tfetchTexcoord(g_SwappedTexcoords, ..., usageIndex). When the instruction is predicated the output is enclosed in an if on p0. */ void ShaderRecompiler::recompile(const VertexFetchInstruction& instr, uint32_t address) 162 | { 163 | if (instr.isPredicated) 164 | { 165 | indent(); 166 | println("if ({}p0)", instr.predicateCondition ? "" : "!"); 167 | 168 | indent(); 169 | out += "{\n"; 170 | ++indentation; 171 | } 172 | 173 | indent(); 174 | print("r{}.", instr.dstRegister); 175 | printDstSwizzle(instr.dstSwizzle, false); 176 | 177 | out += " = "; 178 | 179 | /* The element is expected to have been registered already; a missing address is only caught by the assert. */ auto findResult = vertexElements.find(address); 180 | assert(findResult != vertexElements.end()); 181 | 182 | switch (findResult->second.usage) 183 | { 184 | case DeclUsage::Normal: 185 | case DeclUsage::Tangent: 186 | case DeclUsage::Binormal: 187 | specConstantsMask |= SPEC_CONSTANT_R11G11B10_NORMAL; 188 | print("tfetchR11G11B10("); 189 | break; 190 | 191 | case DeclUsage::TexCoord: 192 | print("tfetchTexcoord(g_SwappedTexcoords, "); 193 | break; 194 | } 195 | 196 | print("i{}{}", USAGE_VARIABLES[uint32_t(findResult->second.usage)], uint32_t(findResult->second.usageIndex)); 197 | 198 | switch (findResult->second.usage) 199 | { 200 | case DeclUsage::Normal: 201 | case DeclUsage::Tangent: 202 | case DeclUsage::Binormal: 203 | out += ')'; 204 | break; 205 | 206 | case DeclUsage::TexCoord: 207 | print(", {})", uint32_t(findResult->second.usageIndex)); 208 | break; 209 | } 210 | 211 | out += '.'; 212 | printDstSwizzle(instr.dstSwizzle, true); 213 | 214 | out += ";\n"; 215 | 216 |
printDstSwizzle01(instr.dstRegister, instr.dstSwizzle); 217 | 218 | if (instr.isPredicated) 219 | { 220 | --indentation; 221 | indent(); 222 | out += "}\n"; 223 | } 224 | } 225 | 226 | /* Emits the statement for a texture fetch instruction. Only the TextureFetch and GetTextureWeights opcodes are handled; any other opcode emits nothing. The generated call is tfetch<dim> or getWeights<dim>, taking the texture and sampler descriptor index names plus the source coordinates. bicubic is only consulted when UNLEASHED_RECOMP is defined, where it appends Bicubic to the function name. */ void ShaderRecompiler::recompile(const TextureFetchInstruction& instr, bool bicubic) 227 | { 228 | if (instr.opcode != FetchOpcode::TextureFetch && instr.opcode != FetchOpcode::GetTextureWeights) 229 | return; 230 | 231 | if (instr.isPredicated) 232 | { 233 | indent(); 234 | println("if ({}p0)", instr.predCondition ? "" : "!"); 235 | 236 | indent(); 237 | out += "{\n"; 238 | ++indentation; 239 | } 240 | 241 | /* Prints rN. followed by componentCount swizzle letters, each decoded from a 2-bit field of srcSwizzle. */ auto printSrcRegister = [&](size_t componentCount) 242 | { 243 | print("r{}.", instr.srcRegister); 244 | 245 | for (size_t i = 0; i < componentCount; i++) 246 | out += SWIZZLES[((instr.srcSwizzle >> (i * 2))) & 0x3]; 247 | }; 248 | 249 | std::string constName; 250 | const char* constNamePtr = nullptr; 251 | #ifdef UNLEASHED_RECOMP 252 | bool subtractFromOne = false; 253 | #endif 254 | 255 | /* Use the sampler name registered for constIndex if there is one, otherwise fall back to the generic name sN. */ auto findResult = samplers.find(instr.constIndex); 256 | if (findResult != samplers.end()) 257 | { 258 | constNamePtr = findResult->second; 259 | 260 | #ifdef UNLEASHED_RECOMP 261 | subtractFromOne = hasMtxPrevInvViewProjection && strcmp(constNamePtr, "sampZBuffer") == 0; 262 | #endif 263 | } 264 | else 265 | { 266 | constName = fmt::format("s{}", instr.constIndex); 267 | constNamePtr = constName.c_str(); 268 | } 269 | 270 | #ifdef UNLEASHED_RECOMP 271 | if (instr.constIndex == 0 && instr.dimension == TextureDimension::Texture2D) 272 | { 273 | indent(); 274 | print("pixelCoord = getPixelCoord({}_Texture2DDescriptorIndex, ", constNamePtr); 275 | printSrcRegister(2); 276 | out += ");\n"; 277 | } 278 | #endif 279 | 280 | indent(); 281 | print("r{}.", instr.dstRegister); 282 | printDstSwizzle(instr.dstSwizzle, false); 283 | 284 | out += " = "; 285 | switch (instr.opcode) 286 | { 287 | case FetchOpcode::TextureFetch: 288 | { 289 | #ifdef UNLEASHED_RECOMP 290 | if (subtractFromOne) 291 | out +=
"1.0 - "; 292 | #endif 293 | 294 | out += "tfetch"; 295 | break; 296 | } 297 | case FetchOpcode::GetTextureWeights: 298 | { 299 | out += "getWeights"; 300 | break; 301 | } 302 | } 303 | 304 | /* Pick the function-name suffix and the number of coordinate components for the texture dimension. */ std::string_view dimension; 305 | uint32_t componentCount = 0; 306 | 307 | switch (instr.dimension) 308 | { 309 | case TextureDimension::Texture1D: 310 | dimension = "1D"; 311 | componentCount = 1; 312 | break; 313 | case TextureDimension::Texture2D: 314 | dimension = "2D"; 315 | componentCount = 2; 316 | break; 317 | case TextureDimension::Texture3D: 318 | dimension = "3D"; 319 | componentCount = 3; 320 | break; 321 | case TextureDimension::TextureCube: 322 | dimension = "Cube"; 323 | componentCount = 3; 324 | break; 325 | } 326 | 327 | out += dimension; 328 | 329 | #ifdef UNLEASHED_RECOMP 330 | if (bicubic) 331 | out += "Bicubic"; 332 | #endif 333 | 334 | print("({0}_Texture{1}DescriptorIndex, {0}_SamplerDescriptorIndex, ", constNamePtr, dimension); 335 | printSrcRegister(componentCount); 336 | 337 | /* Extra trailing argument: the X and Y offsets scaled by 0.5 for 2D textures, cubeMapData for cube maps. */ switch (instr.dimension) 338 | { 339 | case TextureDimension::Texture2D: 340 | print(", float2({}, {})", instr.offsetX * 0.5f, instr.offsetY * 0.5f); 341 | break; 342 | case TextureDimension::TextureCube: 343 | out += ", cubeMapData"; 344 | break; 345 | } 346 | 347 | out += ")."; 348 | 349 | printDstSwizzle(instr.dstSwizzle, true); 350 | 351 | out += ";\n"; 352 | 353 | printDstSwizzle01(instr.dstRegister, instr.dstSwizzle); 354 | 355 | if (instr.isPredicated) 356 | { 357 | --indentation; 358 | indent(); 359 | out += "}\n"; 360 | } 361 | } 362 | 363 | /* Emits the statements for one ALU instruction: the optional predicate wrapper, kill and predicate/address-register side effects, the vector result, the scalar result (kept in ps), and the writes to temporary or export registers. */ void ShaderRecompiler::recompile(const AluInstruction& instr) 364 | { 365 | if (instr.isPredicated) 366 | { 367 | indent(); 368 | println("if ({}p0)", instr.predicateCondition ?
"" : "!"); 369 | 370 | indent(); 371 | out += "{\n"; 372 | ++indentation; 373 | } 374 | 375 | /* Operand selectors understood by the op lambda below. */ enum 376 | { 377 | VECTOR_0, 378 | VECTOR_1, 379 | VECTOR_2, 380 | SCALAR_0, 381 | SCALAR_1, 382 | SCALAR_CONSTANT_0, 383 | SCALAR_CONSTANT_1 384 | }; 385 | 386 | /* Formats one source operand as shader text: register or constant name, optional negation and abs(), and the swizzle letters appropriate for the operand kind. */ auto op = [&](size_t operand) 387 | { 388 | size_t reg = 0; 389 | size_t swizzle = 0; 390 | bool select = true; 391 | bool negate = false; 392 | bool abs = false; 393 | 394 | switch (operand) 395 | { 396 | case SCALAR_CONSTANT_0: 397 | reg = instr.src3Register; 398 | swizzle = instr.src3Swizzle; 399 | select = false; 400 | negate = instr.src3Negate; 401 | abs = instr.absConstants; 402 | break; 403 | 404 | case SCALAR_CONSTANT_1: 405 | reg = (uint32_t(instr.scalarOpcode) & 1) | (instr.src3Select << 1) | (instr.src3Swizzle & 0x3C); 406 | swizzle = instr.src3Swizzle; 407 | select = true; 408 | negate = instr.src3Negate; 409 | abs = instr.absConstants; 410 | break; 411 | 412 | default: 413 | switch (operand) 414 | { 415 | case VECTOR_0: 416 | reg = instr.src1Register; 417 | swizzle = instr.src1Swizzle; 418 | select = instr.src1Select; 419 | negate = instr.src1Negate; 420 | break; 421 | case VECTOR_1: 422 | reg = instr.src2Register; 423 | swizzle = instr.src2Swizzle; 424 | select = instr.src2Select; 425 | negate = instr.src2Negate; 426 | break; 427 | case VECTOR_2: 428 | case SCALAR_0: 429 | case SCALAR_1: 430 | reg = instr.src3Register; 431 | swizzle = instr.src3Swizzle; 432 | select = instr.src3Select; 433 | negate = instr.src3Negate; 434 | break; 435 | } 436 | 437 | /* In this default branch, temporary registers carry the abs flag in bit 7 of the register field and the register index in its low 6 bits; constants take abs from instr.absConstants. */ if (select) 438 | { 439 | abs = (reg & 0x80) != 0; 440 | reg &= 0x3F; 441 | } 442 | else 443 | { 444 | abs = instr.absConstants; 445 | } 446 | 447 | break; 448 | } 449 | 450 | std::string regFormatted; 451 | 452 | if (select) 453 | { 454 | regFormatted = fmt::format("r{}", reg); 455 | } 456 | else 457 | { 458 | /* Prefer the named constant covering this register; unnamed registers are printed as cN. */ auto findResult = float4Constants.find(reg); 459 | if (findResult != float4Constants.end()) 460 | { 461 | const char* constantName =
reinterpret_cast(constantTableData + findResult->second->name); 462 | if (findResult->second->registerCount > 1) 463 | { 464 | #ifdef UNLEASHED_RECOMP 465 | if (hasMtxProjection && strcmp(constantName, "g_MtxProjection") == 0) 466 | { 467 | regFormatted = fmt::format("(iterationIndex == 0 ? mtxProjectionReverseZ[{0}] : mtxProjection[{0}])", 468 | reg - findResult->second->registerIndex); 469 | } 470 | else 471 | #endif 472 | { 473 | regFormatted = fmt::format("{}({}{})", constantName, 474 | reg - findResult->second->registerIndex, instr.const0Relative ? (instr.constAddressRegisterRelative ? " + a0" : " + aL") : ""); 475 | } 476 | } 477 | else 478 | { 479 | assert(!instr.const0Relative && !instr.const1Relative); 480 | regFormatted = constantName; 481 | } 482 | } 483 | else 484 | { 485 | assert(!instr.const0Relative && !instr.const1Relative); 486 | regFormatted = fmt::format("c{}", reg); 487 | } 488 | } 489 | 490 | /* Assemble the operand text: optional minus sign, optional abs( wrapper, register name, a dot, then the swizzle letters. */ std::string result; 491 | 492 | if (negate) 493 | result += '-'; 494 | 495 | if (abs) 496 | result += "abs("; 497 | 498 | result += regFormatted; 499 | result += '.'; 500 | 501 | switch (operand) 502 | { 503 | case VECTOR_0: 504 | case VECTOR_1: 505 | case VECTOR_2: 506 | { 507 | /* Select which components are printed: dot products and Max4 use a fixed component count, every other opcode follows the vector write mask (or just the first component when the mask is empty). */ uint32_t mask; 508 | 509 | switch (instr.vectorOpcode) 510 | { 511 | case AluVectorOpcode::Dp2Add: 512 | mask = (operand == VECTOR_2) ? 0b1 : 0b11; 513 | break; 514 | 515 | case AluVectorOpcode::Dp3: 516 | mask = 0b111; 517 | break; 518 | 519 | case AluVectorOpcode::Dp4: 520 | case AluVectorOpcode::Max4: 521 | mask = 0b1111; 522 | break; 523 | 524 | default: 525 | mask = instr.vectorWriteMask != 0 ?
instr.vectorWriteMask : 0b1; 526 | break; 527 | } 528 | 529 | /* Each 2-bit swizzle field is added to the component index i modulo 4 to get the source component. */ for (size_t i = 0; i < 4; i++) 530 | { 531 | if ((mask >> i) & 0x1) 532 | result += SWIZZLES[((swizzle >> (i * 2)) + i) & 0x3]; 533 | } 534 | 535 | break; 536 | } 537 | 538 | case SCALAR_0: 539 | case SCALAR_CONSTANT_0: 540 | result += SWIZZLES[((swizzle >> 6) + 3) & 0x3]; 541 | break; 542 | 543 | case SCALAR_1: 544 | case SCALAR_CONSTANT_1: 545 | result += SWIZZLES[swizzle & 0x3]; 546 | break; 547 | } 548 | 549 | if (abs) 550 | result += ")"; 551 | 552 | return result; 553 | }; 554 | 555 | /* Vector kill opcodes: emit a clip() whose argument is -1 when any component comparison is true and 1 otherwise. */ switch (instr.vectorOpcode) 556 | { 557 | case AluVectorOpcode::KillEq: 558 | indent(); 559 | println("clip(any({} == {}) ? -1 : 1);", op(VECTOR_0), op(VECTOR_1)); 560 | break; 561 | 562 | case AluVectorOpcode::KillGt: 563 | indent(); 564 | println("clip(any({} > {}) ? -1 : 1);", op(VECTOR_0), op(VECTOR_1)); 565 | break; 566 | 567 | case AluVectorOpcode::KillGe: 568 | indent(); 569 | println("clip(any({} >= {}) ? -1 : 1);", op(VECTOR_0), op(VECTOR_1)); 570 | break; 571 | 572 | case AluVectorOpcode::KillNe: 573 | indent(); 574 | println("clip(any({} != {}) ?
-1 : 1);", op(VECTOR_0), op(VECTOR_1)); 575 | break; 576 | } 577 | 578 | bool closeIfBracket = false; 579 | 580 | std::string_view exportRegister; 581 | if (instr.exportData) 582 | { 583 | if (isPixelShader) 584 | { 585 | switch (ExportRegister(instr.vectorDest)) 586 | { 587 | case ExportRegister::PSColor0: 588 | exportRegister = "oC0"; 589 | break; 590 | case ExportRegister::PSColor1: 591 | exportRegister = "oC1"; 592 | break; 593 | case ExportRegister::PSColor2: 594 | exportRegister = "oC2"; 595 | break; 596 | case ExportRegister::PSColor3: 597 | exportRegister = "oC3"; 598 | break; 599 | case ExportRegister::PSDepth: 600 | exportRegister = "oDepth"; 601 | break; 602 | } 603 | } 604 | else 605 | { 606 | switch (ExportRegister(instr.vectorDest)) 607 | { 608 | case ExportRegister::VSPosition: 609 | exportRegister = "oPos"; 610 | 611 | #ifdef UNLEASHED_RECOMP 612 | if (hasMtxProjection) 613 | { 614 | indent(); 615 | out += "if ((g_SpecConstants() & SPEC_CONSTANT_REVERSE_Z) == 0 || iterationIndex == 0)\n"; 616 | indent(); 617 | out += "{\n"; 618 | ++indentation; 619 | 620 | closeIfBracket = true; 621 | } 622 | #endif 623 | 624 | break; 625 | 626 | default: 627 | { 628 | auto findResult = interpolators.find(instr.vectorDest); 629 | assert(findResult != interpolators.end()); 630 | exportRegister = findResult->second; 631 | break; 632 | } 633 | } 634 | } 635 | } 636 | 637 | if (instr.vectorOpcode >= AluVectorOpcode::SetpEqPush && instr.vectorOpcode <= AluVectorOpcode::SetpGePush) 638 | { 639 | indent(); 640 | print("p0 = {} == 0.0 && {} ", op(VECTOR_0), op(VECTOR_1)); 641 | 642 | switch (instr.vectorOpcode) 643 | { 644 | case AluVectorOpcode::SetpEqPush: 645 | out += "=="; 646 | break; 647 | case AluVectorOpcode::SetpNePush: 648 | out += "!="; 649 | break; 650 | case AluVectorOpcode::SetpGtPush: 651 | out += ">"; 652 | break; 653 | case AluVectorOpcode::SetpGePush: 654 | out += ">="; 655 | break; 656 | } 657 | 658 | out += " 0.0;\n"; 659 | } 660 | else if
(instr.vectorOpcode >= AluVectorOpcode::MaxA) 661 | { 662 | indent(); 663 | println("a0 = (int)clamp(floor(({}).w + 0.5), -256.0, 255.0);", op(VECTOR_0)); 664 | } 665 | 666 | /* For exports, components that are also in the scalar write mask are removed from the vector write. */ uint32_t vectorWriteMask = instr.vectorWriteMask; 667 | if (instr.exportData) 668 | vectorWriteMask &= ~instr.scalarWriteMask; 669 | 670 | if (vectorWriteMask != 0) 671 | { 672 | indent(); 673 | if (!exportRegister.empty()) 674 | { 675 | out += exportRegister; 676 | out += '.'; 677 | } 678 | else 679 | { 680 | print("r{}.", instr.vectorDest); 681 | } 682 | 683 | for (size_t i = 0; i < 4; i++) 684 | { 685 | if ((vectorWriteMask >> i) & 0x1) 686 | out += SWIZZLES[i]; 687 | } 688 | 689 | out += " = "; 690 | 691 | if (instr.vectorSaturate) 692 | out += "saturate("; 693 | 694 | switch (instr.vectorOpcode) 695 | { 696 | case AluVectorOpcode::Add: 697 | print("{} + {}", op(VECTOR_0), op(VECTOR_1)); 698 | break; 699 | 700 | case AluVectorOpcode::Mul: 701 | print("{} * {}", op(VECTOR_0), op(VECTOR_1)); 702 | break; 703 | 704 | case AluVectorOpcode::Max: 705 | case AluVectorOpcode::MaxA: 706 | print("max({}, {})", op(VECTOR_0), op(VECTOR_1)); 707 | break; 708 | 709 | case AluVectorOpcode::Min: 710 | print("min({}, {})", op(VECTOR_0), op(VECTOR_1)); 711 | break; 712 | 713 | case AluVectorOpcode::Seq: 714 | print("{} == {}", op(VECTOR_0), op(VECTOR_1)); 715 | break; 716 | 717 | case AluVectorOpcode::Sgt: 718 | print("{} > {}", op(VECTOR_0), op(VECTOR_1)); 719 | break; 720 | 721 | case AluVectorOpcode::Sge: 722 | print("{} >= {}", op(VECTOR_0), op(VECTOR_1)); 723 | break; 724 | 725 | case AluVectorOpcode::Sne: 726 | print("{} != {}", op(VECTOR_0), op(VECTOR_1)); 727 | break; 728 | 729 | case AluVectorOpcode::Frc: 730 | print("frac({})", op(VECTOR_0)); 731 | break; 732 | 733 | case AluVectorOpcode::Trunc: 734 | print("trunc({})", op(VECTOR_0)); 735 | break; 736 | 737 | case AluVectorOpcode::Floor: 738 | print("floor({})", op(VECTOR_0)); 739 | break; 740 | 741 | case AluVectorOpcode::Mad: 742 | print("{} *
{} + {}", op(VECTOR_0), op(VECTOR_1), op(VECTOR_2)); 743 | break; 744 | 745 | case AluVectorOpcode::CndEq: 746 | print("select({} == 0.0, {}, {})", op(VECTOR_0), op(VECTOR_1), op(VECTOR_2)); 747 | break; 748 | 749 | case AluVectorOpcode::CndGe: 750 | print("select({} >= 0.0, {}, {})", op(VECTOR_0), op(VECTOR_1), op(VECTOR_2)); 751 | break; 752 | 753 | case AluVectorOpcode::CndGt: 754 | print("select({} > 0.0, {}, {})", op(VECTOR_0), op(VECTOR_1), op(VECTOR_2)); 755 | break; 756 | 757 | case AluVectorOpcode::Dp4: 758 | case AluVectorOpcode::Dp3: 759 | print("dot({}, {})", op(VECTOR_0), op(VECTOR_1)); 760 | break; 761 | 762 | case AluVectorOpcode::Dp2Add: 763 | print("dot({}, {}) + {}", op(VECTOR_0), op(VECTOR_1), op(VECTOR_2)); 764 | break; 765 | 766 | case AluVectorOpcode::Cube: 767 | print("cube(r{}, cubeMapData)", instr.src1Register); 768 | break; 769 | 770 | case AluVectorOpcode::Max4: 771 | print("max4({})", op(VECTOR_0)); 772 | break; 773 | 774 | case AluVectorOpcode::SetpEqPush: 775 | case AluVectorOpcode::SetpNePush: 776 | case AluVectorOpcode::SetpGtPush: 777 | case AluVectorOpcode::SetpGePush: 778 | print("p0 ?
0.0 : {} + 1.0", op(VECTOR_0)); 779 | break; 780 | 781 | case AluVectorOpcode::KillEq: 782 | print("any({} == {})", op(VECTOR_0), op(VECTOR_1)); 783 | break; 784 | 785 | case AluVectorOpcode::KillGt: 786 | print("any({} > {})", op(VECTOR_0), op(VECTOR_1)); 787 | break; 788 | 789 | case AluVectorOpcode::KillGe: 790 | print("any({} >= {})", op(VECTOR_0), op(VECTOR_1)); 791 | break; 792 | 793 | case AluVectorOpcode::KillNe: 794 | print("any({} != {})", op(VECTOR_0), op(VECTOR_1)); 795 | break; 796 | 797 | case AluVectorOpcode::Dst: 798 | print("dst({}, {})", op(VECTOR_0), op(VECTOR_1)); 799 | break; 800 | } 801 | 802 | if (instr.vectorSaturate) 803 | out += ')'; 804 | 805 | out += ";\n"; 806 | } 807 | 808 | if (instr.scalarOpcode != AluScalarOpcode::RetainPrev) 809 | { 810 | if (instr.scalarOpcode >= AluScalarOpcode::SetpEq && instr.scalarOpcode <= AluScalarOpcode::SetpRstr) 811 | { 812 | indent(); 813 | out += "p0 = "; 814 | 815 | switch (instr.scalarOpcode) 816 | { 817 | case AluScalarOpcode::SetpEq: 818 | print("{} == 0.0", op(SCALAR_0)); 819 | break; 820 | 821 | case AluScalarOpcode::SetpNe: 822 | print("{} != 0.0", op(SCALAR_0)); 823 | break; 824 | 825 | case AluScalarOpcode::SetpGt: 826 | print("{} > 0.0", op(SCALAR_0)); 827 | break; 828 | 829 | case AluScalarOpcode::SetpGe: 830 | print("{} >= 0.0", op(SCALAR_0)); 831 | break; 832 | 833 | case AluScalarOpcode::SetpInv: 834 | print("{} == 1.0", op(SCALAR_0)); 835 | break; 836 | 837 | case AluScalarOpcode::SetpPop: 838 | print("{} - 1.0 <= 0.0", op(SCALAR_0)); 839 | break; 840 | 841 | case AluScalarOpcode::SetpClr: 842 | out += "false"; 843 | break; 844 | 845 | case AluScalarOpcode::SetpRstr: 846 | print("{} == 0.0", op(SCALAR_0)); 847 | break; 848 | } 849 | 850 | out += ";\n"; 851 | } 852 | 853 | indent(); 854 | out += "ps = "; 855 | if (instr.scalarSaturate) 856 | out += "saturate("; 857 | 858 | switch (instr.scalarOpcode) 859 | { 860 | case AluScalarOpcode::Adds: 861 | print("{} + {}", op(SCALAR_0),
op(SCALAR_1)); 862 | break; 863 | 864 | case AluScalarOpcode::AddsPrev: 865 | print("{} + ps", op(SCALAR_0)); 866 | break; 867 | 868 | case AluScalarOpcode::Muls: 869 | print("{} * {}", op(SCALAR_0), op(SCALAR_1)); 870 | break; 871 | 872 | case AluScalarOpcode::MulsPrev: 873 | case AluScalarOpcode::MulsPrev2: 874 | print("{} * ps", op(SCALAR_0)); 875 | break; 876 | 877 | case AluScalarOpcode::Maxs: 878 | case AluScalarOpcode::MaxAs: 879 | case AluScalarOpcode::MaxAsf: 880 | print("max({}, {})", op(SCALAR_0), op(SCALAR_1)); 881 | break; 882 | 883 | case AluScalarOpcode::Mins: 884 | print("min({}, {})", op(SCALAR_0), op(SCALAR_1)); 885 | break; 886 | 887 | case AluScalarOpcode::Seqs: 888 | print("{} == 0.0", op(SCALAR_0)); 889 | break; 890 | 891 | case AluScalarOpcode::Sgts: 892 | print("{} > 0.0", op(SCALAR_0)); 893 | break; 894 | 895 | case AluScalarOpcode::Sges: 896 | print("{} >= 0.0", op(SCALAR_0)); 897 | break; 898 | 899 | case AluScalarOpcode::Snes: 900 | print("{} != 0.0", op(SCALAR_0)); 901 | break; 902 | 903 | case AluScalarOpcode::Frcs: 904 | print("frac({})", op(SCALAR_0)); 905 | break; 906 | 907 | case AluScalarOpcode::Truncs: 908 | print("trunc({})", op(SCALAR_0)); 909 | break; 910 | 911 | case AluScalarOpcode::Floors: 912 | print("floor({})", op(SCALAR_0)); 913 | break; 914 | 915 | case AluScalarOpcode::Exp: 916 | print("exp2({})", op(SCALAR_0)); 917 | break; 918 | 919 | /* log2, rcp and rsqrt results are clamped to the FLT_MIN..FLT_MAX range in the generated code. */ case AluScalarOpcode::Logc: 920 | case AluScalarOpcode::Log: 921 | print("clamp(log2({}), FLT_MIN, FLT_MAX)", op(SCALAR_0)); 922 | break; 923 | 924 | case AluScalarOpcode::Rcpc: 925 | case AluScalarOpcode::Rcpf: 926 | case AluScalarOpcode::Rcp: 927 | print("clamp(rcp({}), FLT_MIN, FLT_MAX)", op(SCALAR_0)); 928 | break; 929 | 930 | case AluScalarOpcode::Rsqc: 931 | case AluScalarOpcode::Rsqf: 932 | case AluScalarOpcode::Rsq: 933 | print("clamp(rsqrt({}), FLT_MIN, FLT_MAX)", op(SCALAR_0)); 934 | break; 935 | 936 | case AluScalarOpcode::Subs: 937 | print("{} - {}", op(SCALAR_0),
op(SCALAR_1)); 938 | break; 939 | 940 | case AluScalarOpcode::SubsPrev: 941 | print("{} - ps", op(SCALAR_0)); 942 | break; 943 | 944 | /* Predicate-setting opcodes: p0 has already been assigned by the code emitted just before the ps statement, so these expressions may read it. */ case AluScalarOpcode::SetpEq: 945 | case AluScalarOpcode::SetpNe: 946 | case AluScalarOpcode::SetpGt: 947 | case AluScalarOpcode::SetpGe: 948 | out += "p0 ? 0.0 : 1.0"; 949 | break; 950 | 951 | case AluScalarOpcode::SetpInv: 952 | print("{0} == 0.0 ? 1.0 : {0}", op(SCALAR_0)); 953 | break; 954 | 955 | case AluScalarOpcode::SetpPop: 956 | print("p0 ? 0.0 : ({} - 1.0)", op(SCALAR_0)); 957 | break; 958 | 959 | case AluScalarOpcode::SetpClr: 960 | out += "FLT_MAX"; 961 | break; 962 | 963 | case AluScalarOpcode::SetpRstr: 964 | print("p0 ? 0.0 : {}", op(SCALAR_0)); 965 | break; 966 | 967 | case AluScalarOpcode::KillsEq: 968 | print("{} == 0.0", op(SCALAR_0)); 969 | break; 970 | 971 | case AluScalarOpcode::KillsGt: 972 | print("{} > 0.0", op(SCALAR_0)); 973 | break; 974 | 975 | case AluScalarOpcode::KillsGe: 976 | print("{} >= 0.0", op(SCALAR_0)); 977 | break; 978 | 979 | case AluScalarOpcode::KillsNe: 980 | print("{} != 0.0", op(SCALAR_0)); 981 | break; 982 | 983 | case AluScalarOpcode::KillsOne: 984 | print("{} == 1.0", op(SCALAR_0)); 985 | break; 986 | 987 | case AluScalarOpcode::Sqrt: 988 | print("sqrt({})", op(SCALAR_0)); 989 | break; 990 | 991 | case AluScalarOpcode::Mulsc0: 992 | case AluScalarOpcode::Mulsc1: 993 | print("{} * {}", op(SCALAR_CONSTANT_0), op(SCALAR_CONSTANT_1)); 994 | break; 995 | 996 | case AluScalarOpcode::Addsc0: 997 | case AluScalarOpcode::Addsc1: 998 | print("{} + {}", op(SCALAR_CONSTANT_0), op(SCALAR_CONSTANT_1)); 999 | break; 1000 | 1001 | case AluScalarOpcode::Subsc0: 1002 | case AluScalarOpcode::Subsc1: 1003 | print("{} - {}", op(SCALAR_CONSTANT_0), op(SCALAR_CONSTANT_1)); 1004 | break; 1005 | 1006 | case AluScalarOpcode::Sin: 1007 | print("sin({})", op(SCALAR_0)); 1008 | break; 1009 | 1010 | case AluScalarOpcode::Cos: 1011 | print("cos({})", op(SCALAR_0)); 1012 | break; 1013 | } 1014 | 1015 | if
(instr.scalarSaturate) 1016 | out += ')'; 1017 | 1018 | out += ";\n"; 1019 | 1020 | /* MaxAs and MaxAsf also load a0 from the first scalar operand, clamped to -256..255; MaxAs adds 0.5 before flooring. */ switch (instr.scalarOpcode) 1021 | { 1022 | case AluScalarOpcode::MaxAs: 1023 | indent(); 1024 | println("a0 = (int)clamp(floor({} + 0.5), -256.0, 255.0);", op(SCALAR_0)); 1025 | break; 1026 | case AluScalarOpcode::MaxAsf: 1027 | indent(); 1028 | println("a0 = (int)clamp(floor({}), -256.0, 255.0);", op(SCALAR_0)); 1029 | break; 1030 | } 1031 | } 1032 | 1033 | /* For exports, components that are also in the vector write mask are removed from the scalar write. */ uint32_t scalarWriteMask = instr.scalarWriteMask; 1034 | if (instr.exportData) 1035 | scalarWriteMask &= ~instr.vectorWriteMask; 1036 | 1037 | if (scalarWriteMask != 0) 1038 | { 1039 | indent(); 1040 | if (!exportRegister.empty()) 1041 | { 1042 | out += exportRegister; 1043 | out += '.'; 1044 | } 1045 | else 1046 | { 1047 | print("r{}.", instr.scalarDest); 1048 | } 1049 | 1050 | for (size_t i = 0; i < 4; i++) 1051 | { 1052 | if ((scalarWriteMask >> i) & 0x1) 1053 | out += SWIZZLES[i]; 1054 | } 1055 | 1056 | out += " = ps;\n"; 1057 | } 1058 | 1059 | if (instr.exportData) 1060 | { 1061 | /* Export fix-up: components present in both write masks are set to 1.0; when scalarDestRelative is set, components present in neither mask are set to 0.0. */ uint32_t zeroMask = instr.scalarDestRelative ? (0b1111 & ~(instr.vectorWriteMask | instr.scalarWriteMask)) : 0; 1062 | uint32_t oneMask = instr.vectorWriteMask & instr.scalarWriteMask; 1063 | 1064 | for (size_t i = 0; i < 4; i++) 1065 | { 1066 | uint32_t mask = 1 << i; 1067 | if (zeroMask & mask) 1068 | { 1069 | indent(); 1070 | println("{}.{} = 0.0;", exportRegister, SWIZZLES[i]); 1071 | } 1072 | else if (oneMask & mask) 1073 | { 1074 | indent(); 1075 | println("{}.{} = 1.0;", exportRegister, SWIZZLES[i]); 1076 | } 1077 | } 1078 | } 1079 | 1080 | if (instr.scalarOpcode >= AluScalarOpcode::KillsEq && instr.scalarOpcode <= AluScalarOpcode::KillsOne) 1081 | { 1082 | indent(); 1083 | out += "clip(ps != 0.0 ?
-1 : 1);\n"; 1084 | } 1085 | 1086 | if (closeIfBracket) 1087 | { 1088 | --indentation; 1089 | indent(); 1090 | out += "}\n"; 1091 | } 1092 | 1093 | if (instr.isPredicated) 1094 | { 1095 | --indentation; 1096 | indent(); 1097 | out += "}\n"; 1098 | } 1099 | } 1100 | 1101 | void ShaderRecompiler::recompile(const uint8_t* shaderData, const std::string_view& include) 1102 | { 1103 | const auto shaderContainer = reinterpret_cast(shaderData); 1104 | 1105 | assert((shaderContainer->flags & 0xFFFFFF00) == 0x102A1100); 1106 | assert(shaderContainer->constantTableOffset != NULL); 1107 | 1108 | out += include; 1109 | out += '\n'; 1110 | 1111 | isPixelShader = (shaderContainer->flags & 0x1) == 0; 1112 | 1113 | const auto constantTableContainer = reinterpret_cast(shaderData + shaderContainer->constantTableOffset); 1114 | constantTableData = reinterpret_cast(&constantTableContainer->constantTable); 1115 | 1116 | out += "#ifdef __spirv__\n\n"; 1117 | 1118 | #ifdef UNLEASHED_RECOMP 1119 | bool isMetaInstancer = false; 1120 | bool hasIndexCount = false; 1121 | #endif 1122 | 1123 | for (uint32_t i = 0; i < constantTableContainer->constantTable.constants; i++) 1124 | { 1125 | const auto constantInfo = reinterpret_cast( 1126 | constantTableData + constantTableContainer->constantTable.constantInfo + i * sizeof(ConstantInfo)); 1127 | 1128 | const char* constantName = reinterpret_cast(constantTableData + constantInfo->name); 1129 | 1130 | #ifdef UNLEASHED_RECOMP 1131 | if (!isPixelShader) 1132 | { 1133 | if (strcmp(constantName, "g_MtxProjection") == 0) 1134 | hasMtxProjection = true; 1135 | else if (strcmp(constantName, "g_InstanceTypes") == 0) 1136 | isMetaInstancer = true; 1137 | else if (strcmp(constantName, "g_IndexCount") == 0) 1138 | hasIndexCount = true; 1139 | } 1140 | else 1141 | { 1142 | if (strcmp(constantName, "g_MtxPrevInvViewProjection") == 0) 1143 | hasMtxPrevInvViewProjection = true; 1144 | } 1145 | #endif 1146 | 1147 | switch (constantInfo->registerSet) 1148 | { 1149 | 
case RegisterSet::Float4: 1150 | { 1151 | const char* shaderName = isPixelShader ? "Pixel" : "Vertex"; 1152 | 1153 | if (constantInfo->registerCount > 1) 1154 | { 1155 | uint32_t tailCount = (isPixelShader ? 224 : 256) - constantInfo->registerIndex; 1156 | 1157 | println("#define {}(INDEX) select((INDEX) < {}, vk::RawBufferLoad(g_PushConstants.{}ShaderConstants + ({} + min(INDEX, {})) * 16, 0x10), 0.0)", 1158 | constantName, tailCount, shaderName, constantInfo->registerIndex.get(), tailCount - 1); 1159 | } 1160 | else 1161 | { 1162 | println("#define {} vk::RawBufferLoad(g_PushConstants.{}ShaderConstants + {}, 0x10)", 1163 | constantName, shaderName, constantInfo->registerIndex * 16); 1164 | } 1165 | 1166 | for (uint16_t j = 0; j < constantInfo->registerCount; j++) 1167 | float4Constants.emplace(constantInfo->registerIndex + j, constantInfo); 1168 | 1169 | break; 1170 | } 1171 | 1172 | case RegisterSet::Sampler: 1173 | { 1174 | for (size_t j = 0; j < std::size(TEXTURE_DIMENSIONS); j++) 1175 | { 1176 | println("#define {}_Texture{}DescriptorIndex vk::RawBufferLoad(g_PushConstants.SharedConstants + {})", 1177 | constantName, TEXTURE_DIMENSIONS[j], j * 64 + constantInfo->registerIndex * 4); 1178 | } 1179 | 1180 | println("#define {}_SamplerDescriptorIndex vk::RawBufferLoad(g_PushConstants.SharedConstants + {})", 1181 | constantName, std::size(TEXTURE_DIMENSIONS) * 64 + constantInfo->registerIndex * 4); 1182 | 1183 | samplers.emplace(constantInfo->registerIndex, constantName); 1184 | break; 1185 | } 1186 | 1187 | } 1188 | } 1189 | 1190 | out += "\n#else\n\n"; 1191 | 1192 | println("cbuffer {}ShaderConstants : register(b{}, space4)", isPixelShader ? "Pixel" : "Vertex", isPixelShader ? 
1 : 0); 1193 | out += "{\n"; 1194 | 1195 | for (uint32_t i = 0; i < constantTableContainer->constantTable.constants; i++) 1196 | { 1197 | const auto constantInfo = reinterpret_cast( 1198 | constantTableData + constantTableContainer->constantTable.constantInfo + i * sizeof(ConstantInfo)); 1199 | 1200 | if (constantInfo->registerSet == RegisterSet::Float4) 1201 | { 1202 | const char* constantName = reinterpret_cast(constantTableData + constantInfo->name); 1203 | 1204 | print("\tfloat4 {}", constantName); 1205 | 1206 | if (constantInfo->registerCount > 1) 1207 | print("[{}]", constantInfo->registerCount.get()); 1208 | 1209 | println(" : packoffset(c{});", constantInfo->registerIndex.get()); 1210 | 1211 | if (constantInfo->registerCount > 1) 1212 | { 1213 | uint32_t tailCount = (isPixelShader ? 224 : 256) - constantInfo->registerIndex; 1214 | println("#define {0}(INDEX) select((INDEX) < {1}, {0}[min(INDEX, {2})], 0.0)", constantName, tailCount, tailCount - 1); 1215 | } 1216 | } 1217 | } 1218 | 1219 | out += "};\n\n"; 1220 | 1221 | out += "cbuffer SharedConstants : register(b2, space4)\n"; 1222 | out += "{\n"; 1223 | 1224 | for (uint32_t i = 0; i < constantTableContainer->constantTable.constants; i++) 1225 | { 1226 | const auto constantInfo = reinterpret_cast( 1227 | constantTableData + constantTableContainer->constantTable.constantInfo + i * sizeof(ConstantInfo)); 1228 | 1229 | if (constantInfo->registerSet == RegisterSet::Sampler) 1230 | { 1231 | const char* constantName = reinterpret_cast(constantTableData + constantInfo->name); 1232 | 1233 | for (size_t j = 0; j < std::size(TEXTURE_DIMENSIONS); j++) 1234 | { 1235 | println("\tuint {}_Texture{}DescriptorIndex : packoffset(c{}.{});", 1236 | constantName, TEXTURE_DIMENSIONS[j], j * 4 + constantInfo->registerIndex / 4, SWIZZLES[constantInfo->registerIndex % 4]); 1237 | } 1238 | 1239 | println("\tuint {}_SamplerDescriptorIndex : packoffset(c{}.{});", 1240 | constantName, 4 * std::size(TEXTURE_DIMENSIONS) + 
constantInfo->registerIndex / 4, SWIZZLES[constantInfo->registerIndex % 4]); 1241 | } 1242 | } 1243 | 1244 | out += "\tDEFINE_SHARED_CONSTANTS();\n"; 1245 | out += "};\n\n"; 1246 | 1247 | out += "#endif\n"; 1248 | 1249 | for (uint32_t i = 0; i < constantTableContainer->constantTable.constants; i++) 1250 | { 1251 | const auto constantInfo = reinterpret_cast( 1252 | constantTableData + constantTableContainer->constantTable.constantInfo + i * sizeof(ConstantInfo)); 1253 | 1254 | if (constantInfo->registerSet == RegisterSet::Bool) 1255 | { 1256 | const char* constantName = reinterpret_cast(constantTableData + constantInfo->name); 1257 | println("\t#define {} (1 << {})", constantName, constantInfo->registerIndex + (isPixelShader ? 16 : 0)); 1258 | boolConstants.emplace(constantInfo->registerIndex, constantName); 1259 | } 1260 | } 1261 | 1262 | out += '\n'; 1263 | 1264 | const auto shader = reinterpret_cast(shaderData + shaderContainer->shaderOffset); 1265 | 1266 | out += "#ifndef __spirv__\n"; 1267 | 1268 | if (isPixelShader) 1269 | out += "[shader(\"pixel\")]\n"; 1270 | else 1271 | out += "[shader(\"vertex\")]\n"; 1272 | 1273 | out += "#endif\n"; 1274 | 1275 | out += "void main(\n"; 1276 | 1277 | if (isPixelShader) 1278 | { 1279 | out += "\tin float4 iPos : SV_Position,\n"; 1280 | 1281 | for (auto& [usage, usageIndex] : INTERPOLATORS) 1282 | println("\tin float4 i{0}{1} : {2}{1},", USAGE_VARIABLES[uint32_t(usage)], usageIndex, USAGE_SEMANTICS[uint32_t(usage)]); 1283 | 1284 | out += "#ifdef __spirv__\n"; 1285 | out += "\tin bool iFace : SV_IsFrontFace\n"; 1286 | out += "#else\n"; 1287 | out += "\tin uint iFace : SV_IsFrontFace\n"; 1288 | out += "#endif\n"; 1289 | 1290 | auto pixelShader = reinterpret_cast(shader); 1291 | if (pixelShader->outputs & PIXEL_SHADER_OUTPUT_COLOR0) 1292 | out += ",\n\tout float4 oC0 : SV_Target0"; 1293 | if (pixelShader->outputs & PIXEL_SHADER_OUTPUT_COLOR1) 1294 | out += ",\n\tout float4 oC1 : SV_Target1"; 1295 | if (pixelShader->outputs & 
PIXEL_SHADER_OUTPUT_COLOR2) 1296 | out += ",\n\tout float4 oC2 : SV_Target2"; 1297 | if (pixelShader->outputs & PIXEL_SHADER_OUTPUT_COLOR3) 1298 | out += ",\n\tout float4 oC3 : SV_Target3"; 1299 | if (pixelShader->outputs & PIXEL_SHADER_OUTPUT_DEPTH) 1300 | out += ",\n\tout float oDepth : SV_Depth"; 1301 | } 1302 | else 1303 | { 1304 | auto vertexShader = reinterpret_cast(shader); 1305 | for (uint32_t i = 0; i < vertexShader->vertexElementCount; i++) 1306 | { 1307 | union 1308 | { 1309 | VertexElement vertexElement; 1310 | uint32_t value; 1311 | }; 1312 | 1313 | value = vertexShader->vertexElementsAndInterpolators[vertexShader->field18 + i]; 1314 | 1315 | const char* usageType = USAGE_TYPES[uint32_t(vertexElement.usage)]; 1316 | 1317 | #ifdef UNLEASHED_RECOMP 1318 | if ((vertexElement.usage == DeclUsage::TexCoord && vertexElement.usageIndex == 2 && isMetaInstancer) || 1319 | (vertexElement.usage == DeclUsage::Position && vertexElement.usageIndex == 1)) 1320 | { 1321 | usageType = "uint4"; 1322 | } 1323 | #endif 1324 | 1325 | out += '\t'; 1326 | 1327 | for (auto& usageLocation : USAGE_LOCATIONS) 1328 | { 1329 | if (usageLocation.usage == vertexElement.usage && usageLocation.usageIndex == vertexElement.usageIndex) 1330 | { 1331 | print("[[vk::location({})]] ", usageLocation.location); 1332 | break; 1333 | } 1334 | } 1335 | 1336 | println("in {0} i{1}{2} : {3}{2},", usageType, USAGE_VARIABLES[uint32_t(vertexElement.usage)], 1337 | uint32_t(vertexElement.usageIndex), USAGE_SEMANTICS[uint32_t(vertexElement.usage)]); 1338 | 1339 | vertexElements.emplace(uint32_t(vertexElement.address), vertexElement); 1340 | } 1341 | 1342 | #ifdef UNLEASHED_RECOMP 1343 | if (hasIndexCount) 1344 | { 1345 | out += "\tin uint iVertexId : SV_VertexID,\n"; 1346 | out += "\tin uint iInstanceId : SV_InstanceID,\n"; 1347 | } 1348 | #endif 1349 | 1350 | out += "\tout float4 oPos : SV_Position"; 1351 | 1352 | for (auto& [usage, usageIndex] : INTERPOLATORS) 1353 | print(",\n\tout float4 o{0}{1} : 
{2}{1}", USAGE_VARIABLES[uint32_t(usage)], usageIndex, USAGE_SEMANTICS[uint32_t(usage)]); 1354 | } 1355 | 1356 | out += ")\n"; 1357 | out += "{\n"; 1358 | 1359 | #ifdef UNLEASHED_RECOMP 1360 | if (hasMtxProjection) 1361 | { 1362 | specConstantsMask |= SPEC_CONSTANT_REVERSE_Z; 1363 | 1364 | out += "\toPos = 0.0;\n"; 1365 | 1366 | out += "\tfloat4x4 mtxProjection = float4x4(g_MtxProjection(0), g_MtxProjection(1), g_MtxProjection(2), g_MtxProjection(3));\n"; 1367 | out += "\tfloat4x4 mtxProjectionReverseZ = mul(mtxProjection, float4x4(1, 0, 0, 0, 0, 1, 0, 0, 0, 0, -1, 0, 0, 0, 1, 1));\n"; 1368 | 1369 | out += "\t[unroll] for (int iterationIndex = 0; iterationIndex < 2; iterationIndex++)\n"; 1370 | out += "\t{\n"; 1371 | } 1372 | #endif 1373 | 1374 | if (shaderContainer->definitionTableOffset != NULL) 1375 | { 1376 | auto definitionTable = reinterpret_cast(shaderData + shaderContainer->definitionTableOffset); 1377 | auto definitions = definitionTable->definitions; 1378 | while (*definitions != 0) 1379 | { 1380 | auto definition = reinterpret_cast(definitions); 1381 | auto value = reinterpret_cast*>(shaderData + shaderContainer->virtualSize + definition->physicalOffset); 1382 | for (uint16_t i = 0; i < (definition->count + 3) / 4; i++) 1383 | { 1384 | println("\tfloat4 c{} = asfloat(uint4(0x{:X}, 0x{:X}, 0x{:X}, 0x{:X}));", 1385 | definition->registerIndex + i - (isPixelShader ? 
256 : 0), value[0].get(), value[1].get(), value[2].get(), value[3].get()); 1386 | 1387 | value += 4; 1388 | } 1389 | definitions += 2; 1390 | } 1391 | ++definitions; 1392 | while (*definitions != 0) 1393 | { 1394 | auto definition = reinterpret_cast(definitions); 1395 | for (uint16_t i = 0; i < definition->count; i++) 1396 | { 1397 | union 1398 | { 1399 | uint32_t value; 1400 | struct 1401 | { 1402 | int8_t x; 1403 | int8_t y; 1404 | int8_t z; 1405 | int8_t w; 1406 | }; 1407 | }; 1408 | 1409 | value = definition->values[i].get(); 1410 | 1411 | println("\tint4 i{} = int4({}, {}, {}, {});", 1412 | (definition->registerIndex - 8992) / 4 + i, x, y, z, w); 1413 | } 1414 | definitions += 2; 1415 | definitions += definition->count; 1416 | } 1417 | 1418 | out += "\n"; 1419 | } 1420 | 1421 | bool printedRegisters[32]{}; 1422 | 1423 | uint32_t interpolatorCount = (shader->interpolatorInfo >> 5) & 0x1F; 1424 | 1425 | for (uint32_t i = 0; i < interpolatorCount; i++) 1426 | { 1427 | union 1428 | { 1429 | Interpolator interpolator; 1430 | uint32_t value; 1431 | }; 1432 | 1433 | if (isPixelShader) 1434 | { 1435 | value = reinterpret_cast(shader)->interpolators[i]; 1436 | println("\tfloat4 r{} = i{}{};", uint32_t(interpolator.reg), USAGE_VARIABLES[uint32_t(interpolator.usage)], uint32_t(interpolator.usageIndex)); 1437 | printedRegisters[interpolator.reg] = true; 1438 | } 1439 | else 1440 | { 1441 | auto vertexShader = reinterpret_cast(shader); 1442 | value = vertexShader->vertexElementsAndInterpolators[vertexShader->field18 + vertexShader->vertexElementCount + i]; 1443 | interpolators.emplace(i, fmt::format("o{}{}", USAGE_VARIABLES[uint32_t(interpolator.usage)], uint32_t(interpolator.usageIndex))); 1444 | } 1445 | } 1446 | 1447 | if (!isPixelShader) 1448 | { 1449 | #ifdef UNLEASHED_RECOMP 1450 | if (!hasMtxProjection) 1451 | out += "\toPos = 0.0;\n"; 1452 | #endif 1453 | 1454 | for (auto& [usage, usageIndex] : INTERPOLATORS) 1455 | println("\to{}{} = 0.0;", 
USAGE_VARIABLES[uint32_t(usage)], usageIndex); 1456 | 1457 | out += "\n"; 1458 | } 1459 | 1460 | for (size_t i = 0; i < 32; i++) 1461 | { 1462 | if (!printedRegisters[i]) 1463 | { 1464 | print("\tfloat4 r{} = ", i); 1465 | if (isPixelShader && i == ((shader->fieldC >> 8) & 0xFF)) 1466 | { 1467 | out += "float4((iPos.xy - 0.5) * float2(iFace ? 1.0 : -1.0, 1.0), 0.0, 0.0);\n"; 1468 | } 1469 | #ifdef UNLEASHED_RECOMP 1470 | else if (!isPixelShader && hasIndexCount && i == 0) 1471 | { 1472 | out += "float4(iVertexId + g_IndexCount.x * iInstanceId, 0.0, 0.0, 0.0);\n"; 1473 | } 1474 | #endif 1475 | else 1476 | { 1477 | out += "0.0;\n"; 1478 | } 1479 | } 1480 | } 1481 | 1482 | out += "\tint a0 = 0;\n"; 1483 | out += "\tint aL = 0;\n"; 1484 | out += "\tbool p0 = false;\n"; 1485 | out += "\tfloat ps = 0.0;\n"; 1486 | if (isPixelShader) 1487 | { 1488 | #ifdef UNLEASHED_RECOMP 1489 | out += "\tfloat2 pixelCoord = 0.0;\n"; 1490 | #endif 1491 | out += "\tCubeMapData cubeMapData = (CubeMapData)0;\n"; 1492 | } 1493 | 1494 | const be* code = reinterpret_cast*>(shaderData + shaderContainer->virtualSize + shader->physicalOffset); 1495 | 1496 | union 1497 | { 1498 | ControlFlowInstruction controlFlow[2]; 1499 | struct 1500 | { 1501 | uint32_t code0; 1502 | uint32_t code1; 1503 | uint32_t code2; 1504 | uint32_t code3; 1505 | }; 1506 | }; 1507 | 1508 | auto controlFlowCode = code; 1509 | uint32_t instrAddress = 0; 1510 | uint32_t instrSize = shader->size; 1511 | bool simpleControlFlow = true; 1512 | 1513 | while (instrAddress < instrSize) 1514 | { 1515 | code0 = controlFlowCode[0]; 1516 | code1 = controlFlowCode[1] & 0xFFFF; 1517 | code2 = (controlFlowCode[1] >> 16) | (controlFlowCode[2] << 16); 1518 | code3 = controlFlowCode[2] >> 16; 1519 | 1520 | for (auto& cfInstr : controlFlow) 1521 | { 1522 | uint32_t address = 0; 1523 | 1524 | switch (cfInstr.opcode) 1525 | { 1526 | case ControlFlowOpcode::Exec: 1527 | case ControlFlowOpcode::ExecEnd: 1528 | address = cfInstr.exec.address; 1529 
| break; 1530 | 1531 | case ControlFlowOpcode::CondExec: 1532 | case ControlFlowOpcode::CondExecEnd: 1533 | case ControlFlowOpcode::CondExecPredClean: 1534 | case ControlFlowOpcode::CondExecPredCleanEnd: 1535 | address = cfInstr.condExec.address; 1536 | break; 1537 | 1538 | case ControlFlowOpcode::CondExecPred: 1539 | case ControlFlowOpcode::CondExecPredEnd: 1540 | address = cfInstr.condExecPred.address; 1541 | break; 1542 | 1543 | case ControlFlowOpcode::CondJmp: 1544 | { 1545 | if (cfInstr.condJmp.isUnconditional || cfInstr.condJmp.direction) 1546 | simpleControlFlow = false; 1547 | else 1548 | ++ifEndLabels[cfInstr.condJmp.address]; 1549 | 1550 | break; 1551 | } 1552 | } 1553 | 1554 | if (address != 0) 1555 | instrSize = std::min(instrSize, address * 12); 1556 | } 1557 | 1558 | controlFlowCode += 3; 1559 | instrAddress += 12; 1560 | } 1561 | 1562 | if (simpleControlFlow) 1563 | { 1564 | out += '\n'; 1565 | indentation = 1; 1566 | } 1567 | else 1568 | { 1569 | out += "\n\tuint pc = 0;\n"; 1570 | out += "\twhile (true)\n"; 1571 | out += "\t{\n"; 1572 | out += "\t\tswitch (pc)\n"; 1573 | out += "\t\t{\n"; 1574 | } 1575 | 1576 | controlFlowCode = code; 1577 | instrAddress = 0; 1578 | uint32_t pc = 0; 1579 | 1580 | while (instrAddress < instrSize) 1581 | { 1582 | code0 = controlFlowCode[0]; 1583 | code1 = controlFlowCode[1] & 0xFFFF; 1584 | code2 = (controlFlowCode[1] >> 16) | (controlFlowCode[2] << 16); 1585 | code3 = controlFlowCode[2] >> 16; 1586 | 1587 | for (auto& cfInstr : controlFlow) 1588 | { 1589 | if (!simpleControlFlow) 1590 | { 1591 | indentation = 3; 1592 | println("\t\tcase {}:", pc); 1593 | } 1594 | else 1595 | { 1596 | auto findResult = ifEndLabels.find(pc); 1597 | if (findResult != ifEndLabels.end()) 1598 | { 1599 | for (uint32_t i = 0; i < findResult->second; i++) 1600 | { 1601 | --indentation; 1602 | indent(); 1603 | out += "}\n"; 1604 | } 1605 | } 1606 | } 1607 | 1608 | ++pc; 1609 | 1610 | uint32_t address = 0; 1611 | uint32_t count = 0; 1612 | 
uint32_t sequence = 0; 1613 | bool shouldReturn = false; 1614 | bool shouldCloseCurlyBracket = false; 1615 | 1616 | switch (cfInstr.opcode) 1617 | { 1618 | case ControlFlowOpcode::Exec: 1619 | case ControlFlowOpcode::ExecEnd: 1620 | address = cfInstr.exec.address; 1621 | count = cfInstr.exec.count; 1622 | sequence = cfInstr.exec.sequence; 1623 | shouldReturn = (cfInstr.opcode == ControlFlowOpcode::ExecEnd); 1624 | break; 1625 | 1626 | case ControlFlowOpcode::CondExec: 1627 | case ControlFlowOpcode::CondExecEnd: 1628 | case ControlFlowOpcode::CondExecPredClean: 1629 | case ControlFlowOpcode::CondExecPredCleanEnd: 1630 | address = cfInstr.condExec.address; 1631 | count = cfInstr.condExec.count; 1632 | sequence = cfInstr.condExec.sequence; 1633 | /* Both End opcodes of this group terminate the shader; the second operand used to repeat CondExecEnd, so CondExecPredCleanEnd never returned. */ shouldReturn = (cfInstr.opcode == ControlFlowOpcode::CondExecEnd || cfInstr.opcode == ControlFlowOpcode::CondExecPredCleanEnd); 1634 | break; 1635 | 1636 | case ControlFlowOpcode::CondExecPred: 1637 | case ControlFlowOpcode::CondExecPredEnd: 1638 | address = cfInstr.condExecPred.address; 1639 | count = cfInstr.condExecPred.count; 1640 | sequence = cfInstr.condExecPred.sequence; 1641 | shouldReturn = (cfInstr.opcode == ControlFlowOpcode::CondExecPredEnd); 1642 | break; 1643 | 1644 | case ControlFlowOpcode::LoopStart: 1645 | if (simpleControlFlow) 1646 | { 1647 | indent(); 1648 | #ifdef UNLEASHED_RECOMP 1649 | print("[unroll] "); 1650 | #endif 1651 | println("for (aL = 0; aL < i{}.x; aL++)", uint32_t(cfInstr.loopStart.loopId)); 1652 | indent(); 1653 | out += "{\n"; 1654 | ++indentation; 1655 | } 1656 | else 1657 | { 1658 | out += "\t\t\taL = 0;\n"; 1659 | } 1660 | break; 1661 | 1662 | case ControlFlowOpcode::LoopEnd: 1663 | if (simpleControlFlow) 1664 | { 1665 | --indentation; 1666 | indent(); 1667 | out += "}\n"; 1668 | } 1669 | else 1670 | { 1671 | out += "\t\t\t++aL;\n"; 1672 | println("\t\t\tif (aL < i{}.x)", uint32_t(cfInstr.loopEnd.loopId)); 1673 | out += "\t\t\t{\n"; 1674 | println("\t\t\t\tpc = {};", 
uint32_t(cfInstr.loopEnd.address)); 1675 | out += "\t\t\t\tcontinue;\n"; 1676 | out += "\t\t\t}\n"; 1677 | } 1678 | break; 1679 | 1680 | case ControlFlowOpcode::CondJmp: 1681 | { 1682 | if (cfInstr.condJmp.isUnconditional) 1683 | { 1684 | assert(!simpleControlFlow); 1685 | println("\t\t\tpc = {};", uint32_t(cfInstr.condJmp.address)); 1686 | out += "\t\t\tcontinue;\n"; 1687 | } 1688 | else 1689 | { 1690 | indent(); 1691 | if (cfInstr.condJmp.isPredicated) 1692 | { 1693 | println("if ({}p0)", cfInstr.condJmp.condition ^ simpleControlFlow ? "" : "!"); 1694 | } 1695 | else 1696 | { 1697 | auto findResult = boolConstants.find(cfInstr.condJmp.boolAddress); 1698 | if (findResult != boolConstants.end()) 1699 | println("if ((g_Booleans & {}) {}= 0)", findResult->second, cfInstr.condJmp.condition ^ simpleControlFlow ? "!" : "="); 1700 | else 1701 | println("if (b{} {}= 0)", uint32_t(cfInstr.condJmp.boolAddress), cfInstr.condJmp.condition ^ simpleControlFlow ? "!" : "="); 1702 | } 1703 | 1704 | if (simpleControlFlow) 1705 | { 1706 | indent(); 1707 | out += "{\n"; 1708 | ++indentation; 1709 | } 1710 | else 1711 | { 1712 | out += "\t\t\t{\n"; 1713 | println("\t\t\t\tpc = {};", uint32_t(cfInstr.condJmp.address)); 1714 | out += "\t\t\t\tcontinue;\n"; 1715 | out += "\t\t\t}\n"; 1716 | } 1717 | } 1718 | break; 1719 | } 1720 | } 1721 | 1722 | auto instructionCode = code + address * 3; 1723 | 1724 | for (uint32_t i = 0; i < count; i++) 1725 | { 1726 | union 1727 | { 1728 | VertexFetchInstruction vertexFetch; 1729 | TextureFetchInstruction textureFetch; 1730 | AluInstruction alu; 1731 | struct 1732 | { 1733 | uint32_t code0; 1734 | uint32_t code1; 1735 | uint32_t code2; 1736 | }; 1737 | }; 1738 | 1739 | code0 = instructionCode[0]; 1740 | code1 = instructionCode[1]; 1741 | code2 = instructionCode[2]; 1742 | 1743 | if ((sequence & 0x1) != 0) 1744 | { 1745 | if (vertexFetch.opcode == FetchOpcode::VertexFetch) 1746 | { 1747 | recompile(vertexFetch, address + i); 1748 | } 1749 | else 1750 
| { 1751 | #ifdef UNLEASHED_RECOMP 1752 | if (textureFetch.constIndex == 10) // g_GISampler 1753 | { 1754 | specConstantsMask |= SPEC_CONSTANT_BICUBIC_GI_FILTER; 1755 | 1756 | indent(); 1757 | out += "if (g_SpecConstants() & SPEC_CONSTANT_BICUBIC_GI_FILTER)"; 1758 | indent(); 1759 | out += '{'; 1760 | 1761 | ++indentation; 1762 | recompile(textureFetch, true); 1763 | --indentation; 1764 | 1765 | indent(); 1766 | out += "}"; 1767 | indent(); 1768 | out += "else"; 1769 | indent(); 1770 | out += '{'; 1771 | 1772 | ++indentation; 1773 | recompile(textureFetch, false); 1774 | --indentation; 1775 | 1776 | indent(); 1777 | out += '}'; 1778 | } 1779 | else 1780 | #endif 1781 | { 1782 | recompile(textureFetch, false); 1783 | } 1784 | } 1785 | } 1786 | else 1787 | { 1788 | recompile(alu); 1789 | } 1790 | 1791 | sequence >>= 2; 1792 | instructionCode += 3; 1793 | } 1794 | 1795 | if (shouldReturn) 1796 | { 1797 | if (isPixelShader) 1798 | { 1799 | specConstantsMask |= SPEC_CONSTANT_ALPHA_TEST; 1800 | 1801 | indent(); 1802 | out += "[branch] if (g_SpecConstants() & SPEC_CONSTANT_ALPHA_TEST)"; 1803 | indent(); 1804 | out += '{'; 1805 | 1806 | indent(); 1807 | out += "\tclip(oC0.w - g_AlphaThreshold);\n"; 1808 | 1809 | indent(); 1810 | out += "}"; 1811 | 1812 | #ifdef UNLEASHED_RECOMP 1813 | specConstantsMask |= SPEC_CONSTANT_ALPHA_TO_COVERAGE; 1814 | 1815 | indent(); 1816 | out += "else if (g_SpecConstants() & SPEC_CONSTANT_ALPHA_TO_COVERAGE)"; 1817 | indent(); 1818 | out += '{'; 1819 | 1820 | indent(); 1821 | out += "\toC0.w *= 1.0 + computeMipLevel(pixelCoord) * 0.25;\n"; 1822 | indent(); 1823 | out += "\toC0.w = 0.5 + (oC0.w - g_AlphaThreshold) / max(fwidth(oC0.w), 1e-6);\n"; 1824 | 1825 | indent(); 1826 | out += '}'; 1827 | #endif 1828 | } 1829 | else 1830 | { 1831 | out += "\toPos.xy += g_HalfPixelOffset * oPos.w;\n"; 1832 | } 1833 | 1834 | if (simpleControlFlow) 1835 | { 1836 | indent(); 1837 | #ifdef UNLEASHED_RECOMP 1838 | if (hasMtxProjection) 1839 | { 1840 | out += 
"continue;\n"; 1841 | } 1842 | else 1843 | #endif 1844 | { 1845 | out += "return;\n"; 1846 | } 1847 | } 1848 | else 1849 | { 1850 | out += "\t\t\tbreak;\n"; 1851 | } 1852 | } 1853 | 1854 | if (shouldCloseCurlyBracket) 1855 | { 1856 | --indentation; 1857 | indent(); 1858 | out += "}\n"; 1859 | } 1860 | } 1861 | 1862 | controlFlowCode += 3; 1863 | instrAddress += 12; 1864 | } 1865 | 1866 | if (!simpleControlFlow) 1867 | { 1868 | out += "\t\t\tbreak;\n"; 1869 | out += "\t\t}\n"; 1870 | out += "\t\tbreak;\n"; 1871 | out += "\t}\n"; 1872 | } 1873 | 1874 | #ifdef UNLEASHED_RECOMP 1875 | if (hasMtxProjection) 1876 | out += "\t}\n"; 1877 | #endif 1878 | 1879 | out += "}"; 1880 | } 1881 | -------------------------------------------------------------------------------- /XenosRecomp/shader_recompiler.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "shader.h" 4 | #include "shader_code.h" 5 | 6 | struct StringBuffer 7 | { 8 | std::string out; 9 | 10 | template 11 | void print(fmt::format_string fmt, Args&&... args) 12 | { 13 | fmt::vformat_to(std::back_inserter(out), fmt.get(), fmt::make_format_args(args...)); 14 | } 15 | 16 | template 17 | void println(fmt::format_string fmt, Args&&... 
args) 18 | { /* println: formats exactly like print(), then appends one newline character to out. */ 19 | fmt::vformat_to(std::back_inserter(out), fmt.get(), fmt::make_format_args(args...)); 20 | out += '\n'; 21 | } 22 | }; 23 | 24 | /* State used while translating one shader; the generated source text is accumulated in the inherited StringBuffer::out. */ struct ShaderRecompiler : StringBuffer 25 | { 26 | /* Number of tab characters that indent() appends. */ uint32_t indentation = 0; 27 | /* Selects the pixel-shader paths over the vertex-shader ones inside recompile(). */ bool isPixelShader = false; 28 | /* Base pointer that constant-table offsets (for example a constant name offset) are added to. */ const uint8_t* constantTableData = nullptr; 29 | /* Vertex elements of the vertex shader being translated, keyed by the element address field. */ std::unordered_map vertexElements; 30 | /* Vertex-shader interpolator index mapped to the name of its output variable (o + usage name + usage index). */ std::unordered_map interpolators; 31 | /* NOTE(review): filled and read outside the code visible here; presumably register index to constant name, like boolConstants - confirm. */ std::unordered_map float4Constants; 32 | /* Bool register index mapped to the constant name; CondJmp looks it up to test the named bit of g_Booleans. */ std::unordered_map boolConstants; 33 | /* NOTE(review): not used in the code visible here - confirm key and value meaning against the texture fetch emitter. */ std::unordered_map samplers; 34 | /* Control-flow index mapped to the number of conditional jumps (not unconditional, direction flag clear) that target it; that many blocks are closed there when simple control flow is emitted. */ std::unordered_map ifEndLabels; 35 | /* Accumulates, by OR, the SPEC_CONSTANT_* flags set during recompilation. */ uint32_t specConstantsMask = 0; 36 | 37 | #ifdef UNLEASHED_RECOMP 38 | /* When set, recompile() wraps the shader body in a two-iteration loop and leaves it with continue instead of return. */ bool hasMtxProjection = false; 39 | /* NOTE(review): not referenced in the code visible here - confirm its use. */ bool hasMtxPrevInvViewProjection = false; 40 | #endif 41 | 42 | /* Appends indentation tab characters to out. */ void indent() 43 | { 44 | for (uint32_t i = 0; i < indentation; i++) 45 | out += '\t'; 46 | } 47 | 48 | void printDstSwizzle(uint32_t dstSwizzle, bool operand); 49 | void printDstSwizzle01(uint32_t dstRegister, uint32_t dstSwizzle); 50 | 51 | /* Per-instruction emitters; the shader-level recompile() calls these for vertex fetch, texture fetch and ALU instructions respectively. */ void recompile(const VertexFetchInstruction& instr, uint32_t address); 52 | void recompile(const TextureFetchInstruction& instr, bool bicubic); 53 | void recompile(const AluInstruction& instr); 54 | 55 | /* Entry point: walks the control flow of shaderData and emits the shader source into out. */ void recompile(const uint8_t* shaderData, const std::string_view& include); 56 | }; 57 | -------------------------------------------------------------------------------- /thirdparty/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | if (NOT TARGET fmt::fmt) 2 | add_subdirectory("${XENOS_RECOMP_THIRDPARTY_ROOT}/fmt") 3 | endif() 4 | 5 | if (NOT TARGET xxHash::xxhash) 6 | add_subdirectory("${XENOS_RECOMP_THIRDPARTY_ROOT}/xxHash/cmake_unofficial") 7 | endif() 8 | 9 | if (NOT TARGET libzstd) 10 | add_subdirectory("${XENOS_RECOMP_THIRDPARTY_ROOT}/zstd/build/cmake") 11 | endif() 12 | 13 | if (NOT TARGET Microsoft::DirectXShaderCompiler) 14 | add_subdirectory("${XENOS_RECOMP_THIRDPARTY_ROOT}/dxc-bin") 15 | endif() 16 | 
--------------------------------------------------------------------------------