├── .gitattributes ├── .gitignore ├── GPURealTimeBC6H.sln ├── GPURealTimeBC6H.vcxproj ├── LICENSE ├── README.md ├── app.cpp ├── app.h ├── bin ├── GPURealTimeBC6HRelease.exe ├── atrium.dds ├── backyard.dds ├── blit.hlsl ├── compress.hlsl ├── d3dcompiler_47.dll ├── desk.dds ├── memorial.dds └── yucca.dds ├── dds.cpp ├── dds.h ├── stdafx.cpp ├── stdafx.h └── winmain.cpp /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | 7 | # Standard to msysgit 8 | *.doc diff=astextplain 9 | *.DOC diff=astextplain 10 | *.docx diff=astextplain 11 | *.DOCX diff=astextplain 12 | *.dot diff=astextplain 13 | *.DOT diff=astextplain 14 | *.pdf diff=astextplain 15 | *.PDF diff=astextplain 16 | *.rtf diff=astextplain 17 | *.RTF diff=astextplain 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vs 2 | Debug 3 | Release 4 | GPURealTimeBC6HDebug.exe 5 | *.ilk 6 | *.pdb 7 | *.iobj 8 | *.ipdb 9 | *.vcxproj.user -------------------------------------------------------------------------------- /GPURealTimeBC6H.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 2013 4 | VisualStudioVersion = 12.0.31101.0 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "GPURealTimeBC6H", "GPURealTimeBC6H.vcxproj", "{B51BC917-ED36-4511-9EB1-835F1EBC8B19}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|Win32 = Debug|Win32 11 | Release|Win32 = Release|Win32 12 | EndGlobalSection 13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 14 | 
{B51BC917-ED36-4511-9EB1-835F1EBC8B19}.Debug|Win32.ActiveCfg = Debug|Win32 15 | {B51BC917-ED36-4511-9EB1-835F1EBC8B19}.Debug|Win32.Build.0 = Debug|Win32 16 | {B51BC917-ED36-4511-9EB1-835F1EBC8B19}.Release|Win32.ActiveCfg = Release|Win32 17 | {B51BC917-ED36-4511-9EB1-835F1EBC8B19}.Release|Win32.Build.0 = Release|Win32 18 | EndGlobalSection 19 | GlobalSection(SolutionProperties) = preSolution 20 | HideSolutionNode = FALSE 21 | EndGlobalSection 22 | EndGlobal 23 | -------------------------------------------------------------------------------- /GPURealTimeBC6H.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Release 10 | Win32 11 | 12 | 13 | 14 | {B51BC917-ED36-4511-9EB1-835F1EBC8B19} 15 | Win32Proj 16 | GPURealTimeBC6H 17 | 10.0.17763.0 18 | 19 | 20 | 21 | Application 22 | true 23 | v141 24 | Unicode 25 | 26 | 27 | Application 28 | false 29 | v141 30 | true 31 | Unicode 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | true 45 | bin\ 46 | $(ProjectName)$(Configuration) 47 | 48 | 49 | false 50 | bin\ 51 | $(ProjectName)$(Configuration) 52 | 53 | 54 | 55 | Use 56 | Level3 57 | Disabled 58 | WIN32;_DEBUG;_WINDOWS;%(PreprocessorDefinitions) 59 | 60 | 61 | Windows 62 | true 63 | kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies);d3d11.lib;d3dcompiler.lib 64 | 65 | 66 | 67 | 68 | Level3 69 | Use 70 | MaxSpeed 71 | true 72 | true 73 | WIN32;NDEBUG;_WINDOWS;%(PreprocessorDefinitions) 74 | 75 | 76 | Windows 77 | true 78 | true 79 | true 80 | kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies);d3d11.lib;d3dcompiler.lib 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | Create 93 | Create 94 | 95 | 96 | 97 | 98 | 99 | true 100 | true 101 | 
102 | 103 | true 104 | true 105 | 106 | 107 | 108 | 109 | 110 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2015 Krzysztof Narkowicz 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | GPURealTimeBC6H 2 | ======= 3 | 4 | Real-time BC6H compressor which runs on a GPU. Includes a small testbed application. This compressor is used in a few released AA/AAA games. 5 | 6 | Compressor has two presets: 7 | * "Fast" - compresses a standard 256x256x6 cubemap in 0.02ms on NV P4000 (GPU perf in between NV GTX 1060 and NV GTX 1070). 
Compression quality is comparable to fast presets of offline compressors. 8 | * "Quality" - compresses a standard 256x256x6 cubemap in 0.528ms on NV P4000. Compression quality is comparable to normal presets of offline compressors. 9 | 10 | How It Works 11 | === 12 | BC6H is a pretty complex format with multiple possible block modes. To prune the search space a bit, one mode was selected for the fast compression setting (mode 11) and two for the quality compression setting (mode 2 and mode 6), which proved to have the best trade-off between quality and performance. 13 | 14 | Fast mode is based on computing a color bounding box ("Real-Time DXT Compression" by J.M.P. van Waveren, 2006), then ordering colors by a diagonal of this bounding box and using a least squares fit to find optimal endpoints ("High Quality DXT Compression using CUDA" by Ignacio Castaño, 2007). All computations are made in log2 space in order to optimize for perceptual error (after all, the resulting image will be tone mapped). 15 | 16 | Quality mode has two passes. The first pass searches for the best partition by computing a color bounding box per partition and computing the error as the distance of every texel from the diagonal of that box. In the second pass the best partition is encoded using a similar approach to the fast mode. 17 | 18 | Quality 19 | === 20 | Quality compared using RMSLE (lower is better). 
21 | 22 | | | GPU Real-Time BC6H "Fast" | GPU Real-Time BC6H "Quality" | Intel "Very fast" | Intel "Fast" | Intel "Basic" | Intel "Slow" | Intel "Very slow" | DirectXTex 23 | | ------- | ------------------------- | ----------------------------- | ----------------- | ------------ | ------------- | ------------ | ----------------- | ---------- 24 | | Atrium | 0.0074 | 0.0066 | 0.0080 | 0.0069 | 0.0067 | 0.0067 | 0.0067 | 0.0079 25 | | Backyard | 0.0073 | 0.0070 | 0.0072 | 0.0067 | 0.0065 | 0.0065 | 0.0065 | 0.0075 26 | | Desk | 0.0447 | 0.0328 | 0.0470 | 0.0307 | 0.0298 | 0.0294 | 0.0293 | 0.0413 27 | | Memorial | 0.0158 | 0.0126 | 0.0192 | 0.0135 | 0.0133 | 0.0132 | 0.0131 | 0.0243 28 | | Yucca | 0.0168 | 0.0123 | 0.0145 | 0.0108 | 0.0105 | 0.0103 | 0.0103 | 0.0124 29 | 30 | License 31 | === 32 | This work is dual-licensed under either public domain or MIT. 33 | -------------------------------------------------------------------------------- /app.cpp: -------------------------------------------------------------------------------- 1 | #include "stdafx.h" 2 | #include "app.h" 3 | #include "dds.h" 4 | 5 | #define SAFE_RELEASE( x ) { if ( x ) { x->Release(); x = nullptr; } } 6 | 7 | CApp gApp; 8 | 9 | char const* ImagePathArr[] = { "atrium.dds", "backyard.dds", "desk.dds", "memorial.dds", "yucca.dds" }; 10 | const uint32_t BC_BLOCK_SIZE = 4; 11 | 12 | struct SShaderCB 13 | { 14 | Vec2 m_screenSizeRcp; 15 | unsigned m_textureSizeInBlocks[2]; 16 | 17 | Vec2 m_imageSizeRcp; 18 | Vec2 m_texelBias; 19 | 20 | float m_texelScale; 21 | float m_exposure; 22 | uint32_t m_blitMode; 23 | uint32_t m_padding; 24 | }; 25 | 26 | // https://gist.github.com/rygorous/2144712 27 | static float HalfToFloat(uint16_t h) 28 | { 29 | union FP32 30 | { 31 | uint32_t u; 32 | float f; 33 | struct 34 | { 35 | unsigned Mantissa : 23; 36 | unsigned Exponent : 8; 37 | unsigned Sign : 1; 38 | }; 39 | }; 40 | 41 | static const FP32 magic = { (254 - 15) << 23 }; 42 | static const FP32 was_infnan = { 
// Divides x by divisor and rounds the result up to the next whole number
// (e.g. the number of 4x4 blocks needed to cover x texels).
//
// The quotient is computed first and a carry is added for any remainder. The
// common `(x + divisor - 1) / divisor` form wraps around when x is close to
// UINT32_MAX and then returns a far too small value.
//
// divisor must be non-zero.
uint32_t DivideAndRoundUp(uint32_t x, uint32_t divisor)
{
	return x / divisor + (x % divisor != 0 ? 1u : 0u);
}
_ASSERT(SUCCEEDED(res)); 106 | 107 | ID3D11Texture2D* backBuffer = NULL; 108 | res = m_swapChain->GetBuffer(0, __uuidof(ID3D11Texture2D), (LPVOID*)&backBuffer); 109 | _ASSERT(SUCCEEDED(res)); 110 | 111 | res = m_device->CreateRenderTargetView(backBuffer, nullptr, &m_backBufferView); 112 | _ASSERT(SUCCEEDED(res)); 113 | backBuffer->Release(); 114 | 115 | CreateImage(); 116 | CreateShaders(); 117 | CreateTargets(); 118 | CreateQueries(); 119 | CreateConstantBuffer(); 120 | 121 | HRESULT hr; 122 | D3D11_SAMPLER_DESC samplerDesc = 123 | { 124 | D3D11_FILTER_MIN_MAG_MIP_POINT, 125 | D3D11_TEXTURE_ADDRESS_BORDER, 126 | D3D11_TEXTURE_ADDRESS_BORDER, 127 | D3D11_TEXTURE_ADDRESS_BORDER, 128 | 0.0f, 129 | 1, 130 | D3D11_COMPARISON_ALWAYS, 131 | 0.0f, 132 | 0.0f, 133 | 0.0f, 134 | 0.0f, 135 | 0.0f, 136 | D3D11_FLOAT32_MAX 137 | }; 138 | hr = m_device->CreateSamplerState(&samplerDesc, &m_pointSampler); 139 | _ASSERT(SUCCEEDED(hr)); 140 | 141 | D3D11_BUFFER_DESC bd; 142 | ZeroMemory(&bd, sizeof(bd)); 143 | bd.Usage = D3D11_USAGE_DEFAULT; 144 | bd.ByteWidth = sizeof(uint16_t) * 4; 145 | bd.BindFlags = D3D11_BIND_INDEX_BUFFER; 146 | bd.CPUAccessFlags = 0; 147 | 148 | uint16_t indices[] = { 0, 1, 2, 3 }; 149 | D3D11_SUBRESOURCE_DATA initData; 150 | ZeroMemory(&initData, sizeof(initData)); 151 | initData.pSysMem = indices; 152 | 153 | hr = m_device->CreateBuffer(&bd, &initData, &m_ib); 154 | _ASSERT(SUCCEEDED(hr)); 155 | 156 | return true; 157 | } 158 | 159 | void CApp::CreateTargets() 160 | { 161 | D3D11_TEXTURE2D_DESC texDesc; 162 | texDesc.Width = DivideAndRoundUp(m_imageWidth, BC_BLOCK_SIZE); 163 | texDesc.Height = DivideAndRoundUp(m_imageHeight, BC_BLOCK_SIZE); 164 | texDesc.MipLevels = 1; 165 | texDesc.ArraySize = 1; 166 | texDesc.Format = DXGI_FORMAT_R32G32B32A32_UINT; 167 | texDesc.SampleDesc.Count = 1; 168 | texDesc.SampleDesc.Quality = 0; 169 | texDesc.Usage = D3D11_USAGE_DEFAULT; 170 | texDesc.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE | 
D3D11_BIND_UNORDERED_ACCESS; 171 | texDesc.CPUAccessFlags = 0; 172 | texDesc.MiscFlags = 0; 173 | HRESULT hr = m_device->CreateTexture2D(&texDesc, nullptr, &m_compressTargetRes); 174 | _ASSERT(SUCCEEDED(hr)); 175 | 176 | hr = m_device->CreateUnorderedAccessView(m_compressTargetRes, nullptr, &m_compressTargetUAV); 177 | _ASSERT(SUCCEEDED(hr)); 178 | 179 | texDesc.Width = m_imageWidth; 180 | texDesc.Height = m_imageHeight; 181 | texDesc.MipLevels = 1; 182 | texDesc.ArraySize = 1; 183 | texDesc.Format = DXGI_FORMAT_R16G16B16A16_FLOAT; 184 | texDesc.SampleDesc.Count = 1; 185 | texDesc.SampleDesc.Quality = 0; 186 | texDesc.Usage = D3D11_USAGE_DEFAULT; 187 | texDesc.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE; 188 | texDesc.CPUAccessFlags = 0; 189 | texDesc.MiscFlags = 0; 190 | hr = m_device->CreateTexture2D(&texDesc, nullptr, &m_tmpTargetRes); 191 | _ASSERT(SUCCEEDED(hr)); 192 | 193 | hr = m_device->CreateRenderTargetView(m_tmpTargetRes, nullptr, &m_tmpTargetView); 194 | _ASSERT(SUCCEEDED(hr)); 195 | 196 | texDesc.Width = m_imageWidth; 197 | texDesc.Height = m_imageHeight; 198 | texDesc.MipLevels = 1; 199 | texDesc.ArraySize = 1; 200 | texDesc.Format = DXGI_FORMAT_R16G16B16A16_FLOAT; 201 | texDesc.SampleDesc.Count = 1; 202 | texDesc.SampleDesc.Quality = 0; 203 | texDesc.Usage = D3D11_USAGE_STAGING; 204 | texDesc.BindFlags = 0; 205 | texDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; 206 | texDesc.MiscFlags = 0; 207 | hr = m_device->CreateTexture2D(&texDesc, nullptr, &m_tmpStagingRes); 208 | _ASSERT(SUCCEEDED(hr)); 209 | } 210 | 211 | void CApp::DestroyTargets() 212 | { 213 | SAFE_RELEASE(m_compressTargetUAV); 214 | SAFE_RELEASE(m_compressTargetRes); 215 | SAFE_RELEASE(m_tmpTargetView); 216 | SAFE_RELEASE(m_tmpTargetRes); 217 | SAFE_RELEASE(m_tmpStagingRes); 218 | } 219 | 220 | void CApp::CreateQueries() 221 | { 222 | D3D11_QUERY_DESC queryDesc; 223 | queryDesc.MiscFlags = 0; 224 | 225 | for (unsigned i = 0; i < MAX_QUERY_FRAME_NUM; ++i) 226 | { 227 | 
queryDesc.Query = D3D11_QUERY_TIMESTAMP_DISJOINT; 228 | m_device->CreateQuery(&queryDesc, &m_disjointQueries[i]); 229 | 230 | queryDesc.Query = D3D11_QUERY_TIMESTAMP; 231 | m_device->CreateQuery(&queryDesc, &m_timeBeginQueries[i]); 232 | m_device->CreateQuery(&queryDesc, &m_timeEndQueries[i]); 233 | } 234 | } 235 | 236 | void CApp::CreateConstantBuffer() 237 | { 238 | D3D11_BUFFER_DESC desc; 239 | ZeroMemory(&desc, sizeof(desc)); 240 | desc.Usage = D3D11_USAGE_DYNAMIC; 241 | desc.ByteWidth = sizeof(SShaderCB); 242 | desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; 243 | desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; 244 | 245 | m_device->CreateBuffer(&desc, nullptr, &m_constantBuffer); 246 | } 247 | 248 | void CApp::CreateImage() 249 | { 250 | SImage img; 251 | DDS::LoadA16B16G16R16F(ImagePathArr[m_imageID], img); 252 | 253 | m_imageWidth = img.m_width; 254 | m_imageHeight = img.m_height; 255 | 256 | D3D11_SUBRESOURCE_DATA initialData; 257 | initialData.pSysMem = img.m_data; 258 | initialData.SysMemPitch = img.m_width * 4 * 2; 259 | initialData.SysMemSlicePitch = 0; 260 | 261 | D3D11_TEXTURE2D_DESC desc; 262 | ZeroMemory(&desc, sizeof(desc)); 263 | desc.Format = DXGI_FORMAT_R16G16B16A16_FLOAT; 264 | desc.Width = img.m_width; 265 | desc.Height = img.m_height; 266 | desc.MipLevels = 1; 267 | desc.ArraySize = 1; 268 | desc.Usage = D3D11_USAGE_IMMUTABLE; 269 | desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; 270 | desc.SampleDesc.Count = 1; 271 | desc.SampleDesc.Quality = 0; 272 | HRESULT hr = m_device->CreateTexture2D(&desc, &initialData, &m_sourceTextureRes); 273 | _ASSERT(SUCCEEDED(hr)); 274 | 275 | D3D11_SHADER_RESOURCE_VIEW_DESC resViewDesc; 276 | resViewDesc.Format = desc.Format; 277 | resViewDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; 278 | resViewDesc.Texture2D.MostDetailedMip = 0; 279 | resViewDesc.Texture2D.MipLevels = desc.MipLevels; 280 | hr = m_device->CreateShaderResourceView(m_sourceTextureRes, &resViewDesc, &m_sourceTextureView); 281 | 
_ASSERT(SUCCEEDED(hr)); 282 | 283 | desc.Format = DXGI_FORMAT_BC6H_UF16; 284 | desc.Usage = D3D11_USAGE_DEFAULT; 285 | hr = m_device->CreateTexture2D(&desc, nullptr, &m_compressedTextureRes); 286 | _ASSERT(SUCCEEDED(hr)); 287 | 288 | resViewDesc.Format = desc.Format; 289 | resViewDesc.Texture2D.MostDetailedMip = 0; 290 | resViewDesc.Texture2D.MipLevels = desc.MipLevels; 291 | 292 | hr = m_device->CreateShaderResourceView(m_compressedTextureRes, &resViewDesc, &m_compressedTextureView); 293 | _ASSERT(SUCCEEDED(hr)); 294 | } 295 | 296 | void CApp::DestoryImage() 297 | { 298 | SAFE_RELEASE(m_compressedTextureView); 299 | SAFE_RELEASE(m_compressedTextureRes); 300 | SAFE_RELEASE(m_sourceTextureView); 301 | SAFE_RELEASE(m_sourceTextureRes); 302 | } 303 | 304 | void CApp::CreateShaders() 305 | { 306 | unsigned shaderFlags = D3DCOMPILE_ENABLE_STRICTNESS; 307 | #ifdef _DEBUG 308 | shaderFlags |= D3DCOMPILE_DEBUG | D3DCOMPILE_PREFER_FLOW_CONTROL; 309 | #endif 310 | 311 | HRESULT hr; 312 | ID3DBlob* shaderBlob = nullptr; 313 | ID3DBlob* errorBlob = nullptr; 314 | 315 | // Compression compute shaders 316 | for (uint32_t ModeIndex = 0; ModeIndex < COMPRESSION_MODE_NUM; ++ModeIndex) 317 | { 318 | D3D_SHADER_MACRO macros[2]; 319 | macros[0].Name = "QUALITY"; 320 | macros[0].Definition = (ModeIndex == 0 ? 
"0" : "1"); 321 | macros[1].Name = nullptr; 322 | macros[1].Definition = nullptr; 323 | 324 | hr = D3DCompileFromFile(L"compress.hlsl", macros, nullptr, "CSMain", "cs_5_0", shaderFlags, 0, &shaderBlob, &errorBlob); 325 | if (SUCCEEDED(hr)) 326 | { 327 | m_device->CreateComputeShader(shaderBlob->GetBufferPointer(), shaderBlob->GetBufferSize(), nullptr, &m_compressCS[ModeIndex]); 328 | } 329 | else 330 | { 331 | OutputDebugStringA((char const*)errorBlob->GetBufferPointer()); 332 | } 333 | } 334 | 335 | // Blit vertex and pixel shader 336 | { 337 | hr = D3DCompileFromFile(L"blit.hlsl", nullptr, nullptr, "VSMain", "vs_5_0", shaderFlags, 0, &shaderBlob, &errorBlob); 338 | if (SUCCEEDED(hr)) 339 | { 340 | m_device->CreateVertexShader(shaderBlob->GetBufferPointer(), shaderBlob->GetBufferSize(), nullptr, &m_blitVS); 341 | } 342 | else 343 | { 344 | OutputDebugStringA((char const*)errorBlob->GetBufferPointer()); 345 | } 346 | 347 | hr = D3DCompileFromFile(L"blit.hlsl", nullptr, nullptr, "PSMain", "ps_5_0", shaderFlags, 0, &shaderBlob, &errorBlob); 348 | if (SUCCEEDED(hr)) 349 | { 350 | m_device->CreatePixelShader(shaderBlob->GetBufferPointer(), shaderBlob->GetBufferSize(), nullptr, &m_blitPS); 351 | } 352 | else 353 | { 354 | OutputDebugStringA((char const*)errorBlob->GetBufferPointer()); 355 | } 356 | } 357 | } 358 | 359 | void CApp::DestroyShaders() 360 | { 361 | SAFE_RELEASE(m_blitVS); 362 | SAFE_RELEASE(m_blitPS); 363 | 364 | for (unsigned i = 0; i < ARRAYSIZE(m_compressCS); ++i) 365 | { 366 | SAFE_RELEASE(m_compressCS[i]); 367 | } 368 | } 369 | 370 | void CApp::Release() 371 | { 372 | DestroyShaders(); 373 | } 374 | 375 | void CApp::OnKeyDown(WPARAM wParam) 376 | { 377 | switch (wParam) 378 | { 379 | case 'R': 380 | DestroyShaders(); 381 | CreateShaders(); 382 | m_updateRMSE = true; 383 | OutputDebugStringA("Recompiled shaders\n"); 384 | break; 385 | 386 | case 'N': 387 | m_imageID = (m_imageID + 1) % ARRAYSIZE(ImagePathArr); 388 | DestoryImage(); 389 | 
DestroyTargets(); 390 | CreateImage(); 391 | CreateTargets(); 392 | m_imageZoom = 0.0f; 393 | m_texelScale = 1.0f; 394 | m_texelBias.x = 0.0f; 395 | m_texelBias.y = 0.0f; 396 | m_imageExposure = 0.0f; 397 | m_updateRMSE = true; 398 | break; 399 | 400 | case 'E': 401 | // Flip between source and compressed image 402 | m_blitMode = (m_blitMode + 1) % 2; 403 | m_updateTitle = true; 404 | break; 405 | 406 | case '1': 407 | m_blitMode = 0; 408 | m_updateTitle = true; 409 | break; 410 | 411 | case '2': 412 | m_blitMode = 1; 413 | m_updateTitle = true; 414 | break; 415 | 416 | case '3': 417 | m_blitMode = 2; 418 | m_updateTitle = true; 419 | break; 420 | 421 | case '4': 422 | m_blitMode = 3; 423 | m_updateTitle = true; 424 | break; 425 | 426 | case 'Q': 427 | m_compressionMode = (m_compressionMode + 1) % COMPRESSION_MODE_NUM; 428 | m_updateTitle = true; 429 | m_updateRMSE = true; 430 | break; 431 | 432 | case VK_ADD: 433 | m_imageExposure += 0.1f; 434 | m_updateTitle = true; 435 | break; 436 | 437 | case VK_SUBTRACT: 438 | m_imageExposure -= 0.1f; 439 | m_updateTitle = true; 440 | break; 441 | } 442 | } 443 | 444 | void CApp::OnLButtonDown(int mouseX, int mouseY) 445 | { 446 | m_dragEnabled = true; 447 | m_dragStart.x = m_texelBias.x + mouseX * m_texelScale; 448 | m_dragStart.y = m_texelBias.y + mouseY * m_texelScale; 449 | } 450 | 451 | void CApp::OnLButtonUp(int mouseX, int mouseY) 452 | { 453 | m_dragEnabled = false; 454 | } 455 | 456 | void CApp::OnMouseMove(int mouseX, int mouseY) 457 | { 458 | if (m_dragEnabled && GetKeyState(VK_LBUTTON) >= 0) 459 | { 460 | m_dragEnabled = false; 461 | } 462 | 463 | if (m_dragEnabled) 464 | { 465 | m_texelBias.x = m_dragStart.x - mouseX * m_texelScale; 466 | m_texelBias.y = m_dragStart.y - mouseY * m_texelScale; 467 | } 468 | } 469 | 470 | void CApp::OnMouseWheel(int zDelta) 471 | { 472 | m_imageZoom -= zDelta * 0.001f; 473 | m_texelScale = powf(2.0f, m_imageZoom); 474 | } 475 | 476 | void CApp::OnResize() 477 | { 478 | RECT 
clientRect; 479 | GetClientRect(m_windowHandle, &clientRect); 480 | unsigned const newBackbufferWidth = max(clientRect.right - clientRect.left, 64); 481 | unsigned const newBackbufferHeight = max(clientRect.bottom - clientRect.top, 64); 482 | 483 | if (m_backbufferWidth != newBackbufferWidth && m_backbufferHeight != newBackbufferHeight) 484 | { 485 | m_ctx->ClearState(); 486 | SAFE_RELEASE(m_backBufferView); 487 | m_swapChain->ResizeBuffers(2, newBackbufferWidth, newBackbufferHeight, DXGI_FORMAT_R8G8B8A8_UNORM_SRGB, 0); 488 | 489 | ID3D11Texture2D* backBuffer = nullptr; 490 | HRESULT hr = m_swapChain->GetBuffer(0, __uuidof(ID3D11Texture2D), (LPVOID*)&backBuffer); 491 | _ASSERT(SUCCEEDED(hr)); 492 | 493 | hr = m_device->CreateRenderTargetView(backBuffer, nullptr, &m_backBufferView); 494 | _ASSERT(SUCCEEDED(hr)); 495 | backBuffer->Release(); 496 | 497 | m_backbufferWidth = newBackbufferWidth; 498 | m_backbufferHeight = newBackbufferHeight; 499 | } 500 | } 501 | 502 | void CApp::UpdateTitle() 503 | { 504 | const wchar_t* blitModeNames[BLIT_MODE_NUM] = 505 | { 506 | L"Source", 507 | L"Compressed", 508 | L"DiffRGB", 509 | L"DiffLum" 510 | }; 511 | 512 | wchar_t title[256]; 513 | title[0] = 0; 514 | swprintf(title, ARRAYSIZE(title), L"Time:%.3fms rgbRMSLE:%.4f lumRMSLE:%.4f [q]Mode:%s [1,2,3,4]Show:%s [-/+]Exposure:%.1f [n]%S%dx%d [r]Reloadshaders", 515 | m_compressionTime, m_rgbRMSLE, m_lumRMSLE, m_compressionMode == 1 ? 
L"Quality" : L"Fast", blitModeNames[m_blitMode], m_imageExposure, ImagePathArr[m_imageID], m_imageWidth, m_imageHeight); 516 | 517 | SetWindowText(m_windowHandle, title); 518 | } 519 | 520 | void CApp::Render() 521 | { 522 | m_ctx->ClearState(); 523 | 524 | m_ctx->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); 525 | m_ctx->IASetIndexBuffer(m_ib, DXGI_FORMAT_R16_UINT, 0); 526 | 527 | SShaderCB shaderCB; 528 | shaderCB.m_textureSizeInBlocks[0] = DivideAndRoundUp(m_imageWidth, BC_BLOCK_SIZE); 529 | shaderCB.m_textureSizeInBlocks[1] = DivideAndRoundUp(m_imageHeight, BC_BLOCK_SIZE); 530 | shaderCB.m_imageSizeRcp.x = 1.0f / m_imageWidth; 531 | shaderCB.m_imageSizeRcp.y = 1.0f / m_imageHeight; 532 | shaderCB.m_screenSizeRcp.x = 1.0f / m_backbufferWidth; 533 | shaderCB.m_screenSizeRcp.y = 1.0f / m_backbufferHeight; 534 | shaderCB.m_texelBias = m_texelBias; 535 | shaderCB.m_texelScale = m_texelScale; 536 | shaderCB.m_exposure = exp(m_imageExposure); 537 | shaderCB.m_blitMode = m_blitMode; 538 | 539 | D3D11_MAPPED_SUBRESOURCE mappedRes; 540 | m_ctx->Map(m_constantBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedRes); 541 | memcpy(mappedRes.pData, &shaderCB, sizeof(shaderCB)); 542 | m_ctx->Unmap(m_constantBuffer, 0); 543 | 544 | m_ctx->Begin(m_disjointQueries[m_frameID % MAX_QUERY_FRAME_NUM]); 545 | m_ctx->End(m_timeBeginQueries[m_frameID % MAX_QUERY_FRAME_NUM]); 546 | 547 | if (m_compressCS[m_compressionMode]) 548 | { 549 | m_ctx->CSSetShader(m_compressCS[m_compressionMode], nullptr, 0); 550 | m_ctx->CSSetUnorderedAccessViews(0, 1, &m_compressTargetUAV, nullptr); 551 | m_ctx->CSSetShaderResources(0, 1, &m_sourceTextureView); 552 | m_ctx->CSSetSamplers(0, 1, &m_pointSampler); 553 | m_ctx->CSSetConstantBuffers(0, 1, &m_constantBuffer); 554 | 555 | uint32_t threadsX = 8; 556 | uint32_t threadsY = 8; 557 | m_ctx->Dispatch(DivideAndRoundUp(m_imageWidth, BC_BLOCK_SIZE * threadsX), DivideAndRoundUp(m_imageHeight, BC_BLOCK_SIZE * threadsY), 1); 558 | } 559 | 560 | 
m_ctx->End(m_timeEndQueries[m_frameID % MAX_QUERY_FRAME_NUM]); 561 | m_ctx->End(m_disjointQueries[m_frameID % MAX_QUERY_FRAME_NUM]); 562 | 563 | m_ctx->CopyResource(m_compressedTextureRes, m_compressTargetRes); 564 | 565 | if (m_blitVS && m_blitPS) 566 | { 567 | m_ctx->OMSetRenderTargets(1, &m_backBufferView, nullptr); 568 | D3D11_VIEWPORT vp; 569 | vp.Width = (float)m_backbufferWidth; 570 | vp.Height = (float)m_backbufferHeight; 571 | vp.MinDepth = 0.0f; 572 | vp.MaxDepth = 1.0f; 573 | vp.TopLeftX = 0; 574 | vp.TopLeftY = 0; 575 | m_ctx->RSSetViewports(1, &vp); 576 | 577 | m_ctx->VSSetShader(m_blitVS, nullptr, 0); 578 | m_ctx->PSSetShader(m_blitPS, nullptr, 0); 579 | m_ctx->PSSetShaderResources(0, 1, &m_sourceTextureView); 580 | m_ctx->PSSetShaderResources(1, 1, &m_compressedTextureView); 581 | m_ctx->PSSetSamplers(0, 1, &m_pointSampler); 582 | m_ctx->PSSetConstantBuffers(0, 1, &m_constantBuffer); 583 | 584 | m_ctx->DrawIndexed(4, 0, 0); 585 | } 586 | 587 | if (m_updateRMSE) 588 | { 589 | UpdateRMSE(); 590 | m_updateRMSE = false; 591 | } 592 | 593 | ++m_frameID; 594 | m_swapChain->Present(0, 0); 595 | 596 | D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjointData; 597 | uint64_t timeStart; 598 | uint64_t timeEnd; 599 | 600 | if (m_frameID > m_frameID % MAX_QUERY_FRAME_NUM) 601 | { 602 | while (m_ctx->GetData(m_disjointQueries[m_frameID % MAX_QUERY_FRAME_NUM], &disjointData, sizeof(disjointData), 0) != S_OK) 603 | { 604 | int e = 0; 605 | } 606 | 607 | while (m_ctx->GetData(m_timeBeginQueries[m_frameID % MAX_QUERY_FRAME_NUM], &timeStart, sizeof(timeStart), 0) != S_OK) 608 | { 609 | int e = 0; 610 | } 611 | 612 | while (m_ctx->GetData(m_timeEndQueries[m_frameID % MAX_QUERY_FRAME_NUM], &timeEnd, sizeof(timeEnd), 0) != S_OK) 613 | { 614 | int e = 0; 615 | } 616 | 617 | if (!disjointData.Disjoint) 618 | { 619 | uint64_t delta = (timeEnd - timeStart) * 1000; 620 | m_timeAcc += delta / (float)disjointData.Frequency; 621 | ++m_timeAccSampleNum; 622 | } 623 | 624 | if 
(m_timeAccSampleNum > 100) 625 | { 626 | m_compressionTime = m_timeAcc / m_timeAccSampleNum; 627 | m_timeAcc = 0.0f; 628 | m_timeAccSampleNum = 0; 629 | m_updateTitle = true; 630 | } 631 | } 632 | 633 | if (m_updateTitle) 634 | { 635 | UpdateTitle(); 636 | m_updateTitle = false; 637 | } 638 | } 639 | 640 | void CApp::CopyTexture(Vec3* image, ID3D11ShaderResourceView* srcView) 641 | { 642 | if (m_blitVS && m_blitPS) 643 | { 644 | m_ctx->OMSetRenderTargets(1, &m_tmpTargetView, nullptr); 645 | D3D11_VIEWPORT vp; 646 | vp.Width = (float)m_imageWidth; 647 | vp.Height = (float)m_imageHeight; 648 | vp.MinDepth = 0.0f; 649 | vp.MaxDepth = 1.0f; 650 | vp.TopLeftX = 0; 651 | vp.TopLeftY = 0; 652 | m_ctx->RSSetViewports(1, &vp); 653 | 654 | m_ctx->VSSetShader(m_blitVS, nullptr, 0); 655 | m_ctx->PSSetShader(m_blitPS, nullptr, 0); 656 | m_ctx->PSSetShaderResources(0, 1, &srcView); 657 | m_ctx->PSSetShaderResources(1, 1, &srcView); 658 | m_ctx->PSSetSamplers(0, 1, &m_pointSampler); 659 | 660 | m_ctx->DrawIndexed(4, 0, 0); 661 | m_ctx->CopyResource(m_tmpStagingRes, m_tmpTargetRes); 662 | 663 | D3D11_MAPPED_SUBRESOURCE mappedRes; 664 | m_ctx->Map(m_tmpStagingRes, 0, D3D11_MAP_READ, 0, &mappedRes); 665 | if (mappedRes.pData) 666 | { 667 | for (unsigned y = 0; y < m_imageHeight; ++y) 668 | { 669 | for (unsigned x = 0; x < m_imageWidth; ++x) 670 | { 671 | uint16_t tmp[4]; 672 | memcpy(&tmp, (uint8_t*)mappedRes.pData + mappedRes.RowPitch * y + x * sizeof(tmp), sizeof(tmp)); 673 | 674 | image[x + y * m_imageWidth].x = HalfToFloat(tmp[0]); 675 | image[x + y * m_imageWidth].y = HalfToFloat(tmp[1]); 676 | image[x + y * m_imageWidth].z = HalfToFloat(tmp[2]); 677 | } 678 | } 679 | 680 | m_ctx->Unmap(m_tmpStagingRes, 0); 681 | } 682 | } 683 | } 684 | 685 | void CApp::UpdateRMSE() 686 | { 687 | SShaderCB shaderCB; 688 | shaderCB.m_imageSizeRcp.x = 1.0f / m_imageWidth; 689 | shaderCB.m_imageSizeRcp.y = 1.0f / m_imageHeight; 690 | shaderCB.m_screenSizeRcp.x = 1.0f / m_backbufferWidth; 691 | 
shaderCB.m_screenSizeRcp.y = 1.0f / m_backbufferHeight; 692 | shaderCB.m_texelBias = Vec2(0.0f, 0.0f); 693 | shaderCB.m_texelScale = 1.0f; 694 | shaderCB.m_exposure = 1.0f; 695 | shaderCB.m_blitMode = 0; 696 | 697 | D3D11_MAPPED_SUBRESOURCE mappedRes; 698 | m_ctx->Map(m_constantBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedRes); 699 | memcpy(mappedRes.pData, &shaderCB, sizeof(shaderCB)); 700 | m_ctx->Unmap(m_constantBuffer, 0); 701 | m_ctx->PSSetConstantBuffers(0, 1, &m_constantBuffer); 702 | 703 | 704 | Vec3* imageA = new Vec3[m_imageWidth * m_imageHeight]; 705 | Vec3* imageB = new Vec3[m_imageWidth * m_imageHeight]; 706 | 707 | CopyTexture(imageA, m_sourceTextureView); 708 | CopyTexture(imageB, m_compressedTextureView); 709 | 710 | // Compute RGB and Luminance RMSE errors in log space 711 | double rSum = 0.0; 712 | double gSum = 0.0; 713 | double bSum = 0.0; 714 | for (unsigned y = 0; y < m_imageHeight; ++y) 715 | { 716 | for (unsigned x = 0; x < m_imageWidth; ++x) 717 | { 718 | double x0 = imageA[x + y * m_imageWidth].x; 719 | double y0 = imageA[x + y * m_imageWidth].y; 720 | double z0 = imageA[x + y * m_imageWidth].z; 721 | double x1 = imageB[x + y * m_imageWidth].x; 722 | double y1 = imageB[x + y * m_imageWidth].y; 723 | double z1 = imageB[x + y * m_imageWidth].z; 724 | 725 | double dx = log(x1 + 1.0) - log(x0 + 1.0); 726 | double dy = log(y1 + 1.0) - log(y0 + 1.0); 727 | double dz = log(z1 + 1.0) - log(z0 + 1.0); 728 | rSum += dx * dx; 729 | gSum += dy * dy; 730 | bSum += dy * dy; 731 | } 732 | } 733 | m_rgbRMSLE = (float)sqrt((rSum + gSum + bSum) / (3.0 * m_imageWidth * m_imageHeight)); 734 | m_lumRMSLE = (float)sqrt((0.299 * rSum + 0.587 * gSum + 0.114 * bSum) / (1.0 * m_imageWidth * m_imageHeight)); 735 | 736 | delete imageA; 737 | delete imageB; 738 | 739 | char rmseString[256]; 740 | rmseString[0] = 0; 741 | sprintf_s(rmseString, "rgbRMSLE:%.4f lumRMSLE:%.4f Mode:%s %s\n", m_rgbRMSLE, m_lumRMSLE, m_compressionMode == 1 ? 
"Quality" : "Fast", ImagePathArr[m_imageID]); 742 | OutputDebugStringA(rmseString); 743 | 744 | m_updateTitle = true; 745 | } 746 | -------------------------------------------------------------------------------- /app.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | struct Vec2 4 | { 5 | Vec2() 6 | { 7 | } 8 | 9 | Vec2(float x_, float y_) 10 | : x(x_) 11 | , y(y_) 12 | { 13 | } 14 | 15 | float x; 16 | float y; 17 | }; 18 | 19 | struct Vec3 20 | { 21 | float x; 22 | float y; 23 | float z; 24 | }; 25 | 26 | uint32_t const MAX_QUERY_FRAME_NUM = 5; 27 | uint32_t const COMPRESSION_MODE_NUM = 2; 28 | uint32_t const BLIT_MODE_NUM = 4; 29 | 30 | class CApp 31 | { 32 | public: 33 | CApp(); 34 | ~CApp(); 35 | 36 | bool Init(HWND windowHandle); 37 | void Release(); 38 | void Render(); 39 | void OnKeyDown(WPARAM wParam); 40 | void OnLButtonDown(int mouseX, int mouseY); 41 | void OnLButtonUp(int mouseX, int mouseY); 42 | void OnMouseMove(int mouseX, int mouseY); 43 | void OnMouseWheel(int zDelta); 44 | void OnResize(); 45 | 46 | ID3D11Device* GetDevice() { return m_device; } 47 | ID3D11DeviceContext* GetCtx() { return m_ctx; } 48 | 49 | 50 | private: 51 | unsigned m_backbufferWidth = 1280; 52 | unsigned m_backbufferHeight = 720; 53 | 54 | ID3D11Device* m_device = nullptr; 55 | ID3D11DeviceContext* m_ctx = nullptr; 56 | IDXGISwapChain* m_swapChain = nullptr; 57 | ID3D11RenderTargetView* m_backBufferView = nullptr; 58 | ID3D11SamplerState* m_pointSampler = nullptr; 59 | ID3D11Buffer* m_constantBuffer = nullptr; 60 | 61 | ID3D11Query* m_disjointQueries[MAX_QUERY_FRAME_NUM]; 62 | ID3D11Query* m_timeBeginQueries[MAX_QUERY_FRAME_NUM]; 63 | ID3D11Query* m_timeEndQueries[MAX_QUERY_FRAME_NUM]; 64 | float m_timeAcc = 0.0f; 65 | unsigned m_timeAccSampleNum = 0; 66 | float m_compressionTime = 0.0f; 67 | 68 | // Shaders 69 | ID3D11VertexShader* m_blitVS = nullptr; 70 | ID3D11PixelShader* m_blitPS = nullptr; 71 | 
ID3D11ComputeShader* m_compressCS[COMPRESSION_MODE_NUM] = { nullptr }; 72 | 73 | // Resources 74 | ID3D11Buffer* m_ib = nullptr; 75 | ID3D11Texture2D* m_sourceTextureRes = nullptr; 76 | ID3D11ShaderResourceView* m_sourceTextureView = nullptr; 77 | ID3D11Texture2D* m_compressedTextureRes = nullptr; 78 | ID3D11ShaderResourceView* m_compressedTextureView = nullptr; 79 | ID3D11Texture2D* m_compressTargetRes = nullptr; 80 | ID3D11UnorderedAccessView* m_compressTargetUAV = nullptr; 81 | ID3D11Texture2D* m_tmpTargetRes = nullptr; 82 | ID3D11RenderTargetView* m_tmpTargetView = nullptr; 83 | ID3D11Texture2D* m_tmpStagingRes = nullptr; 84 | 85 | HWND m_windowHandle = 0; 86 | Vec2 m_texelBias = Vec2(0.0f, 0.0f); 87 | float m_texelScale = 1.0f; 88 | float m_imageZoom = 0.0f; 89 | float m_imageExposure = 0.0f; 90 | bool m_dragEnabled = false; 91 | Vec2 m_dragStart = Vec2(0.0f, 0.0f); 92 | bool m_updateRMSE = true; 93 | bool m_updateTitle = true; 94 | uint32_t m_imageID = 0; 95 | uint32_t m_imageWidth = 0; 96 | uint32_t m_imageHeight = 0; 97 | uint64_t m_frameID = 0; 98 | 99 | uint32_t m_compressionMode = 0; 100 | uint32_t m_blitMode = 1; 101 | 102 | // Compression error 103 | float m_rgbRMSLE = 0.0f; 104 | float m_lumRMSLE = 0.0f; 105 | 106 | void CreateImage(); 107 | void DestoryImage(); 108 | void CreateShaders(); 109 | void DestroyShaders(); 110 | void CreateTargets(); 111 | void DestroyTargets(); 112 | void CreateQueries(); 113 | void CreateConstantBuffer(); 114 | void UpdateRMSE(); 115 | void UpdateTitle(); 116 | void CopyTexture(Vec3* image, ID3D11ShaderResourceView* srcView); 117 | }; 118 | 119 | extern CApp gApp; -------------------------------------------------------------------------------- /bin/GPURealTimeBC6HRelease.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/knarkowicz/GPURealTimeBC6H/0416fd30fd85e29d3ae673dee2cb9d0d09a36f61/bin/GPURealTimeBC6HRelease.exe 
--------------------------------------------------------------------------------
/bin/atrium.dds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/knarkowicz/GPURealTimeBC6H/0416fd30fd85e29d3ae673dee2cb9d0d09a36f61/bin/atrium.dds
--------------------------------------------------------------------------------
/bin/backyard.dds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/knarkowicz/GPURealTimeBC6H/0416fd30fd85e29d3ae673dee2cb9d0d09a36f61/bin/backyard.dds
--------------------------------------------------------------------------------
/bin/blit.hlsl:
--------------------------------------------------------------------------------
Texture2D TextureA : register(t0);
Texture2D TextureB : register(t1);
SamplerState PointSampler : register(s0);

struct PSInput
{
    float4 m_pos : SV_POSITION;
};

cbuffer MainCB : register(b0)
{
    float2 ScreenSizeRcp;
    uint2 TextureSizeInBlocks;
    float2 TextureSizeRcp;
    float2 TexelBias;
    float TexelScale;
    float Exposure;
    uint BlitMode;
};

// Weighted sum of the RGB channels (0.299, 0.587, 0.114).
float Luminance(float3 x)
{
    float3 luminanceWeights = float3(0.299f, 0.587f, 0.114f);
    return dot(x, luminanceWeights);
}

// Derives a clip-space corner from the vertex ID alone:
// bit 1 selects x (-1 or +1), bit 0 selects y (-1 or +1).
PSInput VSMain(uint vertexID : SV_VertexID)
{
    PSInput output;

    float x = vertexID >> 1;
    float y = vertexID & 1;

    output.m_pos = float4(2.0f * x - 1.0f, 2.0f * y - 1.0f, 0.0f, 1.0f);

    return output;
}

// Samples both textures at the same texel and returns, depending on BlitMode:
// 0 = TextureA, 1 = TextureB, 2 = per-channel squared log difference (x16),
// anything else = luminance of that squared difference.
float3 PSMain(PSInput i) : SV_Target
{
    float2 uv = (i.m_pos * TexelScale + TexelBias) * TextureSizeRcp;

    float3 a = TextureA.SampleLevel(PointSampler, uv, 0.0f) * Exposure;
    float3 b = TextureB.SampleLevel(PointSampler, uv, 0.0f) * Exposure;
    float3 delta = log(a + 1.0f) - log(b + 1.0f);
    float3 deltaSq = delta * delta * 16.0f;

    if (BlitMode == 0)
    {
        return a;
    }

    if (BlitMode == 1)
    {
        return b;
    }

    if (BlitMode == 2)
    {
        return deltaSq;
    }

    return Luminance(deltaSq);
}

--------------------------------------------------------------------------------
/bin/compress.hlsl:
--------------------------------------------------------------------------------
#pragma warning(disable : 3078) // "loop control variable conflicts with a previous declaration in the outer scope"


// Whether to use P2 modes (4 endpoints) for compression. Slow, but improves quality.
#define ENCODE_P2 (QUALITY == 1)

// Improve quality at small performance loss
#define INSET_COLOR_BBOX 1
#define OPTIMIZE_ENDPOINTS 1

// Whether to optimize for luminance error or for RGB error
#define LUMINANCE_WEIGHTS 1


static const float HALF_MAX = 65504.0f;
static const uint PATTERN_NUM = 32;

Texture2D SrcTexture : register(t0);
// CSMain stores one uint4 per 4x4 block into this UAV, so the element type
// must be uint4.
RWTexture2D<uint4> OutputTexture : register(u0);
SamplerState PointSampler : register(s0);

cbuffer MainCB : register(b0)
{
    float2 ScreenSizeRcp;
    uint2 TextureSizeInBlocks;
    float2 TextureSizeRcp;
    float2 TexelBias;
    float TexelScale;
    float Exposure;
    uint BlitMode;
};

// Squared log2 error between two colors, summed over the channels
// (luminance-weighted when LUMINANCE_WEIGHTS is enabled).
float CalcMSLE(float3 a, float3 b)
{
    float3 delta = log2((b + 1.0f) / (a + 1.0f));
    float3 deltaSq = delta * delta;

#if LUMINANCE_WEIGHTS
    float3 luminanceWeights = float3(0.299f, 0.587f, 0.114f);
    deltaSq *= luminanceWeights;
#endif

    return deltaSq.x + deltaSq.y + deltaSq.z;
}

// Returns 15, 2 or 8 for pattern i, looked up from two packed bit tables.
// EncodeP2Pattern uses the result as the second partition's fix-up texel.
uint PatternFixupID(uint i)
{
    uint ret = 15;
    ret = ((3441033216 >> i) & 0x1) ? 2 : ret;
    ret = ((845414400 >> i) & 0x1) ? 8 : ret;
    return ret;
}

// Returns the partition (0 or 1) of texel i under pattern p.
// Each 32-bit constant packs two 16-texel masks: even patterns use the low
// half, odd patterns the high half.
uint Pattern(uint p, uint i)
{
    uint p2 = p / 2;
    uint p3 = p - p2 * 2;

    uint enc = 0;
    enc = p2 == 0 ? 2290666700 : enc;
    enc = p2 == 1 ? 3972591342 : enc;
    enc = p2 == 2 ? 4276930688 : enc;
    enc = p2 == 3 ? 3967876808 : enc;
    enc = p2 == 4 ? 4293707776 : enc;
    enc = p2 == 5 ? 3892379264 : enc;
    enc = p2 == 6 ? 4278255592 : enc;
    enc = p2 == 7 ? 4026597360 : enc;
    enc = p2 == 8 ? 9369360 : enc;
    enc = p2 == 9 ? 147747072 : enc;
    enc = p2 == 10 ? 1930428556 : enc;
    enc = p2 == 11 ? 2362323200 : enc;
    enc = p2 == 12 ? 823134348 : enc;
    enc = p2 == 13 ? 913073766 : enc;
    enc = p2 == 14 ? 267393000 : enc;
    enc = p2 == 15 ? 966553998 : enc;

    enc = p3 ? enc >> 16 : enc;
    uint ret = (enc >> i) & 0x1;
    return ret;
}

// Quantize7/9/10 rescale the half-float bit pattern of x from the
// [0, 0x7bff + 1] range to a 7-, 9- or 10-bit range. The result is not rounded.
float3 Quantize7(float3 x)
{
    return (f32tof16(x) * 128.0f) / (0x7bff + 1.0f);
}

float3 Quantize9(float3 x)
{
    return (f32tof16(x) * 512.0f) / (0x7bff + 1.0f);
}

float3 Quantize10(float3 x)
{
    return (f32tof16(x) * 1024.0f) / (0x7bff + 1.0f);
}

// Unquantize7/9/10 expand a quantized endpoint back to the 16-bit range
// consumed by FinishUnquantize.
float3 Unquantize7(float3 x)
{
    return (x * 65536.0f + 0x8000) / 128.0f;
}

float3 Unquantize9(float3 x)
{
    return (x * 65536.0f + 0x8000) / 512.0f;
}

float3 Unquantize10(float3 x)
{
    return (x * 65536.0f + 0x8000) / 1024.0f;
}

// Interpolates two unquantized endpoints with a weight in [0, 64], scales by
// 31/64 and reinterprets the integer result as half-float bits.
float3 FinishUnquantize(float3 endpoint0Unq, float3 endpoint1Unq, float weight)
{
    float3 comp = (endpoint0Unq * (64.0f - weight) + endpoint1Unq * weight + 32.0f) * (31.0f / 4096.0f);
    return f16tof32(uint3(comp));
}

void Swap(inout float3 a, inout float3 b)
{
    float3 tmp = a;
    a = b;
    b = tmp;
}

void Swap(inout float a, inout float b)
{
    float tmp = a;
    a = b;
    b = tmp;
}

// Maps a texel's position between two endpoint positions to a 3-bit index (0..7).
uint ComputeIndex3(float texelPos, float endPoint0Pos, float endPoint1Pos)
{
    float r = (texelPos - endPoint0Pos) / (endPoint1Pos - endPoint0Pos);
    return (uint) clamp(r * 6.98182f + 0.00909f + 0.5f, 0.0f, 7.0f);
}

// Maps a texel's position between two endpoint positions to a 4-bit index (0..15).
uint ComputeIndex4(float texelPos, float endPoint0Pos, float endPoint1Pos)
{
    float r = (texelPos - endPoint0Pos) / (endPoint1Pos - endPoint0Pos);
    return (uint) clamp(r * 14.93333f + 0.03333f + 0.5f, 0.0f, 15.0f);
}

// Converts each component to an integer, keeps the bits selected by mask and
// sets signFlag when the component is negative.
void SignExtend(inout float3 v1, uint mask, uint signFlag)
{
    int3 v = (int3) v1;
    v.x = (v.x & mask) | (v.x < 0 ? signFlag : 0);
    v.y = (v.y & mask) | (v.y < 0 ? signFlag : 0);
    v.z = (v.z & mask) | (v.z < 0 ? signFlag : 0);
    v1 = v;
}

// Refine endpoints by insetting bounding box in log2 RGB space
void InsetColorBBoxP1(float3 texels[16], inout float3 blockMin, inout float3 blockMax)
{
    float3 refinedBlockMin = blockMax;
    float3 refinedBlockMax = blockMin;

    // Per channel: smallest value above blockMin and largest value below blockMax.
    for (uint i = 0; i < 16; ++i)
    {
        refinedBlockMin = min(refinedBlockMin, texels[i] == blockMin ? refinedBlockMin : texels[i]);
        refinedBlockMax = max(refinedBlockMax, texels[i] == blockMax ? refinedBlockMax : texels[i]);
    }

    float3 logRefinedBlockMax = log2(refinedBlockMax + 1.0f);
    float3 logRefinedBlockMin = log2(refinedBlockMin + 1.0f);

    float3 logBlockMax = log2(blockMax + 1.0f);
    float3 logBlockMin = log2(blockMin + 1.0f);
    float3 logBlockMaxExt = (logBlockMax - logBlockMin) * (1.0f / 32.0f);

    // Move each bound inward by at most 1/32 of the log-space extent.
    logBlockMin += min(logRefinedBlockMin - logBlockMin, logBlockMaxExt);
    logBlockMax -= min(logBlockMax - logRefinedBlockMax, logBlockMaxExt);

    blockMin = exp2(logBlockMin) - 1.0f;
    blockMax = exp2(logBlockMax) - 1.0f;
}

// Refine endpoints by insetting bounding box in log2 RGB space
// Same as InsetColorBBoxP1, restricted to texels whose partition under
// pattern equals patternSelector.
void InsetColorBBoxP2(float3 texels[16], uint pattern, uint patternSelector, inout float3 blockMin, inout float3 blockMax)
{
    float3 refinedBlockMin = blockMax;
    float3 refinedBlockMax = blockMin;

    for (uint i = 0; i < 16; ++i)
    {
        uint paletteID = Pattern(pattern, i);
        if (paletteID == patternSelector)
        {
            refinedBlockMin = min(refinedBlockMin, texels[i] == blockMin ? refinedBlockMin : texels[i]);
            refinedBlockMax = max(refinedBlockMax, texels[i] == blockMax ? refinedBlockMax : texels[i]);
        }
    }

    float3 logRefinedBlockMax = log2(refinedBlockMax + 1.0f);
    float3 logRefinedBlockMin = log2(refinedBlockMin + 1.0f);

    float3 logBlockMax = log2(blockMax + 1.0f);
    float3 logBlockMin = log2(blockMin + 1.0f);
    float3 logBlockMaxExt = (logBlockMax - logBlockMin) * (1.0f / 32.0f);

    logBlockMin += min(logRefinedBlockMin - logBlockMin, logBlockMaxExt);
    logBlockMax -= min(logBlockMax - logRefinedBlockMax, logBlockMaxExt);

    blockMin = exp2(logBlockMin) - 1.0f;
    blockMax = exp2(logBlockMax) - 1.0f;
}

// Least squares optimization to find best endpoints for the selected block indices
// The result is clamped to the non-inset bounding box.
void OptimizeEndpointsP1(float3 texels[16], inout float3 blockMin, inout float3 blockMax, in float3 blockMinNonInset, in float3 blockMaxNonInset)
{
    float3 blockDir = blockMax - blockMin;
    blockDir = blockDir / (blockDir.x + blockDir.y + blockDir.z);

    float endPoint0Pos = f32tof16(dot(blockMin, blockDir));
    float endPoint1Pos = f32tof16(dot(blockMax, blockDir));

    float3 alphaTexelSum = 0.0f;
    float3 betaTexelSum = 0.0f;
    float alphaBetaSum = 0.0f;
    float alphaSqSum = 0.0f;
    float betaSqSum = 0.0f;

    for (int i = 0; i < 16; i++)
    {
        float texelPos = f32tof16(dot(texels[i], blockDir));
        uint texelIndex = ComputeIndex4(texelPos, endPoint0Pos, endPoint1Pos);

        float beta = saturate(texelIndex / 15.0f);
        float alpha = 1.0f - beta;

        float3 texelF16 = f32tof16(texels[i].xyz);
        alphaTexelSum += alpha * texelF16;
        betaTexelSum += beta * texelF16;

        alphaBetaSum += alpha * beta;

        alphaSqSum += alpha * alpha;
        betaSqSum += beta * beta;
    }

    float det = alphaSqSum * betaSqSum - alphaBetaSum * alphaBetaSum;

    // Keep the incoming endpoints when the system is (near-)singular.
    if (abs(det) > 0.00001f)
    {
        float detRcp = rcp(det);
        blockMin = clamp(f16tof32(clamp(detRcp * (alphaTexelSum * betaSqSum - betaTexelSum * alphaBetaSum), 0.0f, HALF_MAX)), blockMinNonInset, blockMaxNonInset);
        blockMax = clamp(f16tof32(clamp(detRcp * (betaTexelSum * alphaSqSum - alphaTexelSum * alphaBetaSum), 0.0f, HALF_MAX)), blockMinNonInset, blockMaxNonInset);
    }
}

// Least squares optimization to find best endpoints for the selected block indices
// Only texels whose partition under pattern equals patternSelector contribute.
void OptimizeEndpointsP2(float3 texels[16], uint pattern, uint patternSelector, inout float3 blockMin, inout float3 blockMax)
{
    float3 blockDir = blockMax - blockMin;
    blockDir = blockDir / (blockDir.x + blockDir.y + blockDir.z);

    float endPoint0Pos = f32tof16(dot(blockMin, blockDir));
    float endPoint1Pos = f32tof16(dot(blockMax, blockDir));

    float3 alphaTexelSum = 0.0f;
    float3 betaTexelSum = 0.0f;
    float alphaBetaSum = 0.0f;
    float alphaSqSum = 0.0f;
    float betaSqSum = 0.0f;

    for (int i = 0; i < 16; i++)
    {
        uint paletteID = Pattern(pattern, i);
        if (paletteID == patternSelector)
        {
            float texelPos = f32tof16(dot(texels[i], blockDir));
            uint texelIndex = ComputeIndex3(texelPos, endPoint0Pos, endPoint1Pos);

            float beta = saturate(texelIndex / 7.0f);
            float alpha = 1.0f - beta;

            float3 texelF16 = f32tof16(texels[i].xyz);
            alphaTexelSum += alpha * texelF16;
            betaTexelSum += beta * texelF16;

            alphaBetaSum += alpha * beta;

            alphaSqSum += alpha * alpha;
            betaSqSum += beta * beta;
        }
    }

    float det = alphaSqSum * betaSqSum - alphaBetaSum * alphaBetaSum;

    if (abs(det) > 0.00001f)
    {
        float detRcp = rcp(det);
        blockMin = f16tof32(clamp(detRcp * (alphaTexelSum * betaSqSum - betaTexelSum * alphaBetaSum), 0.0f, HALF_MAX));
        blockMax = f16tof32(clamp(detRcp * (betaTexelSum * alphaSqSum - alphaTexelSum * alphaBetaSum), 0.0f, HALF_MAX));
    }
}

// Encodes the block with a single endpoint pair and 4-bit indices (mode 11).
// Bits are OR-ed into block; blockMSLE receives the resulting error.
void EncodeP1(inout uint4 block, inout float blockMSLE, float3 texels[16])
{
    // compute endpoints (min/max RGB bbox)
    float3 blockMin = texels[0];
    float3 blockMax = texels[0];
    for (uint i = 1; i < 16; ++i)
    {
        blockMin = min(blockMin, texels[i]);
        blockMax = max(blockMax, texels[i]);
    }

    float3 blockMinNonInset = blockMin;
    float3 blockMaxNonInset = blockMax;
#if INSET_COLOR_BBOX
    InsetColorBBoxP1(texels, blockMin, blockMax);
#endif

#if OPTIMIZE_ENDPOINTS
    OptimizeEndpointsP1(texels, blockMin, blockMax, blockMinNonInset, blockMaxNonInset);
#endif


    float3 blockDir = blockMax - blockMin;
    blockDir = blockDir / (blockDir.x + blockDir.y + blockDir.z);

    float3 endpoint0 = Quantize10(blockMin);
    float3 endpoint1 = Quantize10(blockMax);
    float endPoint0Pos = f32tof16(dot(blockMin, blockDir));
    float endPoint1Pos = f32tof16(dot(blockMax, blockDir));

    // check if endpoint swap is required
    float fixupTexelPos = f32tof16(dot(texels[0], blockDir));
    uint fixupIndex = ComputeIndex4(fixupTexelPos, endPoint0Pos, endPoint1Pos);
    if (fixupIndex > 7)
    {
        Swap(endPoint0Pos, endPoint1Pos);
        Swap(endpoint0, endpoint1);
    }

    // compute indices
    uint indices[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
    for (uint i = 0; i < 16; ++i)
    {
        float texelPos = f32tof16(dot(texels[i], blockDir));
        indices[i] = ComputeIndex4(texelPos, endPoint0Pos, endPoint1Pos);
    }

    // compute compression error (MSLE)
    float3 endpoint0Unq = Unquantize10(endpoint0);
    float3 endpoint1Unq = Unquantize10(endpoint1);
    float msle = 0.0f;
    for (uint i = 0; i < 16; ++i)
    {
        float weight = floor((indices[i] * 64.0f) / 15.0f + 0.5f);
        float3 texelUnc = FinishUnquantize(endpoint0Unq, endpoint1Unq, weight);

        msle += CalcMSLE(texels[i], texelUnc);
    }


    // encode block for mode 11
    blockMSLE = msle;
    block.x = 0x03;

    // endpoints
    block.x |= (uint) endpoint0.x << 5;
    block.x |= (uint) endpoint0.y << 15;
    block.x |= (uint) endpoint0.z << 25;
    block.y |= (uint) endpoint0.z >> 7;
    block.y |= (uint) endpoint1.x << 3;
    block.y |= (uint) endpoint1.y << 13;
    block.y |= (uint) endpoint1.z << 23;
    block.z |= (uint) endpoint1.z >> 9;

    // indices
    block.z |= indices[0] << 1;
    block.z |= indices[1] << 4;
    block.z |= indices[2] << 8;
    block.z |= indices[3] << 12;
    block.z |= indices[4] << 16;
    block.z |= indices[5] << 20;
    block.z |= indices[6] << 24;
    block.z |= indices[7] << 28;
    block.w |= indices[8] << 0;
    block.w |= indices[9] << 4;
    block.w |= indices[10] << 8;
    block.w |= indices[11] << 12;
    block.w |= indices[12] << 16;
    block.w |= indices[13] << 20;
    block.w |= indices[14] << 24;
    block.w |= indices[15] << 28;
}

// Squared distance from Point to the line through PointOnLine along
// LineDirection. EvaluateP2Pattern passes a normalized direction.
float DistToLineSq(float3 PointOnLine, float3 LineDirection, float3 Point)
{
    float3 w = Point - PointOnLine;
    float3 x = w - dot(w, LineDirection) * LineDirection;
    return dot(x, x);
}

// Evaluate how good is given P2 pattern for encoding current block
// Score = sum of squared distances from each texel to the min->max line of
// its partition's bounding box; CSMain picks the pattern with the lowest score.
float EvaluateP2Pattern(int pattern, float3 texels[16])
{
    float3 p0BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX);
    float3 p0BlockMax = float3(0.0f, 0.0f, 0.0f);
    float3 p1BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX);
    float3 p1BlockMax = float3(0.0f, 0.0f, 0.0f);

    for (uint i = 0; i < 16; ++i)
    {
        uint paletteID = Pattern(pattern, i);
        if (paletteID == 0)
        {
            p0BlockMin = min(p0BlockMin, texels[i]);
            p0BlockMax = max(p0BlockMax, texels[i]);
        }
        else
        {
            p1BlockMin = min(p1BlockMin, texels[i]);
            p1BlockMax = max(p1BlockMax, texels[i]);
        }
    }

    float3 p0BlockDir = normalize(p0BlockMax - p0BlockMin);
    float3 p1BlockDir = normalize(p1BlockMax - p1BlockMin);

    float sqDistanceFromLine = 0.0f;

    for (uint i = 0; i < 16; ++i)
    {
        uint paletteID = Pattern(pattern, i);
        if (paletteID == 0)
        {
            sqDistanceFromLine += DistToLineSq(p0BlockMin, p0BlockDir, texels[i]);
        }
        else
        {
            sqDistanceFromLine += DistToLineSq(p1BlockMin, p1BlockDir, texels[i]);
        }
    }

    return sqDistanceFromLine;
}

// Encodes the block with two endpoint pairs split by pattern, trying both the
// 7.6 and the 9.5 endpoint precision. block and blockMSLE are overwritten only
// when the better of the two beats the incoming blockMSLE.
void EncodeP2Pattern(inout uint4 block, inout float blockMSLE, int pattern, float3 texels[16])
{
    float3 p0BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX);
    float3 p0BlockMax = float3(0.0f, 0.0f, 0.0f);
    float3 p1BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX);
    float3 p1BlockMax = float3(0.0f, 0.0f, 0.0f);

    for (uint i = 0; i < 16; ++i)
    {
        uint paletteID = Pattern(pattern, i);
        if (paletteID == 0)
        {
            p0BlockMin = min(p0BlockMin, texels[i]);
            p0BlockMax = max(p0BlockMax, texels[i]);
        }
        else
        {
            p1BlockMin = min(p1BlockMin, texels[i]);
            p1BlockMax = max(p1BlockMax, texels[i]);
        }
    }

#if INSET_COLOR_BBOX
    // Disabled because it was a negligible quality increase
    //InsetColorBBoxP2(texels, pattern, 0, p0BlockMin, p0BlockMax);
    //InsetColorBBoxP2(texels, pattern, 1, p1BlockMin, p1BlockMax);
#endif

#if OPTIMIZE_ENDPOINTS
    OptimizeEndpointsP2(texels, pattern, 0, p0BlockMin, p0BlockMax);
    OptimizeEndpointsP2(texels, pattern, 1, p1BlockMin, p1BlockMax);
#endif

    float3 p0BlockDir = p0BlockMax - p0BlockMin;
    float3 p1BlockDir = p1BlockMax - p1BlockMin;
    p0BlockDir = p0BlockDir / (p0BlockDir.x + p0BlockDir.y + p0BlockDir.z);
    p1BlockDir = p1BlockDir / (p1BlockDir.x + p1BlockDir.y + p1BlockDir.z);


    float p0Endpoint0Pos = f32tof16(dot(p0BlockMin, p0BlockDir));
    float p0Endpoint1Pos = f32tof16(dot(p0BlockMax, p0BlockDir));
    float p1Endpoint0Pos = f32tof16(dot(p1BlockMin, p1BlockDir));
    float p1Endpoint1Pos = f32tof16(dot(p1BlockMax, p1BlockDir));


    // Swap a partition's endpoints when its fix-up texel (texel 0 for
    // partition 0, texel fixupID for partition 1) would get an index above 3.
    uint fixupID = PatternFixupID(pattern);
    float p0FixupTexelPos = f32tof16(dot(texels[0], p0BlockDir));
    float p1FixupTexelPos = f32tof16(dot(texels[fixupID], p1BlockDir));
    uint p0FixupIndex = ComputeIndex3(p0FixupTexelPos, p0Endpoint0Pos, p0Endpoint1Pos);
    uint p1FixupIndex = ComputeIndex3(p1FixupTexelPos, p1Endpoint0Pos, p1Endpoint1Pos);
    if (p0FixupIndex > 3)
    {
        Swap(p0Endpoint0Pos, p0Endpoint1Pos);
        Swap(p0BlockMin, p0BlockMax);
    }
    if (p1FixupIndex > 3)
    {
        Swap(p1Endpoint0Pos, p1Endpoint1Pos);
        Swap(p1BlockMin, p1BlockMax);
    }

    uint indices[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
    for (uint i = 0; i < 16; ++i)
    {
        float p0TexelPos = f32tof16(dot(texels[i], p0BlockDir));
        float p1TexelPos = f32tof16(dot(texels[i], p1BlockDir));
        uint p0Index = ComputeIndex3(p0TexelPos, p0Endpoint0Pos, p0Endpoint1Pos);
        uint p1Index = ComputeIndex3(p1TexelPos, p1Endpoint0Pos, p1Endpoint1Pos);

        uint paletteID = Pattern(pattern, i);
        indices[i] = paletteID == 0 ? p0Index : p1Index;
    }

    // Quantize the four endpoints at both precisions.
    float3 endpoint760 = floor(Quantize7(p0BlockMin));
    float3 endpoint761 = floor(Quantize7(p0BlockMax));
    float3 endpoint762 = floor(Quantize7(p1BlockMin));
    float3 endpoint763 = floor(Quantize7(p1BlockMax));

    float3 endpoint950 = floor(Quantize9(p0BlockMin));
    float3 endpoint951 = floor(Quantize9(p0BlockMax));
    float3 endpoint952 = floor(Quantize9(p1BlockMin));
    float3 endpoint953 = floor(Quantize9(p1BlockMax));

    // Endpoints 1..3 are stored as clamped deltas from endpoint 0.
    endpoint761 = endpoint761 - endpoint760;
    endpoint762 = endpoint762 - endpoint760;
    endpoint763 = endpoint763 - endpoint760;

    endpoint951 = endpoint951 - endpoint950;
    endpoint952 = endpoint952 - endpoint950;
    endpoint953 = endpoint953 - endpoint950;

    int maxVal76 = 0x1F;
    endpoint761 = clamp(endpoint761, -maxVal76, maxVal76);
    endpoint762 = clamp(endpoint762, -maxVal76, maxVal76);
    endpoint763 = clamp(endpoint763, -maxVal76, maxVal76);

    int maxVal95 = 0xF;
    endpoint951 = clamp(endpoint951, -maxVal95, maxVal95);
    endpoint952 = clamp(endpoint952, -maxVal95, maxVal95);
    endpoint953 = clamp(endpoint953, -maxVal95, maxVal95);

    float3 endpoint760Unq = Unquantize7(endpoint760);
    float3 endpoint761Unq = Unquantize7(endpoint760 + endpoint761);
    float3 endpoint762Unq = Unquantize7(endpoint760 + endpoint762);
    float3 endpoint763Unq = Unquantize7(endpoint760 + endpoint763);
    float3 endpoint950Unq = Unquantize9(endpoint950);
    float3 endpoint951Unq = Unquantize9(endpoint950 + endpoint951);
    float3 endpoint952Unq = Unquantize9(endpoint950 + endpoint952);
    float3 endpoint953Unq = Unquantize9(endpoint950 + endpoint953);

    // Error of both candidate encodings against the source texels.
    float msle76 = 0.0f;
    float msle95 = 0.0f;
    for (uint i = 0; i < 16; ++i)
    {
        uint paletteID = Pattern(pattern, i);

        float3 tmp760Unq = paletteID == 0 ? endpoint760Unq : endpoint762Unq;
        float3 tmp761Unq = paletteID == 0 ? endpoint761Unq : endpoint763Unq;
        float3 tmp950Unq = paletteID == 0 ? endpoint950Unq : endpoint952Unq;
        float3 tmp951Unq = paletteID == 0 ? endpoint951Unq : endpoint953Unq;

        float weight = floor((indices[i] * 64.0f) / 7.0f + 0.5f);
        float3 texelUnc76 = FinishUnquantize(tmp760Unq, tmp761Unq, weight);
        float3 texelUnc95 = FinishUnquantize(tmp950Unq, tmp951Unq, weight);

        msle76 += CalcMSLE(texels[i], texelUnc76);
        msle95 += CalcMSLE(texels[i], texelUnc95);
    }

    SignExtend(endpoint761, 0x1F, 0x20);
    SignExtend(endpoint762, 0x1F, 0x20);
    SignExtend(endpoint763, 0x1F, 0x20);

    SignExtend(endpoint951, 0xF, 0x10);
    SignExtend(endpoint952, 0xF, 0x10);
    SignExtend(endpoint953, 0xF, 0x10);

    // encode block
    float p2MSLE = min(msle76, msle95);
    if (p2MSLE < blockMSLE)
    {
        blockMSLE = p2MSLE;
        block = uint4(0, 0, 0, 0);

        if (p2MSLE == msle76)
        {
            // 7.6
            block.x = 0x1;
            block.x |= ((uint) endpoint762.y & 0x20) >> 3;
            block.x |= ((uint) endpoint763.y & 0x10) >> 1;
            block.x |= ((uint) endpoint763.y & 0x20) >> 1;
            block.x |= (uint) endpoint760.x << 5;
            block.x |= ((uint) endpoint763.z & 0x01) << 12;
            block.x |= ((uint) endpoint763.z & 0x02) << 12;
            block.x |= ((uint) endpoint762.z & 0x10) << 10;
            block.x |= (uint) endpoint760.y << 15;
            block.x |= ((uint) endpoint762.z & 0x20) << 17;
            block.x |= ((uint) endpoint763.z & 0x04) << 21;
            block.x |= ((uint) endpoint762.y & 0x10) << 20;
            block.x |= (uint) endpoint760.z << 25;
            block.y |= ((uint) endpoint763.z & 0x08) >> 3;
            block.y |= ((uint) endpoint763.z & 0x20) >> 4;
            block.y |= ((uint) endpoint763.z & 0x10) >> 2;
            block.y |= (uint) endpoint761.x << 3;
            block.y |= ((uint) endpoint762.y & 0x0F) << 9;
            block.y |= (uint) endpoint761.y << 13;
            block.y |= ((uint) endpoint763.y & 0x0F) << 19;
            block.y |= (uint) endpoint761.z << 23;
            block.y |= ((uint) endpoint762.z & 0x07) << 29;
            block.z |= ((uint) endpoint762.z & 0x08) >> 3;
            block.z |= (uint) endpoint762.x << 1;
            block.z |= (uint) endpoint763.x << 7;
        }
        else
        {
            // 9.5
            block.x = 0xE;
            block.x |= (uint) endpoint950.x << 5;
            block.x |= ((uint) endpoint952.z & 0x10) << 10;
            block.x |= (uint) endpoint950.y << 15;
            block.x |= ((uint) endpoint952.y & 0x10) << 20;
            block.x |= (uint) endpoint950.z << 25;
            block.y |= (uint) endpoint950.z >> 7;
            block.y |= ((uint) endpoint953.z & 0x10) >> 2;
            block.y |= (uint) endpoint951.x << 3;
            block.y |= ((uint) endpoint953.y & 0x10) << 4;
            block.y |= ((uint) endpoint952.y & 0x0F) << 9;
            block.y |= (uint) endpoint951.y << 13;
            block.y |= ((uint) endpoint953.z & 0x01) << 18;
            block.y |= ((uint) endpoint953.y & 0x0F) << 19;
            block.y |= (uint) endpoint951.z << 23;
            block.y |= ((uint) endpoint953.z & 0x02) << 27;
            block.y |= (uint) endpoint952.z << 29;
            block.z |= ((uint) endpoint952.z & 0x08) >> 3;
            block.z |= (uint) endpoint952.x << 1;
            block.z |= ((uint) endpoint953.z & 0x04) << 4;
            block.z |= (uint) endpoint953.x << 7;
            block.z |= ((uint) endpoint953.z & 0x08) << 9;
        }

        // The index layout depends on the fix-up texel: texel 0 and the
        // fix-up texel take 2 bits each, every other texel takes 3.
        block.z |= pattern << 13;
        uint blockFixupID = PatternFixupID(pattern);
        if (blockFixupID == 15)
        {
            block.z |= indices[0] << 18;
            block.z |= indices[1] << 20;
            block.z |= indices[2] << 23;
            block.z |= indices[3] << 26;
            block.z |= indices[4] << 29;
            block.w |= indices[5] << 0;
            block.w |= indices[6] << 3;
            block.w |= indices[7] << 6;
            block.w |= indices[8] << 9;
            block.w |= indices[9] << 12;
            block.w |= indices[10] << 15;
            block.w |= indices[11] << 18;
            block.w |= indices[12] << 21;
            block.w |= indices[13] << 24;
            block.w |= indices[14] << 27;
            block.w |= indices[15] << 30;
        }
        else if (blockFixupID == 2)
        {
            block.z |= indices[0] << 18;
            block.z |= indices[1] << 20;
            block.z |= indices[2] << 23;
            block.z |= indices[3] << 25;
            block.z |= indices[4] << 28;
            block.z |= indices[5] << 31;
            block.w |= indices[5] >> 1;
            block.w |= indices[6] << 2;
            block.w |= indices[7] << 5;
            block.w |= indices[8] << 8;
            block.w |= indices[9] << 11;
            block.w |= indices[10] << 14;
            block.w |= indices[11] << 17;
            block.w |= indices[12] << 20;
            block.w |= indices[13] << 23;
            block.w |= indices[14] << 26;
            block.w |= indices[15] << 29;
        }
        else
        {
            block.z |= indices[0] << 18;
            block.z |= indices[1] << 20;
            block.z |= indices[2] << 23;
            block.z |= indices[3] << 26;
            block.z |= indices[4] << 29;
            block.w |= indices[5] << 0;
            block.w |= indices[6] << 3;
            block.w |= indices[7] << 6;
            block.w |= indices[8] << 9;
            block.w |= indices[9] << 11;
            block.w |= indices[10] << 14;
            block.w |= indices[11] << 17;
            block.w |= indices[12] << 20;
            block.w |= indices[13] << 23;
            block.w |= indices[14] << 26;
            block.w |= indices[15] << 29;
        }
    }
}

// One thread per 4x4 block: gathers the 16 source texels, encodes them with
// EncodeP1 (plus the best-scoring P2 pattern when ENCODE_P2 is set) and stores
// the 128-bit result at the block's coordinate in OutputTexture.
[numthreads(8, 8, 1)]
void CSMain(uint3 groupID : SV_GroupID,
    uint3 dispatchThreadID : SV_DispatchThreadID,
    uint3 groupThreadID : SV_GroupThreadID)
{
    uint2 blockCoord = dispatchThreadID.xy;

    if (all(blockCoord < TextureSizeInBlocks))
    {
        // Gather texels for current 4x4 block
        // 0 1 2 3
        // 4 5 6 7
        // 8 9 10 11
        // 12 13 14 15
        float2 uv = blockCoord * TextureSizeRcp * 4.0f + TextureSizeRcp;
        float2 block0UV = uv;
        float2 block1UV = uv + float2(2.0f * TextureSizeRcp.x, 0.0f);
        float2 block2UV = uv + float2(0.0f, 2.0f * TextureSizeRcp.y);
        float2 block3UV = uv + float2(2.0f * TextureSizeRcp.x, 2.0f * TextureSizeRcp.y);
        float4 block0X = SrcTexture.GatherRed(PointSampler, block0UV);
        float4 block1X = SrcTexture.GatherRed(PointSampler, block1UV);
        float4 block2X = SrcTexture.GatherRed(PointSampler, block2UV);
        float4 block3X = SrcTexture.GatherRed(PointSampler, block3UV);
        float4 block0Y = SrcTexture.GatherGreen(PointSampler, block0UV);
        float4 block1Y = SrcTexture.GatherGreen(PointSampler, block1UV);
        float4 block2Y = SrcTexture.GatherGreen(PointSampler, block2UV);
        float4 block3Y = SrcTexture.GatherGreen(PointSampler, block3UV);
        float4 block0Z = SrcTexture.GatherBlue(PointSampler, block0UV);
        float4 block1Z = SrcTexture.GatherBlue(PointSampler, block1UV);
        float4 block2Z = SrcTexture.GatherBlue(PointSampler, block2UV);
        float4 block3Z = SrcTexture.GatherBlue(PointSampler, block3UV);

        // Each gather returns a 2x2 quad; reorder the components into the
        // row-major layout shown above.
        float3 texels[16];
        texels[0] = float3(block0X.w, block0Y.w, block0Z.w);
        texels[1] = float3(block0X.z, block0Y.z, block0Z.z);
        texels[2] = float3(block1X.w, block1Y.w, block1Z.w);
        texels[3] = float3(block1X.z, block1Y.z, block1Z.z);
        texels[4] = float3(block0X.x, block0Y.x, block0Z.x);
        texels[5] = float3(block0X.y, block0Y.y, block0Z.y);
        texels[6] = float3(block1X.x, block1Y.x, block1Z.x);
        texels[7] = float3(block1X.y, block1Y.y, block1Z.y);
        texels[8] = float3(block2X.w, block2Y.w, block2Z.w);
        texels[9] = float3(block2X.z, block2Y.z, block2Z.z);
        texels[10] = float3(block3X.w, block3Y.w, block3Z.w);
        texels[11] = float3(block3X.z, block3Y.z, block3Z.z);
        texels[12] = float3(block2X.x, block2Y.x, block2Z.x);
        texels[13] = float3(block2X.y, block2Y.y, block2Z.y);
        texels[14] = float3(block3X.x, block3Y.x, block3Z.x);
        texels[15] = float3(block3X.y, block3Y.y, block3Z.y);

        uint4 block = uint4(0, 0, 0, 0);
        float blockMSLE = 0.0f;

        EncodeP1(block, blockMSLE, texels);

#if ENCODE_P2
        // First find pattern which is a best fit for a current block
        float bestScore = EvaluateP2Pattern(0, texels);
        uint bestPattern = 0;

        for (uint patternIndex = 1; patternIndex < 32; ++patternIndex)
        {
            float score = EvaluateP2Pattern(patternIndex, texels);
            if (score < bestScore)
            {
                bestPattern = patternIndex;
                bestScore = score;
            }
        }

        // Then encode it
        EncodeP2Pattern(block, blockMSLE, bestPattern, texels);
#endif

        OutputTexture[blockCoord] = block;
    }
}
--------------------------------------------------------------------------------
/bin/d3dcompiler_47.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/knarkowicz/GPURealTimeBC6H/0416fd30fd85e29d3ae673dee2cb9d0d09a36f61/bin/d3dcompiler_47.dll
--------------------------------------------------------------------------------
/bin/desk.dds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/knarkowicz/GPURealTimeBC6H/0416fd30fd85e29d3ae673dee2cb9d0d09a36f61/bin/desk.dds
--------------------------------------------------------------------------------
/bin/memorial.dds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/knarkowicz/GPURealTimeBC6H/0416fd30fd85e29d3ae673dee2cb9d0d09a36f61/bin/memorial.dds
--------------------------------------------------------------------------------
/bin/yucca.dds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/knarkowicz/GPURealTimeBC6H/0416fd30fd85e29d3ae673dee2cb9d0d09a36f61/bin/yucca.dds
--------------------------------------------------------------------------------
/dds.cpp:
-------------------------------------------------------------------------------- 1 | #include "stdafx.h" 2 | #include "dds.h" 3 | 4 | unsigned const DDS_MAGIC = 0x20534444; // "DDS " 5 | 6 | #define DDS_HEADER_FLAGS_TEXTURE 0x00001007 // DDSD_CAPS | DDSD_HEIGHT | DDSD_WIDTH | DDSD_PIXELFORMAT 7 | #define DDS_HEADER_FLAGS_MIPMAP 0x00020000 // DDSD_MIPMAPCOUNT 8 | #define DDS_HEADER_FLAGS_VOLUME 0x00800000 // DDSD_DEPTH 9 | #define DDS_HEADER_FLAGS_PITCH 0x00000008 // DDSD_PITCH 10 | #define DDS_HEADER_FLAGS_LINEARSIZE 0x00080000 // DDSD_LINEARSIZE 11 | 12 | #define DDS_SURFACE_FLAGS_TEXTURE 0x00001000 // DDSCAPS_TEXTURE 13 | #define DDS_SURFACE_FLAGS_MIPMAP 0x00400008 // DDSCAPS_COMPLEX | DDSCAPS_MIPMAP 14 | #define DDS_SURFACE_FLAGS_CUBEMAP 0x00000008 // DDSCAPS_COMPLEX 15 | 16 | #define DDS_FOURCC 0x00000004 // DDPF_FOURCC 17 | #define DDS_RGB 0x00000040 // DDPF_RGB 18 | #define DDS_RGBA 0x00000041 // DDPF_RGB | DDPF_ALPHAPIXELS 19 | #define DDS_ALPHA 0x00000002 // DDPF_ALPHA 20 | #define DDS_LUM 0x00020000 // DDPF_LUM 21 | 22 | struct DDS_PIXELFORMAT 23 | { 24 | uint32_t dwSize; 25 | uint32_t dwFlags; 26 | uint32_t dwFourCC; 27 | uint32_t dwRGBBitCount; 28 | uint32_t dwRBitMask; 29 | uint32_t dwGBitMask; 30 | uint32_t dwBBitMask; 31 | uint32_t dwABitMask; 32 | }; 33 | 34 | #ifndef MAKEFOURCC 35 | # define MAKEFOURCC(ch0, ch1, ch2, ch3) ((uint32_t)(uint8_t)(ch0) | ((uint32_t)(uint8_t)(ch1) << 8) | ((uint32_t)(uint8_t)(ch2) << 16) | ((uint32_t)(uint8_t)(ch3) << 24 )) 36 | #endif 37 | 38 | DDS_PIXELFORMAT const DDSPF_DX10 = { sizeof(DDS_PIXELFORMAT), DDS_FOURCC, MAKEFOURCC('D','X','1','0'), 0, 0, 0, 0, 0 }; 39 | DDS_PIXELFORMAT const DDSPF_R16G16B16A16_FLOAT = { sizeof(DDS_PIXELFORMAT), DDS_FOURCC, 113, 0, 0, 0, 0, 0 }; 40 | DDS_PIXELFORMAT const DDSPF_BC6H = { sizeof(DDS_PIXELFORMAT), DDS_FOURCC, 808540228, 0, 0, 0, 0, 0 }; 41 | 42 | struct DDS_HEADER 43 | { 44 | uint32_t dwMagic; 45 | uint32_t dwSize; 46 | uint32_t dwFlags; 47 | uint32_t dwHeight; 48 | uint32_t 
dwWidth; 49 | uint32_t dwPitchOrLinearSize; 50 | uint32_t dwDepth; 51 | uint32_t dwMipMapCount; 52 | uint32_t dwReserved1[11]; 53 | DDS_PIXELFORMAT ddspf; 54 | uint32_t dwSurfaceFlags; 55 | uint32_t dwCubemapFlags; 56 | uint32_t dwReserved2[3]; 57 | }; 58 | 59 | struct DDS_HEADER_DXT10 60 | { 61 | uint32_t dxgiFormat; 62 | uint32_t resourceDimension; 63 | uint32_t miscFlag; 64 | uint32_t arraySize; 65 | uint32_t reserved; 66 | }; 67 | 68 | bool DDS::LoadA16B16G16R16F(char const* filename, SImage& img) 69 | { 70 | img.m_width = 0; 71 | img.m_height = 0; 72 | img.m_data = nullptr; 73 | img.m_dataSize = 0; 74 | 75 | FILE* f = nullptr; 76 | fopen_s(&f, filename, "rb"); 77 | if (!f) 78 | { 79 | return false; 80 | } 81 | 82 | DDS_HEADER hdr; 83 | fread(&hdr, sizeof(hdr), 1, f); 84 | 85 | if (hdr.dwMagic == DDS_MAGIC && memcmp(&hdr.ddspf, &DDSPF_R16G16B16A16_FLOAT, sizeof(hdr.ddspf)) == 0) 86 | { 87 | img.m_dataSize = hdr.dwWidth * hdr.dwHeight * 8; 88 | img.m_data = new uint8_t[img.m_dataSize]; 89 | img.m_width = hdr.dwWidth; 90 | img.m_height = hdr.dwHeight; 91 | fread(img.m_data, img.m_dataSize, 1, f); 92 | fclose(f); 93 | return true; 94 | } 95 | 96 | fclose(f); 97 | return false; 98 | } 99 | 100 | bool DDS::LoadBC6H(char const* filename, SImage& img) 101 | { 102 | img.m_width = 0; 103 | img.m_height = 0; 104 | img.m_data = nullptr; 105 | img.m_dataSize = 0; 106 | 107 | FILE* f = nullptr; 108 | fopen_s(&f, filename, "rb"); 109 | if (!f) 110 | { 111 | return false; 112 | } 113 | 114 | DDS_HEADER hdr; 115 | fread(&hdr, sizeof(hdr), 1, f); 116 | 117 | if (hdr.dwMagic == DDS_MAGIC && memcmp(&hdr.ddspf, &DDSPF_DX10, sizeof(hdr.ddspf)) == 0) 118 | { 119 | DDS_HEADER_DXT10 hdrDX10; 120 | fread(&hdrDX10, sizeof(hdrDX10), 1, f); 121 | 122 | img.m_dataSize = hdr.dwWidth * hdr.dwHeight; 123 | img.m_data = new uint8_t[img.m_dataSize]; 124 | img.m_width = hdr.dwWidth; 125 | img.m_height = hdr.dwHeight; 126 | fread(img.m_data, img.m_dataSize, 1, f); 127 | fclose(f); 128 | return 
true; 129 | } 130 | 131 | fclose(f); 132 | return false; 133 | } -------------------------------------------------------------------------------- /dds.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | struct SImage 4 | { 5 | unsigned m_width; 6 | unsigned m_height; 7 | uint8_t* m_data; 8 | unsigned m_dataSize; 9 | }; 10 | 11 | namespace DDS 12 | { 13 | bool LoadA16B16G16R16F(char const* filename, SImage& img); 14 | bool LoadBC6H(char const* filename, SImage& img); 15 | } -------------------------------------------------------------------------------- /stdafx.cpp: -------------------------------------------------------------------------------- 1 | #include "stdafx.h" -------------------------------------------------------------------------------- /stdafx.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #define WIN32_LEAN_AND_MEAN 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | #include -------------------------------------------------------------------------------- /winmain.cpp: -------------------------------------------------------------------------------- 1 | #include "stdafx.h" 2 | #include "app.h" 3 | 4 | bool gDestroy = false; 5 | bool gActive = true; 6 | HWND gWndHandle; 7 | 8 | LRESULT WINAPI MsgProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam) 9 | { 10 | switch (msg) 11 | { 12 | case WM_DESTROY: 13 | gDestroy = true; 14 | break; 15 | 16 | case WM_ACTIVATE: 17 | gActive = wParam != WA_INACTIVE; 18 | break; 19 | 20 | case WM_KEYDOWN: 21 | gApp.OnKeyDown(wParam); 22 | break; 23 | 24 | case WM_LBUTTONDOWN: 25 | gApp.OnLButtonDown(LOWORD(lParam), HIWORD(lParam)); 26 | break; 27 | 28 | case WM_LBUTTONUP: 29 | gApp.OnLButtonUp(LOWORD(lParam), HIWORD(lParam)); 30 | break; 31 | 32 | case WM_MOUSEMOVE: 33 | gApp.OnMouseMove(LOWORD(lParam), HIWORD(lParam)); 34 | 
break; 35 | 36 | case WM_MOUSEWHEEL: 37 | gApp.OnMouseWheel(GET_WHEEL_DELTA_WPARAM(wParam)); 38 | break; 39 | 40 | case WM_SIZE: 41 | gApp.OnResize(); 42 | break; 43 | } 44 | 45 | return DefWindowProc(hWnd, msg, wParam, lParam); 46 | } 47 | 48 | void MainLoop(HINSTANCE hInst) 49 | { 50 | MSG Msg; 51 | while (!gDestroy) 52 | { 53 | if (PeekMessage(&Msg, NULL, 0, 0, PM_NOREMOVE)) 54 | { 55 | if (!GetMessage(&Msg, NULL, 0, 0)) 56 | return; 57 | 58 | TranslateMessage(&Msg); 59 | DispatchMessage(&Msg); 60 | } 61 | else 62 | { 63 | if (gActive) 64 | { 65 | gApp.Render(); 66 | Sleep(1); 67 | } 68 | } 69 | } 70 | } 71 | 72 | INT WINAPI WinMain(HINSTANCE hInst, HINSTANCE, LPSTR lpCmdLine, INT) 73 | { 74 | wchar_t const* appName = L"rt_bc6h_encoder_gpu"; 75 | WNDCLASSEX wc = { sizeof(WNDCLASSEX), 0, MsgProc, 0L, 0L, GetModuleHandle(NULL), NULL, NULL, NULL, NULL, appName, NULL }; 76 | 77 | RegisterClassEx(&wc); 78 | 79 | DWORD const dwStyle = WS_SYSMENU | WS_MAXIMIZEBOX | WS_MINIMIZEBOX | WS_SIZEBOX; 80 | RECT rcWindowSize; 81 | SetRect(&rcWindowSize, 0, 0, 1280, 720); 82 | AdjustWindowRect(&rcWindowSize, dwStyle, FALSE); 83 | 84 | RECT rcDesktop; 85 | GetClientRect(GetDesktopWindow(), &rcDesktop); 86 | 87 | if (rcWindowSize.bottom < rcDesktop.bottom) 88 | { 89 | rcWindowSize.bottom -= rcWindowSize.top; 90 | rcWindowSize.top = 0; 91 | } 92 | 93 | if (rcWindowSize.right < rcDesktop.right) 94 | { 95 | int iTranslate = (rcDesktop.right - (rcWindowSize.right - rcWindowSize.left)) / 2; 96 | rcWindowSize.left += iTranslate; 97 | rcWindowSize.right += iTranslate; 98 | } 99 | 100 | gWndHandle = CreateWindow(appName, appName, dwStyle, rcWindowSize.left, rcWindowSize.top, 101 | rcWindowSize.right - rcWindowSize.left, rcWindowSize.bottom - rcWindowSize.top, 102 | GetDesktopWindow(), nullptr, wc.hInstance, nullptr); 103 | 104 | gApp.Init(gWndHandle); 105 | ShowWindow(gWndHandle, SW_SHOWDEFAULT); 106 | UpdateWindow(gWndHandle); 107 | 108 | MainLoop(hInst); 109 | 110 | 
UnregisterClass(appName, wc.hInstance); 111 | gApp.Release(); 112 | return 0; 113 | } --------------------------------------------------------------------------------