├── .gitattributes
├── .gitignore
├── GPURealTimeBC6H.sln
├── GPURealTimeBC6H.vcxproj
├── LICENSE
├── README.md
├── app.cpp
├── app.h
├── bin
├── GPURealTimeBC6HRelease.exe
├── atrium.dds
├── backyard.dds
├── blit.hlsl
├── compress.hlsl
├── d3dcompiler_47.dll
├── desk.dds
├── memorial.dds
└── yucca.dds
├── dds.cpp
├── dds.h
├── stdafx.cpp
├── stdafx.h
└── winmain.cpp
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
4 | # Custom for Visual Studio
5 | *.cs diff=csharp
6 |
7 | # Standard to msysgit
8 | *.doc diff=astextplain
9 | *.DOC diff=astextplain
10 | *.docx diff=astextplain
11 | *.DOCX diff=astextplain
12 | *.dot diff=astextplain
13 | *.DOT diff=astextplain
14 | *.pdf diff=astextplain
15 | *.PDF diff=astextplain
16 | *.rtf diff=astextplain
17 | *.RTF diff=astextplain
18 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .vs
2 | Debug
3 | Release
4 | GPURealTimeBC6HDebug.exe
5 | *.ilk
6 | *.pdb
7 | *.iobj
8 | *.ipdb
9 | *.vcxproj.user
--------------------------------------------------------------------------------
/GPURealTimeBC6H.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio 2013
4 | VisualStudioVersion = 12.0.31101.0
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "GPURealTimeBC6H", "GPURealTimeBC6H.vcxproj", "{B51BC917-ED36-4511-9EB1-835F1EBC8B19}"
7 | EndProject
8 | Global
9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
10 | Debug|Win32 = Debug|Win32
11 | Release|Win32 = Release|Win32
12 | EndGlobalSection
13 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
14 | {B51BC917-ED36-4511-9EB1-835F1EBC8B19}.Debug|Win32.ActiveCfg = Debug|Win32
15 | {B51BC917-ED36-4511-9EB1-835F1EBC8B19}.Debug|Win32.Build.0 = Debug|Win32
16 | {B51BC917-ED36-4511-9EB1-835F1EBC8B19}.Release|Win32.ActiveCfg = Release|Win32
17 | {B51BC917-ED36-4511-9EB1-835F1EBC8B19}.Release|Win32.Build.0 = Release|Win32
18 | EndGlobalSection
19 | GlobalSection(SolutionProperties) = preSolution
20 | HideSolutionNode = FALSE
21 | EndGlobalSection
22 | EndGlobal
23 |
--------------------------------------------------------------------------------
/GPURealTimeBC6H.vcxproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Debug
6 | Win32
7 |
8 |
9 | Release
10 | Win32
11 |
12 |
13 |
14 | {B51BC917-ED36-4511-9EB1-835F1EBC8B19}
15 | Win32Proj
16 | GPURealTimeBC6H
17 | 10.0.17763.0
18 |
19 |
20 |
21 | Application
22 | true
23 | v141
24 | Unicode
25 |
26 |
27 | Application
28 | false
29 | v141
30 | true
31 | Unicode
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 | true
45 | bin\
46 | $(ProjectName)$(Configuration)
47 |
48 |
49 | false
50 | bin\
51 | $(ProjectName)$(Configuration)
52 |
53 |
54 |
55 | Use
56 | Level3
57 | Disabled
58 | WIN32;_DEBUG;_WINDOWS;%(PreprocessorDefinitions)
59 |
60 |
61 | Windows
62 | true
63 | kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies);d3d11.lib;d3dcompiler.lib
64 |
65 |
66 |
67 |
68 | Level3
69 | Use
70 | MaxSpeed
71 | true
72 | true
73 | WIN32;NDEBUG;_WINDOWS;%(PreprocessorDefinitions)
74 |
75 |
76 | Windows
77 | true
78 | true
79 | true
80 | kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies);d3d11.lib;d3dcompiler.lib
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 | Create
93 | Create
94 |
95 |
96 |
97 |
98 |
99 | true
100 | true
101 |
102 |
103 | true
104 | true
105 |
106 |
107 |
108 |
109 |
110 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2015 Krzysztof Narkowicz
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | GPURealTimeBC6H
2 | =======
3 |
4 | Real-time BC6H compressor which runs on a GPU. Includes a small testbed application. This compressor is used in a few released AA/AAA games.
5 |
6 | Compressor has two presets:
7 | * "Fast" - compresses a standard 256x256x6 cubemap in 0.02ms on NV P4000 (GPU perf in between NV GTX 1060 and NV GTX 1070). Compression quality is comparable to fast presets of offline compressors.
8 | * "Quality" - compresses a standard 256x256x6 cubemap in 0.528ms on NV P4000. Compression quality is comparable to normal presets of offline compressors.
9 |
10 | How It Works
11 | ===
12 | BC6H is a pretty complex format with multiple possible block modes. To prune the search space a bit, one mode was selected for the fast compression setting (mode 11) and two for the quality compression setting (mode 2 and mode 6), which proved to have the best trade-off between quality and performance.
13 |
14 | Fast mode is based on computing a color bounding box ("Real-Time DXT Compression" by J.M.P. van Waveren, 2006), then ordering colors along a diagonal of this bounding box and using a least squares fit to find optimal endpoints ("High Quality DXT Compression using CUDA" by Ignacio Castaño, 2007). All computations are made in log2 space in order to optimize for perceptual error (after all, the resulting image will be tone mapped).
15 |
16 | Quality mode has two passes. The first pass searches for the best partition by computing a color bounding box per partition and computing the error as the distance of every texel from that line. In the second pass, the best partition is encoded using an approach similar to the fast mode.
17 |
18 | Quality
19 | ===
20 | Quality compared using RMSLE (lower is better).
21 |
22 | | | GPU Real-Time BC6H "Fast" | GPU Real-Time BC6H "Quality" | Intel "Very fast" | Intel "Fast" | Intel "Basic" | Intel "Slow" | Intel "Very slow" | DirectXTex
23 | | ------- | ------------------------- | ----------------------------- | ----------------- | ------------ | ------------- | ------------ | ----------------- | ----------
24 | | Atrium | 0.0074 | 0.0066 | 0.0080 | 0.0069 | 0.0067 | 0.0067 | 0.0067 | 0.0079
25 | | Backyard | 0.0073 | 0.0070 | 0.0072 | 0.0067 | 0.0065 | 0.0065 | 0.0065 | 0.0075
26 | | Desk | 0.0447 | 0.0328 | 0.0470 | 0.0307 | 0.0298 | 0.0294 | 0.0293 | 0.0413
27 | | Memorial | 0.0158 | 0.0126 | 0.0192 | 0.0135 | 0.0133 | 0.0132 | 0.0131 | 0.0243
28 | | Yucca | 0.0168 | 0.0123 | 0.0145 | 0.0108 | 0.0105 | 0.0103 | 0.0103 | 0.0124
29 |
30 | License
31 | ===
32 | This work is dual-licensed under either public domain or MIT.
33 |
--------------------------------------------------------------------------------
/app.cpp:
--------------------------------------------------------------------------------
1 | #include "stdafx.h"
2 | #include "app.h"
3 | #include "dds.h"
4 |
// Calls Release() on a non-null pointer and then nulls it, so releasing the
// same variable twice is harmless.
// The do/while(0) wrapper makes `SAFE_RELEASE(p);` a single statement, which
// keeps it valid inside an unbraced if/else. The argument is parenthesised so
// that expressions such as array elements expand correctly.
// NOTE: `x` is evaluated several times; pass a side-effect-free lvalue.
#define SAFE_RELEASE( x ) do { if ( (x) ) { (x)->Release(); (x) = nullptr; } } while ( 0 )
6 |
7 | CApp gApp;
8 |
9 | char const* ImagePathArr[] = { "atrium.dds", "backyard.dds", "desk.dds", "memorial.dds", "yucca.dds" };
10 | const uint32_t BC_BLOCK_SIZE = 4;
11 |
12 | struct SShaderCB
13 | {
14 | Vec2 m_screenSizeRcp;
15 | unsigned m_textureSizeInBlocks[2];
16 |
17 | Vec2 m_imageSizeRcp;
18 | Vec2 m_texelBias;
19 |
20 | float m_texelScale;
21 | float m_exposure;
22 | uint32_t m_blitMode;
23 | uint32_t m_padding;
24 | };
25 |
// Converts an IEEE 754 half-precision value (given as its raw 16 bits) to a
// 32-bit float. Zero, subnormals, normals, infinities and NaNs are handled.
// Algorithm: https://gist.github.com/rygorous/2144712
//
// The bit reinterpretation goes through memcpy rather than a union, because
// reading an inactive union member is undefined behaviour in C++ and the
// anonymous struct used by the original snippet is a compiler extension.
static float HalfToFloat(uint16_t h)
{
    auto bitsToFloat = [](uint32_t bits)
    {
        float value;
        memcpy(&value, &bits, sizeof(value));
        return value;
    };
    auto floatToBits = [](float value)
    {
        uint32_t bits;
        memcpy(&bits, &value, sizeof(bits));
        return bits;
    };

    // Multiplying by 2^(127 - 15) rebiases the half exponent to the float one.
    const float magic = bitsToFloat((254u - 15u) << 23);
    // Smallest rebiased value that originated from a half Inf/NaN exponent.
    const float wasInfNan = bitsToFloat((127u + 16u) << 23);

    // Move the half exponent/mantissa bits into the float mantissa position.
    float value = bitsToFloat((h & 0x7fffu) << 13);
    value *= magic; // exponent adjust (also renormalises half subnormals)

    uint32_t bits = floatToBits(value);
    if (value >= wasInfNan)
    {
        bits |= 255u << 23; // make sure Inf/NaN survive
    }
    bits |= (h & 0x8000u) << 16; // sign bit

    return bitsToFloat(bits);
}
52 |
// Returns ceil(x / divisor) using integer arithmetic only.
// Written as quotient plus a remainder test so that it cannot wrap around;
// the `x + divisor - 1` form overflows for x close to UINT32_MAX.
// `divisor` must be non-zero.
uint32_t DivideAndRoundUp(uint32_t x, uint32_t divisor)
{
    const uint32_t quotient = x / divisor;
    const uint32_t remainder = x % divisor;
    return remainder != 0 ? quotient + 1 : quotient;
}
57 |
58 | CApp::CApp()
59 | {
60 | }
61 |
62 | CApp::~CApp()
63 | {
64 | DestoryImage();
65 | DestroyTargets();
66 | DestroyShaders();
67 | SAFE_RELEASE(m_ctx);
68 | SAFE_RELEASE(m_swapChain);
69 | SAFE_RELEASE(m_device);
70 | }
71 |
72 | bool CApp::Init(HWND windowHandle)
73 | {
74 | m_windowHandle = windowHandle;
75 |
76 | RECT clientRect;
77 | GetClientRect(windowHandle, &clientRect);
78 | m_backbufferWidth = clientRect.right - clientRect.left;
79 | m_backbufferHeight = clientRect.bottom - clientRect.top;
80 |
81 | D3D_FEATURE_LEVEL featureLevels[] = { D3D_FEATURE_LEVEL_11_0 };
82 | D3D_FEATURE_LEVEL retFeatureLevel;
83 |
84 | DXGI_SWAP_CHAIN_DESC swapDesc;
85 | ZeroMemory(&swapDesc, sizeof(swapDesc));
86 | swapDesc.BufferDesc.Width = m_backbufferWidth;
87 | swapDesc.BufferDesc.Height = m_backbufferHeight;
88 | swapDesc.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM_SRGB;
89 | swapDesc.BufferDesc.RefreshRate.Numerator = 60;
90 | swapDesc.BufferDesc.RefreshRate.Denominator = 1;
91 | swapDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
92 | swapDesc.BufferCount = 2;
93 | swapDesc.OutputWindow = windowHandle;
94 | swapDesc.SampleDesc.Count = 1;
95 | swapDesc.SampleDesc.Quality = 0;
96 | swapDesc.Windowed = true;
97 |
98 | unsigned flags = 0;
99 | #ifdef _DEBUG
100 | flags |= D3D11_CREATE_DEVICE_DEBUG;
101 | #endif
102 |
103 | HRESULT res;
104 | res = D3D11CreateDeviceAndSwapChain(nullptr, D3D_DRIVER_TYPE_HARDWARE, 0, flags, featureLevels, ARRAYSIZE(featureLevels), D3D11_SDK_VERSION, &swapDesc, &m_swapChain, &m_device, &retFeatureLevel, &m_ctx);
105 | _ASSERT(SUCCEEDED(res));
106 |
107 | ID3D11Texture2D* backBuffer = NULL;
108 | res = m_swapChain->GetBuffer(0, __uuidof(ID3D11Texture2D), (LPVOID*)&backBuffer);
109 | _ASSERT(SUCCEEDED(res));
110 |
111 | res = m_device->CreateRenderTargetView(backBuffer, nullptr, &m_backBufferView);
112 | _ASSERT(SUCCEEDED(res));
113 | backBuffer->Release();
114 |
115 | CreateImage();
116 | CreateShaders();
117 | CreateTargets();
118 | CreateQueries();
119 | CreateConstantBuffer();
120 |
121 | HRESULT hr;
122 | D3D11_SAMPLER_DESC samplerDesc =
123 | {
124 | D3D11_FILTER_MIN_MAG_MIP_POINT,
125 | D3D11_TEXTURE_ADDRESS_BORDER,
126 | D3D11_TEXTURE_ADDRESS_BORDER,
127 | D3D11_TEXTURE_ADDRESS_BORDER,
128 | 0.0f,
129 | 1,
130 | D3D11_COMPARISON_ALWAYS,
131 | 0.0f,
132 | 0.0f,
133 | 0.0f,
134 | 0.0f,
135 | 0.0f,
136 | D3D11_FLOAT32_MAX
137 | };
138 | hr = m_device->CreateSamplerState(&samplerDesc, &m_pointSampler);
139 | _ASSERT(SUCCEEDED(hr));
140 |
141 | D3D11_BUFFER_DESC bd;
142 | ZeroMemory(&bd, sizeof(bd));
143 | bd.Usage = D3D11_USAGE_DEFAULT;
144 | bd.ByteWidth = sizeof(uint16_t) * 4;
145 | bd.BindFlags = D3D11_BIND_INDEX_BUFFER;
146 | bd.CPUAccessFlags = 0;
147 |
148 | uint16_t indices[] = { 0, 1, 2, 3 };
149 | D3D11_SUBRESOURCE_DATA initData;
150 | ZeroMemory(&initData, sizeof(initData));
151 | initData.pSysMem = indices;
152 |
153 | hr = m_device->CreateBuffer(&bd, &initData, &m_ib);
154 | _ASSERT(SUCCEEDED(hr));
155 |
156 | return true;
157 | }
158 |
159 | void CApp::CreateTargets()
160 | {
161 | D3D11_TEXTURE2D_DESC texDesc;
162 | texDesc.Width = DivideAndRoundUp(m_imageWidth, BC_BLOCK_SIZE);
163 | texDesc.Height = DivideAndRoundUp(m_imageHeight, BC_BLOCK_SIZE);
164 | texDesc.MipLevels = 1;
165 | texDesc.ArraySize = 1;
166 | texDesc.Format = DXGI_FORMAT_R32G32B32A32_UINT;
167 | texDesc.SampleDesc.Count = 1;
168 | texDesc.SampleDesc.Quality = 0;
169 | texDesc.Usage = D3D11_USAGE_DEFAULT;
170 | texDesc.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS;
171 | texDesc.CPUAccessFlags = 0;
172 | texDesc.MiscFlags = 0;
173 | HRESULT hr = m_device->CreateTexture2D(&texDesc, nullptr, &m_compressTargetRes);
174 | _ASSERT(SUCCEEDED(hr));
175 |
176 | hr = m_device->CreateUnorderedAccessView(m_compressTargetRes, nullptr, &m_compressTargetUAV);
177 | _ASSERT(SUCCEEDED(hr));
178 |
179 | texDesc.Width = m_imageWidth;
180 | texDesc.Height = m_imageHeight;
181 | texDesc.MipLevels = 1;
182 | texDesc.ArraySize = 1;
183 | texDesc.Format = DXGI_FORMAT_R16G16B16A16_FLOAT;
184 | texDesc.SampleDesc.Count = 1;
185 | texDesc.SampleDesc.Quality = 0;
186 | texDesc.Usage = D3D11_USAGE_DEFAULT;
187 | texDesc.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE;
188 | texDesc.CPUAccessFlags = 0;
189 | texDesc.MiscFlags = 0;
190 | hr = m_device->CreateTexture2D(&texDesc, nullptr, &m_tmpTargetRes);
191 | _ASSERT(SUCCEEDED(hr));
192 |
193 | hr = m_device->CreateRenderTargetView(m_tmpTargetRes, nullptr, &m_tmpTargetView);
194 | _ASSERT(SUCCEEDED(hr));
195 |
196 | texDesc.Width = m_imageWidth;
197 | texDesc.Height = m_imageHeight;
198 | texDesc.MipLevels = 1;
199 | texDesc.ArraySize = 1;
200 | texDesc.Format = DXGI_FORMAT_R16G16B16A16_FLOAT;
201 | texDesc.SampleDesc.Count = 1;
202 | texDesc.SampleDesc.Quality = 0;
203 | texDesc.Usage = D3D11_USAGE_STAGING;
204 | texDesc.BindFlags = 0;
205 | texDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
206 | texDesc.MiscFlags = 0;
207 | hr = m_device->CreateTexture2D(&texDesc, nullptr, &m_tmpStagingRes);
208 | _ASSERT(SUCCEEDED(hr));
209 | }
210 |
211 | void CApp::DestroyTargets()
212 | {
213 | SAFE_RELEASE(m_compressTargetUAV);
214 | SAFE_RELEASE(m_compressTargetRes);
215 | SAFE_RELEASE(m_tmpTargetView);
216 | SAFE_RELEASE(m_tmpTargetRes);
217 | SAFE_RELEASE(m_tmpStagingRes);
218 | }
219 |
220 | void CApp::CreateQueries()
221 | {
222 | D3D11_QUERY_DESC queryDesc;
223 | queryDesc.MiscFlags = 0;
224 |
225 | for (unsigned i = 0; i < MAX_QUERY_FRAME_NUM; ++i)
226 | {
227 | queryDesc.Query = D3D11_QUERY_TIMESTAMP_DISJOINT;
228 | m_device->CreateQuery(&queryDesc, &m_disjointQueries[i]);
229 |
230 | queryDesc.Query = D3D11_QUERY_TIMESTAMP;
231 | m_device->CreateQuery(&queryDesc, &m_timeBeginQueries[i]);
232 | m_device->CreateQuery(&queryDesc, &m_timeEndQueries[i]);
233 | }
234 | }
235 |
236 | void CApp::CreateConstantBuffer()
237 | {
238 | D3D11_BUFFER_DESC desc;
239 | ZeroMemory(&desc, sizeof(desc));
240 | desc.Usage = D3D11_USAGE_DYNAMIC;
241 | desc.ByteWidth = sizeof(SShaderCB);
242 | desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
243 | desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
244 |
245 | m_device->CreateBuffer(&desc, nullptr, &m_constantBuffer);
246 | }
247 |
248 | void CApp::CreateImage()
249 | {
250 | SImage img;
251 | DDS::LoadA16B16G16R16F(ImagePathArr[m_imageID], img);
252 |
253 | m_imageWidth = img.m_width;
254 | m_imageHeight = img.m_height;
255 |
256 | D3D11_SUBRESOURCE_DATA initialData;
257 | initialData.pSysMem = img.m_data;
258 | initialData.SysMemPitch = img.m_width * 4 * 2;
259 | initialData.SysMemSlicePitch = 0;
260 |
261 | D3D11_TEXTURE2D_DESC desc;
262 | ZeroMemory(&desc, sizeof(desc));
263 | desc.Format = DXGI_FORMAT_R16G16B16A16_FLOAT;
264 | desc.Width = img.m_width;
265 | desc.Height = img.m_height;
266 | desc.MipLevels = 1;
267 | desc.ArraySize = 1;
268 | desc.Usage = D3D11_USAGE_IMMUTABLE;
269 | desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
270 | desc.SampleDesc.Count = 1;
271 | desc.SampleDesc.Quality = 0;
272 | HRESULT hr = m_device->CreateTexture2D(&desc, &initialData, &m_sourceTextureRes);
273 | _ASSERT(SUCCEEDED(hr));
274 |
275 | D3D11_SHADER_RESOURCE_VIEW_DESC resViewDesc;
276 | resViewDesc.Format = desc.Format;
277 | resViewDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
278 | resViewDesc.Texture2D.MostDetailedMip = 0;
279 | resViewDesc.Texture2D.MipLevels = desc.MipLevels;
280 | hr = m_device->CreateShaderResourceView(m_sourceTextureRes, &resViewDesc, &m_sourceTextureView);
281 | _ASSERT(SUCCEEDED(hr));
282 |
283 | desc.Format = DXGI_FORMAT_BC6H_UF16;
284 | desc.Usage = D3D11_USAGE_DEFAULT;
285 | hr = m_device->CreateTexture2D(&desc, nullptr, &m_compressedTextureRes);
286 | _ASSERT(SUCCEEDED(hr));
287 |
288 | resViewDesc.Format = desc.Format;
289 | resViewDesc.Texture2D.MostDetailedMip = 0;
290 | resViewDesc.Texture2D.MipLevels = desc.MipLevels;
291 |
292 | hr = m_device->CreateShaderResourceView(m_compressedTextureRes, &resViewDesc, &m_compressedTextureView);
293 | _ASSERT(SUCCEEDED(hr));
294 | }
295 |
296 | void CApp::DestoryImage()
297 | {
298 | SAFE_RELEASE(m_compressedTextureView);
299 | SAFE_RELEASE(m_compressedTextureRes);
300 | SAFE_RELEASE(m_sourceTextureView);
301 | SAFE_RELEASE(m_sourceTextureRes);
302 | }
303 |
304 | void CApp::CreateShaders()
305 | {
306 | unsigned shaderFlags = D3DCOMPILE_ENABLE_STRICTNESS;
307 | #ifdef _DEBUG
308 | shaderFlags |= D3DCOMPILE_DEBUG | D3DCOMPILE_PREFER_FLOW_CONTROL;
309 | #endif
310 |
311 | HRESULT hr;
312 | ID3DBlob* shaderBlob = nullptr;
313 | ID3DBlob* errorBlob = nullptr;
314 |
315 | // Compression compute shaders
316 | for (uint32_t ModeIndex = 0; ModeIndex < COMPRESSION_MODE_NUM; ++ModeIndex)
317 | {
318 | D3D_SHADER_MACRO macros[2];
319 | macros[0].Name = "QUALITY";
320 | macros[0].Definition = (ModeIndex == 0 ? "0" : "1");
321 | macros[1].Name = nullptr;
322 | macros[1].Definition = nullptr;
323 |
324 | hr = D3DCompileFromFile(L"compress.hlsl", macros, nullptr, "CSMain", "cs_5_0", shaderFlags, 0, &shaderBlob, &errorBlob);
325 | if (SUCCEEDED(hr))
326 | {
327 | m_device->CreateComputeShader(shaderBlob->GetBufferPointer(), shaderBlob->GetBufferSize(), nullptr, &m_compressCS[ModeIndex]);
328 | }
329 | else
330 | {
331 | OutputDebugStringA((char const*)errorBlob->GetBufferPointer());
332 | }
333 | }
334 |
335 | // Blit vertex and pixel shader
336 | {
337 | hr = D3DCompileFromFile(L"blit.hlsl", nullptr, nullptr, "VSMain", "vs_5_0", shaderFlags, 0, &shaderBlob, &errorBlob);
338 | if (SUCCEEDED(hr))
339 | {
340 | m_device->CreateVertexShader(shaderBlob->GetBufferPointer(), shaderBlob->GetBufferSize(), nullptr, &m_blitVS);
341 | }
342 | else
343 | {
344 | OutputDebugStringA((char const*)errorBlob->GetBufferPointer());
345 | }
346 |
347 | hr = D3DCompileFromFile(L"blit.hlsl", nullptr, nullptr, "PSMain", "ps_5_0", shaderFlags, 0, &shaderBlob, &errorBlob);
348 | if (SUCCEEDED(hr))
349 | {
350 | m_device->CreatePixelShader(shaderBlob->GetBufferPointer(), shaderBlob->GetBufferSize(), nullptr, &m_blitPS);
351 | }
352 | else
353 | {
354 | OutputDebugStringA((char const*)errorBlob->GetBufferPointer());
355 | }
356 | }
357 | }
358 |
359 | void CApp::DestroyShaders()
360 | {
361 | SAFE_RELEASE(m_blitVS);
362 | SAFE_RELEASE(m_blitPS);
363 |
364 | for (unsigned i = 0; i < ARRAYSIZE(m_compressCS); ++i)
365 | {
366 | SAFE_RELEASE(m_compressCS[i]);
367 | }
368 | }
369 |
370 | void CApp::Release()
371 | {
372 | DestroyShaders();
373 | }
374 |
375 | void CApp::OnKeyDown(WPARAM wParam)
376 | {
377 | switch (wParam)
378 | {
379 | case 'R':
380 | DestroyShaders();
381 | CreateShaders();
382 | m_updateRMSE = true;
383 | OutputDebugStringA("Recompiled shaders\n");
384 | break;
385 |
386 | case 'N':
387 | m_imageID = (m_imageID + 1) % ARRAYSIZE(ImagePathArr);
388 | DestoryImage();
389 | DestroyTargets();
390 | CreateImage();
391 | CreateTargets();
392 | m_imageZoom = 0.0f;
393 | m_texelScale = 1.0f;
394 | m_texelBias.x = 0.0f;
395 | m_texelBias.y = 0.0f;
396 | m_imageExposure = 0.0f;
397 | m_updateRMSE = true;
398 | break;
399 |
400 | case 'E':
401 | // Flip between source and compressed image
402 | m_blitMode = (m_blitMode + 1) % 2;
403 | m_updateTitle = true;
404 | break;
405 |
406 | case '1':
407 | m_blitMode = 0;
408 | m_updateTitle = true;
409 | break;
410 |
411 | case '2':
412 | m_blitMode = 1;
413 | m_updateTitle = true;
414 | break;
415 |
416 | case '3':
417 | m_blitMode = 2;
418 | m_updateTitle = true;
419 | break;
420 |
421 | case '4':
422 | m_blitMode = 3;
423 | m_updateTitle = true;
424 | break;
425 |
426 | case 'Q':
427 | m_compressionMode = (m_compressionMode + 1) % COMPRESSION_MODE_NUM;
428 | m_updateTitle = true;
429 | m_updateRMSE = true;
430 | break;
431 |
432 | case VK_ADD:
433 | m_imageExposure += 0.1f;
434 | m_updateTitle = true;
435 | break;
436 |
437 | case VK_SUBTRACT:
438 | m_imageExposure -= 0.1f;
439 | m_updateTitle = true;
440 | break;
441 | }
442 | }
443 |
444 | void CApp::OnLButtonDown(int mouseX, int mouseY)
445 | {
446 | m_dragEnabled = true;
447 | m_dragStart.x = m_texelBias.x + mouseX * m_texelScale;
448 | m_dragStart.y = m_texelBias.y + mouseY * m_texelScale;
449 | }
450 |
451 | void CApp::OnLButtonUp(int mouseX, int mouseY)
452 | {
453 | m_dragEnabled = false;
454 | }
455 |
456 | void CApp::OnMouseMove(int mouseX, int mouseY)
457 | {
458 | if (m_dragEnabled && GetKeyState(VK_LBUTTON) >= 0)
459 | {
460 | m_dragEnabled = false;
461 | }
462 |
463 | if (m_dragEnabled)
464 | {
465 | m_texelBias.x = m_dragStart.x - mouseX * m_texelScale;
466 | m_texelBias.y = m_dragStart.y - mouseY * m_texelScale;
467 | }
468 | }
469 |
470 | void CApp::OnMouseWheel(int zDelta)
471 | {
472 | m_imageZoom -= zDelta * 0.001f;
473 | m_texelScale = powf(2.0f, m_imageZoom);
474 | }
475 |
476 | void CApp::OnResize()
477 | {
478 | RECT clientRect;
479 | GetClientRect(m_windowHandle, &clientRect);
480 | unsigned const newBackbufferWidth = max(clientRect.right - clientRect.left, 64);
481 | unsigned const newBackbufferHeight = max(clientRect.bottom - clientRect.top, 64);
482 |
483 | if (m_backbufferWidth != newBackbufferWidth && m_backbufferHeight != newBackbufferHeight)
484 | {
485 | m_ctx->ClearState();
486 | SAFE_RELEASE(m_backBufferView);
487 | m_swapChain->ResizeBuffers(2, newBackbufferWidth, newBackbufferHeight, DXGI_FORMAT_R8G8B8A8_UNORM_SRGB, 0);
488 |
489 | ID3D11Texture2D* backBuffer = nullptr;
490 | HRESULT hr = m_swapChain->GetBuffer(0, __uuidof(ID3D11Texture2D), (LPVOID*)&backBuffer);
491 | _ASSERT(SUCCEEDED(hr));
492 |
493 | hr = m_device->CreateRenderTargetView(backBuffer, nullptr, &m_backBufferView);
494 | _ASSERT(SUCCEEDED(hr));
495 | backBuffer->Release();
496 |
497 | m_backbufferWidth = newBackbufferWidth;
498 | m_backbufferHeight = newBackbufferHeight;
499 | }
500 | }
501 |
502 | void CApp::UpdateTitle()
503 | {
504 | const wchar_t* blitModeNames[BLIT_MODE_NUM] =
505 | {
506 | L"Source",
507 | L"Compressed",
508 | L"DiffRGB",
509 | L"DiffLum"
510 | };
511 |
512 | wchar_t title[256];
513 | title[0] = 0;
514 | swprintf(title, ARRAYSIZE(title), L"Time:%.3fms rgbRMSLE:%.4f lumRMSLE:%.4f [q]Mode:%s [1,2,3,4]Show:%s [-/+]Exposure:%.1f [n]%S%dx%d [r]Reloadshaders",
515 | m_compressionTime, m_rgbRMSLE, m_lumRMSLE, m_compressionMode == 1 ? L"Quality" : L"Fast", blitModeNames[m_blitMode], m_imageExposure, ImagePathArr[m_imageID], m_imageWidth, m_imageHeight);
516 |
517 | SetWindowText(m_windowHandle, title);
518 | }
519 |
520 | void CApp::Render()
521 | {
522 | m_ctx->ClearState();
523 |
524 | m_ctx->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
525 | m_ctx->IASetIndexBuffer(m_ib, DXGI_FORMAT_R16_UINT, 0);
526 |
527 | SShaderCB shaderCB;
528 | shaderCB.m_textureSizeInBlocks[0] = DivideAndRoundUp(m_imageWidth, BC_BLOCK_SIZE);
529 | shaderCB.m_textureSizeInBlocks[1] = DivideAndRoundUp(m_imageHeight, BC_BLOCK_SIZE);
530 | shaderCB.m_imageSizeRcp.x = 1.0f / m_imageWidth;
531 | shaderCB.m_imageSizeRcp.y = 1.0f / m_imageHeight;
532 | shaderCB.m_screenSizeRcp.x = 1.0f / m_backbufferWidth;
533 | shaderCB.m_screenSizeRcp.y = 1.0f / m_backbufferHeight;
534 | shaderCB.m_texelBias = m_texelBias;
535 | shaderCB.m_texelScale = m_texelScale;
536 | shaderCB.m_exposure = exp(m_imageExposure);
537 | shaderCB.m_blitMode = m_blitMode;
538 |
539 | D3D11_MAPPED_SUBRESOURCE mappedRes;
540 | m_ctx->Map(m_constantBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedRes);
541 | memcpy(mappedRes.pData, &shaderCB, sizeof(shaderCB));
542 | m_ctx->Unmap(m_constantBuffer, 0);
543 |
544 | m_ctx->Begin(m_disjointQueries[m_frameID % MAX_QUERY_FRAME_NUM]);
545 | m_ctx->End(m_timeBeginQueries[m_frameID % MAX_QUERY_FRAME_NUM]);
546 |
547 | if (m_compressCS[m_compressionMode])
548 | {
549 | m_ctx->CSSetShader(m_compressCS[m_compressionMode], nullptr, 0);
550 | m_ctx->CSSetUnorderedAccessViews(0, 1, &m_compressTargetUAV, nullptr);
551 | m_ctx->CSSetShaderResources(0, 1, &m_sourceTextureView);
552 | m_ctx->CSSetSamplers(0, 1, &m_pointSampler);
553 | m_ctx->CSSetConstantBuffers(0, 1, &m_constantBuffer);
554 |
555 | uint32_t threadsX = 8;
556 | uint32_t threadsY = 8;
557 | m_ctx->Dispatch(DivideAndRoundUp(m_imageWidth, BC_BLOCK_SIZE * threadsX), DivideAndRoundUp(m_imageHeight, BC_BLOCK_SIZE * threadsY), 1);
558 | }
559 |
560 | m_ctx->End(m_timeEndQueries[m_frameID % MAX_QUERY_FRAME_NUM]);
561 | m_ctx->End(m_disjointQueries[m_frameID % MAX_QUERY_FRAME_NUM]);
562 |
563 | m_ctx->CopyResource(m_compressedTextureRes, m_compressTargetRes);
564 |
565 | if (m_blitVS && m_blitPS)
566 | {
567 | m_ctx->OMSetRenderTargets(1, &m_backBufferView, nullptr);
568 | D3D11_VIEWPORT vp;
569 | vp.Width = (float)m_backbufferWidth;
570 | vp.Height = (float)m_backbufferHeight;
571 | vp.MinDepth = 0.0f;
572 | vp.MaxDepth = 1.0f;
573 | vp.TopLeftX = 0;
574 | vp.TopLeftY = 0;
575 | m_ctx->RSSetViewports(1, &vp);
576 |
577 | m_ctx->VSSetShader(m_blitVS, nullptr, 0);
578 | m_ctx->PSSetShader(m_blitPS, nullptr, 0);
579 | m_ctx->PSSetShaderResources(0, 1, &m_sourceTextureView);
580 | m_ctx->PSSetShaderResources(1, 1, &m_compressedTextureView);
581 | m_ctx->PSSetSamplers(0, 1, &m_pointSampler);
582 | m_ctx->PSSetConstantBuffers(0, 1, &m_constantBuffer);
583 |
584 | m_ctx->DrawIndexed(4, 0, 0);
585 | }
586 |
587 | if (m_updateRMSE)
588 | {
589 | UpdateRMSE();
590 | m_updateRMSE = false;
591 | }
592 |
593 | ++m_frameID;
594 | m_swapChain->Present(0, 0);
595 |
596 | D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjointData;
597 | uint64_t timeStart;
598 | uint64_t timeEnd;
599 |
600 | if (m_frameID > m_frameID % MAX_QUERY_FRAME_NUM)
601 | {
602 | while (m_ctx->GetData(m_disjointQueries[m_frameID % MAX_QUERY_FRAME_NUM], &disjointData, sizeof(disjointData), 0) != S_OK)
603 | {
604 | int e = 0;
605 | }
606 |
607 | while (m_ctx->GetData(m_timeBeginQueries[m_frameID % MAX_QUERY_FRAME_NUM], &timeStart, sizeof(timeStart), 0) != S_OK)
608 | {
609 | int e = 0;
610 | }
611 |
612 | while (m_ctx->GetData(m_timeEndQueries[m_frameID % MAX_QUERY_FRAME_NUM], &timeEnd, sizeof(timeEnd), 0) != S_OK)
613 | {
614 | int e = 0;
615 | }
616 |
617 | if (!disjointData.Disjoint)
618 | {
619 | uint64_t delta = (timeEnd - timeStart) * 1000;
620 | m_timeAcc += delta / (float)disjointData.Frequency;
621 | ++m_timeAccSampleNum;
622 | }
623 |
624 | if (m_timeAccSampleNum > 100)
625 | {
626 | m_compressionTime = m_timeAcc / m_timeAccSampleNum;
627 | m_timeAcc = 0.0f;
628 | m_timeAccSampleNum = 0;
629 | m_updateTitle = true;
630 | }
631 | }
632 |
633 | if (m_updateTitle)
634 | {
635 | UpdateTitle();
636 | m_updateTitle = false;
637 | }
638 | }
639 |
640 | void CApp::CopyTexture(Vec3* image, ID3D11ShaderResourceView* srcView)
641 | {
642 | if (m_blitVS && m_blitPS)
643 | {
644 | m_ctx->OMSetRenderTargets(1, &m_tmpTargetView, nullptr);
645 | D3D11_VIEWPORT vp;
646 | vp.Width = (float)m_imageWidth;
647 | vp.Height = (float)m_imageHeight;
648 | vp.MinDepth = 0.0f;
649 | vp.MaxDepth = 1.0f;
650 | vp.TopLeftX = 0;
651 | vp.TopLeftY = 0;
652 | m_ctx->RSSetViewports(1, &vp);
653 |
654 | m_ctx->VSSetShader(m_blitVS, nullptr, 0);
655 | m_ctx->PSSetShader(m_blitPS, nullptr, 0);
656 | m_ctx->PSSetShaderResources(0, 1, &srcView);
657 | m_ctx->PSSetShaderResources(1, 1, &srcView);
658 | m_ctx->PSSetSamplers(0, 1, &m_pointSampler);
659 |
660 | m_ctx->DrawIndexed(4, 0, 0);
661 | m_ctx->CopyResource(m_tmpStagingRes, m_tmpTargetRes);
662 |
663 | D3D11_MAPPED_SUBRESOURCE mappedRes;
664 | m_ctx->Map(m_tmpStagingRes, 0, D3D11_MAP_READ, 0, &mappedRes);
665 | if (mappedRes.pData)
666 | {
667 | for (unsigned y = 0; y < m_imageHeight; ++y)
668 | {
669 | for (unsigned x = 0; x < m_imageWidth; ++x)
670 | {
671 | uint16_t tmp[4];
672 | memcpy(&tmp, (uint8_t*)mappedRes.pData + mappedRes.RowPitch * y + x * sizeof(tmp), sizeof(tmp));
673 |
674 | image[x + y * m_imageWidth].x = HalfToFloat(tmp[0]);
675 | image[x + y * m_imageWidth].y = HalfToFloat(tmp[1]);
676 | image[x + y * m_imageWidth].z = HalfToFloat(tmp[2]);
677 | }
678 | }
679 |
680 | m_ctx->Unmap(m_tmpStagingRes, 0);
681 | }
682 | }
683 | }
684 |
685 | void CApp::UpdateRMSE()
686 | {
687 | SShaderCB shaderCB;
688 | shaderCB.m_imageSizeRcp.x = 1.0f / m_imageWidth;
689 | shaderCB.m_imageSizeRcp.y = 1.0f / m_imageHeight;
690 | shaderCB.m_screenSizeRcp.x = 1.0f / m_backbufferWidth;
691 | shaderCB.m_screenSizeRcp.y = 1.0f / m_backbufferHeight;
692 | shaderCB.m_texelBias = Vec2(0.0f, 0.0f);
693 | shaderCB.m_texelScale = 1.0f;
694 | shaderCB.m_exposure = 1.0f;
695 | shaderCB.m_blitMode = 0;
696 |
697 | D3D11_MAPPED_SUBRESOURCE mappedRes;
698 | m_ctx->Map(m_constantBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedRes);
699 | memcpy(mappedRes.pData, &shaderCB, sizeof(shaderCB));
700 | m_ctx->Unmap(m_constantBuffer, 0);
701 | m_ctx->PSSetConstantBuffers(0, 1, &m_constantBuffer);
702 |
703 |
704 | Vec3* imageA = new Vec3[m_imageWidth * m_imageHeight];
705 | Vec3* imageB = new Vec3[m_imageWidth * m_imageHeight];
706 |
707 | CopyTexture(imageA, m_sourceTextureView);
708 | CopyTexture(imageB, m_compressedTextureView);
709 |
710 | // Compute RGB and Luminance RMSE errors in log space
711 | double rSum = 0.0;
712 | double gSum = 0.0;
713 | double bSum = 0.0;
714 | for (unsigned y = 0; y < m_imageHeight; ++y)
715 | {
716 | for (unsigned x = 0; x < m_imageWidth; ++x)
717 | {
718 | double x0 = imageA[x + y * m_imageWidth].x;
719 | double y0 = imageA[x + y * m_imageWidth].y;
720 | double z0 = imageA[x + y * m_imageWidth].z;
721 | double x1 = imageB[x + y * m_imageWidth].x;
722 | double y1 = imageB[x + y * m_imageWidth].y;
723 | double z1 = imageB[x + y * m_imageWidth].z;
724 |
725 | double dx = log(x1 + 1.0) - log(x0 + 1.0);
726 | double dy = log(y1 + 1.0) - log(y0 + 1.0);
727 | double dz = log(z1 + 1.0) - log(z0 + 1.0);
728 | rSum += dx * dx;
729 | gSum += dy * dy;
730 | bSum += dy * dy;
731 | }
732 | }
733 | m_rgbRMSLE = (float)sqrt((rSum + gSum + bSum) / (3.0 * m_imageWidth * m_imageHeight));
734 | m_lumRMSLE = (float)sqrt((0.299 * rSum + 0.587 * gSum + 0.114 * bSum) / (1.0 * m_imageWidth * m_imageHeight));
735 |
736 | delete imageA;
737 | delete imageB;
738 |
739 | char rmseString[256];
740 | rmseString[0] = 0;
741 | sprintf_s(rmseString, "rgbRMSLE:%.4f lumRMSLE:%.4f Mode:%s %s\n", m_rgbRMSLE, m_lumRMSLE, m_compressionMode == 1 ? "Quality" : "Fast", ImagePathArr[m_imageID]);
742 | OutputDebugStringA(rmseString);
743 |
744 | m_updateTitle = true;
745 | }
746 |
--------------------------------------------------------------------------------
/app.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
// Two-component float vector.
struct Vec2
{
    // Default-constructs to (0, 0) so that a Vec2 is never read uninitialized.
    Vec2()
        : x(0.0f)
        , y(0.0f)
    {
    }

    // Constructs from explicit components.
    Vec2(float x_, float y_)
        : x(x_)
        , y(y_)
    {
    }

    float x;
    float y;
};
18 |
// Plain three-component float aggregate (x, y, z), with no constructors.
struct Vec3
{
    float x, y, z;
};
25 |
// Length of the per-frame GPU query arrays held by CApp.
const uint32_t MAX_QUERY_FRAME_NUM = 5;
// Number of compression compute shader variants held by CApp.
const uint32_t COMPRESSION_MODE_NUM = 2;
// Number of blit modes.
const uint32_t BLIT_MODE_NUM = 4;
29 |
30 | class CApp
31 | {
32 | public:
33 | CApp();
34 | ~CApp();
35 |
36 | bool Init(HWND windowHandle);
37 | void Release();
38 | void Render();
39 | void OnKeyDown(WPARAM wParam);
40 | void OnLButtonDown(int mouseX, int mouseY);
41 | void OnLButtonUp(int mouseX, int mouseY);
42 | void OnMouseMove(int mouseX, int mouseY);
43 | void OnMouseWheel(int zDelta);
44 | void OnResize();
45 |
46 | ID3D11Device* GetDevice() { return m_device; }
47 | ID3D11DeviceContext* GetCtx() { return m_ctx; }
48 |
49 |
50 | private:
51 | unsigned m_backbufferWidth = 1280;
52 | unsigned m_backbufferHeight = 720;
53 |
54 | ID3D11Device* m_device = nullptr;
55 | ID3D11DeviceContext* m_ctx = nullptr;
56 | IDXGISwapChain* m_swapChain = nullptr;
57 | ID3D11RenderTargetView* m_backBufferView = nullptr;
58 | ID3D11SamplerState* m_pointSampler = nullptr;
59 | ID3D11Buffer* m_constantBuffer = nullptr;
60 |
61 | ID3D11Query* m_disjointQueries[MAX_QUERY_FRAME_NUM];
62 | ID3D11Query* m_timeBeginQueries[MAX_QUERY_FRAME_NUM];
63 | ID3D11Query* m_timeEndQueries[MAX_QUERY_FRAME_NUM];
64 | float m_timeAcc = 0.0f;
65 | unsigned m_timeAccSampleNum = 0;
66 | float m_compressionTime = 0.0f;
67 |
68 | // Shaders
69 | ID3D11VertexShader* m_blitVS = nullptr;
70 | ID3D11PixelShader* m_blitPS = nullptr;
71 | ID3D11ComputeShader* m_compressCS[COMPRESSION_MODE_NUM] = { nullptr };
72 |
73 | // Resources
74 | ID3D11Buffer* m_ib = nullptr;
75 | ID3D11Texture2D* m_sourceTextureRes = nullptr;
76 | ID3D11ShaderResourceView* m_sourceTextureView = nullptr;
77 | ID3D11Texture2D* m_compressedTextureRes = nullptr;
78 | ID3D11ShaderResourceView* m_compressedTextureView = nullptr;
79 | ID3D11Texture2D* m_compressTargetRes = nullptr;
80 | ID3D11UnorderedAccessView* m_compressTargetUAV = nullptr;
81 | ID3D11Texture2D* m_tmpTargetRes = nullptr;
82 | ID3D11RenderTargetView* m_tmpTargetView = nullptr;
83 | ID3D11Texture2D* m_tmpStagingRes = nullptr;
84 |
85 | HWND m_windowHandle = 0;
86 | Vec2 m_texelBias = Vec2(0.0f, 0.0f);
87 | float m_texelScale = 1.0f;
88 | float m_imageZoom = 0.0f;
89 | float m_imageExposure = 0.0f;
90 | bool m_dragEnabled = false;
91 | Vec2 m_dragStart = Vec2(0.0f, 0.0f);
92 | bool m_updateRMSE = true;
93 | bool m_updateTitle = true;
94 | uint32_t m_imageID = 0;
95 | uint32_t m_imageWidth = 0;
96 | uint32_t m_imageHeight = 0;
97 | uint64_t m_frameID = 0;
98 |
99 | uint32_t m_compressionMode = 0;
100 | uint32_t m_blitMode = 1;
101 |
102 | // Compression error
103 | float m_rgbRMSLE = 0.0f;
104 | float m_lumRMSLE = 0.0f;
105 |
106 | void CreateImage();
107 | void DestoryImage();
108 | void CreateShaders();
109 | void DestroyShaders();
110 | void CreateTargets();
111 | void DestroyTargets();
112 | void CreateQueries();
113 | void CreateConstantBuffer();
114 | void UpdateRMSE();
115 | void UpdateTitle();
116 | void CopyTexture(Vec3* image, ID3D11ShaderResourceView* srcView);
117 | };
118 |
// Declaration of the global application instance; the definition is in another translation unit.
extern CApp gApp;
--------------------------------------------------------------------------------
/bin/GPURealTimeBC6HRelease.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/knarkowicz/GPURealTimeBC6H/0416fd30fd85e29d3ae673dee2cb9d0d09a36f61/bin/GPURealTimeBC6HRelease.exe
--------------------------------------------------------------------------------
/bin/atrium.dds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/knarkowicz/GPURealTimeBC6H/0416fd30fd85e29d3ae673dee2cb9d0d09a36f61/bin/atrium.dds
--------------------------------------------------------------------------------
/bin/backyard.dds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/knarkowicz/GPURealTimeBC6H/0416fd30fd85e29d3ae673dee2cb9d0d09a36f61/bin/backyard.dds
--------------------------------------------------------------------------------
/bin/blit.hlsl:
--------------------------------------------------------------------------------
// Inputs of the blit pixel shader: two textures (slots t0 and t1) that PSMain
// samples at the same UV, and the sampler (slot s0) used for both.
Texture2D TextureA : register(t0);
Texture2D TextureB : register(t1);
SamplerState PointSampler : register(s0);
4 |
// Vertex shader output / pixel shader input. Only the position is passed;
// PSMain derives its texture coordinates from m_pos.
struct PSInput
{
    float4 m_pos : SV_POSITION;
};
9 |
// Constant buffer bound at b0. The member order defines the buffer layout, so
// it must not be rearranged. PSMain reads TextureSizeRcp, TexelBias, TexelScale,
// Exposure and BlitMode; the remaining members are not referenced in this shader.
cbuffer MainCB : register(b0)
{
    float2 ScreenSizeRcp;
    uint2 TextureSizeInBlocks;
    float2 TextureSizeRcp;
    float2 TexelBias;   // added to the scaled pixel position in PSMain
    float TexelScale;   // multiplies the pixel position in PSMain
    float Exposure;     // multiplies both sampled colors in PSMain
    uint BlitMode;      // selects PSMain's output: 0, 1, 2, anything else
};
20 |
// Weighted sum of the RGB components using the weights (0.299, 0.587, 0.114).
float Luminance(float3 x)
{
    return dot(x, float3(0.299f, 0.587f, 0.114f));
}
26 |
// Generates a clip-space corner from the vertex index alone (no vertex buffer):
// bit 1 of the index selects x = -1 / +1, bit 0 selects y = -1 / +1.
PSInput VSMain(uint vertexID : SV_VertexID)
{
    float cornerX = vertexID >> 1;
    float cornerY = vertexID & 1;

    PSInput result;
    result.m_pos = float4(2.0f * cornerX - 1.0f, 2.0f * cornerY - 1.0f, 0.0f, 1.0f);
    return result;
}
38 |
// Blit pixel shader. Samples TextureA and TextureB at the same UV, scales both
// by Exposure and outputs, depending on BlitMode:
//   0 - texture A, 1 - texture B,
//   2 - per-channel squared log difference (x16),
//   otherwise - luminance of that squared log difference.
float3 PSMain(PSInput i) : SV_Target
{
    // Pixel position -> texture UV.
    float2 uv = (i.m_pos.xy * TexelScale + TexelBias) * TextureSizeRcp;

    float3 colorA = TextureA.SampleLevel(PointSampler, uv, 0.0f).rgb * Exposure;
    float3 colorB = TextureB.SampleLevel(PointSampler, uv, 0.0f).rgb * Exposure;

    if (BlitMode == 0)
    {
        return colorA;
    }
    if (BlitMode == 1)
    {
        return colorB;
    }

    // Error visualisation: squared difference of log(color + 1), amplified by 16.
    float3 logDiff = log(colorA + 1.0f) - log(colorB + 1.0f);
    float3 errorSq = logDiff * logDiff * 16.0f;
    if (BlitMode == 2)
    {
        return errorSq;
    }

    float errorLum = Luminance(errorSq);
    return float3(errorLum, errorLum, errorLum);
}
65 |
--------------------------------------------------------------------------------
/bin/compress.hlsl:
--------------------------------------------------------------------------------
#pragma warning(disable : 3078) // "loop control variable conflicts with a previous declaration in the outer scope"


// Whether to use P2 modes (4 endpoints) for compression. Slow, but improves quality.
// QUALITY is not defined in this file; presumably it is supplied as a compile-time
// define by the host application - TODO confirm against the shader compilation code.
#define ENCODE_P2 (QUALITY == 1)

// Improve quality at small performance loss
#define INSET_COLOR_BBOX 1
#define OPTIMIZE_ENDPOINTS 1

// Whether to optimize for luminance error or for RGB error
#define LUMINANCE_WEIGHTS 1


// Upper clamp / initial "min" value used by the encoder (65504).
static const float HALF_MAX = 65504.0f;
// Number of P2 patterns. NOTE(review): CSMain loops up to a literal 32 rather than this constant.
static const uint PATTERN_NUM = 32;
17 |
// Source image read by CSMain through Gather (slot t0).
Texture2D SrcTexture : register(t0);
// Compressed output, one uint4 (128 bits) per block. The element type is spelled
// out because CSMain stores a uint4 per block coordinate and a read/write
// texture needs an explicit element type.
RWTexture2D<uint4> OutputTexture : register(u0);
// Sampler used for the Gather calls (slot s0).
SamplerState PointSampler : register(s0);
21 |
// Constant buffer bound at b0. The member order defines the buffer layout, so
// it must not be rearranged. CSMain reads only TextureSizeInBlocks and
// TextureSizeRcp; the other members are not referenced in this shader.
cbuffer MainCB : register(b0)
{
    float2 ScreenSizeRcp;
    uint2 TextureSizeInBlocks;  // dispatch bounds check in CSMain
    float2 TextureSizeRcp;      // texel size in UV units, used to build the gather UVs
    float2 TexelBias;
    float TexelScale;
    float Exposure;
    uint BlitMode;
};
32 |
// Squared log2 error between two colors: per channel (log2((b + 1) / (a + 1)))^2,
// optionally scaled by luminance weights, summed over the three channels.
float CalcMSLE(float3 a, float3 b)
{
    float3 logRatio = log2((b + 1.0f) / (a + 1.0f));
    float3 errorSq = logRatio * logRatio;

#if LUMINANCE_WEIGHTS
    errorSq *= float3(0.299f, 0.587f, 0.114f);
#endif

    return errorSq.x + errorSq.y + errorSq.z;
}
45 |
// Returns the texel index (15, 2 or 8) of the second fix-up texel for P2
// pattern i. Two 32-bit masks hold one bit per pattern: a set bit in the first
// selects 2, a set bit in the second selects 8, otherwise the result is 15.
uint PatternFixupID(uint i)
{
    const uint fixupIs2Mask = 0xCD1A0000u; // == 3441033216
    const uint fixupIs8Mask = 0x32640000u; // == 845414400

    uint fixupID = 15;
    fixupID = ((fixupIs2Mask >> i) & 0x1) ? 2 : fixupID;
    fixupID = ((fixupIs8Mask >> i) & 0x1) ? 8 : fixupID;
    return fixupID;
}
53 |
// Returns the subset (0 or 1) that texel i belongs to in P2 pattern p.
// Each table entry packs two 16-bit texel masks: the low half is the even
// pattern (2 * pair), the high half is the odd pattern (2 * pair + 1).
uint Pattern(uint p, uint i)
{
    uint pair = p / 2;
    uint isOdd = p - pair * 2;

    uint masks = 0;
    masks = pair == 0 ? 0x8888CCCCu : masks;
    masks = pair == 1 ? 0xECC8EEEEu : masks;
    masks = pair == 2 ? 0xFEECC880u : masks;
    masks = pair == 3 ? 0xEC80FEC8u : masks;
    masks = pair == 4 ? 0xFFECC800u : masks;
    masks = pair == 5 ? 0xE800FE80u : masks;
    masks = pair == 6 ? 0xFF00FFE8u : masks;
    masks = pair == 7 ? 0xF000FFF0u : masks;
    masks = pair == 8 ? 0x008EF710u : masks;
    masks = pair == 9 ? 0x08CE7100u : masks;
    masks = pair == 10 ? 0x7310008Cu : masks;
    masks = pair == 11 ? 0x8CCE3100u : masks;
    masks = pair == 12 ? 0x3110088Cu : masks;
    masks = pair == 13 ? 0x366C6666u : masks;
    masks = pair == 14 ? 0x0FF017E8u : masks;
    masks = pair == 15 ? 0x399C718Eu : masks;

    // Odd patterns live in the upper 16 bits.
    masks = isOdd ? masks >> 16 : masks;
    return (masks >> i) & 0x1;
}
81 |
// Quantize7/9/10: map a color to a 7-, 9- or 10-bit endpoint scale by taking
// the half-float bit pattern of each channel (f32tof16) and scaling it by
// 2^bits / (0x7bff + 1). The result is left as a float; callers floor or cast it.
float3 Quantize7(float3 x)
{
    float3 halfBits = f32tof16(x);
    return (halfBits * 128.0f) / (0x7bff + 1.0f);
}

float3 Quantize9(float3 x)
{
    float3 halfBits = f32tof16(x);
    return (halfBits * 512.0f) / (0x7bff + 1.0f);
}

float3 Quantize10(float3 x)
{
    float3 halfBits = f32tof16(x);
    return (halfBits * 1024.0f) / (0x7bff + 1.0f);
}
96 |
// Unquantize7/9/10: expand a 7-, 9- or 10-bit endpoint value to the 16-bit
// scale used by FinishUnquantize: (x * 65536 + 0x8000) / 2^bits.
float3 Unquantize7(float3 x)
{
    float3 scaled = x * 65536.0f + 0x8000;
    return scaled / 128.0f;
}

float3 Unquantize9(float3 x)
{
    float3 scaled = x * 65536.0f + 0x8000;
    return scaled / 512.0f;
}

float3 Unquantize10(float3 x)
{
    float3 scaled = x * 65536.0f + 0x8000;
    return scaled / 1024.0f;
}
111 |
// Interpolates two unquantized endpoints with a 0..64 weight, rescales the
// result by 31/4096... and reinterprets the truncated integer as a half-float
// bit pattern (f16tof32) to obtain the decoded color.
float3 FinishUnquantize(float3 endpoint0Unq, float3 endpoint1Unq, float weight)
{
    float3 blended = endpoint0Unq * (64.0f - weight) + endpoint1Unq * weight + 32.0f;
    float3 comp = blended * (31.0f / 4096.0f);
    return f16tof32(uint3(comp));
}
117 |
// Exchanges two float3 values in place.
void Swap(inout float3 a, inout float3 b)
{
    float3 savedA = a;
    a = b;
    b = savedA;
}

// Exchanges two scalar values in place.
void Swap(inout float a, inout float b)
{
    float savedA = a;
    a = b;
    b = savedA;
}
131 |
// Maps a texel's position between two endpoint positions to a 3-bit index
// (0..7): the normalized position is scaled, biased, rounded and clamped.
uint ComputeIndex3(float texelPos, float endPoint0Pos, float endPoint1Pos)
{
    float t = (texelPos - endPoint0Pos) / (endPoint1Pos - endPoint0Pos);
    float rounded = t * 6.98182f + 0.00909f + 0.5f;
    return (uint) clamp(rounded, 0.0f, 7.0f);
}

// Same as ComputeIndex3 but for a 4-bit index (0..15).
uint ComputeIndex4(float texelPos, float endPoint0Pos, float endPoint1Pos)
{
    float t = (texelPos - endPoint0Pos) / (endPoint1Pos - endPoint0Pos);
    float rounded = t * 14.93333f + 0.03333f + 0.5f;
    return (uint) clamp(rounded, 0.0f, 15.0f);
}
143 |
// Converts each signed component of v1 into a fixed-width bit field: the low
// bits selected by 'mask' are kept and 'signFlag' is OR-ed in when the value is
// negative. The result is written back to v1 as a float3.
void SignExtend(inout float3 v1, uint mask, uint signFlag)
{
    int3 bits = (int3) v1;
    bits.x = (bits.x & mask) | (bits.x < 0 ? signFlag : 0);
    bits.y = (bits.y & mask) | (bits.y < 0 ? signFlag : 0);
    bits.z = (bits.z & mask) | (bits.z < 0 ? signFlag : 0);
    v1 = bits;
}
152 |
// Refine endpoints by insetting bounding box in log2 RGB space
//
// texels   - the 16 block texels
// blockMin - in: per-channel minimum of the block; out: inset minimum
// blockMax - in: per-channel maximum of the block; out: inset maximum
void InsetColorBBoxP1(float3 texels[16], inout float3 blockMin, inout float3 blockMax)
{
    // Per channel: smallest value that is not blockMin and largest value that is
    // not blockMax (i.e. the bounds with the extreme values themselves ignored).
    float3 refinedBlockMin = blockMax;
    float3 refinedBlockMax = blockMin;

    for (uint i = 0; i < 16; ++i)
    {
        refinedBlockMin = min(refinedBlockMin, texels[i] == blockMin ? refinedBlockMin : texels[i]);
        refinedBlockMax = max(refinedBlockMax, texels[i] == blockMax ? refinedBlockMax : texels[i]);
    }

    float3 logRefinedBlockMax = log2(refinedBlockMax + 1.0f);
    float3 logRefinedBlockMin = log2(refinedBlockMin + 1.0f);

    float3 logBlockMax = log2(blockMax + 1.0f);
    float3 logBlockMin = log2(blockMin + 1.0f);
    // Maximum inset: 1/32 of the box extent in log2 space.
    float3 logBlockMaxExt = (logBlockMax - logBlockMin) * (1.0f / 32.0f);

    // Move each bound inwards, but never past the refined bound.
    logBlockMin += min(logRefinedBlockMin - logBlockMin, logBlockMaxExt);
    logBlockMax -= min(logBlockMax - logRefinedBlockMax, logBlockMaxExt);

    // Back from log2(x + 1) space to linear.
    blockMin = exp2(logBlockMin) - 1.0f;
    blockMax = exp2(logBlockMax) - 1.0f;
}
178 |
// Refine endpoints by insetting bounding box in log2 RGB space
//
// Same procedure as the P1 variant, restricted to the texels whose subset in
// 'pattern' equals 'patternSelector'.
// NOTE(review): both call sites in EncodeP2Pattern are commented out.
void InsetColorBBoxP2(float3 texels[16], uint pattern, uint patternSelector, inout float3 blockMin, inout float3 blockMax)
{
    // Per channel: smallest value that is not blockMin and largest value that is
    // not blockMax, over the selected subset only.
    float3 refinedBlockMin = blockMax;
    float3 refinedBlockMax = blockMin;

    for (uint i = 0; i < 16; ++i)
    {
        uint paletteID = Pattern(pattern, i);
        if (paletteID == patternSelector)
        {
            refinedBlockMin = min(refinedBlockMin, texels[i] == blockMin ? refinedBlockMin : texels[i]);
            refinedBlockMax = max(refinedBlockMax, texels[i] == blockMax ? refinedBlockMax : texels[i]);
        }
    }

    float3 logRefinedBlockMax = log2(refinedBlockMax + 1.0f);
    float3 logRefinedBlockMin = log2(refinedBlockMin + 1.0f);

    float3 logBlockMax = log2(blockMax + 1.0f);
    float3 logBlockMin = log2(blockMin + 1.0f);
    // Maximum inset: 1/32 of the box extent in log2 space.
    float3 logBlockMaxExt = (logBlockMax - logBlockMin) * (1.0f / 32.0f);

    // Move each bound inwards, but never past the refined bound.
    logBlockMin += min(logRefinedBlockMin - logBlockMin, logBlockMaxExt);
    logBlockMax -= min(logBlockMax - logRefinedBlockMax, logBlockMaxExt);

    // Back from log2(x + 1) space to linear.
    blockMin = exp2(logBlockMin) - 1.0f;
    blockMax = exp2(logBlockMax) - 1.0f;
}
208 |
// Least squares optimization to find best endpoints for the selected block indices
//
// texels           - the 16 block texels
// blockMin/Max     - in: current endpoints; out: refined endpoints (unchanged
//                    when the least-squares system is near-singular)
// blockMin/MaxNonInset - bounds the refined endpoints are clamped to
void OptimizeEndpointsP1(float3 texels[16], inout float3 blockMin, inout float3 blockMax, in float3 blockMinNonInset, in float3 blockMaxNonInset)
{
    // Projection axis: endpoint difference scaled so its components sum to 1.
    float3 blockDir = blockMax - blockMin;
    blockDir = blockDir / (blockDir.x + blockDir.y + blockDir.z);

    // Endpoint positions along the axis, expressed as half-float bit patterns.
    float endPoint0Pos = f32tof16(dot(blockMin, blockDir));
    float endPoint1Pos = f32tof16(dot(blockMax, blockDir));

    // Accumulators of the 2x2 normal equations (alpha = weight of endpoint 0,
    // beta = weight of endpoint 1).
    float3 alphaTexelSum = 0.0f;
    float3 betaTexelSum = 0.0f;
    float alphaBetaSum = 0.0f;
    float alphaSqSum = 0.0f;
    float betaSqSum = 0.0f;

    for (int i = 0; i < 16; i++)
    {
        float texelPos = f32tof16(dot(texels[i], blockDir));
        uint texelIndex = ComputeIndex4(texelPos, endPoint0Pos, endPoint1Pos);

        float beta = saturate(texelIndex / 15.0f);
        float alpha = 1.0f - beta;

        // Texel colors are accumulated as half-float bit patterns.
        float3 texelF16 = f32tof16(texels[i].xyz);
        alphaTexelSum += alpha * texelF16;
        betaTexelSum += beta * texelF16;

        alphaBetaSum += alpha * beta;

        alphaSqSum += alpha * alpha;
        betaSqSum += beta * beta;
    }

    float det = alphaSqSum * betaSqSum - alphaBetaSum * alphaBetaSum;

    // Skip the update when the system is (near) singular, e.g. all texels got the same index.
    if (abs(det) > 0.00001f)
    {
        float detRcp = rcp(det);
        // Solve, clamp to [0, HALF_MAX], convert back through f16tof32 and keep
        // the result inside the non-inset bounding box.
        blockMin = clamp(f16tof32(clamp(detRcp * (alphaTexelSum * betaSqSum - betaTexelSum * alphaBetaSum), 0.0f, HALF_MAX)), blockMinNonInset, blockMaxNonInset);
        blockMax = clamp(f16tof32(clamp(detRcp * (betaTexelSum * alphaSqSum - alphaTexelSum * alphaBetaSum), 0.0f, HALF_MAX)), blockMinNonInset, blockMaxNonInset);
    }
}
251 |
// Least squares optimization to find best endpoints for the selected block indices
//
// Variant for one subset of a P2 pattern: only texels whose subset in 'pattern'
// equals 'patternSelector' contribute, and indices are 3-bit (ComputeIndex3).
// Unlike the P1 variant, the result is not clamped to a bounding box.
void OptimizeEndpointsP2(float3 texels[16], uint pattern, uint patternSelector, inout float3 blockMin, inout float3 blockMax)
{
    // Projection axis: endpoint difference scaled so its components sum to 1.
    float3 blockDir = blockMax - blockMin;
    blockDir = blockDir / (blockDir.x + blockDir.y + blockDir.z);

    // Endpoint positions along the axis, expressed as half-float bit patterns.
    float endPoint0Pos = f32tof16(dot(blockMin, blockDir));
    float endPoint1Pos = f32tof16(dot(blockMax, blockDir));

    // Accumulators of the 2x2 normal equations (alpha = weight of endpoint 0,
    // beta = weight of endpoint 1).
    float3 alphaTexelSum = 0.0f;
    float3 betaTexelSum = 0.0f;
    float alphaBetaSum = 0.0f;
    float alphaSqSum = 0.0f;
    float betaSqSum = 0.0f;

    for (int i = 0; i < 16; i++)
    {
        uint paletteID = Pattern(pattern, i);
        if (paletteID == patternSelector)
        {
            float texelPos = f32tof16(dot(texels[i], blockDir));
            uint texelIndex = ComputeIndex3(texelPos, endPoint0Pos, endPoint1Pos);

            float beta = saturate(texelIndex / 7.0f);
            float alpha = 1.0f - beta;

            // Texel colors are accumulated as half-float bit patterns.
            float3 texelF16 = f32tof16(texels[i].xyz);
            alphaTexelSum += alpha * texelF16;
            betaTexelSum += beta * texelF16;

            alphaBetaSum += alpha * beta;

            alphaSqSum += alpha * alpha;
            betaSqSum += beta * beta;
        }
    }

    float det = alphaSqSum * betaSqSum - alphaBetaSum * alphaBetaSum;

    // Skip the update when the system is (near) singular.
    if (abs(det) > 0.00001f)
    {
        float detRcp = rcp(det);
        blockMin = f16tof32(clamp(detRcp * (alphaTexelSum * betaSqSum - betaTexelSum * alphaBetaSum), 0.0f, HALF_MAX));
        blockMax = f16tof32(clamp(detRcp * (betaTexelSum * alphaSqSum - alphaTexelSum * alphaBetaSum), 0.0f, HALF_MAX));
    }
}
298 |
// Encodes the block with a single endpoint pair (two 10-bit-per-channel
// endpoints, 4-bit indices) and reports its error.
//
// block     - out: encoded 128-bit block. block.x is assigned, but y/z/w are
//             only OR-ed into, so the caller must pass them zeroed.
// blockMSLE - out: summed CalcMSLE error of the encoded block
// texels    - the 16 block texels
void EncodeP1(inout uint4 block, inout float blockMSLE, float3 texels[16])
{
    // compute endpoints (min/max RGB bbox)
    float3 blockMin = texels[0];
    float3 blockMax = texels[0];
    for (uint i = 1; i < 16; ++i)
    {
        blockMin = min(blockMin, texels[i]);
        blockMax = max(blockMax, texels[i]);
    }

    // Keep the raw bounding box; OptimizeEndpointsP1 clamps its result to it.
    float3 blockMinNonInset = blockMin;
    float3 blockMaxNonInset = blockMax;
#if INSET_COLOR_BBOX
    InsetColorBBoxP1(texels, blockMin, blockMax);
#endif

#if OPTIMIZE_ENDPOINTS
    OptimizeEndpointsP1(texels, blockMin, blockMax, blockMinNonInset, blockMaxNonInset);
#endif


    // Projection axis: endpoint difference scaled so its components sum to 1.
    // NOTE(review): for a block with blockMin == blockMax this divides 0 by 0 - confirm the intended handling.
    float3 blockDir = blockMax - blockMin;
    blockDir = blockDir / (blockDir.x + blockDir.y + blockDir.z);

    float3 endpoint0 = Quantize10(blockMin);
    float3 endpoint1 = Quantize10(blockMax);
    float endPoint0Pos = f32tof16(dot(blockMin, blockDir));
    float endPoint1Pos = f32tof16(dot(blockMax, blockDir));

    // check if endpoint swap is required
    // (texel 0 is stored with one bit less than the others, so its index must not exceed 7)
    float fixupTexelPos = f32tof16(dot(texels[0], blockDir));
    uint fixupIndex = ComputeIndex4(fixupTexelPos, endPoint0Pos, endPoint1Pos);
    if (fixupIndex > 7)
    {
        Swap(endPoint0Pos, endPoint1Pos);
        Swap(endpoint0, endpoint1);
    }

    // compute indices
    uint indices[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
    for (uint i = 0; i < 16; ++i)
    {
        float texelPos = f32tof16(dot(texels[i], blockDir));
        indices[i] = ComputeIndex4(texelPos, endPoint0Pos, endPoint1Pos);
    }

    // compute compression error (MSLE)
    float3 endpoint0Unq = Unquantize10(endpoint0);
    float3 endpoint1Unq = Unquantize10(endpoint1);
    float msle = 0.0f;
    for (uint i = 0; i < 16; ++i)
    {
        // 4-bit index -> 0..64 interpolation weight
        float weight = floor((indices[i] * 64.0f) / 15.0f + 0.5f);
        float3 texelUnc = FinishUnquantize(endpoint0Unq, endpoint1Unq, weight);

        msle += CalcMSLE(texels[i], texelUnc);
    }


    // encode block for mode 11
    blockMSLE = msle;
    block.x = 0x03;

    // endpoints
    // (10 bits per channel, packed back to back starting at bit 5; values that
    // straddle a 32-bit word are split with a shift into the next word)
    block.x |= (uint) endpoint0.x << 5;
    block.x |= (uint) endpoint0.y << 15;
    block.x |= (uint) endpoint0.z << 25;
    block.y |= (uint) endpoint0.z >> 7;
    block.y |= (uint) endpoint1.x << 3;
    block.y |= (uint) endpoint1.y << 13;
    block.y |= (uint) endpoint1.z << 23;
    block.z |= (uint) endpoint1.z >> 9;

    // indices
    // (index 0 occupies 3 bits starting at bit 1 of block.z, the rest 4 bits each)
    block.z |= indices[0] << 1;
    block.z |= indices[1] << 4;
    block.z |= indices[2] << 8;
    block.z |= indices[3] << 12;
    block.z |= indices[4] << 16;
    block.z |= indices[5] << 20;
    block.z |= indices[6] << 24;
    block.z |= indices[7] << 28;
    block.w |= indices[8] << 0;
    block.w |= indices[9] << 4;
    block.w |= indices[10] << 8;
    block.w |= indices[11] << 12;
    block.w |= indices[12] << 16;
    block.w |= indices[13] << 20;
    block.w |= indices[14] << 24;
    block.w |= indices[15] << 28;
}
391 |
// Squared distance from Point to the line through PointOnLine with direction
// LineDirection: the component of the offset along the direction is removed
// and the squared length of the remainder is returned.
float DistToLineSq(float3 PointOnLine, float3 LineDirection, float3 Point)
{
    float3 offset = Point - PointOnLine;
    float alongLine = dot(offset, LineDirection);
    float3 perpendicular = offset - alongLine * LineDirection;
    return dot(perpendicular, perpendicular);
}
398 |
// Evaluate how good is given P2 pattern for encoding current block
//
// For each of the pattern's two subsets the per-channel bounding box is built
// and the subset's texels are scored by their squared distance to the line
// through the box minimum along the box diagonal. Lower is better.
//
// pattern - P2 pattern index passed to Pattern()
// texels  - the 16 block texels
// returns - sum of squared distances over all 16 texels
float EvaluateP2Pattern(int pattern, float3 texels[16])
{
    float3 p0BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX);
    float3 p0BlockMax = float3(0.0f, 0.0f, 0.0f);
    float3 p1BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX);
    float3 p1BlockMax = float3(0.0f, 0.0f, 0.0f);

    for (uint i = 0; i < 16; ++i)
    {
        uint paletteID = Pattern(pattern, i);
        if (paletteID == 0)
        {
            p0BlockMin = min(p0BlockMin, texels[i]);
            p0BlockMax = max(p0BlockMax, texels[i]);
        }
        else
        {
            p1BlockMin = min(p1BlockMin, texels[i]);
            p1BlockMax = max(p1BlockMax, texels[i]);
        }
    }

    // Normalize the box diagonals. A subset whose texels are all identical has
    // a zero-length diagonal; normalizing it would produce NaN, which would
    // poison the score and the caller's "score < bestScore" comparison. Use a
    // zero direction instead, which makes DistToLineSq measure the plain
    // distance to the box minimum (zero for such a subset).
    float3 p0Extent = p0BlockMax - p0BlockMin;
    float3 p1Extent = p1BlockMax - p1BlockMin;
    float p0LenSq = dot(p0Extent, p0Extent);
    float p1LenSq = dot(p1Extent, p1Extent);
    float3 p0BlockDir = p0LenSq > 0.0f ? p0Extent * rsqrt(p0LenSq) : float3(0.0f, 0.0f, 0.0f);
    float3 p1BlockDir = p1LenSq > 0.0f ? p1Extent * rsqrt(p1LenSq) : float3(0.0f, 0.0f, 0.0f);

    float sqDistanceFromLine = 0.0f;

    for (uint i = 0; i < 16; ++i)
    {
        uint paletteID = Pattern(pattern, i);
        if (paletteID == 0)
        {
            sqDistanceFromLine += DistToLineSq(p0BlockMin, p0BlockDir, texels[i]);
        }
        else
        {
            sqDistanceFromLine += DistToLineSq(p1BlockMin, p1BlockDir, texels[i]);
        }
    }

    return sqDistanceFromLine;
}
442 |
// Encodes the block with two endpoint pairs split by 'pattern', trying two
// endpoint precisions ("7.6": 7-bit base + 6-bit signed deltas, "9.5": 9-bit
// base + 5-bit signed deltas), and overwrites 'block' only if the better of the
// two has a lower error than the incoming blockMSLE.
//
// block     - in/out: encoded 128-bit block; replaced when P2 wins
// blockMSLE - in: error of the current encoding; out: min(old, new) error
// pattern   - P2 pattern index (also written into the block)
// texels    - the 16 block texels
void EncodeP2Pattern(inout uint4 block, inout float blockMSLE, int pattern, float3 texels[16])
{
    // Per-subset bounding boxes.
    float3 p0BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX);
    float3 p0BlockMax = float3(0.0f, 0.0f, 0.0f);
    float3 p1BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX);
    float3 p1BlockMax = float3(0.0f, 0.0f, 0.0f);

    for (uint i = 0; i < 16; ++i)
    {
        uint paletteID = Pattern(pattern, i);
        if (paletteID == 0)
        {
            p0BlockMin = min(p0BlockMin, texels[i]);
            p0BlockMax = max(p0BlockMax, texels[i]);
        }
        else
        {
            p1BlockMin = min(p1BlockMin, texels[i]);
            p1BlockMax = max(p1BlockMax, texels[i]);
        }
    }

#if INSET_COLOR_BBOX
    // Disabled because it was a negligible quality increase
    //InsetColorBBoxP2(texels, pattern, 0, p0BlockMin, p0BlockMax);
    //InsetColorBBoxP2(texels, pattern, 1, p1BlockMin, p1BlockMax);
#endif

#if OPTIMIZE_ENDPOINTS
    OptimizeEndpointsP2(texels, pattern, 0, p0BlockMin, p0BlockMax);
    OptimizeEndpointsP2(texels, pattern, 1, p1BlockMin, p1BlockMax);
#endif

    // Projection axes: endpoint differences scaled so their components sum to 1.
    // NOTE(review): a subset with identical min and max divides 0 by 0 here - confirm the intended handling.
    float3 p0BlockDir = p0BlockMax - p0BlockMin;
    float3 p1BlockDir = p1BlockMax - p1BlockMin;
    p0BlockDir = p0BlockDir / (p0BlockDir.x + p0BlockDir.y + p0BlockDir.z);
    p1BlockDir = p1BlockDir / (p1BlockDir.x + p1BlockDir.y + p1BlockDir.z);


    // Endpoint positions along each axis, as half-float bit patterns.
    float p0Endpoint0Pos = f32tof16(dot(p0BlockMin, p0BlockDir));
    float p0Endpoint1Pos = f32tof16(dot(p0BlockMax, p0BlockDir));
    float p1Endpoint0Pos = f32tof16(dot(p1BlockMin, p1BlockDir));
    float p1Endpoint1Pos = f32tof16(dot(p1BlockMax, p1BlockDir));


    // The two fix-up texels (texel 0 and texel fixupID) are stored with only 2
    // bits below, so each subset's endpoints are swapped when its fix-up texel
    // would otherwise need an index above 3.
    uint fixupID = PatternFixupID(pattern);
    float p0FixupTexelPos = f32tof16(dot(texels[0], p0BlockDir));
    float p1FixupTexelPos = f32tof16(dot(texels[fixupID], p1BlockDir));
    uint p0FixupIndex = ComputeIndex3(p0FixupTexelPos, p0Endpoint0Pos, p0Endpoint1Pos);
    uint p1FixupIndex = ComputeIndex3(p1FixupTexelPos, p1Endpoint0Pos, p1Endpoint1Pos);
    if (p0FixupIndex > 3)
    {
        Swap(p0Endpoint0Pos, p0Endpoint1Pos);
        Swap(p0BlockMin, p0BlockMax);
    }
    if (p1FixupIndex > 3)
    {
        Swap(p1Endpoint0Pos, p1Endpoint1Pos);
        Swap(p1BlockMin, p1BlockMax);
    }

    // 3-bit index per texel, taken from the subset the texel belongs to.
    uint indices[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
    for (uint i = 0; i < 16; ++i)
    {
        float p0TexelPos = f32tof16(dot(texels[i], p0BlockDir));
        float p1TexelPos = f32tof16(dot(texels[i], p1BlockDir));
        uint p0Index = ComputeIndex3(p0TexelPos, p0Endpoint0Pos, p0Endpoint1Pos);
        uint p1Index = ComputeIndex3(p1TexelPos, p1Endpoint0Pos, p1Endpoint1Pos);

        uint paletteID = Pattern(pattern, i);
        indices[i] = paletteID == 0 ? p0Index : p1Index;
    }

    // Quantized endpoints for both candidate precisions. Suffix 0..3 =
    // subset 0 first/second endpoint, subset 1 first/second endpoint.
    float3 endpoint760 = floor(Quantize7(p0BlockMin));
    float3 endpoint761 = floor(Quantize7(p0BlockMax));
    float3 endpoint762 = floor(Quantize7(p1BlockMin));
    float3 endpoint763 = floor(Quantize7(p1BlockMax));

    float3 endpoint950 = floor(Quantize9(p0BlockMin));
    float3 endpoint951 = floor(Quantize9(p0BlockMax));
    float3 endpoint952 = floor(Quantize9(p1BlockMin));
    float3 endpoint953 = floor(Quantize9(p1BlockMax));

    // Endpoints 1..3 are stored as deltas relative to endpoint 0.
    endpoint761 = endpoint761 - endpoint760;
    endpoint762 = endpoint762 - endpoint760;
    endpoint763 = endpoint763 - endpoint760;

    endpoint951 = endpoint951 - endpoint950;
    endpoint952 = endpoint952 - endpoint950;
    endpoint953 = endpoint953 - endpoint950;

    // Clamp the deltas to what the signed delta fields can hold.
    int maxVal76 = 0x1F;
    endpoint761 = clamp(endpoint761, -maxVal76, maxVal76);
    endpoint762 = clamp(endpoint762, -maxVal76, maxVal76);
    endpoint763 = clamp(endpoint763, -maxVal76, maxVal76);

    int maxVal95 = 0xF;
    endpoint951 = clamp(endpoint951, -maxVal95, maxVal95);
    endpoint952 = clamp(endpoint952, -maxVal95, maxVal95);
    endpoint953 = clamp(endpoint953, -maxVal95, maxVal95);

    // Reconstruct the endpoints (base + clamped delta) for error evaluation.
    float3 endpoint760Unq = Unquantize7(endpoint760);
    float3 endpoint761Unq = Unquantize7(endpoint760 + endpoint761);
    float3 endpoint762Unq = Unquantize7(endpoint760 + endpoint762);
    float3 endpoint763Unq = Unquantize7(endpoint760 + endpoint763);
    float3 endpoint950Unq = Unquantize9(endpoint950);
    float3 endpoint951Unq = Unquantize9(endpoint950 + endpoint951);
    float3 endpoint952Unq = Unquantize9(endpoint950 + endpoint952);
    float3 endpoint953Unq = Unquantize9(endpoint950 + endpoint953);

    // Error of each candidate precision over the whole block.
    float msle76 = 0.0f;
    float msle95 = 0.0f;
    for (uint i = 0; i < 16; ++i)
    {
        uint paletteID = Pattern(pattern, i);

        float3 tmp760Unq = paletteID == 0 ? endpoint760Unq : endpoint762Unq;
        float3 tmp761Unq = paletteID == 0 ? endpoint761Unq : endpoint763Unq;
        float3 tmp950Unq = paletteID == 0 ? endpoint950Unq : endpoint952Unq;
        float3 tmp951Unq = paletteID == 0 ? endpoint951Unq : endpoint953Unq;

        // 3-bit index -> 0..64 interpolation weight
        float weight = floor((indices[i] * 64.0f) / 7.0f + 0.5f);
        float3 texelUnc76 = FinishUnquantize(tmp760Unq, tmp761Unq, weight);
        float3 texelUnc95 = FinishUnquantize(tmp950Unq, tmp951Unq, weight);

        msle76 += CalcMSLE(texels[i], texelUnc76);
        msle95 += CalcMSLE(texels[i], texelUnc95);
    }

    // Convert the signed deltas to 6-bit / 5-bit bit fields for packing.
    SignExtend(endpoint761, 0x1F, 0x20);
    SignExtend(endpoint762, 0x1F, 0x20);
    SignExtend(endpoint763, 0x1F, 0x20);

    SignExtend(endpoint951, 0xF, 0x10);
    SignExtend(endpoint952, 0xF, 0x10);
    SignExtend(endpoint953, 0xF, 0x10);

    // encode block
    // (only when the best P2 candidate beats the encoding already in 'block')
    float p2MSLE = min(msle76, msle95);
    if (p2MSLE < blockMSLE)
    {
        blockMSLE = p2MSLE;
        block = uint4(0, 0, 0, 0);

        if (p2MSLE == msle76)
        {
            // 7.6
            // Mode value 0x1 in the low bits; the remaining statements scatter
            // individual endpoint bits to their positions in the block.
            block.x = 0x1;
            block.x |= ((uint) endpoint762.y & 0x20) >> 3;
            block.x |= ((uint) endpoint763.y & 0x10) >> 1;
            block.x |= ((uint) endpoint763.y & 0x20) >> 1;
            block.x |= (uint) endpoint760.x << 5;
            block.x |= ((uint) endpoint763.z & 0x01) << 12;
            block.x |= ((uint) endpoint763.z & 0x02) << 12;
            block.x |= ((uint) endpoint762.z & 0x10) << 10;
            block.x |= (uint) endpoint760.y << 15;
            block.x |= ((uint) endpoint762.z & 0x20) << 17;
            block.x |= ((uint) endpoint763.z & 0x04) << 21;
            block.x |= ((uint) endpoint762.y & 0x10) << 20;
            block.x |= (uint) endpoint760.z << 25;
            block.y |= ((uint) endpoint763.z & 0x08) >> 3;
            block.y |= ((uint) endpoint763.z & 0x20) >> 4;
            block.y |= ((uint) endpoint763.z & 0x10) >> 2;
            block.y |= (uint) endpoint761.x << 3;
            block.y |= ((uint) endpoint762.y & 0x0F) << 9;
            block.y |= (uint) endpoint761.y << 13;
            block.y |= ((uint) endpoint763.y & 0x0F) << 19;
            block.y |= (uint) endpoint761.z << 23;
            block.y |= ((uint) endpoint762.z & 0x07) << 29;
            block.z |= ((uint) endpoint762.z & 0x08) >> 3;
            block.z |= (uint) endpoint762.x << 1;
            block.z |= (uint) endpoint763.x << 7;
        }
        else
        {
            // 9.5
            // Mode value 0xE in the low bits; the remaining statements scatter
            // individual endpoint bits to their positions in the block.
            block.x = 0xE;
            block.x |= (uint) endpoint950.x << 5;
            block.x |= ((uint) endpoint952.z & 0x10) << 10;
            block.x |= (uint) endpoint950.y << 15;
            block.x |= ((uint) endpoint952.y & 0x10) << 20;
            block.x |= (uint) endpoint950.z << 25;
            block.y |= (uint) endpoint950.z >> 7;
            block.y |= ((uint) endpoint953.z & 0x10) >> 2;
            block.y |= (uint) endpoint951.x << 3;
            block.y |= ((uint) endpoint953.y & 0x10) << 4;
            block.y |= ((uint) endpoint952.y & 0x0F) << 9;
            block.y |= (uint) endpoint951.y << 13;
            block.y |= ((uint) endpoint953.z & 0x01) << 18;
            block.y |= ((uint) endpoint953.y & 0x0F) << 19;
            block.y |= (uint) endpoint951.z << 23;
            block.y |= ((uint) endpoint953.z & 0x02) << 27;
            // Unmasked on purpose or not, only the low 3 bits survive the shift by 29.
            block.y |= (uint) endpoint952.z << 29;
            block.z |= ((uint) endpoint952.z & 0x08) >> 3;
            block.z |= (uint) endpoint952.x << 1;
            block.z |= ((uint) endpoint953.z & 0x04) << 4;
            block.z |= (uint) endpoint953.x << 7;
            block.z |= ((uint) endpoint953.z & 0x08) << 9;
        }

        // Pattern index, then the indices: 3 bits per texel, except the two
        // fix-up texels (texel 0 and texel blockFixupID) which get 2 bits. The
        // three branches differ only in where the second 2-bit index sits.
        block.z |= pattern << 13;
        uint blockFixupID = PatternFixupID(pattern);
        if (blockFixupID == 15)
        {
            block.z |= indices[0] << 18;
            block.z |= indices[1] << 20;
            block.z |= indices[2] << 23;
            block.z |= indices[3] << 26;
            block.z |= indices[4] << 29;
            block.w |= indices[5] << 0;
            block.w |= indices[6] << 3;
            block.w |= indices[7] << 6;
            block.w |= indices[8] << 9;
            block.w |= indices[9] << 12;
            block.w |= indices[10] << 15;
            block.w |= indices[11] << 18;
            block.w |= indices[12] << 21;
            block.w |= indices[13] << 24;
            block.w |= indices[14] << 27;
            block.w |= indices[15] << 30;
        }
        else if (blockFixupID == 2)
        {
            block.z |= indices[0] << 18;
            block.z |= indices[1] << 20;
            block.z |= indices[2] << 23;
            block.z |= indices[3] << 25;
            block.z |= indices[4] << 28;
            // Index 5 straddles the z/w word boundary.
            block.z |= indices[5] << 31;
            block.w |= indices[5] >> 1;
            block.w |= indices[6] << 2;
            block.w |= indices[7] << 5;
            block.w |= indices[8] << 8;
            block.w |= indices[9] << 11;
            block.w |= indices[10] << 14;
            block.w |= indices[11] << 17;
            block.w |= indices[12] << 20;
            block.w |= indices[13] << 23;
            block.w |= indices[14] << 26;
            block.w |= indices[15] << 29;
        }
        else
        {
            block.z |= indices[0] << 18;
            block.z |= indices[1] << 20;
            block.z |= indices[2] << 23;
            block.z |= indices[3] << 26;
            block.z |= indices[4] << 29;
            block.w |= indices[5] << 0;
            block.w |= indices[6] << 3;
            block.w |= indices[7] << 6;
            block.w |= indices[8] << 9;
            block.w |= indices[9] << 11;
            block.w |= indices[10] << 14;
            block.w |= indices[11] << 17;
            block.w |= indices[12] << 20;
            block.w |= indices[13] << 23;
            block.w |= indices[14] << 26;
            block.w |= indices[15] << 29;
        }
    }
}
705 |
// Compression entry point. Each thread handles one 4x4 texel block (thread
// groups are 8x8 threads): it gathers the block's 16 texels, encodes them with
// EncodeP1 and, when ENCODE_P2 is enabled, with the best-scoring P2 pattern,
// then writes the resulting 128-bit block to OutputTexture.
[numthreads(8, 8, 1)]
void CSMain(uint3 groupID : SV_GroupID,
    uint3 dispatchThreadID : SV_DispatchThreadID,
    uint3 groupThreadID : SV_GroupThreadID)
{
    uint2 blockCoord = dispatchThreadID.xy;

    // Threads outside the texture (partial thread groups) do nothing.
    if (all(blockCoord < TextureSizeInBlocks))
    {
        // Gather texels for current 4x4 block
        // 0 1 2 3
        // 4 5 6 7
        // 8 9 10 11
        // 12 13 14 15
        // uv is one texel in from the block's top-left corner; the other three
        // gather points are offset by two texels so the four 2x2 footprints
        // tile the block.
        float2 uv = blockCoord * TextureSizeRcp * 4.0f + TextureSizeRcp;
        float2 block0UV = uv;
        float2 block1UV = uv + float2(2.0f * TextureSizeRcp.x, 0.0f);
        float2 block2UV = uv + float2(0.0f, 2.0f * TextureSizeRcp.y);
        float2 block3UV = uv + float2(2.0f * TextureSizeRcp.x, 2.0f * TextureSizeRcp.y);
        float4 block0X = SrcTexture.GatherRed(PointSampler, block0UV);
        float4 block1X = SrcTexture.GatherRed(PointSampler, block1UV);
        float4 block2X = SrcTexture.GatherRed(PointSampler, block2UV);
        float4 block3X = SrcTexture.GatherRed(PointSampler, block3UV);
        float4 block0Y = SrcTexture.GatherGreen(PointSampler, block0UV);
        float4 block1Y = SrcTexture.GatherGreen(PointSampler, block1UV);
        float4 block2Y = SrcTexture.GatherGreen(PointSampler, block2UV);
        float4 block3Y = SrcTexture.GatherGreen(PointSampler, block3UV);
        float4 block0Z = SrcTexture.GatherBlue(PointSampler, block0UV);
        float4 block1Z = SrcTexture.GatherBlue(PointSampler, block1UV);
        float4 block2Z = SrcTexture.GatherBlue(PointSampler, block2UV);
        float4 block3Z = SrcTexture.GatherBlue(PointSampler, block3UV);

        // Reorder the gathered components into the row-major layout shown
        // above: within each 2x2 footprint, w/z form the upper row and x/y
        // the lower row.
        float3 texels[16];
        texels[0] = float3(block0X.w, block0Y.w, block0Z.w);
        texels[1] = float3(block0X.z, block0Y.z, block0Z.z);
        texels[2] = float3(block1X.w, block1Y.w, block1Z.w);
        texels[3] = float3(block1X.z, block1Y.z, block1Z.z);
        texels[4] = float3(block0X.x, block0Y.x, block0Z.x);
        texels[5] = float3(block0X.y, block0Y.y, block0Z.y);
        texels[6] = float3(block1X.x, block1Y.x, block1Z.x);
        texels[7] = float3(block1X.y, block1Y.y, block1Z.y);
        texels[8] = float3(block2X.w, block2Y.w, block2Z.w);
        texels[9] = float3(block2X.z, block2Y.z, block2Z.z);
        texels[10] = float3(block3X.w, block3Y.w, block3Z.w);
        texels[11] = float3(block3X.z, block3Y.z, block3Z.z);
        texels[12] = float3(block2X.x, block2Y.x, block2Z.x);
        texels[13] = float3(block2X.y, block2Y.y, block2Z.y);
        texels[14] = float3(block3X.x, block3Y.x, block3Z.x);
        texels[15] = float3(block3X.y, block3Y.y, block3Z.y);

        // EncodeP1 relies on the block starting out zeroed.
        uint4 block = uint4(0, 0, 0, 0);
        float blockMSLE = 0.0f;

        EncodeP1(block, blockMSLE, texels);

#if ENCODE_P2
        // First find pattern which is a best fit for a current block
        float bestScore = EvaluateP2Pattern(0, texels);
        uint bestPattern = 0;

        for (uint patternIndex = 1; patternIndex < 32; ++patternIndex)
        {
            float score = EvaluateP2Pattern(patternIndex, texels);
            if (score < bestScore)
            {
                bestPattern = patternIndex;
                bestScore = score;
            }
        }

        // Then encode it
        // (EncodeP2Pattern replaces 'block' only if it beats the P1 error)
        EncodeP2Pattern(block, blockMSLE, bestPattern, texels);
#endif

        OutputTexture[blockCoord] = block;
    }
}
--------------------------------------------------------------------------------
/bin/d3dcompiler_47.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/knarkowicz/GPURealTimeBC6H/0416fd30fd85e29d3ae673dee2cb9d0d09a36f61/bin/d3dcompiler_47.dll
--------------------------------------------------------------------------------
/bin/desk.dds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/knarkowicz/GPURealTimeBC6H/0416fd30fd85e29d3ae673dee2cb9d0d09a36f61/bin/desk.dds
--------------------------------------------------------------------------------
/bin/memorial.dds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/knarkowicz/GPURealTimeBC6H/0416fd30fd85e29d3ae673dee2cb9d0d09a36f61/bin/memorial.dds
--------------------------------------------------------------------------------
/bin/yucca.dds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/knarkowicz/GPURealTimeBC6H/0416fd30fd85e29d3ae673dee2cb9d0d09a36f61/bin/yucca.dds
--------------------------------------------------------------------------------
/dds.cpp:
--------------------------------------------------------------------------------
1 | #include "stdafx.h"
2 | #include "dds.h"
3 |
// Magic number and flag values of the DDS container format. Within this file only
// DDS_MAGIC (checked by both loaders) and DDS_FOURCC (used by the DDSPF_* constants)
// are referenced; the remaining macros are not used here.
4 | unsigned const DDS_MAGIC = 0x20534444; // "DDS "
5 |
// Values for DDS_HEADER::dwFlags.
6 | #define DDS_HEADER_FLAGS_TEXTURE 0x00001007 // DDSD_CAPS | DDSD_HEIGHT | DDSD_WIDTH | DDSD_PIXELFORMAT
7 | #define DDS_HEADER_FLAGS_MIPMAP 0x00020000 // DDSD_MIPMAPCOUNT
8 | #define DDS_HEADER_FLAGS_VOLUME 0x00800000 // DDSD_DEPTH
9 | #define DDS_HEADER_FLAGS_PITCH 0x00000008 // DDSD_PITCH
10 | #define DDS_HEADER_FLAGS_LINEARSIZE 0x00080000 // DDSD_LINEARSIZE
11 |
// Values for DDS_HEADER::dwSurfaceFlags.
12 | #define DDS_SURFACE_FLAGS_TEXTURE 0x00001000 // DDSCAPS_TEXTURE
13 | #define DDS_SURFACE_FLAGS_MIPMAP 0x00400008 // DDSCAPS_COMPLEX | DDSCAPS_MIPMAP
14 | #define DDS_SURFACE_FLAGS_CUBEMAP 0x00000008 // DDSCAPS_COMPLEX
15 |
// Values for DDS_PIXELFORMAT::dwFlags.
16 | #define DDS_FOURCC 0x00000004 // DDPF_FOURCC
17 | #define DDS_RGB 0x00000040 // DDPF_RGB
18 | #define DDS_RGBA 0x00000041 // DDPF_RGB | DDPF_ALPHAPIXELS
19 | #define DDS_ALPHA 0x00000002 // DDPF_ALPHA
20 | #define DDS_LUM 0x00020000 // DDPF_LUM
21 |
// Pixel-format record embedded in DDS_HEADER: eight 32-bit fields (32 bytes).
// The loaders in this file compare the whole record byte-for-byte (memcmp) against
// one of the DDSPF_* constants, so every field must match, including the zeroed
// bit count and masks.
22 | struct DDS_PIXELFORMAT
23 | {
24 | uint32_t dwSize; // set to sizeof(DDS_PIXELFORMAT) in the DDSPF_* constants
25 | uint32_t dwFlags; // DDS_FOURCC / DDS_RGB / ... flag bits
26 | uint32_t dwFourCC; // format code, meaningful when DDS_FOURCC is set
27 | uint32_t dwRGBBitCount;
28 | uint32_t dwRBitMask;
29 | uint32_t dwGBitMask;
30 | uint32_t dwBBitMask;
31 | uint32_t dwABitMask;
32 | };
33 |
// Packs four characters into a little-endian 32-bit FourCC (ch0 in the lowest byte).
// Only defined here if no earlier header already provides it.
34 | #ifndef MAKEFOURCC
35 | # define MAKEFOURCC(ch0, ch1, ch2, ch3) ((uint32_t)(uint8_t)(ch0) | ((uint32_t)(uint8_t)(ch1) << 8) | ((uint32_t)(uint8_t)(ch2) << 16) | ((uint32_t)(uint8_t)(ch3) << 24 ))
36 | #endif
37 |
// Reference pixel-format records the loaders compare file headers against.
// DDSPF_DX10: FourCC "DX10", i.e. a DDS_HEADER_DXT10 follows the main header.
38 | DDS_PIXELFORMAT const DDSPF_DX10 = { sizeof(DDS_PIXELFORMAT), DDS_FOURCC, MAKEFOURCC('D','X','1','0'), 0, 0, 0, 0, 0 };
// FourCC 113 - presumably the legacy D3DFMT_A16B16G16R16F code, as the name suggests.
39 | DDS_PIXELFORMAT const DDSPF_R16G16B16A16_FLOAT = { sizeof(DDS_PIXELFORMAT), DDS_FOURCC, 113, 0, 0, 0, 0, 0 };
// 808540228 == 0x30315844 == MAKEFOURCC('D','X','1','0'), so this record is identical
// to DDSPF_DX10. It is not referenced anywhere in this file.
40 | DDS_PIXELFORMAT const DDSPF_BC6H = { sizeof(DDS_PIXELFORMAT), DDS_FOURCC, 808540228, 0, 0, 0, 0, 0 };
41 |
// Layout of the first 128 bytes of a DDS file as read by the loaders below:
// the 4-byte magic followed by 31 further 32-bit fields (pixel format included).
// The loaders fread() the file straight into this struct, so field order and sizes
// must match the on-disk layout exactly.
42 | struct DDS_HEADER
43 | {
44 | uint32_t dwMagic; // compared against DDS_MAGIC
45 | uint32_t dwSize;
46 | uint32_t dwFlags;
47 | uint32_t dwHeight; // image height in texels
48 | uint32_t dwWidth; // image width in texels
49 | uint32_t dwPitchOrLinearSize;
50 | uint32_t dwDepth;
51 | uint32_t dwMipMapCount;
52 | uint32_t dwReserved1[11];
53 | DDS_PIXELFORMAT ddspf; // compared against a DDSPF_* constant to identify the format
54 | uint32_t dwSurfaceFlags;
55 | uint32_t dwCubemapFlags;
56 | uint32_t dwReserved2[3];
57 | };
58 |
// Extension header (five 32-bit fields, 20 bytes) that LoadBC6H reads immediately
// after DDS_HEADER when the pixel format matches DDSPF_DX10.
59 | struct DDS_HEADER_DXT10
60 | {
61 | uint32_t dxgiFormat;
62 | uint32_t resourceDimension;
63 | uint32_t miscFlag;
64 | uint32_t arraySize;
65 | uint32_t reserved;
66 | };
67 |
68 | bool DDS::LoadA16B16G16R16F(char const* filename, SImage& img)
69 | {
70 | img.m_width = 0;
71 | img.m_height = 0;
72 | img.m_data = nullptr;
73 | img.m_dataSize = 0;
74 |
75 | FILE* f = nullptr;
76 | fopen_s(&f, filename, "rb");
77 | if (!f)
78 | {
79 | return false;
80 | }
81 |
82 | DDS_HEADER hdr;
83 | fread(&hdr, sizeof(hdr), 1, f);
84 |
85 | if (hdr.dwMagic == DDS_MAGIC && memcmp(&hdr.ddspf, &DDSPF_R16G16B16A16_FLOAT, sizeof(hdr.ddspf)) == 0)
86 | {
87 | img.m_dataSize = hdr.dwWidth * hdr.dwHeight * 8;
88 | img.m_data = new uint8_t[img.m_dataSize];
89 | img.m_width = hdr.dwWidth;
90 | img.m_height = hdr.dwHeight;
91 | fread(img.m_data, img.m_dataSize, 1, f);
92 | fclose(f);
93 | return true;
94 | }
95 |
96 | fclose(f);
97 | return false;
98 | }
99 |
100 | bool DDS::LoadBC6H(char const* filename, SImage& img)
101 | {
102 | img.m_width = 0;
103 | img.m_height = 0;
104 | img.m_data = nullptr;
105 | img.m_dataSize = 0;
106 |
107 | FILE* f = nullptr;
108 | fopen_s(&f, filename, "rb");
109 | if (!f)
110 | {
111 | return false;
112 | }
113 |
114 | DDS_HEADER hdr;
115 | fread(&hdr, sizeof(hdr), 1, f);
116 |
117 | if (hdr.dwMagic == DDS_MAGIC && memcmp(&hdr.ddspf, &DDSPF_DX10, sizeof(hdr.ddspf)) == 0)
118 | {
119 | DDS_HEADER_DXT10 hdrDX10;
120 | fread(&hdrDX10, sizeof(hdrDX10), 1, f);
121 |
122 | img.m_dataSize = hdr.dwWidth * hdr.dwHeight;
123 | img.m_data = new uint8_t[img.m_dataSize];
124 | img.m_width = hdr.dwWidth;
125 | img.m_height = hdr.dwHeight;
126 | fread(img.m_data, img.m_dataSize, 1, f);
127 | fclose(f);
128 | return true;
129 | }
130 |
131 | fclose(f);
132 | return false;
133 | }
--------------------------------------------------------------------------------
/dds.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
// Plain description of an image payload filled in by the DDS loaders.
3 | struct SImage
4 | {
5 | unsigned m_width; // width in texels
6 | unsigned m_height; // height in texels
// Raw payload bytes; the DDS loaders allocate this with new uint8_t[] and set it to
// nullptr on failure. Nothing in this struct frees it - presumably the caller owns
// the buffer (TODO confirm against the call sites).
7 | uint8_t* m_data;
8 | unsigned m_dataSize; // size of m_data in bytes
9 | };
10 |
// DDS file loaders. Both reset img before doing anything else and return true only
// when the file could be opened and its header matched the expected format.
11 | namespace DDS
12 | {
// Loads a DDS file whose pixel format is the 16-bit float RGBA FourCC (8 bytes per texel).
13 | bool LoadA16B16G16R16F(char const* filename, SImage& img);
// Loads a DDS file that carries a DX10 extension header, treating the payload as BC6H.
14 | bool LoadBC6H(char const* filename, SImage& img);
15 | }
--------------------------------------------------------------------------------
/stdafx.cpp:
--------------------------------------------------------------------------------
1 | #include "stdafx.h"
--------------------------------------------------------------------------------
/stdafx.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #define WIN32_LEAN_AND_MEAN
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include
9 | #include
10 | #include
11 | #include
12 |
13 | #include
14 | #include
--------------------------------------------------------------------------------
/winmain.cpp:
--------------------------------------------------------------------------------
1 | #include "stdafx.h"
2 | #include "app.h"
3 |
// Set by MsgProc on WM_DESTROY; MainLoop exits once it becomes true.
4 | bool gDestroy = false;
// Updated by MsgProc on WM_ACTIVATE; MainLoop only renders while it is true.
5 | bool gActive = true;
// Main window handle, assigned in WinMain by CreateWindow.
6 | HWND gWndHandle;
7 |
// Window procedure of the main window. Updates the global run/active flags and
// forwards keyboard, mouse and resize events to gApp. Every message, handled or not,
// is finally passed on to DefWindowProc, whose result is returned.
8 | LRESULT WINAPI MsgProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam)
9 | {
10 | switch (msg)
11 | {
12 | case WM_DESTROY:
// No PostQuitMessage here; MainLoop terminates by polling gDestroy instead.
13 | gDestroy = true;
14 | break;
15 |
16 | case WM_ACTIVATE:
17 | gActive = wParam != WA_INACTIVE;
18 | break;
19 |
20 | case WM_KEYDOWN:
21 | gApp.OnKeyDown(wParam);
22 | break;
23 |
// NOTE(review): LOWORD/HIWORD yield unsigned 16-bit values, so client coordinates that
// are negative (possible while the mouse is captured or on multi-monitor setups) arrive
// as large positive numbers. GET_X_LPARAM / GET_Y_LPARAM would sign-extend - confirm
// what the gApp handlers expect before changing this.
24 | case WM_LBUTTONDOWN:
25 | gApp.OnLButtonDown(LOWORD(lParam), HIWORD(lParam));
26 | break;
27 |
28 | case WM_LBUTTONUP:
29 | gApp.OnLButtonUp(LOWORD(lParam), HIWORD(lParam));
30 | break;
31 |
32 | case WM_MOUSEMOVE:
33 | gApp.OnMouseMove(LOWORD(lParam), HIWORD(lParam));
34 | break;
35 |
36 | case WM_MOUSEWHEEL:
37 | gApp.OnMouseWheel(GET_WHEEL_DELTA_WPARAM(wParam));
38 | break;
39 |
40 | case WM_SIZE:
41 | gApp.OnResize();
42 | break;
43 | }
44 |
45 | return DefWindowProc(hWnd, msg, wParam, lParam);
46 | }
47 |
48 | void MainLoop(HINSTANCE hInst)
49 | {
50 | MSG Msg;
51 | while (!gDestroy)
52 | {
53 | if (PeekMessage(&Msg, NULL, 0, 0, PM_NOREMOVE))
54 | {
55 | if (!GetMessage(&Msg, NULL, 0, 0))
56 | return;
57 |
58 | TranslateMessage(&Msg);
59 | DispatchMessage(&Msg);
60 | }
61 | else
62 | {
63 | if (gActive)
64 | {
65 | gApp.Render();
66 | Sleep(1);
67 | }
68 | }
69 | }
70 | }
71 |
72 | INT WINAPI WinMain(HINSTANCE hInst, HINSTANCE, LPSTR lpCmdLine, INT)
73 | {
74 | wchar_t const* appName = L"rt_bc6h_encoder_gpu";
75 | WNDCLASSEX wc = { sizeof(WNDCLASSEX), 0, MsgProc, 0L, 0L, GetModuleHandle(NULL), NULL, NULL, NULL, NULL, appName, NULL };
76 |
77 | RegisterClassEx(&wc);
78 |
79 | DWORD const dwStyle = WS_SYSMENU | WS_MAXIMIZEBOX | WS_MINIMIZEBOX | WS_SIZEBOX;
80 | RECT rcWindowSize;
81 | SetRect(&rcWindowSize, 0, 0, 1280, 720);
82 | AdjustWindowRect(&rcWindowSize, dwStyle, FALSE);
83 |
84 | RECT rcDesktop;
85 | GetClientRect(GetDesktopWindow(), &rcDesktop);
86 |
87 | if (rcWindowSize.bottom < rcDesktop.bottom)
88 | {
89 | rcWindowSize.bottom -= rcWindowSize.top;
90 | rcWindowSize.top = 0;
91 | }
92 |
93 | if (rcWindowSize.right < rcDesktop.right)
94 | {
95 | int iTranslate = (rcDesktop.right - (rcWindowSize.right - rcWindowSize.left)) / 2;
96 | rcWindowSize.left += iTranslate;
97 | rcWindowSize.right += iTranslate;
98 | }
99 |
100 | gWndHandle = CreateWindow(appName, appName, dwStyle, rcWindowSize.left, rcWindowSize.top,
101 | rcWindowSize.right - rcWindowSize.left, rcWindowSize.bottom - rcWindowSize.top,
102 | GetDesktopWindow(), nullptr, wc.hInstance, nullptr);
103 |
104 | gApp.Init(gWndHandle);
105 | ShowWindow(gWndHandle, SW_SHOWDEFAULT);
106 | UpdateWindow(gWndHandle);
107 |
108 | MainLoop(hInst);
109 |
110 | UnregisterClass(appName, wc.hInstance);
111 | gApp.Release();
112 | return 0;
113 | }
--------------------------------------------------------------------------------