├── .editorconfig
├── .gitignore
├── ConvectionKernels.h
├── ConvectionKernels.sln
├── ConvectionKernels.vcxproj
├── ConvectionKernels.vcxproj.filters
├── ConvectionKernels_API.cpp
├── ConvectionKernels_AggregatedError.h
├── ConvectionKernels_BC67.cpp
├── ConvectionKernels_BC67.h
├── ConvectionKernels_BC6H_IO.cpp
├── ConvectionKernels_BC6H_IO.h
├── ConvectionKernels_BC7_Prio.h
├── ConvectionKernels_BC7_PrioData.cpp
├── ConvectionKernels_BC7_SingleColor.h
├── ConvectionKernels_BCCommon.cpp
├── ConvectionKernels_BCCommon.h
├── ConvectionKernels_Config.h
├── ConvectionKernels_ETC.cpp
├── ConvectionKernels_ETC.h
├── ConvectionKernels_ETC1.h
├── ConvectionKernels_ETC2.h
├── ConvectionKernels_ETC2_Rounding.h
├── ConvectionKernels_EndpointRefiner.h
├── ConvectionKernels_EndpointSelector.h
├── ConvectionKernels_FakeBT709_Rounding.h
├── ConvectionKernels_IndexSelector.cpp
├── ConvectionKernels_IndexSelector.h
├── ConvectionKernels_IndexSelectorHDR.h
├── ConvectionKernels_PackedCovarianceMatrix.h
├── ConvectionKernels_ParallelMath.h
├── ConvectionKernels_S3TC.cpp
├── ConvectionKernels_S3TC.h
├── ConvectionKernels_S3TC_SingleColor.h
├── ConvectionKernels_SingleFile.cpp
├── ConvectionKernels_UnfinishedEndpoints.h
├── ConvectionKernels_Util.cpp
├── ConvectionKernels_Util.h
├── LICENSE.txt
├── MakeTables
    ├── App.config
    ├── MakeTables.csproj
    ├── Program.cs
    └── Properties
    │   └── AssemblyInfo.cs
├── README.md
├── etc2packer
    ├── .editorconfig
    ├── etc2packer.cpp
    ├── etc2packer.h
    ├── etc2packer.vcxproj
    ├── etc2packer.vcxproj.filters
    ├── ktxheader.h
    └── stb_image
    │   ├── stb_image.cpp
    │   ├── stb_image.h
    │   ├── stb_image.vcxproj
    │   ├── stb_image.vcxproj.filters
    │   ├── stb_image_write.cpp
    │   └── stb_image_write.h
└── etc_notes.txt


/.editorconfig:
--------------------------------------------------------------------------------
 1 | root = true
 2 | 
 3 | [*.{cpp,h,inl,fx,hlsl}]
 4 | indent_size = 4
 5 | indent_style = space
 6 | trim_trailing_whitespace = true
 7 | insert_final_newline = true
 8 | end_of_line = crlf
 9 | charset = latin1
10 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.psess
 2 | *.vsp
 3 | *.log
 4 | *.err
 5 | *.wrn
 6 | *.suo
 7 | *.sdf
 8 | *.user
 9 | *.i
10 | *.vspscc
11 | *.opensdf
12 | *.opendb
13 | *.ipch
14 | *.cache
15 | *.tlog
16 | *.lastbuildstate
17 | *.ilk
18 | *.VC.db
19 | *.nupkg
20 | *.obj
21 | *.idb
22 | .vs
23 | /Bin
24 | /ipch
25 | /Debug
26 | /Profile
27 | /Release
28 | /Tests
29 | /wiki
30 | *.inc
31 | *.pdb
32 | *.csv
33 | *.dds
34 | /*/bin/*
35 | /*/obj/*
36 | /*/x64/*
37 | /x64/*/*.exe
38 | /x64/*/*.obj
39 | /x64/*/*.iobj
40 | /x64/*/*.ipdb
41 | 


--------------------------------------------------------------------------------
/ConvectionKernels.h:
--------------------------------------------------------------------------------
  1 | /*
  2 | Convection Texture Tools
  3 | Copyright (c) 2018 Eric Lasota
  4 | 
  5 | Permission is hereby granted, free of charge, to any person obtaining
  6 | a copy of this software and associated documentation files (the
  7 | "Software"), to deal in the Software without restriction, including
  8 | without limitation the rights to use, copy, modify, merge, publish,
  9 | distribute, sublicense, and/or sell copies of the Software, and to
 10 | permit persons to whom the Software is furnished to do so, subject
 11 | to the following conditions:
 12 | 
 13 | The above copyright notice and this permission notice shall be included
 14 | in all copies or substantial portions of the Software.
 15 | 
 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 17 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 19 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 21 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 22 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 23 | */
 24 | #pragma once
 25 | #ifndef __CVTT_CONVECTION_KERNELS__
 26 | #define __CVTT_CONVECTION_KERNELS__
 27 | 
 28 | #include <stddef.h>
 29 | #include <stdint.h>
 30 | 
 31 | namespace cvtt
 32 | {
 33 |     namespace Flags // Bit flags; OR them together to form Options::flags
 34 |     {
 35 |         // Use fast indexing in BC7 encoding (about 2x faster, slightly worse quality)
 36 |         const uint32_t BC7_FastIndexing         = 0x008;
 37 | 
 38 |         // Try precomputed single-color lookups where applicable (slightly slower, small quality increase on specific blocks)
 39 |         const uint32_t BC7_TrySingleColor       = 0x010;
 40 | 
 41 |         // Don't allow non-zero or non-max alpha values in blocks that only contain one or the other
 42 |         const uint32_t BC7_RespectPunchThrough  = 0x020;
 43 | 
 44 |         // Use fast indexing in HDR formats (faster, worse quality)
 45 |         const uint32_t BC6H_FastIndexing        = 0x040;
 46 | 
 47 |         // Exhaustively search RGB orderings when encoding BC1-BC3 (much slower, better quality)
 48 |         const uint32_t S3TC_Exhaustive          = 0x080;
 49 | 
 50 |         // Penalize distant endpoints, improving quality on inaccurate GPU decoders
 51 |         const uint32_t S3TC_Paranoid            = 0x100;
 52 | 
 53 |         // Uniform color channel importance
 54 |         const uint32_t Uniform                  = 0x200;
 55 | 
 56 |         // Use fake BT.709 color space for etc2comp compatibility (slower)
 57 |         const uint32_t ETC_UseFakeBT709         = 0x400;
 58 | 
 59 |         // Use accurate quantization functions when quantizing fake BT.709 (much slower, marginal improvement on specific blocks)
 60 |         const uint32_t ETC_FakeBT709Accurate    = 0x800;
 61 | 
 62 |         // Misc useful default flag combinations (presets built only from the single-bit flags above)
 63 |         const uint32_t Fastest = (BC6H_FastIndexing | BC7_FastIndexing | S3TC_Paranoid);
 64 |         const uint32_t Faster = (BC6H_FastIndexing | BC7_FastIndexing | S3TC_Paranoid); // NOTE(review): currently the same bits as Fastest
 65 |         const uint32_t Fast = (BC7_FastIndexing | S3TC_Paranoid);
 66 |         const uint32_t Default = (BC7_FastIndexing | S3TC_Paranoid); // Currently the same bits as Fast; this is what Options() starts with
 67 |         const uint32_t Better = (S3TC_Paranoid | S3TC_Exhaustive);
 68 |         const uint32_t Ultra = (BC7_TrySingleColor | S3TC_Paranoid | S3TC_Exhaustive | ETC_FakeBT709Accurate);
 69 |     }
 70 | 
 71 |     const unsigned int NumParallelBlocks = 8; // Number of blocks each Kernels function accepts and outputs per call
 72 | 
 73 |     struct Options // Encoder settings; taken by const reference by every Kernels::Encode* function
 74 |     {
 75 |         uint32_t flags;         // Bitmask of cvtt::Flags values
 76 |         float threshold;        // Alpha test threshold for BC1
 77 |         float redWeight;        // Red channel importance
 78 |         float greenWeight;      // Green channel importance
 79 |         float blueWeight;       // Blue channel importance
 80 |         float alphaWeight;      // Alpha channel importance
 81 | 
 82 |         int refineRoundsBC7;    // Number of refine rounds for BC7
 83 |         int refineRoundsBC6H;   // Number of refine rounds for BC6H (max 3)
 84 |         int refineRoundsIIC;    // Number of refine rounds for independent interpolated channels (BC3 alpha, BC4, BC5)
 85 |         int refineRoundsS3TC;   // Number of refine rounds for S3TC RGB
 86 | 
 87 |         int seedPoints;         // Number of seed points (min 1, max 4)
 88 | 
 89 |         Options() // Defaults: Flags::Default, the maximum documented seed point count (4), and the values listed below
 90 |             : flags(Flags::Default)
 91 |             , threshold(0.5f)
 92 |             , redWeight(0.2125f / 0.7154f)   // Expressed relative to a green weight of 1
 93 |             , greenWeight(1.0f)
 94 |             , blueWeight(0.0721f / 0.7154f)  // Expressed relative to a green weight of 1
 95 |             , alphaWeight(1.0f)
 96 |             , refineRoundsBC7(2)
 97 |             , refineRoundsBC6H(3)            // The documented maximum for BC6H
 98 |             , refineRoundsIIC(8)
 99 |             , refineRoundsS3TC(2)
100 |             , seedPoints(4)
101 |         {
102 |         }
103 |     };
104 | 
105 |     struct BC7FineTuningParams // Input to Kernels::ConfigureBC7EncodingPlanFromFineTuningParams
106 |     {
107 |         // Seed point counts for each mode+configuration combination
108 |         uint8_t mode0SP[16];
109 |         uint8_t mode1SP[64];
110 |         uint8_t mode2SP[64];
111 |         uint8_t mode3SP[64];
112 |         uint8_t mode4SP[4][2];
113 |         uint8_t mode5SP[4];
114 |         uint8_t mode6SP;        // Single value: mode 6 has no per-configuration array here
115 |         uint8_t mode7SP[64];
116 | 
117 |         BC7FineTuningParams() // Sets every seed point count in the struct to 4
118 |         {
119 |             for (int i = 0; i < 16; i++)
120 |                 this->mode0SP[i] = 4;
121 | 
122 |             for (int i = 0; i < 64; i++) // The four 64-entry tables are filled in lockstep
123 |             {
124 |                 this->mode1SP[i] = 4;
125 |                 this->mode2SP[i] = 4;
126 |                 this->mode3SP[i] = 4;
127 |                 this->mode7SP[i] = 4;
128 |             }
129 | 
130 |             for (int i = 0; i < 4; i++) // mode4SP and mode5SP share the same outer dimension (4)
131 |             {
132 |                 for (int j = 0; j < 2; j++)
133 |                     this->mode4SP[i][j] = 4;
134 | 
135 |                 this->mode5SP[i] = 4;
136 |             }
137 | 
138 |             this->mode6SP = 4;
139 |         }
140 |     };
141 | 
142 |     struct BC7EncodingPlan // Consumed by Kernels::EncodeBC7; can be generated by the Kernels::ConfigureBC7EncodingPlan* functions
143 |     {
144 |         static const int kNumRGBAShapes = 129; // Length of rgbaShapeList and seedPointsForShapeRGBA
145 |         static const int kNumRGBShapes = 243;  // Length of rgbShapeList and seedPointsForShapeRGB
146 | 
147 |         uint64_t mode1PartitionEnabled; // NOTE(review): the *PartitionEnabled fields look like bitmasks with one bit per partition - confirm against the encoder
148 |         uint64_t mode2PartitionEnabled;
149 |         uint64_t mode3PartitionEnabled;
150 |         uint16_t mode0PartitionEnabled; // Only 16 bits wide, unlike the 64-bit fields for the other modes
151 |         uint64_t mode7RGBAPartitionEnabled;
152 |         uint64_t mode7RGBPartitionEnabled;
153 |         uint8_t mode4SP[4][2];          // Same shape as BC7FineTuningParams::mode4SP
154 |         uint8_t mode5SP[4];             // Same shape as BC7FineTuningParams::mode5SP
155 |         bool mode6Enabled;
156 | 
157 |         uint8_t seedPointsForShapeRGB[kNumRGBShapes];
158 |         uint8_t seedPointsForShapeRGBA[kNumRGBAShapes];
159 | 
160 |         uint8_t rgbaShapeList[kNumRGBAShapes];
161 |         uint8_t rgbaNumShapesToEvaluate;
162 | 
163 |         uint8_t rgbShapeList[kNumRGBShapes];
164 |         uint8_t rgbNumShapesToEvaluate;
165 | 
166 |         BC7EncodingPlan() // Default plan: every enable field fully set, every shape listed in index order, 4 seed points everywhere
167 |         {
168 |             for (int i = 0; i < kNumRGBShapes; i++)
169 |             {
170 |                 this->rgbShapeList[i] = i; // Identity ordering; i stays below 243, so it fits in uint8_t
171 |                 this->seedPointsForShapeRGB[i] = 4;
172 |             }
173 |             this->rgbNumShapesToEvaluate = kNumRGBShapes; // The whole list
174 | 
175 |             for (int i = 0; i < kNumRGBAShapes; i++)
176 |             {
177 |                 this->rgbaShapeList[i] = i; // Identity ordering, as for the RGB list
178 |                 this->seedPointsForShapeRGBA[i] = 4;
179 |             }
180 |             this->rgbaNumShapesToEvaluate = kNumRGBAShapes; // The whole list
181 | 
182 | 
183 |             this->mode0PartitionEnabled = 0xffff; // All 16 bits set
184 |             this->mode1PartitionEnabled = 0xffffffffffffffffULL; // All 64 bits set (same for the fields below)
185 |             this->mode2PartitionEnabled = 0xffffffffffffffffULL;
186 |             this->mode3PartitionEnabled = 0xffffffffffffffffULL;
187 |             this->mode6Enabled = true;
188 |             this->mode7RGBPartitionEnabled = 0xffffffffffffffffULL;
189 |             this->mode7RGBAPartitionEnabled = 0xffffffffffffffffULL;
190 | 
191 |             for (int i = 0; i < 4; i++) // Same defaults as the BC7FineTuningParams constructor
192 |             {
193 |                 for (int j = 0; j < 2; j++)
194 |                     this->mode4SP[i][j] = 4;
195 | 
196 |                 this->mode5SP[i] = 4;
197 |             }
198 |         }
199 |     };
200 | 
201 |     // RGBA block for unsigned 8-bit formats: 16 pixels x 4 channels (encoder input; also the output type of Kernels::DecodeBC7)
202 |     struct PixelBlockU8
203 |     {
204 |         uint8_t m_pixels[16][4];
205 |     };
206 | 
207 |     // RGBA input block for signed 8-bit formats: 16 pixels x 4 channels (taken by Kernels::EncodeBC4S and Kernels::EncodeBC5S)
208 |     struct PixelBlockS8
209 |     {
210 |         int8_t m_pixels[16][4];
211 |     };
212 | 
213 |     struct PixelBlockScalarS16 // Input block of 16 signed 16-bit values (one value per entry, no channel dimension); taken by Kernels::EncodeETC2Alpha11
214 |     {
215 |         int16_t m_pixels[16];
216 |     };
217 | 
218 |     // RGBA block for half-precision float formats (bit-cast to int16_t): 16 pixels x 4 channels (BC6H encoder input and decoder output)
219 |     struct PixelBlockF16
220 |     {
221 |         int16_t m_pixels[16][4];
222 |     };
223 | 
224 |     class ETC2CompressionData // Handle for ETC2 temporary storage; obtained from Kernels::AllocETC2Data and returned via Kernels::ReleaseETC2Data
225 |     {
226 |     protected:
227 |         ETC2CompressionData() {} // Protected: code outside this class and its subclasses cannot construct one directly
228 |     };
229 | 
230 |     class ETC1CompressionData // Handle for ETC1 temporary storage; obtained from Kernels::AllocETC1Data and returned via Kernels::ReleaseETC1Data
231 |     {
232 |     protected:
233 |         ETC1CompressionData() {} // Protected: code outside this class and its subclasses cannot construct one directly
234 |     };
235 | 
236 |     namespace Kernels // Public encode/decode entry points and their setup helpers
237 |     {
238 |         typedef void* allocFunc_t(void *context, size_t size); // Allocation callback type accepted by AllocETC1Data/AllocETC2Data
239 |         typedef void freeFunc_t(void *context, void* ptr, size_t size); // Release callback type accepted by ReleaseETC1Data/ReleaseETC2Data
240 | 
241 |         // NOTE: All functions accept and output NumParallelBlocks blocks at once
242 |         void EncodeBC1(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options);
243 |         void EncodeBC2(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options);
244 |         void EncodeBC3(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options);
245 |         void EncodeBC4U(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options);
246 |         void EncodeBC4S(uint8_t *pBC, const PixelBlockS8 *pBlocks, const Options &options);
247 |         void EncodeBC5U(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options);
248 |         void EncodeBC5S(uint8_t *pBC, const PixelBlockS8 *pBlocks, const Options &options);
249 |         void EncodeBC6HU(uint8_t *pBC, const PixelBlockF16 *pBlocks, const Options &options);
250 |         void EncodeBC6HS(uint8_t *pBC, const PixelBlockF16 *pBlocks, const Options &options);
251 |         void EncodeBC7(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options, const BC7EncodingPlan &encodingPlan); // Additionally requires an encoding plan
252 |         void EncodeETC1(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options, ETC1CompressionData *compressionData); // compressionData comes from AllocETC1Data
253 |         void EncodeETC2(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options, ETC2CompressionData *compressionData); // compressionData comes from AllocETC2Data
254 |         void EncodeETC2RGBA(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, cvtt::ETC2CompressionData *compressionData);
255 |         void EncodeETC2PunchthroughAlpha(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, cvtt::ETC2CompressionData *compressionData);
256 | 
257 |         void EncodeETC2Alpha(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options); // Takes no compression data argument
258 |         void EncodeETC2Alpha11(uint8_t *pBC, const PixelBlockScalarS16 *pBlocks, bool isSigned, const cvtt::Options &options); // Takes no compression data argument
259 | 
260 |         // Generates a BC7 encoding plan from a quality parameter that ranges from 1 (fastest) to 100 (best)
261 |         void ConfigureBC7EncodingPlanFromQuality(BC7EncodingPlan &encodingPlan, int quality);
262 | 
263 |         // Generates a BC7 encoding plan from fine-tuning parameters.
264 |         bool ConfigureBC7EncodingPlanFromFineTuningParams(BC7EncodingPlan &encodingPlan, const BC7FineTuningParams &params); // NOTE(review): the meaning of the bool result is not stated in this header - confirm in the implementation
265 | 
266 |         // ETC compression requires temporary storage that normally consumes a large amount of stack space.
267 |         // To allocate and release it, use one of these functions.
268 |         ETC2CompressionData *AllocETC2Data(allocFunc_t allocFunc, void *context, const cvtt::Options &options);
269 |         void ReleaseETC2Data(ETC2CompressionData *compressionData, freeFunc_t freeFunc); // NOTE(review): takes no context argument although freeFunc_t expects one - confirm where it comes from
270 | 
271 |         ETC1CompressionData *AllocETC1Data(allocFunc_t allocFunc, void *context); // Unlike AllocETC2Data, takes no Options
272 |         void ReleaseETC1Data(ETC1CompressionData *compressionData, freeFunc_t freeFunc);
273 | 
274 |         void DecodeBC6HU(PixelBlockF16 *pBlocks, const uint8_t *pBC); // Decoders: pBlocks is the output and pBC the const input (argument order is reversed relative to the encoders)
275 |         void DecodeBC6HS(PixelBlockF16 *pBlocks, const uint8_t *pBC);
276 |         void DecodeBC7(PixelBlockU8 *pBlocks, const uint8_t *pBC);
277 |     }
278 | }
279 | 
280 | #endif
281 | 


--------------------------------------------------------------------------------
/ConvectionKernels.sln:
--------------------------------------------------------------------------------
 1 | ﻿
 2 | Microsoft Visual Studio Solution File, Format Version 12.00
 3 | # Visual Studio 15
 4 | VisualStudioVersion = 15.0.27130.2020
 5 | MinimumVisualStudioVersion = 10.0.40219.1
 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ConvectionKernels", "ConvectionKernels.vcxproj", "{5E4F0557-B7D8-4D9B-9D3A-2B966C9C1B47}"
 7 | EndProject
 8 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MakeTables", "MakeTables\MakeTables.csproj", "{867F8F36-10EA-4594-AA41-34BC5B74A65A}"
 9 | EndProject
10 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "etc2packer", "etc2packer\etc2packer.vcxproj", "{23B20484-6E2E-4102-8362-33A29A8D1933}"
11 | EndProject
12 | Global
13 | 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
14 | 		Debug|x64 = Debug|x64
15 | 		Release|x64 = Release|x64
16 | 	EndGlobalSection
17 | 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
18 | 		{5E4F0557-B7D8-4D9B-9D3A-2B966C9C1B47}.Debug|x64.ActiveCfg = Debug|x64
19 | 		{5E4F0557-B7D8-4D9B-9D3A-2B966C9C1B47}.Debug|x64.Build.0 = Debug|x64
20 | 		{5E4F0557-B7D8-4D9B-9D3A-2B966C9C1B47}.Release|x64.ActiveCfg = Release|x64
21 | 		{5E4F0557-B7D8-4D9B-9D3A-2B966C9C1B47}.Release|x64.Build.0 = Release|x64
22 | 		{867F8F36-10EA-4594-AA41-34BC5B74A65A}.Debug|x64.ActiveCfg = Debug|Any CPU
23 | 		{867F8F36-10EA-4594-AA41-34BC5B74A65A}.Debug|x64.Build.0 = Debug|Any CPU
24 | 		{867F8F36-10EA-4594-AA41-34BC5B74A65A}.Release|x64.ActiveCfg = Release|Any CPU
25 | 		{867F8F36-10EA-4594-AA41-34BC5B74A65A}.Release|x64.Build.0 = Release|Any CPU
26 | 		{23B20484-6E2E-4102-8362-33A29A8D1933}.Debug|x64.ActiveCfg = Debug|x64
27 | 		{23B20484-6E2E-4102-8362-33A29A8D1933}.Debug|x64.Build.0 = Debug|x64
28 | 		{23B20484-6E2E-4102-8362-33A29A8D1933}.Release|x64.ActiveCfg = Release|x64
29 | 		{23B20484-6E2E-4102-8362-33A29A8D1933}.Release|x64.Build.0 = Release|x64
30 | 	EndGlobalSection
31 | 	GlobalSection(SolutionProperties) = preSolution
32 | 		HideSolutionNode = FALSE
33 | 	EndGlobalSection
34 | 	GlobalSection(ExtensibilityGlobals) = postSolution
35 | 		SolutionGuid = {66CE399E-5954-472F-9A80-D109E5F06A54}
36 | 	EndGlobalSection
37 | EndGlobal
38 | 


--------------------------------------------------------------------------------
/ConvectionKernels.vcxproj:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="utf-8"?>
  2 | <Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  3 |   <ItemGroup Label="ProjectConfigurations">
  4 |     <ProjectConfiguration Include="Debug|x64">
  5 |       <Configuration>Debug</Configuration>
  6 |       <Platform>x64</Platform>
  7 |     </ProjectConfiguration>
  8 |     <ProjectConfiguration Include="Release|x64">
  9 |       <Configuration>Release</Configuration>
 10 |       <Platform>x64</Platform>
 11 |     </ProjectConfiguration>
 12 |   </ItemGroup>
 13 |   <PropertyGroup Label="Globals">
 14 |     <VCProjectVersion>15.0</VCProjectVersion>
 15 |     <ProjectGuid>{5E4F0557-B7D8-4D9B-9D3A-2B966C9C1B47}</ProjectGuid>
 16 |     <RootNamespace>ConvectionKernels</RootNamespace>
 17 |     <WindowsTargetPlatformVersion>10.0.16299.0</WindowsTargetPlatformVersion>
 18 |   </PropertyGroup>
 19 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
 20 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
 21 |     <ConfigurationType>StaticLibrary</ConfigurationType>
 22 |     <UseDebugLibraries>true</UseDebugLibraries>
 23 |     <PlatformToolset>v141</PlatformToolset>
 24 |     <CharacterSet>MultiByte</CharacterSet>
 25 |   </PropertyGroup>
 26 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
 27 |     <ConfigurationType>StaticLibrary</ConfigurationType>
 28 |     <UseDebugLibraries>false</UseDebugLibraries>
 29 |     <PlatformToolset>v141</PlatformToolset>
 30 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 31 |     <CharacterSet>MultiByte</CharacterSet>
 32 |   </PropertyGroup>
 33 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
 34 |   <ImportGroup Label="ExtensionSettings">
 35 |   </ImportGroup>
 36 |   <ImportGroup Label="Shared">
 37 |   </ImportGroup>
 38 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 39 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 40 |   </ImportGroup>
 41 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
 42 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 43 |   </ImportGroup>
 44 |   <PropertyGroup Label="UserMacros" />
 45 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 46 |     <OutDir>Bin\Desktop_2017\$(Platform)\$(Configuration)\</OutDir>
 47 |     <IntDir>Bin\Desktop_2017\$(Platform)\$(Configuration)\</IntDir>
 48 |   </PropertyGroup>
 49 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
 50 |     <OutDir>Bin\Desktop_2017\$(Platform)\$(Configuration)\</OutDir>
 51 |     <IntDir>Bin\Desktop_2017\$(Platform)\$(Configuration)\</IntDir>
 52 |   </PropertyGroup>
 53 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
 54 |     <ClCompile>
 55 |       <WarningLevel>Level3</WarningLevel>
 56 |       <Optimization>MaxSpeed</Optimization>
 57 |       <FunctionLevelLinking>true</FunctionLevelLinking>
 58 |       <IntrinsicFunctions>true</IntrinsicFunctions>
 59 |       <SDLCheck>true</SDLCheck>
 60 |       <ConformanceMode>true</ConformanceMode>
 61 |     </ClCompile>
 62 |     <Link>
 63 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
 64 |       <OptimizeReferences>true</OptimizeReferences>
 65 |     </Link>
 66 |   </ItemDefinitionGroup>
 67 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 68 |     <ClCompile>
 69 |       <WarningLevel>Level3</WarningLevel>
 70 |       <Optimization>Disabled</Optimization>
 71 |       <SDLCheck>true</SDLCheck>
 72 |       <ConformanceMode>true</ConformanceMode>
 73 |     </ClCompile>
 74 |   </ItemDefinitionGroup>
 75 |   <ItemGroup>
 76 |     <ClCompile Include="ConvectionKernels_API.cpp" />
 77 |     <ClCompile Include="ConvectionKernels_BC67.cpp" />
 78 |     <ClCompile Include="ConvectionKernels_BC6H_IO.cpp" />
 79 |     <ClCompile Include="ConvectionKernels_BC7_PrioData.cpp" />
 80 |     <ClCompile Include="ConvectionKernels_BCCommon.cpp" />
 81 |     <ClCompile Include="ConvectionKernels_ETC.cpp" />
 82 |     <ClCompile Include="ConvectionKernels_IndexSelector.cpp" />
 83 |     <ClCompile Include="ConvectionKernels_S3TC.cpp" />
 84 |     <ClCompile Include="ConvectionKernels_SingleFile.cpp" />
 85 |     <ClCompile Include="ConvectionKernels_Util.cpp" />
 86 |   </ItemGroup>
 87 |   <ItemGroup>
 88 |     <ClInclude Include="ConvectionKernels.h" />
 89 |     <ClInclude Include="ConvectionKernels_AggregatedError.h" />
 90 |     <ClInclude Include="ConvectionKernels_BC67.h" />
 91 |     <ClInclude Include="ConvectionKernels_BC6H_IO.h" />
 92 |     <ClInclude Include="ConvectionKernels_BC7_Prio.h" />
 93 |     <ClInclude Include="ConvectionKernels_BC7_SingleColor.h" />
 94 |     <ClInclude Include="ConvectionKernels_BCCommon.h" />
 95 |     <ClInclude Include="ConvectionKernels_Config.h" />
 96 |     <ClInclude Include="ConvectionKernels_EndpointRefiner.h" />
 97 |     <ClInclude Include="ConvectionKernels_EndpointSelector.h" />
 98 |     <ClInclude Include="ConvectionKernels_ETC.h" />
 99 |     <ClInclude Include="ConvectionKernels_ETC1.h" />
100 |     <ClInclude Include="ConvectionKernels_ETC2.h" />
101 |     <ClInclude Include="ConvectionKernels_ETC2_Rounding.h" />
102 |     <ClInclude Include="ConvectionKernels_FakeBT709_Rounding.h" />
103 |     <ClInclude Include="ConvectionKernels_IndexSelector.h" />
104 |     <ClInclude Include="ConvectionKernels_IndexSelectorHDR.h" />
105 |     <ClInclude Include="ConvectionKernels_PackedCovarianceMatrix.h" />
106 |     <ClInclude Include="ConvectionKernels_ParallelMath.h" />
107 |     <ClInclude Include="ConvectionKernels_S3TC.h" />
108 |     <ClInclude Include="ConvectionKernels_S3TC_SingleColor.h" />
109 |     <ClInclude Include="ConvectionKernels_UnfinishedEndpoints.h" />
110 |     <ClInclude Include="ConvectionKernels_Util.h" />
111 |   </ItemGroup>
112 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
113 |   <ImportGroup Label="ExtensionTargets">
114 |   </ImportGroup>
115 | </Project>


--------------------------------------------------------------------------------
/ConvectionKernels.vcxproj.filters:
--------------------------------------------------------------------------------
  1 | ﻿<?xml version="1.0" encoding="utf-8"?>
  2 | <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  3 |   <ItemGroup>
  4 |     <Filter Include="Source Files">
  5 |       <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
  6 |       <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
  7 |     </Filter>
  8 |     <Filter Include="Header Files">
  9 |       <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
 10 |       <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
 11 |     </Filter>
 12 |     <Filter Include="Resource Files">
 13 |       <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
 14 |       <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
 15 |     </Filter>
 16 |   </ItemGroup>
 17 |   <ItemGroup>
 18 |     <ClCompile Include="ConvectionKernels_BC6H_IO.cpp">
 19 |       <Filter>Source Files</Filter>
 20 |     </ClCompile>
 21 |     <ClCompile Include="ConvectionKernels_API.cpp">
 22 |       <Filter>Source Files</Filter>
 23 |     </ClCompile>
 24 |     <ClCompile Include="ConvectionKernels_BC67.cpp">
 25 |       <Filter>Source Files</Filter>
 26 |     </ClCompile>
 27 |     <ClCompile Include="ConvectionKernels_BCCommon.cpp">
 28 |       <Filter>Source Files</Filter>
 29 |     </ClCompile>
 30 |     <ClCompile Include="ConvectionKernels_ETC.cpp">
 31 |       <Filter>Source Files</Filter>
 32 |     </ClCompile>
 33 |     <ClCompile Include="ConvectionKernels_IndexSelector.cpp">
 34 |       <Filter>Source Files</Filter>
 35 |     </ClCompile>
 36 |     <ClCompile Include="ConvectionKernels_S3TC.cpp">
 37 |       <Filter>Source Files</Filter>
 38 |     </ClCompile>
 39 |     <ClCompile Include="ConvectionKernels_SingleFile.cpp">
 40 |       <Filter>Source Files</Filter>
 41 |     </ClCompile>
 42 |     <ClCompile Include="ConvectionKernels_Util.cpp">
 43 |       <Filter>Source Files</Filter>
 44 |     </ClCompile>
 45 |     <ClCompile Include="ConvectionKernels_BC7_PrioData.cpp">
 46 |       <Filter>Source Files</Filter>
 47 |     </ClCompile>
 48 |   </ItemGroup>
 49 |   <ItemGroup>
 50 |     <ClInclude Include="ConvectionKernels_AggregatedError.h">
 51 |       <Filter>Header Files</Filter>
 52 |     </ClInclude>
 53 |     <ClInclude Include="ConvectionKernels_BC6H_IO.h">
 54 |       <Filter>Header Files</Filter>
 55 |     </ClInclude>
 56 |     <ClInclude Include="ConvectionKernels_BC7_SingleColor.h">
 57 |       <Filter>Header Files</Filter>
 58 |     </ClInclude>
 59 |     <ClInclude Include="ConvectionKernels_BCCommon.h">
 60 |       <Filter>Header Files</Filter>
 61 |     </ClInclude>
 62 |     <ClInclude Include="ConvectionKernels_BC67.h">
 63 |       <Filter>Header Files</Filter>
 64 |     </ClInclude>
 65 |     <ClInclude Include="ConvectionKernels.h">
 66 |       <Filter>Header Files</Filter>
 67 |     </ClInclude>
 68 |     <ClInclude Include="ConvectionKernels_Config.h">
 69 |       <Filter>Header Files</Filter>
 70 |     </ClInclude>
 71 |     <ClInclude Include="ConvectionKernels_ETC.h">
 72 |       <Filter>Header Files</Filter>
 73 |     </ClInclude>
 74 |     <ClInclude Include="ConvectionKernels_ETC1.h">
 75 |       <Filter>Header Files</Filter>
 76 |     </ClInclude>
 77 |     <ClInclude Include="ConvectionKernels_ETC2.h">
 78 |       <Filter>Header Files</Filter>
 79 |     </ClInclude>
 80 |     <ClInclude Include="ConvectionKernels_IndexSelector.h">
 81 |       <Filter>Header Files</Filter>
 82 |     </ClInclude>
 83 |     <ClInclude Include="ConvectionKernels_IndexSelectorHDR.h">
 84 |       <Filter>Header Files</Filter>
 85 |     </ClInclude>
 86 |     <ClInclude Include="ConvectionKernels_PackedCovarianceMatrix.h">
 87 |       <Filter>Header Files</Filter>
 88 |     </ClInclude>
 89 |     <ClInclude Include="ConvectionKernels_ParallelMath.h">
 90 |       <Filter>Header Files</Filter>
 91 |     </ClInclude>
 92 |     <ClInclude Include="ConvectionKernels_S3TC.h">
 93 |       <Filter>Header Files</Filter>
 94 |     </ClInclude>
 95 |     <ClInclude Include="ConvectionKernels_UnfinishedEndpoints.h">
 96 |       <Filter>Header Files</Filter>
 97 |     </ClInclude>
 98 |     <ClInclude Include="ConvectionKernels_Util.h">
 99 |       <Filter>Header Files</Filter>
100 |     </ClInclude>
101 |     <ClInclude Include="ConvectionKernels_EndpointSelector.h">
102 |       <Filter>Header Files</Filter>
103 |     </ClInclude>
104 |     <ClInclude Include="ConvectionKernels_EndpointRefiner.h">
105 |       <Filter>Header Files</Filter>
106 |     </ClInclude>
107 |     <ClInclude Include="ConvectionKernels_S3TC_SingleColor.h">
108 |       <Filter>Header Files</Filter>
109 |     </ClInclude>
110 |     <ClInclude Include="ConvectionKernels_ETC2_Rounding.h">
111 |       <Filter>Header Files</Filter>
112 |     </ClInclude>
113 |     <ClInclude Include="ConvectionKernels_FakeBT709_Rounding.h">
114 |       <Filter>Header Files</Filter>
115 |     </ClInclude>
116 |     <ClInclude Include="ConvectionKernels_BC7_Prio.h">
117 |       <Filter>Header Files</Filter>
118 |     </ClInclude>
119 |   </ItemGroup>
120 | </Project>


--------------------------------------------------------------------------------
/ConvectionKernels_API.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 | Convection Texture Tools
  3 | Copyright (c) 2018-2019 Eric Lasota
  4 | 
  5 | Permission is hereby granted, free of charge, to any person obtaining
  6 | a copy of this software and associated documentation files (the
  7 | "Software"), to deal in the Software without restriction, including
  8 | without limitation the rights to use, copy, modify, merge, publish,
  9 | distribute, sublicense, and/or sell copies of the Software, and to
 10 | permit persons to whom the Software is furnished to do so, subject
 11 | to the following conditions:
 12 | 
 13 | The above copyright notice and this permission notice shall be included
 14 | in all copies or substantial portions of the Software.
 15 | 
 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 17 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 19 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 21 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 22 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 23 | */
 24 | #include "ConvectionKernels_Config.h"
 25 | 
 26 | #if !defined(CVTT_SINGLE_FILE) || defined(CVTT_SINGLE_FILE_IMPL)
 27 | 
 28 | #include <stdint.h>
 29 | #include "ConvectionKernels.h"
 30 | #include "ConvectionKernels_Util.h"
 31 | #include "ConvectionKernels_BC67.h"
 32 | #include "ConvectionKernels_ETC.h"
 33 | #include "ConvectionKernels_S3TC.h"
 34 | 
 35 | #include <assert.h>
 36 | 
 37 | namespace cvtt
 38 | {
 39 |     namespace Kernels
 40 |     {
 41 |         void EncodeBC7(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, const BC7EncodingPlan &encodingPlan)
 42 |         {
 43 |             assert(pBlocks);
 44 |             assert(pBC);
 45 | 
 46 |             float channelWeights[4];
 47 |             Util::FillWeights(options, channelWeights);
 48 | 
 49 |             for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
 50 |             {
 51 |                 Internal::BC7Computer::Pack(options.flags, pBlocks + blockBase, pBC, channelWeights, encodingPlan, options.refineRoundsBC7);
 52 |                 pBC += ParallelMath::ParallelSize * 16;
 53 |             }
 54 |         }
 55 | 
 56 |         void EncodeBC6HU(uint8_t *pBC, const PixelBlockF16 *pBlocks, const cvtt::Options &options)
 57 |         {
 58 |             assert(pBlocks);
 59 |             assert(pBC);
 60 | 
 61 |             float channelWeights[4];
 62 |             Util::FillWeights(options, channelWeights);
 63 | 
 64 |             for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
 65 |             {
 66 |                 Internal::BC6HComputer::Pack(options.flags, pBlocks + blockBase, pBC, channelWeights, false, options.seedPoints, options.refineRoundsBC6H);
 67 |                 pBC += ParallelMath::ParallelSize * 16;
 68 |             }
 69 |         }
 70 | 
 71 |         void EncodeBC6HS(uint8_t *pBC, const PixelBlockF16 *pBlocks, const cvtt::Options &options)
 72 |         {
 73 |             assert(pBlocks);
 74 |             assert(pBC);
 75 | 
 76 |             float channelWeights[4];
 77 |             Util::FillWeights(options, channelWeights);
 78 | 
 79 |             for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
 80 |             {
 81 |                 Internal::BC6HComputer::Pack(options.flags, pBlocks + blockBase, pBC, channelWeights, true, options.seedPoints, options.refineRoundsBC6H);
 82 |                 pBC += ParallelMath::ParallelSize * 16;
 83 |             }
 84 |         }
 85 | 
 86 |         void EncodeBC1(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options)
 87 |         {
 88 |             assert(pBlocks);
 89 |             assert(pBC);
 90 | 
 91 |             float channelWeights[4];
 92 |             Util::FillWeights(options, channelWeights);
 93 | 
 94 |             for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
 95 |             {
 96 |                 Internal::S3TCComputer::PackRGB(options.flags, pBlocks + blockBase, pBC, 8, channelWeights, true, options.threshold, (options.flags & Flags::S3TC_Exhaustive) != 0, options.seedPoints, options.refineRoundsS3TC);
 97 |                 pBC += ParallelMath::ParallelSize * 8;
 98 |             }
 99 |         }
100 | 
101 |         void EncodeBC2(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options)
102 |         {
103 |             assert(pBlocks);
104 |             assert(pBC);
105 | 
106 |             float channelWeights[4];
107 |             Util::FillWeights(options, channelWeights);
108 | 
109 |             for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
110 |             {
111 |                 Internal::S3TCComputer::PackRGB(options.flags, pBlocks + blockBase, pBC + 8, 16, channelWeights, false, 1.0f, (options.flags & Flags::S3TC_Exhaustive) != 0, options.seedPoints, options.refineRoundsS3TC);
112 |                 Internal::S3TCComputer::PackExplicitAlpha(options.flags, pBlocks + blockBase, 3, pBC, 16);
113 |                 pBC += ParallelMath::ParallelSize * 16;
114 |             }
115 |         }
116 | 
117 |         void EncodeBC3(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options)
118 |         {
119 |             assert(pBlocks);
120 |             assert(pBC);
121 | 
122 |             float channelWeights[4];
123 |             Util::FillWeights(options, channelWeights);
124 | 
125 |             for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
126 |             {
127 |                 Internal::S3TCComputer::PackRGB(options.flags, pBlocks + blockBase, pBC + 8, 16, channelWeights, false, 1.0f, (options.flags & Flags::S3TC_Exhaustive) != 0, options.seedPoints, options.refineRoundsS3TC);
128 |                 Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, pBlocks + blockBase, 3, pBC, 16, false, options.seedPoints, options.refineRoundsIIC);
129 |                 pBC += ParallelMath::ParallelSize * 16;
130 |             }
131 |         }
132 | 
133 |         void EncodeBC4U(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options)
134 |         {
135 |             assert(pBlocks);
136 |             assert(pBC);
137 | 
138 |             float channelWeights[4];
139 |             Util::FillWeights(options, channelWeights);
140 | 
141 |             for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
142 |             {
143 |                 Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, pBlocks + blockBase, 0, pBC, 8, false, options.seedPoints, options.refineRoundsIIC);
144 |                 pBC += ParallelMath::ParallelSize * 8;
145 |             }
146 |         }
147 | 
148 |         void EncodeBC4S(uint8_t *pBC, const PixelBlockS8 *pBlocks, const Options &options)
149 |         {
150 |             assert(pBlocks);
151 |             assert(pBC);
152 | 
153 |             float channelWeights[4];
154 |             Util::FillWeights(options, channelWeights);
155 | 
156 |             for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
157 |             {
158 |                 PixelBlockU8 inputBlocks[ParallelMath::ParallelSize];
159 |                 Util::BiasSignedInput(inputBlocks, pBlocks + blockBase);
160 | 
161 |                 Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, inputBlocks, 0, pBC, 8, true, options.seedPoints, options.refineRoundsIIC);
162 |                 pBC += ParallelMath::ParallelSize * 8;
163 |             }
164 |         }
165 | 
166 |         void EncodeBC5U(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options)
167 |         {
168 |             assert(pBlocks);
169 |             assert(pBC);
170 | 
171 |             float channelWeights[4];
172 |             Util::FillWeights(options, channelWeights);
173 | 
174 |             for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
175 |             {
176 |                 Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, pBlocks + blockBase, 0, pBC, 16, false, options.seedPoints, options.refineRoundsIIC);
177 |                 Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, pBlocks + blockBase, 1, pBC + 8, 16, false, options.seedPoints, options.refineRoundsIIC);
178 |                 pBC += ParallelMath::ParallelSize * 16;
179 |             }
180 |         }
181 | 
182 |         void EncodeBC5S(uint8_t *pBC, const PixelBlockS8 *pBlocks, const Options &options)
183 |         {
184 |             assert(pBlocks);
185 |             assert(pBC);
186 | 
187 |             float channelWeights[4];
188 |             Util::FillWeights(options, channelWeights);
189 | 
190 |             for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
191 |             {
192 |                 PixelBlockU8 inputBlocks[ParallelMath::ParallelSize];
193 |                 Util::BiasSignedInput(inputBlocks, pBlocks + blockBase);
194 | 
195 |                 Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, inputBlocks, 0, pBC, 16, true, options.seedPoints, options.refineRoundsIIC);
196 |                 Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, inputBlocks, 1, pBC + 8, 16, true, options.seedPoints, options.refineRoundsIIC);
197 |                 pBC += ParallelMath::ParallelSize * 16;
198 |             }
199 |         }
200 | 
201 |         void EncodeETC1(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, cvtt::ETC1CompressionData *compressionData)
202 |         {
203 |             assert(pBlocks);
204 |             assert(pBC);
205 | 
206 |             float channelWeights[4];
207 |             Util::FillWeights(options, channelWeights);
208 | 
209 |             for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
210 |             {
211 |                 Internal::ETCComputer::CompressETC1Block(pBC, pBlocks + blockBase, compressionData, options);
212 |                 pBC += ParallelMath::ParallelSize * 8;
213 |             }
214 |         }
215 | 
216 |         void EncodeETC2(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, cvtt::ETC2CompressionData *compressionData)
217 |         {
218 |             assert(pBlocks);
219 |             assert(pBC);
220 | 
221 |             float channelWeights[4];
222 |             Util::FillWeights(options, channelWeights);
223 | 
224 |             for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
225 |             {
226 |                 Internal::ETCComputer::CompressETC2Block(pBC, pBlocks + blockBase, compressionData, options, false);
227 |                 pBC += ParallelMath::ParallelSize * 8;
228 |             }
229 |         }
230 | 
231 |         void EncodeETC2PunchthroughAlpha(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, cvtt::ETC2CompressionData *compressionData)
232 |         {
233 |             assert(pBlocks);
234 |             assert(pBC);
235 | 
236 |             float channelWeights[4];
237 |             Util::FillWeights(options, channelWeights);
238 | 
239 |             for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
240 |             {
241 |                 Internal::ETCComputer::CompressETC2Block(pBC, pBlocks + blockBase, compressionData, options, true);
242 |                 pBC += ParallelMath::ParallelSize * 8;
243 |             }
244 |         }
245 | 
246 |         void EncodeETC2Alpha(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options)
247 |         {
248 |             assert(pBlocks);
249 |             assert(pBC);
250 | 
251 |             for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
252 |             {
253 |                 Internal::ETCComputer::CompressETC2AlphaBlock(pBC, pBlocks + blockBase, options);
254 |                 pBC += ParallelMath::ParallelSize * 8;
255 |             }
256 |         }
257 | 
258 |         void EncodeETC2Alpha11(uint8_t *pBC, const PixelBlockScalarS16 *pBlocks, bool isSigned, const cvtt::Options &options)
259 |         {
260 |             assert(pBlocks);
261 |             assert(pBC);
262 | 
263 |             for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
264 |             {
265 |                 Internal::ETCComputer::CompressEACBlock(pBC, pBlocks + blockBase, isSigned, options);
266 |                 pBC += ParallelMath::ParallelSize * 8;
267 |             }
268 |         }
269 | 
270 |         void EncodeETC2RGBA(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, cvtt::ETC2CompressionData *compressionData)
271 |         {
272 |             uint8_t alphaBlockData[cvtt::NumParallelBlocks * 8];
273 |             uint8_t colorBlockData[cvtt::NumParallelBlocks * 8];
274 | 
275 |             EncodeETC2(colorBlockData, pBlocks, options, compressionData);
276 |             EncodeETC2Alpha(alphaBlockData, pBlocks, options);
277 | 
278 |             for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase++)
279 |             {
280 |                 for (size_t blockData = 0; blockData < 8; blockData++)
281 |                     pBC[blockBase * 16 + blockData] = alphaBlockData[blockBase * 8 + blockData];
282 | 
283 |                 for (size_t blockData = 0; blockData < 8; blockData++)
284 |                     pBC[blockBase * 16 + 8 + blockData] = colorBlockData[blockBase * 8 + blockData];
285 |             }
286 |         }
287 | 
288 |         void DecodeBC7(PixelBlockU8 *pBlocks, const uint8_t *pBC)
289 |         {
290 |             assert(pBlocks);
291 |             assert(pBC);
292 | 
293 |             for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase++)
294 |             {
295 |                 Internal::BC7Computer::UnpackOne(pBlocks[blockBase], pBC);
296 |                 pBC += 16;
297 |             }
298 |         }
299 | 
300 |         void DecodeBC6HU(PixelBlockF16 *pBlocks, const uint8_t *pBC)
301 |         {
302 |             assert(pBlocks);
303 |             assert(pBC);
304 | 
305 |             for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase++)
306 |             {
307 |                 Internal::BC6HComputer::UnpackOne(pBlocks[blockBase], pBC, false);
308 |                 pBC += 16;
309 |             }
310 |         }
311 | 
312 |         void DecodeBC6HS(PixelBlockF16 *pBlocks, const uint8_t *pBC)
313 |         {
314 |             assert(pBlocks);
315 |             assert(pBC);
316 | 
317 |             for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase++)
318 |             {
319 |                 Internal::BC6HComputer::UnpackOne(pBlocks[blockBase], pBC, true);
320 |                 pBC += 16;
321 |             }
322 |         }
323 | 
324 |         ETC1CompressionData *AllocETC1Data(allocFunc_t allocFunc, void *context)
325 |         {
326 |             return cvtt::Internal::ETCComputer::AllocETC1Data(allocFunc, context);
327 |         }
328 | 
329 |         void ReleaseETC1Data(ETC1CompressionData *compressionData, freeFunc_t freeFunc)
330 |         {
331 |             cvtt::Internal::ETCComputer::ReleaseETC1Data(compressionData, freeFunc);
332 |         }
333 | 
334 |         ETC2CompressionData *AllocETC2Data(allocFunc_t allocFunc, void *context, const cvtt::Options &options)
335 |         {
336 |             return cvtt::Internal::ETCComputer::AllocETC2Data(allocFunc, context, options);
337 |         }
338 | 
339 |         void ReleaseETC2Data(ETC2CompressionData *compressionData, freeFunc_t freeFunc)
340 |         {
341 |             cvtt::Internal::ETCComputer::ReleaseETC2Data(compressionData, freeFunc);
342 |         }
343 |     }
344 | }
345 | 
346 | #endif
347 | 


--------------------------------------------------------------------------------
/ConvectionKernels_AggregatedError.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #ifndef __CVTT_AGGREGATEDERROR_H__
 3 | #define __CVTT_AGGREGATEDERROR_H__
 4 | 
 5 | #include "ConvectionKernels_ParallelMath.h"
 6 | 
 7 | namespace cvtt
 8 | {
 9 |     namespace Internal
10 |     {
11 |         template<int TVectorSize>
12 |         class AggregatedError
13 |         {
14 |         public:
15 |             typedef ParallelMath::UInt16 MUInt16;
16 |             typedef ParallelMath::UInt31 MUInt31;
17 |             typedef ParallelMath::Float MFloat;
18 | 
19 |             AggregatedError()
20 |             {
21 |                 for (int ch = 0; ch < TVectorSize; ch++)
22 |                     m_errorUnweighted[ch] = ParallelMath::MakeUInt31(0);
23 |             }
24 | 
25 |             void Add(const MUInt16 &channelErrorUnweighted, int ch)
26 |             {
27 |                 m_errorUnweighted[ch] = m_errorUnweighted[ch] + ParallelMath::ToUInt31(channelErrorUnweighted);
28 |             }
29 | 
30 |             MFloat Finalize(uint32_t flags, const float channelWeightsSq[TVectorSize]) const
31 |             {
32 |                 if (flags & cvtt::Flags::Uniform)
33 |                 {
34 |                     MUInt31 total = m_errorUnweighted[0];
35 |                     for (int ch = 1; ch < TVectorSize; ch++)
36 |                         total = total + m_errorUnweighted[ch];
37 |                     return ParallelMath::ToFloat(total);
38 |                 }
39 |                 else
40 |                 {
41 |                     MFloat total = ParallelMath::ToFloat(m_errorUnweighted[0]) * channelWeightsSq[0];
42 |                     for (int ch = 1; ch < TVectorSize; ch++)
43 |                         total = total + ParallelMath::ToFloat(m_errorUnweighted[ch]) * channelWeightsSq[ch];
44 |                     return total;
45 |                 }
46 |             }
47 | 
48 |         private:
49 |             MUInt31 m_errorUnweighted[TVectorSize];
50 |         };
51 |     }
52 | }
53 | 
54 | #endif
55 | 
56 | 


--------------------------------------------------------------------------------
/ConvectionKernels_BC67.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include "ConvectionKernels_ParallelMath.h"
  4 | 
  5 | 
// Forward declarations for types that the computer classes below only use by
// pointer or reference, so their full definitions are not needed in this header.
// NOTE(review): PixelBlockF16 and BC7EncodingPlan are also used in the declarations
// below but are not forward-declared here; this header appears to rely on them being
// declared by an earlier include - confirm.
namespace cvtt
{
    namespace Tables
    {
        namespace BC7SC
        {
            struct Table;
        }
    }

    namespace Internal
    {
        namespace BC67
        {
            struct WorkInfo;
        }

        template<int TVectorSize>
        class IndexSelectorHDR;
    }

    struct PixelBlockU8;
}
 29 | 
 30 | namespace cvtt
 31 | {
 32 |     namespace Internal
 33 |     {
        // Static-only helper class holding the BC7 encoder and decoder entry points.
        // Only declarations appear here; the implementations are outside this header.
        class BC7Computer
        {
        public:
            // Encodes a batch of input blocks into packedBlocks. Kernels::EncodeBC7 calls this
            // once per ParallelMath::ParallelSize input blocks and advances the output
            // pointer by ParallelSize * 16 bytes between calls.
            static void Pack(uint32_t flags, const PixelBlockU8* inputs, uint8_t* packedBlocks, const float channelWeights[4], const BC7EncodingPlan &encodingPlan, int numRefineRounds);
            // Decodes a single packed block into output. Kernels::DecodeBC7 calls this once
            // per block with the input pointer advanced by 16 bytes each time.
            static void UnpackOne(PixelBlockU8 &output, const uint8_t* packedBlock);

        private:
            static const int MaxTweakRounds = 4;

            // Short aliases for the ParallelMath vector types used in the signatures below.
            typedef ParallelMath::SInt16 MSInt16;
            typedef ParallelMath::UInt15 MUInt15;
            typedef ParallelMath::UInt16 MUInt16;
            typedef ParallelMath::SInt32 MSInt32;
            typedef ParallelMath::Float MFloat;

            // Endpoint tweak/quantization helpers.
            // NOTE(review): CompressEndpoints0..7 presumably correspond to BC7 modes 0-7 - confirm
            // against the implementation.
            static void TweakAlpha(const MUInt15 original[2], int tweak, int range, MUInt15 result[2]);
            static void Quantize(MUInt15* color, int bits, int channels);
            static void QuantizeP(MUInt15* color, int bits, uint16_t p, int channels);
            static void Unquantize(MUInt15* color, int bits, int channels);
            static void CompressEndpoints0(MUInt15 ep[2][4], uint16_t p[2]);
            static void CompressEndpoints1(MUInt15 ep[2][4], uint16_t p);
            static void CompressEndpoints2(MUInt15 ep[2][4]);
            static void CompressEndpoints3(MUInt15 ep[2][4], uint16_t p[2]);
            static void CompressEndpoints4(MUInt15 epRGB[2][3], MUInt15 epA[2]);
            static void CompressEndpoints5(MUInt15 epRGB[2][3], MUInt15 epA[2]);
            static void CompressEndpoints6(MUInt15 ep[2][4], uint16_t p[2]);
            static void CompressEndpoints7(MUInt15 ep[2][4], uint16_t p[2]);
            // Encoding attempts that take the shared BC67::WorkInfo or per-shape best-result
            // outputs by reference; their exact contracts are defined by the implementation.
            static void TrySingleColorRGBAMultiTable(uint32_t flags, const MUInt15 pixels[16][4], const MFloat average[4], int numRealChannels, const uint8_t *fragmentStart, int shapeLength, const MFloat &staticAlphaError, const ParallelMath::Int16CompFlag punchThroughInvalid[4], MFloat& shapeBestError, MUInt15 shapeBestEP[2][4], MUInt15 *fragmentBestIndexes, const float *channelWeightsSq, const cvtt::Tables::BC7SC::Table*const* tables, int numTables, const ParallelMath::RoundTowardNearestForScope *rtn);
            static void TrySinglePlane(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const float channelWeights[4], const BC7EncodingPlan &encodingPlan, int numRefineRounds, BC67::WorkInfo& work, const ParallelMath::RoundTowardNearestForScope *rtn);
            static void TryDualPlane(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const float channelWeights[4], const BC7EncodingPlan &encodingPlan, int numRefineRounds, BC67::WorkInfo& work, const ParallelMath::RoundTowardNearestForScope *rtn);

            // Generic two-value swap helper (declaration only).
            template<class T>
            static void Swap(T& a, T& b);
        };
 68 | 
 69 | 
        // Static-only helper class holding the BC6H encoder and decoder entry points.
        // Only declarations appear here; the implementations are outside this header.
        class BC6HComputer
        {
        public:
            // Encodes a batch of input blocks into packedBlocks. Kernels::EncodeBC6HU / EncodeBC6HS
            // call this once per ParallelMath::ParallelSize input blocks with isSigned set to
            // false / true, and pass options.seedPoints as numTweakRounds.
            static void Pack(uint32_t flags, const PixelBlockF16* inputs, uint8_t* packedBlocks, const float channelWeights[4], bool isSigned, int numTweakRounds, int numRefineRounds);
            // Decodes a single packed block into output. Kernels::DecodeBC6HU / DecodeBC6HS call
            // this once per block with the input pointer advanced by 16 bytes each time.
            static void UnpackOne(PixelBlockF16 &output, const uint8_t *pBC, bool isSigned);

        private:
            // Short aliases for the ParallelMath vector types used in the signatures below.
            typedef ParallelMath::Float MFloat;
            typedef ParallelMath::SInt16 MSInt16;
            typedef ParallelMath::UInt16 MUInt16;
            typedef ParallelMath::UInt15 MUInt15;
            typedef ParallelMath::AInt16 MAInt16;
            typedef ParallelMath::SInt32 MSInt32;
            typedef ParallelMath::UInt31 MUInt31;

            // NOTE(review): presumably upper bounds on the round counts accepted by Pack -
            // confirm against the implementation.
            static const int MaxTweakRounds = 4;
            static const int MaxRefineRounds = 3;

            // Per-element endpoint quantize/unquantize helpers, in signed and unsigned variants.
            static MSInt16 QuantizeSingleEndpointElementSigned(const MSInt16 &elem2CL, int precision, const ParallelMath::RoundUpForScope* ru);
            static MUInt15 QuantizeSingleEndpointElementUnsigned(const MUInt15 &elem, int precision, const ParallelMath::RoundUpForScope* ru);
            static void UnquantizeSingleEndpointElementSigned(const MSInt16 &comp, int precision, MSInt16 &outUnquantized, MSInt16 &outUnquantizedFinished2CL);
            static void UnquantizeSingleEndpointElementUnsigned(const MUInt15 &comp, int precision, MUInt16 &outUnquantized, MUInt16 &outUnquantizedFinished);
            // Whole-endpoint quantization; both variants write quantizedEndPoints and indexes.
            static void QuantizeEndpointsSigned(const MSInt16 endPoints[2][3], const MFloat floatPixelsColorSpace[16][3], const MFloat floatPixelsLinearWeighted[16][3], MAInt16 quantizedEndPoints[2][3], MUInt15 indexes[16], IndexSelectorHDR<3> &indexSelector, int fixupIndex, int precision, int indexRange, const float *channelWeights, bool fastIndexing, const ParallelMath::RoundTowardNearestForScope *rtn);
            static void QuantizeEndpointsUnsigned(const MSInt16 endPoints[2][3], const MFloat floatPixelsColorSpace[16][3], const MFloat floatPixelsLinearWeighted[16][3], MAInt16 quantizedEndPoints[2][3], MUInt15 indexes[16], IndexSelectorHDR<3> &indexSelector, int fixupIndex, int precision, int indexRange, const float *channelWeights, bool fastIndexing, const ParallelMath::RoundTowardNearestForScope *rtn);
            // Legality checks that report their result through outEncodedEPs and outIsLegal.
            static void EvaluatePartitionedLegality(const MAInt16 ep0[2][3], const MAInt16 ep1[2][3], int aPrec, const int bPrec[3], bool isTransformed, MAInt16 outEncodedEPs[2][2][3], ParallelMath::Int16CompFlag& outIsLegal);
            static void EvaluateSingleLegality(const MAInt16 ep[2][3], int aPrec, const int bPrec[3], bool isTransformed, MAInt16 outEncodedEPs[2][3], ParallelMath::Int16CompFlag& outIsLegal);
            // Modifies v in place; presumably sign-extends it from the given bit width - confirm.
            static void SignExtendSingle(int &v, int bits);
        };
 98 |     }
 99 | }
100 | 


--------------------------------------------------------------------------------
/ConvectionKernels_BC6H_IO.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <stdint.h>
 4 | #include "ConvectionKernels_BC6H_IO.h"
 5 | 
// Declarations for the BC6H bitstream read/write function tables.
namespace cvtt
{
    namespace BC6H_IO
    {
        // Reader signature: takes the encoded 32-bit words and returns the field values
        // (d, then four values each for r, g and b) through reference out-parameters.
        typedef void (*ReadFunc_t)(const uint32_t *encoded, uint16_t &d, uint16_t &rw, uint16_t &rx, uint16_t &ry, uint16_t &rz, uint16_t &gw, uint16_t &gx, uint16_t &gy, uint16_t &gz, uint16_t &bw, uint16_t &bx, uint16_t &by, uint16_t &bz);
        // Writer signature: takes the same fields by value, plus a leading m, and writes
        // them into the encoded 32-bit words.
        typedef void (*WriteFunc_t)(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz);

        // Tables of 14 readers and 14 writers, defined elsewhere.
        // NOTE(review): presumably one entry per BC6H block mode - confirm.
        extern const ReadFunc_t g_readFuncs[14];
        extern const WriteFunc_t g_writeFuncs[14];
    }
}
17 | 


--------------------------------------------------------------------------------
/ConvectionKernels_BC7_Prio.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <stdint.h>
 4 | 
// Declarations for the BC7 priority code tables and the accessors that extract
// individual fields from a packed 16-bit code. Definitions live outside this header.
namespace cvtt { namespace Tables { namespace BC7Prio {
    // Pointer to the RGB code list and its element count.
    extern const uint16_t *g_bc7PrioCodesRGB;
    extern const int g_bc7NumPrioCodesRGB;

    // Pointer to the RGBA code list and its element count.
    extern const uint16_t *g_bc7PrioCodesRGBA;
    extern const int g_bc7NumPrioCodesRGBA;

    // Field extractors: each takes one packed code and returns a single field as int.
    // NOTE(review): the bit layout of a packed code is not visible here - see the definitions.
    int UnpackMode(uint16_t packed);
    int UnpackSeedPointCount(uint16_t packed);
    int UnpackPartition(uint16_t packed);
    int UnpackRotation(uint16_t packed);
    int UnpackIndexSelector(uint16_t packed);
}}}
18 | 


--------------------------------------------------------------------------------
/ConvectionKernels_BCCommon.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Convection Texture Tools
 3 | Copyright (c) 2018-2019 Eric Lasota
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining
 6 | a copy of this software and associated documentation files (the
 7 | "Software"), to deal in the Software without restriction, including
 8 | without limitation the rights to use, copy, modify, merge, publish,
 9 | distribute, sublicense, and/or sell copies of the Software, and to
10 | permit persons to whom the Software is furnished to do so, subject
11 | to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included
14 | in all copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 | 
24 | -------------------------------------------------------------------------------------
25 | 
26 | Portions based on DirectX Texture Library (DirectXTex)
27 | 
28 | Copyright (c) Microsoft Corporation. All rights reserved.
29 | Licensed under the MIT License.
30 | 
31 | http://go.microsoft.com/fwlink/?LinkId=248926
32 | */
33 | #include "ConvectionKernels_Config.h"
34 | 
35 | #if !defined(CVTT_SINGLE_FILE) || defined(CVTT_SINGLE_FILE_IMPL)
36 | 
37 | #include "ConvectionKernels_BCCommon.h"
38 | 
39 | int cvtt::Internal::BCCommon::TweakRoundsForRange(int range)
40 | {
41 |     if (range == 3)
42 |         return 3;
43 |     return 4;
44 | }
45 | 
46 | #endif
47 | 


--------------------------------------------------------------------------------
/ConvectionKernels_BCCommon.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #ifndef __CVTT_BCCOMMON_H__
  3 | #define __CVTT_BCCOMMON_H__
  4 | 
  5 | #include "ConvectionKernels_AggregatedError.h"
  6 | #include "ConvectionKernels_ParallelMath.h"
  7 | 
  8 | namespace cvtt
  9 | {
 10 |     namespace Internal
 11 |     {
 12 |         class BCCommon
 13 |         {
 14 |         public:
 15 |             typedef ParallelMath::Float MFloat;
 16 |             typedef ParallelMath::UInt16 MUInt16;
 17 |             typedef ParallelMath::UInt15 MUInt15;
 18 |             typedef ParallelMath::AInt16 MAInt16;
 19 |             typedef ParallelMath::SInt16 MSInt16;
 20 |             typedef ParallelMath::SInt32 MSInt32;
 21 | 
 22 |             static int TweakRoundsForRange(int range);
 23 | 
 24 |             template<int TVectorSize>
 25 |             static void ComputeErrorLDR(uint32_t flags, const MUInt15 reconstructed[TVectorSize], const MUInt15 original[TVectorSize], int numRealChannels, AggregatedError<TVectorSize> &aggError)
 26 |             {
 27 |                 for (int ch = 0; ch < numRealChannels; ch++)
 28 |                     aggError.Add(ParallelMath::SqDiffUInt8(reconstructed[ch], original[ch]), ch);
 29 |             }
 30 | 
 31 |             template<int TVectorSize>
 32 |             static void ComputeErrorLDR(uint32_t flags, const MUInt15 reconstructed[TVectorSize], const MUInt15 original[TVectorSize], AggregatedError<TVectorSize> &aggError)
 33 |             {
 34 |                 ComputeErrorLDR<TVectorSize>(flags, reconstructed, original, TVectorSize, aggError);
 35 |             }
 36 | 
 37 |             template<int TVectorSize>
 38 |             static MFloat ComputeErrorLDRSimple(uint32_t flags, const MUInt15 reconstructed[TVectorSize], const MUInt15 original[TVectorSize], int numRealChannels, const float *channelWeightsSq)
 39 |             {
 40 |                 AggregatedError<TVectorSize> aggError;
 41 |                 ComputeErrorLDR<TVectorSize>(flags, reconstructed, original, numRealChannels, aggError);
 42 |                 return aggError.Finalize(flags, channelWeightsSq);
 43 |             }
 44 | 
 45 |             template<int TVectorSize> // Sums ParallelMath::SqDiffSInt16 over all TVectorSize channels, optionally scaled per channel.
 46 |             static MFloat ComputeErrorHDRFast(uint32_t flags, const MSInt16 reconstructed[TVectorSize], const MSInt16 original[TVectorSize], const float channelWeightsSq[TVectorSize])
 47 |             {
 48 |                 MFloat error = ParallelMath::MakeFloatZero();
 49 |                 if (flags & Flags::Uniform) // Uniform: channelWeightsSq is not read at all
 50 |                 {
 51 |                     for (int ch = 0; ch < TVectorSize; ch++)
 52 |                         error = error + ParallelMath::SqDiffSInt16(reconstructed[ch], original[ch]);
 53 |                 }
 54 |                 else // otherwise each channel's term is multiplied by channelWeightsSq[ch]
 55 |                 {
 56 |                     for (int ch = 0; ch < TVectorSize; ch++)
 57 |                         error = error + ParallelMath::SqDiffSInt16(reconstructed[ch], original[ch]) * ParallelMath::MakeFloat(channelWeightsSq[ch]);
 58 |                 }
 59 | 
 60 |                 return error;
 61 |             }
 62 | 
 63 |             template<int TVectorSize> // Same structure as ComputeErrorHDRFast, but the per-channel term comes from ParallelMath::SqDiff2CL.
 64 |             static MFloat ComputeErrorHDRSlow(uint32_t flags, const MSInt16 reconstructed[TVectorSize], const MSInt16 original[TVectorSize], const float channelWeightsSq[TVectorSize])
 65 |             {
 66 |                 MFloat error = ParallelMath::MakeFloatZero();
 67 |                 if (flags & Flags::Uniform) // Uniform: channelWeightsSq is not read at all
 68 |                 {
 69 |                     for (int ch = 0; ch < TVectorSize; ch++)
 70 |                         error = error + ParallelMath::SqDiff2CL(reconstructed[ch], original[ch]);
 71 |                 }
 72 |                 else // otherwise each channel's term is multiplied by channelWeightsSq[ch]
 73 |                 {
 74 |                     for (int ch = 0; ch < TVectorSize; ch++)
 75 |                         error = error + ParallelMath::SqDiff2CL(reconstructed[ch], original[ch]) * ParallelMath::MakeFloat(channelWeightsSq[ch]);
 76 |                 }
 77 | 
 78 |                 return error;
 79 |             }
 80 | 
 81 |             template<int TChannelCount> // Converts each of the 16 pixels to float and scales every channel by channelWeights[ch].
 82 |             static void PreWeightPixelsLDR(MFloat preWeightedPixels[16][TChannelCount], const MUInt15 pixels[16][TChannelCount], const float channelWeights[TChannelCount])
 83 |             {
 84 |                 for (int px = 0; px < 16; px++) // fixed count of 16 pixels, matching the array extents in the signature
 85 |                 {
 86 |                     for (int ch = 0; ch < TChannelCount; ch++)
 87 |                         preWeightedPixels[px][ch] = ParallelMath::ToFloat(pixels[px][ch]) * channelWeights[ch];
 88 |                 }
 89 |             }
 90 | 
 91 |             template<int TChannelCount> // MSInt16 counterpart of PreWeightPixelsLDR: float conversion followed by per-channel scaling.
 92 |             static void PreWeightPixelsHDR(MFloat preWeightedPixels[16][TChannelCount], const MSInt16 pixels[16][TChannelCount], const float channelWeights[TChannelCount])
 93 |             {
 94 |                 for (int px = 0; px < 16; px++) // fixed count of 16 pixels, matching the array extents in the signature
 95 |                 {
 96 |                     for (int ch = 0; ch < TChannelCount; ch++)
 97 |                         preWeightedPixels[px][ch] = ParallelMath::ToFloat(pixels[px][ch]) * channelWeights[ch];
 98 |                 }
 99 |             }
100 |         };
101 |     }
102 | }
103 | 
104 | #endif
105 | 


--------------------------------------------------------------------------------
/ConvectionKernels_Config.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #ifndef __CVTT_CONFIG_H__
 3 | #define __CVTT_CONFIG_H__
 4 | 
 5 | #if (defined(_M_IX86_FP) && _M_IX86_FP >= 2) || defined(_M_X64) || defined(__SSE2__)
 6 | #define CVTT_USE_SSE2
 7 | #endif
 8 | 
 9 | // Define this to compile everything as a single source file
10 | //#define CVTT_SINGLE_FILE
11 | 
12 | #endif
13 | 


--------------------------------------------------------------------------------
/ConvectionKernels_ETC.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #ifndef __CVTT_CONVECTIONKERNELS_ETC_H__
  3 | #define __CVTT_CONVECTIONKERNELS_ETC_H__
  4 | 
  5 | #include "ConvectionKernels.h"
  6 | #include "ConvectionKernels_ParallelMath.h"
  7 | 
  8 | namespace cvtt
  9 | {
 10 |     struct Options;
 11 | 
 12 |     namespace Internal
 13 |     {
 14 |         class ETCComputer // Declares the ETC1 / ETC2 / EAC block compression entry points; every member function declared here is static.
 15 |         {
 16 |         public:
 17 |             static void CompressETC1Block(uint8_t *outputBuffer, const PixelBlockU8 *inputBlocks, ETC1CompressionData *compressionData, const Options &options);
 18 |             static void CompressETC2Block(uint8_t *outputBuffer, const PixelBlockU8 *inputBlocks, ETC2CompressionData *compressionData, const Options &options, bool punchthroughAlpha);
 19 |             static void CompressETC2AlphaBlock(uint8_t *outputBuffer, const PixelBlockU8 *inputBlocks, const Options &options);
 20 |             static void CompressEACBlock(uint8_t *outputBuffer, const PixelBlockScalarS16 *inputBlocks, bool isSigned, const Options &options);
 21 | 
 22 |             static ETC2CompressionData *AllocETC2Data(cvtt::Kernels::allocFunc_t allocFunc, void *context, const cvtt::Options &options);
 23 |             static void ReleaseETC2Data(ETC2CompressionData *compressionData, cvtt::Kernels::freeFunc_t freeFunc);
 24 | 
 25 |             static ETC1CompressionData *AllocETC1Data(cvtt::Kernels::allocFunc_t allocFunc, void *context);
 26 |             static void ReleaseETC1Data(ETC1CompressionData *compressionData, cvtt::Kernels::freeFunc_t freeFunc);
 27 | 
 28 |         private:
 29 |             typedef ParallelMath::Float MFloat;
 30 |             typedef ParallelMath::SInt16 MSInt16;
 31 |             typedef ParallelMath::UInt15 MUInt15;
 32 |             typedef ParallelMath::UInt16 MUInt16;
 33 |             typedef ParallelMath::SInt32 MSInt32;
 34 |             typedef ParallelMath::UInt31 MUInt31;
 35 | 
 36 |             struct DifferentialResolveStorage // Scratch arrays, each sized [2][MaxAttemptsPerSector], plus a per-sector attempt counter.
 37 |             {
 38 |                 static const unsigned int MaxAttemptsPerSector = 57 + 81 + 81 + 81 + 81 + 81 + 81 + 81; // = 624; terms mirror the leading values (57, then 81 x 7) of the rows of Tables::ETC1::g_potentialOffsets4 - presumably one attempt per offset, TODO confirm
 39 | 
 40 |                 MUInt15 diffNumAttempts[2];
 41 |                 MFloat diffErrors[2][MaxAttemptsPerSector];
 42 |                 MUInt16 diffSelectors[2][MaxAttemptsPerSector];
 43 |                 MUInt15 diffColors[2][MaxAttemptsPerSector];
 44 |                 MUInt15 diffTables[2][MaxAttemptsPerSector];
 45 | 
 46 |                 uint16_t attemptSortIndexes[2][MaxAttemptsPerSector];
 47 |             };
 48 | 
 49 |             struct HModeEval // Scratch storage passed by reference to EncodeHMode.
 50 |             {
 51 |                 MFloat errors[62][16];
 52 |                 MUInt16 signBits[62];
 53 |                 MUInt15 uniqueQuantizedColors[62];
 54 |                 MUInt15 numUniqueColors[2];
 55 |             };
 56 | 
 57 |             struct ETC1CompressionDataInternal : public cvtt::ETC1CompressionData // Concrete ETC1CompressionData: scratch storage plus the stored context pointer.
 58 |             {
 59 |                 explicit ETC1CompressionDataInternal(void *context)
 60 |                     : m_context(context)
 61 |                 {
 62 |                 }
 63 | 
 64 |                 DifferentialResolveStorage m_drs;
 65 |                 void *m_context;
 66 |             };
 67 | 
 68 |             struct ETC2CompressionDataInternal : public cvtt::ETC2CompressionData // Concrete ETC2CompressionData; its constructor is defined outside this header.
 69 |             {
 70 |                 explicit ETC2CompressionDataInternal(void *context, const cvtt::Options &options);
 71 | 
 72 |                 HModeEval m_h;
 73 |                 DifferentialResolveStorage m_drs;
 74 | 
 75 |                 void *m_context;
 76 |                 float m_chromaSideAxis0[3];
 77 |                 float m_chromaSideAxis1[3];
 78 |             };
 79 | 
 80 |             static MFloat ComputeErrorUniform(const MUInt15 pixelA[3], const MUInt15 pixelB[3]);
 81 |             static MFloat ComputeErrorWeighted(const MUInt15 reconstructed[3], const MFloat pixelB[3], const Options options); // NOTE(review): options is taken by value here, unlike the const reference used by the other declarations - confirm the copy is intended
 82 |             static MFloat ComputeErrorFakeBT709(const MUInt15 reconstructed[3], const MFloat pixelB[3]);
 83 | 
 84 |             static void TestHalfBlock(MFloat &outError, MUInt16 &outSelectors, MUInt15 quantizedPackedColor, const MUInt15 pixels[8][3], const MFloat preWeightedPixels[8][3], const MSInt16 modifiers[4], bool isDifferential, const Options &options);
 85 |             static void TestHalfBlockPunchthrough(MFloat &outError, MUInt16 &outSelectors, MUInt15 quantizedPackedColor, const MUInt15 pixels[8][3], const MFloat preWeightedPixels[8][3], const ParallelMath::Int16CompFlag isTransparent[8], const MUInt15 modifier, const Options &options);
 86 |             static void FindBestDifferentialCombination(int flip, int d, const ParallelMath::Int16CompFlag canIgnoreSector[2], ParallelMath::Int16CompFlag& bestIsThisMode, MFloat& bestTotalError, MUInt15& bestFlip, MUInt15& bestD, MUInt15 bestColors[2], MUInt16 bestSelectors[2], MUInt15 bestTables[2], DifferentialResolveStorage &drs);
 87 | 
 88 |             static ParallelMath::Int16CompFlag ETCDifferentialIsLegalForChannel(const MUInt15 &a, const MUInt15 &b);
 89 |             static ParallelMath::Int16CompFlag ETCDifferentialIsLegal(const MUInt15 &a, const MUInt15 &b);
 90 |             static bool ETCDifferentialIsLegalForChannelScalar(const uint16_t &a, const uint16_t &b);
 91 |             static bool ETCDifferentialIsLegalScalar(const uint16_t &a, const uint16_t &b);
 92 | 
 93 |             static void EncodeTMode(uint8_t *outputBuffer, MFloat &bestError, const ParallelMath::Int16CompFlag isIsolated[16], const MUInt15 pixels[16][3], const MFloat preWeightedPixels[16][3], const Options &options);
 94 |             static void EncodeHMode(uint8_t *outputBuffer, MFloat &bestError, const ParallelMath::Int16CompFlag groupings[16], const MUInt15 pixels[16][3], HModeEval &he, const MFloat preWeightedPixels[16][3], const Options &options);
 95 | 
 96 |             static void EncodeVirtualTModePunchthrough(uint8_t *outputBuffer, MFloat &bestError, const ParallelMath::Int16CompFlag isIsolated[16], const MUInt15 pixels[16][3], const MFloat preWeightedPixels[16][3], const ParallelMath::Int16CompFlag isTransparent[16], const ParallelMath::Int16CompFlag& anyTransparent, const ParallelMath::Int16CompFlag& allTransparent, const Options &options);
 97 | 
 98 |             static MUInt15 DecodePlanarCoeff(const MUInt15 &coeff, int ch);
 99 |             static void EncodePlanar(uint8_t *outputBuffer, MFloat &bestError, const MUInt15 pixels[16][3], const MFloat preWeightedPixels[16][3], const Options &options);
100 | 
101 |             static void CompressETC1BlockInternal(MFloat &bestTotalError, uint8_t *outputBuffer, const MUInt15 pixels[16][3], const MFloat preWeightedPixels[16][3], DifferentialResolveStorage& compressionData, const Options &options, bool punchthrough);
102 |             static void CompressETC1PunchthroughBlockInternal(MFloat &bestTotalError, uint8_t *outputBuffer, const MUInt15 pixels[16][3], const MFloat preWeightedPixels[16][3], const ParallelMath::Int16CompFlag isTransparent[16], DifferentialResolveStorage& compressionData, const Options &options);
103 |             static void CompressETC2AlphaBlockInternal(uint8_t *outputBuffer, const MUInt15 pixels[16], bool is11Bit, bool isSigned, const Options &options);
104 | 
105 |             static void ExtractBlocks(MUInt15 pixels[16][3], MFloat preWeightedPixels[16][3], const PixelBlockU8 *inputBlocks, const Options &options);
106 | 
107 |             static void ResolveHalfBlockFakeBT709RoundingAccurate(MUInt15 quantized[3], const MUInt15 sectorCumulative[3], bool isDifferential);
108 |             static void ResolveHalfBlockFakeBT709RoundingFast(MUInt15 quantized[3], const MUInt15 sectorCumulative[3], bool isDifferential);
109 |             static void ResolveTHFakeBT709Rounding(MUInt15 quantized[3], const MUInt15 target[3], const MUInt15 &granularity);
110 |             static void ConvertToFakeBT709(MFloat yuv[3], const MUInt15 color[3]);
111 |             static void ConvertToFakeBT709(MFloat yuv[3], const MFloat color[3]);
112 |             static void ConvertToFakeBT709(MFloat yuv[3], const MFloat &r, const MFloat &g, const MFloat &b);
113 |             static void ConvertFromFakeBT709(MFloat rgb[3], const MFloat yuv[3]);
114 | 
115 |             static void QuantizeETC2Alpha(int tableIndex, const MUInt15& value, const MUInt15& baseValue, const MUInt15& multiplier, bool is11Bit, bool isSigned, MUInt15& outIndexes, MUInt15& outQuantizedValues);
116 | 
117 |             static void EmitTModeBlock(uint8_t *outputBuffer, const ParallelMath::ScalarUInt16 lineColor[3], const ParallelMath::ScalarUInt16 isolatedColor[3], int32_t packedSelectors, ParallelMath::ScalarUInt16 table, bool opaque);
118 |             static void EmitHModeBlock(uint8_t *outputBuffer, const ParallelMath::ScalarUInt16 blockColors[2], ParallelMath::ScalarUInt16 sectorBits, ParallelMath::ScalarUInt16 signBits, ParallelMath::ScalarUInt16 table, bool opaque);
119 |             static void EmitETC1Block(uint8_t *outputBuffer, int blockBestFlip, int blockBestD, const int blockBestColors[2][3], const int blockBestTables[2], const ParallelMath::ScalarUInt16 blockBestSelectors[2], bool transparent);
120 | 
121 |             static const int g_flipTables[2][2][8]; // declared here only; the definition is outside this header
122 |         };
123 |     }
124 | }
125 | 
126 | #endif
127 | 


--------------------------------------------------------------------------------
/ConvectionKernels_ETC1.h:
--------------------------------------------------------------------------------
 1 | #include <stdint.h>
 2 | 
 3 | namespace cvtt
 4 | {
 5 |     namespace Tables
 6 |     {
 7 |         namespace ETC1
 8 |         {
 9 |             const int16_t g_potentialOffsets4[] =
10 |             {
11 |                 57, -64, -58, -54, -52, -48, -46, -44, -42, -40, -38, -36, -34, -32, -30, -28, -26, -24, -22, -20, -18, -16, -14, -12, -10, -8, -6, -4, -2, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 52, 54, 58, 64,
12 |                 81, -136, -124, -114, -112, -102, -100, -92, -90, -88, -80, -78, -76, -70, -68, -66, -64, -58, -56, -54, -52, -48, -46, -44, -42, -40, -36, -34, -32, -30, -26, -24, -22, -20, -18, -14, -12, -10, -8, -4, -2, 0, 2, 4, 8, 10, 12, 14, 18, 20, 22, 24, 26, 30, 32, 34, 36, 40, 42, 44, 46, 48, 52, 54, 56, 58, 64, 66, 68, 70, 76, 78, 80, 88, 90, 92, 100, 102, 112, 114, 124, 136,
13 |                 81, -232, -212, -194, -192, -174, -172, -156, -154, -152, -136, -134, -132, -118, -116, -114, -112, -98, -96, -94, -92, -80, -78, -76, -74, -72, -60, -58, -56, -54, -42, -40, -38, -36, -34, -22, -20, -18, -16, -4, -2, 0, 2, 4, 16, 18, 20, 22, 34, 36, 38, 40, 42, 54, 56, 58, 60, 72, 74, 76, 78, 80, 92, 94, 96, 98, 112, 114, 116, 118, 132, 134, 136, 152, 154, 156, 172, 174, 192, 194, 212, 232,
14 |                 81, -336, -307, -281, -278, -252, -249, -226, -223, -220, -197, -194, -191, -171, -168, -165, -162, -142, -139, -136, -133, -116, -113, -110, -107, -104, -87, -84, -81, -78, -61, -58, -55, -52, -49, -32, -29, -26, -23, -6, -3, 0, 3, 6, 23, 26, 29, 32, 49, 52, 55, 58, 61, 78, 81, 84, 87, 104, 107, 110, 113, 116, 133, 136, 139, 142, 162, 165, 168, 171, 191, 194, 197, 220, 223, 226, 249, 252, 278, 281, 307, 336,
15 |                 81, -480, -438, -402, -396, -360, -354, -324, -318, -312, -282, -276, -270, -246, -240, -234, -228, -204, -198, -192, -186, -168, -162, -156, -150, -144, -126, -120, -114, -108, -90, -84, -78, -72, -66, -48, -42, -36, -30, -12, -6, 0, 6, 12, 30, 36, 42, 48, 66, 72, 78, 84, 90, 108, 114, 120, 126, 144, 150, 156, 162, 168, 186, 192, 198, 204, 228, 234, 240, 246, 270, 276, 282, 312, 318, 324, 354, 360, 396, 402, 438, 480,
16 |                 81, -640, -584, -536, -528, -480, -472, -432, -424, -416, -376, -368, -360, -328, -320, -312, -304, -272, -264, -256, -248, -224, -216, -208, -200, -192, -168, -160, -152, -144, -120, -112, -104, -96, -88, -64, -56, -48, -40, -16, -8, 0, 8, 16, 40, 48, 56, 64, 88, 96, 104, 112, 120, 144, 152, 160, 168, 192, 200, 208, 216, 224, 248, 256, 264, 272, 304, 312, 320, 328, 360, 368, 376, 416, 424, 432, 472, 480, 528, 536, 584, 640,
17 |                 81, -848, -775, -709, -702, -636, -629, -570, -563, -556, -497, -490, -483, -431, -424, -417, -410, -358, -351, -344, -337, -292, -285, -278, -271, -264, -219, -212, -205, -198, -153, -146, -139, -132, -125, -80, -73, -66, -59, -14, -7, 0, 7, 14, 59, 66, 73, 80, 125, 132, 139, 146, 153, 198, 205, 212, 219, 264, 271, 278, 285, 292, 337, 344, 351, 358, 410, 417, 424, 431, 483, 490, 497, 556, 563, 570, 629, 636, 702, 709, 775, 848,
18 |                 81, -1464, -1328, -1234, -1192, -1098, -1056, -1004, -962, -920, -868, -826, -784, -774, -732, -690, -648, -638, -596, -554, -544, -512, -502, -460, -418, -408, -376, -366, -324, -314, -282, -272, -230, -188, -178, -146, -136, -94, -84, -52, -42, 0, 42, 52, 84, 94, 136, 146, 178, 188, 230, 272, 282, 314, 324, 366, 376, 408, 418, 460, 502, 512, 544, 554, 596, 638, 648, 690, 732, 774, 784, 826, 868, 920, 962, 1004, 1056, 1098, 1192, 1234, 1328, 1464
19 |             };
20 | 
21 |             const unsigned int g_maxPotentialOffsets = 81; // equals the largest leading value (57 or 81) of the rows of g_potentialOffsets4; presumably that value is the row's entry count - TODO confirm
22 | 
23 |             const int16_t g_thModifierTable[8] = // same eight values as Tables::ETC2::g_thModifierTable in ConvectionKernels_ETC2.h
24 |             {
25 |                 3, 6, 11, 16, 23, 32, 41, 64
26 |             };
27 |         }
28 |     }
29 | }
30 | 


--------------------------------------------------------------------------------
/ConvectionKernels_ETC2.h:
--------------------------------------------------------------------------------
 1 | #include <stdint.h>
 2 | 
 3 | namespace cvtt
 4 | {
 5 |     namespace Tables
 6 |     {
 7 |         namespace ETC2
 8 |         {
 9 |             const int16_t g_thModifierTable[8] = // same eight values as Tables::ETC1::g_thModifierTable in ConvectionKernels_ETC1.h
10 |             {
11 |                 3, 6, 11, 16, 23, 32, 41, 64
12 |             };
13 | 
14 |             const int16_t g_alphaModifierTablePositive[16][4] =
15 |             {
16 |                 { 2, 5, 8, 14, },
17 |                 { 2, 6, 9, 12, },
18 |                 { 1, 4, 7, 12, },
19 |                 { 1, 3, 5, 12, },
20 |                 { 2, 5, 7, 11, },
21 |                 { 2, 6, 8, 10, },
22 |                 { 3, 6, 7, 10, },
23 |                 { 2, 4, 7, 10, },
24 |                 { 1, 5, 7, 9, },
25 |                 { 1, 4, 7, 9, },
26 |                 { 1, 3, 7, 9, },
27 |                 { 1, 4, 6, 9, },
28 |                 { 2, 3, 6, 9, },
29 |                 { 0, 1, 2, 9, },
30 |                 { 3, 5, 7, 8, },
31 |                 { 2, 4, 6, 8, },
32 |             };
33 |         }
34 |     }
35 | }
36 | 


--------------------------------------------------------------------------------
/ConvectionKernels_ETC2_Rounding.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <stdint.h>
 3 | 
 4 | // This file is generated by the MakeTables app.  Do not edit this file manually.
 5 | 
 6 | namespace cvtt { namespace Tables { namespace ETC2 {
 7 |     const int g_alphaRoundingTableWidth = 13;
 8 |     const uint8_t g_alphaRoundingTables[16][13] =
 9 |     {
10 |         { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 2, 3 },
11 |         { 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3 },
12 |         { 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3 },
13 |         { 0, 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3 },
14 |         { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 },
15 |         { 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3 },
16 |         { 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 3, 3 },
17 |         { 0, 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 3 },
18 |         { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3, 3 },
19 |         { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 },
20 |         { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 },
21 |         { 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3, 3, 3 },
22 |         { 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3 },
23 |         { 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3 },
24 |         { 0, 0, 0, 0, 0, 1, 1, 2, 3, 3, 3, 3, 3 },
25 |         { 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 3, 3, 3 },
26 |     };
27 | }}}
28 | 


--------------------------------------------------------------------------------
/ConvectionKernels_EndpointRefiner.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #ifndef __CVTT_ENDPOINTREFINER_H__
  3 | #define __CVTT_ENDPOINTREFINER_H__
  4 | 
  5 | #include "ConvectionKernels_ParallelMath.h"
  6 | 
  7 | namespace cvtt
  8 | {
  9 |     namespace Internal
 10 |     {
 11 |         // Solve for a, b where v = a*t + b
 12 |         // This allows endpoints to be mapped to where T=0 and T=1
 13 |         // Least squares from totals:
 14 |         // a = (tv - t*v/w)/(tt - t*t/w)
 15 |         // b = (v - a*t)/w
 16 |         template<int TVectorSize> // TVectorSize: number of channels tracked per sample
 17 |         class EndpointRefiner
 18 |         {
 19 |         public:
 20 |             typedef ParallelMath::Float MFloat;
 21 |             typedef ParallelMath::UInt16 MUInt16;
 22 |             typedef ParallelMath::UInt15 MUInt15;
 23 |             typedef ParallelMath::AInt16 MAInt16;
 24 |             typedef ParallelMath::SInt16 MSInt16;
 25 |             typedef ParallelMath::SInt32 MSInt32;
 26 | 
 27 |             MFloat m_tv[TVectorSize]; // per-channel running sum of t * v
 28 |             MFloat m_v[TVectorSize]; // per-channel running sum of v
 29 |             MFloat m_tt; // running sum of t * t (times weight in ContributePW)
 30 |             MFloat m_t; // running sum of t (times weight in ContributePW)
 31 |             MFloat m_w; // running sum of the weights passed to ContributePW
 32 |             int m_wu; // number of ContributeUnweightedPW calls; added to m_w when solving
 33 | 
 34 |             float m_rcpMaxIndex; // 1 / (indexRange - 1); maps an index to t
 35 |             float m_channelWeights[TVectorSize];
 36 |             float m_rcpChannelWeights[TVectorSize]; // 1 / weight, or 1.0 where the weight is zero
 37 | 
 38 |             void Init(int indexRange, const float channelWeights[TVectorSize]) // Resets all accumulators and caches the index scale and channel weights.
 39 |             {
 40 |                 for (int ch = 0; ch < TVectorSize; ch++)
 41 |                 {
 42 |                     m_tv[ch] = ParallelMath::MakeFloatZero();
 43 |                     m_v[ch] = ParallelMath::MakeFloatZero();
 44 |                 }
 45 |                 m_tt = ParallelMath::MakeFloatZero();
 46 |                 m_t = ParallelMath::MakeFloatZero();
 47 |                 m_w = ParallelMath::MakeFloatZero();
 48 |                 // NOTE(review): indexRange == 1 would divide by zero below; presumably callers pass at least 2 - confirm.
 49 |                 m_rcpMaxIndex = 1.0f / static_cast<float>(indexRange - 1);
 50 | 
 51 |                 for (int ch = 0; ch < TVectorSize; ch++)
 52 |                 {
 53 |                     m_channelWeights[ch] = channelWeights[ch];
 54 |                     m_rcpChannelWeights[ch] = 1.0f; // fallback so a zero weight never produces a division by zero
 55 |                     if (m_channelWeights[ch] != 0.0f)
 56 |                         m_rcpChannelWeights[ch] = 1.0f / channelWeights[ch];
 57 |                 }
 58 | 
 59 |                 m_wu = 0;
 60 |             }
 61 | 
 62 |             void ContributePW(const MFloat *pwFloatPixel, const MUInt15 &index, const MFloat &weight) // Adds one sample, scaled by weight, to every accumulator.
 63 |             {
 64 |                 MFloat t = ParallelMath::ToFloat(index) * m_rcpMaxIndex;
 65 | 
 66 |                 for (int ch = 0; ch < TVectorSize; ch++)
 67 |                 {
 68 |                     MFloat v = pwFloatPixel[ch] * weight;
 69 | 
 70 |                     m_tv[ch] = m_tv[ch] + t * v;
 71 |                     m_v[ch] = m_v[ch] + v;
 72 |                 }
 73 |                 m_tt = m_tt + weight * t * t;
 74 |                 m_t = m_t + weight * t;
 75 |                 m_w = m_w + weight;
 76 |             }
 77 | 
 78 |             void ContributeUnweightedPW(const MFloat *pwFloatPixel, const MUInt15 &index, int numRealChannels) // Adds one sample with an implicit weight of 1.
 79 |             {
 80 |                 MFloat t = ParallelMath::ToFloat(index) * m_rcpMaxIndex;
 81 |                 // Only the first numRealChannels channels accumulate; the remaining m_tv / m_v entries are left untouched.
 82 |                 for (int ch = 0; ch < numRealChannels; ch++)
 83 |                 {
 84 |                     MFloat v = pwFloatPixel[ch];
 85 | 
 86 |                     m_tv[ch] = m_tv[ch] + t * v;
 87 |                     m_v[ch] = m_v[ch] + v;
 88 |                 }
 89 |                 m_tt = m_tt + t * t;
 90 |                 m_t = m_t + t;
 91 |                 m_wu++; // counted as an integer here instead of being added to m_w
 92 |             }
 93 | 
 94 |             void ContributeUnweightedPW(const MFloat *floatPixel, const MUInt15 &index) // Overload that contributes all TVectorSize channels.
 95 |             {
 96 |                 ContributeUnweightedPW(floatPixel, index, TVectorSize);
 97 |             }
 98 | 
 99 |             void GetRefinedEndpoints(MFloat endPoint[2][TVectorSize]) // Solves the fit from the totals; endPoint[0] is b (t = 0), endPoint[1] is a + b (t = 1).
100 |             {
101 |                 // a = (tv - t*v/w)/(tt - t*t/w)
102 |                 // b = (v - a*t)/w
103 |                 MFloat w = m_w + ParallelMath::MakeFloat(static_cast<float>(m_wu));
104 | 
105 |                 ParallelMath::MakeSafeDenominator(w);
106 |                 MFloat wRcp = ParallelMath::Reciprocal(w);
107 | 
108 |                 MFloat adenom = (m_tt * w - m_t * m_t) * wRcp;
109 |                 // Where adenom is exactly zero it is replaced by 1 for the division below, and both endpoints are overridden with v / w afterwards.
110 |                 ParallelMath::FloatCompFlag adenomZero = ParallelMath::Equal(adenom, ParallelMath::MakeFloatZero());
111 |                 ParallelMath::ConditionalSet(adenom, adenomZero, ParallelMath::MakeFloat(1.0f));
112 | 
113 |                 for (int ch = 0; ch < TVectorSize; ch++)
114 |                 {
115 |                     /*
116 |                     if (adenom == 0.0)
117 |                     p1 = p2 = er.v / er.w;
118 |                     else
119 |                     {
120 |                     float4 a = (er.tv - er.t*er.v / er.w) / adenom;
121 |                     float4 b = (er.v - a * er.t) / er.w;
122 |                     p1 = b;
123 |                     p2 = a + b;
124 |                     }
125 |                     */
126 | 
127 |                     MFloat a = (m_tv[ch] - m_t * m_v[ch] * wRcp) / adenom;
128 |                     MFloat b = (m_v[ch] - a * m_t) * wRcp;
129 | 
130 |                     MFloat p1 = b;
131 |                     MFloat p2 = a + b;
132 | 
133 |                     ParallelMath::ConditionalSet(p1, adenomZero, (m_v[ch] * wRcp));
134 |                     ParallelMath::ConditionalSet(p2, adenomZero, p1); // p1 already holds v / w where the flag is set
135 | 
136 |                     // Unweight
137 |                     float inverseWeight = m_rcpChannelWeights[ch];
138 | 
139 |                     endPoint[0][ch] = p1 * inverseWeight;
140 |                     endPoint[1][ch] = p2 * inverseWeight;
141 |                 }
142 |             }
143 | 
144 |             void GetRefinedEndpointsLDR(MUInt15 endPoint[2][TVectorSize], int numRealChannels, const ParallelMath::RoundTowardNearestForScope *roundingMode) // Refined endpoints clamped to [0, 255] and converted to MUInt15.
145 |             {
146 |                 MFloat floatEndPoint[2][TVectorSize];
147 |                 GetRefinedEndpoints(floatEndPoint);
148 |                 // NOTE(review): numRealChannels is not read in this body; all TVectorSize channels are converted.
149 |                 for (int epi = 0; epi < 2; epi++)
150 |                     for (int ch = 0; ch < TVectorSize; ch++)
151 |                         endPoint[epi][ch] = ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(floatEndPoint[epi][ch], 0.0f, 255.0f), roundingMode);
152 |             }
153 | 
154 |             void GetRefinedEndpointsLDR(MUInt15 endPoint[2][TVectorSize], const ParallelMath::RoundTowardNearestForScope *roundingMode) // Overload that passes TVectorSize as numRealChannels.
155 |             {
156 |                 GetRefinedEndpointsLDR(endPoint, TVectorSize, roundingMode);
157 |             }
158 | 
159 |             void GetRefinedEndpointsHDR(MSInt16 endPoint[2][TVectorSize], bool isSigned, const ParallelMath::RoundTowardNearestForScope *roundingMode) // Refined endpoints clamped and converted to MSInt16.
160 |             {
161 |                 MFloat floatEndPoint[2][TVectorSize];
162 |                 GetRefinedEndpoints(floatEndPoint);
163 | 
164 |                 for (int epi = 0; epi < 2; epi++)
165 |                 {
166 |                     for (int ch = 0; ch < TVectorSize; ch++)
167 |                     {
168 |                         MFloat f = floatEndPoint[epi][ch];
169 |                         if (isSigned) // signed: clamp to [-31743, 31743]; unsigned: clamp to [0, 31743]
170 |                             endPoint[epi][ch] = ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::RoundAndConvertToS16(ParallelMath::Clamp(f, -31743.0f, 31743.0f), roundingMode));
171 |                         else
172 |                             endPoint[epi][ch] = ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(f, 0.0f, 31743.0f), roundingMode));
173 |                     }
174 |                 }
175 |             }
176 |         };
177 |     }
178 | }
179 | 
180 | #endif
181 | 
182 | 


--------------------------------------------------------------------------------
/ConvectionKernels_EndpointSelector.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #ifndef __CVTT_ENDPOINTSELECTOR_H__
  3 | #define __CVTT_ENDPOINTSELECTOR_H__
  4 | 
  5 | #include "ConvectionKernels_ParallelMath.h"
  6 | #include "ConvectionKernels_UnfinishedEndpoints.h"
  7 | #include "ConvectionKernels_PackedCovarianceMatrix.h"
  8 | 
  9 | namespace cvtt
 10 | {
 11 |     namespace Internal
 12 |     {
 13 |         static const int NumEndpointSelectorPasses = 3; // EndpointSelector::ContributePass handles pass values 0, 1 and 2
 14 | 
 15 |         template<int TVectorSize, int TIterationCount>
 16 |         class EndpointSelector
 17 |         {
 18 |         public:
 19 |             typedef ParallelMath::Float MFloat;
 20 | 
 21 |             EndpointSelector() // Zeroes the centroid, direction and weight total, and sets the distance range to an empty interval.
 22 |             {
 23 |                 for (int ch = 0; ch < TVectorSize; ch++)
 24 |                 {
 25 |                     m_centroid[ch] = ParallelMath::MakeFloatZero();
 26 |                     m_direction[ch] = ParallelMath::MakeFloatZero();
 27 |                 }
 28 |                 m_weightTotal = ParallelMath::MakeFloatZero();
 29 |                 m_minDist = ParallelMath::MakeFloat(FLT_MAX); // starts at +FLT_MAX; ContributeMinMax lowers it with Min
 30 |                 m_maxDist = ParallelMath::MakeFloat(-FLT_MAX); // starts at -FLT_MAX; ContributeMinMax raises it with Max
 31 |             }
 32 | 
 33 |             void ContributePass(const MFloat *value, int pass, const MFloat &weight) // Routes one sample to the accumulator for the given pass; other pass values do nothing.
 34 |             {
 35 |                 if (pass == 0)
 36 |                     ContributeCentroid(value, weight);
 37 |                 else if (pass == 1)
 38 |                     ContributeDirection(value, weight);
 39 |                 else if (pass == 2)
 40 |                     ContributeMinMax(value); // weight is not used for the min/max pass
 41 |             }
 42 | 
 43 |             void FinishPass(int pass) // Finalizes pass 0 (centroid) or pass 1 (direction); any other pass value, including 2, does nothing here.
 44 |             {
 45 |                 if (pass == 0)
 46 |                     FinishCentroid();
 47 |                 else if (pass == 1)
 48 |                     FinishDirection();
 49 |             }
 50 | 
 51 |             UnfinishedEndpoints<TVectorSize> GetEndpoints(const float channelWeights[TVectorSize]) const // Builds base/offset endpoints from the centroid, direction and min/max distances, with channel weighting removed.
 52 |             {
 53 |                 MFloat unweightedBase[TVectorSize];
 54 |                 MFloat unweightedOffset[TVectorSize];
 55 |                 // The extremes lie at m_minDist and m_maxDist along m_direction from m_centroid, still in weighted units.
 56 |                 for (int ch = 0; ch < TVectorSize; ch++)
 57 |                 {
 58 |                     MFloat min = m_centroid[ch] + m_direction[ch] * m_minDist;
 59 |                     MFloat max = m_centroid[ch] + m_direction[ch] * m_maxDist;
 60 |                     // A zero channel weight would make the divisions below divide by zero, so substitute 1.
 61 |                     float safeWeight = channelWeights[ch];
 62 |                     if (safeWeight == 0.f)
 63 |                         safeWeight = 1.0f;
 64 |                     // Fix: divide by the guarded weight; the raw channelWeights[ch] was used here, leaving safeWeight unused.
 65 |                     unweightedBase[ch] = min / safeWeight;
 66 |                     unweightedOffset[ch] = (max - min) / safeWeight;
 67 |                 }
 68 | 
 69 |                 return UnfinishedEndpoints<TVectorSize>(unweightedBase, unweightedOffset);
 70 |             }
 71 | 
 72 |         private:
 73 |             void ContributeCentroid(const MFloat *value, const MFloat &weight) // Accumulates the weighted sample sum and the total weight.
 74 |             {
 75 |                 for (int ch = 0; ch < TVectorSize; ch++)
 76 |                     m_centroid[ch] = m_centroid[ch] + value[ch] * weight; // holds a weighted sum until FinishCentroid divides it
 77 |                 m_weightTotal = m_weightTotal + weight;
 78 |             }
 79 | 
 80 |             void FinishCentroid() // Turns the accumulated weighted sum into a mean by dividing by the total weight.
 81 |             {
 82 |                 MFloat denom = m_weightTotal;
 83 |                 ParallelMath::MakeSafeDenominator(denom); // adjusts the local copy before it is used as a divisor; m_weightTotal itself is unchanged
 84 | 
 85 |                 for (int ch = 0; ch < TVectorSize; ch++)
 86 |                     m_centroid[ch] = m_centroid[ch] / denom;
 87 |             }
 88 | 
 89 |             void ContributeDirection(const MFloat *value, const MFloat &weight) // Adds the sample's offset from the centroid to the covariance accumulator.
 90 |             {
 91 |                 MFloat diff[TVectorSize];
 92 |                 for (int ch = 0; ch < TVectorSize; ch++)
 93 |                     diff[ch] = value[ch] - m_centroid[ch]; // reads m_centroid as-is; ContributePass routes here only for pass 1
 94 | 
 95 |                 m_covarianceMatrix.Add(diff, weight);
 96 |             }
 97 | 
 98 |             void FinishDirection() // Derives m_direction by repeatedly multiplying a vector by the covariance matrix, then normalizing it.
 99 |             {
100 |                 MFloat approx[TVectorSize];
101 |                 for (int ch = 0; ch < TVectorSize; ch++)
102 |                     approx[ch] = ParallelMath::MakeFloat(1.0f); // starting vector: all ones
103 |                 // TIterationCount rounds of product-and-rescale (power-iteration style).
104 |                 for (int i = 0; i < TIterationCount; i++)
105 |                 {
106 |                     MFloat product[TVectorSize];
107 |                     m_covarianceMatrix.Product(product, approx);
108 |                     // NOTE(review): this takes the signed maximum component, not the largest magnitude.
109 |                     MFloat largestComponent = product[0];
110 |                     for (int ch = 1; ch < TVectorSize; ch++)
111 |                         largestComponent = ParallelMath::Max(largestComponent, product[ch]);
112 | 
113 |                     // product = largestComponent*newApprox
114 |                     ParallelMath::MakeSafeDenominator(largestComponent);
115 |                     for (int ch = 0; ch < TVectorSize; ch++)
116 |                         approx[ch] = product[ch] / largestComponent;
117 |                 }
118 | 
119 |                 // Normalize
120 |                 MFloat approxLen = ParallelMath::MakeFloatZero();
121 |                 for (int ch = 0; ch < TVectorSize; ch++)
122 |                     approxLen = approxLen + approx[ch] * approx[ch];
123 | 
124 |                 approxLen = ParallelMath::Sqrt(approxLen);
125 | 
126 |                 ParallelMath::MakeSafeDenominator(approxLen);
127 | 
128 |                 for (int ch = 0; ch < TVectorSize; ch++)
129 |                     m_direction[ch] = approx[ch] / approxLen;
130 |             }
131 | 
132 |             void ContributeMinMax(const MFloat *value)
133 |             {
134 |                 // Dot product of the centroid-relative sample with m_direction; widens the tracked [min, max] span.
135 |                 MFloat projection = ParallelMath::MakeFloatZero();
136 |                 for (int channel = 0; channel < TVectorSize; ++channel)
137 |                     projection = projection + m_direction[channel] * (value[channel] - m_centroid[channel]);
138 |                 m_maxDist = ParallelMath::Max(m_maxDist, projection);
139 |                 m_minDist = ParallelMath::Min(m_minDist, projection);
140 |             }
141 | 
142 |             ParallelMath::Float m_centroid[TVectorSize];  // weighted sample sum (ContributeCentroid); the weighted mean after FinishCentroid
143 |             ParallelMath::Float m_direction[TVectorSize];  // length-normalized axis estimate written by FinishDirection
144 |             PackedCovarianceMatrix<TVectorSize> m_covarianceMatrix;  // fed centroid-relative samples by ContributeDirection
145 |             ParallelMath::Float m_weightTotal;  // sum of the weights passed to ContributeCentroid
146 | 
147 |             ParallelMath::Float m_minDist;  // smallest projection onto m_direction recorded by ContributeMinMax
148 |             ParallelMath::Float m_maxDist;  // largest projection onto m_direction recorded by ContributeMinMax
149 |         };
150 |     }
151 | }
152 | 
153 | #endif
154 | 


--------------------------------------------------------------------------------
/ConvectionKernels_FakeBT709_Rounding.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #include <stdint.h>
  3 | 
  4 | // This file is generated by the MakeTables app.  Do not edit this file manually.
  5 | 
  6 | namespace cvtt { namespace Tables { namespace FakeBT709 {
  7 |     const uint8_t g_rounding16[] =
  8 |     {
  9 |         0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 
 10 |         0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 
 11 |         0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
 12 |         0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
 13 |         0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
 14 |         0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
 15 |         0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
 16 |         0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
 17 |         0, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 
 18 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 
 19 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 
 20 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 
 21 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 
 22 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 
 23 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 
 24 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, 
 25 | 
 26 |         0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 
 27 |         0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 
 28 |         0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
 29 |         0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
 30 |         0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
 31 |         0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
 32 |         0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
 33 |         0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
 34 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 
 35 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 
 36 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 
 37 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 
 38 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 
 39 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 
 40 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 
 41 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, 
 42 | 
 43 |         0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 
 44 |         0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 
 45 |         0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
 46 |         0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
 47 |         0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
 48 |         0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
 49 |         0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
 50 |         0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
 51 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 
 52 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 
 53 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 
 54 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 
 55 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 
 56 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 
 57 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 
 58 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, 
 59 | 
 60 |         0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 
 61 |         0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 
 62 |         0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 
 63 |         0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
 64 |         0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
 65 |         0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
 66 |         0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
 67 |         0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
 68 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 
 69 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 
 70 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 
 71 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 
 72 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 
 73 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 
 74 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 
 75 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, 
 76 | 
 77 |         0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 
 78 |         0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 
 79 |         0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 
 80 |         0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
 81 |         0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
 82 |         0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
 83 |         0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
 84 |         1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
 85 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 
 86 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 
 87 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 
 88 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 
 89 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 
 90 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 
 91 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 
 92 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, 
 93 | 
 94 |         0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 
 95 |         0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 
 96 |         0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 
 97 |         0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
 98 |         0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
 99 |         1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
100 |         1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
101 |         1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
102 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 
103 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 
104 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 
105 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 
106 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 
107 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 
108 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 
109 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, 
110 | 
111 |         0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 
112 |         0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 
113 |         0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 
114 |         0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 4, 4, 4, 4, 4, 
115 |         1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
116 |         1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
117 |         1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
118 |         1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
119 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 
120 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 
121 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 
122 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 
123 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 
124 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 
125 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 
126 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, 
127 | 
128 |         0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 
129 |         0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 
130 |         1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
131 |         1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
132 |         1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
133 |         1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
134 |         1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
135 |         1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
136 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 5, 5, 5, 5, 6, 
137 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 
138 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 
139 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 
140 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 
141 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 
142 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 
143 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, 
144 | 
145 |         0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 
146 |         1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 
147 |         1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
148 |         1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
149 |         1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
150 |         1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
151 |         1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
152 |         1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
153 |         1, 2, 2, 2, 2, 2, 2, 2, 2, 5, 5, 5, 5, 5, 5, 5, 
154 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 
155 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 
156 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 
157 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 
158 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 
159 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 
160 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, 
161 | 
162 |         1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 
163 |         1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 
164 |         1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
165 |         1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
166 |         1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
167 |         1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
168 |         1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
169 |         1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
170 |         1, 1, 2, 2, 2, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
171 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 
172 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 
173 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 
174 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 
175 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 
176 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 
177 |         3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, 
178 | 
179 |         1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 
180 |         1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 
181 |         1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
182 |         1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
183 |         1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
184 |         1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
185 |         1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
186 |         1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
187 |         1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
188 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 
189 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 
190 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 
191 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 
192 |         3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 7, 7, 7, 7, 7, 7, 
193 |         3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, 
194 |         3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, 
195 | 
196 |         1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 
197 |         1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 
198 |         1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
199 |         1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
200 |         1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
201 |         1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
202 |         1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
203 |         1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
204 |         1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
205 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 
206 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 
207 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 
208 |         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 
209 |         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 
210 |         3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, 
211 |         3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, 
212 | 
213 |         1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 
214 |         1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 
215 |         1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
216 |         1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
217 |         1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
218 |         1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
219 |         1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
220 |         1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
221 |         1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
222 |         2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 
223 |         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 
224 |         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 
225 |         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 
226 |         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 
227 |         3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, 
228 |         3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, 
229 | 
230 |         1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 
231 |         1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 
232 |         1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
233 |         1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
234 |         1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
235 |         1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
236 |         1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
237 |         1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
238 |         1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
239 |         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 
240 |         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 
241 |         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 
242 |         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 
243 |         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 
244 |         3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, 
245 |         3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, 
246 | 
247 |         1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 
248 |         1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 
249 |         1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
250 |         1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
251 |         1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
252 |         1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
253 |         1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
254 |         1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
255 |         1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
256 |         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 
257 |         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 
258 |         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 
259 |         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 
260 |         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 
261 |         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 
262 |         3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, 
263 | 
264 |         1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 
265 |         1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 
266 |         1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
267 |         1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
268 |         1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
269 |         1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
270 |         1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
271 |         1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
272 |         1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
273 |         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 
274 |         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 
275 |         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 
276 |         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 
277 |         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 
278 |         3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 
279 |         3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, 
280 | 
281 |     };
282 | }}}
283 | 


--------------------------------------------------------------------------------
/ConvectionKernels_IndexSelector.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Convection Texture Tools
 3 | Copyright (c) 2018-2019 Eric Lasota
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining
 6 | a copy of this software and associated documentation files (the
 7 | "Software"), to deal in the Software without restriction, including
 8 | without limitation the rights to use, copy, modify, merge, publish,
 9 | distribute, sublicense, and/or sell copies of the Software, and to
10 | permit persons to whom the Software is furnished to do so, subject
11 | to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included
14 | in all copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 | 
24 | -------------------------------------------------------------------------------------
25 | 
26 | Portions based on DirectX Texture Library (DirectXTex)
27 | 
28 | Copyright (c) Microsoft Corporation. All rights reserved.
29 | Licensed under the MIT License.
30 | 
31 | http://go.microsoft.com/fwlink/?LinkId=248926
32 | */
33 | #include "ConvectionKernels_Config.h"
34 | 
35 | #if !defined(CVTT_SINGLE_FILE) || defined(CVTT_SINGLE_FILE_IMPL)
36 | 
37 | #include "ConvectionKernels_IndexSelector.h"
38 | 
39 | namespace cvtt
40 | {
41 |     namespace Internal
42 |     {
43 |         const ParallelMath::UInt16 g_weightReciprocals[17] =  // fixed-point reciprocals: entry [r] holds 32768 / (r - 1), rounded to nearest
44 |         {
45 |             ParallelMath::MakeUInt16(0),        // -1 (r = 0: no reciprocal exists, stored as 0)
46 |             ParallelMath::MakeUInt16(0),        // 0  (r = 1: no reciprocal exists, stored as 0)
47 |             ParallelMath::MakeUInt16(32768),    // 1  (the number in each comment is the divisor, r - 1)
48 |             ParallelMath::MakeUInt16(16384),    // 2
49 |             ParallelMath::MakeUInt16(10923),    // 3
50 |             ParallelMath::MakeUInt16(8192),     // 4
51 |             ParallelMath::MakeUInt16(6554),     // 5
52 |             ParallelMath::MakeUInt16(5461),     // 6
53 |             ParallelMath::MakeUInt16(4681),     // 7
54 |             ParallelMath::MakeUInt16(4096),     // 8
55 |             ParallelMath::MakeUInt16(3641),     // 9
56 |             ParallelMath::MakeUInt16(3277),     // 10
57 |             ParallelMath::MakeUInt16(2979),     // 11
58 |             ParallelMath::MakeUInt16(2731),     // 12
59 |             ParallelMath::MakeUInt16(2521),     // 13
60 |             ParallelMath::MakeUInt16(2341),     // 14
61 |             ParallelMath::MakeUInt16(2185),     // 15
62 |         };
63 |     }
64 | }
65 | 
66 | #endif
67 | 


--------------------------------------------------------------------------------
/ConvectionKernels_IndexSelector.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #ifndef __CVTT_INDEXSELECTOR_H__
  3 | #define __CVTT_INDEXSELECTOR_H__
  4 | 
  5 | #include "ConvectionKernels_ParallelMath.h"
  6 | 
  7 | namespace cvtt
  8 | {
  9 |     namespace Internal
 10 |     {
 11 |         extern const ParallelMath::UInt16 g_weightReciprocals[17];
 12 | 
 13 |         template<int TVectorSize>
 14 |         class IndexSelector  // Picks interpolation indexes for pixels along an endpoint pair and rebuilds pixels from indexes.
 15 |         {
 16 |         public:
 17 |             typedef ParallelMath::Float MFloat;
 18 |             typedef ParallelMath::UInt16 MUInt16;
 19 |             typedef ParallelMath::UInt15 MUInt15;
 20 |             typedef ParallelMath::SInt16 MSInt16;
 21 |             typedef ParallelMath::AInt16 MAInt16;
 22 |             typedef ParallelMath::SInt32 MSInt32;
 23 |             typedef ParallelMath::UInt31 MUInt31;
 24 | 
 25 |             // Stores the interpolation endpoints and derives the projection origin/axis from the color-space endpoints.
 26 |             template<class TInterpolationEPType, class TColorEPType>
 27 |             void Init(const float *channelWeights, const TInterpolationEPType interpolationEndPoints[2][TVectorSize], const TColorEPType colorSpaceEndpoints[2][TVectorSize], int range)
 28 |             {
 29 |                 // In BC6H, the interpolation endpoints are higher-precision than the endpoints in color space.
 30 |                 // We need to select indexes using the color-space endpoints.
 31 | 
 32 |                 m_isUniform = true;  // stays true only when every channel weight equals channelWeights[0]
 33 |                 for (int ch = 1; ch < TVectorSize; ch++)
 34 |                 {
 35 |                     if (channelWeights[ch] != channelWeights[0])
 36 |                         m_isUniform = false;
 37 |                 }
 38 | 
 39 |                 // To work with channel weights, we need something where:
 40 |                 // pxDiff = px - ep[0]
 41 |                 // epDiff = ep[1] - ep[0]
 42 |                 //
 43 |                 // weightedEPDiff = epDiff * channelWeights
 44 |                 // normalizedWeightedAxis = weightedEPDiff / len(weightedEPDiff)
 45 |                 // normalizedIndex = dot(pxDiff * channelWeights, normalizedWeightedAxis) / len(weightedEPDiff)
 46 |                 // index = normalizedIndex * maxValue
 47 |                 //
 48 |                 // Equivalent to:
 49 |                 // axis = channelWeights * maxValue * epDiff * channelWeights / lenSquared(epDiff * channelWeights)
 50 |                 // index = dot(axis, pxDiff)
 51 | 
 52 |                 for (int ep = 0; ep < 2; ep++)
 53 |                     for (int ch = 0; ch < TVectorSize; ch++)
 54 |                         m_endPoint[ep][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(interpolationEndPoints[ep][ch]);
 55 | 
 56 |                 m_range = range;
 57 |                 m_maxValue = static_cast<float>(range - 1);  // upper clamp bound used by SelectIndexLDR
 58 | 
 59 |                 MFloat epDiffWeighted[TVectorSize];
 60 |                 for (int ch = 0; ch < TVectorSize; ch++)
 61 |                 {
 62 |                     m_origin[ch] = ParallelMath::ToFloat(colorSpaceEndpoints[0][ch]);
 63 |                     MFloat opposingOriginCh = ParallelMath::ToFloat(colorSpaceEndpoints[1][ch]);
 64 |                     epDiffWeighted[ch] = (opposingOriginCh - m_origin[ch]) * channelWeights[ch];
 65 |                 }
 66 | 
 67 |                 MFloat lenSquared = epDiffWeighted[0] * epDiffWeighted[0];
 68 |                 for (int ch = 1; ch < TVectorSize; ch++)
 69 |                     lenSquared = lenSquared + epDiffWeighted[ch] * epDiffWeighted[ch];
 70 | 
 71 |                 ParallelMath::MakeSafeDenominator(lenSquared);  // adjusted in place before the division below
 72 | 
 73 |                 MFloat maxValueDividedByLengthSquared = ParallelMath::MakeFloat(m_maxValue) / lenSquared;
 74 | 
 75 |                 for (int ch = 0; ch < TVectorSize; ch++)
 76 |                     m_axis[ch] = epDiffWeighted[ch] * channelWeights[ch] * maxValueDividedByLengthSquared;
 77 |             }
 78 | 
 79 |             template<bool TSigned>
 80 |             void Init(const float channelWeights[TVectorSize], const MUInt15 endPoints[2][TVectorSize], int range)
 81 |             {
 82 |                 // Convenience overload for callers whose interpolation endpoints and color-space
 83 |                 // endpoints are the same values.  TSigned is not consulted by this overload.
 84 |                 // The general Init performs the cast to MAInt16 itself, so no separate
 85 |                 // conversion of the endpoints is needed before forwarding.
 86 | 
 87 |                 Init<MUInt15, MUInt15>(channelWeights, endPoints, endPoints, range);
 88 |             }
 89 |             // Rebuilds numRealChannels channels of a pixel from index, blending the stored endpoints with a weight on a 0..64 scale.
 90 |             void ReconstructLDR_BC7(const MUInt15 &index, MUInt15* pixel, int numRealChannels) const
 91 |             {
 92 |                 MUInt15 weight = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 256, 9));
 93 | 
 94 |                 for (int ch = 0; ch < numRealChannels; ch++)
 95 |                 {
 96 |                     MUInt15 ep0f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply((ParallelMath::MakeUInt15(64) - weight), ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[0][ch])));
 97 |                     MUInt15 ep1f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply(weight, ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[1][ch])));
 98 |                     pixel[ch] = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ep0f + ep1f + ParallelMath::MakeUInt15(32), 6));  // +32 rounds before dropping 6 bits
 99 |                 }
100 |             }
101 |             // Same blend as ReconstructLDR_BC7 but with the weight on a 0..256 scale (8 fractional bits instead of 6).
102 |             void ReconstructLDRPrecise(const MUInt15 &index, MUInt15* pixel, int numRealChannels) const
103 |             {
104 |                 MUInt15 weight = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 64, 7));
105 | 
106 |                 for (int ch = 0; ch < numRealChannels; ch++)
107 |                 {
108 |                     MUInt15 ep0f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply((ParallelMath::MakeUInt15(256) - weight), ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[0][ch])));
109 |                     MUInt15 ep1f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply(weight, ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[1][ch])));
110 |                     pixel[ch] = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ep0f + ep1f + ParallelMath::MakeUInt15(128), 8));  // +128 rounds before dropping 8 bits
111 |                 }
112 |             }
113 |             // Overload that reconstructs all TVectorSize channels.
114 |             void ReconstructLDR_BC7(const MUInt15 &index, MUInt15* pixel) const
115 |             {
116 |                 ReconstructLDR_BC7(index, pixel, TVectorSize);
117 |             }
118 |             // Overload that reconstructs all TVectorSize channels.
119 |             void ReconstructLDRPrecise(const MUInt15 &index, MUInt15* pixel) const
120 |             {
121 |                 ReconstructLDRPrecise(index, pixel, TVectorSize);
122 |             }
123 |             // Projects pixel onto the axis computed by Init, clamps to [0, m_maxValue], and rounds to an integer index.
124 |             MUInt15 SelectIndexLDR(const MFloat* pixel, const ParallelMath::RoundTowardNearestForScope* rtn) const
125 |             {
126 |                 MFloat dist = (pixel[0] - m_origin[0]) * m_axis[0];
127 |                 for (int ch = 1; ch < TVectorSize; ch++)
128 |                     dist = dist + (pixel[ch] - m_origin[ch]) * m_axis[ch];
129 | 
130 |                 return ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(dist, 0.0f, m_maxValue), rtn);
131 |             }
132 | 
133 |         protected:
134 |             MAInt16 m_endPoint[2][TVectorSize];  // interpolation endpoints stored by Init; protected so derived selectors can read them
135 | 
136 |         private:
137 |             MFloat m_origin[TVectorSize];  // color-space endpoint 0 converted to float
138 |             MFloat m_axis[TVectorSize];    // dot(pixel - m_origin, m_axis) gives the unclamped fractional index
139 |             int m_range;                   // value passed to Init; also the lookup position in g_weightReciprocals
140 |             float m_maxValue;              // range - 1
141 |             bool m_isUniform;              // set by Init; not read anywhere inside this class
142 |         };
143 |     }
144 | }
145 | 
146 | #endif
147 | 
148 | 


--------------------------------------------------------------------------------
/ConvectionKernels_IndexSelectorHDR.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #ifndef __CVTT_INDEXSELECTORHDR_H__
  3 | #define __CVTT_INDEXSELECTORHDR_H__
  4 | 
  5 | #include "ConvectionKernels_ParallelMath.h"
  6 | #include "ConvectionKernels_IndexSelector.h"
  7 | 
  8 | namespace cvtt
  9 | {
 10 |     namespace Internal
 11 |     {
 12 |         ParallelMath::SInt16 UnscaleHDRValueSigned(const ParallelMath::SInt16 &v);
 13 |         ParallelMath::UInt15 UnscaleHDRValueUnsigned(const ParallelMath::UInt16 &v);
 14 | 
 15 |         template<int TVectorSize>
 16 |         class IndexSelectorHDR : public IndexSelector<TVectorSize>
 17 |         {
 18 |         public:
 19 |             typedef ParallelMath::UInt15 MUInt15;
 20 |             typedef ParallelMath::UInt16 MUInt16;
 21 |             typedef ParallelMath::UInt31 MUInt31;
 22 |             typedef ParallelMath::SInt16 MSInt16;
 23 |             typedef ParallelMath::SInt32 MSInt32;
 24 |             typedef ParallelMath::Float MFloat;
 25 | 
 26 |         private:
 27 | 
 28 |             MUInt15 InvertSingle(const MUInt15& anIndex) const
 29 |             {   // Select presumably yields the mirrored index where m_isInverted is set and anIndex elsewhere.
 30 |                 const MUInt15 mirrored = m_maxValueMinusOne - anIndex;
 31 |                 return ParallelMath::Select(m_isInverted, mirrored, anIndex);
 32 |             }
 33 | 
 34 |             void ReconstructHDRSignedUninverted(const MUInt15 &index, MSInt16* pixel) const
 35 |             {
 36 |                 // Weight applied to endpoint 1; endpoint 0 receives the remainder out of 64.
 37 |                 MUInt15 weight1 = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 256, 9));
 38 |                 MUInt15 weight0 = ParallelMath::MakeUInt15(64) - weight1;
 39 | 
 40 |                 for (int ch = 0; ch < TVectorSize; ch++)
 41 |                 {
 42 |                     MSInt16 first = ParallelMath::LosslessCast<MSInt16>::Cast(this->m_endPoint[0][ch]);
 43 |                     MSInt16 second = ParallelMath::LosslessCast<MSInt16>::Cast(this->m_endPoint[1][ch]);
 44 |                     // Weighted sum of both endpoints, then +32 and a right shift by 6 to remove the weight scale.
 45 |                     MSInt32 blended = ParallelMath::XMultiply(weight0, first) + ParallelMath::XMultiply(weight1, second);
 46 |                     blended = ParallelMath::RightShift(blended + ParallelMath::MakeSInt32(32), 6);
 47 |                     pixel[ch] = UnscaleHDRValueSigned(ParallelMath::ToSInt16(blended));
 48 |                 }
 49 |             }
 50 | 
 51 |             void ReconstructHDRUnsignedUninverted(const MUInt15 &index, MSInt16* pixel) const
 52 |             {
 53 |                 // Weight applied to endpoint 1; endpoint 0 receives the remainder out of 64.
 54 |                 MUInt15 weight1 = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 256, 9));
 55 |                 MUInt15 weight0 = ParallelMath::MakeUInt15(64) - weight1;
 56 | 
 57 |                 for (int ch = 0; ch < TVectorSize; ch++)
 58 |                 {
 59 |                     MUInt16 first = ParallelMath::LosslessCast<MUInt16>::Cast(this->m_endPoint[0][ch]);
 60 |                     MUInt16 second = ParallelMath::LosslessCast<MUInt16>::Cast(this->m_endPoint[1][ch]);
 61 |                     // Weighted sum of both endpoints, then +32 and a right shift by 6 to remove the weight scale.
 62 |                     MUInt31 blended = ParallelMath::XMultiply(weight0, first) + ParallelMath::XMultiply(weight1, second);
 63 |                     blended = ParallelMath::RightShift(blended + ParallelMath::MakeUInt31(32), 6);
 64 |                     pixel[ch] = ParallelMath::LosslessCast<MSInt16>::Cast(UnscaleHDRValueUnsigned(ParallelMath::ToUInt16(blended)));
 65 |                 }
 66 |             }
 67 | 
 68 |             MFloat ErrorForInterpolatorComponent(int index, int ch, const MFloat *pixel) const
 69 |             {
 70 |                 MFloat delta = pixel[ch] - m_reconstructedInterpolators[index][ch]; // distance to table entry `index` in channel `ch`
 71 |                 return delta * delta; // squared, so the sign of the difference does not matter
 72 |             }
 73 | 
 74 |             MFloat ErrorForInterpolator(int index, const MFloat *pixel) const
 75 |             {
 76 |                 // Sum of the per-channel squared differences; channel 0 seeds the running total.
 77 |                 MFloat total = ErrorForInterpolatorComponent(index, 0, pixel);
 78 |                 for (int ch = 1; ch < TVectorSize; ch++) total = total + ErrorForInterpolatorComponent(index, ch, pixel);
 79 |                 return total;
 80 |             }
 81 | 
 82 |         public:
 83 | 
 84 |             void InitHDR(int range, bool isSigned, bool fastIndexing, const float *channelWeights)
 85 |             {
 86 |                 assert(range <= 16); // m_reconstructedInterpolators has room for 16 entries
 87 | 
 88 |                 m_range = range;
 89 |                 m_isInverted = ParallelMath::MakeBoolInt16(false);
 90 |                 m_maxValueMinusOne = ParallelMath::MakeUInt15(static_cast<uint16_t>(range - 1));
 91 | 
 92 |                 // The per-index table is only read by the error helpers used in SelectIndexHDRSlow.
 93 |                 if (fastIndexing)
 94 |                     return;
 95 | 
 96 |                 for (int entry = 0; entry < range; entry++)
 97 |                 {
 98 |                     MUInt15 entryIndex = ParallelMath::MakeUInt15(static_cast<uint16_t>(entry));
 99 |                     MSInt16 reconstructed[TVectorSize];
100 |                     if (isSigned)
101 |                         ReconstructHDRSignedUninverted(entryIndex, reconstructed);
102 |                     else
103 |                         ReconstructHDRUnsignedUninverted(entryIndex, reconstructed);
104 | 
105 |                     for (int ch = 0; ch < TVectorSize; ch++)
106 |                         m_reconstructedInterpolators[entry][ch] = ParallelMath::TwosCLHalfToFloat(reconstructed[ch]) * channelWeights[ch];
107 |                 }
108 |             }
109 | 
110 |             void ReconstructHDRSigned(const MUInt15 &index, MSInt16* pixel) const
111 |             {
112 |                 ReconstructHDRSignedUninverted(InvertSingle(index), pixel); // pass the index through InvertSingle first, then reconstruct TVectorSize channels into pixel
113 |             }
114 | 
115 |             void ReconstructHDRUnsigned(const MUInt15 &index, MSInt16* pixel) const
116 |             {
117 |                 ReconstructHDRUnsignedUninverted(InvertSingle(index), pixel); // pass the index through InvertSingle first, then reconstruct TVectorSize channels into pixel
118 |             }
119 | 
120 |             void ConditionalInvert(const ParallelMath::Int16CompFlag &invert)
121 |             {
122 |                 m_isInverted = invert; // replaces the stored flag; InvertSingle reads it on each call
123 |             }
124 | 
125 |             MUInt15 SelectIndexHDRSlow(const MFloat* pixel, const ParallelMath::RoundTowardNearestForScope*) const
126 |             {
127 |                 // Scores every table entry in [0, m_range) against the pixel and keeps the one with the lowest error.
128 |                 MUInt15 bestIndex = ParallelMath::MakeUInt15(0);
129 |                 MFloat lowestError = ErrorForInterpolator(0, pixel);
130 | 
131 |                 for (int candidate = 1; candidate < m_range; candidate++)
132 |                 {
133 |                     MFloat candidateError = ErrorForInterpolator(candidate, pixel);
134 |                     ParallelMath::FloatCompFlag improves = ParallelMath::Less(candidateError, lowestError); // presumably strict, so equal errors keep the earlier index
135 |                     ParallelMath::ConditionalSet(bestIndex, ParallelMath::FloatFlagToInt16(improves), ParallelMath::MakeUInt15(static_cast<uint16_t>(candidate)));
136 |                     lowestError = ParallelMath::Min(lowestError, candidateError);
137 |                 }
138 |                 return InvertSingle(bestIndex);
139 |             }
140 | 
141 |             MUInt15 SelectIndexHDRFast(const MFloat* pixel, const ParallelMath::RoundTowardNearestForScope* rtn) const
142 |             {
143 |                 return InvertSingle(this->SelectIndexLDR(pixel, rtn)); // reuse the inherited SelectIndexLDR result, then apply InvertSingle
144 |             }
145 | 
146 |         private:
147 |             MFloat m_reconstructedInterpolators[16][TVectorSize];
148 |             ParallelMath::Int16CompFlag m_isInverted;
149 |             MUInt15 m_maxValueMinusOne;
150 |             int m_range;
151 |         };
152 |     }
153 | }
154 | #endif
155 | 
156 | 


--------------------------------------------------------------------------------
/ConvectionKernels_PackedCovarianceMatrix.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #ifndef __CVTT_COVARIANCEMATRIX_H__
 3 | #define __CVTT_COVARIANCEMATRIX_H__
 4 | 
 5 | namespace cvtt
 6 | {
 7 |     namespace Internal
 8 |     {
 9 | 
10 |         template<int TMatrixSize>
11 |         class PackedCovarianceMatrix
12 |         {
13 |         public:
14 |             // 0: xx,
15 |             // 1: xy, yy
16 |             // 3: xz, yz, zz 
17 |             // 6: xw, yw, zw, ww
18 |             // ... etc.
19 |             static const int PyramidSize = (TMatrixSize * (TMatrixSize + 1)) / 2;
20 | 
21 |             typedef ParallelMath::Float MFloat;
22 | 
23 |             PackedCovarianceMatrix()
24 |             {
25 |                 MFloat zero = ParallelMath::MakeFloatZero(); // every packed entry starts from this value
26 |                 for (int slot = 0; slot < PyramidSize; slot++) m_values[slot] = zero;
27 |             }
28 | 
29 |             void Add(const ParallelMath::Float *vec, const ParallelMath::Float &weight)
30 |             {
31 |                 // Adds vec[row] * vec[col] * weight to each packed entry. Slots advance
32 |                 // row by row, covering columns 0..row of every row (lower triangle).
33 |                 ParallelMath::Float *slot = m_values;
34 |                 for (int row = 0; row < TMatrixSize; row++)
35 |                 {
36 |                     // One slot per (row, col) pair; the pointer moves on after each update.
37 |                     for (int col = 0; col <= row; col++, slot++)
38 |                         *slot = *slot + vec[row] * vec[col] * weight;
39 |                 }
40 |             }
41 | 
42 |             void Product(MFloat *outVec, const MFloat *inVec) const
43 |             {
44 |                 // outVec = M * inVec, with M read as a symmetric matrix from its packed lower triangle.
45 |                 // Read-only on the matrix. outVec must not alias inVec: rows are written while inVec is still being read.
46 |                 for (int row = 0; row < TMatrixSize; row++)
47 |                 {
48 |                     MFloat sum = ParallelMath::MakeFloatZero();
49 |                     // Start at entry (row, 0), walk along the row up to the diagonal, then continue
50 |                     // down column `row`, where the stored entry (col, row) stands in for (row, col).
51 |                     int index = (row * (row + 1)) >> 1;
52 |                     for (int col = 0; col < TMatrixSize; col++)
53 |                     {
54 |                         sum = sum + inVec[col] * m_values[index];
55 |                         // Left of the diagonal the next entry is adjacent; from the diagonal on, skip a packed row of (col + 1) entries.
56 |                         index += (col >= row) ? (col + 1) : 1;
57 |                     }
58 |                     outVec[row] = sum;
59 |                 }
60 |             }
61 | 
62 |         private:
63 |             ParallelMath::Float m_values[PyramidSize];
64 |         };
65 |     }
66 | }
67 | 
68 | #endif
69 | 


--------------------------------------------------------------------------------
/ConvectionKernels_S3TC.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #ifndef __CVTT_S3TC_H__
 3 | #define __CVTT_S3TC_H__
 4 | 
 5 | #include "ConvectionKernels_ParallelMath.h"
 6 | 
 7 | namespace cvtt
 8 | {
 9 |     namespace Internal
10 |     {
11 |         template<int TVectorSize>
12 |         class EndpointRefiner;
13 |     }
14 | 
15 |     struct PixelBlockU8;
16 | }
17 | 
18 | namespace cvtt
19 | {
20 |     namespace Internal
21 |     {
22 |         class S3TCComputer // Declaration only: every member below is a static function, and no definitions or data members appear in this header.
23 |         {
24 |         public:
25 |             typedef ParallelMath::Float MFloat;
26 |             typedef ParallelMath::SInt16 MSInt16;
27 |             typedef ParallelMath::UInt15 MUInt15;
28 |             typedef ParallelMath::UInt16 MUInt16;
29 |             typedef ParallelMath::SInt32 MSInt32;
30 | 
31 |             static void Init(MFloat& error);
32 |             static void QuantizeTo6Bits(MUInt15& v);
33 |             static void QuantizeTo5Bits(MUInt15& v);
34 |             static void QuantizeTo565(MUInt15 endPoint[3]);
35 |             static MFloat ParanoidFactorForSpan(const MSInt16& span);
36 |             static MFloat ParanoidDiff(const MUInt15& a, const MUInt15& b, const MFloat& d);
37 |             static void TestSingleColor(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], int range, const float* channelWeights,
38 |                 MFloat &bestError, MUInt15 bestEndpoints[2][3], MUInt15 bestIndexes[16], MUInt15 &bestRange, const ParallelMath::RoundTowardNearestForScope *rtn);
39 |             static void TestEndpoints(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const MFloat preWeightedPixels[16][4], const MUInt15 unquantizedEndPoints[2][3], int range, const float* channelWeights,
40 |                 MFloat &bestError, MUInt15 bestEndpoints[2][3], MUInt15 bestIndexes[16], MUInt15 &bestRange, EndpointRefiner<3> *refiner, const ParallelMath::RoundTowardNearestForScope *rtn);
41 |             static void TestCounts(uint32_t flags, const int *counts, int nCounts, const MUInt15 &numElements, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const MFloat preWeightedPixels[16][4], bool alphaTest,
42 |                 const MFloat floatSortedInputs[16][4], const MFloat preWeightedFloatSortedInputs[16][4], const float *channelWeights, MFloat &bestError, MUInt15 bestEndpoints[2][3], MUInt15 bestIndexes[16], MUInt15 &bestRange,
43 |                 const ParallelMath::RoundTowardNearestForScope* rtn);
44 |             static void PackExplicitAlpha(uint32_t flags, const PixelBlockU8* inputs, int inputChannel, uint8_t* packedBlocks, size_t packedBlockStride);
45 |             static void PackInterpolatedAlpha(uint32_t flags, const PixelBlockU8* inputs, int inputChannel, uint8_t* packedBlocks, size_t packedBlockStride, bool isSigned, int maxTweakRounds, int numRefineRounds);
46 |             static void PackRGB(uint32_t flags, const PixelBlockU8* inputs, uint8_t* packedBlocks, size_t packedBlockStride, const float channelWeights[4], bool alphaTest, float alphaThreshold, bool exhaustive, int maxTweakRounds, int numRefineRounds);
47 |         };
48 |     }
49 | }
50 | 
51 | #endif
52 | 


--------------------------------------------------------------------------------
/ConvectionKernels_SingleFile.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Convection Texture Tools
 3 | Copyright (c) 2018-2019 Eric Lasota
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining
 6 | a copy of this software and associated documentation files (the
 7 | "Software"), to deal in the Software without restriction, including
 8 | without limitation the rights to use, copy, modify, merge, publish,
 9 | distribute, sublicense, and/or sell copies of the Software, and to
10 | permit persons to whom the Software is furnished to do so, subject
11 | to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included
14 | in all copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 | 
24 | -------------------------------------------------------------------------------------
25 | 
26 | Portions based on DirectX Texture Library (DirectXTex)
27 | 
28 | Copyright (c) Microsoft Corporation. All rights reserved.
29 | Licensed under the MIT License.
30 | 
31 | http://go.microsoft.com/fwlink/?LinkId=248926
32 | */
33 | #include "ConvectionKernels_Config.h"
34 | 
35 | #if defined(CVTT_SINGLE_FILE)
36 | #define CVTT_SINGLE_FILE_IMPL
37 | 
38 | #include "ConvectionKernels_API.cpp"
39 | #include "ConvectionKernels_BC67.cpp"
40 | #include "ConvectionKernels_BC6H_IO.cpp"
41 | #include "ConvectionKernels_BC7_PrioData.cpp"
42 | #include "ConvectionKernels_BCCommon.cpp"
43 | #include "ConvectionKernels_ETC.cpp"
44 | #include "ConvectionKernels_IndexSelector.cpp"
45 | #include "ConvectionKernels_S3TC.cpp"
46 | #include "ConvectionKernels_Util.cpp"
47 | 
48 | #endif
49 | 


--------------------------------------------------------------------------------
/ConvectionKernels_UnfinishedEndpoints.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include "ConvectionKernels_Util.h"
  4 | 
  5 | namespace cvtt
  6 | {
  7 |     namespace Internal
  8 |     {
  9 |         template<int TVectorSize>
 10 |         class UnfinishedEndpoints
 11 |         {
 12 |         public:
 13 |             typedef ParallelMath::Float MFloat;
 14 |             typedef ParallelMath::UInt16 MUInt16;
 15 |             typedef ParallelMath::UInt15 MUInt15;
 16 |             typedef ParallelMath::SInt16 MSInt16;
 17 |             typedef ParallelMath::SInt32 MSInt32;
 18 | 
 19 |             UnfinishedEndpoints()
 20 |             { // assigns nothing: m_base and m_offset keep whatever MFloat's own default construction leaves in them
 21 |             }
 22 | 
 23 |             UnfinishedEndpoints(const MFloat *base, const MFloat *offset)
 24 |             {
 25 |                 // Copies TVectorSize entries from each input array into the object's own storage.
 26 |                 int ch = 0;
 27 |                 for (; ch < TVectorSize; ch++) m_base[ch] = base[ch];
 28 |                 for (ch = 0; ch < TVectorSize; ch++) m_offset[ch] = offset[ch];
 29 |             }
 30 | 
 31 |             UnfinishedEndpoints(const UnfinishedEndpoints& other)
 32 |             {
 33 |                 // Element-wise copy of both per-channel arrays from `other`.
 34 |                 int ch = 0;
 35 |                 for (; ch < TVectorSize; ch++) m_base[ch] = other.m_base[ch];
 36 |                 for (ch = 0; ch < TVectorSize; ch++) m_offset[ch] = other.m_offset[ch];
 37 |             }
 38 | 
 39 |             void FinishHDRUnsigned(int tweak, int range, MSInt16 *outEP0, MSInt16 *outEP1, ParallelMath::RoundTowardNearestForScope *roundingMode)
 40 |             {
 41 |                 float factor[2];
 42 |                 Util::ComputeTweakFactors(tweak, range, factor);
 43 | 
 44 |                 for (int ch = 0; ch < TVectorSize; ch++)
 45 |                 {
 46 |                     // base + offset * factor for each endpoint, clamped to [0, 31743] before rounding.
 47 |                     MFloat first = ParallelMath::Clamp(m_base[ch] + m_offset[ch] * factor[0], 0.0f, 31743.0f);
 48 |                     MFloat second = ParallelMath::Clamp(m_base[ch] + m_offset[ch] * factor[1], 0.0f, 31743.0f);
 49 |                     MUInt15 firstRounded = ParallelMath::RoundAndConvertToU15(first, roundingMode);
 50 |                     MUInt15 secondRounded = ParallelMath::RoundAndConvertToU15(second, roundingMode);
 51 | 
 52 |                     // The caller receives the unsigned results through the MSInt16 output arrays.
 53 |                     outEP0[ch] = ParallelMath::LosslessCast<MSInt16>::Cast(firstRounded);
 54 |                     outEP1[ch] = ParallelMath::LosslessCast<MSInt16>::Cast(secondRounded);
 55 |                 }
 56 |             }
 57 | 
 58 |             void FinishHDRSigned(int tweak, int range, MSInt16* outEP0, MSInt16* outEP1, ParallelMath::RoundTowardNearestForScope* roundingMode)
 59 |             {
 60 |                 // Signed counterpart of FinishHDRUnsigned: same tweak arithmetic,
 61 |                 // but with a symmetric clamp and a signed conversion.
 62 |                 float factor[2];
 63 |                 Util::ComputeTweakFactors(tweak, range, factor);
 64 | 
 65 |                 for (int ch = 0; ch < TVectorSize; ch++)
 66 |                 {
 67 |                     // base + offset * factor for each endpoint, clamped to [-31743, 31743].
 68 |                     MFloat first = ParallelMath::Clamp(m_base[ch] + m_offset[ch] * factor[0], -31743.0f, 31743.0f);
 69 |                     MFloat second = ParallelMath::Clamp(m_base[ch] + m_offset[ch] * factor[1], -31743.0f, 31743.0f);
 70 | 
 71 |                     // Round with the caller-supplied rounding-mode object and store directly.
 72 |                     outEP0[ch] = ParallelMath::RoundAndConvertToS16(first, roundingMode);
 73 |                     outEP1[ch] = ParallelMath::RoundAndConvertToS16(second, roundingMode);
 74 |                 }
 75 |             }
 76 | 
 77 |             void FinishLDR(int tweak, int range, MUInt15* outEP0, MUInt15* outEP1)
 78 |             {
 79 |                 // Unlike the HDR variants, this creates its own rounding-mode object for the conversions.
 80 |                 ParallelMath::RoundTowardNearestForScope roundingMode;
 81 |                 float factor[2];
 82 |                 Util::ComputeTweakFactors(tweak, range, factor);
 83 |                 MUInt15* outEP[2] = { outEP0, outEP1 };
 84 | 
 85 |                 for (int ch = 0; ch < TVectorSize; ch++)
 86 |                 {
 87 |                     // base + offset * factor, clamped to [0, 255], then rounded into the matching output array.
 88 |                     for (int epi = 0; epi < 2; epi++)
 89 |                         outEP[epi][ch] = ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(m_base[ch] + m_offset[ch] * factor[epi], 0.0f, 255.0f), &roundingMode);
 90 |                 }
 91 |             }
 92 | 
 93 |             template<int TNewVectorSize>
 94 |             UnfinishedEndpoints<TNewVectorSize> ExpandTo(float filler)
 95 |             {
 96 |                 // Channels present in both sizes are copied; extra channels get base = filler and offset = 0.
 97 |                 MFloat expandedBase[TNewVectorSize];
 98 |                 MFloat expandedOffset[TNewVectorSize];
 99 |                 MFloat fillerV = ParallelMath::MakeFloat(filler);
100 |                 for (int ch = 0; ch < TNewVectorSize; ch++)
101 |                 {
102 |                     if (ch < TVectorSize)
103 |                     {
104 |                         expandedBase[ch] = m_base[ch];
105 |                         expandedOffset[ch] = m_offset[ch];
106 |                     }
107 |                     else
108 |                     {
109 |                         expandedBase[ch] = fillerV;
110 |                         expandedOffset[ch] = ParallelMath::MakeFloatZero();
111 |                     }
112 |                 }
113 |                 return UnfinishedEndpoints<TNewVectorSize>(expandedBase, expandedOffset);
114 |             }
115 | 
116 |         private:
117 |             MFloat m_base[TVectorSize];
118 |             MFloat m_offset[TVectorSize];
119 |         };
120 |     }
121 | }
122 | 


--------------------------------------------------------------------------------
/ConvectionKernels_Util.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 | Convection Texture Tools
 3 | Copyright (c) 2018-2019 Eric Lasota
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining
 6 | a copy of this software and associated documentation files (the
 7 | "Software"), to deal in the Software without restriction, including
 8 | without limitation the rights to use, copy, modify, merge, publish,
 9 | distribute, sublicense, and/or sell copies of the Software, and to
10 | permit persons to whom the Software is furnished to do so, subject
11 | to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included
14 | in all copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 | 
24 | -------------------------------------------------------------------------------------
25 | 
26 | Portions based on DirectX Texture Library (DirectXTex)
27 | 
28 | Copyright (c) Microsoft Corporation. All rights reserved.
29 | Licensed under the MIT License.
30 | 
31 | http://go.microsoft.com/fwlink/?LinkId=248926
32 | */
33 | #include "ConvectionKernels_Config.h"
34 | 
35 | #if !defined(CVTT_SINGLE_FILE) || defined(CVTT_SINGLE_FILE_IMPL)
36 | 
37 | #include "ConvectionKernels.h"
38 | #include "ConvectionKernels_ParallelMath.h"
39 | 
40 | #include <algorithm>
41 | 
42 | namespace cvtt
43 | {
44 |     namespace Util
45 |     {
46 |         // Signed input blocks are converted into unsigned space, with the maximum value being 254
47 |         void BiasSignedInput(PixelBlockU8 inputNormalized[ParallelMath::ParallelSize], const PixelBlockS8 inputSigned[ParallelMath::ParallelSize])
48 |         {
49 |             for (size_t blockIndex = 0; blockIndex < ParallelMath::ParallelSize; blockIndex++)
50 |             {
51 |                 for (size_t px = 0; px < 16; px++)
52 |                 {
53 |                     for (size_t ch = 0; ch < 4; ch++)
54 |                     {
55 |                         int clamped = std::max<int>(inputSigned[blockIndex].m_pixels[px][ch], -127); // anything below -127 is raised to -127
56 |                         inputNormalized[blockIndex].m_pixels[px][ch] = static_cast<uint8_t>(clamped + 127); // shift so that -127 maps to 0
57 |                     }
58 |                 }
59 |             }
60 |         }
61 | 
62 |         void FillWeights(const Options &options, float channelWeights[4])
63 |         {
64 |             // With Flags::Uniform set, all four weights become 1.0f; otherwise they are
65 |             // taken from the options in the order red, green, blue, alpha. The flag is
66 |             // tested once and then applied to each channel.
67 |             const bool uniform = (options.flags & Flags::Uniform) != 0;
68 | 
69 |             channelWeights[0] = uniform ? 1.0f : options.redWeight;
70 |             channelWeights[1] = uniform ? 1.0f : options.greenWeight;
71 |             channelWeights[2] = uniform ? 1.0f : options.blueWeight;
72 |             channelWeights[3] = uniform ? 1.0f : options.alphaWeight;
73 |         }
74 | 
75 |         void ComputeTweakFactors(int tweak, int range, float *outFactors)
76 |         {
77 |             // Bit 1 of tweak moves outFactors[0] below 0 and bit 0 moves outFactors[1] above 1,
78 |             // each by one unit relative to the (range - 1) units minus those moved outside.
79 |             const int belowUnits = (tweak >> 1) & 1;
80 |             const int aboveUnits = tweak & 1;
81 |             const float insideUnits = static_cast<float>((range - 1) - belowUnits - aboveUnits);
82 |             outFactors[0] = -static_cast<float>(belowUnits) / insideUnits;
83 |             outFactors[1] = static_cast<float>(aboveUnits) / insideUnits + 1.0f;
84 |         }
85 |     }
86 | }
87 | 
88 | #endif
89 | 


--------------------------------------------------------------------------------
/ConvectionKernels_Util.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "ConvectionKernels_ParallelMath.h"
 4 | 
 5 | namespace cvtt
 6 | {
 7 |     struct PixelBlockU8;
 8 |     struct PixelBlockS8;
 9 |     struct Options;
10 | }
11 | 
12 | namespace cvtt
13 | {
14 |     namespace Util
15 |     {
16 |         // Signed input blocks are converted into unsigned space, with the maximum value being 254
17 |         void BiasSignedInput(PixelBlockU8 inputNormalized[ParallelMath::ParallelSize], const PixelBlockS8 inputSigned[ParallelMath::ParallelSize]);
18 |         void FillWeights(const Options &options, float channelWeights[4]);
19 |         void ComputeTweakFactors(int tweak, int range, float *outFactors);
20 |     }
21 | }
22 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | Convection Texture Tools Stand-Alone Kernels
 2 | 
 3 | Copyright (c) 2018 Eric Lasota
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining
 6 | a copy of this software and associated documentation files (the
 7 | "Software"), to deal in the Software without restriction, including
 8 | without limitation the rights to use, copy, modify, merge, publish,
 9 | distribute, sublicense, and/or sell copies of the Software, and to
10 | permit persons to whom the Software is furnished to do so, subject
11 | to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included
14 | in all copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 | 
24 | **************************************************************************
25 | 
26 | Based on DirectX Texture Library
27 | 
28 | Copyright (c) 2018 Microsoft Corp
29 | 
30 | Permission is hereby granted, free of charge, to any person obtaining a copy of this 
31 | software and associated documentation files (the "Software"), to deal in the Software 
32 | without restriction, including without limitation the rights to use, copy, modify, 
33 | merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 
34 | permit persons to whom the Software is furnished to do so, subject to the following 
35 | conditions: 
36 | 
37 | The above copyright notice and this permission notice shall be included in all copies 
38 | or substantial portions of the Software.  
39 | 
40 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
41 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 
42 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 
43 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 
44 | CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 
45 | OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


--------------------------------------------------------------------------------
/MakeTables/App.config:
--------------------------------------------------------------------------------
1 | ﻿<?xml version="1.0" encoding="utf-8" ?>
2 | <configuration>
3 |     <startup> 
4 |         <supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.6.2" />
5 |     </startup>
6 | </configuration>


--------------------------------------------------------------------------------
/MakeTables/MakeTables.csproj:
--------------------------------------------------------------------------------
 1 | ﻿<?xml version="1.0" encoding="utf-8"?>
 2 | <Project ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
 3 |   <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
 4 |   <PropertyGroup>
 5 |     <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
 6 |     <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
 7 |     <ProjectGuid>{867F8F36-10EA-4594-AA41-34BC5B74A65A}</ProjectGuid>
 8 |     <OutputType>Exe</OutputType>
 9 |     <RootNamespace>MakeTables</RootNamespace>
10 |     <AssemblyName>MakeTables</AssemblyName>
11 |     <TargetFrameworkVersion>v4.6.2</TargetFrameworkVersion>
12 |     <FileAlignment>512</FileAlignment>
13 |     <AutoGenerateBindingRedirects>true</AutoGenerateBindingRedirects>
14 |   </PropertyGroup>
15 |   <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
16 |     <PlatformTarget>AnyCPU</PlatformTarget>
17 |     <DebugSymbols>true</DebugSymbols>
18 |     <DebugType>full</DebugType>
19 |     <Optimize>false</Optimize>
20 |     <OutputPath>bin\Debug\</OutputPath>
21 |     <DefineConstants>DEBUG;TRACE</DefineConstants>
22 |     <ErrorReport>prompt</ErrorReport>
23 |     <WarningLevel>4</WarningLevel>
24 |   </PropertyGroup>
25 |   <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
26 |     <PlatformTarget>AnyCPU</PlatformTarget>
27 |     <DebugType>pdbonly</DebugType>
28 |     <Optimize>true</Optimize>
29 |     <OutputPath>bin\Release\</OutputPath>
30 |     <DefineConstants>TRACE</DefineConstants>
31 |     <ErrorReport>prompt</ErrorReport>
32 |     <WarningLevel>4</WarningLevel>
33 |   </PropertyGroup>
34 |   <ItemGroup>
35 |     <Reference Include="System" />
36 |     <Reference Include="System.Core" />
37 |     <Reference Include="System.Xml.Linq" />
38 |     <Reference Include="System.Data.DataSetExtensions" />
39 |     <Reference Include="Microsoft.CSharp" />
40 |     <Reference Include="System.Data" />
41 |     <Reference Include="System.Net.Http" />
42 |     <Reference Include="System.Xml" />
43 |   </ItemGroup>
44 |   <ItemGroup>
45 |     <Compile Include="Program.cs" />
46 |     <Compile Include="Properties\AssemblyInfo.cs" />
47 |   </ItemGroup>
48 |   <ItemGroup>
49 |     <None Include="App.config" />
50 |   </ItemGroup>
51 |   <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
52 | </Project>


--------------------------------------------------------------------------------
/MakeTables/Program.cs:
--------------------------------------------------------------------------------
  1 | ﻿using System;
  2 | using System.Collections.Generic;
  3 | using System.IO;
  4 | 
  5 | namespace MakeTables
  6 | {
  7 |     class Program
  8 |     {
  9 |         static int BitExpand(int v, int bits)
 10 |         {
 11 |             int shifted = v << (8 - bits); // move v up by (8 - bits) bit positions
 12 |             return shifted | (shifted >> bits); // OR in a copy shifted right by `bits` to fill the vacated low bits
 13 |         }
 14 | 
 15 |         static int BitExpandP(int v, int bits, int parityBit)
 16 |         {
 17 |             // v is moved up by (8 - bits) positions and the parity bit is placed directly beneath it;
 18 |             // that combined value is then OR-ed with a copy of itself shifted right by (bits + 1).
 19 |             int withParity = (v << (8 - bits)) | (parityBit << (7 - bits));
 20 |             return withParity | (withParity >> (bits + 1));
 21 |         }
 22 | 
 23 |         static int[] aWeight2 = { 0, 21, 43, 64 }; // interpolation weights out of 64; EmitTableBC7 uses these when maxIndex == 3
 24 |         static int[] aWeight3 = { 0, 9, 18, 27, 37, 46, 55, 64 }; // interpolation weights out of 64; EmitTableBC7 uses these when maxIndex == 7
 25 |         static int[] aWeight4 = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 }; // interpolation weights out of 64; EmitTableBC7 uses these when maxIndex == 15
 26 | 
 27 |         // Writes a C++ "Table" initializer called 'name' to 'w'.  The initializer holds targetIndex, the
 28 |         // combined P-bit value (parityBitMin, plus parityBitMax << 1 when parityBits == 2) and 256 entries.
 29 |         // Entry i is the first endpoint pair (min scanned in the outer loop, max in the inner loop) whose
 30 |         // interpolation at weight index targetIndex lands closest to i, written as
 31 |         // { expanded min, expanded max, interpolated value }.
 32 |         //
 33 |         //   bits        - endpoint precision in bits, not counting any P-bit
 34 |         //   parityBits  - 0 expands endpoints with BitExpand; any other value uses BitExpandP
 35 |         //   maxIndex    - selects the weight table; must be 3, 7 or 15
 36 |         //
 37 |         // Throws ArgumentException for any other maxIndex, before anything is written to 'w'.
 38 |         static void EmitTableBC7(StreamWriter w, int bits, int parityBits, int parityBitMin, int parityBitMax, int targetIndex, int maxIndex, string name)
 39 |         {
 40 |             // The weight table depends only on maxIndex, so resolve it once and reject unsupported
 41 |             // values up front instead of dereferencing a null table halfway through the output.
 42 |             int[] weightTable;
 43 |             if (maxIndex == 3)
 44 |                 weightTable = aWeight2;
 45 |             else if (maxIndex == 7)
 46 |                 weightTable = aWeight3;
 47 |             else if (maxIndex == 15)
 48 |                 weightTable = aWeight4;
 49 |             else
 50 |                 throw new ArgumentException("maxIndex must be 3, 7 or 15", "maxIndex");
 51 | 
 52 |             int maxWeight = weightTable[targetIndex];
 53 |             int minWeight = 64 - maxWeight;
 54 | 
 55 |             int parityBitsCombined = parityBitMin;
 56 |             if (parityBits == 2)
 57 |                 parityBitsCombined += (parityBitMax << 1);
 58 | 
 59 |             w.WriteLine("Table " + name + "=");
 60 |             w.WriteLine("{");
 61 |             w.WriteLine("    " + targetIndex + ",");
 62 |             w.WriteLine("    " + parityBitsCombined + ",");
 63 |             w.WriteLine("    {");
 64 | 
 65 |             int epRange = 1 << bits;
 66 | 
 67 |             for (int i = 0; i < 256; i++)
 68 |             {
 69 |                 // Eight entries per output line.
 70 |                 if (i % 8 == 0)
 71 |                     w.Write("        ");
 72 | 
 73 |                 double bestError = double.MaxValue;
 74 |                 int bestMin = 0;
 75 |                 int bestMax = 0;
 76 |                 int bestActualColor = 0;
 77 | 
 78 |                 for (int min = 0; min < epRange; min++)
 79 |                 {
 80 |                     int minExpanded = (parityBits != 0) ? BitExpandP(min, bits, parityBitMin) : BitExpand(min, bits);
 81 | 
 82 |                     for (int max = 0; max < epRange; max++)
 83 |                     {
 84 |                         int maxExpanded = (parityBits != 0) ? BitExpandP(max, bits, parityBitMax) : BitExpand(max, bits);
 85 | 
 86 |                         // Weighted blend of the two endpoints, rounded to nearest (weights sum to 64).
 87 |                         int interpolated = ((minWeight * minExpanded + maxWeight * maxExpanded + 32) >> 6);
 88 | 
 89 |                         double delta = interpolated - i;
 90 |                         double error = delta * delta;
 91 | 
 92 |                         // Strict comparison: the first pair found keeps the slot on ties.
 93 |                         if (error < bestError)
 94 |                         {
 95 |                             bestError = error;
 96 |                             bestActualColor = interpolated;
 97 |                             bestMin = minExpanded;
 98 |                             bestMax = maxExpanded;
 99 |                         }
100 |                     }
101 |                 }
102 | 
103 |                 w.Write("{ " + bestMin.ToString() + ", " + bestMax.ToString() + ", " + bestActualColor.ToString() + " },");
104 |                 if (i % 8 == 7)
105 |                     w.WriteLine();
106 |                 else
107 |                     w.Write(" ");
108 |             }
109 | 
110 |             w.WriteLine("    }");
111 |             w.WriteLine("};");
112 |             w.WriteLine();
113 |         }
 94 | 
 95 |         // Writes a C++ "TableEntry name[256]" initializer to 'w'.  Entry i describes the endpoint pair
 96 |         // chosen for target value i as { expanded min, expanded max, interpolated value, endpoint span }.
 97 |         // A pair is scored as (|interpolated - i| + span * paranoia) squared; the lowest score wins and
 98 |         // equal scores go to the pair with the narrower span.
 99 |         static void EmitTable(StreamWriter w, int bits, int maxIndex, double paranoia, string name)
100 |         {
101 |             w.WriteLine("TableEntry " + name + "[256] =");
102 |             w.WriteLine("{");
103 | 
104 |             int endpointCount = 1 << bits;
105 | 
106 |             for (int target = 0; target < 256; target++)
107 |             {
108 |                 // Eight entries per output line.
109 |                 bool startsRow = (target % 8 == 0);
110 |                 bool endsRow = (target % 8 == 7);
111 | 
112 |                 if (startsRow)
113 |                     w.Write("    ");
114 | 
115 |                 double lowestScore = double.MaxValue;
116 |                 int chosenSpan = 255;
117 |                 int chosenMin = 0;
118 |                 int chosenMax = 0;
119 |                 int chosenColor = 0;
120 | 
121 |                 for (int lo = 0; lo < endpointCount; lo++)
122 |                 {
123 |                     int loExpanded = BitExpand(lo, bits);
124 | 
125 |                     for (int hi = 0; hi < endpointCount; hi++)
126 |                     {
127 |                         int hiExpanded = BitExpand(hi, bits);
128 | 
129 |                         int blended = (loExpanded * (maxIndex - 1) + hiExpanded) / maxIndex;
130 |                         int span = Math.Abs(loExpanded - hiExpanded);
131 | 
132 |                         double penalized = Math.Abs(blended - target) + span * paranoia;
133 |                         double score = penalized * penalized;
134 | 
135 |                         // Skip anything worse, and anything equal that does not narrow the span.
136 |                         if (score > lowestScore)
137 |                             continue;
138 |                         if (score == lowestScore && span >= chosenSpan)
139 |                             continue;
140 | 
141 |                         lowestScore = score;
142 |                         chosenSpan = span;
143 |                         chosenColor = blended;
144 |                         chosenMin = loExpanded;
145 |                         chosenMax = hiExpanded;
146 |                     }
147 |                 }
148 | 
149 |                 w.Write("{ " + chosenMin + ", " + chosenMax + ", " + chosenColor + ", " + chosenSpan + " },");
150 |                 if (endsRow)
151 |                     w.WriteLine();
152 |                 else
153 |                     w.Write(" ");
154 |             }
155 | 
156 |             w.WriteLine("};");
157 |             w.WriteLine();
158 |         }
149 | 
150 |         // Generates the header at 'path' declaring cvtt::Tables::ETC2::g_alphaRoundingTableWidth and
151 |         // g_alphaRoundingTables.  Each of the 16 rows maps the rounder values 0..12 to the position (0..3)
152 |         // of the nearest value in the matching row of the table below; the lowest position wins ties.
153 |         static void MakeETC2AlphaRoundingTables(string path)
154 |         {
155 |             const int RowCount = 16;
156 |             const int ValuesPerRow = 4;
157 |             int numRounders = 13;
158 | 
159 |             int[] etc2alphatable =
160 |             {
161 |                 2, 5, 8, 14,
162 |                 2, 6, 9, 12,
163 |                 1, 4, 7, 12,
164 |                 1, 3, 5, 12,
165 |                 2, 5, 7, 11,
166 |                 2, 6, 8, 10,
167 |                 3, 6, 7, 10,
168 |                 2, 4, 7, 10,
169 |                 1, 5, 7, 9,
170 |                 1, 4, 7, 9,
171 |                 1, 3, 7, 9,
172 |                 1, 4, 6, 9,
173 |                 2, 3, 6, 9,
174 |                 0, 1, 2, 9,
175 |                 3, 5, 7, 8,
176 |                 2, 4, 6, 8,
177 |             };
178 | 
179 |             using (StreamWriter w = new StreamWriter(path))
180 |             {
181 |                 string width = numRounders.ToString();
182 | 
183 |                 w.WriteLine("#pragma once");
184 |                 w.WriteLine("#include <stdint.h>");
185 |                 w.WriteLine();
186 |                 w.WriteLine("// This file is generated by the MakeTables app.  Do not edit this file manually.");
187 |                 w.WriteLine();
188 | 
189 |                 w.WriteLine("namespace cvtt { namespace Tables { namespace ETC2 {");
190 |                 w.WriteLine("    const int g_alphaRoundingTableWidth = " + width + ";");
191 |                 w.WriteLine("    const uint8_t g_alphaRoundingTables[16][" + width + "] =");
192 |                 w.WriteLine("    {");
193 | 
194 |                 for (int row = 0; row < RowCount; row++)
195 |                 {
196 |                     string[] cells = new string[numRounders];
197 | 
198 |                     for (int rounder = 0; rounder < numRounders; rounder++)
199 |                     {
200 |                         int nearest = 0;
201 |                         int nearestDistance = 9999;
202 | 
203 |                         for (int position = 0; position < ValuesPerRow; position++)
204 |                         {
205 |                             int distance = Math.Abs(rounder - etc2alphatable[row * ValuesPerRow + position]);
206 |                             if (distance < nearestDistance)
207 |                             {
208 |                                 nearestDistance = distance;
209 |                                 nearest = position;
210 |                             }
211 |                         }
212 | 
213 |                         cells[rounder] = nearest.ToString();
214 |                     }
215 | 
216 |                     w.WriteLine("        { " + string.Join(", ", cells) + " },");
217 |                 }
218 | 
219 |                 w.WriteLine("    };");
220 |                 w.WriteLine("}}}");
221 |             }
222 |         }
220 | 
221 |         // Maps the RGB triple (pr, pg, pb) to the y/u/v triple used by the fake BT.709 rounding-table
222 |         // generator.  Each output is a fixed linear combination of the three inputs.
223 |         static void ConvertToFakeBT709(out double y, out double u, out double v, double pr, double pg, double pb)
224 |         {
225 |             y = pr * 0.368233989135369 + pg * 1.23876274963149 + pb * 0.125054068802017;
226 |             u = pr * 0.5 - pg * 0.4541529 - pb * 0.04584709;
227 |             v = pr * -0.081014709086133 - pg * 0.272538676238785 + pb * 0.353553390593274;
228 |         }
231 | 
232 |         // Writes the body of a tableResolution^3 lookup table to 'sw' (r outermost, then g, then b).
233 |         // Each cell is the number 0..7 of the cube corner nearest to (r, g, b) once both are converted
234 |         // with ConvertToFakeBT709.  Bit 0 of the corner number selects r, bit 1 selects g and bit 2
235 |         // selects b; a clear bit means 0 and a set bit means tableResolution.  The lowest corner number
236 |         // wins ties.  One output line is written per (r, g) pair, with a blank line after each r.
237 |         static void EmitFakeBT709RoundingTable(StreamWriter sw, int tableResolution)
238 |         {
239 |             const int OctantCount = 8;
240 | 
241 |             // The converted corners do not depend on r, g or b, so compute them once here rather
242 |             // than once per table cell.
243 |             double[] cornerY = new double[OctantCount];
244 |             double[] cornerU = new double[OctantCount];
245 |             double[] cornerV = new double[OctantCount];
246 | 
247 |             for (int octant = 0; octant < OctantCount; octant++)
248 |             {
249 |                 double cornerR = ((octant & 1) == 0) ? 0 : tableResolution;
250 |                 double cornerG = ((octant & 2) == 0) ? 0 : tableResolution;
251 |                 double cornerB = ((octant & 4) == 0) ? 0 : tableResolution;
252 | 
253 |                 ConvertToFakeBT709(out cornerY[octant], out cornerU[octant], out cornerV[octant], cornerR, cornerG, cornerB);
254 |             }
255 | 
256 |             for (int r = 0; r < tableResolution; r++)
257 |             {
258 |                 for (int g = 0; g < tableResolution; g++)
259 |                 {
260 |                     sw.Write("        ");
261 | 
262 |                     for (int b = 0; b < tableResolution; b++)
263 |                     {
264 |                         double y, u, v;
265 | 
266 |                         ConvertToFakeBT709(out y, out u, out v, r, g, b);
267 | 
268 |                         double bestDiff = double.MaxValue;
269 |                         int bestOctant = 0;
270 |                         for (int compareOctant = 0; compareOctant < OctantCount; compareOctant++)
271 |                         {
272 |                             double dy = cornerY[compareOctant] - y;
273 |                             double du = cornerU[compareOctant] - u;
274 |                             double dv = cornerV[compareOctant] - v;
275 | 
276 |                             double error = dy * dy + du * du + dv * dv;
277 |                             if (error < bestDiff)
278 |                             {
279 |                                 bestDiff = error;
280 |                                 bestOctant = compareOctant;
281 |                             }
282 |                         }
283 | 
284 |                         sw.Write(bestOctant);
285 |                         sw.Write(", ");
286 |                     }
287 |                     sw.WriteLine();
288 |                 }
289 |                 sw.WriteLine();
290 |             }
291 |         }
277 | 
278 |         // Generates the header at 'path' declaring cvtt::Tables::FakeBT709::g_rounding16, whose contents
279 |         // come from EmitFakeBT709RoundingTable at resolution 16.
280 |         static void MakeFakeBT709RoundingTables(string path)
281 |         {
282 |             int[] resolutions = { 16 };
283 | 
284 |             string[] preamble =
285 |             {
286 |                 "#pragma once",
287 |                 "#include <stdint.h>",
288 |                 "",
289 |                 "// This file is generated by the MakeTables app.  Do not edit this file manually.",
290 |                 "",
291 |                 "namespace cvtt { namespace Tables { namespace FakeBT709 {",
292 |             };
293 | 
294 |             using (StreamWriter w = new StreamWriter(path))
295 |             {
296 |                 foreach (string line in preamble)
297 |                     w.WriteLine(line);
298 | 
299 |                 foreach (int resolution in resolutions)
300 |                 {
301 |                     w.WriteLine("    const uint8_t g_rounding" + resolution + "[] =");
302 |                     w.WriteLine("    {");
303 | 
304 |                     EmitFakeBT709RoundingTable(w, resolution);
305 | 
306 |                     w.WriteLine("    };");
307 |                 }
308 | 
309 |                 w.WriteLine("}}}");
310 |             }
311 |         }
302 | 
303 |         // Regenerates every table header, writing each one to the current working directory: the BC7
304 |         // and S3TC single-color tables, then the ETC2 alpha rounding and fake BT.709 rounding tables.
305 |         // 'args' is not used.
306 |         static void Main(string[] args)
307 |         {
308 |             string[] filenames = { "ConvectionKernels_BC7_SingleColor.h", "ConvectionKernels_S3TC_SingleColor.h" };
309 | 
310 |             for (int i = 0; i < filenames.Length; i++)
311 |             {
312 |                 using (StreamWriter w = new StreamWriter(filenames[i]))
313 |                 {
314 |                     // filenames[0] is the BC7 header; filenames[1] is the S3TC header.
315 |                     bool bc7 = (i == 0);
316 | 
317 |                     w.WriteLine("#pragma once");
318 |                     w.WriteLine("#include <stdint.h>");
319 |                     w.WriteLine();
320 |                     w.WriteLine("// This file is generated by the MakeTables app.  Do not edit this file manually.");
321 |                     w.WriteLine();
322 | 
323 |                     if (bc7)
324 |                         w.WriteLine("namespace cvtt { namespace Tables { namespace BC7SC {");
325 |                     else
326 |                         w.WriteLine("namespace cvtt { namespace Tables { namespace S3TCSC {");
327 | 
328 |                     w.WriteLine();
329 |                     w.WriteLine("struct TableEntry");
330 |                     w.WriteLine("{");
331 |                     w.WriteLine("    uint8_t m_min;");
332 |                     w.WriteLine("    uint8_t m_max;");
333 |                     w.WriteLine("    uint8_t m_actualColor;");
334 |                     if (!bc7)
335 |                         w.WriteLine("    uint8_t m_span;");
336 |                     w.WriteLine("};");
337 |                     w.WriteLine();
338 | 
339 |                     if (bc7)
340 |                     {
341 |                         w.WriteLine("struct Table");
342 |                         w.WriteLine("{");
343 |                         w.WriteLine("    uint8_t m_index;");
344 |                         w.WriteLine("    uint8_t m_pBits;");
345 |                         w.WriteLine("    TableEntry m_entries[256];");
346 |                         w.WriteLine("};");
347 |                         w.WriteLine();
348 | 
349 |                         // Arguments: bits, parityBits, parityBitMin, parityBitMax, targetIndex, maxIndex, name.
350 |                         // In a "_pXY" table name, X is parityBitMin and Y is parityBitMax.
351 | 
352 |                         // Mode 0: 4-bit endpoints, 2 P-bits, 3-bit indexes
353 |                         EmitTableBC7(w, 4, 2, 0, 0, 1, 7, "g_mode0_p00_i1");
354 |                         EmitTableBC7(w, 4, 2, 0, 0, 2, 7, "g_mode0_p00_i2");
355 |                         EmitTableBC7(w, 4, 2, 0, 0, 3, 7, "g_mode0_p00_i3");
356 |                         EmitTableBC7(w, 4, 2, 0, 1, 1, 7, "g_mode0_p01_i1");
357 |                         EmitTableBC7(w, 4, 2, 0, 1, 2, 7, "g_mode0_p01_i2");
358 |                         EmitTableBC7(w, 4, 2, 0, 1, 3, 7, "g_mode0_p01_i3");
359 |                         EmitTableBC7(w, 4, 2, 1, 0, 1, 7, "g_mode0_p10_i1");
360 |                         EmitTableBC7(w, 4, 2, 1, 0, 2, 7, "g_mode0_p10_i2");
361 |                         EmitTableBC7(w, 4, 2, 1, 0, 3, 7, "g_mode0_p10_i3");
362 |                         EmitTableBC7(w, 4, 2, 1, 1, 1, 7, "g_mode0_p11_i1");
363 |                         EmitTableBC7(w, 4, 2, 1, 1, 2, 7, "g_mode0_p11_i2");
364 |                         EmitTableBC7(w, 4, 2, 1, 1, 3, 7, "g_mode0_p11_i3");
365 | 
366 |                         // Mode 1: 6-bit endpoints, 1 P-bit, 3-bit indexes
367 |                         EmitTableBC7(w, 6, 1, 0, 0, 1, 7, "g_mode1_p0_i1");
368 |                         EmitTableBC7(w, 6, 1, 0, 0, 2, 7, "g_mode1_p0_i2");
369 |                         EmitTableBC7(w, 6, 1, 0, 0, 3, 7, "g_mode1_p0_i3");
370 |                         EmitTableBC7(w, 6, 1, 1, 1, 1, 7, "g_mode1_p1_i1");
371 |                         EmitTableBC7(w, 6, 1, 1, 1, 2, 7, "g_mode1_p1_i2");
372 |                         EmitTableBC7(w, 6, 1, 1, 1, 3, 7, "g_mode1_p1_i3");
373 | 
374 |                         // Mode 2: 5-bit endpoints, 0 P-bits, 2-bit indexes
375 |                         EmitTableBC7(w, 5, 0, 0, 0, 1, 3, "g_mode2");
376 | 
377 |                         // Mode 3: 7-bit endpoints, 1 P-bit, 2-bit indexes
378 |                         EmitTableBC7(w, 7, 1, 0, 0, 1, 3, "g_mode3_p0");
379 |                         EmitTableBC7(w, 7, 1, 1, 1, 1, 3, "g_mode3_p1");
380 | 
381 |                         // Mode 4: 5-bit RGB endpoints, 6-bit alpha endpoints, no P-bits, 2 or 3-bit indexes
382 |                         EmitTableBC7(w, 5, 0, 0, 0, 1, 3, "g_mode4_rgb_low");
383 |                         EmitTableBC7(w, 5, 0, 0, 0, 1, 7, "g_mode4_rgb_high_i1");
384 |                         EmitTableBC7(w, 5, 0, 0, 0, 2, 7, "g_mode4_rgb_high_i2");
385 |                         EmitTableBC7(w, 5, 0, 0, 0, 3, 7, "g_mode4_rgb_high_i3");
386 |                         EmitTableBC7(w, 6, 0, 0, 0, 1, 3, "g_mode4_a_low");
387 |                         EmitTableBC7(w, 6, 0, 0, 0, 1, 7, "g_mode4_a_high_i1");
388 |                         EmitTableBC7(w, 6, 0, 0, 0, 2, 7, "g_mode4_a_high_i2");
389 |                         EmitTableBC7(w, 6, 0, 0, 0, 3, 7, "g_mode4_a_high_i3");
390 | 
391 |                         // Mode 5: 7-bit RGB endpoints, 8-bit alpha endpoints (omit), no P-bits, 2-bit indexes
392 |                         EmitTableBC7(w, 7, 0, 0, 0, 1, 3, "g_mode5_rgb_low");
393 | 
394 |                         // Mode 6: 7-bit RGB endpoints, 1 P-bit, 4-bit indexes
395 |                         EmitTableBC7(w, 7, 1, 0, 0, 1, 15, "g_mode6_p0_i1");
396 |                         EmitTableBC7(w, 7, 1, 0, 0, 2, 15, "g_mode6_p0_i2");
397 |                         EmitTableBC7(w, 7, 1, 0, 0, 3, 15, "g_mode6_p0_i3");
398 |                         EmitTableBC7(w, 7, 1, 0, 0, 4, 15, "g_mode6_p0_i4");
399 |                         EmitTableBC7(w, 7, 1, 0, 0, 5, 15, "g_mode6_p0_i5");
400 |                         EmitTableBC7(w, 7, 1, 0, 0, 6, 15, "g_mode6_p0_i6");
401 |                         EmitTableBC7(w, 7, 1, 0, 0, 7, 15, "g_mode6_p0_i7");
402 |                         EmitTableBC7(w, 7, 1, 1, 1, 1, 15, "g_mode6_p1_i1");
403 |                         EmitTableBC7(w, 7, 1, 1, 1, 2, 15, "g_mode6_p1_i2");
404 |                         EmitTableBC7(w, 7, 1, 1, 1, 3, 15, "g_mode6_p1_i3");
405 |                         EmitTableBC7(w, 7, 1, 1, 1, 4, 15, "g_mode6_p1_i4");
406 |                         EmitTableBC7(w, 7, 1, 1, 1, 5, 15, "g_mode6_p1_i5");
407 |                         EmitTableBC7(w, 7, 1, 1, 1, 6, 15, "g_mode6_p1_i6");
408 |                         EmitTableBC7(w, 7, 1, 1, 1, 7, 15, "g_mode6_p1_i7");
409 | 
410 |                         // Mode 7: 5-bit RGB endpoints, 2 P-bits, 2-bit indexes
411 |                         EmitTableBC7(w, 5, 2, 0, 0, 1, 3, "g_mode7_p00");
412 |                         EmitTableBC7(w, 5, 2, 0, 1, 1, 3, "g_mode7_p01");
413 |                         EmitTableBC7(w, 5, 2, 1, 0, 1, 3, "g_mode7_p10");
414 |                         EmitTableBC7(w, 5, 2, 1, 1, 1, 3, "g_mode7_p11");
415 |                     }
416 |                     else
417 |                     {
418 |                         // Arguments: bits, maxIndex, paranoia, name.  The "_p" tables use a nonzero paranoia.
419 |                         EmitTable(w, 5, 3, 0.0, "g_singleColor5_3");
420 |                         EmitTable(w, 6, 3, 0.0, "g_singleColor6_3");
421 |                         EmitTable(w, 5, 2, 0.0, "g_singleColor5_2");
422 |                         EmitTable(w, 6, 2, 0.0, "g_singleColor6_2");
423 |                         EmitTable(w, 5, 3, 0.03, "g_singleColor5_3_p");
424 |                         EmitTable(w, 6, 3, 0.03, "g_singleColor6_3_p");
425 |                         EmitTable(w, 5, 2, 0.03, "g_singleColor5_2_p");
426 |                         EmitTable(w, 6, 2, 0.03, "g_singleColor6_2_p");
427 |                     }
428 | 
429 |                     w.WriteLine("}}}");
430 |                 }
431 |             }
432 | 
433 |             MakeETC2AlphaRoundingTables("ConvectionKernels_ETC2_Rounding.h");
434 |             MakeFakeBT709RoundingTables("ConvectionKernels_FakeBT709_Rounding.h");
435 |         }
429 |     }
430 | }
431 | 


--------------------------------------------------------------------------------
/MakeTables/Properties/AssemblyInfo.cs:
--------------------------------------------------------------------------------
 1 | ﻿using System.Reflection;
 2 | using System.Runtime.CompilerServices;
 3 | using System.Runtime.InteropServices;
 4 | 
 5 | // General Information about an assembly is controlled through the following
 6 | // set of attributes. Change these attribute values to modify the information
 7 | // associated with an assembly.
 8 | [assembly: AssemblyTitle("MakeSingleColorTables")]
 9 | [assembly: AssemblyDescription("")]
10 | [assembly: AssemblyConfiguration("")]
11 | [assembly: AssemblyCompany("HP Inc.")]
12 | [assembly: AssemblyProduct("MakeSingleColorTables")]
13 | [assembly: AssemblyCopyright("Copyright © HP Inc. 2019")]
14 | [assembly: AssemblyTrademark("")]
15 | [assembly: AssemblyCulture("")]
16 | 
17 | // Setting ComVisible to false makes the types in this assembly not visible
18 | // to COM components.  If you need to access a type in this assembly from
19 | // COM, set the ComVisible attribute to true on that type.
20 | [assembly: ComVisible(false)]
21 | 
22 | // The following GUID is for the ID of the typelib if this project is exposed to COM
23 | [assembly: Guid("867f8f36-10ea-4594-aa41-34bc5b74a65a")]
24 | 
25 | // Version information for an assembly consists of the following four values:
26 | //
27 | //      Major Version
28 | //      Minor Version
29 | //      Build Number
30 | //      Revision
31 | //
32 | // You can specify all the values or you can default the Build and Revision Numbers
33 | // by using the '*' as shown below:
34 | // [assembly: AssemblyVersion("1.0.*")]
35 | [assembly: AssemblyVersion("1.0.0.0")]
36 | [assembly: AssemblyFileVersion("1.0.0.0")]
37 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # ConvectionKernels
 2 | These are the stand-alone texture compression kernels for Convection Texture Tools (CVTT); you can embed them in other applications.
 3 | https://github.com/elasota/cvtt
 4 | 
 5 | The CVTT codecs are designed to get very high quality at good speed by leveraging effective heuristics and a SPMD-style design that makes heavy use of SIMD ops and 16-bit math.
 6 | 
 7 | Compressed texture format support:
 8 |  * BC1 (DXT1): Complete
 9 |  * BC2 (DXT3): Complete
10 |  * BC3 (DXT5): Complete
11 |  * BC4: Complete
12 |  * BC5: Complete
13 |  * BC6H: Experimental
14 |  * BC7: Complete
15 |  * ETC1: Complete
16 |  * ETC2 RGB: Complete
17 |  * ETC2 RGBA: Complete
18 |  * ETC2 with punchthrough alpha: Complete
19 |  * 11-bit EAC: Experimental
20 |  * PVRTC: Not supported
21 |  * ASTC: Not supported
22 | 
23 | 
24 | # Basic usage
25 | 
26 | Include "ConvectionKernels.h"
27 | 
28 | Depending on the input format, blocks should be pre-packed into one of the PixelBlock structures: PixelBlockU8 for unsigned LDR formats (BC1, BC2, BC3, BC7, BC4U, BC5U), PixelBlockS8 for signed LDR formats (BC4S, BC5S), and PixelBlockF16 for HDR formats (BC6H).  The block pixel order is left-to-right, top-to-bottom, and the channel order is red, green, blue, alpha.
29 | 
30 | BC6H floats are stored as int16_t in the pixel block structure, which should be bit-cast from the 16-bit float input.  Converting other float precisions to 16-bit is outside of the scope of the kernels.
31 | 
32 | Create an Options structure and fill it out:
33 |   * flags: A bitwise OR of cvtt::Flags values, which enable or disable various features.
34 |   * threshold: The alpha threshold for encoding BC1 with alpha test.  Any alpha value lower than the threshold will use transparent alpha.
35 |   * redWeight: Red channel relative importance
36 |   * blueWeight: Blue channel relative importance
37 |   * alphaWeight: Alpha channel relative importance
38 | 
39 | For some modes, you must pass an encoding plan, which controls how the encoder will behave.  You should NOT attempt to initialize the encoding plan yourself; either use a default-initialized encoding plan (which will run at maximum quality), or use ConfigureBC7EncodingPlanFromQuality or ConfigureBC7EncodingPlanFromFineTuningParams to configure a lower-quality encoding plan.  Configuring an encoding plan is somewhat slow, so you should only do it once per encode job.
40 | 
41 | Once you've done both of those things, call the corresponding encode function to digest the input blocks and emit output blocks.
42 | 
43 | **VERY IMPORTANT**: The encode functions must be given a list of cvtt::NumParallelBlocks blocks, and will emit cvtt::NumParallelBlocks output blocks.  If you want to encode fewer blocks, then you must pad the input structure with unused block data, and the output buffer must still contain enough space.
44 | 
45 | # ETC compression
46 | 
47 | The ETC encoders require significantly more temporary data storage than the other encoders, so the storage must be allocated before using the encoders.
48 | 
49 | To allocate the temporary data:
50 |   * Create an allocation function compatible with cvtt::Kernels::allocFunc_t, which accepts a context pointer and byte size and returns a buffer of at least that size.  The returned buffer must be suitably aligned for SIMD usage (e.g. 16-byte alignment on Intel).
51 |   * Use the AllocETC1Data or AllocETC2Data functions, pass the allocation function and a context pointer, which will be passed back to the allocation function.
52 | 
53 | To release the temporary data:
54 |   * Create a free function compatible with cvtt::Kernels::freeFunc_t, which accepts a context pointer, a pointer to the buffer allocated by the allocation func, and the original size.
55 |   * Use the ReleaseETC1Data or ReleaseETC2Data functions, pass the original compression data structure returned by the allocation function, and the free function.
56 | 
57 | Once allocated, the compression data can be reused over multiple calls to the encode functions, and depending on architecture, can usually be used by a different thread than the one that allocated it, as long as multiple encode functions are not using it at once.
58 | 


--------------------------------------------------------------------------------
/etc2packer/.editorconfig:
--------------------------------------------------------------------------------
 1 | root = true
 2 | 
 3 | [*.{cpp,h,inl,fx,hlsl}]
 4 | indent_size = 4
 5 | indent_style = space
 6 | trim_trailing_whitespace = true
 7 | insert_final_newline = true
 8 | end_of_line = crlf
 9 | charset = latin1
10 | 


--------------------------------------------------------------------------------
/etc2packer/etc2packer.cpp:
--------------------------------------------------------------------------------
  1 | // This is a simple example application for using CVTT's ETC kernels to compress ETC textures.
  2 | // It only compresses a single texture level.
  3 | 
  4 | #include <string.h>
  5 | #include <algorithm>
  6 | 
  7 | #include "stb_image/stb_image.h"
  8 | 
  9 | #include "ktxheader.h"
 10 | #include "etc2packer.h"
 11 | #include "../ConvectionKernels.h"
 12 | 
 13 | // Returns a buffer of 'size' bytes aligned to 16 bytes, obtained from _aligned_malloc.
 14 | // 'context' is ignored.  NOTE(review): presumably the allocation callback handed to the ETC kernels - confirm at the call site.
 15 | static void *allocshim(void *context, size_t size)
 16 | {
 17 |     (void)context;
 18 | 
 19 |     const size_t simdAlignment = 16;
 20 |     void *buffer = _aligned_malloc(size, simdAlignment);
 21 |     return buffer;
 22 | }
 17 | 
 18 | // Releases 'ptr' with _aligned_free, the counterpart of the _aligned_malloc call in allocshim.
 19 | // 'context' and 'size' are ignored.
 20 | static void freeshim(void *context, void *ptr, size_t size)
 21 | {
 22 |     (void)context;
 23 |     (void)size;
 24 | 
 25 |     _aligned_free(ptr);
 26 | }
 22 | 
 23 | enum TargetFormat // output formats; main() casts an index into g_formatNames to this type, so the order must match that array
 24 | {
 25 |     ETC1,
 26 |     ETC2_RGB,
 27 |     ETC2_RGBA,
 28 |     ETC2_Punchthrough,
 29 |     R11_Unsigned,
 30 |     R11_Signed,
 31 | };
 32 | 
 33 | const char *g_formatNames[] = // names accepted by -format; entry N names the TargetFormat whose value is N
 34 | {
 35 |     "etc1",
 36 |     "etc2rgb",
 37 |     "etc2rgba",
 38 |     "etc2punchthrough",
 39 |     "r11u",
 40 |     "r11s",
 41 | };
 42 | 
 43 | // Prints the command-line usage summary to stderr, then terminates the process with exit code -1.
 44 | // The format list covers every name in g_formatNames; etc2rgb is what main() starts with when
 45 | // -format is not given.
 46 | void PrintUsageAndExit()
 47 | {
 48 |     fprintf(stderr, "Usage: etc2packer [options] input output\n");
 49 |     fprintf(stderr, "Options:\n");
 50 |     fprintf(stderr, "-format <format> - Selects output format.  Format is one of:\n");
 51 |     fprintf(stderr, "   etc1 - ETC1\n");
 52 |     fprintf(stderr, "   etc2rgb - ETC2 RGB (default)\n");
 53 |     fprintf(stderr, "   etc2rgba - ETC2 RGBA\n");
 54 |     fprintf(stderr, "   etc2punchthrough - ETC2 RGB with punchthrough alpha\n");
 55 |     fprintf(stderr, "   r11u - EAC R11 unsigned\n");
 56 |     fprintf(stderr, "   r11s - EAC R11 signed\n");
 57 |     fprintf(stderr, "-fakebt709 - Use fake BT.709 error metric (same as etc2comp, significantly slower)\n");
 58 |     fprintf(stderr, "-uniform - Use uniform color weights (overrides -fakebt709)\n");
 59 |     exit(-1);
 60 | }
 56 | 
 57 | int main(int argc, const char **argv)
 58 | {
 59 |     TargetFormat targetFormat = ETC2_RGB;
 60 |     bool useFakeBT709 = false;
 61 |     bool useUniform = false;
 62 | 
 63 |     const char *inputPath = NULL;
 64 |     const char *outputPath = NULL;
 65 | 
 66 |     if (argc < 3)
 67 |         PrintUsageAndExit();
 68 | 
 69 |     for (int i = 1; i < argc; i++)
 70 |     {
 71 |         if (!strcmp(argv[i], "-format"))
 72 |         {
 73 |             i++;
 74 |             if (i == argc)
 75 |                 PrintUsageAndExit();
 76 | 
 77 |             bool foundFormat = false;
 78 |             for (int f = 0; f < sizeof(g_formatNames) / sizeof(g_formatNames[0]); f++)
 79 |             {
 80 |                 if (!strcmp(argv[i], g_formatNames[f]))
 81 |                 {
 82 |                     targetFormat = static_cast<TargetFormat>(f);
 83 |                     foundFormat = true;
 84 |                     break;
 85 |                 }
 86 |             }
 87 |         }
 88 |         else if (!strcmp(argv[i], "-fakebt709"))
 89 |         {
 90 |             useFakeBT709 = true;
 91 |         }
 92 |         else if (!strcmp(argv[i], "-uniform"))
 93 |         {
 94 |             useUniform = true;
 95 |         }
 96 |         else
 97 |         {
 98 |             if (i != argc - 2)
 99 |                 PrintUsageAndExit();
100 | 
101 |             inputPath = argv[i];
102 |             outputPath = argv[i + 1];
103 |             break;
104 |         }
105 |     }
106 | 
107 |     int w, h, channels;
108 |     stbi_uc *image = stbi_load(inputPath, &w, &h, &channels, 4);
109 | 
110 |     if (!image)
111 |     {
112 |         fprintf(stderr, "Could not load input image\n");
113 |         return -1;
114 |     }
115 | 
116 |     static const uint8_t ktxIdentifier[12] =
117 |     {
118 |         0xAB, 0x4B, 0x54, 0x58, // first four bytes of Byte[12] identifier
119 |         0x20, 0x31, 0x31, 0xBB, // next four bytes of Byte[12] identifier
120 |         0x0D, 0x0A, 0x1A, 0x0A  // final four bytes of Byte[12] identifier
121 |     };
122 | 
123 | 	KtxHeader_t ktxHeader;
124 | 	memcpy(ktxHeader.identifier, ktxIdentifier, 12);
125 | 
126 | 	ktxHeader.endianness = 0x04030201;
127 | 	ktxHeader.glType = 0;
128 | 	ktxHeader.glTypeSize = 1;
129 | 	ktxHeader.glFormat = 0;
130 | 
131 | 	ktxHeader.glInternalFormat = (unsigned int)KtxHeader_t::InternalFormat::ETC2_RGB8;
132 | 	ktxHeader.glBaseInternalFormat = (unsigned int)KtxHeader_t::BaseInternalFormat::ETC2_RGB8;
133 | 
134 | 	ktxHeader.pixelWidth = w;
135 | 	ktxHeader.pixelHeight = h;
136 | 	ktxHeader.pixelDepth = 0;
137 | 	ktxHeader.numberOfArrayElements = 0;
138 | 	ktxHeader.numberOfFaces = 0;
139 | 	ktxHeader.bytesOfKeyValueData = 0;
140 | 
141 | 	ktxHeader.pixelDepth = 0;
142 | 	ktxHeader.numberOfArrayElements = 0;
143 | 	ktxHeader.numberOfFaces = 1;
144 | 	ktxHeader.numberOfMipmapLevels = 1;
145 | 
146 |     unsigned int blockSizeBytes = 8;
147 | 
148 |     switch (targetFormat)
149 |     {
150 |     case ETC1:
151 |         ktxHeader.glInternalFormat = (unsigned int)KtxHeader_t::InternalFormat::ETC1_RGB8;
152 |         ktxHeader.glBaseInternalFormat = (unsigned int)KtxHeader_t::BaseInternalFormat::ETC1_RGB8;
153 |         blockSizeBytes = 8;
154 |         break;
155 |     case ETC2_RGB:
156 |         ktxHeader.glInternalFormat = (unsigned int)KtxHeader_t::InternalFormat::ETC2_RGB8;
157 |         ktxHeader.glBaseInternalFormat = (unsigned int)KtxHeader_t::BaseInternalFormat::ETC2_RGB8;
158 |         blockSizeBytes = 8;
159 |         break;
160 |     case ETC2_RGBA:
161 |         ktxHeader.glInternalFormat = (unsigned int)KtxHeader_t::InternalFormat::ETC2_RGBA8;
162 |         ktxHeader.glBaseInternalFormat = (unsigned int)KtxHeader_t::BaseInternalFormat::ETC2_RGBA8;
163 |         blockSizeBytes = 16;
164 |         break;
165 |     case ETC2_Punchthrough:
166 |         ktxHeader.glInternalFormat = (unsigned int)KtxHeader_t::InternalFormat::ETC2_RGB8A1;
167 |         ktxHeader.glBaseInternalFormat = (unsigned int)KtxHeader_t::BaseInternalFormat::ETC2_RGB8A1;
168 |         blockSizeBytes = 8;
169 |         break;
170 |     case R11_Unsigned:
171 |         ktxHeader.glInternalFormat = (unsigned int)KtxHeader_t::InternalFormat::ETC2_R11;
172 |         ktxHeader.glBaseInternalFormat = (unsigned int)KtxHeader_t::BaseInternalFormat::ETC2_R11;
173 |         blockSizeBytes = 8;
174 |         break;
175 |     case R11_Signed:
176 |         ktxHeader.glInternalFormat = (unsigned int)KtxHeader_t::InternalFormat::ETC2_SIGNED_R11;
177 |         ktxHeader.glBaseInternalFormat = (unsigned int)KtxHeader_t::BaseInternalFormat::ETC2_R11;
178 |         blockSizeBytes = 8;
179 |         break;
180 |     }
181 | 
182 |     uint8_t alphaOutputBlock[8 * cvtt::NumParallelBlocks];
183 |     uint8_t outputBlock[8 * cvtt::NumParallelBlocks];
184 | 
185 | 	FILE *f = fopen(outputPath, "wb");
186 |     if (!f)
187 |     {
188 |         fprintf(stderr, "Could not open output file\n");
189 |         return -1;
190 |     }
191 | 
192 |     int blockWidth = (w + 3) / 4;
193 |     int blockHeight = (h + 3) / 4;
194 | 
195 | 	fwrite(&ktxHeader, sizeof(ktxHeader), 1, f);
196 | 	uint32_t dataSize = blockWidth * blockHeight * blockSizeBytes;
197 | 	fwrite(&dataSize, 4, 1, f);
198 | 
199 |     cvtt::Options options;
200 | 
201 |     if (useUniform)
202 |         options.flags |= cvtt::Flags::Uniform;
203 |     else if (useFakeBT709)
204 |         options.flags |= cvtt::Flags::ETC_UseFakeBT709;
205 | 
206 |     cvtt::ETC1CompressionData* compressionData1 = NULL;
207 |     cvtt::ETC2CompressionData* compressionData2 = NULL;
208 | 
209 |     if (targetFormat == ETC1)
210 |         compressionData1 = cvtt::Kernels::AllocETC1Data(allocshim, nullptr);
211 | 
212 |     if (targetFormat == ETC2_RGB || targetFormat == ETC2_RGBA || targetFormat == ETC2_Punchthrough)
213 |         compressionData2 = cvtt::Kernels::AllocETC2Data(allocshim, nullptr, options);
214 | 
215 | 	for (int y = 0; y < h; y += 4)
216 | 	{
217 |         cvtt::PixelBlockU8 pixelBlocks[8];
218 |         cvtt::PixelBlockScalarS16 pixelBlockSigned[8];
219 |         cvtt::PixelBlockScalarS16 pixelBlockUnsigned[8];
220 |         for (int x = 0; x < w; x += 32)
221 | 		{
222 |             for (int block = 0; block < cvtt::NumParallelBlocks; block++)
223 |             {
224 |                 for (int subY = 0; subY < 4; subY++)
225 |                 {
226 |                     int clampedY = std::min(y + subY, h - 1);
227 | 
228 |                     const uint8_t *inputRow = image + (clampedY) * w * 4;
229 |                     for (int subX = 0; subX < 4; subX++)
230 |                     {
231 |                         int clampedX = std::min(x + subX + block * 4, w - 1);
232 | 
233 |                         int rgba[4];
234 |                         for (int ch = 0; ch < 4; ch++)
235 |                             rgba[ch] = inputRow[clampedX * 4 + ch];
236 | 
237 |                         for (int ch = 0; ch < 4; ch++)
238 |                             pixelBlocks[block].m_pixels[subY * 4 + subX][ch] = rgba[ch];
239 | 
240 |                         double rgbaTotal = rgba[0] + rgba[1] + rgba[2];
241 |                         double normalizedUnsigned = rgbaTotal / (255.0 * 3.0);
242 |                         double normalizedSigned = normalizedUnsigned * 2.0 - 1.0;
243 | 
244 |                         pixelBlockUnsigned[block].m_pixels[subY * 4 + subX] = static_cast<int>(floor(normalizedUnsigned * 2047.0 + 0.5));
245 |                         pixelBlockSigned[block].m_pixels[subY * 4 + subX] = static_cast<int>(floor(normalizedUnsigned * 1023.0 + 0.5));
246 |                     }
247 |                 }
248 |             }
249 | 
250 |             if (targetFormat == ETC2_RGBA)
251 |                 cvtt::Kernels::EncodeETC2Alpha(alphaOutputBlock, pixelBlocks, options);
252 | 
253 |             switch (targetFormat)
254 |             {
255 |             case ETC1:
256 |                 cvtt::Kernels::EncodeETC1(outputBlock, pixelBlocks, options, compressionData1);
257 |                 break;
258 |             case R11_Unsigned:
259 |                 cvtt::Kernels::EncodeETC2Alpha11(outputBlock, pixelBlockUnsigned, false, options);
260 |                 break;
261 |             case R11_Signed:
262 |                 cvtt::Kernels::EncodeETC2Alpha11(outputBlock, pixelBlockSigned, true, options);
263 |                 break;
264 |             case ETC2_Punchthrough:
265 |                 cvtt::Kernels::EncodeETC2PunchthroughAlpha(outputBlock, pixelBlocks, options, compressionData2);
266 |                 break;
267 |             case ETC2_RGB:
268 |             case ETC2_RGBA:
269 |                 cvtt::Kernels::EncodeETC2(outputBlock, pixelBlocks, options, compressionData2);
270 |                 break;
271 |             }
272 | 
273 |             int writableBlocks = std::min<int>(cvtt::NumParallelBlocks, (w - x + 3) / 4);
274 | 
275 |             for (int block = 0; block < writableBlocks; block++)
276 |             {
277 |                 if (targetFormat == ETC2_RGBA)
278 |                     fwrite(alphaOutputBlock + block * 8, 8, 1, f);
279 |                 fwrite(outputBlock + block * 8, 8, 1, f);
280 |             }
281 |         }
282 | 	}
283 | 
284 |     if (compressionData1)
285 |         cvtt::Kernels::ReleaseETC1Data(compressionData1, freeshim);
286 | 
287 |     if (compressionData2)
288 |         cvtt::Kernels::ReleaseETC2Data(compressionData2, freeshim);
289 | 
290 | 	stbi_image_free(image);
291 | 
292 | 	return 0;
293 | }
294 | 


--------------------------------------------------------------------------------
/etc2packer/etc2packer.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | void CompressETC2Block(uint8_t * outputBuffer, const uint8_t * inputBuffer); // Prototype only: writes through outputBuffer, reads from inputBuffer. NOTE(review): uint8_t is used but this header includes nothing itself, so includers must pull in <stdint.h> first; required buffer sizes are not stated here - confirm against the definition.
4 | 


--------------------------------------------------------------------------------
/etc2packer/etc2packer.vcxproj:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
 3 |   <ItemGroup Label="ProjectConfigurations">
 4 |     <ProjectConfiguration Include="Debug|x64">
 5 |       <Configuration>Debug</Configuration>
 6 |       <Platform>x64</Platform>
 7 |     </ProjectConfiguration>
 8 |     <ProjectConfiguration Include="Release|x64">
 9 |       <Configuration>Release</Configuration>
10 |       <Platform>x64</Platform>
11 |     </ProjectConfiguration>
12 |   </ItemGroup>
13 |   <PropertyGroup Label="Globals">
14 |     <VCProjectVersion>15.0</VCProjectVersion>
15 |     <ProjectGuid>{23B20484-6E2E-4102-8362-33A29A8D1933}</ProjectGuid>
16 |     <RootNamespace>etc2packer</RootNamespace>
17 |     <WindowsTargetPlatformVersion>10.0.16299.0</WindowsTargetPlatformVersion>
18 |   </PropertyGroup>
19 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
20 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
21 |     <ConfigurationType>Application</ConfigurationType>
22 |     <UseDebugLibraries>true</UseDebugLibraries>
23 |     <PlatformToolset>v141</PlatformToolset>
24 |     <CharacterSet>MultiByte</CharacterSet>
25 |   </PropertyGroup>
26 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
27 |     <ConfigurationType>Application</ConfigurationType>
28 |     <UseDebugLibraries>false</UseDebugLibraries>
29 |     <PlatformToolset>v141</PlatformToolset>
30 |     <WholeProgramOptimization>true</WholeProgramOptimization>
31 |     <CharacterSet>MultiByte</CharacterSet>
32 |   </PropertyGroup>
33 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
34 |   <ImportGroup Label="ExtensionSettings">
35 |   </ImportGroup>
36 |   <ImportGroup Label="Shared">
37 |   </ImportGroup>
38 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
39 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
40 |   </ImportGroup>
41 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
42 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
43 |   </ImportGroup>
44 |   <PropertyGroup Label="UserMacros" />
45 |   <PropertyGroup />
46 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
47 |     <ClCompile>
48 |       <WarningLevel>Level3</WarningLevel>
49 |       <Optimization>Disabled</Optimization>
50 |       <SDLCheck>true</SDLCheck>
51 |       <ConformanceMode>true</ConformanceMode>
52 |       <PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
53 |     </ClCompile>
54 |   </ItemDefinitionGroup>
55 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
56 |     <ClCompile>
57 |       <WarningLevel>Level3</WarningLevel>
58 |       <Optimization>MaxSpeed</Optimization>
59 |       <FunctionLevelLinking>true</FunctionLevelLinking>
60 |       <IntrinsicFunctions>true</IntrinsicFunctions>
61 |       <SDLCheck>true</SDLCheck>
62 |       <ConformanceMode>true</ConformanceMode>
63 |       <PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
64 |     </ClCompile>
65 |     <Link>
66 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
67 |       <OptimizeReferences>true</OptimizeReferences>
68 |     </Link>
69 |   </ItemDefinitionGroup>
70 |   <ItemGroup>
71 |     <ClCompile Include="etc2packer.cpp" />
72 |     <ClCompile Include="stb_image\stb_image.cpp" />
73 |   </ItemGroup>
74 |   <ItemGroup>
75 |     <ClInclude Include="ConvectionKernels\ConvectionKernels_ETC2_Rounding.h" />
76 |     <ClInclude Include="ConvectionKernels\ConvectionKernels_FakeBT709_Rounding.h" />
77 |     <ClInclude Include="etc2packer.h" />
78 |     <ClInclude Include="ktxheader.h" />
79 |   </ItemGroup>
80 |   <ItemGroup>
81 |     <ProjectReference Include="..\ConvectionKernels.vcxproj">
82 |       <Project>{5e4f0557-b7d8-4d9b-9d3a-2b966c9c1b47}</Project>
83 |     </ProjectReference>
84 |   </ItemGroup>
85 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
86 |   <ImportGroup Label="ExtensionTargets">
87 |   </ImportGroup>
88 | </Project>


--------------------------------------------------------------------------------
/etc2packer/etc2packer.vcxproj.filters:
--------------------------------------------------------------------------------
 1 | ﻿<?xml version="1.0" encoding="utf-8"?>
 2 | <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
 3 |   <ItemGroup>
 4 |     <Filter Include="Source Files">
 5 |       <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
 6 |       <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
 7 |     </Filter>
 8 |     <Filter Include="Header Files">
 9 |       <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
10 |       <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
11 |     </Filter>
12 |     <Filter Include="Resource Files">
13 |       <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
14 |       <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
15 |     </Filter>
16 |   </ItemGroup>
17 |   <ItemGroup>
18 |     <ClCompile Include="etc2packer.cpp">
19 |       <Filter>Source Files</Filter>
20 |     </ClCompile>
21 |     <ClCompile Include="stb_image\stb_image.cpp">
22 |       <Filter>Source Files</Filter>
23 |     </ClCompile>
24 |   </ItemGroup>
25 |   <ItemGroup>
26 |     <ClInclude Include="ktxheader.h">
27 |       <Filter>Source Files</Filter>
28 |     </ClInclude>
29 |     <ClInclude Include="etc2packer.h">
30 |       <Filter>Header Files</Filter>
31 |     </ClInclude>
32 |     <ClInclude Include="ConvectionKernels\ConvectionKernels_ETC2_Rounding.h">
33 |       <Filter>Header Files</Filter>
34 |     </ClInclude>
35 |     <ClInclude Include="ConvectionKernels\ConvectionKernels_FakeBT709_Rounding.h">
36 |       <Filter>Header Files</Filter>
37 |     </ClInclude>
38 |   </ItemGroup>
39 | </Project>


--------------------------------------------------------------------------------
/etc2packer/ktxheader.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <stdint.h>
 4 | 
 5 | typedef struct KtxHeader_s // KTX file header record. The packer tool's main() fills one in and fwrite()s it as raw bytes, so member order and widths below are the on-disk layout.
 6 | {
 7 | 	enum class InternalFormat // Values stored (after a cast to unsigned int) in glInternalFormat.
 8 | 	{
 9 | 		ETC1_RGB8 = 0x8D64,
10 | 		ETC1_ALPHA8 = ETC1_RGB8, // alias: same numeric value as ETC1_RGB8
11 | 		// ETC2 / EAC formats. NOTE(review): 0x9270..0x9278 look like the OpenGL ES compressed-format enums - confirm against the GL registry.
12 | 		ETC2_R11 = 0x9270,
13 | 		ETC2_SIGNED_R11 = 0x9271,
14 | 		ETC2_RG11 = 0x9272,
15 | 		ETC2_SIGNED_RG11 = 0x9273,
16 | 		ETC2_RGB8 = 0x9274,
17 | 		ETC2_SRGB8 = 0x9275,
18 | 		ETC2_RGB8A1 = 0x9276,
19 | 		ETC2_SRGB8_PUNCHTHROUGH_ALPHA1 = 0x9277,
20 | 		ETC2_RGBA8 = 0x9278
21 | 	};
22 | 
23 | 	enum class BaseInternalFormat // Values stored (after a cast to unsigned int) in glBaseInternalFormat.
24 | 	{
25 | 		ETC2_R11 = 0x1903, // NOTE(review): presumably the unsized GL formats (0x1903 RED, 0x8227 RG, 0x1907 RGB, 0x1908 RGBA) - confirm.
26 | 		ETC2_RG11 = 0x8227,
27 | 		ETC1_RGB8 = 0x1907,
28 | 		ETC1_ALPHA8 = ETC1_RGB8, // alias: same numeric value as ETC1_RGB8
29 | 		// Several enumerators below deliberately share a value (RGB8 == ETC1_RGB8, RGB8A1 == RGBA8).
30 | 		ETC2_RGB8 = 0x1907,
31 | 		ETC2_RGB8A1 = 0x1908,
32 | 		ETC2_RGBA8 = 0x1908,
33 | 	};
34 | 
35 | 	uint8_t identifier[12]; // file magic; main() copies its 12-byte ktxIdentifier table here
36 | 	uint32_t endianness; // main() writes 0x04030201
37 | 	uint32_t glType; // main() writes 0
38 | 	uint32_t glTypeSize; // main() writes 1
39 | 	uint32_t glFormat; // main() writes 0
40 | 	uint32_t glInternalFormat; // one of InternalFormat, chosen from the target format
41 | 	uint32_t glBaseInternalFormat; // one of BaseInternalFormat, chosen from the target format
42 | 	uint32_t pixelWidth; // width of the loaded image, in pixels
43 | 	uint32_t pixelHeight; // height of the loaded image, in pixels
44 | 	uint32_t pixelDepth; // main() writes 0
45 | 	uint32_t numberOfArrayElements; // main() writes 0
46 | 	uint32_t numberOfFaces; // main() ends up writing 1
47 | 	uint32_t numberOfMipmapLevels; // main() writes 1
48 | 	uint32_t bytesOfKeyValueData; // main() writes 0 and emits the image-size word directly after this header
49 | } KtxHeader_t;
50 | 


--------------------------------------------------------------------------------
/etc2packer/stb_image/stb_image.cpp:
--------------------------------------------------------------------------------
1 | #define STB_IMAGE_IMPLEMENTATION
2 | #include "stb_image.h"
3 | 


--------------------------------------------------------------------------------
/etc2packer/stb_image/stb_image.vcxproj:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="utf-8"?>
  2 | <Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  3 |   <ItemGroup Label="ProjectConfigurations">
  4 |     <ProjectConfiguration Include="Debug|Win32">
  5 |       <Configuration>Debug</Configuration>
  6 |       <Platform>Win32</Platform>
  7 |     </ProjectConfiguration>
  8 |     <ProjectConfiguration Include="Release|Win32">
  9 |       <Configuration>Release</Configuration>
 10 |       <Platform>Win32</Platform>
 11 |     </ProjectConfiguration>
 12 |     <ProjectConfiguration Include="Debug|x64">
 13 |       <Configuration>Debug</Configuration>
 14 |       <Platform>x64</Platform>
 15 |     </ProjectConfiguration>
 16 |     <ProjectConfiguration Include="Release|x64">
 17 |       <Configuration>Release</Configuration>
 18 |       <Platform>x64</Platform>
 19 |     </ProjectConfiguration>
 20 |   </ItemGroup>
 21 |   <PropertyGroup Label="Globals">
 22 |     <VCProjectVersion>15.0</VCProjectVersion>
 23 |     <ProjectGuid>{BF8EC93D-003C-45E9-878B-16DECD78808D}</ProjectGuid>
 24 |     <RootNamespace>stbimage</RootNamespace>
 25 |     <WindowsTargetPlatformVersion>10.0.16299.0</WindowsTargetPlatformVersion>
 26 |   </PropertyGroup>
 27 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
 28 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
 29 |     <ConfigurationType>StaticLibrary</ConfigurationType>
 30 |     <UseDebugLibraries>true</UseDebugLibraries>
 31 |     <PlatformToolset>v141</PlatformToolset>
 32 |     <CharacterSet>MultiByte</CharacterSet>
 33 |   </PropertyGroup>
 34 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
 35 |     <ConfigurationType>StaticLibrary</ConfigurationType>
 36 |     <UseDebugLibraries>false</UseDebugLibraries>
 37 |     <PlatformToolset>v141</PlatformToolset>
 38 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 39 |     <CharacterSet>MultiByte</CharacterSet>
 40 |   </PropertyGroup>
 41 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
 42 |     <ConfigurationType>StaticLibrary</ConfigurationType>
 43 |     <UseDebugLibraries>true</UseDebugLibraries>
 44 |     <PlatformToolset>v141</PlatformToolset>
 45 |     <CharacterSet>MultiByte</CharacterSet>
 46 |   </PropertyGroup>
 47 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
 48 |     <ConfigurationType>StaticLibrary</ConfigurationType>
 49 |     <UseDebugLibraries>false</UseDebugLibraries>
 50 |     <PlatformToolset>v141</PlatformToolset>
 51 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 52 |     <CharacterSet>MultiByte</CharacterSet>
 53 |   </PropertyGroup>
 54 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
 55 |   <ImportGroup Label="ExtensionSettings">
 56 |   </ImportGroup>
 57 |   <ImportGroup Label="Shared">
 58 |   </ImportGroup>
 59 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 60 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 61 |   </ImportGroup>
 62 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
 63 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 64 |   </ImportGroup>
 65 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 66 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 67 |   </ImportGroup>
 68 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
 69 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 70 |   </ImportGroup>
 71 |   <PropertyGroup Label="UserMacros" />
 72 |   <PropertyGroup />
 73 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
 74 |     <ClCompile>
 75 |       <WarningLevel>Level3</WarningLevel>
 76 |       <Optimization>MaxSpeed</Optimization>
 77 |       <FunctionLevelLinking>true</FunctionLevelLinking>
 78 |       <IntrinsicFunctions>true</IntrinsicFunctions>
 79 |       <SDLCheck>true</SDLCheck>
 80 |       <ConformanceMode>true</ConformanceMode>
 81 |     </ClCompile>
 82 |     <Link>
 83 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
 84 |       <OptimizeReferences>true</OptimizeReferences>
 85 |     </Link>
 86 |   </ItemDefinitionGroup>
 87 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 88 |     <ClCompile>
 89 |       <WarningLevel>Level3</WarningLevel>
 90 |       <Optimization>Disabled</Optimization>
 91 |       <SDLCheck>true</SDLCheck>
 92 |       <ConformanceMode>true</ConformanceMode>
 93 |     </ClCompile>
 94 |   </ItemDefinitionGroup>
 95 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 96 |     <ClCompile>
 97 |       <WarningLevel>Level3</WarningLevel>
 98 |       <Optimization>Disabled</Optimization>
 99 |       <SDLCheck>true</SDLCheck>
100 |       <ConformanceMode>true</ConformanceMode>
101 |     </ClCompile>
102 |   </ItemDefinitionGroup>
103 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
104 |     <ClCompile>
105 |       <WarningLevel>Level3</WarningLevel>
106 |       <Optimization>MaxSpeed</Optimization>
107 |       <FunctionLevelLinking>true</FunctionLevelLinking>
108 |       <IntrinsicFunctions>true</IntrinsicFunctions>
109 |       <SDLCheck>true</SDLCheck>
110 |       <ConformanceMode>true</ConformanceMode>
111 |     </ClCompile>
112 |     <Link>
113 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
114 |       <OptimizeReferences>true</OptimizeReferences>
115 |     </Link>
116 |   </ItemDefinitionGroup>
117 |   <ItemGroup>
118 |     <ClCompile Include="stb_image.cpp" />
119 |     <ClCompile Include="stb_image_write.cpp" />
120 |   </ItemGroup>
121 |   <ItemGroup>
122 |     <ClInclude Include="stb_image.h" />
123 |     <ClInclude Include="stb_image_write.h" />
124 |   </ItemGroup>
125 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
126 |   <ImportGroup Label="ExtensionTargets">
127 |   </ImportGroup>
128 | </Project>


--------------------------------------------------------------------------------
/etc2packer/stb_image/stb_image.vcxproj.filters:
--------------------------------------------------------------------------------
 1 | ﻿<?xml version="1.0" encoding="utf-8"?>
 2 | <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
 3 |   <ItemGroup>
 4 |     <Filter Include="Source Files">
 5 |       <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
 6 |       <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
 7 |     </Filter>
 8 |     <Filter Include="Header Files">
 9 |       <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
10 |       <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
11 |     </Filter>
12 |     <Filter Include="Resource Files">
13 |       <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
14 |       <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
15 |     </Filter>
16 |   </ItemGroup>
17 |   <ItemGroup>
18 |     <ClCompile Include="stb_image.cpp">
19 |       <Filter>Source Files</Filter>
20 |     </ClCompile>
21 |     <ClCompile Include="stb_image_write.cpp">
22 |       <Filter>Source Files</Filter>
23 |     </ClCompile>
24 |   </ItemGroup>
25 |   <ItemGroup>
26 |     <ClInclude Include="stb_image_write.h">
27 |       <Filter>Source Files</Filter>
28 |     </ClInclude>
29 |     <ClInclude Include="stb_image.h">
30 |       <Filter>Source Files</Filter>
31 |     </ClInclude>
32 |   </ItemGroup>
33 | </Project>


--------------------------------------------------------------------------------
/etc2packer/stb_image/stb_image_write.cpp:
--------------------------------------------------------------------------------
1 | #define STB_IMAGE_WRITE_IMPLEMENTATION
2 | #include "stb_image_write.h"
3 | 


--------------------------------------------------------------------------------
/etc_notes.txt:
--------------------------------------------------------------------------------
 1 | The ETC1 compressor uses modified cluster fit:
 2 | 
 3 | Assume that there exists an ideal base color and set of selectors for a given table.
 4 | For a given table and set of selectors, the ideal base color can be determined by subtracting the offsets from each pixel and averaging them.
 5 | Doing that is equivalent to subtracting the average offset from the average color.
 6 | Because positive and negative selectors of the same magnitude cancel out, the search space of possible average offsets is reduced: 57 unique offsets for the first table and 81 for the others.
 7 | Most of the offsets result in the same color as another average offset due to quantization of the base color, so those can be de-duplicated.
 8 | So:
 9 | - Start with a high-precision average color.
10 | - Apply precomputed luma offsets to it.
11 | - Quantize and de-duplicate the base colors.
12 | - Find the ideal selectors for each base color.
13 | 
14 | Differential mode is solved by just finding the best legal combination from those attempts.
15 | 
16 | There are several scenarios where this is not ideal:
17 | - Clamping behavior can sometimes be leveraged for a more accurate block.
18 | - Differentials can sometimes be moved slightly closer to become legal.
19 | - This only works when MSE is the error metric (e.g. not for normal maps).
20 | - This only works when all pixels are weighted equally (i.e. not when weighting by alpha or using edge deblocking).
21 | 
22 | T and H modes work by computing a chrominance line, splitting the block in two at the chrominance midpoint, and using the resulting cluster assignments to determine the averages.
23 | 
24 | Planar mode is just solved algebraically.
25 | 
26 | If you want to emulate etc2comp's default settings, add the flag ETC_UseFakeBT709 to use its modified Rec. 709 error coefficients.
27 | Doing that will significantly slow down encoding because it requires much more complicated quantization math.


--------------------------------------------------------------------------------