├── .editorconfig ├── .gitignore ├── ConvectionKernels.h ├── ConvectionKernels.sln ├── ConvectionKernels.vcxproj ├── ConvectionKernels.vcxproj.filters ├── ConvectionKernels_API.cpp ├── ConvectionKernels_AggregatedError.h ├── ConvectionKernels_BC67.cpp ├── ConvectionKernels_BC67.h ├── ConvectionKernels_BC6H_IO.cpp ├── ConvectionKernels_BC6H_IO.h ├── ConvectionKernels_BC7_Prio.h ├── ConvectionKernels_BC7_PrioData.cpp ├── ConvectionKernels_BC7_SingleColor.h ├── ConvectionKernels_BCCommon.cpp ├── ConvectionKernels_BCCommon.h ├── ConvectionKernels_Config.h ├── ConvectionKernels_ETC.cpp ├── ConvectionKernels_ETC.h ├── ConvectionKernels_ETC1.h ├── ConvectionKernels_ETC2.h ├── ConvectionKernels_ETC2_Rounding.h ├── ConvectionKernels_EndpointRefiner.h ├── ConvectionKernels_EndpointSelector.h ├── ConvectionKernels_FakeBT709_Rounding.h ├── ConvectionKernels_IndexSelector.cpp ├── ConvectionKernels_IndexSelector.h ├── ConvectionKernels_IndexSelectorHDR.h ├── ConvectionKernels_PackedCovarianceMatrix.h ├── ConvectionKernels_ParallelMath.h ├── ConvectionKernels_S3TC.cpp ├── ConvectionKernels_S3TC.h ├── ConvectionKernels_S3TC_SingleColor.h ├── ConvectionKernels_SingleFile.cpp ├── ConvectionKernels_UnfinishedEndpoints.h ├── ConvectionKernels_Util.cpp ├── ConvectionKernels_Util.h ├── LICENSE.txt ├── MakeTables ├── App.config ├── MakeTables.csproj ├── Program.cs └── Properties │ └── AssemblyInfo.cs ├── README.md ├── etc2packer ├── .editorconfig ├── etc2packer.cpp ├── etc2packer.h ├── etc2packer.vcxproj ├── etc2packer.vcxproj.filters ├── ktxheader.h └── stb_image │ ├── stb_image.cpp │ ├── stb_image.h │ ├── stb_image.vcxproj │ ├── stb_image.vcxproj.filters │ ├── stb_image_write.cpp │ └── stb_image_write.h └── etc_notes.txt /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*.{cpp,h,inl,fx,hlsl}] 4 | indent_size = 4 5 | indent_style = space 6 | trim_trailing_whitespace = true 7 | insert_final_newline = true 8 | 
end_of_line = crlf 9 | charset = latin1 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.psess 2 | *.vsp 3 | *.log 4 | *.err 5 | *.wrn 6 | *.suo 7 | *.sdf 8 | *.user 9 | *.i 10 | *.vspscc 11 | *.opensdf 12 | *.opendb 13 | *.ipch 14 | *.cache 15 | *.tlog 16 | *.lastbuildstate 17 | *.ilk 18 | *.VC.db 19 | *.nupkg 20 | *.obj 21 | *.idb 22 | .vs 23 | /Bin 24 | /ipch 25 | /Debug 26 | /Profile 27 | /Release 28 | /Tests 29 | /wiki 30 | *.inc 31 | *.pdb 32 | *.csv 33 | *.dds 34 | /*/bin/* 35 | /*/obj/* 36 | /*/x64/* 37 | /x64/*/*.exe 38 | /x64/*/*.obj 39 | /x64/*/*.iobj 40 | /x64/*/*.ipdb 41 | -------------------------------------------------------------------------------- /ConvectionKernels.h: -------------------------------------------------------------------------------- 1 | /* 2 | Convection Texture Tools 3 | Copyright (c) 2018 Eric Lasota 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject 11 | to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included 14 | in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 17 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
19 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 21 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | */ 24 | #pragma once 25 | #ifndef __CVTT_CONVECTION_KERNELS__ 26 | #define __CVTT_CONVECTION_KERNELS__ 27 | 28 | #include <stdint.h> 29 | #include <stddef.h> 30 | 31 | namespace cvtt 32 | { 33 | namespace Flags 34 | { 35 | // Use fast indexing in BC7 encoding (about 2x faster, slightly worse quality) 36 | const uint32_t BC7_FastIndexing = 0x008; 37 | 38 | // Try precomputed single-color lookups where applicable (slightly slower, small quality increase on specific blocks) 39 | const uint32_t BC7_TrySingleColor = 0x010; 40 | 41 | // Don't allow non-zero or non-max alpha values in blocks that only contain one or the other 42 | const uint32_t BC7_RespectPunchThrough = 0x020; 43 | 44 | // Use fast indexing in HDR formats (faster, worse quality) 45 | const uint32_t BC6H_FastIndexing = 0x040; 46 | 47 | // Exhaustive search RGB orderings when encoding BC1-BC3 (much slower, better quality) 48 | const uint32_t S3TC_Exhaustive = 0x080; 49 | 50 | // Penalize distant endpoints, improving quality on inaccurate GPU decoders 51 | const uint32_t S3TC_Paranoid = 0x100; 52 | 53 | // Uniform color channel importance 54 | const uint32_t Uniform = 0x200; 55 | 56 | // Use fake BT.709 color space for etc2comp compatibility (slower) 57 | const uint32_t ETC_UseFakeBT709 = 0x400; 58 | 59 | // Use accurate quantization functions when quantizing fake BT.709 (much slower, marginal improvement on specific blocks) 60 | const uint32_t ETC_FakeBT709Accurate = 0x800; 61 | 62 | // Misc useful default flag combinations 63 | const uint32_t Fastest = (BC6H_FastIndexing | BC7_FastIndexing | S3TC_Paranoid); 64 | const uint32_t Faster = (BC6H_FastIndexing | BC7_FastIndexing | S3TC_Paranoid); 65 | const uint32_t Fast = (BC7_FastIndexing | 
S3TC_Paranoid); 66 | const uint32_t Default = (BC7_FastIndexing | S3TC_Paranoid); 67 | const uint32_t Better = (S3TC_Paranoid | S3TC_Exhaustive); 68 | const uint32_t Ultra = (BC7_TrySingleColor | S3TC_Paranoid | S3TC_Exhaustive | ETC_FakeBT709Accurate); 69 | } 70 | 71 | const unsigned int NumParallelBlocks = 8; 72 | 73 | struct Options 74 | { 75 | uint32_t flags; // Bitmask of cvtt::Flags values 76 | float threshold; // Alpha test threshold for BC1 77 | float redWeight; // Red channel importance 78 | float greenWeight; // Green channel importance 79 | float blueWeight; // Blue channel importance 80 | float alphaWeight; // Alpha channel importance 81 | 82 | int refineRoundsBC7; // Number of refine rounds for BC7 83 | int refineRoundsBC6H; // Number of refine rounds for BC6H (max 3) 84 | int refineRoundsIIC; // Number of refine rounds for independent interpolated channels (BC3 alpha, BC4, BC5) 85 | int refineRoundsS3TC; // Number of refine rounds for S3TC RGB 86 | 87 | int seedPoints; // Number of seed points (min 1, max 4) 88 | 89 | Options() 90 | : flags(Flags::Default) 91 | , threshold(0.5f) 92 | , redWeight(0.2125f / 0.7154f) 93 | , greenWeight(1.0f) 94 | , blueWeight(0.0721f / 0.7154f) 95 | , alphaWeight(1.0f) 96 | , refineRoundsBC7(2) 97 | , refineRoundsBC6H(3) 98 | , refineRoundsIIC(8) 99 | , refineRoundsS3TC(2) 100 | , seedPoints(4) 101 | { 102 | } 103 | }; 104 | 105 | struct BC7FineTuningParams 106 | { 107 | // Seed point counts for each mode+configuration combination 108 | uint8_t mode0SP[16]; 109 | uint8_t mode1SP[64]; 110 | uint8_t mode2SP[64]; 111 | uint8_t mode3SP[64]; 112 | uint8_t mode4SP[4][2]; 113 | uint8_t mode5SP[4]; 114 | uint8_t mode6SP; 115 | uint8_t mode7SP[64]; 116 | 117 | BC7FineTuningParams() 118 | { 119 | for (int i = 0; i < 16; i++) 120 | this->mode0SP[i] = 4; 121 | 122 | for (int i = 0; i < 64; i++) 123 | { 124 | this->mode1SP[i] = 4; 125 | this->mode2SP[i] = 4; 126 | this->mode3SP[i] = 4; 127 | this->mode7SP[i] = 4; 128 | } 129 | 130 | 
for (int i = 0; i < 4; i++) 131 | { 132 | for (int j = 0; j < 2; j++) 133 | this->mode4SP[i][j] = 4; 134 | 135 | this->mode5SP[i] = 4; 136 | } 137 | 138 | this->mode6SP = 4; 139 | } 140 | }; 141 | 142 | struct BC7EncodingPlan 143 | { 144 | static const int kNumRGBAShapes = 129; 145 | static const int kNumRGBShapes = 243; 146 | 147 | uint64_t mode1PartitionEnabled; 148 | uint64_t mode2PartitionEnabled; 149 | uint64_t mode3PartitionEnabled; 150 | uint16_t mode0PartitionEnabled; 151 | uint64_t mode7RGBAPartitionEnabled; 152 | uint64_t mode7RGBPartitionEnabled; 153 | uint8_t mode4SP[4][2]; 154 | uint8_t mode5SP[4]; 155 | bool mode6Enabled; 156 | 157 | uint8_t seedPointsForShapeRGB[kNumRGBShapes]; 158 | uint8_t seedPointsForShapeRGBA[kNumRGBAShapes]; 159 | 160 | uint8_t rgbaShapeList[kNumRGBAShapes]; 161 | uint8_t rgbaNumShapesToEvaluate; 162 | 163 | uint8_t rgbShapeList[kNumRGBShapes]; 164 | uint8_t rgbNumShapesToEvaluate; 165 | 166 | BC7EncodingPlan() 167 | { 168 | for (int i = 0; i < kNumRGBShapes; i++) 169 | { 170 | this->rgbShapeList[i] = i; 171 | this->seedPointsForShapeRGB[i] = 4; 172 | } 173 | this->rgbNumShapesToEvaluate = kNumRGBShapes; 174 | 175 | for (int i = 0; i < kNumRGBAShapes; i++) 176 | { 177 | this->rgbaShapeList[i] = i; 178 | this->seedPointsForShapeRGBA[i] = 4; 179 | } 180 | this->rgbaNumShapesToEvaluate = kNumRGBAShapes; 181 | 182 | 183 | this->mode0PartitionEnabled = 0xffff; 184 | this->mode1PartitionEnabled = 0xffffffffffffffffULL; 185 | this->mode2PartitionEnabled = 0xffffffffffffffffULL; 186 | this->mode3PartitionEnabled = 0xffffffffffffffffULL; 187 | this->mode6Enabled = true; 188 | this->mode7RGBPartitionEnabled = 0xffffffffffffffffULL; 189 | this->mode7RGBAPartitionEnabled = 0xffffffffffffffffULL; 190 | 191 | for (int i = 0; i < 4; i++) 192 | { 193 | for (int j = 0; j < 2; j++) 194 | this->mode4SP[i][j] = 4; 195 | 196 | this->mode5SP[i] = 4; 197 | } 198 | } 199 | }; 200 | 201 | // RGBA input block for unsigned 8-bit formats 202 | struct 
PixelBlockU8 203 | { 204 | uint8_t m_pixels[16][4]; 205 | }; 206 | 207 | // RGBA input block for signed 8-bit formats 208 | struct PixelBlockS8 209 | { 210 | int8_t m_pixels[16][4]; 211 | }; 212 | 213 | struct PixelBlockScalarS16 214 | { 215 | int16_t m_pixels[16]; 216 | }; 217 | 218 | // RGBA input block for half-precision float formats (bit-cast to int16_t) 219 | struct PixelBlockF16 220 | { 221 | int16_t m_pixels[16][4]; 222 | }; 223 | 224 | class ETC2CompressionData 225 | { 226 | protected: 227 | ETC2CompressionData() {} 228 | }; 229 | 230 | class ETC1CompressionData 231 | { 232 | protected: 233 | ETC1CompressionData() {} 234 | }; 235 | 236 | namespace Kernels 237 | { 238 | typedef void* allocFunc_t(void *context, size_t size); 239 | typedef void freeFunc_t(void *context, void* ptr, size_t size); 240 | 241 | // NOTE: All functions accept and output NumParallelBlocks blocks at once 242 | void EncodeBC1(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options); 243 | void EncodeBC2(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options); 244 | void EncodeBC3(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options); 245 | void EncodeBC4U(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options); 246 | void EncodeBC4S(uint8_t *pBC, const PixelBlockS8 *pBlocks, const Options &options); 247 | void EncodeBC5U(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options); 248 | void EncodeBC5S(uint8_t *pBC, const PixelBlockS8 *pBlocks, const Options &options); 249 | void EncodeBC6HU(uint8_t *pBC, const PixelBlockF16 *pBlocks, const Options &options); 250 | void EncodeBC6HS(uint8_t *pBC, const PixelBlockF16 *pBlocks, const Options &options); 251 | void EncodeBC7(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options, const BC7EncodingPlan &encodingPlan); 252 | void EncodeETC1(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options, ETC1CompressionData *compressionData); 253 | void EncodeETC2(uint8_t *pBC, 
const PixelBlockU8 *pBlocks, const Options &options, ETC2CompressionData *compressionData); 254 | void EncodeETC2RGBA(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, cvtt::ETC2CompressionData *compressionData); 255 | void EncodeETC2PunchthroughAlpha(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, cvtt::ETC2CompressionData *compressionData); 256 | 257 | void EncodeETC2Alpha(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options); 258 | void EncodeETC2Alpha11(uint8_t *pBC, const PixelBlockScalarS16 *pBlocks, bool isSigned, const cvtt::Options &options); 259 | 260 | // Generates a BC7 encoding plan from a quality parameter that ranges from 1 (fastest) to 100 (best) 261 | void ConfigureBC7EncodingPlanFromQuality(BC7EncodingPlan &encodingPlan, int quality); 262 | 263 | // Generates a BC7 encoding plan from fine-tuning parameters. 264 | bool ConfigureBC7EncodingPlanFromFineTuningParams(BC7EncodingPlan &encodingPlan, const BC7FineTuningParams &params); 265 | 266 | // ETC compression requires temporary storage that normally consumes a large amount of stack space. 267 | // To allocate and release it, use one of these functions. 
268 | ETC2CompressionData *AllocETC2Data(allocFunc_t allocFunc, void *context, const cvtt::Options &options); 269 | void ReleaseETC2Data(ETC2CompressionData *compressionData, freeFunc_t freeFunc); 270 | 271 | ETC1CompressionData *AllocETC1Data(allocFunc_t allocFunc, void *context); 272 | void ReleaseETC1Data(ETC1CompressionData *compressionData, freeFunc_t freeFunc); 273 | 274 | void DecodeBC6HU(PixelBlockF16 *pBlocks, const uint8_t *pBC); 275 | void DecodeBC6HS(PixelBlockF16 *pBlocks, const uint8_t *pBC); 276 | void DecodeBC7(PixelBlockU8 *pBlocks, const uint8_t *pBC); 277 | } 278 | } 279 | 280 | #endif 281 | -------------------------------------------------------------------------------- /ConvectionKernels.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 15 4 | VisualStudioVersion = 15.0.27130.2020 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ConvectionKernels", "ConvectionKernels.vcxproj", "{5E4F0557-B7D8-4D9B-9D3A-2B966C9C1B47}" 7 | EndProject 8 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MakeTables", "MakeTables\MakeTables.csproj", "{867F8F36-10EA-4594-AA41-34BC5B74A65A}" 9 | EndProject 10 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "etc2packer", "etc2packer\etc2packer.vcxproj", "{23B20484-6E2E-4102-8362-33A29A8D1933}" 11 | EndProject 12 | Global 13 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 14 | Debug|x64 = Debug|x64 15 | Release|x64 = Release|x64 16 | EndGlobalSection 17 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 18 | {5E4F0557-B7D8-4D9B-9D3A-2B966C9C1B47}.Debug|x64.ActiveCfg = Debug|x64 19 | {5E4F0557-B7D8-4D9B-9D3A-2B966C9C1B47}.Debug|x64.Build.0 = Debug|x64 20 | {5E4F0557-B7D8-4D9B-9D3A-2B966C9C1B47}.Release|x64.ActiveCfg = Release|x64 21 | {5E4F0557-B7D8-4D9B-9D3A-2B966C9C1B47}.Release|x64.Build.0 = Release|x64 22 | 
{867F8F36-10EA-4594-AA41-34BC5B74A65A}.Debug|x64.ActiveCfg = Debug|Any CPU 23 | {867F8F36-10EA-4594-AA41-34BC5B74A65A}.Debug|x64.Build.0 = Debug|Any CPU 24 | {867F8F36-10EA-4594-AA41-34BC5B74A65A}.Release|x64.ActiveCfg = Release|Any CPU 25 | {867F8F36-10EA-4594-AA41-34BC5B74A65A}.Release|x64.Build.0 = Release|Any CPU 26 | {23B20484-6E2E-4102-8362-33A29A8D1933}.Debug|x64.ActiveCfg = Debug|x64 27 | {23B20484-6E2E-4102-8362-33A29A8D1933}.Debug|x64.Build.0 = Debug|x64 28 | {23B20484-6E2E-4102-8362-33A29A8D1933}.Release|x64.ActiveCfg = Release|x64 29 | {23B20484-6E2E-4102-8362-33A29A8D1933}.Release|x64.Build.0 = Release|x64 30 | EndGlobalSection 31 | GlobalSection(SolutionProperties) = preSolution 32 | HideSolutionNode = FALSE 33 | EndGlobalSection 34 | GlobalSection(ExtensibilityGlobals) = postSolution 35 | SolutionGuid = {66CE399E-5954-472F-9A80-D109E5F06A54} 36 | EndGlobalSection 37 | EndGlobal 38 | -------------------------------------------------------------------------------- /ConvectionKernels.vcxproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Debug 6 | x64 7 | 8 | 9 | Release 10 | x64 11 | 12 | 13 | 14 | 15.0 15 | {5E4F0557-B7D8-4D9B-9D3A-2B966C9C1B47} 16 | ConvectionKernels 17 | 10.0.16299.0 18 | 19 | 20 | 21 | StaticLibrary 22 | true 23 | v141 24 | MultiByte 25 | 26 | 27 | StaticLibrary 28 | false 29 | v141 30 | true 31 | MultiByte 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | Bin\Desktop_2017\$(Platform)\$(Configuration)\ 47 | Bin\Desktop_2017\$(Platform)\$(Configuration)\ 48 | 49 | 50 | Bin\Desktop_2017\$(Platform)\$(Configuration)\ 51 | Bin\Desktop_2017\$(Platform)\$(Configuration)\ 52 | 53 | 54 | 55 | Level3 56 | MaxSpeed 57 | true 58 | true 59 | true 60 | true 61 | 62 | 63 | true 64 | true 65 | 66 | 67 | 68 | 69 | Level3 70 | Disabled 71 | true 72 | true 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 
96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | -------------------------------------------------------------------------------- /ConvectionKernels.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | Source Files 23 | 24 | 25 | Source Files 26 | 27 | 28 | Source Files 29 | 30 | 31 | Source Files 32 | 33 | 34 | Source Files 35 | 36 | 37 | Source Files 38 | 39 | 40 | Source Files 41 | 42 | 43 | Source Files 44 | 45 | 46 | Source Files 47 | 48 | 49 | 50 | 51 | Header Files 52 | 53 | 54 | Header Files 55 | 56 | 57 | Header Files 58 | 59 | 60 | Header Files 61 | 62 | 63 | Header Files 64 | 65 | 66 | Header Files 67 | 68 | 69 | Header Files 70 | 71 | 72 | Header Files 73 | 74 | 75 | Header Files 76 | 77 | 78 | Header Files 79 | 80 | 81 | Header Files 82 | 83 | 84 | Header Files 85 | 86 | 87 | Header Files 88 | 89 | 90 | Header Files 91 | 92 | 93 | Header Files 94 | 95 | 96 | Header Files 97 | 98 | 99 | Header Files 100 | 101 | 102 | Header Files 103 | 104 | 105 | Header Files 106 | 107 | 108 | Header Files 109 | 110 | 111 | Header Files 112 | 113 | 114 | Header Files 115 | 116 | 117 | Header Files 118 | 119 | 120 | -------------------------------------------------------------------------------- /ConvectionKernels_API.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Convection Texture Tools 3 | Copyright (c) 2018-2019 Eric Lasota 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this 
software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject 11 | to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included 14 | in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 17 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 21 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
23 | */ 24 | #include "ConvectionKernels_Config.h" 25 | 26 | #if !defined(CVTT_SINGLE_FILE) || defined(CVTT_SINGLE_FILE_IMPL) 27 | 28 | #include <stdint.h> 29 | #include "ConvectionKernels.h" 30 | #include "ConvectionKernels_Util.h" 31 | #include "ConvectionKernels_BC67.h" 32 | #include "ConvectionKernels_ETC.h" 33 | #include "ConvectionKernels_S3TC.h" 34 | 35 | #include <assert.h> 36 | 37 | namespace cvtt 38 | { 39 | namespace Kernels 40 | { 41 | void EncodeBC7(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, const BC7EncodingPlan &encodingPlan) 42 | { 43 | assert(pBlocks); 44 | assert(pBC); 45 | 46 | float channelWeights[4]; 47 | Util::FillWeights(options, channelWeights); 48 | 49 | for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize) 50 | { 51 | Internal::BC7Computer::Pack(options.flags, pBlocks + blockBase, pBC, channelWeights, encodingPlan, options.refineRoundsBC7); 52 | pBC += ParallelMath::ParallelSize * 16; 53 | } 54 | } 55 | 56 | void EncodeBC6HU(uint8_t *pBC, const PixelBlockF16 *pBlocks, const cvtt::Options &options) 57 | { 58 | assert(pBlocks); 59 | assert(pBC); 60 | 61 | float channelWeights[4]; 62 | Util::FillWeights(options, channelWeights); 63 | 64 | for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize) 65 | { 66 | Internal::BC6HComputer::Pack(options.flags, pBlocks + blockBase, pBC, channelWeights, false, options.seedPoints, options.refineRoundsBC6H); 67 | pBC += ParallelMath::ParallelSize * 16; 68 | } 69 | } 70 | 71 | void EncodeBC6HS(uint8_t *pBC, const PixelBlockF16 *pBlocks, const cvtt::Options &options) 72 | { 73 | assert(pBlocks); 74 | assert(pBC); 75 | 76 | float channelWeights[4]; 77 | Util::FillWeights(options, channelWeights); 78 | 79 | for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize) 80 | { 81 | Internal::BC6HComputer::Pack(options.flags, pBlocks + blockBase, pBC, channelWeights, 
true, options.seedPoints, options.refineRoundsBC6H); 82 | pBC += ParallelMath::ParallelSize * 16; 83 | } 84 | } 85 | 86 | void EncodeBC1(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options) 87 | { 88 | assert(pBlocks); 89 | assert(pBC); 90 | 91 | float channelWeights[4]; 92 | Util::FillWeights(options, channelWeights); 93 | 94 | for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize) 95 | { 96 | Internal::S3TCComputer::PackRGB(options.flags, pBlocks + blockBase, pBC, 8, channelWeights, true, options.threshold, (options.flags & Flags::S3TC_Exhaustive) != 0, options.seedPoints, options.refineRoundsS3TC); 97 | pBC += ParallelMath::ParallelSize * 8; 98 | } 99 | } 100 | 101 | void EncodeBC2(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options) 102 | { 103 | assert(pBlocks); 104 | assert(pBC); 105 | 106 | float channelWeights[4]; 107 | Util::FillWeights(options, channelWeights); 108 | 109 | for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize) 110 | { 111 | Internal::S3TCComputer::PackRGB(options.flags, pBlocks + blockBase, pBC + 8, 16, channelWeights, false, 1.0f, (options.flags & Flags::S3TC_Exhaustive) != 0, options.seedPoints, options.refineRoundsS3TC); 112 | Internal::S3TCComputer::PackExplicitAlpha(options.flags, pBlocks + blockBase, 3, pBC, 16); 113 | pBC += ParallelMath::ParallelSize * 16; 114 | } 115 | } 116 | 117 | void EncodeBC3(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options) 118 | { 119 | assert(pBlocks); 120 | assert(pBC); 121 | 122 | float channelWeights[4]; 123 | Util::FillWeights(options, channelWeights); 124 | 125 | for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize) 126 | { 127 | Internal::S3TCComputer::PackRGB(options.flags, pBlocks + blockBase, pBC + 8, 16, channelWeights, false, 1.0f, (options.flags & Flags::S3TC_Exhaustive) != 0, options.seedPoints, 
options.refineRoundsS3TC); 128 | Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, pBlocks + blockBase, 3, pBC, 16, false, options.seedPoints, options.refineRoundsIIC); 129 | pBC += ParallelMath::ParallelSize * 16; 130 | } 131 | } 132 | 133 | void EncodeBC4U(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options) 134 | { 135 | assert(pBlocks); 136 | assert(pBC); 137 | 138 | float channelWeights[4]; 139 | Util::FillWeights(options, channelWeights); 140 | 141 | for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize) 142 | { 143 | Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, pBlocks + blockBase, 0, pBC, 8, false, options.seedPoints, options.refineRoundsIIC); 144 | pBC += ParallelMath::ParallelSize * 8; 145 | } 146 | } 147 | 148 | void EncodeBC4S(uint8_t *pBC, const PixelBlockS8 *pBlocks, const Options &options) 149 | { 150 | assert(pBlocks); 151 | assert(pBC); 152 | 153 | float channelWeights[4]; 154 | Util::FillWeights(options, channelWeights); 155 | 156 | for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize) 157 | { 158 | PixelBlockU8 inputBlocks[ParallelMath::ParallelSize]; 159 | Util::BiasSignedInput(inputBlocks, pBlocks + blockBase); 160 | 161 | Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, inputBlocks, 0, pBC, 8, true, options.seedPoints, options.refineRoundsIIC); 162 | pBC += ParallelMath::ParallelSize * 8; 163 | } 164 | } 165 | 166 | void EncodeBC5U(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options) 167 | { 168 | assert(pBlocks); 169 | assert(pBC); 170 | 171 | float channelWeights[4]; 172 | Util::FillWeights(options, channelWeights); 173 | 174 | for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize) 175 | { 176 | Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, pBlocks + blockBase, 0, pBC, 16, false, options.seedPoints, options.refineRoundsIIC); 177 | 
Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, pBlocks + blockBase, 1, pBC + 8, 16, false, options.seedPoints, options.refineRoundsIIC); 178 | pBC += ParallelMath::ParallelSize * 16; 179 | } 180 | } 181 | 182 | void EncodeBC5S(uint8_t *pBC, const PixelBlockS8 *pBlocks, const Options &options) 183 | { 184 | assert(pBlocks); 185 | assert(pBC); 186 | 187 | float channelWeights[4]; 188 | Util::FillWeights(options, channelWeights); 189 | 190 | for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize) 191 | { 192 | PixelBlockU8 inputBlocks[ParallelMath::ParallelSize]; 193 | Util::BiasSignedInput(inputBlocks, pBlocks + blockBase); 194 | 195 | Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, inputBlocks, 0, pBC, 16, true, options.seedPoints, options.refineRoundsIIC); 196 | Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, inputBlocks, 1, pBC + 8, 16, true, options.seedPoints, options.refineRoundsIIC); 197 | pBC += ParallelMath::ParallelSize * 16; 198 | } 199 | } 200 | 201 | void EncodeETC1(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, cvtt::ETC1CompressionData *compressionData) 202 | { 203 | assert(pBlocks); 204 | assert(pBC); 205 | 206 | float channelWeights[4]; 207 | Util::FillWeights(options, channelWeights); 208 | 209 | for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize) 210 | { 211 | Internal::ETCComputer::CompressETC1Block(pBC, pBlocks + blockBase, compressionData, options); 212 | pBC += ParallelMath::ParallelSize * 8; 213 | } 214 | } 215 | 216 | void EncodeETC2(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, cvtt::ETC2CompressionData *compressionData) 217 | { 218 | assert(pBlocks); 219 | assert(pBC); 220 | 221 | float channelWeights[4]; 222 | Util::FillWeights(options, channelWeights); 223 | 224 | for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += 
ParallelMath::ParallelSize) 225 | { 226 | Internal::ETCComputer::CompressETC2Block(pBC, pBlocks + blockBase, compressionData, options, false); 227 | pBC += ParallelMath::ParallelSize * 8; 228 | } 229 | } 230 | 231 | void EncodeETC2PunchthroughAlpha(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, cvtt::ETC2CompressionData *compressionData) 232 | { 233 | assert(pBlocks); 234 | assert(pBC); 235 | 236 | float channelWeights[4]; 237 | Util::FillWeights(options, channelWeights); 238 | 239 | for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize) 240 | { 241 | Internal::ETCComputer::CompressETC2Block(pBC, pBlocks + blockBase, compressionData, options, true); 242 | pBC += ParallelMath::ParallelSize * 8; 243 | } 244 | } 245 | 246 | void EncodeETC2Alpha(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options) 247 | { 248 | assert(pBlocks); 249 | assert(pBC); 250 | 251 | for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize) 252 | { 253 | Internal::ETCComputer::CompressETC2AlphaBlock(pBC, pBlocks + blockBase, options); 254 | pBC += ParallelMath::ParallelSize * 8; 255 | } 256 | } 257 | 258 | void EncodeETC2Alpha11(uint8_t *pBC, const PixelBlockScalarS16 *pBlocks, bool isSigned, const cvtt::Options &options) 259 | { 260 | assert(pBlocks); 261 | assert(pBC); 262 | 263 | for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize) 264 | { 265 | Internal::ETCComputer::CompressEACBlock(pBC, pBlocks + blockBase, isSigned, options); 266 | pBC += ParallelMath::ParallelSize * 8; 267 | } 268 | } 269 | 270 | void EncodeETC2RGBA(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, cvtt::ETC2CompressionData *compressionData) 271 | { 272 | uint8_t alphaBlockData[cvtt::NumParallelBlocks * 8]; 273 | uint8_t colorBlockData[cvtt::NumParallelBlocks * 8]; 274 | 275 | EncodeETC2(colorBlockData, 
pBlocks, options, compressionData); 276 | EncodeETC2Alpha(alphaBlockData, pBlocks, options); 277 | 278 | for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase++) 279 | { 280 | for (size_t blockData = 0; blockData < 8; blockData++) 281 | pBC[blockBase * 16 + blockData] = alphaBlockData[blockBase * 8 + blockData]; 282 | 283 | for (size_t blockData = 0; blockData < 8; blockData++) 284 | pBC[blockBase * 16 + 8 + blockData] = colorBlockData[blockBase * 8 + blockData]; 285 | } 286 | } 287 | 288 | void DecodeBC7(PixelBlockU8 *pBlocks, const uint8_t *pBC) 289 | { 290 | assert(pBlocks); 291 | assert(pBC); 292 | 293 | for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase++) 294 | { 295 | Internal::BC7Computer::UnpackOne(pBlocks[blockBase], pBC); 296 | pBC += 16; 297 | } 298 | } 299 | 300 | void DecodeBC6HU(PixelBlockF16 *pBlocks, const uint8_t *pBC) 301 | { 302 | assert(pBlocks); 303 | assert(pBC); 304 | 305 | for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase++) 306 | { 307 | Internal::BC6HComputer::UnpackOne(pBlocks[blockBase], pBC, false); 308 | pBC += 16; 309 | } 310 | } 311 | 312 | void DecodeBC6HS(PixelBlockF16 *pBlocks, const uint8_t *pBC) 313 | { 314 | assert(pBlocks); 315 | assert(pBC); 316 | 317 | for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase++) 318 | { 319 | Internal::BC6HComputer::UnpackOne(pBlocks[blockBase], pBC, true); 320 | pBC += 16; 321 | } 322 | } 323 | 324 | ETC1CompressionData *AllocETC1Data(allocFunc_t allocFunc, void *context) 325 | { 326 | return cvtt::Internal::ETCComputer::AllocETC1Data(allocFunc, context); 327 | } 328 | 329 | void ReleaseETC1Data(ETC1CompressionData *compressionData, freeFunc_t freeFunc) 330 | { 331 | cvtt::Internal::ETCComputer::ReleaseETC1Data(compressionData, freeFunc); 332 | } 333 | 334 | ETC2CompressionData *AllocETC2Data(allocFunc_t allocFunc, void *context, const cvtt::Options &options) 335 | { 336 | return 
cvtt::Internal::ETCComputer::AllocETC2Data(allocFunc, context, options); 337 | } 338 | 339 | void ReleaseETC2Data(ETC2CompressionData *compressionData, freeFunc_t freeFunc) 340 | { 341 | cvtt::Internal::ETCComputer::ReleaseETC2Data(compressionData, freeFunc); 342 | } 343 | } 344 | } 345 | 346 | #endif 347 | -------------------------------------------------------------------------------- /ConvectionKernels_AggregatedError.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef __CVTT_AGGREGATEDERROR_H__ 3 | #define __CVTT_AGGREGATEDERROR_H__ 4 | 5 | #include "ConvectionKernels_ParallelMath.h" 6 | 7 | namespace cvtt 8 | { 9 | namespace Internal 10 | { 11 | template<int TVectorSize> 12 | class AggregatedError 13 | { 14 | public: 15 | typedef ParallelMath::UInt16 MUInt16; 16 | typedef ParallelMath::UInt31 MUInt31; 17 | typedef ParallelMath::Float MFloat; 18 | 19 | AggregatedError() 20 | { 21 | for (int ch = 0; ch < TVectorSize; ch++) 22 | m_errorUnweighted[ch] = ParallelMath::MakeUInt31(0); 23 | } 24 | 25 | void Add(const MUInt16 &channelErrorUnweighted, int ch) 26 | { 27 | m_errorUnweighted[ch] = m_errorUnweighted[ch] + ParallelMath::ToUInt31(channelErrorUnweighted); 28 | } 29 | 30 | MFloat Finalize(uint32_t flags, const float channelWeightsSq[TVectorSize]) const 31 | { 32 | if (flags & cvtt::Flags::Uniform) 33 | { 34 | MUInt31 total = m_errorUnweighted[0]; 35 | for (int ch = 1; ch < TVectorSize; ch++) 36 | total = total + m_errorUnweighted[ch]; 37 | return ParallelMath::ToFloat(total); 38 | } 39 | else 40 | { 41 | MFloat total = ParallelMath::ToFloat(m_errorUnweighted[0]) * channelWeightsSq[0]; 42 | for (int ch = 1; ch < TVectorSize; ch++) 43 | total = total + ParallelMath::ToFloat(m_errorUnweighted[ch]) * channelWeightsSq[ch]; 44 | return total; 45 | } 46 | } 47 | 48 | private: 49 | MUInt31 m_errorUnweighted[TVectorSize]; 50 | }; 51 | } 52 | } 53 | 54 | #endif 55 | 56 | 
-------------------------------------------------------------------------------- /ConvectionKernels_BC67.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "ConvectionKernels_ParallelMath.h" 4 | 5 | 6 | namespace cvtt 7 | { 8 | namespace Tables 9 | { 10 | namespace BC7SC 11 | { 12 | struct Table; 13 | } 14 | } 15 | 16 | namespace Internal 17 | { 18 | namespace BC67 19 | { 20 | struct WorkInfo; 21 | } 22 | 23 | template 24 | class IndexSelectorHDR; 25 | } 26 | 27 | struct PixelBlockU8; 28 | } 29 | 30 | namespace cvtt 31 | { 32 | namespace Internal 33 | { 34 | class BC7Computer 35 | { 36 | public: 37 | static void Pack(uint32_t flags, const PixelBlockU8* inputs, uint8_t* packedBlocks, const float channelWeights[4], const BC7EncodingPlan &encodingPlan, int numRefineRounds); 38 | static void UnpackOne(PixelBlockU8 &output, const uint8_t* packedBlock); 39 | 40 | private: 41 | static const int MaxTweakRounds = 4; 42 | 43 | typedef ParallelMath::SInt16 MSInt16; 44 | typedef ParallelMath::UInt15 MUInt15; 45 | typedef ParallelMath::UInt16 MUInt16; 46 | typedef ParallelMath::SInt32 MSInt32; 47 | typedef ParallelMath::Float MFloat; 48 | 49 | static void TweakAlpha(const MUInt15 original[2], int tweak, int range, MUInt15 result[2]); 50 | static void Quantize(MUInt15* color, int bits, int channels); 51 | static void QuantizeP(MUInt15* color, int bits, uint16_t p, int channels); 52 | static void Unquantize(MUInt15* color, int bits, int channels); 53 | static void CompressEndpoints0(MUInt15 ep[2][4], uint16_t p[2]); 54 | static void CompressEndpoints1(MUInt15 ep[2][4], uint16_t p); 55 | static void CompressEndpoints2(MUInt15 ep[2][4]); 56 | static void CompressEndpoints3(MUInt15 ep[2][4], uint16_t p[2]); 57 | static void CompressEndpoints4(MUInt15 epRGB[2][3], MUInt15 epA[2]); 58 | static void CompressEndpoints5(MUInt15 epRGB[2][3], MUInt15 epA[2]); 59 | static void CompressEndpoints6(MUInt15 ep[2][4], uint16_t 
p[2]); 60 | static void CompressEndpoints7(MUInt15 ep[2][4], uint16_t p[2]); 61 | static void TrySingleColorRGBAMultiTable(uint32_t flags, const MUInt15 pixels[16][4], const MFloat average[4], int numRealChannels, const uint8_t *fragmentStart, int shapeLength, const MFloat &staticAlphaError, const ParallelMath::Int16CompFlag punchThroughInvalid[4], MFloat& shapeBestError, MUInt15 shapeBestEP[2][4], MUInt15 *fragmentBestIndexes, const float *channelWeightsSq, const cvtt::Tables::BC7SC::Table*const* tables, int numTables, const ParallelMath::RoundTowardNearestForScope *rtn); 62 | static void TrySinglePlane(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const float channelWeights[4], const BC7EncodingPlan &encodingPlan, int numRefineRounds, BC67::WorkInfo& work, const ParallelMath::RoundTowardNearestForScope *rtn); 63 | static void TryDualPlane(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const float channelWeights[4], const BC7EncodingPlan &encodingPlan, int numRefineRounds, BC67::WorkInfo& work, const ParallelMath::RoundTowardNearestForScope *rtn); 64 | 65 | template 66 | static void Swap(T& a, T& b); 67 | }; 68 | 69 | 70 | class BC6HComputer 71 | { 72 | public: 73 | static void Pack(uint32_t flags, const PixelBlockF16* inputs, uint8_t* packedBlocks, const float channelWeights[4], bool isSigned, int numTweakRounds, int numRefineRounds); 74 | static void UnpackOne(PixelBlockF16 &output, const uint8_t *pBC, bool isSigned); 75 | 76 | private: 77 | typedef ParallelMath::Float MFloat; 78 | typedef ParallelMath::SInt16 MSInt16; 79 | typedef ParallelMath::UInt16 MUInt16; 80 | typedef ParallelMath::UInt15 MUInt15; 81 | typedef ParallelMath::AInt16 MAInt16; 82 | typedef ParallelMath::SInt32 MSInt32; 83 | typedef ParallelMath::UInt31 MUInt31; 84 | 85 | static const int MaxTweakRounds = 4; 86 | static const int MaxRefineRounds = 3; 87 | 88 | static MSInt16 QuantizeSingleEndpointElementSigned(const MSInt16 
&elem2CL, int precision, const ParallelMath::RoundUpForScope* ru); 89 | static MUInt15 QuantizeSingleEndpointElementUnsigned(const MUInt15 &elem, int precision, const ParallelMath::RoundUpForScope* ru); 90 | static void UnquantizeSingleEndpointElementSigned(const MSInt16 &comp, int precision, MSInt16 &outUnquantized, MSInt16 &outUnquantizedFinished2CL); 91 | static void UnquantizeSingleEndpointElementUnsigned(const MUInt15 &comp, int precision, MUInt16 &outUnquantized, MUInt16 &outUnquantizedFinished); 92 | static void QuantizeEndpointsSigned(const MSInt16 endPoints[2][3], const MFloat floatPixelsColorSpace[16][3], const MFloat floatPixelsLinearWeighted[16][3], MAInt16 quantizedEndPoints[2][3], MUInt15 indexes[16], IndexSelectorHDR<3> &indexSelector, int fixupIndex, int precision, int indexRange, const float *channelWeights, bool fastIndexing, const ParallelMath::RoundTowardNearestForScope *rtn); 93 | static void QuantizeEndpointsUnsigned(const MSInt16 endPoints[2][3], const MFloat floatPixelsColorSpace[16][3], const MFloat floatPixelsLinearWeighted[16][3], MAInt16 quantizedEndPoints[2][3], MUInt15 indexes[16], IndexSelectorHDR<3> &indexSelector, int fixupIndex, int precision, int indexRange, const float *channelWeights, bool fastIndexing, const ParallelMath::RoundTowardNearestForScope *rtn); 94 | static void EvaluatePartitionedLegality(const MAInt16 ep0[2][3], const MAInt16 ep1[2][3], int aPrec, const int bPrec[3], bool isTransformed, MAInt16 outEncodedEPs[2][2][3], ParallelMath::Int16CompFlag& outIsLegal); 95 | static void EvaluateSingleLegality(const MAInt16 ep[2][3], int aPrec, const int bPrec[3], bool isTransformed, MAInt16 outEncodedEPs[2][3], ParallelMath::Int16CompFlag& outIsLegal); 96 | static void SignExtendSingle(int &v, int bits); 97 | }; 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /ConvectionKernels_BC6H_IO.h: -------------------------------------------------------------------------------- 1 | 
#pragma once 2 | 3 | #include 4 | #include "ConvectionKernels_BC6H_IO.h" 5 | 6 | namespace cvtt 7 | { 8 | namespace BC6H_IO 9 | { 10 | typedef void (*ReadFunc_t)(const uint32_t *encoded, uint16_t &d, uint16_t &rw, uint16_t &rx, uint16_t &ry, uint16_t &rz, uint16_t &gw, uint16_t &gx, uint16_t &gy, uint16_t &gz, uint16_t &bw, uint16_t &bx, uint16_t &by, uint16_t &bz); 11 | typedef void (*WriteFunc_t)(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz); 12 | 13 | extern const ReadFunc_t g_readFuncs[14]; 14 | extern const WriteFunc_t g_writeFuncs[14]; 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /ConvectionKernels_BC7_Prio.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace cvtt { namespace Tables { namespace BC7Prio { 6 | extern const uint16_t *g_bc7PrioCodesRGB; 7 | extern const int g_bc7NumPrioCodesRGB; 8 | 9 | extern const uint16_t *g_bc7PrioCodesRGBA; 10 | extern const int g_bc7NumPrioCodesRGBA; 11 | 12 | int UnpackMode(uint16_t packed); 13 | int UnpackSeedPointCount(uint16_t packed); 14 | int UnpackPartition(uint16_t packed); 15 | int UnpackRotation(uint16_t packed); 16 | int UnpackIndexSelector(uint16_t packed); 17 | }}} 18 | -------------------------------------------------------------------------------- /ConvectionKernels_BCCommon.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Convection Texture Tools 3 | Copyright (c) 2018-2019 Eric Lasota 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | 
distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject 11 | to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included 14 | in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 17 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 21 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | ------------------------------------------------------------------------------------- 25 | 26 | Portions based on DirectX Texture Library (DirectXTex) 27 | 28 | Copyright (c) Microsoft Corporation. All rights reserved. 29 | Licensed under the MIT License. 
30 | 31 | http://go.microsoft.com/fwlink/?LinkId=248926 32 | */ 33 | #include "ConvectionKernels_Config.h" 34 | 35 | #if !defined(CVTT_SINGLE_FILE) || defined(CVTT_SINGLE_FILE_IMPL) 36 | 37 | #include "ConvectionKernels_BCCommon.h" 38 | 39 | int cvtt::Internal::BCCommon::TweakRoundsForRange(int range) 40 | { 41 | if (range == 3) 42 | return 3; 43 | return 4; 44 | } 45 | 46 | #endif 47 | -------------------------------------------------------------------------------- /ConvectionKernels_BCCommon.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef __CVTT_BCCOMMON_H__ 3 | #define __CVTT_BCCOMMON_H__ 4 | 5 | #include "ConvectionKernels_AggregatedError.h" 6 | #include "ConvectionKernels_ParallelMath.h" 7 | 8 | namespace cvtt 9 | { 10 | namespace Internal 11 | { 12 | class BCCommon 13 | { 14 | public: 15 | typedef ParallelMath::Float MFloat; 16 | typedef ParallelMath::UInt16 MUInt16; 17 | typedef ParallelMath::UInt15 MUInt15; 18 | typedef ParallelMath::AInt16 MAInt16; 19 | typedef ParallelMath::SInt16 MSInt16; 20 | typedef ParallelMath::SInt32 MSInt32; 21 | 22 | static int TweakRoundsForRange(int range); 23 | 24 | template 25 | static void ComputeErrorLDR(uint32_t flags, const MUInt15 reconstructed[TVectorSize], const MUInt15 original[TVectorSize], int numRealChannels, AggregatedError &aggError) 26 | { 27 | for (int ch = 0; ch < numRealChannels; ch++) 28 | aggError.Add(ParallelMath::SqDiffUInt8(reconstructed[ch], original[ch]), ch); 29 | } 30 | 31 | template 32 | static void ComputeErrorLDR(uint32_t flags, const MUInt15 reconstructed[TVectorSize], const MUInt15 original[TVectorSize], AggregatedError &aggError) 33 | { 34 | ComputeErrorLDR(flags, reconstructed, original, TVectorSize, aggError); 35 | } 36 | 37 | template 38 | static MFloat ComputeErrorLDRSimple(uint32_t flags, const MUInt15 reconstructed[TVectorSize], const MUInt15 original[TVectorSize], int numRealChannels, const float *channelWeightsSq) 39 | { 40 
| AggregatedError aggError; 41 | ComputeErrorLDR(flags, reconstructed, original, numRealChannels, aggError); 42 | return aggError.Finalize(flags, channelWeightsSq); 43 | } 44 | 45 | template 46 | static MFloat ComputeErrorHDRFast(uint32_t flags, const MSInt16 reconstructed[TVectorSize], const MSInt16 original[TVectorSize], const float channelWeightsSq[TVectorSize]) 47 | { 48 | MFloat error = ParallelMath::MakeFloatZero(); 49 | if (flags & Flags::Uniform) 50 | { 51 | for (int ch = 0; ch < TVectorSize; ch++) 52 | error = error + ParallelMath::SqDiffSInt16(reconstructed[ch], original[ch]); 53 | } 54 | else 55 | { 56 | for (int ch = 0; ch < TVectorSize; ch++) 57 | error = error + ParallelMath::SqDiffSInt16(reconstructed[ch], original[ch]) * ParallelMath::MakeFloat(channelWeightsSq[ch]); 58 | } 59 | 60 | return error; 61 | } 62 | 63 | template 64 | static MFloat ComputeErrorHDRSlow(uint32_t flags, const MSInt16 reconstructed[TVectorSize], const MSInt16 original[TVectorSize], const float channelWeightsSq[TVectorSize]) 65 | { 66 | MFloat error = ParallelMath::MakeFloatZero(); 67 | if (flags & Flags::Uniform) 68 | { 69 | for (int ch = 0; ch < TVectorSize; ch++) 70 | error = error + ParallelMath::SqDiff2CL(reconstructed[ch], original[ch]); 71 | } 72 | else 73 | { 74 | for (int ch = 0; ch < TVectorSize; ch++) 75 | error = error + ParallelMath::SqDiff2CL(reconstructed[ch], original[ch]) * ParallelMath::MakeFloat(channelWeightsSq[ch]); 76 | } 77 | 78 | return error; 79 | } 80 | 81 | template 82 | static void PreWeightPixelsLDR(MFloat preWeightedPixels[16][TChannelCount], const MUInt15 pixels[16][TChannelCount], const float channelWeights[TChannelCount]) 83 | { 84 | for (int px = 0; px < 16; px++) 85 | { 86 | for (int ch = 0; ch < TChannelCount; ch++) 87 | preWeightedPixels[px][ch] = ParallelMath::ToFloat(pixels[px][ch]) * channelWeights[ch]; 88 | } 89 | } 90 | 91 | template 92 | static void PreWeightPixelsHDR(MFloat preWeightedPixels[16][TChannelCount], const MSInt16 
pixels[16][TChannelCount], const float channelWeights[TChannelCount]) 93 | { 94 | for (int px = 0; px < 16; px++) 95 | { 96 | for (int ch = 0; ch < TChannelCount; ch++) 97 | preWeightedPixels[px][ch] = ParallelMath::ToFloat(pixels[px][ch]) * channelWeights[ch]; 98 | } 99 | } 100 | }; 101 | } 102 | } 103 | 104 | #endif 105 | -------------------------------------------------------------------------------- /ConvectionKernels_Config.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef __CVTT_CONFIG_H__ 3 | #define __CVTT_CONFIG_H__ 4 | 5 | #if (defined(_M_IX86_FP) && _M_IX86_FP >= 2) || defined(_M_X64) || defined(__SSE2__) 6 | #define CVTT_USE_SSE2 7 | #endif 8 | 9 | // Define this to compile everything as a single source file 10 | //#define CVTT_SINGLE_FILE 11 | 12 | #endif 13 | -------------------------------------------------------------------------------- /ConvectionKernels_ETC.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef __CVTT_CONVECTIONKERNELS_ETC_H__ 3 | #define __CVTT_CONVECTIONKERNELS_ETC_H__ 4 | 5 | #include "ConvectionKernels.h" 6 | #include "ConvectionKernels_ParallelMath.h" 7 | 8 | namespace cvtt 9 | { 10 | struct Options; 11 | 12 | namespace Internal 13 | { 14 | class ETCComputer 15 | { 16 | public: 17 | static void CompressETC1Block(uint8_t *outputBuffer, const PixelBlockU8 *inputBlocks, ETC1CompressionData *compressionData, const Options &options); 18 | static void CompressETC2Block(uint8_t *outputBuffer, const PixelBlockU8 *inputBlocks, ETC2CompressionData *compressionData, const Options &options, bool punchthroughAlpha); 19 | static void CompressETC2AlphaBlock(uint8_t *outputBuffer, const PixelBlockU8 *inputBlocks, const Options &options); 20 | static void CompressEACBlock(uint8_t *outputBuffer, const PixelBlockScalarS16 *inputBlocks, bool isSigned, const Options &options); 21 | 22 | static ETC2CompressionData 
*AllocETC2Data(cvtt::Kernels::allocFunc_t allocFunc, void *context, const cvtt::Options &options); 23 | static void ReleaseETC2Data(ETC2CompressionData *compressionData, cvtt::Kernels::freeFunc_t freeFunc); 24 | 25 | static ETC1CompressionData *AllocETC1Data(cvtt::Kernels::allocFunc_t allocFunc, void *context); 26 | static void ReleaseETC1Data(ETC1CompressionData *compressionData, cvtt::Kernels::freeFunc_t freeFunc); 27 | 28 | private: 29 | typedef ParallelMath::Float MFloat; 30 | typedef ParallelMath::SInt16 MSInt16; 31 | typedef ParallelMath::UInt15 MUInt15; 32 | typedef ParallelMath::UInt16 MUInt16; 33 | typedef ParallelMath::SInt32 MSInt32; 34 | typedef ParallelMath::UInt31 MUInt31; 35 | 36 | struct DifferentialResolveStorage 37 | { 38 | static const unsigned int MaxAttemptsPerSector = 57 + 81 + 81 + 81 + 81 + 81 + 81 + 81; 39 | 40 | MUInt15 diffNumAttempts[2]; 41 | MFloat diffErrors[2][MaxAttemptsPerSector]; 42 | MUInt16 diffSelectors[2][MaxAttemptsPerSector]; 43 | MUInt15 diffColors[2][MaxAttemptsPerSector]; 44 | MUInt15 diffTables[2][MaxAttemptsPerSector]; 45 | 46 | uint16_t attemptSortIndexes[2][MaxAttemptsPerSector]; 47 | }; 48 | 49 | struct HModeEval 50 | { 51 | MFloat errors[62][16]; 52 | MUInt16 signBits[62]; 53 | MUInt15 uniqueQuantizedColors[62]; 54 | MUInt15 numUniqueColors[2]; 55 | }; 56 | 57 | struct ETC1CompressionDataInternal : public cvtt::ETC1CompressionData 58 | { 59 | explicit ETC1CompressionDataInternal(void *context) 60 | : m_context(context) 61 | { 62 | } 63 | 64 | DifferentialResolveStorage m_drs; 65 | void *m_context; 66 | }; 67 | 68 | struct ETC2CompressionDataInternal : public cvtt::ETC2CompressionData 69 | { 70 | explicit ETC2CompressionDataInternal(void *context, const cvtt::Options &options); 71 | 72 | HModeEval m_h; 73 | DifferentialResolveStorage m_drs; 74 | 75 | void *m_context; 76 | float m_chromaSideAxis0[3]; 77 | float m_chromaSideAxis1[3]; 78 | }; 79 | 80 | static MFloat ComputeErrorUniform(const MUInt15 pixelA[3], const 
MUInt15 pixelB[3]); 81 | static MFloat ComputeErrorWeighted(const MUInt15 reconstructed[3], const MFloat pixelB[3], const Options options); 82 | static MFloat ComputeErrorFakeBT709(const MUInt15 reconstructed[3], const MFloat pixelB[3]); 83 | 84 | static void TestHalfBlock(MFloat &outError, MUInt16 &outSelectors, MUInt15 quantizedPackedColor, const MUInt15 pixels[8][3], const MFloat preWeightedPixels[8][3], const MSInt16 modifiers[4], bool isDifferential, const Options &options); 85 | static void TestHalfBlockPunchthrough(MFloat &outError, MUInt16 &outSelectors, MUInt15 quantizedPackedColor, const MUInt15 pixels[8][3], const MFloat preWeightedPixels[8][3], const ParallelMath::Int16CompFlag isTransparent[8], const MUInt15 modifier, const Options &options); 86 | static void FindBestDifferentialCombination(int flip, int d, const ParallelMath::Int16CompFlag canIgnoreSector[2], ParallelMath::Int16CompFlag& bestIsThisMode, MFloat& bestTotalError, MUInt15& bestFlip, MUInt15& bestD, MUInt15 bestColors[2], MUInt16 bestSelectors[2], MUInt15 bestTables[2], DifferentialResolveStorage &drs); 87 | 88 | static ParallelMath::Int16CompFlag ETCDifferentialIsLegalForChannel(const MUInt15 &a, const MUInt15 &b); 89 | static ParallelMath::Int16CompFlag ETCDifferentialIsLegal(const MUInt15 &a, const MUInt15 &b); 90 | static bool ETCDifferentialIsLegalForChannelScalar(const uint16_t &a, const uint16_t &b); 91 | static bool ETCDifferentialIsLegalScalar(const uint16_t &a, const uint16_t &b); 92 | 93 | static void EncodeTMode(uint8_t *outputBuffer, MFloat &bestError, const ParallelMath::Int16CompFlag isIsolated[16], const MUInt15 pixels[16][3], const MFloat preWeightedPixels[16][3], const Options &options); 94 | static void EncodeHMode(uint8_t *outputBuffer, MFloat &bestError, const ParallelMath::Int16CompFlag groupings[16], const MUInt15 pixels[16][3], HModeEval &he, const MFloat preWeightedPixels[16][3], const Options &options); 95 | 96 | static void EncodeVirtualTModePunchthrough(uint8_t 
*outputBuffer, MFloat &bestError, const ParallelMath::Int16CompFlag isIsolated[16], const MUInt15 pixels[16][3], const MFloat preWeightedPixels[16][3], const ParallelMath::Int16CompFlag isTransparent[16], const ParallelMath::Int16CompFlag& anyTransparent, const ParallelMath::Int16CompFlag& allTransparent, const Options &options); 97 | 98 | static MUInt15 DecodePlanarCoeff(const MUInt15 &coeff, int ch); 99 | static void EncodePlanar(uint8_t *outputBuffer, MFloat &bestError, const MUInt15 pixels[16][3], const MFloat preWeightedPixels[16][3], const Options &options); 100 | 101 | static void CompressETC1BlockInternal(MFloat &bestTotalError, uint8_t *outputBuffer, const MUInt15 pixels[16][3], const MFloat preWeightedPixels[16][3], DifferentialResolveStorage& compressionData, const Options &options, bool punchthrough); 102 | static void CompressETC1PunchthroughBlockInternal(MFloat &bestTotalError, uint8_t *outputBuffer, const MUInt15 pixels[16][3], const MFloat preWeightedPixels[16][3], const ParallelMath::Int16CompFlag isTransparent[16], DifferentialResolveStorage& compressionData, const Options &options); 103 | static void CompressETC2AlphaBlockInternal(uint8_t *outputBuffer, const MUInt15 pixels[16], bool is11Bit, bool isSigned, const Options &options); 104 | 105 | static void ExtractBlocks(MUInt15 pixels[16][3], MFloat preWeightedPixels[16][3], const PixelBlockU8 *inputBlocks, const Options &options); 106 | 107 | static void ResolveHalfBlockFakeBT709RoundingAccurate(MUInt15 quantized[3], const MUInt15 sectorCumulative[3], bool isDifferential); 108 | static void ResolveHalfBlockFakeBT709RoundingFast(MUInt15 quantized[3], const MUInt15 sectorCumulative[3], bool isDifferential); 109 | static void ResolveTHFakeBT709Rounding(MUInt15 quantized[3], const MUInt15 target[3], const MUInt15 &granularity); 110 | static void ConvertToFakeBT709(MFloat yuv[3], const MUInt15 color[3]); 111 | static void ConvertToFakeBT709(MFloat yuv[3], const MFloat color[3]); 112 | static void 
ConvertToFakeBT709(MFloat yuv[3], const MFloat &r, const MFloat &g, const MFloat &b); 113 | static void ConvertFromFakeBT709(MFloat rgb[3], const MFloat yuv[3]); 114 | 115 | static void QuantizeETC2Alpha(int tableIndex, const MUInt15& value, const MUInt15& baseValue, const MUInt15& multiplier, bool is11Bit, bool isSigned, MUInt15& outIndexes, MUInt15& outQuantizedValues); 116 | 117 | static void EmitTModeBlock(uint8_t *outputBuffer, const ParallelMath::ScalarUInt16 lineColor[3], const ParallelMath::ScalarUInt16 isolatedColor[3], int32_t packedSelectors, ParallelMath::ScalarUInt16 table, bool opaque); 118 | static void EmitHModeBlock(uint8_t *outputBuffer, const ParallelMath::ScalarUInt16 blockColors[2], ParallelMath::ScalarUInt16 sectorBits, ParallelMath::ScalarUInt16 signBits, ParallelMath::ScalarUInt16 table, bool opaque); 119 | static void EmitETC1Block(uint8_t *outputBuffer, int blockBestFlip, int blockBestD, const int blockBestColors[2][3], const int blockBestTables[2], const ParallelMath::ScalarUInt16 blockBestSelectors[2], bool transparent); 120 | 121 | static const int g_flipTables[2][2][8]; 122 | }; 123 | } 124 | } 125 | 126 | #endif 127 | -------------------------------------------------------------------------------- /ConvectionKernels_ETC1.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace cvtt 4 | { 5 | namespace Tables 6 | { 7 | namespace ETC1 8 | { 9 | const int16_t g_potentialOffsets4[] = 10 | { 11 | 57, -64, -58, -54, -52, -48, -46, -44, -42, -40, -38, -36, -34, -32, -30, -28, -26, -24, -22, -20, -18, -16, -14, -12, -10, -8, -6, -4, -2, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 52, 54, 58, 64, 12 | 81, -136, -124, -114, -112, -102, -100, -92, -90, -88, -80, -78, -76, -70, -68, -66, -64, -58, -56, -54, -52, -48, -46, -44, -42, -40, -36, -34, -32, -30, -26, -24, -22, -20, -18, -14, -12, -10, -8, -4, -2, 0, 2, 4, 8, 10, 12, 14, 18, 20, 
22, 24, 26, 30, 32, 34, 36, 40, 42, 44, 46, 48, 52, 54, 56, 58, 64, 66, 68, 70, 76, 78, 80, 88, 90, 92, 100, 102, 112, 114, 124, 136, 13 | 81, -232, -212, -194, -192, -174, -172, -156, -154, -152, -136, -134, -132, -118, -116, -114, -112, -98, -96, -94, -92, -80, -78, -76, -74, -72, -60, -58, -56, -54, -42, -40, -38, -36, -34, -22, -20, -18, -16, -4, -2, 0, 2, 4, 16, 18, 20, 22, 34, 36, 38, 40, 42, 54, 56, 58, 60, 72, 74, 76, 78, 80, 92, 94, 96, 98, 112, 114, 116, 118, 132, 134, 136, 152, 154, 156, 172, 174, 192, 194, 212, 232, 14 | 81, -336, -307, -281, -278, -252, -249, -226, -223, -220, -197, -194, -191, -171, -168, -165, -162, -142, -139, -136, -133, -116, -113, -110, -107, -104, -87, -84, -81, -78, -61, -58, -55, -52, -49, -32, -29, -26, -23, -6, -3, 0, 3, 6, 23, 26, 29, 32, 49, 52, 55, 58, 61, 78, 81, 84, 87, 104, 107, 110, 113, 116, 133, 136, 139, 142, 162, 165, 168, 171, 191, 194, 197, 220, 223, 226, 249, 252, 278, 281, 307, 336, 15 | 81, -480, -438, -402, -396, -360, -354, -324, -318, -312, -282, -276, -270, -246, -240, -234, -228, -204, -198, -192, -186, -168, -162, -156, -150, -144, -126, -120, -114, -108, -90, -84, -78, -72, -66, -48, -42, -36, -30, -12, -6, 0, 6, 12, 30, 36, 42, 48, 66, 72, 78, 84, 90, 108, 114, 120, 126, 144, 150, 156, 162, 168, 186, 192, 198, 204, 228, 234, 240, 246, 270, 276, 282, 312, 318, 324, 354, 360, 396, 402, 438, 480, 16 | 81, -640, -584, -536, -528, -480, -472, -432, -424, -416, -376, -368, -360, -328, -320, -312, -304, -272, -264, -256, -248, -224, -216, -208, -200, -192, -168, -160, -152, -144, -120, -112, -104, -96, -88, -64, -56, -48, -40, -16, -8, 0, 8, 16, 40, 48, 56, 64, 88, 96, 104, 112, 120, 144, 152, 160, 168, 192, 200, 208, 216, 224, 248, 256, 264, 272, 304, 312, 320, 328, 360, 368, 376, 416, 424, 432, 472, 480, 528, 536, 584, 640, 17 | 81, -848, -775, -709, -702, -636, -629, -570, -563, -556, -497, -490, -483, -431, -424, -417, -410, -358, -351, -344, -337, -292, -285, -278, -271, -264, -219, -212, -205, -198, 
-153, -146, -139, -132, -125, -80, -73, -66, -59, -14, -7, 0, 7, 14, 59, 66, 73, 80, 125, 132, 139, 146, 153, 198, 205, 212, 219, 264, 271, 278, 285, 292, 337, 344, 351, 358, 410, 417, 424, 431, 483, 490, 497, 556, 563, 570, 629, 636, 702, 709, 775, 848, 18 | 81, -1464, -1328, -1234, -1192, -1098, -1056, -1004, -962, -920, -868, -826, -784, -774, -732, -690, -648, -638, -596, -554, -544, -512, -502, -460, -418, -408, -376, -366, -324, -314, -282, -272, -230, -188, -178, -146, -136, -94, -84, -52, -42, 0, 42, 52, 84, 94, 136, 146, 178, 188, 230, 272, 282, 314, 324, 366, 376, 408, 418, 460, 502, 512, 544, 554, 596, 638, 648, 690, 732, 774, 784, 826, 868, 920, 962, 1004, 1056, 1098, 1192, 1234, 1328, 1464 19 | }; 20 | 21 | const unsigned int g_maxPotentialOffsets = 81; 22 | 23 | const int16_t g_thModifierTable[8] = 24 | { 25 | 3, 6, 11, 16, 23, 32, 41, 64 26 | }; 27 | } 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /ConvectionKernels_ETC2.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace cvtt 4 | { 5 | namespace Tables 6 | { 7 | namespace ETC2 8 | { 9 | const int16_t g_thModifierTable[8] = 10 | { 11 | 3, 6, 11, 16, 23, 32, 41, 64 12 | }; 13 | 14 | const int16_t g_alphaModifierTablePositive[16][4] = 15 | { 16 | { 2, 5, 8, 14, }, 17 | { 2, 6, 9, 12, }, 18 | { 1, 4, 7, 12, }, 19 | { 1, 3, 5, 12, }, 20 | { 2, 5, 7, 11, }, 21 | { 2, 6, 8, 10, }, 22 | { 3, 6, 7, 10, }, 23 | { 2, 4, 7, 10, }, 24 | { 1, 5, 7, 9, }, 25 | { 1, 4, 7, 9, }, 26 | { 1, 3, 7, 9, }, 27 | { 1, 4, 6, 9, }, 28 | { 2, 3, 6, 9, }, 29 | { 0, 1, 2, 9, }, 30 | { 3, 5, 7, 8, }, 31 | { 2, 4, 6, 8, }, 32 | }; 33 | } 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /ConvectionKernels_ETC2_Rounding.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | // This file is generated by 
the MakeTables app. Do not edit this file manually. 5 | 6 | namespace cvtt { namespace Tables { namespace ETC2 { 7 | const int g_alphaRoundingTableWidth = 13; 8 | const uint8_t g_alphaRoundingTables[16][13] = 9 | { 10 | { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 2, 3 }, 11 | { 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3 }, 12 | { 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3 }, 13 | { 0, 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3 }, 14 | { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 }, 15 | { 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3 }, 16 | { 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 3, 3 }, 17 | { 0, 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 3 }, 18 | { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3, 3 }, 19 | { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 }, 20 | { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 }, 21 | { 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3, 3, 3 }, 22 | { 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3 }, 23 | { 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3 }, 24 | { 0, 0, 0, 0, 0, 1, 1, 2, 3, 3, 3, 3, 3 }, 25 | { 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 3, 3, 3 }, 26 | }; 27 | }}} 28 | -------------------------------------------------------------------------------- /ConvectionKernels_EndpointRefiner.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef __CVTT_ENDPOINTREFINER_H__ 3 | #define __CVTT_ENDPOINTREFINER_H__ 4 | 5 | #include "ConvectionKernels_ParallelMath.h" 6 | 7 | namespace cvtt 8 | { 9 | namespace Internal 10 | { 11 | // Solve for a, b where v = a*t + b 12 | // This allows endpoints to be mapped to where T=0 and T=1 13 | // Least squares from totals: 14 | // a = (tv - t*v/w)/(tt - t*t/w) 15 | // b = (v - a*t)/w 16 | template 17 | class EndpointRefiner 18 | { 19 | public: 20 | typedef ParallelMath::Float MFloat; 21 | typedef ParallelMath::UInt16 MUInt16; 22 | typedef ParallelMath::UInt15 MUInt15; 23 | typedef ParallelMath::AInt16 MAInt16; 24 | typedef ParallelMath::SInt16 MSInt16; 25 | typedef ParallelMath::SInt32 MSInt32; 26 | 27 | MFloat m_tv[TVectorSize]; 28 | MFloat m_v[TVectorSize]; 29 
| MFloat m_tt; 30 | MFloat m_t; 31 | MFloat m_w; 32 | int m_wu; 33 | 34 | float m_rcpMaxIndex; 35 | float m_channelWeights[TVectorSize]; 36 | float m_rcpChannelWeights[TVectorSize]; 37 | 38 | void Init(int indexRange, const float channelWeights[TVectorSize]) 39 | { 40 | for (int ch = 0; ch < TVectorSize; ch++) 41 | { 42 | m_tv[ch] = ParallelMath::MakeFloatZero(); 43 | m_v[ch] = ParallelMath::MakeFloatZero(); 44 | } 45 | m_tt = ParallelMath::MakeFloatZero(); 46 | m_t = ParallelMath::MakeFloatZero(); 47 | m_w = ParallelMath::MakeFloatZero(); 48 | 49 | m_rcpMaxIndex = 1.0f / static_cast(indexRange - 1); 50 | 51 | for (int ch = 0; ch < TVectorSize; ch++) 52 | { 53 | m_channelWeights[ch] = channelWeights[ch]; 54 | m_rcpChannelWeights[ch] = 1.0f; 55 | if (m_channelWeights[ch] != 0.0f) 56 | m_rcpChannelWeights[ch] = 1.0f / channelWeights[ch]; 57 | } 58 | 59 | m_wu = 0; 60 | } 61 | 62 | void ContributePW(const MFloat *pwFloatPixel, const MUInt15 &index, const MFloat &weight) 63 | { 64 | MFloat t = ParallelMath::ToFloat(index) * m_rcpMaxIndex; 65 | 66 | for (int ch = 0; ch < TVectorSize; ch++) 67 | { 68 | MFloat v = pwFloatPixel[ch] * weight; 69 | 70 | m_tv[ch] = m_tv[ch] + t * v; 71 | m_v[ch] = m_v[ch] + v; 72 | } 73 | m_tt = m_tt + weight * t * t; 74 | m_t = m_t + weight * t; 75 | m_w = m_w + weight; 76 | } 77 | 78 | void ContributeUnweightedPW(const MFloat *pwFloatPixel, const MUInt15 &index, int numRealChannels) 79 | { 80 | MFloat t = ParallelMath::ToFloat(index) * m_rcpMaxIndex; 81 | 82 | for (int ch = 0; ch < numRealChannels; ch++) 83 | { 84 | MFloat v = pwFloatPixel[ch]; 85 | 86 | m_tv[ch] = m_tv[ch] + t * v; 87 | m_v[ch] = m_v[ch] + v; 88 | } 89 | m_tt = m_tt + t * t; 90 | m_t = m_t + t; 91 | m_wu++; 92 | } 93 | 94 | void ContributeUnweightedPW(const MFloat *floatPixel, const MUInt15 &index) 95 | { 96 | ContributeUnweightedPW(floatPixel, index, TVectorSize); 97 | } 98 | 99 | void GetRefinedEndpoints(MFloat endPoint[2][TVectorSize]) 100 | { 101 | // a = (tv - 
t*v/w)/(tt - t*t/w) 102 | // b = (v - a*t)/w 103 | MFloat w = m_w + ParallelMath::MakeFloat(static_cast(m_wu)); 104 | 105 | ParallelMath::MakeSafeDenominator(w); 106 | MFloat wRcp = ParallelMath::Reciprocal(w); 107 | 108 | MFloat adenom = (m_tt * w - m_t * m_t) * wRcp; 109 | 110 | ParallelMath::FloatCompFlag adenomZero = ParallelMath::Equal(adenom, ParallelMath::MakeFloatZero()); 111 | ParallelMath::ConditionalSet(adenom, adenomZero, ParallelMath::MakeFloat(1.0f)); 112 | 113 | for (int ch = 0; ch < TVectorSize; ch++) 114 | { 115 | /* 116 | if (adenom == 0.0) 117 | p1 = p2 = er.v / er.w; 118 | else 119 | { 120 | float4 a = (er.tv - er.t*er.v / er.w) / adenom; 121 | float4 b = (er.v - a * er.t) / er.w; 122 | p1 = b; 123 | p2 = a + b; 124 | } 125 | */ 126 | 127 | MFloat a = (m_tv[ch] - m_t * m_v[ch] * wRcp) / adenom; 128 | MFloat b = (m_v[ch] - a * m_t) * wRcp; 129 | 130 | MFloat p1 = b; 131 | MFloat p2 = a + b; 132 | 133 | ParallelMath::ConditionalSet(p1, adenomZero, (m_v[ch] * wRcp)); 134 | ParallelMath::ConditionalSet(p2, adenomZero, p1); 135 | 136 | // Unweight 137 | float inverseWeight = m_rcpChannelWeights[ch]; 138 | 139 | endPoint[0][ch] = p1 * inverseWeight; 140 | endPoint[1][ch] = p2 * inverseWeight; 141 | } 142 | } 143 | 144 | void GetRefinedEndpointsLDR(MUInt15 endPoint[2][TVectorSize], int numRealChannels, const ParallelMath::RoundTowardNearestForScope *roundingMode) 145 | { 146 | MFloat floatEndPoint[2][TVectorSize]; 147 | GetRefinedEndpoints(floatEndPoint); 148 | 149 | for (int epi = 0; epi < 2; epi++) 150 | for (int ch = 0; ch < TVectorSize; ch++) 151 | endPoint[epi][ch] = ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(floatEndPoint[epi][ch], 0.0f, 255.0f), roundingMode); 152 | } 153 | 154 | void GetRefinedEndpointsLDR(MUInt15 endPoint[2][TVectorSize], const ParallelMath::RoundTowardNearestForScope *roundingMode) 155 | { 156 | GetRefinedEndpointsLDR(endPoint, TVectorSize, roundingMode); 157 | } 158 | 159 | void GetRefinedEndpointsHDR(MSInt16 
endPoint[2][TVectorSize], bool isSigned, const ParallelMath::RoundTowardNearestForScope *roundingMode) 160 | { 161 | MFloat floatEndPoint[2][TVectorSize]; 162 | GetRefinedEndpoints(floatEndPoint); 163 | 164 | for (int epi = 0; epi < 2; epi++) 165 | { 166 | for (int ch = 0; ch < TVectorSize; ch++) 167 | { 168 | MFloat f = floatEndPoint[epi][ch]; 169 | if (isSigned) 170 | endPoint[epi][ch] = ParallelMath::LosslessCast::Cast(ParallelMath::RoundAndConvertToS16(ParallelMath::Clamp(f, -31743.0f, 31743.0f), roundingMode)); 171 | else 172 | endPoint[epi][ch] = ParallelMath::LosslessCast::Cast(ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(f, 0.0f, 31743.0f), roundingMode)); 173 | } 174 | } 175 | } 176 | }; 177 | } 178 | } 179 | 180 | #endif 181 | 182 | -------------------------------------------------------------------------------- /ConvectionKernels_EndpointSelector.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef __CVTT_ENDPOINTSELECTOR_H__ 3 | #define __CVTT_ENDPOINTSELECTOR_H__ 4 | 5 | #include "ConvectionKernels_ParallelMath.h" 6 | #include "ConvectionKernels_UnfinishedEndpoints.h" 7 | #include "ConvectionKernels_PackedCovarianceMatrix.h" 8 | 9 | namespace cvtt 10 | { 11 | namespace Internal 12 | { 13 | static const int NumEndpointSelectorPasses = 3; 14 | 15 | template 16 | class EndpointSelector 17 | { 18 | public: 19 | typedef ParallelMath::Float MFloat; 20 | 21 | EndpointSelector() 22 | { 23 | for (int ch = 0; ch < TVectorSize; ch++) 24 | { 25 | m_centroid[ch] = ParallelMath::MakeFloatZero(); 26 | m_direction[ch] = ParallelMath::MakeFloatZero(); 27 | } 28 | m_weightTotal = ParallelMath::MakeFloatZero(); 29 | m_minDist = ParallelMath::MakeFloat(FLT_MAX); 30 | m_maxDist = ParallelMath::MakeFloat(-FLT_MAX); 31 | } 32 | 33 | void ContributePass(const MFloat *value, int pass, const MFloat &weight) 34 | { 35 | if (pass == 0) 36 | ContributeCentroid(value, weight); 37 | else if (pass == 1) 38 | 
ContributeDirection(value, weight); 39 | else if (pass == 2) 40 | ContributeMinMax(value); 41 | } 42 | 43 | void FinishPass(int pass) 44 | { 45 | if (pass == 0) 46 | FinishCentroid(); 47 | else if (pass == 1) 48 | FinishDirection(); 49 | } 50 | /* Builds the two endpoints as m_centroid + m_direction * m_minDist / m_maxDist, then divides each channel by its channel weight. Returns them as (base = min point, offset = max - min). */ 51 | UnfinishedEndpoints GetEndpoints(const float channelWeights[TVectorSize]) const 52 | { 53 | MFloat unweightedBase[TVectorSize]; 54 | MFloat unweightedOffset[TVectorSize]; 55 | 56 | for (int ch = 0; ch < TVectorSize; ch++) 57 | { 58 | MFloat min = m_centroid[ch] + m_direction[ch] * m_minDist; 59 | MFloat max = m_centroid[ch] + m_direction[ch] * m_maxDist; 60 | 61 | float safeWeight = channelWeights[ch]; 62 | if (safeWeight == 0.f) 63 | safeWeight = 1.0f; 64 | /* Divide by safeWeight, not channelWeights[ch]: a zero channel weight would otherwise divide by zero and yield Inf/NaN endpoints. */ 65 | unweightedBase[ch] = min / safeWeight; 66 | unweightedOffset[ch] = (max - min) / safeWeight; 67 | } 68 | 69 | return UnfinishedEndpoints(unweightedBase, unweightedOffset); 70 | } 71 | 72 | private: 73 | void ContributeCentroid(const MFloat *value, const MFloat &weight) 74 | { 75 | for (int ch = 0; ch < TVectorSize; ch++) 76 | m_centroid[ch] = m_centroid[ch] + value[ch] * weight; 77 | m_weightTotal = m_weightTotal + weight; 78 | } 79 | 80 | void FinishCentroid() 81 | { 82 | MFloat denom = m_weightTotal; 83 | ParallelMath::MakeSafeDenominator(denom); 84 | 85 | for (int ch = 0; ch < TVectorSize; ch++) 86 | m_centroid[ch] = m_centroid[ch] / denom; 87 | } 88 | 89 | void ContributeDirection(const MFloat *value, const MFloat &weight) 90 | { 91 | MFloat diff[TVectorSize]; 92 | for (int ch = 0; ch < TVectorSize; ch++) 93 | diff[ch] = value[ch] - m_centroid[ch]; 94 | 95 | m_covarianceMatrix.Add(diff, weight); 96 | } 97 | 98 | void FinishDirection() 99 | { 100 | MFloat approx[TVectorSize]; 101 | for (int ch = 0; ch < TVectorSize; ch++) 102 | approx[ch] = ParallelMath::MakeFloat(1.0f); 103 | 104 | for (int i = 0; i < TIterationCount; i++) 105 | { 106 | MFloat product[TVectorSize]; 107 | m_covarianceMatrix.Product(product, approx); 108 | 109 | MFloat
largestComponent = product[0]; 110 | for (int ch = 1; ch < TVectorSize; ch++) 111 | largestComponent = ParallelMath::Max(largestComponent, product[ch]); 112 | 113 | // product = largestComponent*newApprox 114 | ParallelMath::MakeSafeDenominator(largestComponent); 115 | for (int ch = 0; ch < TVectorSize; ch++) 116 | approx[ch] = product[ch] / largestComponent; 117 | } 118 | 119 | // Normalize 120 | MFloat approxLen = ParallelMath::MakeFloatZero(); 121 | for (int ch = 0; ch < TVectorSize; ch++) 122 | approxLen = approxLen + approx[ch] * approx[ch]; 123 | 124 | approxLen = ParallelMath::Sqrt(approxLen); 125 | 126 | ParallelMath::MakeSafeDenominator(approxLen); 127 | 128 | for (int ch = 0; ch < TVectorSize; ch++) 129 | m_direction[ch] = approx[ch] / approxLen; 130 | } 131 | 132 | void ContributeMinMax(const MFloat *value) 133 | { 134 | MFloat dist = ParallelMath::MakeFloatZero(); 135 | for (int ch = 0; ch < TVectorSize; ch++) 136 | dist = dist + m_direction[ch] * (value[ch] - m_centroid[ch]); 137 | 138 | m_minDist = ParallelMath::Min(m_minDist, dist); 139 | m_maxDist = ParallelMath::Max(m_maxDist, dist); 140 | } 141 | 142 | ParallelMath::Float m_centroid[TVectorSize]; 143 | ParallelMath::Float m_direction[TVectorSize]; 144 | PackedCovarianceMatrix m_covarianceMatrix; 145 | ParallelMath::Float m_weightTotal; 146 | 147 | ParallelMath::Float m_minDist; 148 | ParallelMath::Float m_maxDist; 149 | }; 150 | } 151 | } 152 | 153 | #endif 154 | -------------------------------------------------------------------------------- /ConvectionKernels_FakeBT709_Rounding.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | // This file is generated by the MakeTables app. Do not edit this file manually. 
5 | 6 | namespace cvtt { namespace Tables { namespace FakeBT709 { 7 | const uint8_t g_rounding16[] = 8 | { 9 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 10 | 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 11 | 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 12 | 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 13 | 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 14 | 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 15 | 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 16 | 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 17 | 0, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 18 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 19 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 20 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 21 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 22 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 23 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 24 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, 25 | 26 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 27 | 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 28 | 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 29 | 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 30 | 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 31 | 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 32 | 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 33 | 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 34 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 35 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 36 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 37 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 38 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 39 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 40 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 41 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, 42 | 43 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 44 | 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 45 | 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 46 | 0, 0, 0, 0, 0, 0, 0, 4, 
4, 4, 4, 4, 4, 4, 4, 4, 47 | 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 48 | 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 49 | 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 50 | 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 51 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 52 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 53 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 54 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 55 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 56 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 57 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 58 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, 59 | 60 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 61 | 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 62 | 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 63 | 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 64 | 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 65 | 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 66 | 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 67 | 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 68 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 69 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 70 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 71 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 72 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 73 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 74 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 75 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, 76 | 77 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 78 | 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 79 | 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 80 | 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 81 | 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 82 | 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 83 | 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 84 | 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 85 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 86 | 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 87 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 88 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 89 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 90 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 91 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 92 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, 93 | 94 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 95 | 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 96 | 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 97 | 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 98 | 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 99 | 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 100 | 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 101 | 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 102 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 103 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 104 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 105 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 106 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 107 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 108 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 109 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, 110 | 111 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 112 | 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 113 | 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 114 | 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 4, 4, 4, 4, 4, 115 | 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 116 | 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 117 | 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 118 | 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 119 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 120 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 121 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 122 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 123 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 124 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 125 | 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 126 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, 127 | 128 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 129 | 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 130 | 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 131 | 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 132 | 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 133 | 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 134 | 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 135 | 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 136 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 5, 5, 5, 5, 6, 137 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 138 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 139 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 140 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 141 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 142 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 143 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, 144 | 145 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 146 | 1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 147 | 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 148 | 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 149 | 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 150 | 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 151 | 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 152 | 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 153 | 1, 2, 2, 2, 2, 2, 2, 2, 2, 5, 5, 5, 5, 5, 5, 5, 154 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 155 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 156 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 157 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 158 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 159 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 160 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, 161 | 162 | 1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 163 | 1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 164 | 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 
5, 5, 5, 165 | 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 166 | 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 167 | 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 168 | 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 169 | 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 170 | 1, 1, 2, 2, 2, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 171 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 172 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 173 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 174 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 175 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 176 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 177 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, 178 | 179 | 1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 180 | 1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 181 | 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 182 | 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 183 | 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 184 | 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 185 | 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 186 | 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 187 | 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 188 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 189 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 190 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 191 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 192 | 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 7, 7, 7, 7, 7, 7, 193 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, 194 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, 195 | 196 | 1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 197 | 1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 198 | 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 199 | 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 200 | 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 201 | 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 202 | 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 203 | 1, 1, 1, 1, 5, 5, 5, 5, 5, 
5, 5, 5, 5, 5, 5, 5, 204 | 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 205 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 206 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 207 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 208 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 209 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 210 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, 211 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, 212 | 213 | 1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 214 | 1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 215 | 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 216 | 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 217 | 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 218 | 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 219 | 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 220 | 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 221 | 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 222 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 223 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 224 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 225 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 226 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 227 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, 228 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, 229 | 230 | 1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 231 | 1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 232 | 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 233 | 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 234 | 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 235 | 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 236 | 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 237 | 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 238 | 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 239 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 240 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 241 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 242 | 3, 3, 3, 3, 3, 
3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 243 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 244 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, 245 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, 246 | 247 | 1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 248 | 1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 249 | 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 250 | 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 251 | 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 252 | 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 253 | 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 254 | 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 255 | 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 256 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 257 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 258 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 259 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 260 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 261 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 262 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, 263 | 264 | 1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 265 | 1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 266 | 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 267 | 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 268 | 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 269 | 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 270 | 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 271 | 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 272 | 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 273 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 274 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 275 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 276 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 277 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 278 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 279 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, 280 | 281 | }; 282 | }}} 283 | 
-------------------------------------------------------------------------------- /ConvectionKernels_IndexSelector.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Convection Texture Tools 3 | Copyright (c) 2018-2019 Eric Lasota 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject 11 | to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included 14 | in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 17 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 21 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | ------------------------------------------------------------------------------------- 25 | 26 | Portions based on DirectX Texture Library (DirectXTex) 27 | 28 | Copyright (c) Microsoft Corporation. All rights reserved. 29 | Licensed under the MIT License. 
30 | 31 | http://go.microsoft.com/fwlink/?LinkId=248926 32 | */ 33 | #include "ConvectionKernels_Config.h" 34 | 35 | #if !defined(CVTT_SINGLE_FILE) || defined(CVTT_SINGLE_FILE_IMPL) 36 | 37 | #include "ConvectionKernels_IndexSelector.h" 38 | 39 | namespace cvtt 40 | { 41 | namespace Internal 42 | { 43 | const ParallelMath::UInt16 g_weightReciprocals[17] = 44 | { 45 | ParallelMath::MakeUInt16(0), // -1 46 | ParallelMath::MakeUInt16(0), // 0 47 | ParallelMath::MakeUInt16(32768), // 1 48 | ParallelMath::MakeUInt16(16384), // 2 49 | ParallelMath::MakeUInt16(10923), // 3 50 | ParallelMath::MakeUInt16(8192), // 4 51 | ParallelMath::MakeUInt16(6554), // 5 52 | ParallelMath::MakeUInt16(5461), // 6 53 | ParallelMath::MakeUInt16(4681), // 7 54 | ParallelMath::MakeUInt16(4096), // 8 55 | ParallelMath::MakeUInt16(3641), // 9 56 | ParallelMath::MakeUInt16(3277), // 10 57 | ParallelMath::MakeUInt16(2979), // 11 58 | ParallelMath::MakeUInt16(2731), // 12 59 | ParallelMath::MakeUInt16(2521), // 13 60 | ParallelMath::MakeUInt16(2341), // 14 61 | ParallelMath::MakeUInt16(2185), // 15 62 | }; 63 | } 64 | } 65 | 66 | #endif 67 | -------------------------------------------------------------------------------- /ConvectionKernels_IndexSelector.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef __CVTT_INDEXSELECTOR_H__ 3 | #define __CVTT_INDEXSELECTOR_H__ 4 | 5 | #include "ConvectionKernels_ParallelMath.h" 6 | 7 | namespace cvtt 8 | { 9 | namespace Internal 10 | { 11 | extern const ParallelMath::UInt16 g_weightReciprocals[17]; 12 | 13 | template 14 | class IndexSelector 15 | { 16 | public: 17 | typedef ParallelMath::Float MFloat; 18 | typedef ParallelMath::UInt16 MUInt16; 19 | typedef ParallelMath::UInt15 MUInt15; 20 | typedef ParallelMath::SInt16 MSInt16; 21 | typedef ParallelMath::AInt16 MAInt16; 22 | typedef ParallelMath::SInt32 MSInt32; 23 | typedef ParallelMath::UInt31 MUInt31; 24 | 25 | 26 | template 27 | void Init(const 
float *channelWeights, const TInterpolationEPType interpolationEndPoints[2][TVectorSize], const TColorEPType colorSpaceEndpoints[2][TVectorSize], int range) 28 | { 29 | // In BC6H, the interpolation endpoints are higher-precision than the endpoints in color space. 30 | // We need to select indexes using the color-space endpoints. 31 | 32 | m_isUniform = true; 33 | for (int ch = 1; ch < TVectorSize; ch++) 34 | { 35 | if (channelWeights[ch] != channelWeights[0]) 36 | m_isUniform = false; 37 | } 38 | 39 | // To work with channel weights, we need something where: 40 | // pxDiff = px - ep[0] 41 | // epDiff = ep[1] - ep[0] 42 | // 43 | // weightedEPDiff = epDiff * channelWeights 44 | // normalizedWeightedAxis = weightedEPDiff / len(weightedEPDiff) 45 | // normalizedIndex = dot(pxDiff * channelWeights, normalizedWeightedAxis) / len(weightedEPDiff) 46 | // index = normalizedIndex * maxValue 47 | // 48 | // Equivalent to: 49 | // axis = channelWeights * maxValue * epDiff * channelWeights / lenSquared(epDiff * channelWeights) 50 | // index = dot(axis, pxDiff) 51 | 52 | for (int ep = 0; ep < 2; ep++) 53 | for (int ch = 0; ch < TVectorSize; ch++) 54 | m_endPoint[ep][ch] = ParallelMath::LosslessCast::Cast(interpolationEndPoints[ep][ch]); 55 | 56 | m_range = range; 57 | m_maxValue = static_cast(range - 1); 58 | 59 | MFloat epDiffWeighted[TVectorSize]; 60 | for (int ch = 0; ch < TVectorSize; ch++) 61 | { 62 | m_origin[ch] = ParallelMath::ToFloat(colorSpaceEndpoints[0][ch]); 63 | MFloat opposingOriginCh = ParallelMath::ToFloat(colorSpaceEndpoints[1][ch]); 64 | epDiffWeighted[ch] = (opposingOriginCh - m_origin[ch]) * channelWeights[ch]; 65 | } 66 | 67 | MFloat lenSquared = epDiffWeighted[0] * epDiffWeighted[0]; 68 | for (int ch = 1; ch < TVectorSize; ch++) 69 | lenSquared = lenSquared + epDiffWeighted[ch] * epDiffWeighted[ch]; 70 | 71 | ParallelMath::MakeSafeDenominator(lenSquared); 72 | 73 | MFloat maxValueDividedByLengthSquared = ParallelMath::MakeFloat(m_maxValue) / lenSquared; 
74 | 75 | for (int ch = 0; ch < TVectorSize; ch++) 76 | m_axis[ch] = epDiffWeighted[ch] * channelWeights[ch] * maxValueDividedByLengthSquared; 77 | } 78 | 79 | template 80 | void Init(const float channelWeights[TVectorSize], const MUInt15 endPoints[2][TVectorSize], int range) 81 | { 82 | MAInt16 converted[2][TVectorSize]; 83 | for (int epi = 0; epi < 2; epi++) 84 | for (int ch = 0; ch < TVectorSize; ch++) 85 | converted[epi][ch] = ParallelMath::LosslessCast::Cast(endPoints[epi][ch]); 86 | 87 | Init(channelWeights, endPoints, endPoints, range); 88 | } 89 | 90 | void ReconstructLDR_BC7(const MUInt15 &index, MUInt15* pixel, int numRealChannels) 91 | { 92 | MUInt15 weight = ParallelMath::LosslessCast::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 256, 9)); 93 | 94 | for (int ch = 0; ch < numRealChannels; ch++) 95 | { 96 | MUInt15 ep0f = ParallelMath::LosslessCast::Cast(ParallelMath::CompactMultiply((ParallelMath::MakeUInt15(64) - weight), ParallelMath::LosslessCast::Cast(m_endPoint[0][ch]))); 97 | MUInt15 ep1f = ParallelMath::LosslessCast::Cast(ParallelMath::CompactMultiply(weight, ParallelMath::LosslessCast::Cast(m_endPoint[1][ch]))); 98 | pixel[ch] = ParallelMath::LosslessCast::Cast(ParallelMath::RightShift(ep0f + ep1f + ParallelMath::MakeUInt15(32), 6)); 99 | } 100 | } 101 | 102 | void ReconstructLDRPrecise(const MUInt15 &index, MUInt15* pixel, int numRealChannels) 103 | { 104 | MUInt15 weight = ParallelMath::LosslessCast::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 64, 7)); 105 | 106 | for (int ch = 0; ch < numRealChannels; ch++) 107 | { 108 | MUInt15 ep0f = ParallelMath::LosslessCast::Cast(ParallelMath::CompactMultiply((ParallelMath::MakeUInt15(256) - weight), ParallelMath::LosslessCast::Cast(m_endPoint[0][ch]))); 109 | MUInt15 ep1f = ParallelMath::LosslessCast::Cast(ParallelMath::CompactMultiply(weight, ParallelMath::LosslessCast::Cast(m_endPoint[1][ch]))); 
110 | pixel[ch] = ParallelMath::LosslessCast::Cast(ParallelMath::RightShift(ep0f + ep1f + ParallelMath::MakeUInt15(128), 8)); 111 | } 112 | } 113 | 114 | void ReconstructLDR_BC7(const MUInt15 &index, MUInt15* pixel) 115 | { 116 | ReconstructLDR_BC7(index, pixel, TVectorSize); 117 | } 118 | 119 | void ReconstructLDRPrecise(const MUInt15 &index, MUInt15* pixel) 120 | { 121 | ReconstructLDRPrecise(index, pixel, TVectorSize); 122 | } 123 | 124 | MUInt15 SelectIndexLDR(const MFloat* pixel, const ParallelMath::RoundTowardNearestForScope* rtn) const 125 | { 126 | MFloat dist = (pixel[0] - m_origin[0]) * m_axis[0]; 127 | for (int ch = 1; ch < TVectorSize; ch++) 128 | dist = dist + (pixel[ch] - m_origin[ch]) * m_axis[ch]; 129 | 130 | return ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(dist, 0.0f, m_maxValue), rtn); 131 | } 132 | 133 | protected: 134 | MAInt16 m_endPoint[2][TVectorSize]; 135 | 136 | private: 137 | MFloat m_origin[TVectorSize]; 138 | MFloat m_axis[TVectorSize]; 139 | int m_range; 140 | float m_maxValue; 141 | bool m_isUniform; 142 | }; 143 | } 144 | } 145 | 146 | #endif 147 | 148 | -------------------------------------------------------------------------------- /ConvectionKernels_IndexSelectorHDR.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef __CVTT_INDEXSELECTORHDR_H__ 3 | #define __CVTT_INDEXSELECTORHDR_H__ 4 | 5 | #include "ConvectionKernels_ParallelMath.h" 6 | #include "ConvectionKernels_IndexSelector.h" 7 | 8 | namespace cvtt 9 | { 10 | namespace Internal 11 | { 12 | ParallelMath::SInt16 UnscaleHDRValueSigned(const ParallelMath::SInt16 &v); 13 | ParallelMath::UInt15 UnscaleHDRValueUnsigned(const ParallelMath::UInt16 &v); 14 | 15 | template 16 | class IndexSelectorHDR : public IndexSelector 17 | { 18 | public: 19 | typedef ParallelMath::UInt15 MUInt15; 20 | typedef ParallelMath::UInt16 MUInt16; 21 | typedef ParallelMath::UInt31 MUInt31; 22 | typedef ParallelMath::SInt16 MSInt16; 
23 | typedef ParallelMath::SInt32 MSInt32; 24 | typedef ParallelMath::Float MFloat; 25 | 26 | private: 27 | 28 | MUInt15 InvertSingle(const MUInt15& anIndex) const 29 | { 30 | MUInt15 inverted = m_maxValueMinusOne - anIndex; 31 | return ParallelMath::Select(m_isInverted, inverted, anIndex); 32 | } 33 | 34 | void ReconstructHDRSignedUninverted(const MUInt15 &index, MSInt16* pixel) const 35 | { 36 | MUInt15 weight = ParallelMath::LosslessCast::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 256, 9)); 37 | 38 | for (int ch = 0; ch < TVectorSize; ch++) 39 | { 40 | MSInt16 ep0 = ParallelMath::LosslessCast::Cast(this->m_endPoint[0][ch]); 41 | MSInt16 ep1 = ParallelMath::LosslessCast::Cast(this->m_endPoint[1][ch]); 42 | 43 | MSInt32 pixel32 = ParallelMath::XMultiply((ParallelMath::MakeUInt15(64) - weight), ep0) + ParallelMath::XMultiply(weight, ep1); 44 | 45 | pixel32 = ParallelMath::RightShift(pixel32 + ParallelMath::MakeSInt32(32), 6); 46 | 47 | pixel[ch] = UnscaleHDRValueSigned(ParallelMath::ToSInt16(pixel32)); 48 | } 49 | } 50 | 51 | void ReconstructHDRUnsignedUninverted(const MUInt15 &index, MSInt16* pixel) const 52 | { 53 | MUInt15 weight = ParallelMath::LosslessCast::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 256, 9)); 54 | 55 | for (int ch = 0; ch < TVectorSize; ch++) 56 | { 57 | MUInt16 ep0 = ParallelMath::LosslessCast::Cast(this->m_endPoint[0][ch]); 58 | MUInt16 ep1 = ParallelMath::LosslessCast::Cast(this->m_endPoint[1][ch]); 59 | 60 | MUInt31 pixel31 = ParallelMath::XMultiply((ParallelMath::MakeUInt15(64) - weight), ep0) + ParallelMath::XMultiply(weight, ep1); 61 | 62 | pixel31 = ParallelMath::RightShift(pixel31 + ParallelMath::MakeUInt31(32), 6); 63 | 64 | pixel[ch] = ParallelMath::LosslessCast::Cast(UnscaleHDRValueUnsigned(ParallelMath::ToUInt16(pixel31))); 65 | } 66 | } 67 | 68 | MFloat ErrorForInterpolatorComponent(int index, int ch, const MFloat 
*pixel) const 69 | { 70 | MFloat diff = pixel[ch] - m_reconstructedInterpolators[index][ch]; 71 | return diff * diff; 72 | } 73 | 74 | MFloat ErrorForInterpolator(int index, const MFloat *pixel) const 75 | { 76 | MFloat error = ErrorForInterpolatorComponent(index, 0, pixel); 77 | for (int ch = 1; ch < TVectorSize; ch++) 78 | error = error + ErrorForInterpolatorComponent(index, ch, pixel); 79 | return error; 80 | } 81 | 82 | public: 83 | 84 | void InitHDR(int range, bool isSigned, bool fastIndexing, const float *channelWeights) 85 | { 86 | assert(range <= 16); 87 | 88 | m_range = range; 89 | 90 | m_isInverted = ParallelMath::MakeBoolInt16(false); 91 | m_maxValueMinusOne = ParallelMath::MakeUInt15(static_cast(range - 1)); 92 | 93 | if (!fastIndexing) 94 | { 95 | for (int i = 0; i < range; i++) 96 | { 97 | MSInt16 recon2CL[TVectorSize]; 98 | 99 | if (isSigned) 100 | ReconstructHDRSignedUninverted(ParallelMath::MakeUInt15(static_cast(i)), recon2CL); 101 | else 102 | ReconstructHDRUnsignedUninverted(ParallelMath::MakeUInt15(static_cast(i)), recon2CL); 103 | 104 | for (int ch = 0; ch < TVectorSize; ch++) 105 | m_reconstructedInterpolators[i][ch] = ParallelMath::TwosCLHalfToFloat(recon2CL[ch]) * channelWeights[ch]; 106 | } 107 | } 108 | } 109 | 110 | void ReconstructHDRSigned(const MUInt15 &index, MSInt16* pixel) const 111 | { 112 | ReconstructHDRSignedUninverted(InvertSingle(index), pixel); 113 | } 114 | 115 | void ReconstructHDRUnsigned(const MUInt15 &index, MSInt16* pixel) const 116 | { 117 | ReconstructHDRUnsignedUninverted(InvertSingle(index), pixel); 118 | } 119 | 120 | void ConditionalInvert(const ParallelMath::Int16CompFlag &invert) 121 | { 122 | m_isInverted = invert; 123 | } 124 | 125 | MUInt15 SelectIndexHDRSlow(const MFloat* pixel, const ParallelMath::RoundTowardNearestForScope*) const 126 | { 127 | MUInt15 index = ParallelMath::MakeUInt15(0); 128 | 129 | MFloat bestError = ErrorForInterpolator(0, pixel); 130 | for (int i = 1; i < m_range; i++) 131 | { 132 | 
MFloat error = ErrorForInterpolator(i, pixel); 133 | ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(error, bestError); 134 | ParallelMath::ConditionalSet(index, ParallelMath::FloatFlagToInt16(errorBetter), ParallelMath::MakeUInt15(static_cast(i))); 135 | bestError = ParallelMath::Min(bestError, error); 136 | } 137 | 138 | return InvertSingle(index); 139 | } 140 | 141 | MUInt15 SelectIndexHDRFast(const MFloat* pixel, const ParallelMath::RoundTowardNearestForScope* rtn) const 142 | { 143 | return InvertSingle(this->SelectIndexLDR(pixel, rtn)); 144 | } 145 | 146 | private: 147 | MFloat m_reconstructedInterpolators[16][TVectorSize]; 148 | ParallelMath::Int16CompFlag m_isInverted; 149 | MUInt15 m_maxValueMinusOne; 150 | int m_range; 151 | }; 152 | } 153 | } 154 | #endif 155 | 156 | -------------------------------------------------------------------------------- /ConvectionKernels_PackedCovarianceMatrix.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef __CVTT_COVARIANCEMATRIX_H__ 3 | #define __CVTT_COVARIANCEMATRIX_H__ 4 | 5 | namespace cvtt 6 | { 7 | namespace Internal 8 | { 9 | 10 | template 11 | class PackedCovarianceMatrix 12 | { 13 | public: 14 | // 0: xx, 15 | // 1: xy, yy 16 | // 3: xz, yz, zz 17 | // 6: xw, yw, zw, ww 18 | // ... etc. 
19 | static const int PyramidSize = (TMatrixSize * (TMatrixSize + 1)) / 2; 20 | 21 | typedef ParallelMath::Float MFloat; 22 | 23 | PackedCovarianceMatrix() 24 | { 25 | for (int i = 0; i < PyramidSize; i++) 26 | m_values[i] = ParallelMath::MakeFloatZero(); 27 | } 28 | 29 | void Add(const ParallelMath::Float *vec, const ParallelMath::Float &weight) 30 | { 31 | int index = 0; 32 | for (int row = 0; row < TMatrixSize; row++) 33 | { 34 | for (int col = 0; col <= row; col++) 35 | { 36 | m_values[index] = m_values[index] + vec[row] * vec[col] * weight; 37 | index++; 38 | } 39 | } 40 | } 41 | 42 | void Product(MFloat *outVec, const MFloat *inVec) 43 | { 44 | for (int row = 0; row < TMatrixSize; row++) 45 | { 46 | MFloat sum = ParallelMath::MakeFloatZero(); 47 | 48 | int index = (row * (row + 1)) >> 1; 49 | for (int col = 0; col < TMatrixSize; col++) 50 | { 51 | sum = sum + inVec[col] * m_values[index]; 52 | if (col >= row) 53 | index += col + 1; 54 | else 55 | index++; 56 | } 57 | 58 | outVec[row] = sum; 59 | } 60 | } 61 | 62 | private: 63 | ParallelMath::Float m_values[PyramidSize]; 64 | }; 65 | } 66 | } 67 | 68 | #endif 69 | -------------------------------------------------------------------------------- /ConvectionKernels_S3TC.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #ifndef __CVTT_S3TC_H__ 3 | #define __CVTT_S3TC_H__ 4 | 5 | #include "ConvectionKernels_ParallelMath.h" 6 | 7 | namespace cvtt 8 | { 9 | namespace Internal 10 | { 11 | template 12 | class EndpointRefiner; 13 | } 14 | 15 | struct PixelBlockU8; 16 | } 17 | 18 | namespace cvtt 19 | { 20 | namespace Internal 21 | { 22 | class S3TCComputer 23 | { 24 | public: 25 | typedef ParallelMath::Float MFloat; 26 | typedef ParallelMath::SInt16 MSInt16; 27 | typedef ParallelMath::UInt15 MUInt15; 28 | typedef ParallelMath::UInt16 MUInt16; 29 | typedef ParallelMath::SInt32 MSInt32; 30 | 31 | static void Init(MFloat& error); 32 | static void 
QuantizeTo6Bits(MUInt15& v); 33 | static void QuantizeTo5Bits(MUInt15& v); 34 | static void QuantizeTo565(MUInt15 endPoint[3]); 35 | static MFloat ParanoidFactorForSpan(const MSInt16& span); 36 | static MFloat ParanoidDiff(const MUInt15& a, const MUInt15& b, const MFloat& d); 37 | static void TestSingleColor(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], int range, const float* channelWeights, 38 | MFloat &bestError, MUInt15 bestEndpoints[2][3], MUInt15 bestIndexes[16], MUInt15 &bestRange, const ParallelMath::RoundTowardNearestForScope *rtn); 39 | static void TestEndpoints(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const MFloat preWeightedPixels[16][4], const MUInt15 unquantizedEndPoints[2][3], int range, const float* channelWeights, 40 | MFloat &bestError, MUInt15 bestEndpoints[2][3], MUInt15 bestIndexes[16], MUInt15 &bestRange, EndpointRefiner<3> *refiner, const ParallelMath::RoundTowardNearestForScope *rtn); 41 | static void TestCounts(uint32_t flags, const int *counts, int nCounts, const MUInt15 &numElements, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const MFloat preWeightedPixels[16][4], bool alphaTest, 42 | const MFloat floatSortedInputs[16][4], const MFloat preWeightedFloatSortedInputs[16][4], const float *channelWeights, MFloat &bestError, MUInt15 bestEndpoints[2][3], MUInt15 bestIndexes[16], MUInt15 &bestRange, 43 | const ParallelMath::RoundTowardNearestForScope* rtn); 44 | static void PackExplicitAlpha(uint32_t flags, const PixelBlockU8* inputs, int inputChannel, uint8_t* packedBlocks, size_t packedBlockStride); 45 | static void PackInterpolatedAlpha(uint32_t flags, const PixelBlockU8* inputs, int inputChannel, uint8_t* packedBlocks, size_t packedBlockStride, bool isSigned, int maxTweakRounds, int numRefineRounds); 46 | static void PackRGB(uint32_t flags, const PixelBlockU8* inputs, uint8_t* packedBlocks, size_t packedBlockStride, const float channelWeights[4], bool 
alphaTest, float alphaThreshold, bool exhaustive, int maxTweakRounds, int numRefineRounds); 47 | }; 48 | } 49 | } 50 | 51 | #endif 52 | -------------------------------------------------------------------------------- /ConvectionKernels_SingleFile.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Convection Texture Tools 3 | Copyright (c) 2018-2019 Eric Lasota 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject 11 | to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included 14 | in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 17 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 21 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | ------------------------------------------------------------------------------------- 25 | 26 | Portions based on DirectX Texture Library (DirectXTex) 27 | 28 | Copyright (c) Microsoft Corporation. All rights reserved. 29 | Licensed under the MIT License. 
30 | 31 | http://go.microsoft.com/fwlink/?LinkId=248926 32 | */ 33 | #include "ConvectionKernels_Config.h" 34 | 35 | #if defined(CVTT_SINGLE_FILE) 36 | #define CVTT_SINGLE_FILE_IMPL 37 | 38 | #include "ConvectionKernels_API.cpp" 39 | #include "ConvectionKernels_BC67.cpp" 40 | #include "ConvectionKernels_BC6H_IO.cpp" 41 | #include "ConvectionKernels_BC7_PrioData.cpp" 42 | #include "ConvectionKernels_BCCommon.cpp" 43 | #include "ConvectionKernels_ETC.cpp" 44 | #include "ConvectionKernels_IndexSelector.cpp" 45 | #include "ConvectionKernels_S3TC.cpp" 46 | #include "ConvectionKernels_Util.cpp" 47 | 48 | #endif 49 | -------------------------------------------------------------------------------- /ConvectionKernels_UnfinishedEndpoints.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "ConvectionKernels_Util.h" 4 | 5 | namespace cvtt 6 | { 7 | namespace Internal 8 | { 9 | template 10 | class UnfinishedEndpoints 11 | { 12 | public: 13 | typedef ParallelMath::Float MFloat; 14 | typedef ParallelMath::UInt16 MUInt16; 15 | typedef ParallelMath::UInt15 MUInt15; 16 | typedef ParallelMath::SInt16 MSInt16; 17 | typedef ParallelMath::SInt32 MSInt32; 18 | 19 | UnfinishedEndpoints() 20 | { 21 | } 22 | 23 | UnfinishedEndpoints(const MFloat *base, const MFloat *offset) 24 | { 25 | for (int ch = 0; ch < TVectorSize; ch++) 26 | m_base[ch] = base[ch]; 27 | for (int ch = 0; ch < TVectorSize; ch++) 28 | m_offset[ch] = offset[ch]; 29 | } 30 | 31 | UnfinishedEndpoints(const UnfinishedEndpoints& other) 32 | { 33 | for (int ch = 0; ch < TVectorSize; ch++) 34 | m_base[ch] = other.m_base[ch]; 35 | for (int ch = 0; ch < TVectorSize; ch++) 36 | m_offset[ch] = other.m_offset[ch]; 37 | } 38 | 39 | void FinishHDRUnsigned(int tweak, int range, MSInt16 *outEP0, MSInt16 *outEP1, ParallelMath::RoundTowardNearestForScope *roundingMode) 40 | { 41 | float tweakFactors[2]; 42 | Util::ComputeTweakFactors(tweak, range, tweakFactors); 43 | 44 | 
for (int ch = 0; ch < TVectorSize; ch++) 45 | { 46 | MUInt15 channelEPs[2]; 47 | for (int epi = 0; epi < 2; epi++) 48 | { 49 | MFloat f = ParallelMath::Clamp(m_base[ch] + m_offset[ch] * tweakFactors[epi], 0.0f, 31743.0f); 50 | channelEPs[epi] = ParallelMath::RoundAndConvertToU15(f, roundingMode); 51 | } 52 | 53 | outEP0[ch] = ParallelMath::LosslessCast::Cast(channelEPs[0]); 54 | outEP1[ch] = ParallelMath::LosslessCast::Cast(channelEPs[1]); 55 | } 56 | } 57 | 58 | void FinishHDRSigned(int tweak, int range, MSInt16* outEP0, MSInt16* outEP1, ParallelMath::RoundTowardNearestForScope* roundingMode) 59 | { 60 | float tweakFactors[2]; 61 | Util::ComputeTweakFactors(tweak, range, tweakFactors); 62 | 63 | for (int ch = 0; ch < TVectorSize; ch++) 64 | { 65 | MSInt16 channelEPs[2]; 66 | for (int epi = 0; epi < 2; epi++) 67 | { 68 | MFloat f = ParallelMath::Clamp(m_base[ch] + m_offset[ch] * tweakFactors[epi], -31743.0f, 31743.0f); 69 | channelEPs[epi] = ParallelMath::RoundAndConvertToS16(f, roundingMode); 70 | } 71 | 72 | outEP0[ch] = channelEPs[0]; 73 | outEP1[ch] = channelEPs[1]; 74 | } 75 | } 76 | 77 | void FinishLDR(int tweak, int range, MUInt15* outEP0, MUInt15* outEP1) 78 | { 79 | ParallelMath::RoundTowardNearestForScope roundingMode; 80 | 81 | float tweakFactors[2]; 82 | Util::ComputeTweakFactors(tweak, range, tweakFactors); 83 | 84 | for (int ch = 0; ch < TVectorSize; ch++) 85 | { 86 | MFloat ep0f = ParallelMath::Clamp(m_base[ch] + m_offset[ch] * tweakFactors[0], 0.0f, 255.0f); 87 | MFloat ep1f = ParallelMath::Clamp(m_base[ch] + m_offset[ch] * tweakFactors[1], 0.0f, 255.0f); 88 | outEP0[ch] = ParallelMath::RoundAndConvertToU15(ep0f, &roundingMode); 89 | outEP1[ch] = ParallelMath::RoundAndConvertToU15(ep1f, &roundingMode); 90 | } 91 | } 92 | 93 | template 94 | UnfinishedEndpoints ExpandTo(float filler) 95 | { 96 | MFloat newBase[TNewVectorSize]; 97 | MFloat newOffset[TNewVectorSize]; 98 | 99 | for (int ch = 0; ch < TNewVectorSize && ch < TVectorSize; ch++) 100 | { 101 | 
newBase[ch] = m_base[ch]; 102 | newOffset[ch] = m_offset[ch]; 103 | } 104 | 105 | MFloat fillerV = ParallelMath::MakeFloat(filler); 106 | 107 | for (int ch = TVectorSize; ch < TNewVectorSize; ch++) 108 | { 109 | newBase[ch] = fillerV; 110 | newOffset[ch] = ParallelMath::MakeFloatZero(); 111 | } 112 | 113 | return UnfinishedEndpoints(newBase, newOffset); 114 | } 115 | 116 | private: 117 | MFloat m_base[TVectorSize]; 118 | MFloat m_offset[TVectorSize]; 119 | }; 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /ConvectionKernels_Util.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Convection Texture Tools 3 | Copyright (c) 2018-2019 Eric Lasota 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject 11 | to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included 14 | in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 17 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 21 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
23 | 24 | ------------------------------------------------------------------------------------- 25 | 26 | Portions based on DirectX Texture Library (DirectXTex) 27 | 28 | Copyright (c) Microsoft Corporation. All rights reserved. 29 | Licensed under the MIT License. 30 | 31 | http://go.microsoft.com/fwlink/?LinkId=248926 32 | */ 33 | #include "ConvectionKernels_Config.h" 34 | 35 | #if !defined(CVTT_SINGLE_FILE) || defined(CVTT_SINGLE_FILE_IMPL) 36 | 37 | #include "ConvectionKernels.h" 38 | #include "ConvectionKernels_ParallelMath.h" 39 | 40 | #include 41 | 42 | namespace cvtt 43 | { 44 | namespace Util 45 | { 46 | // Signed input blocks are converted into unsigned space, with the maximum value being 254 47 | void BiasSignedInput(PixelBlockU8 inputNormalized[ParallelMath::ParallelSize], const PixelBlockS8 inputSigned[ParallelMath::ParallelSize]) 48 | { 49 | for (size_t block = 0; block < ParallelMath::ParallelSize; block++) 50 | { 51 | const PixelBlockS8& inputSignedBlock = inputSigned[block]; 52 | PixelBlockU8& inputNormalizedBlock = inputNormalized[block]; 53 | 54 | for (size_t px = 0; px < 16; px++) 55 | { 56 | for (size_t ch = 0; ch < 4; ch++) 57 | inputNormalizedBlock.m_pixels[px][ch] = static_cast(std::max(inputSignedBlock.m_pixels[px][ch], -127) + 127); 58 | } 59 | } 60 | } 61 | 62 | void FillWeights(const Options &options, float channelWeights[4]) 63 | { 64 | if (options.flags & Flags::Uniform) 65 | channelWeights[0] = channelWeights[1] = channelWeights[2] = channelWeights[3] = 1.0f; 66 | else 67 | { 68 | channelWeights[0] = options.redWeight; 69 | channelWeights[1] = options.greenWeight; 70 | channelWeights[2] = options.blueWeight; 71 | channelWeights[3] = options.alphaWeight; 72 | } 73 | } 74 | 75 | void ComputeTweakFactors(int tweak, int range, float *outFactors) 76 | { 77 | int totalUnits = range - 1; 78 | int minOutsideUnits = ((tweak >> 1) & 1); 79 | int maxOutsideUnits = (tweak & 1); 80 | int insideUnits = totalUnits - minOutsideUnits - 
maxOutsideUnits; 81 | 82 | outFactors[0] = -static_cast(minOutsideUnits) / static_cast(insideUnits); 83 | outFactors[1] = static_cast(maxOutsideUnits) / static_cast(insideUnits) + 1.0f; 84 | } 85 | } 86 | } 87 | 88 | #endif 89 | -------------------------------------------------------------------------------- /ConvectionKernels_Util.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "ConvectionKernels_ParallelMath.h" 4 | 5 | namespace cvtt 6 | { 7 | struct PixelBlockU8; 8 | struct PixelBlockS8; 9 | struct Options; 10 | } 11 | 12 | namespace cvtt 13 | { 14 | namespace Util 15 | { 16 | // Signed input blocks are converted into unsigned space, with the maximum value being 254 17 | void BiasSignedInput(PixelBlockU8 inputNormalized[ParallelMath::ParallelSize], const PixelBlockS8 inputSigned[ParallelMath::ParallelSize]); 18 | void FillWeights(const Options &options, float channelWeights[4]); 19 | void ComputeTweakFactors(int tweak, int range, float *outFactors); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Convection Texture Tools Stand-Alone Kernels 2 | 3 | Copyright (c) 2018 Eric Lasota 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject 11 | to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included 14 | in all copies or substantial portions of the Software. 
15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 17 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 21 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | ************************************************************************** 25 | 26 | Based on DirectX Texture Library 27 | 28 | Copyright (c) 2018 Microsoft Corp 29 | 30 | Permission is hereby granted, free of charge, to any person obtaining a copy of this 31 | software and associated documentation files (the "Software"), to deal in the Software 32 | without restriction, including without limitation the rights to use, copy, modify, 33 | merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 34 | permit persons to whom the Software is furnished to do so, subject to the following 35 | conditions: 36 | 37 | The above copyright notice and this permission notice shall be included in all copies 38 | or substantial portions of the Software. 39 | 40 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 41 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 42 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 43 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 44 | CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 45 | OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-------------------------------------------------------------------------------- /MakeTables/App.config: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /MakeTables/MakeTables.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | AnyCPU 7 | {867F8F36-10EA-4594-AA41-34BC5B74A65A} 8 | Exe 9 | MakeTables 10 | MakeTables 11 | v4.6.2 12 | 512 13 | true 14 | 15 | 16 | AnyCPU 17 | true 18 | full 19 | false 20 | bin\Debug\ 21 | DEBUG;TRACE 22 | prompt 23 | 4 24 | 25 | 26 | AnyCPU 27 | pdbonly 28 | true 29 | bin\Release\ 30 | TRACE 31 | prompt 32 | 4 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /MakeTables/Program.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.IO; 4 | 5 | namespace MakeTables 6 | { 7 | class Program 8 | { 9 | static int BitExpand(int v, int bits) 10 | { 11 | v <<= (8 - bits); 12 | return (v | (v >> bits)); 13 | } 14 | 15 | static int BitExpandP(int v, int bits, int parityBit) 16 | { 17 | v <<= (8 - bits); 18 | v |= (parityBit << (7 - bits)); 19 | v |= (v >> (bits + 1)); 20 | return v; 21 | } 22 | 23 | static int[] aWeight2 = { 0, 21, 43, 64 }; 24 | static int[] aWeight3 = { 0, 9, 18, 27, 37, 46, 55, 64 }; 25 | static int[] aWeight4 = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 }; 26 | 27 | static void EmitTableBC7(StreamWriter w, int bits, int parityBits, int parityBitMin, int parityBitMax, int targetIndex, int maxIndex, string name) 28 | { 29 | int parityBitsCombined = parityBitMin; 30 | if (parityBits == 2) 31 | parityBitsCombined += (parityBitMax << 1); 32 | 33 | w.WriteLine("Table " + name + 
"="); 34 | w.WriteLine("{"); 35 | w.WriteLine(" " + targetIndex + ","); 36 | w.WriteLine(" " + parityBitsCombined + ","); 37 | w.WriteLine(" {"); 38 | 39 | int epRange = 1 << bits; 40 | 41 | for (int i = 0; i < 256; i++) 42 | { 43 | if (i % 8 == 0) 44 | w.Write(" "); 45 | 46 | double bestError = double.MaxValue; 47 | int bestMin = 0; 48 | int bestMax = 0; 49 | int bestActualColor = 0; 50 | 51 | int[] weightTable = null; 52 | if (maxIndex == 3) 53 | weightTable = aWeight2; 54 | else if (maxIndex == 7) 55 | weightTable = aWeight3; 56 | else if (maxIndex == 15) 57 | weightTable = aWeight4; 58 | 59 | for (int min = 0; min < epRange; min++) 60 | { 61 | int minExpanded = (parityBits != 0) ? BitExpandP(min, bits, parityBitMin) : BitExpand(min, bits); 62 | 63 | for (int max = 0; max < epRange; max++) 64 | { 65 | int maxExpanded = (parityBits != 0) ? BitExpandP(max, bits, parityBitMax) : BitExpand(max, bits); 66 | 67 | int interpolated = (((64 - weightTable[targetIndex]) * minExpanded + weightTable[targetIndex] * maxExpanded + 32) >> 6); 68 | 69 | double delta = interpolated - i; 70 | 71 | double error = delta * delta; 72 | 73 | if (error < bestError) 74 | { 75 | bestError = error; 76 | bestActualColor = interpolated; 77 | bestMin = minExpanded; 78 | bestMax = maxExpanded; 79 | } 80 | } 81 | } 82 | 83 | w.Write("{ " + bestMin.ToString() + ", " + bestMax.ToString() + ", " + bestActualColor.ToString() + " },"); 84 | if (i % 8 == 7) 85 | w.WriteLine(); 86 | else 87 | w.Write(" "); 88 | } 89 | 90 | w.WriteLine(" }"); 91 | w.WriteLine("};"); 92 | w.WriteLine(); 93 | } 94 | 95 | static void EmitTable(StreamWriter w, int bits, int maxIndex, double paranoia, string name) 96 | { 97 | w.WriteLine("TableEntry " + name + "[256] ="); 98 | w.WriteLine("{"); 99 | 100 | int epRange = 1 << bits; 101 | 102 | for (int i = 0; i < 256; i++) 103 | { 104 | if (i % 8 == 0) 105 | w.Write(" "); 106 | 107 | double bestError = double.MaxValue; 108 | int bestSpan = 255; 109 | int bestMin = 0; 110 | int 
bestMax = 0; 111 | int bestActualColor = 0; 112 | 113 | for (int min = 0; min < epRange; min++) 114 | { 115 | int minExpanded = BitExpand(min, bits); 116 | 117 | for (int max = 0; max < epRange; max++) 118 | { 119 | int maxExpanded = BitExpand(max, bits); 120 | 121 | int interpolated = (minExpanded * (maxIndex - 1) + maxExpanded) / maxIndex; 122 | int epSpan = Math.Abs(minExpanded - maxExpanded); 123 | 124 | double delta = Math.Abs(interpolated - i) + epSpan * paranoia; 125 | 126 | double error = delta * delta; 127 | 128 | if (error < bestError || (error == bestError && epSpan < bestSpan)) 129 | { 130 | bestError = error; 131 | bestSpan = epSpan; 132 | bestActualColor = interpolated; 133 | bestMin = minExpanded; 134 | bestMax = maxExpanded; 135 | } 136 | } 137 | } 138 | 139 | w.Write("{ " + bestMin.ToString() + ", " + bestMax.ToString() + ", " + bestActualColor.ToString() + ", " + bestSpan.ToString() + " },"); 140 | if (i % 8 == 7) 141 | w.WriteLine(); 142 | else 143 | w.Write(" "); 144 | } 145 | 146 | w.WriteLine("};"); 147 | w.WriteLine(); 148 | } 149 | 150 | static void MakeETC2AlphaRoundingTables(string path) 151 | { 152 | int numRounders = 13; 153 | 154 | int[] etc2alphatable = 155 | { 156 | 2, 5, 8, 14, 157 | 2, 6, 9, 12, 158 | 1, 4, 7, 12, 159 | 1, 3, 5, 12, 160 | 2, 5, 7, 11, 161 | 2, 6, 8, 10, 162 | 3, 6, 7, 10, 163 | 2, 4, 7, 10, 164 | 1, 5, 7, 9, 165 | 1, 4, 7, 9, 166 | 1, 3, 7, 9, 167 | 1, 4, 6, 9, 168 | 2, 3, 6, 9, 169 | 0, 1, 2, 9, 170 | 3, 5, 7, 8, 171 | 2, 4, 6, 8, 172 | }; 173 | 174 | using (StreamWriter w = new StreamWriter(path)) 175 | { 176 | w.WriteLine("#pragma once"); 177 | w.WriteLine("#include "); 178 | w.WriteLine(); 179 | w.WriteLine("// This file is generated by the MakeTables app. 
Do not edit this file manually."); 180 | w.WriteLine(); 181 | 182 | w.WriteLine("namespace cvtt { namespace Tables { namespace ETC2 {"); 183 | w.WriteLine(" const int g_alphaRoundingTableWidth = " + numRounders.ToString() + ";"); 184 | w.WriteLine(" const uint8_t g_alphaRoundingTables[16][" + numRounders.ToString() + "] ="); 185 | w.WriteLine(" {"); 186 | 187 | for (int table = 0; table < 16; table++) 188 | { 189 | w.Write(" { "); 190 | 191 | int baseIndex = table * 4; 192 | for (int rounder = 0; rounder < numRounders; rounder++) 193 | { 194 | int bestIndex = 0; 195 | int bestDistance = 9999; 196 | 197 | for (int index = 0; index < 4; index++) 198 | { 199 | int absDiff = Math.Abs(rounder - etc2alphatable[baseIndex + index]); 200 | if (absDiff < bestDistance) 201 | { 202 | bestDistance = absDiff; 203 | bestIndex = index; 204 | } 205 | } 206 | 207 | if (rounder != 0) 208 | w.Write(", "); 209 | 210 | w.Write(bestIndex.ToString()); 211 | } 212 | 213 | w.WriteLine(" },"); 214 | } 215 | 216 | w.WriteLine(" };"); 217 | w.WriteLine("}}}"); 218 | } 219 | } 220 | 221 | static void ConvertToFakeBT709(out double y, out double u, out double v, double pr, double pg, double pb) 222 | { 223 | double r = pr; 224 | double g = pg; 225 | double b = pb; 226 | 227 | y = r * 0.368233989135369 + g * 1.23876274963149 + b * 0.125054068802017; 228 | u = r * 0.5f - g * 0.4541529 - b * 0.04584709; 229 | v = r * -0.081014709086133 - g * 0.272538676238785 + b * 0.353553390593274; 230 | } 231 | 232 | static void EmitFakeBT709RoundingTable(StreamWriter sw, int tableResolution) 233 | { 234 | for (int r = 0; r < tableResolution; r++) 235 | { 236 | for (int g = 0; g < tableResolution; g++) 237 | { 238 | sw.Write(" "); 239 | 240 | for (int b = 0; b < tableResolution; b++) 241 | { 242 | double y, u, v; 243 | 244 | ConvertToFakeBT709(out y, out u, out v, r, g, b); 245 | 246 | double bestDiff = double.MaxValue; 247 | int bestOctant = 0; 248 | for (int compareOctant = 0; compareOctant < 8; 
compareOctant++) 249 | { 250 | double or = ((compareOctant & 1) == 0) ? 0 : tableResolution; 251 | double og = ((compareOctant & 2) == 0) ? 0 : tableResolution; 252 | double ob = ((compareOctant & 4) == 0) ? 0 : tableResolution; 253 | 254 | double oy, ou, ov; 255 | ConvertToFakeBT709(out oy, out ou, out ov, or, og, ob); 256 | 257 | double dy = oy - y; 258 | double du = ou - u; 259 | double dv = ov - v; 260 | 261 | double error = dy * dy + du * du + dv * dv; 262 | if (error < bestDiff) 263 | { 264 | bestDiff = error; 265 | bestOctant = compareOctant; 266 | } 267 | } 268 | 269 | sw.Write(bestOctant); 270 | sw.Write(", "); 271 | } 272 | sw.WriteLine(); 273 | } 274 | sw.WriteLine(); 275 | } 276 | } 277 | 278 | static void MakeFakeBT709RoundingTables(string path) 279 | { 280 | using (StreamWriter w = new StreamWriter(path)) 281 | { 282 | w.WriteLine("#pragma once"); 283 | w.WriteLine("#include "); 284 | w.WriteLine(); 285 | w.WriteLine("// This file is generated by the MakeTables app. Do not edit this file manually."); 286 | w.WriteLine(); 287 | 288 | w.WriteLine("namespace cvtt { namespace Tables { namespace FakeBT709 {"); 289 | 290 | for (int r = 16; r <= 16; r++) 291 | { 292 | w.WriteLine(" const uint8_t g_rounding" + r.ToString() + "[] ="); 293 | w.WriteLine(" {"); 294 | 295 | EmitFakeBT709RoundingTable(w, r); 296 | 297 | w.WriteLine(" };"); 298 | } 299 | w.WriteLine("}}}"); 300 | } 301 | } 302 | 303 | static void Main(string[] args) 304 | { 305 | string[] filenames = { "ConvectionKernels_BC7_SingleColor.h", "ConvectionKernels_S3TC_SingleColor.h" }; 306 | 307 | for (int i = 0; i < 2; i++) 308 | { 309 | using (StreamWriter w = new StreamWriter(filenames[i])) 310 | { 311 | bool bc7 = (i == 0); 312 | 313 | w.WriteLine("#pragma once"); 314 | w.WriteLine("#include "); 315 | w.WriteLine(); 316 | w.WriteLine("// This file is generated by the MakeTables app. 
Do not edit this file manually."); 317 | w.WriteLine(); 318 | 319 | if (bc7) 320 | w.WriteLine("namespace cvtt { namespace Tables { namespace BC7SC {"); 321 | else 322 | w.WriteLine("namespace cvtt { namespace Tables { namespace S3TCSC {"); 323 | 324 | w.WriteLine(); 325 | w.WriteLine("struct TableEntry"); 326 | w.WriteLine("{"); 327 | w.WriteLine(" uint8_t m_min;"); 328 | w.WriteLine(" uint8_t m_max;"); 329 | w.WriteLine(" uint8_t m_actualColor;"); 330 | if (!bc7) 331 | w.WriteLine(" uint8_t m_span;"); 332 | w.WriteLine("};"); 333 | w.WriteLine(); 334 | 335 | if (bc7) 336 | { 337 | w.WriteLine("struct Table"); 338 | w.WriteLine("{"); 339 | w.WriteLine(" uint8_t m_index;"); 340 | if (bc7) 341 | w.WriteLine(" uint8_t m_pBits;"); 342 | w.WriteLine(" TableEntry m_entries[256];"); 343 | w.WriteLine("};"); 344 | w.WriteLine(); 345 | 346 | // Mode 0: 4-bit endpoints, 2 P-bits, 3-bit indexes 347 | EmitTableBC7(w, 4, 2, 0, 0, 1, 7, "g_mode0_p00_i1"); 348 | EmitTableBC7(w, 4, 2, 0, 0, 2, 7, "g_mode0_p00_i2"); 349 | EmitTableBC7(w, 4, 2, 0, 0, 3, 7, "g_mode0_p00_i3"); 350 | EmitTableBC7(w, 4, 2, 0, 1, 1, 7, "g_mode0_p01_i1"); 351 | EmitTableBC7(w, 4, 2, 0, 1, 2, 7, "g_mode0_p01_i2"); 352 | EmitTableBC7(w, 4, 2, 0, 1, 3, 7, "g_mode0_p01_i3"); 353 | EmitTableBC7(w, 4, 2, 1, 0, 1, 7, "g_mode0_p10_i1"); 354 | EmitTableBC7(w, 4, 2, 1, 0, 2, 7, "g_mode0_p10_i2"); 355 | EmitTableBC7(w, 4, 2, 1, 0, 3, 7, "g_mode0_p10_i3"); // p10: parityBitMin = 1, parityBitMax = 0 (was 1, 1, duplicating p11_i3) 356 | EmitTableBC7(w, 4, 2, 1, 1, 1, 7, "g_mode0_p11_i1"); 357 | EmitTableBC7(w, 4, 2, 1, 1, 2, 7, "g_mode0_p11_i2"); 358 | EmitTableBC7(w, 4, 2, 1, 1, 3, 7, "g_mode0_p11_i3"); 359 | 360 | // Mode 1: 6-bit endpoints, 1 P-bit, 3-bit indexes 361 | EmitTableBC7(w, 6, 1, 0, 0, 1, 7, "g_mode1_p0_i1"); 362 | EmitTableBC7(w, 6, 1, 0, 0, 2, 7, "g_mode1_p0_i2"); 363 | EmitTableBC7(w, 6, 1, 0, 0, 3, 7, "g_mode1_p0_i3"); 364 | EmitTableBC7(w, 6, 1, 1, 1, 1, 7, "g_mode1_p1_i1"); 365 | EmitTableBC7(w, 6, 1, 1, 1, 2, 7, "g_mode1_p1_i2"); 366 | EmitTableBC7(w, 6, 1, 1, 1, 3, 7,
"g_mode1_p1_i3"); 367 | 368 | // Mode 2: 5-bit endpoints, 0 P-bits, 2-bit indexes 369 | EmitTableBC7(w, 5, 0, 0, 0, 1, 3, "g_mode2"); 370 | 371 | // Mode 3: 7-bit endpoints, 1 P-bit, 2-bit indexes 372 | EmitTableBC7(w, 7, 1, 0, 0, 1, 3, "g_mode3_p0"); 373 | EmitTableBC7(w, 7, 1, 1, 1, 1, 3, "g_mode3_p1"); 374 | 375 | // Mode 4: 5-bit RGB endpoints, 6-bit alpha endpoints, no P-bits, 2 or 3-bit indexes 376 | EmitTableBC7(w, 5, 0, 0, 0, 1, 3, "g_mode4_rgb_low"); 377 | EmitTableBC7(w, 5, 0, 0, 0, 1, 7, "g_mode4_rgb_high_i1"); 378 | EmitTableBC7(w, 5, 0, 0, 0, 2, 7, "g_mode4_rgb_high_i2"); 379 | EmitTableBC7(w, 5, 0, 0, 0, 3, 7, "g_mode4_rgb_high_i3"); 380 | EmitTableBC7(w, 6, 0, 0, 0, 1, 3, "g_mode4_a_low"); 381 | EmitTableBC7(w, 6, 0, 0, 0, 1, 7, "g_mode4_a_high_i1"); 382 | EmitTableBC7(w, 6, 0, 0, 0, 2, 7, "g_mode4_a_high_i2"); 383 | EmitTableBC7(w, 6, 0, 0, 0, 3, 7, "g_mode4_a_high_i3"); 384 | 385 | // Mode 5: 7-bit RGB endpoints, 8-bit alpha endpoints (omit), no P-bits, 2-bit indexes 386 | EmitTableBC7(w, 7, 0, 0, 0, 1, 3, "g_mode5_rgb_low"); 387 | 388 | // Mode 6: 7-bit RGB endpoints, 1 P-bit, 4-bit indexes 389 | EmitTableBC7(w, 7, 1, 0, 0, 1, 15, "g_mode6_p0_i1"); 390 | EmitTableBC7(w, 7, 1, 0, 0, 2, 15, "g_mode6_p0_i2"); 391 | EmitTableBC7(w, 7, 1, 0, 0, 3, 15, "g_mode6_p0_i3"); 392 | EmitTableBC7(w, 7, 1, 0, 0, 4, 15, "g_mode6_p0_i4"); 393 | EmitTableBC7(w, 7, 1, 0, 0, 5, 15, "g_mode6_p0_i5"); 394 | EmitTableBC7(w, 7, 1, 0, 0, 6, 15, "g_mode6_p0_i6"); 395 | EmitTableBC7(w, 7, 1, 0, 0, 7, 15, "g_mode6_p0_i7"); 396 | EmitTableBC7(w, 7, 1, 1, 1, 1, 15, "g_mode6_p1_i1"); 397 | EmitTableBC7(w, 7, 1, 1, 1, 2, 15, "g_mode6_p1_i2"); 398 | EmitTableBC7(w, 7, 1, 1, 1, 3, 15, "g_mode6_p1_i3"); 399 | EmitTableBC7(w, 7, 1, 1, 1, 4, 15, "g_mode6_p1_i4"); 400 | EmitTableBC7(w, 7, 1, 1, 1, 5, 15, "g_mode6_p1_i5"); 401 | EmitTableBC7(w, 7, 1, 1, 1, 6, 15, "g_mode6_p1_i6"); 402 | EmitTableBC7(w, 7, 1, 1, 1, 7, 15, "g_mode6_p1_i7"); 403 | 404 | // Mode 7: 5-bit RGB endpoints, 2 
P-bits, 2-bit indexes 405 | EmitTableBC7(w, 7, 2, 0, 0, 1, 3, "g_mode7_p00"); 406 | EmitTableBC7(w, 7, 2, 0, 1, 1, 3, "g_mode7_p01"); 407 | EmitTableBC7(w, 7, 2, 1, 0, 1, 3, "g_mode7_p10"); 408 | EmitTableBC7(w, 7, 2, 1, 1, 1, 3, "g_mode7_p11"); 409 | } 410 | else 411 | { 412 | EmitTable(w, 5, 3, 0.0, "g_singleColor5_3"); 413 | EmitTable(w, 6, 3, 0.0, "g_singleColor6_3"); 414 | EmitTable(w, 5, 2, 0.0, "g_singleColor5_2"); 415 | EmitTable(w, 6, 2, 0.0, "g_singleColor6_2"); 416 | EmitTable(w, 5, 3, 0.03, "g_singleColor5_3_p"); 417 | EmitTable(w, 6, 3, 0.03, "g_singleColor6_3_p"); 418 | EmitTable(w, 5, 2, 0.03, "g_singleColor5_2_p"); 419 | EmitTable(w, 6, 2, 0.03, "g_singleColor6_2_p"); 420 | } 421 | 422 | w.WriteLine("}}}"); 423 | } 424 | } 425 | 426 | MakeETC2AlphaRoundingTables("ConvectionKernels_ETC2_Rounding.h"); 427 | MakeFakeBT709RoundingTables("ConvectionKernels_FakeBT709_Rounding.h"); 428 | } 429 | } 430 | } 431 | -------------------------------------------------------------------------------- /MakeTables/Properties/AssemblyInfo.cs: -------------------------------------------------------------------------------- 1 | using System.Reflection; 2 | using System.Runtime.CompilerServices; 3 | using System.Runtime.InteropServices; 4 | 5 | // General Information about an assembly is controlled through the following 6 | // set of attributes. Change these attribute values to modify the information 7 | // associated with an assembly. 8 | [assembly: AssemblyTitle("MakeSingleColorTables")] 9 | [assembly: AssemblyDescription("")] 10 | [assembly: AssemblyConfiguration("")] 11 | [assembly: AssemblyCompany("HP Inc.")] 12 | [assembly: AssemblyProduct("MakeSingleColorTables")] 13 | [assembly: AssemblyCopyright("Copyright © HP Inc. 2019")] 14 | [assembly: AssemblyTrademark("")] 15 | [assembly: AssemblyCulture("")] 16 | 17 | // Setting ComVisible to false makes the types in this assembly not visible 18 | // to COM components. 
If you need to access a type in this assembly from 19 | // COM, set the ComVisible attribute to true on that type. 20 | [assembly: ComVisible(false)] 21 | 22 | // The following GUID is for the ID of the typelib if this project is exposed to COM 23 | [assembly: Guid("867f8f36-10ea-4594-aa41-34bc5b74a65a")] 24 | 25 | // Version information for an assembly consists of the following four values: 26 | // 27 | // Major Version 28 | // Minor Version 29 | // Build Number 30 | // Revision 31 | // 32 | // You can specify all the values or you can default the Build and Revision Numbers 33 | // by using the '*' as shown below: 34 | // [assembly: AssemblyVersion("1.0.*")] 35 | [assembly: AssemblyVersion("1.0.0.0")] 36 | [assembly: AssemblyFileVersion("1.0.0.0")] 37 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ConvectionKernels 2 | These are the stand-alone texture compression kernels for Convection Texture Tools (CVTT), you can embed these in other applications. 3 | https://github.com/elasota/cvtt 4 | 5 | The CVTT codecs are designed to get very high quality at good speed by leveraging effective heuristics and a SPMD-style design that makes heavy use of SIMD ops and 16-bit math. 
6 | 7 | Compressed texture format support: 8 | * BC1 (DXT1): Complete 9 | * BC2 (DXT3): Complete 10 | * BC3 (DXT5): Complete 11 | * BC4: Complete 12 | * BC5: Complete 13 | * BC6H: Experimental 14 | * BC7: Complete 15 | * ETC1: Complete 16 | * ETC2 RGB: Complete 17 | * ETC2 RGBA: Complete 18 | * ETC2 with punchthrough alpha: Complete 19 | * 11-bit EAC: Experimental 20 | * PVRTC: Not supported 21 | * ASTC: Not supported 22 | 23 | 24 | # Basic usage 25 | 26 | Include "ConvectionKernels.h" 27 | 28 | Depending on the input format, blocks should be pre-packed into one of the PixelBlock structures: PixelBlockU8 for unsigned LDR formats (BC1, BC2, BC3, BC7, BC4U, BC5U), PixelBlockS8 for signed LDR formats (BC4S, BC5S), and PixelBlockF16 for HDR formats (BC6H). The block pixel order is left-to-right, top-to-bottom, and the channel order is red, green, blue, alpha. 29 | 30 | BC6H floats are stored as int16_t in the pixel block structure, which should be bit-cast from the 16-bit float input. Converting other float precisions to 16-bit is outside of the scope of the kernels. 31 | 32 | Create an Options structure and fill it out: 33 | * flags: A bitwise OR of one or more cvtt::Flags values, which enable or disable various features. 34 | * threshold: The alpha threshold for encoding BC1 with alpha test. Any alpha value lower than the threshold will use transparent alpha. 35 | * redWeight: Red channel relative importance 36 | * blueWeight: Blue channel relative importance 37 | * alphaWeight: Alpha channel relative importance 38 | 39 | For some modes, you must pass an encoding plan, which controls how the encoder will behave. You should NOT attempt to initialize the encoding plan yourself; either use a default-initialized encoding plan (which will run at maximum quality), or use ConfigureBC7EncodingPlanFromQuality or ConfigureBC7EncodingPlanFromFineTuningParams to configure a lower-quality encoding plan.
Configuring an encoding plan is somewhat slow and you should only do it once per encode job. 40 | 41 | Once you've done both of those things, call the corresponding encode function to digest the input blocks and emit output blocks. 42 | 43 | **VERY IMPORTANT**: The encode functions must be given a list of cvtt::NumParallelBlocks blocks, and will emit cvtt::NumParallelBlocks output blocks. If you want to encode fewer blocks, then you must pad the input structure with unused block data, and the output buffer must still contain enough space. 44 | 45 | # ETC compression 46 | 47 | The ETC encoders require significantly more temporary data storage than the other encoders, so the storage must be allocated before using the encoders. 48 | 49 | To allocate the temporary data: 50 | * Create an allocation function compatible with cvtt::Kernels::allocFunc_t, which accepts a context pointer and byte size and returns a buffer of at least that size. The returned buffer must be suitably aligned for SIMD usage (e.g. 16-byte alignment on x86). 51 | * Call AllocETC1Data or AllocETC2Data, passing the allocation function and a context pointer, which will be passed back to the allocation function. 52 | 53 | To release the temporary data: 54 | * Create a free function compatible with cvtt::Kernels::freeFunc_t, which accepts a context pointer, a pointer to the buffer allocated by the allocation function, and the original size. 55 | * Call ReleaseETC1Data or ReleaseETC2Data, passing the compression data structure originally returned by AllocETC1Data or AllocETC2Data, and the free function. 56 | 57 | Once allocated, the compression data can be reused over multiple calls to the encode functions, and depending on architecture, can usually be used by a different thread than the one that allocated it, as long as multiple encode functions are not using it at once.
58 | -------------------------------------------------------------------------------- /etc2packer/.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*.{cpp,h,inl,fx,hlsl}] 4 | indent_size = 4 5 | indent_style = space 6 | trim_trailing_whitespace = true 7 | insert_final_newline = true 8 | end_of_line = crlf 9 | charset = latin1 10 | -------------------------------------------------------------------------------- /etc2packer/etc2packer.cpp: -------------------------------------------------------------------------------- 1 | // This is a simple example application for using CVTT's ETC kernels to compress ETC textures. 2 | // It only compresses a single texture level. 3 | 4 | #include 5 | #include 6 | 7 | #include "stb_image/stb_image.h" 8 | 9 | #include "ktxheader.h" 10 | #include "etc2packer.h" 11 | #include "../ConvectionKernels.h" 12 | 13 | static void *allocshim(void *context, size_t size) 14 | { 15 | return _aligned_malloc(size, 16); 16 | } 17 | 18 | static void freeshim(void *context, void *ptr, size_t size) 19 | { 20 | _aligned_free(ptr); 21 | } 22 | 23 | enum TargetFormat 24 | { 25 | ETC1, 26 | ETC2_RGB, 27 | ETC2_RGBA, 28 | ETC2_Punchthrough, 29 | R11_Unsigned, 30 | R11_Signed, 31 | }; 32 | 33 | const char *g_formatNames[] = 34 | { 35 | "etc1", 36 | "etc2rgb", 37 | "etc2rgba", 38 | "etc2punchthrough", 39 | "r11u", 40 | "r11s", 41 | }; 42 | 43 | void PrintUsageAndExit() 44 | { 45 | fprintf(stderr, "Usage: etc2packer [options] input output\n"); 46 | fprintf(stderr, "Options:\n"); 47 | fprintf(stderr, "-format - Selects output format. 
Format is one of:\n"); 48 | fprintf(stderr, " etc1 - ETC1\n"); 49 | fprintf(stderr, " etc2rgb - ETC2 RGB\n"); 50 | fprintf(stderr, " etc2rgba - ETC2 RGBA\n"); 51 | fprintf(stderr, " etc2punchthrough - ETC2 RGB with punchthrough alpha\n"); 52 | fprintf(stderr, "-fakebt709 - Use fake BT.709 error metric (same as etc2comp, significantly slower)\n"); 53 | fprintf(stderr, "-uniform - Use uniform color weights (overrides -fakebt709)\n"); 54 | exit(-1); 55 | } 56 | 57 | int main(int argc, const char **argv) 58 | { 59 | TargetFormat targetFormat = ETC2_RGB; 60 | bool useFakeBT709 = false; 61 | bool useUniform = false; 62 | 63 | const char *inputPath = NULL; 64 | const char *outputPath = NULL; 65 | 66 | if (argc < 3) 67 | PrintUsageAndExit(); 68 | 69 | for (int i = 1; i < argc; i++) 70 | { 71 | if (!strcmp(argv[i], "-format")) 72 | { 73 | i++; 74 | if (i == argc) 75 | PrintUsageAndExit(); 76 | 77 | bool foundFormat = false; 78 | for (int f = 0; f < sizeof(g_formatNames) / sizeof(g_formatNames[0]); f++) 79 | { 80 | if (!strcmp(argv[i], g_formatNames[f])) 81 | { 82 | targetFormat = static_cast(f); 83 | foundFormat = true; 84 | break; 85 | } 86 | } 87 | } 88 | else if (!strcmp(argv[i], "-fakebt709")) 89 | { 90 | useFakeBT709 = true; 91 | } 92 | else if (!strcmp(argv[i], "-uniform")) 93 | { 94 | useUniform = true; 95 | } 96 | else 97 | { 98 | if (i != argc - 2) 99 | PrintUsageAndExit(); 100 | 101 | inputPath = argv[i]; 102 | outputPath = argv[i + 1]; 103 | break; 104 | } 105 | } 106 | 107 | int w, h, channels; 108 | stbi_uc *image = stbi_load(inputPath, &w, &h, &channels, 4); 109 | 110 | if (!image) 111 | { 112 | fprintf(stderr, "Could not load input image\n"); 113 | return -1; 114 | } 115 | 116 | static const uint8_t ktxIdentifier[12] = 117 | { 118 | 0xAB, 0x4B, 0x54, 0x58, // first four bytes of Byte[12] identifier 119 | 0x20, 0x31, 0x31, 0xBB, // next four bytes of Byte[12] identifier 120 | 0x0D, 0x0A, 0x1A, 0x0A // final four bytes of Byte[12] identifier 121 | }; 122 | 123 
| KtxHeader_t ktxHeader; 124 | memcpy(ktxHeader.identifier, ktxIdentifier, 12); 125 | 126 | ktxHeader.endianness = 0x04030201; 127 | ktxHeader.glType = 0; 128 | ktxHeader.glTypeSize = 1; 129 | ktxHeader.glFormat = 0; 130 | 131 | ktxHeader.glInternalFormat = (unsigned int)KtxHeader_t::InternalFormat::ETC2_RGB8; 132 | ktxHeader.glBaseInternalFormat = (unsigned int)KtxHeader_t::BaseInternalFormat::ETC2_RGB8; 133 | 134 | ktxHeader.pixelWidth = w; 135 | ktxHeader.pixelHeight = h; 136 | ktxHeader.pixelDepth = 0; 137 | ktxHeader.numberOfArrayElements = 0; 138 | ktxHeader.numberOfFaces = 0; 139 | ktxHeader.bytesOfKeyValueData = 0; 140 | 141 | ktxHeader.pixelDepth = 0; 142 | ktxHeader.numberOfArrayElements = 0; 143 | ktxHeader.numberOfFaces = 1; 144 | ktxHeader.numberOfMipmapLevels = 1; 145 | 146 | unsigned int blockSizeBytes = 8; 147 | 148 | switch (targetFormat) 149 | { 150 | case ETC1: 151 | ktxHeader.glInternalFormat = (unsigned int)KtxHeader_t::InternalFormat::ETC1_RGB8; 152 | ktxHeader.glBaseInternalFormat = (unsigned int)KtxHeader_t::BaseInternalFormat::ETC1_RGB8; 153 | blockSizeBytes = 8; 154 | break; 155 | case ETC2_RGB: 156 | ktxHeader.glInternalFormat = (unsigned int)KtxHeader_t::InternalFormat::ETC2_RGB8; 157 | ktxHeader.glBaseInternalFormat = (unsigned int)KtxHeader_t::BaseInternalFormat::ETC2_RGB8; 158 | blockSizeBytes = 8; 159 | break; 160 | case ETC2_RGBA: 161 | ktxHeader.glInternalFormat = (unsigned int)KtxHeader_t::InternalFormat::ETC2_RGBA8; 162 | ktxHeader.glBaseInternalFormat = (unsigned int)KtxHeader_t::BaseInternalFormat::ETC2_RGBA8; 163 | blockSizeBytes = 16; 164 | break; 165 | case ETC2_Punchthrough: 166 | ktxHeader.glInternalFormat = (unsigned int)KtxHeader_t::InternalFormat::ETC2_RGB8A1; 167 | ktxHeader.glBaseInternalFormat = (unsigned int)KtxHeader_t::BaseInternalFormat::ETC2_RGB8A1; 168 | blockSizeBytes = 8; 169 | break; 170 | case R11_Unsigned: 171 | ktxHeader.glInternalFormat = (unsigned int)KtxHeader_t::InternalFormat::ETC2_R11; 172 | 
ktxHeader.glBaseInternalFormat = (unsigned int)KtxHeader_t::BaseInternalFormat::ETC2_R11; 173 | blockSizeBytes = 8; 174 | break; 175 | case R11_Signed: 176 | ktxHeader.glInternalFormat = (unsigned int)KtxHeader_t::InternalFormat::ETC2_SIGNED_R11; 177 | ktxHeader.glBaseInternalFormat = (unsigned int)KtxHeader_t::BaseInternalFormat::ETC2_R11; 178 | blockSizeBytes = 8; 179 | break; 180 | } 181 | 182 | uint8_t alphaOutputBlock[8 * cvtt::NumParallelBlocks]; 183 | uint8_t outputBlock[8 * cvtt::NumParallelBlocks]; 184 | 185 | FILE *f = fopen(outputPath, "wb"); 186 | if (!f) 187 | { 188 | fprintf(stderr, "Could not open output file\n"); 189 | return -1; 190 | } 191 | 192 | int blockWidth = (w + 3) / 4; 193 | int blockHeight = (h + 3) / 4; 194 | 195 | fwrite(&ktxHeader, sizeof(ktxHeader), 1, f); 196 | uint32_t dataSize = blockWidth * blockHeight * blockSizeBytes; 197 | fwrite(&dataSize, 4, 1, f); 198 | 199 | cvtt::Options options; 200 | 201 | if (useUniform) 202 | options.flags |= cvtt::Flags::Uniform; 203 | else if (useFakeBT709) 204 | options.flags |= cvtt::Flags::ETC_UseFakeBT709; 205 | 206 | cvtt::ETC1CompressionData* compressionData1 = NULL; 207 | cvtt::ETC2CompressionData* compressionData2 = NULL; 208 | 209 | if (targetFormat == ETC1) 210 | compressionData1 = cvtt::Kernels::AllocETC1Data(allocshim, nullptr); 211 | 212 | if (targetFormat == ETC2_RGB || targetFormat == ETC2_RGBA || targetFormat == ETC2_Punchthrough) 213 | compressionData2 = cvtt::Kernels::AllocETC2Data(allocshim, nullptr, options); 214 | 215 | for (int y = 0; y < h; y += 4) 216 | { 217 | cvtt::PixelBlockU8 pixelBlocks[8]; 218 | cvtt::PixelBlockScalarS16 pixelBlockSigned[8]; 219 | cvtt::PixelBlockScalarS16 pixelBlockUnsigned[8]; 220 | for (int x = 0; x < w; x += 32) 221 | { 222 | for (int block = 0; block < cvtt::NumParallelBlocks; block++) 223 | { 224 | for (int subY = 0; subY < 4; subY++) 225 | { 226 | int clampedY = std::min(y + subY, h - 1); 227 | 228 | const uint8_t *inputRow = image + (clampedY) * 
w * 4; 229 | for (int subX = 0; subX < 4; subX++) 230 | { 231 | int clampedX = std::min(x + subX + block * 4, w - 1); 232 | 233 | int rgba[4]; 234 | for (int ch = 0; ch < 4; ch++) 235 | rgba[ch] = inputRow[clampedX * 4 + ch]; 236 | 237 | for (int ch = 0; ch < 4; ch++) 238 | pixelBlocks[block].m_pixels[subY * 4 + subX][ch] = rgba[ch]; 239 | 240 | double rgbaTotal = rgba[0] + rgba[1] + rgba[2]; 241 | double normalizedUnsigned = rgbaTotal / (255.0 * 3.0); 242 | double normalizedSigned = normalizedUnsigned * 2.0 - 1.0; 243 | 244 | pixelBlockUnsigned[block].m_pixels[subY * 4 + subX] = static_cast(floor(normalizedUnsigned * 2047.0 + 0.5)); 245 | pixelBlockSigned[block].m_pixels[subY * 4 + subX] = static_cast(floor(normalizedUnsigned * 1023.0 + 0.5)); 246 | } 247 | } 248 | } 249 | 250 | if (targetFormat == ETC2_RGBA) 251 | cvtt::Kernels::EncodeETC2Alpha(alphaOutputBlock, pixelBlocks, options); 252 | 253 | switch (targetFormat) 254 | { 255 | case ETC1: 256 | cvtt::Kernels::EncodeETC1(outputBlock, pixelBlocks, options, compressionData1); 257 | break; 258 | case R11_Unsigned: 259 | cvtt::Kernels::EncodeETC2Alpha11(outputBlock, pixelBlockUnsigned, false, options); 260 | break; 261 | case R11_Signed: 262 | cvtt::Kernels::EncodeETC2Alpha11(outputBlock, pixelBlockSigned, true, options); 263 | break; 264 | case ETC2_Punchthrough: 265 | cvtt::Kernels::EncodeETC2PunchthroughAlpha(outputBlock, pixelBlocks, options, compressionData2); 266 | break; 267 | case ETC2_RGB: 268 | case ETC2_RGBA: 269 | cvtt::Kernels::EncodeETC2(outputBlock, pixelBlocks, options, compressionData2); 270 | break; 271 | } 272 | 273 | int writableBlocks = std::min(cvtt::NumParallelBlocks, (w - x + 3) / 4); 274 | 275 | for (int block = 0; block < writableBlocks; block++) 276 | { 277 | if (targetFormat == ETC2_RGBA) 278 | fwrite(alphaOutputBlock + block * 8, 8, 1, f); 279 | fwrite(outputBlock + block * 8, 8, 1, f); 280 | } 281 | } 282 | } 283 | 284 | if (compressionData1) 285 | 
cvtt::Kernels::ReleaseETC1Data(compressionData1, freeshim); 286 | 287 | if (compressionData2) 288 | cvtt::Kernels::ReleaseETC2Data(compressionData2, freeshim); 289 | 290 | stbi_image_free(image); 291 | 292 | return 0; 293 | } 294 | -------------------------------------------------------------------------------- /etc2packer/etc2packer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | void CompressETC2Block(uint8_t * outputBuffer, const uint8_t * inputBuffer); 4 | -------------------------------------------------------------------------------- /etc2packer/etc2packer.vcxproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Debug 6 | x64 7 | 8 | 9 | Release 10 | x64 11 | 12 | 13 | 14 | 15.0 15 | {23B20484-6E2E-4102-8362-33A29A8D1933} 16 | etc2packer 17 | 10.0.16299.0 18 | 19 | 20 | 21 | Application 22 | true 23 | v141 24 | MultiByte 25 | 26 | 27 | Application 28 | false 29 | v141 30 | true 31 | MultiByte 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | Level3 49 | Disabled 50 | true 51 | true 52 | _CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) 53 | 54 | 55 | 56 | 57 | Level3 58 | MaxSpeed 59 | true 60 | true 61 | true 62 | true 63 | _CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) 64 | 65 | 66 | true 67 | true 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | {5e4f0557-b7d8-4d9b-9d3a-2b966c9c1b47} 83 | 84 | 85 | 86 | 87 | 88 | -------------------------------------------------------------------------------- /etc2packer/etc2packer.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | 
// On-disk header of a KTX texture file. The structure is written to the output file
// verbatim, so the member order and sizes below define the file layout and must not change.
typedef struct KtxHeader_s
{
    // Values for glInternalFormat: the compressed ETC1 / ETC2 / EAC format identifiers.
    enum class InternalFormat
    {
        ETC1_RGB8 = 0x8D64,
        ETC1_ALPHA8 = ETC1_RGB8,
        //
        ETC2_R11 = 0x9270,
        ETC2_SIGNED_R11 = 0x9271,
        ETC2_RG11 = 0x9272,
        ETC2_SIGNED_RG11 = 0x9273,
        ETC2_RGB8 = 0x9274,
        ETC2_SRGB8 = 0x9275,
        ETC2_RGB8A1 = 0x9276,
        ETC2_SRGB8_PUNCHTHROUGH_ALPHA1 = 0x9277,
        ETC2_RGBA8 = 0x9278
    };

    // Values for glBaseInternalFormat: the channel layout matching each compressed format.
    // Several compressed formats share one base format, hence the repeated values.
    enum class BaseInternalFormat
    {
        ETC2_R11 = 0x1903,
        ETC2_RG11 = 0x8227,
        ETC1_RGB8 = 0x1907,
        ETC1_ALPHA8 = ETC1_RGB8,
        //
        ETC2_RGB8 = 0x1907,
        ETC2_RGB8A1 = 0x1908,
        ETC2_RGBA8 = 0x1908,
    };

    uint8_t identifier[12];          // file magic
    uint32_t endianness;             // byte-order marker
    uint32_t glType;
    uint32_t glTypeSize;
    uint32_t glFormat;
    uint32_t glInternalFormat;       // one of InternalFormat
    uint32_t glBaseInternalFormat;   // one of BaseInternalFormat
    uint32_t pixelWidth;
    uint32_t pixelHeight;
    uint32_t pixelDepth;
    uint32_t numberOfArrayElements;
    uint32_t numberOfFaces;
    uint32_t numberOfMipmapLevels;
    uint32_t bytesOfKeyValueData;
} KtxHeader_t;

// 12 identifier bytes + 13 32-bit fields. Any padding would corrupt files written with a raw fwrite.
static_assert(sizeof(KtxHeader_t) == 64, "KtxHeader_t must be exactly 64 bytes with no padding");
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Release 10 | Win32 11 | 12 | 13 | Debug 14 | x64 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | 15.0 23 | {BF8EC93D-003C-45E9-878B-16DECD78808D} 24 | stbimage 25 | 10.0.16299.0 26 | 27 | 28 | 29 | StaticLibrary 30 | true 31 | v141 32 | MultiByte 33 | 34 | 35 | StaticLibrary 36 | false 37 | v141 38 | true 39 | MultiByte 40 | 41 | 42 | StaticLibrary 43 | true 44 | v141 45 | MultiByte 46 | 47 | 48 | StaticLibrary 49 | false 50 | v141 51 | true 52 | MultiByte 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | Level3 76 | MaxSpeed 77 | true 78 | true 79 | true 80 | true 81 | 82 | 83 | true 84 | true 85 | 86 | 87 | 88 | 89 | Level3 90 | Disabled 91 | true 92 | true 93 | 94 | 95 | 96 | 97 | Level3 98 | Disabled 99 | true 100 | true 101 | 102 | 103 | 104 | 105 | Level3 106 | MaxSpeed 107 | true 108 | true 109 | true 110 | true 111 | 112 | 113 | true 114 | true 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | -------------------------------------------------------------------------------- /etc2packer/stb_image/stb_image.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | Source Files 23 | 24 | 25 | 26 | 27 | Source Files 28 | 29 | 30 | Source Files 31 | 32 | 33 | -------------------------------------------------------------------------------- /etc2packer/stb_image/stb_image_write.cpp: 
-------------------------------------------------------------------------------- 1 | #define STB_IMAGE_WRITE_IMPLEMENTATION 2 | #include "stb_image_write.h" 3 | -------------------------------------------------------------------------------- /etc_notes.txt: -------------------------------------------------------------------------------- 1 | The ETC1 compressor uses modified cluster fit: 2 | 3 | Assume that there exists an ideal base color and set of selectors for a given table. 4 | For a given table and set of selectors, the ideal base color can be determined by subtracting the offsets from each pixel and averaging them. 5 | Doing that is equivalent to subtracting the average offset from the average color. 6 | Because positive and negative selectors of the same magnitude cancel out, the search space of possible average offsets is reduced: 57 unique offsets for the first table and 81 for the others. 7 | Most of the offsets result in the same color as another average offset due to quantization of the base color, so those can be de-duplicated. 8 | So: 9 | - Start with a high-precision average color. 10 | - Apply precomputed luma offsets to it. 11 | - Quantize and de-duplicate the base colors. 12 | - Find the ideal selectors for each base color. 13 | 14 | Differential mode is solved by just finding the best legal combination from those attempts. 15 | 16 | There are several scenarios where this is not ideal: 17 | - Clamping behavior can sometimes be leveraged for a more accurate block. 18 | - Differentials can sometimes be moved slightly closer to become legal. 19 | - This only works when MSE is the error metric (i.e. not normal maps) 20 | - This only works when pixel weights are of equal importance (i.e. not using weight by alpha or edge deblocking) 21 | 22 | T and H mode just work by generating clustering assignments by computing a chrominance line and splitting the block in half by the chrominance midpoint and using those to determine the averages. 
23 | 24 | Planar mode is just solved algebraically. 25 | 26 | If you want to emulate etc2comp's default settings, add the flag ETC_UseFakeBT709 to use its modified Rec. 709 error coefficients. 27 | Doing that will significantly slow down encoding because it requires much more complicated quantization math. --------------------------------------------------------------------------------