├── LICENSE ├── README.md ├── dxt.cpp └── dxt.h /LICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 
23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Real-Time DXT1/DXT5 compressor 2 | 3 | Compressor by Dale Weiler [twitter](https://twitter.com/graphmaster) 4 | 5 | DXT endpoint optimization algorithm by Shane Calimlim [twitter](https://twitter.com/ShaneCalimlim) 6 | 7 | Color line algorithm by Fabian Giesen [twitter](https://twitter.com/rygorous) 8 | 9 | ### Resources 10 | * https://raw.githubusercontent.com/nothings/stb/master/stb_dxt.h 11 | * https://code.google.com/p/crunch/ 12 | * https://www.opengl.org/registry/specs/EXT/texture_compression_s3tc.txt 13 | * https://code.google.com/p/libsquish/ 14 | * https://github.com/divVerent/s2tc/wiki 15 | 16 | ### Notes 17 | #### DXT endpoint optimization 18 | `#define DXT_OPTIMIZE` to enable DXT endpoint optimization. This helps with 19 | old hardware fetches and improves the on-disk compression ratio. 20 | 21 | `#define DXT_HIGHP` to enable use of `double` for color-line evaluation. This 22 | can produce better results if the precision of `float` is just not cutting it. 23 | Take note however that the final result will always be treated as `float`. 24 | 25 | Change `kRefineIterations` for more color-line refinement iterations. This should 26 | never be `< 1`. If you increase it `> 3` it's suggested you also enable `DXT_HIGHP`. 27 | 28 | #### RYG covariance matrix 29 | Uses the RYG covariance matrix for standard deviation to establish the color vector 30 | line. This method works great except for a few boundary cases. Full green 31 | next to full red will result in a covariance matrix like: 32 | ``` 33 | [ 1, -1, 0 ] 34 | [ -1, 1, 0 ] 35 | [ 0, 0, 0 ] 36 | ``` 37 | 38 | With such a matrix the power method can generate all zeros for some starting vectors, so `[1, 1, 1]` 39 | is not used, in favor of `[1, 2.718281828, 3.141592654]` instead. 
If the power 40 | method fails to find the largest eigenvector (which is incredibly rare) some 41 | error will occur in the final result. 42 | -------------------------------------------------------------------------------- /dxt.cpp: -------------------------------------------------------------------------------- 1 | #include "dxt.h" 2 | 3 | #include // std::min / std::max 4 | 5 | // The precision to do color-line calculation: 6 | // Note: final evaluation is always treated as float. 7 | #ifdef DXT_HIGHP 8 | typedef double real; 9 | #else 10 | typedef float real; 11 | #endif 12 | 13 | // Color line refinement iterations: 14 | // Minimum is 1 15 | // Default is 3 16 | // 17 | // The maximum really has a lot to do with how much error you'll eventually 18 | // introduce due to precision of the `real' type used in the color line algorithm. 19 | // 20 | // It's suggested you use #define DXT_HIGHP if you want to increase this. 21 | static constexpr size_t kRefineIterations = 3; 22 | 23 | template 24 | static inline T clamp(T current, T min, T max) { 25 | return std::max(min, std::min(current, max)); 26 | } 27 | 28 | enum dxtColor { 29 | kDXTColor33, 30 | kDXTColor66, 31 | kDXTColor50 32 | }; 33 | 34 | struct dxtBlock { 35 | uint16_t color0; 36 | uint16_t color1; 37 | uint32_t pixels; 38 | }; 39 | 40 | static uint16_t dxtPack565(uint16_t &r, uint16_t &g, uint16_t &b) { 41 | return ((r & 0xF8) << 8) | ((g & 0xFC) << 3) | (b >> 3); 42 | } 43 | 44 | static void dxtUnpack565(uint16_t src, uint16_t &r, uint16_t &g, uint16_t &b) { 45 | r = (((src>>11)&0x1F)*527 + 15) >> 6; 46 | g = (((src>>5)&0x3F)*259 + 35) >> 6; 47 | b = ((src&0x1F)*527 + 15) >> 6; 48 | } 49 | 50 | #ifdef DXT_OPTIMIZE 51 | template 52 | static uint16_t dxtCalcColor(uint16_t color0, uint16_t color1) { 53 | uint16_t r[3], g[3], b[3]; 54 | dxtUnpack565(color0, r[0], g[0], b[0]); 55 | dxtUnpack565(color1, r[1], g[1], b[1]); 56 | if (E == kDXTColor33) { 57 | r[2] = (2*r[0] + r[1]) / 3; 58 | g[2] = (2*g[0] + g[1]) 
/ 3; 59 | b[2] = (2*b[0] + b[1]) / 3; 60 | } else if (E == kDXTColor66) { 61 | r[2] = (r[0] + 2*r[1]) / 3; 62 | g[2] = (g[0] + 2*g[1]) / 3; 63 | b[2] = (b[0] + 2*b[1]) / 3; 64 | } else if (E == kDXTColor50) { 65 | r[2] = (r[0] + r[1]) / 2; 66 | g[2] = (g[0] + g[1]) / 2; 67 | b[2] = (b[0] + b[1]) / 2; 68 | } 69 | return dxtPack565(r[2], g[2], b[2]); 70 | } 71 | 72 | template 73 | static size_t dxtOptimize(unsigned char *data, size_t width, size_t height) { 74 | size_t count = 0; 75 | const size_t numBlocks = (width / 4) * (height / 4); 76 | dxtBlock *block = ((dxtBlock*)data) + (T == kDXT5); // DXT5: alpha block is first 77 | for (size_t i = 0; i != numBlocks; ++i, block += (T == kDXT1 ? 1 : 2)) { 78 | const uint16_t color0 = block->color0; 79 | const uint16_t color1 = block->color1; 80 | const uint32_t pixels = block->pixels; 81 | if (pixels == 0) { 82 | // Solid color0 83 | block->color1 = 0; 84 | count++; 85 | } else if (pixels == 0x55555555u) { 86 | // Solid color1, fill with color0 instead, possibly encoding the block 87 | // as 1-bit alpha if color1 is black. 88 | block->color0 = color1; 89 | block->color1 = 0; 90 | block->pixels = 0; 91 | count++; 92 | } else if (pixels == 0xAAAAAAAAu) { 93 | // Solid color2, fill with color0 instead, possibly encoding the block 94 | // as 1-bit alpha if color2 is black. 95 | block->color0 = (color0 > color1 || T == kDXT5) 96 | ? dxtCalcColor(color0, color1) 97 | : dxtCalcColor(color0, color1); 98 | block->color1 = 0; 99 | block->pixels = 0; 100 | count++; 101 | } else if (pixels == 0xFFFFFFFFu) { 102 | // Solid color3 103 | if (color0 > color1 || T == kDXT5) { 104 | // Fill with color0 instead, possibly encoding the block as 1-bit 105 | // alpha if color3 is black. 106 | block->color0 = dxtCalcColor(color0, color1); 107 | block->color1 = 0; 108 | block->pixels = 0; 109 | count++; 110 | } else { 111 | // Transparent / solid black 112 | block->color0 = 0; 113 | block->color1 = T == kDXT1 ? 
0xFFFFu : 0; // kDXT1: Transparent black 114 | if (T == kDXT5) // Solid black 115 | block->pixels = 0; 116 | count++; 117 | } 118 | } else if (T == kDXT5 && (pixels & 0xAAAAAAAAu) == 0xAAAAAAAAu) { 119 | // Only interpolated colors are used, not the endpoints 120 | block->color0 = dxtCalcColor(color0, color1); 121 | block->color1 = dxtCalcColor(color0, color1); 122 | block->pixels = ~pixels; 123 | count++; 124 | } else if (T == kDXT5 && color0 < color1) { 125 | // Otherwise, ensure the colors are always in the same order 126 | block->color0 = color1; 127 | block->color1 = color0; 128 | block->pixels ^= 0x55555555u; 129 | count++; 130 | } 131 | } 132 | return count; 133 | } 134 | #endif 135 | 136 | template 137 | static inline void dxtComputeColorLine(const unsigned char *const uncompressed, 138 | float (&point)[3], float (&direction)[3]) 139 | { 140 | static constexpr real kSixteen = real(16.0); 141 | static constexpr real kOne = real(1.0); 142 | static constexpr real kZero = real(0.0); 143 | static constexpr real kInv16 = kOne / kSixteen; 144 | real sumR = kZero, sumG = kZero, sumB = kZero; 145 | real sumRR = kZero, sumGG = kZero, sumBB = kZero; 146 | real sumRG = kZero, sumRB = kZero, sumGB = kZero; 147 | 148 | for (size_t i = 0; i < 16*C; i += C) { 149 | sumR += uncompressed[i+0]; 150 | sumG += uncompressed[i+1]; 151 | sumB += uncompressed[i+2]; 152 | sumRR += uncompressed[i+0] * uncompressed[i+0]; 153 | sumGG += uncompressed[i+1] * uncompressed[i+1]; 154 | sumBB += uncompressed[i+2] * uncompressed[i+2]; 155 | sumRG += uncompressed[i+0] * uncompressed[i+1]; 156 | sumRB += uncompressed[i+0] * uncompressed[i+2]; 157 | sumGB += uncompressed[i+1] * uncompressed[i+2]; 158 | } 159 | // Average all sums 160 | sumR *= kInv16; 161 | sumG *= kInv16; 162 | sumB *= kInv16; 163 | // Convert squares to squares of the value minus their average 164 | sumRR -= kSixteen * sumR * sumR; 165 | sumGG -= kSixteen * sumG * sumG; 166 | sumBB -= kSixteen * sumB * sumB; 167 | sumRG -= 
kSixteen * sumR * sumG; 168 | sumRB -= kSixteen * sumR * sumB; 169 | sumGB -= kSixteen * sumG * sumB; 170 | // The point on the color line is the average 171 | point[0] = sumR; 172 | point[1] = sumG; 173 | point[2] = sumB; 174 | // RYGDXT covariance matrix 175 | direction[0] = real(1.0); 176 | direction[1] = real(2.718281828); 177 | direction[2] = real(3.141592654); 178 | for (size_t i = 0; i < kRefineIterations; ++i) { 179 | sumR = direction[0]; 180 | sumG = direction[1]; 181 | sumB = direction[2]; 182 | direction[0] = float(sumR*sumRR + sumG*sumRG + sumB*sumRB); 183 | direction[1] = float(sumR*sumRG + sumG*sumGG + sumB*sumGB); 184 | direction[2] = float(sumR*sumRB + sumG*sumGB + sumB*sumBB); 185 | } 186 | } 187 | 188 | template 189 | static inline void dxtLSEMasterColorsClamp(uint16_t (&colors)[2], 190 | const unsigned char *const uncompressed) 191 | { 192 | float sumx1[] = { 0.0f, 0.0f, 0.0f }; 193 | float sumx2[] = { 0.0f, 0.0f, 0.0f }; 194 | dxtComputeColorLine(uncompressed, sumx1, sumx2); 195 | 196 | float length = 1.0f / (0.00001f + sumx2[0]*sumx2[0] + sumx2[1]*sumx2[1] + sumx2[2]*sumx2[2]); 197 | // Calcualte range for vector values 198 | float dotMax = sumx2[0] * uncompressed[0] + 199 | sumx2[1] * uncompressed[1] + 200 | sumx2[2] * uncompressed[2]; 201 | float dotMin = dotMax; 202 | for (size_t i = 1; i < 16; ++i) { 203 | const float dot = sumx2[0] * uncompressed[i*C+0] + 204 | sumx2[1] * uncompressed[i*C+1] + 205 | sumx2[2] * uncompressed[i*C+2]; 206 | if (dot < dotMin) 207 | dotMin = dot; 208 | else if (dot > dotMax) 209 | dotMax = dot; 210 | } 211 | 212 | // Calculate offset from the average location 213 | float dot = sumx2[0]*sumx1[0] + sumx2[1]*sumx1[1] + sumx2[2]*sumx1[2]; 214 | dotMin -= dot; 215 | dotMax -= dot; 216 | dotMin *= length; 217 | dotMax *= length; 218 | // Build the master colors 219 | uint16_t c0[3]; 220 | uint16_t c1[3]; 221 | for (size_t i = 0; i < 3; ++i) { 222 | c0[i] = clamp(int(0.5f + sumx1[i] + dotMax * sumx2[i]), 0, 255); 223 | 
c1[i] = clamp(int(0.5f + sumx1[i] + dotMin * sumx2[i]), 0, 255); 224 | } 225 | // Down sample the master colors to RGB565 226 | const uint16_t i = dxtPack565(c0[0], c0[1], c0[2]); 227 | const uint16_t j = dxtPack565(c1[0], c1[1], c1[2]); 228 | if (i > j) 229 | colors[0] = i, colors[1] = j; 230 | else 231 | colors[1] = i, colors[0] = j; 232 | } 233 | 234 | template 235 | static inline void dxtCompressColorBlock(const unsigned char *const uncompressed, unsigned char (&compressed)[8]) { 236 | uint16_t encodeColor[2]; 237 | dxtLSEMasterColorsClamp(encodeColor, uncompressed); 238 | // Store 565 color 239 | compressed[0] = encodeColor[0] & 255; 240 | compressed[1] = (encodeColor[0] >> 8) & 255; 241 | compressed[2] = encodeColor[1] & 255; 242 | compressed[3] = (encodeColor[1] >> 8) & 255; 243 | for (size_t i = 4; i < 8; i++) 244 | compressed[i] = 0; 245 | 246 | // Reconstitute master color vectors 247 | uint16_t c0[3]; 248 | uint16_t c1[3]; 249 | dxtUnpack565(encodeColor[0], c0[0], c0[1], c0[2]); 250 | dxtUnpack565(encodeColor[1], c1[0], c1[1], c1[2]); 251 | 252 | float colorLine[] = { 0.0f, 0.0f, 0.0f, 0.0f }; 253 | float length = 0.0f; 254 | for (size_t i = 0; i < 3; ++i) { 255 | colorLine[i] = float(c1[i] - c0[i]); 256 | length += colorLine[i] * colorLine[i]; 257 | } 258 | if (length > 0.0f) 259 | length = 1.0f / length; 260 | // Scaling 261 | for (size_t i = 0; i < 3; i++) 262 | colorLine[i] *= length; 263 | // Offset portion of dot product 264 | const float dotOffset = colorLine[0]*c0[0] + colorLine[1]*c0[1] + colorLine[2]*c0[2]; 265 | // Store rest of bits 266 | size_t nextBit = 8*4; 267 | for (size_t i = 0; i < 16; ++i) { 268 | // Find the dot product for this color, to place it on the line with 269 | // A range of [-1, 1] 270 | float dotProduct = colorLine[0] * uncompressed[i*C+0] + 271 | colorLine[1] * uncompressed[i*C+1] + 272 | colorLine[2] * uncompressed[i*C+2] - dotOffset; 273 | // Map to [0, 3] 274 | int nextValue = clamp(int(dotProduct * 3.0f + 0.5f), 0, 
3); 275 | compressed[nextBit >> 3] |= "\x0\x2\x3\x1"[nextValue] << (nextBit & 7); 276 | nextBit += 2; 277 | } 278 | } 279 | 280 | static inline void dxtCompressAlphaBlock(const unsigned char *const uncompressed, unsigned char (&compressed)[8]) { 281 | unsigned char a0 = uncompressed[3]; 282 | unsigned char a1 = uncompressed[3]; 283 | for (size_t i = 4+3; i < 16*4; i += 4) { 284 | if (uncompressed[i] > a0) a0 = uncompressed[i]; 285 | if (uncompressed[i] < a1) a1 = uncompressed[i]; 286 | } 287 | compressed[0] = a0; 288 | compressed[1] = a1; 289 | for (size_t i = 2; i < 8; i++) 290 | compressed[i] = 0; 291 | size_t nextBit = 8*2; 292 | const float scale = 7.9999f / (a0 - a1); 293 | for (size_t i = 3; i < 16*4; i += 4) { 294 | const unsigned char value = "\x1\x7\x6\x5\x4\x3\x2\x0"[size_t((uncompressed[i] - a1) * scale) & 7]; 295 | compressed[nextBit >> 3] |= value << (nextBit & 7); 296 | // Spans two bytes 297 | if ((nextBit & 7) > 5) 298 | compressed[1 + (nextBit >> 3)] |= value >> (8 - (nextBit & 7)); 299 | nextBit += 3; 300 | } 301 | } 302 | 303 | template 304 | std::vector dxtCompress(const unsigned char *const uncompressed, 305 | size_t width, size_t height, size_t channels, size_t &outSize, size_t &optimizedBlocks) 306 | { 307 | size_t index = 0; 308 | const size_t chanStep = channels < 3 ? 0 : 1; 309 | const int hasAlpha = 1 - (channels & 1); 310 | outSize = ((width + 3) >> 2) * ((height + 3) >> 2) * (T == kDXT1 ? 8 : 16); 311 | std::vector compressed(outSize); 312 | unsigned char ublock[16 * (T == kDXT1 ? 3 : 4)]; 313 | unsigned char cblock[8]; 314 | for (size_t j = 0; j < height; j += 4) { 315 | for (size_t i = 0; i < width; i += 4) { 316 | size_t z = 0; 317 | const size_t my = j + 4 >= height ? height - j : 4; 318 | const size_t mx = i + 4 >= width ? 
width - i : 4; 319 | for (size_t y = 0; y < my; ++y) { 320 | for (size_t x = 0; x < mx; ++x) { 321 | for (size_t p = 0; p < 3; ++p) 322 | ublock[z++] = uncompressed[((((j+y)*width)*channels)+((i+x)*channels))+(chanStep * p)]; 323 | if (T == kDXT5) 324 | ublock[z++] = hasAlpha * uncompressed[(j+y)*width*channels+(i+x)*channels+channels-1] + (1 - hasAlpha) * 255; 325 | } 326 | for (size_t x = mx; x < 4; ++x) 327 | for (size_t p = 0; p < (T == kDXT1 ? 3 : 4); ++p) 328 | ublock[z++] = ublock[p]; 329 | } 330 | for (size_t y = my; y < 4; ++y) 331 | for (size_t x = 0; x < 4; ++x) 332 | for (size_t p = 0; p < (T == kDXT1 ? 3 : 4); ++p) 333 | ublock[z++] = ublock[p]; 334 | if (T == kDXT5) { 335 | dxtCompressAlphaBlock(ublock, cblock); 336 | for (size_t x = 0; x < 8; ++x) 337 | compressed[index++] = cblock[x]; 338 | } 339 | dxtCompressColorBlock<(T == kDXT1 ? 3 : 4)>(ublock, cblock); 340 | for (size_t x = 0; x < 8; ++x) 341 | compressed[index++] = cblock[x]; 342 | } 343 | } 344 | #ifdef DXT_OPTIMIZE 345 | optimizedBlocks = dxtOptimize(&compressed[0], width, height); 346 | #endif 347 | return compressed; 348 | } 349 | 350 | template std::vector dxtCompress(const unsigned char *const uncompressed, 351 | size_t width, size_t height, size_t channels, size_t &outSize, size_t &optimizedBlocks); 352 | template std::vector dxtCompress(const unsigned char *const uncompressed, 353 | size_t width, size_t height, size_t channels, size_t &outSize, size_t &optimizedBlocks); 354 | -------------------------------------------------------------------------------- /dxt.h: -------------------------------------------------------------------------------- 1 | #ifndef DXT_HDR 2 | #define DXT_HDR 3 | #include // std::vector 4 | #include // size_t 5 | 6 | enum dxtType { 7 | kDXT1, 8 | kDXT5 9 | }; 10 | 11 | template 12 | std::vector dxtCompress(const unsigned char *const uncompressed, 13 | size_t width, size_t height, size_t channels, size_t &outSize, size_t &optimizedBlocks); 14 | 15 | #endif 16 | 
--------------------------------------------------------------------------------