├── .travis.yml ├── LICENSE ├── README.md ├── quant.cpp ├── quant.hpp └── sample.cc /.travis.yml: -------------------------------------------------------------------------------- 1 | language: cpp 2 | sudo: required 3 | 4 | compiler: 5 | - clang 6 | - gcc 7 | 8 | install: 9 | - wget --quiet -O - https://raw.githubusercontent.com/r-lyeh/depot/master/travis.pre.sh | bash -x 10 | 11 | script: 12 | - wget --quiet -O - https://raw.githubusercontent.com/r-lyeh/depot/master/travis.build.sh | bash -x 13 | - wget --quiet -O - https://raw.githubusercontent.com/r-lyeh/depot/master/travis.run.sh | bash -x 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | For more information, please refer to <http://unlicense.org> 25 | 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Quant :fish_cake:

2 | - Quant is a quantization suite supporting many targets and unit types. 3 | - Quant is tiny, header-only, cross-platform. 4 | - Quant is public domain. 5 | 6 | ## Features 7 | - Support conversion from/to signed normalized floats to/from N-bits shorts (d3d10 variant). 8 | - Support conversion from/to signed normalized floats to/from N-bits shorts (opengl variant). 9 | - Support conversion from/to unsigned normalized floats to/from N-bits shorts. 10 | - Support conversion from/to quaternions to/from 32-bits integers. 11 | - Support conversion from/to positions to/from 48/32/16-bits integers. 12 | - Support conversion from/to scales to/from 48/32/16-bits integers. 13 | - Pack standard 3D transform matrix (matrix4x4f 64 bytes) into 10 or 12 bytes. 14 | - Conversions done as cross-platform and architecture-friendly as possible. 15 | - Good visual quality while aiming to smallest types. 16 | 17 | ## Usages 18 | - To de/quantize animations. 19 | - To de/quantize colors. 20 | - To de/quantize sounds. 21 | - To de/quantize user input. 22 | - To de/quantize network packets. 
23 | - Etc 24 | 25 | ## API 26 | ```c++ 27 | namespace quant { 28 | // For generic floats 29 | uint16_t encode16_half(float); // 16-bit 30 | float decode16_half(uint16_t); // 16-bit 31 | 32 | // For signed normalized [-1..1] floats 33 | uint8_t encode8_snorm(float); // 8-bit 34 | float decode8_snorm(uint8_t); // 8-bit 35 | 36 | // For unsigned normalized [0..1] floats 37 | uint8_t encode8_unorm(float); // 8-bit 38 | float decode8_unorm(uint8_t); // 8-bit 39 | 40 | // For rotation quaternions 41 | encode101010_quat(uint32_t &q, float x, y, z, w); // 32-bit 42 | decode101010_quat(float &x, &y, &z, &w, uint32_t q); // 32-bit 43 | 44 | // For position vectors 45 | encode161616_vec(uint64_t &q, float x, y, z); // 48-bit version 46 | decode161616_vec(float &x, &y, &z, uint64_t q); // 48-bit version 47 | encode8814_vec(uint32_t &q, float x, y, z); // 32-bit version 48 | decode8814_vec(float &x, &y, &z, uint32_t q); // 32-bit version 49 | encode555_vec(uint16_t &q, float x, y, z); // 16-bit version 50 | decode555_vec(float &x, &y, &z, uint16_t q); // 16-bit version 51 | 52 | // For scale vectors 53 | // Scale tip: 54 | //- Try to de/quantize scale vectors as `fn(q,1-x,1-y,1-z)` if possible, rather than `fn(q,x,y,z)` 55 | //- So, scales close to one will be more numerically stable (~fewer visual glitches at larger scales) 56 | //- So, scales close to zero will be less numerically stable (~more visual glitches at smaller scales) 57 | encode8814_vec(uint32_t &q, float x, y, z); // 32-bit version 58 | decode8814_vec(float &x, &y, &z, uint32_t q); // 32-bit version 59 | encode555_vec(uint16_t &q, float x, y, z); // 16-bit version 60 | decode555_vec(float &x, &y, &z, uint16_t q); // 16-bit version 61 | } 62 | ``` 63 | For more variants, please check [quant.hpp header](quant.hpp). 64 | 65 | ## Todos 66 | - Finish (and integrate) curve simplification and hermite splines that I have somewhere lying around. 
67 | - Lossless/lossy animation format proposal: 68 | - demultiplex vertex streams to a single giant (mono) stream 69 | - quantize whole stream `(iter - min) / ( max - min ) -> [0..1]` 70 | - apply lossless audio (FLAC) or lossy audio codec (mp3/ogg) 71 | - decode, upscale stream `iter * ( max - min ) + min`, and multiplex vertex 72 | - profit (?) 73 | 74 | ## Showcase (demo not provided) 75 | ![image](https://raw.github.com/r-lyeh/depot/master/skull-quant.png) 76 | 77 | ## Changelog 78 | - v1.0.2 (2015/06/05) 79 | - Expanded api: 48-bit and 16-bit vector support. 80 | - Improved numerical stability 81 | - v1.0.1 (2015/06/02) 82 | - Improved quant precision 83 | - Fixed rotation bug 84 | - v1.0.0 (2015/05/29) 85 | - Initial revision 86 | 87 | ## References and links 88 | - https://gist.github.com/rygorous/2156668 89 | - http://zeuxcg.org/2010/12/14/quantizing-floats/ 90 | - http://en.wikipedia.org/wiki/Fast_inverse_square_root 91 | - http://bitsquid.blogspot.com.es/2009/11/bitsquid-low-level-animation-system.html 92 | 93 | ## Licenses 94 | - [Quant](https://github.com/r-lyeh/quant) (Public Domain). 95 | - [float from/to half variants](https://gist.github.com/rygorous/2156668) by Fabian "ryg" Giesen (Public Domain). 
96 | -------------------------------------------------------------------------------- /quant.cpp: -------------------------------------------------------------------------------- 1 | #include "quant.hpp" 2 | -------------------------------------------------------------------------------- /quant.hpp: -------------------------------------------------------------------------------- 1 | // Quantization suite: float <-> 16-bit half, s/unorm <-> 8/10-bit short, quaternion <-> 32-bit int, position <-> 32/48-bit int, scale <-> 32/16-bit int 2 | // r-lyeh, public domain 3 | 4 | #pragma once 5 | #include <math.h> 6 | #include <stdint.h> 7 | #include <string.h> 8 | #define QUANT_VERSION "1.0.2" /* (2015/06/05) expanded api: 48-bit and 16-bit vector support; improved numerical stability 9 | #define QUANT_VERSION "1.0.1" /* (2015/06/02) improved quat precision; fixed rotation bug 10 | #define QUANT_VERSION "1.0.0" // (2015/05/29) initial revision */ 11 | 12 | // [usage] 13 | // - Quantize animations (from standard 64-bytes (matrix4x4f) to 12-bytes (uint32_t pos,rot,sca) per bone). 14 | // - Quantize colors. 15 | // - Quantize sounds. 16 | // - Quantize user input. 17 | // - Quantize network packets. 18 | // - ... 19 | // 20 | // [todo] 21 | // - Research: Integrate (and finish) curve simplification and hermite splines that I have somewhere around. 22 | // - Research: Lossless/lossy animation format proposal: 23 | // - demultiplex vertex streams to a single giant (mono) stream 24 | // - quantize whole stream `(iter - min) / ( max - min ) -> [0..1]` 25 | // - apply lossless audio (FLAC) or lossy audio codec (mp3/ogg) 26 | // - decode, upscale stream `iter * ( max - min ) + min`, and multiplex vertex 27 | // - profit (?) 
//
// [refs]
// - https://gist.github.com/rygorous/2156668
// - http://zeuxcg.org/2010/12/14/quantizing-floats/
// - http://en.wikipedia.org/wiki/Fast_inverse_square_root
// - http://bitsquid.blogspot.com.es/2009/11/bitsquid-low-level-animation-system.html

// [api: generics, per component]
namespace quant {
    /// float to 16-bit half
    static uint16_t encode16_half( float fl );
    /// 16-bit half to float
    static float decode16_half( uint16_t half );
    /// float [0..1] to 8-bit byte
    static uint8_t encode8_unorm( float x );
    /// 8-bit byte to float [0..1]
    static float decode8_unorm( uint8_t x );
    /// float [-1..1] to 8-bit byte
    static uint8_t encode8_snorm( float x );
    /// 8-bit byte to float [-1..1]
    static float decode8_snorm( uint8_t x );
    /// float [-1..1] to 8-bit byte (OpenGL version)
    static uint8_t encode8_snorm_gl2( float x );
    /// 8-bit byte to float [-1..1] (OpenGL version)
    static float decode8_snorm_gl2( uint8_t x );

    /// generic N-bits[1..16] half encoder: keeps the N most significant bits of the 16-bit half
    template<unsigned N> static uint16_t encode_half( float fl ) {
        return uint16_t( encode16_half(fl) >> (16-N) );
    }
    /// generic N-bits[1..16] half decoder: the dropped low mantissa bits are restored as zeros
    template<unsigned N> static float decode_half( uint16_t fl ) {
        return decode16_half( uint16_t( fl << (16-N) ) );
    }

    /// generic N-bit unorm encoder (based on D3D10_UNORM)
    /// Input is clamped to [0..1] (NaN maps to 0) so that an out-of-range value can never
    /// produce more than N bits and bleed into a neighbouring field of a packed word.
    template<unsigned N> static uint16_t encode_unorm( float x ) {
        const float clamped = !( x > 0 ) ? 0.f : ( x > 1 ? 1.f : x );
        return uint16_t( int( clamped * ((1<<(N))-1) + 0.5f ) );
    }
    /// generic N-bit unorm decoder (based on D3D10_UNORM)
    template<unsigned N> static float decode_unorm( uint16_t x ) {
        return x / float((1<<(N))-1);
    }
    /// generic N-bit snorm encoder: bit 0 holds the sign, the upper N-1 bits hold the unorm magnitude
    template<unsigned N> static uint16_t encode_snorm( float x ) {
        return uint16_t( (x < 0) | (encode_unorm<N-1>(x < 0 ? -x : x) << 1) );
    }
    /// generic N-bit snorm decoder (inverse of encode_snorm<N>)
    template<unsigned N> static float decode_snorm( uint16_t x ) {
        return decode_unorm<N-1>(x>>1) * (x & 1 ? -1:1);
    }
}

// [api: specialized, per type]
namespace quant {
    /// quaternion to 32-bit integer
    static void encode101010_quat( uint32_t &out, float x, float y, float z, float w );
    /// 32-bit integer to quaternion
    static void decode101010_quat( float &x, float &y, float &z, float &w, uint32_t in );
    /// position or scale to 48-bit integer
    static void encode161616_vec( uint64_t &out, float x, float y, float z );
    /// 48-bit integer to position or scale
    static void decode161616_vec( float &x, float &y, float &z, uint64_t in );
    /// position or scale to 32-bit integer
    static void encode8814_vec( uint32_t &out, float x, float y, float z );
    /// 32-bit integer to position or scale
    static void decode8814_vec( float &x, float &y, float &z, uint32_t in );
    /// position or scale to 16-bit integer
    static void encode555_vec( uint16_t &out, float x, float y, float z );
    /// 16-bit integer to position or scale
    static void decode555_vec( float &x, float &y, float &z, uint16_t in );

    /// quaternion to 32-bit integer (struct version; T exposes .x .y .z .w)
    template<typename T> static void encode101010_quat( uint32_t &out, const T &q );
    /// 32-bit integer to quaternion (struct version; T exposes .x .y .z .w)
    template<typename T> static void decode101010_quat( T &q, uint32_t in );

    /// position or scale to 48-bit integer (struct version) (good compromise)
    template<typename vec3> static void encode161616_vec( uint64_t &out, const vec3 &v );
    /// 48-bit integer to position or scale (struct version) (good compromise)
    template<typename vec3> static void decode161616_vec( vec3 &v, uint64_t in );
    /// position or scale to 16-bit integer (struct version)
    template<typename vec3> static void encode555_vec( uint16_t &out, const vec3 &v );
    /// 16-bit integer to position or scale (struct version)
    template<typename vec3> static void decode555_vec( vec3 &v, uint16_t in );
}

// [api: specialized, scale/positions variants]
namespace quant {
    /// position or scale to 32-bit integer (struct version)(best rotation, poor distance)
    template<typename vec3> static void encode11118_vec( uint32_t &out, const vec3 &v );
    /// 32-bit integer to position or scale (struct version)(best rotation, poor distance)
    template<typename vec3> static void decode11118_vec( vec3 &v, uint32_t in );
    /// position or scale to 32-bit integer (struct version)(large rotation, small distance)
    template<typename vec3> static void encode101010_vec( uint32_t &out, const vec3 &v );
    /// 32-bit integer to position or scale (struct version)(large rotation, small distance)
    template<typename vec3> static void decode101010_vec( vec3 &v, uint32_t in );
    /// position or scale to 32-bit integer (struct version)(medium orientation, medium distance)
    template<typename vec3> static void encode9912_vec( uint32_t &out, const vec3 &v );
    /// 32-bit integer to position or scale (struct version)(medium orientation, medium distance)
    template<typename vec3> static void decode9912_vec( vec3 &v, uint32_t in );
    /// position or scale to 32-bit integer (struct version)(small orientation, large distance) (good compromise)
    template<typename vec3> static void encode8814_vec( uint32_t &out, const vec3 &v );
    /// 32-bit integer to position or scale (struct version)(small orientation, large distance) (good compromise)
    template<typename vec3> static void decode8814_vec( vec3 &v, uint32_t in );
    /// position or scale to 32-bit integer (struct version)(poor orientation, best distance)
    template<typename vec3> static void encode7716_vec( uint32_t &out, const vec3 &v );
    /// 32-bit integer to position or scale (struct version)(poor orientation, best distance)
    template<typename vec3> static void decode7716_vec( vec3 &v, uint32_t in );
}

// [api: misc]
namespace quant {
    /// helper function. reverse/inverse square root (-DQUANT_USE_STD_SQRT to use standard sqrt() instead)
    static float rsqrt( float number );
    /// helper function. remap floating number in range [min1..max1] to range [min2..max2]
    static float remap( float x, float min1, float max1, float min2, float max2 );
}




// implementation, beware of dog

namespace quant {

    //
    // Helper: raw 32-bit pattern of a float.
    // memcpy is the aliasing-safe way to reinterpret the bytes; it always copies exactly 4 bytes.
    static uint32_t float_to_bits( float f ) {
        uint32_t u;
        memcpy( &u, &f, sizeof u );
        return u;
    }

    //
    // Helper: float whose raw 32-bit pattern is `u` (inverse of float_to_bits).
    static float bits_to_float( uint32_t u ) {
        float f;
        memcpy( &f, &u, sizeof f );
        return f;
    }

    //
    // Helper: inverse square root (default: fast, less precision, predictable results across platforms)
    static float rsqrt( float number ) {
#if defined(QUANT_USE_STD_SQRT)
        // a more precise inverse square root.
        // also, this provides less predictable floating results across platforms.
        return float(1 / sqrt(number));
#else
        // fast inverse square root.
        // [ref] http://en.wikipedia.org/wiki/Fast_inverse_square_root
        // The bit trick needs exactly 32 bits. The textbook version reads the float through a
        // `long`, which is 64 bits wide on LP64 targets and therefore reads past the float;
        // a fixed-width integer keeps the result deterministic everywhere.
        const float threehalfs = 1.5F;
        const float x2 = number * 0.5F;
        uint32_t i = float_to_bits( number );
        i = 0x5f3759dfu - ( i >> 1 );                    // magic-constant initial guess
        float y = bits_to_float( i );
        y = y * ( threehalfs - ( x2 * y * y ) );         // 1st Newton iteration
        y = y * ( threehalfs - ( x2 * y * y ) );         // 2nd iteration, this can be removed
        y = y * ( threehalfs - ( x2 * y * y ) );         // 3rd iteration, this can be removed
        return y;
#endif
    }

    //
    // Helper: remap floating number in range [min1..max1] to range [min2..max2]
    static float remap( float x, float min1, float max1, float min2, float max2 ) {
        return ( ( ( x - min1 ) / ( max1 - min1 ) ) * ( max2 - min2 ) ) + min2;
    }

    //
    // Helper: magnitude of the component that was dropped from a unit vector/quaternion,
    // given `remainder` = 1 - (sum of squares of the stored components).
    // Returns 0 when quantization error (or a corrupt word) leaves nothing under the root.
    static float unit_remainder( float remainder ) {
        return remainder > 0 ? 1 / rsqrt( remainder ) : 0.f;
    }

    //
    // [src]: https://gist.github.com/rygorous/2156668
    // Bit-layout views of single and half precision floats. Kept for source compatibility;
    // the codecs below work on plain integers and do not type-pun through these unions.
    union FP32 {
        uint32_t u;
        float f;
        struct {
            uint32_t Mantissa : 23;
            uint32_t Exponent : 8;
            uint32_t Sign : 1;
        };
    };

    union FP16 {
        uint16_t u;
        struct {
            uint32_t Mantissa : 10;
            uint32_t Exponent : 5;
            uint32_t Sign : 1;
        };
    };

    // float to 16-bit half (1 sign, 5 exponent, 10 mantissa bits).
    // Based on ISPC reference code (with minor modifications).
    static uint16_t encode16_half( float fl ) {
        const uint32_t bits     = float_to_bits( fl );
        const uint32_t sign     = ( bits >> 16 ) & 0x8000u;
        const uint32_t exponent = ( bits >> 23 ) & 0xFFu;
        const uint32_t mantissa = bits & 0x7FFFFFu;

        uint32_t half = 0; // exponent and mantissa only; the sign is merged in at the end
        if( exponent == 0 ) {
            // signed zero / single-precision denormal (which underflows): stays zero
        } else if( exponent == 255 ) {
            // Inf or NaN (all exponent bits set): NaN -> qNaN, Inf -> Inf
            half = 0x7C00u | ( mantissa ? 0x200u : 0u );
        } else {
            // normalized number: unbias the single exponent, then bias it for the half
            const int newexp = int( exponent ) - 127 + 15;
            if( newexp >= 31 ) {
                // overflow: signed infinity
                half = 0x7C00u;
            } else if( newexp <= 0 ) {
                // underflow: result is a half denormal, or zero when shifted out entirely
                if( ( 14 - newexp ) <= 24 ) {
                    const uint32_t mant = mantissa | 0x800000u; // hidden 1 bit
                    half = mant >> ( 14 - newexp );
                    // round on the first dropped bit; may carry into the exponent, which is OK
                    if( ( mant >> ( 13 - newexp ) ) & 1 ) {
                        half++;
                    }
                }
            } else {
                half = ( uint32_t( newexp ) << 10 ) | ( mantissa >> 13 );
                // round on the first dropped bit; may overflow to Inf, which is OK
                if( mantissa & 0x1000u ) {
                    half++;
                }
            }
        }
        return uint16_t( half | sign );
    }

    // 16-bit half to float.
    static float decode16_half( uint16_t half ) {
        const uint32_t shifted_exp = 0x7c00u << 13;        // exponent mask after shift
        // exponent/mantissa bits
        uint32_t bits = uint32_t( half & 0x7fffu ) << 13;
        // just the exponent
        const uint32_t exp = shifted_exp & bits;
        // exponent adjust
        bits += uint32_t( 127 - 15 ) << 23;
        // handle exponent special cases
        if( exp == shifted_exp ) {
            // Inf/NaN: extra exponent adjust
            bits += uint32_t( 128 - 16 ) << 23;
        } else if( exp == 0 ) {
            // Zero/Denormal: extra exponent adjust, then renormalize by subtracting 2^-14
            bits += 1u << 23;
            bits = float_to_bits( bits_to_float( bits ) - bits_to_float( 113u << 23 ) );
        }
        // sign bit
        bits |= uint32_t( half & 0x8000u ) << 16;
        return bits_to_float( bits );
    }

    //
    // [ref] http://zeuxcg.org/2010/12/14/quantizing-floats/
    // D3D10, GL2: for *_UNORM formats of n-bit length, the decode function is decode(x) = x / (2^n - 1)
    // Unsigned quantization: input: [0..1] float; output: [0..255] integer
    // D3D10: for *_SNORM formats of n-bit length the decode function is decode(x) = clamp(x / (2^(n-1) - 1), -1, 1)
    // Signed quantization for D3D10 rules: input: [-1..1] float; output: [-127..127] integer
    static uint8_t encode8_unorm(float x) { return uint8_t( int (x * 255.f + 0.5f) ); }
    static float   decode8_unorm(uint8_t x) { return x / 255.f; }
    static uint8_t encode8_snorm(float x) { return uint8_t( int (x * 127.f + (x > 0 ? 0.5f : -0.5f)) ); }
    static float   decode8_snorm(uint8_t x) {
        // The byte carries a two's complement value; read as unsigned, every negative input
        // would decode to a value above 1 and be clamped to +1.
        const int v = x >= 128 ? int( x ) - 256 : int( x );
        const float f = v / 127.f;
        return f <= -1 ? -1.f : (f >= 1 ? 1.f : f);
    }
    //
    // [ref] http://zeuxcg.org/2010/12/14/quantizing-floats/
    // OpenGL2: same decoding function for unsigned numbers, but a different one for signed: decode(x) = (2x + 1) / (2^n - 1)
    // Signed quantization for OpenGL rules: input: [-1..1] float; output: [-128..127] integer
    // Warning: This has slightly better precision (all numbers encode distinct values), but can't represent 0 exactly.
    static uint8_t encode8_snorm_gl2(float x) { return uint8_t( int (x * 127.5f + (x >= 0.f ? 0.f : -1.f)) ); }
    static float   decode8_snorm_gl2(uint8_t x) {
        // same two's complement reinterpretation as decode8_snorm()
        const int v = x >= 128 ? int( x ) - 256 : int( x );
        return (2*v + 1) / 255.f;
    }

    //
    // Quantize rotation as 10+10+10 bits for quaternion components

    static void encode101010_quat( uint32_t &out, float x, float y, float z, float w ) {
        // [ref] http://bitsquid.blogspot.com.es/2009/11/bitsquid-low-level-animation-system.html
        // "For quaternions we use 2 bits to store the index of the largest component,
        // then 10 bits each to store the value of the remaining three components. We use the knowledge
        // that 1 = x^2 + y^2 + z^2 + w^2 to restore the largest component, so we don't actually have to store its value.
        // Since we don't store the largest component we know that the remaining ones must be in the range (-1/sqrt(2), 1/sqrt(2))
        // (otherwise, one of them would be largest). So we use the 10 bits to quantize a value in that range, giving us a precision of 0.0014.
        // The quaternions (x, y, z, w) and (-x, -y, -z, -w) represent the same rotation, so I flip the signs so that the largest component
        // is always positive." - Niklas Frykholm / bitsquid.se
        static const float rmin = -rsqrt(2), rmax = rsqrt(2);
        const float q[4] = { x, y, z, w };

        // index of the component with the largest magnitude; ties go to the lowest index
        unsigned largest = 0;
        for( unsigned i = 1; i < 4; ++i ) {
            if( q[i] * q[i] > q[largest] * q[largest] ) largest = i;
        }
        const bool flip = !( q[largest] >= 0 );

        // layout: [31..30] index of dropped component, then the three remaining components
        // (in x,y,z,w order) at bits [29..20], [19..10] and [9..0]
        uint32_t packed = uint32_t( largest ) << 30;
        int shift = 20;
        for( unsigned i = 0; i < 4; ++i ) {
            if( i == largest ) continue;
            const float c = remap( q[i], rmin, rmax, -1, 1 );
            packed |= uint32_t( encode_snorm<10>( flip ? -c : c ) ) << shift;
            shift -= 10;
        }
        out = packed;
    }

    //
    // DeQuantize rotation as 10+10+10 bits for quaternion components

    static void decode101010_quat( float &x, float &y, float &z, float &w, uint32_t in ) {
        // [ref] http://bitsquid.blogspot.com.es/2009/11/bitsquid-low-level-animation-system.html
        // See encode101010_quat() function above.
        static const float rmin = -rsqrt(2), rmax = rsqrt(2);
        const unsigned largest = in >> 30;
        float q[4] = { 0, 0, 0, 0 };
        float remainder = 1;
        int shift = 20;
        for( unsigned i = 0; i < 4; ++i ) {
            if( i == largest ) continue;
            const float c = decode_snorm<10>( uint16_t( ( in >> shift ) & 0x3FF ) );
            q[i] = remap( c, -1, 1, rmin, rmax );
            remainder -= q[i] * q[i];
            shift -= 10;
        }
        // the dropped component is always positive (see encoder) and follows from unit length
        q[largest] = unit_remainder( remainder );
        x = q[0]; y = q[1]; z = q[2]; w = q[3];
    }

    //
    // Quantize scale/position as X+Y bits for unit vector rotation + Z bits for vector length (note: 2 bits reserved)
    // Requires: each X, Y, Z in [7..16] range && X+Y+Z <= 30

    template<unsigned X, unsigned Y, unsigned Z>
    static void encode_vec( uint32_t &out, float x, float y, float z ) {
        // Somehow similar to encode101010_quat() function above.
        // We decompose given vector into unit vector and length (magnitude). Then we discard the largest component, as unit vectors
        // follow 1 = x^2 + y^2 + z^2 expression (similar to encode101010_quat() function above). The unit vectors (x, y, z) and
        // (-x, -y, -z) represent the same direction, so I flip the magnitude so that the largest component is always positive.
        // Unlike the quaternion codec, the two stored components are not remapped from
        // (-1/sqrt(2), 1/sqrt(2)) to (-1, 1) before being quantized.
        const float sq[3] = { x*x, y*y, z*z };
        const float len2 = sq[0] + sq[1] + sq[2];
        if( !( len2 > 0 ) ) {
            // zero-length (or NaN) input has no direction; the all-zero word decodes to (0,0,0)
            out = 0;
            return;
        }
        const float inv_len = rsqrt( len2 );
        const float unit[3] = { x * inv_len, y * inv_len, z * inv_len };

        // index of the component with the largest magnitude; ties go to the lowest index
        unsigned largest = 0;
        if( sq[1] > sq[largest] ) largest = 1;
        if( sq[2] > sq[largest] ) largest = 2;
        const bool flip = !( unit[largest] >= 0 );

        // the two remaining components, in x,y,z order
        const float a = unit[ largest == 0 ? 1 : 0 ];
        const float b = unit[ largest == 2 ? 1 : 2 ];
        const float len = 1 / inv_len;

        // layout: [31..30] index, X bits for a, Y bits for b, Z bits of (signed) half-float length
        out = ( uint32_t( largest ) << 30 )
            | ( uint32_t( encode_snorm<X>( flip ? -a : a ) ) << (Z+Y) )
            | ( uint32_t( encode_snorm<Y>( flip ? -b : b ) ) << Z )
            |   uint32_t( encode_half<Z>( flip ? -len : len ) );
    }

    //
    // DeQuantize scale/position as X+Y bits for unit vector rotation + Z bits for vector length (note: 2 bits reserved)
    // Requires: each X, Y, Z in [7..16] range && X+Y+Z <= 30

    template<unsigned X, unsigned Y, unsigned Z>
    static void decode_vec( float &x, float &y, float &z, uint32_t in ) {
        // See encode_vec() function above.
        const float a = decode_snorm<X>( uint16_t( ( in >> (Z+Y) ) & ((1u<<X)-1) ) );
        const float b = decode_snorm<Y>( uint16_t( ( in >>  Z    ) & ((1u<<Y)-1) ) );
        // dropped component: always positive, its sign travels in the sign of the length
        const float c = unit_remainder( 1 - a * a - b * b );
        const float len = decode_half<Z>( uint16_t( in & ((1u<<Z)-1) ) );
        switch( in >> 30 ) {
            default: case 0: x = c; y = a; z = b;
            break;   case 1: x = a; y = c; z = b;
            break;   case 2: x = a; y = b; z = c;
        }
        x *= len; y *= len; z *= len;
    }

    // position or scale to 48-bit integer: three 16-bit halves, x in bits [47..32], y in [31..16], z in [15..0]
    static void encode161616_vec( uint64_t &out, float x, float y, float z ) {
        out  = ((uint64_t)encode_half<16>(x)) << 32;
        out |= ((uint64_t)encode_half<16>(y)) << 16;
        out |= ((uint64_t)encode_half<16>(z)) <<  0;
    }
    // 48-bit integer to position or scale
    static void decode161616_vec( float &x, float &y, float &z, uint64_t in ) {
        x = decode_half<16>( uint16_t( (in >> 32) & 0xffff ) );
        y = decode_half<16>( uint16_t( (in >> 16) & 0xffff ) );
        z = decode_half<16>( uint16_t( (in >>  0) & 0xffff ) );
    }
    // position or scale to 48-bit integer (struct version)
    template<typename vec3> static void encode161616_vec( uint64_t &out, const vec3 &v ) { encode161616_vec( out, v.x, v.y, v.z ); }
    // 48-bit integer to position or scale (struct version)
    template<typename vec3> static void decode161616_vec( vec3 &v, uint64_t in ) { decode161616_vec( v.x, v.y, v.z, in ); }


    // in-place class sugars

    template<typename quat>
    static void encode101010_quat( uint32_t &out, const quat &q ) {
        encode101010_quat( out, q.x, q.y, q.z, q.w );
    }
    template<typename quat>
    static void decode101010_quat( quat &q, uint32_t in ) {
        decode101010_quat( q.x, q.y, q.z, q.w, in );
    }


    // position or scale to 16-bit integer: three 5-bit snorm fields, x in bits [14..10], y in [9..5], z in [4..0]
    static void encode555_vec( uint16_t &out, float x, float y, float z ) {
        const uint16_t x5 = encode_snorm<5>( x );
        const uint16_t y5 = encode_snorm<5>( y );
        const uint16_t z5 = encode_snorm<5>( z );
        out = uint16_t( (x5 << 10) | (y5 << 5) | (z5 << 0) );
    }
    // 16-bit integer to position or scale
    static void decode555_vec( float &x, float &y, float &z, uint16_t in ) {
        x = decode_snorm<5>( (in>>10) & 0x1f );
        y = decode_snorm<5>( (in>> 5) & 0x1f );
        z = decode_snorm<5>( (in>> 0) & 0x1f );
    }
    // position or scale to 16-bit integer (struct version)
    template<typename vec3> static void encode555_vec( uint16_t &out, const vec3 &v ) {
        encode555_vec( out, v.x, v.y, v.z );
    }
    // 16-bit integer to position or scale (struct version)
    template<typename vec3> static void decode555_vec( vec3 &v, uint16_t in ) {
        decode555_vec( v.x, v.y, v.z, in );
    }


    // position or scale to 32-bit integer (8+8 bits direction, 14 bits length)
    static void encode8814_vec( uint32_t &out, float x, float y, float z ) {
        encode_vec<8,8,14>( out, x, y, z );
    }
    // 32-bit integer to position or scale (8+8 bits direction, 14 bits length)
    static void decode8814_vec( float &x, float &y, float &z, uint32_t in ) {
        decode_vec<8,8,14>( x, y, z, in );
    }


    // generic X+Y+Z struct versions

    template<unsigned X, unsigned Y, unsigned Z, typename vec3>
    static void encode_vec( uint32_t &out, const vec3 &v ) {
        encode_vec<X,Y,Z>( out, v.x, v.y, v.z );
    }
    template<unsigned X, unsigned Y, unsigned Z, typename vec3>
    static void decode_vec( vec3 &v, uint32_t in ) {
        decode_vec<X,Y,Z>( v.x, v.y, v.z, in );
    }

    template<typename vec3>
    static void encode7716_vec( uint32_t &out, const vec3 &v ) {
        encode_vec<7,7,16>( out, v.x, v.y, v.z );
    }
    template<typename vec3>
    static void decode7716_vec( vec3 &v, uint32_t in ) {
        decode_vec<7,7,16>( v.x, v.y, v.z, in );
    }

    template<typename vec3>
    static void encode8814_vec( uint32_t &out, const vec3 &v ) {
        encode_vec<8,8,14>( out, v.x, v.y, v.z );
    }
    template<typename vec3>
    static void decode8814_vec( vec3 &v, uint32_t in ) {
        decode_vec<8,8,14>( v.x, v.y, v.z, in );
    }

    template<typename vec3>
    static void encode9912_vec( uint32_t &out, const vec3 &v ) {
        encode_vec<9,9,12>( out, v.x, v.y, v.z );
    }
    template<typename vec3>
    static void decode9912_vec( vec3 &v, uint32_t in ) {
        decode_vec<9,9,12>( v.x, v.y, v.z, in );
    }

    template<typename vec3>
    static void encode101010_vec( uint32_t &out, const vec3 &v ) {
        encode_vec<10,10,10>( out, v.x, v.y, v.z );
    }
    template<typename vec3>
    static void decode101010_vec( vec3 &v, uint32_t in ) {
        decode_vec<10,10,10>( v.x, v.y, v.z, in );
    }

    template<typename vec3>
    static void encode11118_vec( uint32_t &out, const vec3 &v ) {
        encode_vec<11,11,8>( out, v.x, v.y, v.z );
    }
    template<typename vec3>
    static void decode11118_vec( vec3 &v, uint32_t in ) {
        decode_vec<11,11,8>( v.x, v.y, v.z, in );
    }

}

// } quant.hpp

#ifdef QUANT_TESTS

#include <cmath>
#include <iostream>

// accumulated absolute error and sample count per 8-bit codec (unorm, snorm, snorm_gl2)
float acc[3] = {};
float hit[3] = {};

// round-trips x through the three 8-bit codecs, prints and accumulates the absolute error
void verify( float x ) {
    using namespace quant;
    const int y1 = encode8_unorm(x);     const float x1 = decode8_unorm(y1);
    const int y2 = encode8_snorm(x);     const float x2 = decode8_snorm(y2);
    const int y3 = encode8_snorm_gl2(x); const float x3 = decode8_snorm_gl2(y3);

    float err;
    err = std::abs(x - x1); acc[0] += err; hit[0]++; std::cout << "v1 " << x1 << " vs " << x << " (error: " << err << ")" << std::endl;
    err = std::abs(x - x2); acc[1] += err; hit[1]++; std::cout << "v2 " << x2 << " vs " << x << " (error: " << err << ")" << std::endl;
    err = std::abs(x - x3); acc[2] += err; hit[2]++; std::cout << "v3 " << x3 << " vs " << x << " (error: " << err << ")" << std::endl;
}

// run 'sample | sort'
int main() {
    using namespace quant;
    for( short x = 0, e = 1000; x <= e; ++x ) {
        verify( x * 0.001f );
    }
    std::cout << "average error (lower is better)" << std::endl;
    std::cout << "avg-1:" << (acc[0] / hit[0]) << std::endl;
    std::cout << "avg-2:" << (acc[1] / hit[1]) << std::endl;
    std::cout << "avg-3:" << (acc[2] / hit[2]) << std::endl;

    std::cout << decode16_half( encode16_half(3.14159f) ) << std::endl;
}

#endif
-------------------------------------------------------------------------------- /sample.cc: -------------------------------------------------------------------------------- 1 | #define QUANT_TESTS 2 | #include "quant.hpp" 3 | --------------------------------------------------------------------------------