├── LICENSE
├── README.md
├── abs_fp64.shader_test
├── add_fp64.shader_test
├── div_fp64.shader_test
├── eq_fp64.shader_test
├── fp32-to-fp64-conversion.shader_test
├── fp64-to-fp32-conversion.shader_test
├── le_fp64.shader_test
├── lt_fp64.shader_test
├── mul_fp64.shader_test
└── neg_fp64.shader_test


/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2017 Elie Tournier
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | # libSoftFloat
 3 | ---
 4 | 
 5 | 
 6 | ## About
 7 | 
 8 | Implementation of a library of double precision operations in pure GLSL 1.30 for GPU using bit twiddling operations and integer math.
 9 | 
10 | This library is a GLSL translation of SoftFloat by John R. Hauser, targeting GPUs instead of CPUs.
11 | 
12 | 
13 | ## Goals
14 | 
15 | * The first goal is to be able to compute FP64 on GPUs without FP64 hardware support.
16 | 
17 | * A stretch goal of this project is to integrate it into [Mesa](https://www.mesa3d.org) and create GL\_ARB\_gpu\_shader\_fp64.
18 | 
19 | For more details, see [my presentation](https://github.com/Hopetech/XDC2016) at XDC'16, [my website](https://hopetech.github.io/) or simply [email me](#author).
20 | 
21 | 
22 | ## GSoC
23 | 
24 | This project began with the [Google Summer of Code](https://developers.google.com/open-source/gsoc/) 2016.
25 | 
26 | 
27 | ## Author
28 | 
29 | Elie Tournier
30 | 
31 | tournier.elie@gmail.com
32 | 
33 | https://github.com/Hopetech
34 | 


--------------------------------------------------------------------------------
/abs_fp64.shader_test:
--------------------------------------------------------------------------------
 1 | #  Absolute value of a double
 2 | # IEEE 754 compliant
 3 | 
 4 | [require]
 5 | GLSL >= 1.30
 6 | 
 7 | [vertex shader]
 8 | #version 130
 9 | 
10 | void main()
11 | {
12 |     gl_Position = gl_Vertex; /* forward the incoming vertex position unchanged */
13 | }
14 | 
15 | [fragment shader]
16 | #version 130
17 | 
18 | /* Returns |a| for a double stored as two 32-bit words: bit 31 of `a.x'
19 |  * (the sign bit) is masked off; every other bit is passed through as is.
20 |  */
21 | uvec2
22 | abs_fp64( uvec2 a )
23 | {
24 |     uint signCleared = a.x & 0x7FFFFFFFu;
25 |     return uvec2( signCleared, a.y );
26 | }
27 | 
28 | uniform uvec2 a;
29 | uniform uvec2 expected;
30 | 
31 | void main()
32 | {
33 |     /* Pass/fail colour: green when abs_fp64(a) matches `expected',
34 |      * red when it does not.
35 |      */
36 |     bool pass = ( abs_fp64( a ) == expected );
37 |     vec4 verdict = pass ? vec4(0.0, 1.0, 0.0, 1.0) : vec4(1.0, 0.0, 0.0, 1.0);
38 |     gl_FragColor = verdict;
39 | }
40 | 
41 | [test]
42 | # A bunch of tests to run.  The 'uniform' lines set the uniforms.  The
43 | # 'draw rect' line draws a rectangle that covers the whole window.
44 | # The 'probe all' line verifies that every pixel contains the expected
45 | # color.
46 | 
47 | # Try +0.0
48 | uniform uvec2 a        0x00000000 0x00000000
49 | uniform uvec2 expected 0x00000000 0x00000000
50 | draw rect -1 -1 2 2
51 | probe all rgba 0.0 1.0 0.0 1.0
52 | 
53 | # Try -0.0
54 | uniform uvec2 a        0x80000000 0x00000000
55 | uniform uvec2 expected 0x00000000 0x00000000
56 | draw rect -1 -1 2 2
57 | probe all rgba 0.0 1.0 0.0 1.0
58 | 


--------------------------------------------------------------------------------
/add_fp64.shader_test:
--------------------------------------------------------------------------------
  1 | # Add two doubles 'a' and 'b'
  2 | # IEEE 754 compliant
  3 | 
  4 | [require]
  5 | GLSL >= 1.30
  6 | 
  7 | [vertex shader]
  8 | #version 130
  9 | 
 10 | void main()
 11 | {
 12 |     gl_Position = gl_Vertex; /* forward the incoming vertex position unchanged */
 13 | }
 14 | 
 15 | [fragment shader]
 16 | #version 130
 17 | 
 18 | /* Software IEEE floating-point rounding mode (defaults to nearest-even). */
 19 | const uint float_round_nearest_even = 0u;
 20 | const uint float_round_to_zero      = 1u;
 21 | const uint float_round_down         = 2u;
 22 | const uint float_round_up           = 3u;
 23 | uint float_rounding_mode = float_round_nearest_even;
 24 | 
 25 | /* 64-bit addition on split operands.  (`a0',`a1') and (`b0',`b1') are each
 26 |  * a high/low pair of 32-bit words; their sum, taken modulo 2^64 (a carry
 27 |  * out of the high word is dropped), is written to `z0Ptr' (high word) and
 28 |  * `z1Ptr' (low word).
 29 |  */
 30 | void
 31 | add64( uint a0, uint a1, uint b0, uint b1,
 32 |        inout uint z0Ptr,
 33 |        inout uint z1Ptr )
 34 | {
 35 |     uint lowSum = a1 + b1;
 36 |     /* The low word wrapped around iff the sum is smaller than an addend. */
 37 |     uint carry = uint ( lowSum < a1 );
 38 |     z0Ptr = a0 + b0 + carry;
 39 |     z1Ptr = lowSum;
 40 | }
 41 | 
 42 | 
 43 | /* 64-bit subtraction on split operands: (`a0',`a1') minus (`b0',`b1'),
 44 |  * each pair being the high and low 32-bit words of a 64-bit value.  The
 45 |  * difference is taken modulo 2^64 (a borrow out of the high word is
 46 |  * dropped) and written to `z0Ptr' (high word) and `z1Ptr' (low word).
 47 |  */
 48 | void
 49 | sub64( uint a0, uint a1, uint b0, uint b1,
 50 |        inout uint z0Ptr,
 51 |        inout uint z1Ptr )
 52 | {
 53 |     uint borrow = ( a1 < b1 ) ? 1u : 0u;   /* low word underflows */
 54 |     z1Ptr = a1 - b1;
 55 |     z0Ptr = ( a0 - b0 ) - borrow;
 56 | }
 57 | 
 58 | /* Right-shifts the 64-bit value `a' (`a.x' = high word, `a.y' = low word)
 59 |  * by `count' bits with "jamming": if any 1 bit is shifted out, the least
 60 |  * significant bit of the result is forced to 1, so the fact that nonzero
 61 |  * bits were lost remains recorded in the result.  `count' may be
 62 |  * arbitrarily large; for counts of 64 or more the result is 1 when `a' is
 63 |  * nonzero and 0 otherwise.
 64 |  *     The high and low words of the result are written to `z0Ptr' and
 65 |  * `z1Ptr' respectively.
 66 |  */
 67 | void
 68 | shift64RightJamming( uvec2 a,
 69 |                      int count,
 70 |                      inout uint z0Ptr,
 71 |                      inout uint z1Ptr )
 72 | {
 73 |     int wrap = ( - count ) & 31;   /* (32 - count) mod 32 */
 74 |     uint hi = 0u;
 75 |     uint lo;
 76 | 
 77 |     if ( count == 0 ) {
 78 |         hi = a.x;
 79 |         lo = a.y;
 80 |     } else if ( count < 32 ) {
 81 |         /* Bits cross from the high word into the low word. */
 82 |         uint lost = a.y<<wrap;
 83 |         hi = a.x>>count;
 84 |         lo = ( a.x<<wrap ) | ( a.y>>count ) | uint ( lost != 0u );
 85 |     } else if ( count == 32 ) {
 86 |         /* The high word becomes the low word. */
 87 |         lo = a.x | uint ( a.y != 0u );
 88 |     } else if ( count < 64 ) {
 89 |         uint lost = ( a.x<<wrap ) | a.y;
 90 |         lo = ( a.x>>( count & 31 ) ) | uint ( lost != 0u );
 91 |     } else {
 92 |         /* Everything is shifted out; only the sticky bit can survive. */
 93 |         lo = uint ( ( a.x | a.y ) != 0u );
 94 |     }
 95 | 
 96 |     z1Ptr = lo;
 97 |     z0Ptr = hi;
 98 | }
 99 | 
100 | /* Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' right
101 |  * by 32 _plus_ the number of bits given in `count'.  The shifted result is
102 |  * at most 64 nonzero bits; these are broken into two 32-bit pieces which are
103 |  * stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
104 |  * off form a third 32-bit result as follows:  The _last_ bit shifted off is
105 |  * the most-significant bit of the extra result, and the other 31 bits of the
106 |  * extra result are all zero if and only if _all_but_the_last_ bits shifted off
107 |  * were all zero.  This extra result is stored in the location pointed to by
108 |  * `z2Ptr'.  The value of `count' can be arbitrarily large.
109 |  *     (This routine makes more sense if `a0', `a1', and `a2' are considered
110 |  * to form a fixed-point value with binary point between `a1' and `a2'.  This
111 |  * fixed-point value is shifted right by the number of bits given in `count',
112 |  * and the integer part of the result is returned at the locations pointed to
113 |  * by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
114 |  * corrupted as described above, and is returned at the location pointed to by
115 |  * `z2Ptr'.)
116 |  */
117 | void
118 | shift64ExtraRightJamming( uint a0, uint a1, uint a2,
119 |                           int count,
120 |                           inout uint z0Ptr,
121 |                           inout uint z1Ptr,
122 |                           inout uint z2Ptr )
123 | {
124 |     uint z0;
125 |     uint z1;
126 |     uint z2;
127 |     int negCount = ( - count ) & 31; /* (32 - count) mod 32: the complementary left-shift amount */
128 | 
129 |     if ( count == 0 ) { /* nothing to shift: the three words pass through untouched */
130 |         z2 = a2;
131 |         z1 = a1;
132 |         z0 = a0;
133 |     } else {
134 |         if ( count < 32 ) {
135 |             z2 = a1<<negCount; /* bits leaving a1 become the top of the extra word */
136 |             z1 = ( a0<<negCount ) | ( a1>>count );
137 |             z0 = a0>>count;
138 |         } else {
139 |             if ( count == 32 ) { /* whole-word move: a0 -> z1, a1 -> z2 */
140 |                 z2 = a1;
141 |                 z1 = a0;
142 |             } else {
143 |                 a2 |= a1; /* all of a1 is shifted past the extra word: fold it into the sticky source */
144 |                 if ( count < 64 ) {
145 |                     z2 = a0<<negCount;
146 |                     z1 = a0>>( count & 31 );
147 |                 } else {
148 |                     z2 = ( count == 64 ) ? a0 : uint ( a0 != 0u ); /* beyond 64 only "a0 was nonzero" is kept */
149 |                     z1 = 0u;
150 |                 }
151 |             }
152 |             z0 = 0u; /* for count >= 32 the high result word is always empty */
153 |         }
154 |         z2 |= uint ( a2 != 0u ); /* jam: any nonzero bit left in a2 sets the lowest bit of the extra word */
155 |     }
156 |     z2Ptr = z2;
157 |     z1Ptr = z1;
158 |     z0Ptr = z0;
159 | }
160 | 
161 | /* True when the 64-bit value with high word `a0' and low word `a1' has the
162 |  * same bits as the one with high word `b0' and low word `b1'; false
163 |  * otherwise.
164 |  */
165 | bool
166 | eq64( uint a0, uint a1, uint b0, uint b1 )
167 | {
168 |     return uvec2( a0, a1 ) == uvec2( b0, b1 );
169 | }
170 | 
171 | /* Builds a double-precision value from its parts.  The high result word is
172 |  * ( zSign<<31 ) + ( zExp<<20 ) + zFrac0 and the low result word is `zFrac1'.
173 |  * Because the three fields of the high word are summed rather than OR-ed
174 |  * together, any bit of `zFrac0' at or above bit 20 (the integer portion of
175 |  * the significand) is added into the exponent field.  A properly
176 |  * normalized significand has an integer portion of 1, so callers passing
177 |  * a complete normalized significand in `zFrac0'/`zFrac1' must supply a
178 |  * `zExp' that is 1 less than the desired result exponent.
179 |  *     The result carries the high word in `.x' and the low word in `.y'.
180 |  */
181 | uvec2
182 | packFloat64( uint zSign, uint zExp, uint zFrac0, uint zFrac1 )
183 | {
184 |     uint signField = zSign<<31;
185 |     uint expField = zExp<<20;
186 | 
187 |     /* Added, not OR-ed: an integer bit in `zFrac0' carries into the exponent. */
188 |     return uvec2( signField + expField + zFrac0, zFrac1 );
189 | }
190 | 
191 | /* Takes an abstract floating-point value having sign `zSign', exponent `zExp',
192 |  * and extended significand formed by the concatenation of `zFrac0', `zFrac1',
193 |  * and `zFrac2', and returns the corresponding double-precision value, rounded
194 |  * according to `float_rounding_mode'.  `zFrac2' holds the bits below the
195 |  * result's least significant bit: its most significant bit is the round bit
196 |  * and any other nonzero bit marks the value as inexact.
197 |  *     If the abstract value is too large, an infinity or the maximal finite
198 |  * value is returned, depending on the rounding mode and sign.  If it is too
199 |  * small, it is shifted right and rounded to a subnormal number.  No
200 |  * exception flags are kept by this implementation.
201 |  *     `zExp' is a uint carrying a two's-complement exponent: a tiny result
202 |  * arrives as a wrapped "negative" value, so the range tests below
203 |  * reinterpret it as a signed int.
204 |  *     The input significand must be normalized or smaller.  If the input
205 |  * significand is not normalized, `zExp' must be 0; in that case, the result
206 |  * returned is a subnormal number, and it must not require rounding.  In the
207 |  * usual case that the input significand is normalized, `zExp' must be 1 less
208 |  * than the "true" floating-point exponent.
209 |  */
210 | uvec2
211 | roundAndPackFloat64( uint zSign,
212 |                      uint zExp,
213 |                      uint zFrac0,
214 |                      uint zFrac1,
215 |                      uint zFrac2 )
216 | {
217 |     uint roundingMode;
218 |     uint roundNearestEven;
219 |     uint increment;
220 | 
221 |     roundingMode = float_rounding_mode;
222 |     roundNearestEven = uint ( roundingMode == float_round_nearest_even );
223 |     increment = uint ( int ( zFrac2 ) < 0 );   /* round bit = MSB of zFrac2 */
224 |     if ( roundNearestEven == 0u ) {
225 |         if ( roundingMode == float_round_to_zero ) {
226 |             increment = 0u;
227 |         } else {
228 |             if ( zSign != 0u ) {
229 |                 increment = uint ( ( roundingMode == float_round_down ) &&
230 |                         ( zFrac2 != 0u ) );
231 |             } else {
232 |                 increment = uint ( ( roundingMode == float_round_up ) &&
233 |                         ( zFrac2 != 0u ) );
234 |             }
235 |         }
236 |     }
237 |     if ( 0x7FDu <= zExp ) {   /* unsigned test: also catches wrapped negative exponents */
238 |         if ( ( 0x7FD < int ( zExp ) ) ||   /* signed: a negative exponent is not overflow */
239 |             ( ( zExp == 0x7FDu ) &&
240 |                 eq64( 0x001FFFFFu, 0xFFFFFFFFu, zFrac0, zFrac1 ) &&
241 |                    ( increment != 0u ) ) ) {
242 |             if ( ( roundingMode == float_round_to_zero ) ||
243 |                 ( ( zSign != 0u ) && ( roundingMode == float_round_up ) ) ||
244 |                     ( ( zSign == 0u ) &&
245 |                         ( roundingMode == float_round_down ) ) ) {
246 |                 return packFloat64( zSign, 0x7FEu, 0x000FFFFFu, 0xFFFFFFFFu );
247 |             }
248 |             return packFloat64( zSign, 0x7FFu, 0u, 0u );
249 |         }
250 |         if ( int ( zExp ) < 0 ) {   /* underflow: shift down into the subnormal range */
251 |             shift64ExtraRightJamming(
252 |                 zFrac0, zFrac1, zFrac2, int (-zExp), zFrac0, zFrac1, zFrac2 );
253 |             zExp = 0u;
254 |             if ( roundNearestEven != 0u ) {
255 |                 increment = uint ( int ( zFrac2 ) < 0 );   /* re-derive the round bit after the shift */
256 |             } else {
257 |                 if ( zSign != 0u ) {
258 |                     increment = uint ( ( roundingMode == float_round_down ) &&
259 |                             ( zFrac2 != 0u ) );
260 |                 } else {
261 |                     increment = uint ( ( roundingMode == float_round_up ) &&
262 |                             ( zFrac2 != 0u ) );
263 |                 }
264 |             }
265 |         }
266 |     }
267 |     if ( increment != 0u ) {
268 |         add64( zFrac0, zFrac1, 0u, 1u, zFrac0, zFrac1 );
269 |         zFrac1 &= ~ ( uint ( zFrac2 + zFrac2 == 0u ) & roundNearestEven );   /* exact tie: force an even result */
270 |     } else {
271 |         if ( ( zFrac0 | zFrac1 ) == 0u )
272 |             zExp = 0u;
273 |     }
274 |     return packFloat64( zSign, zExp, zFrac0, zFrac1 );
275 | }
276 | 
277 | /* Left-shifts the 64-bit value with high word `a0' and low word `a1' by
278 |  * `count' bits, which must be less than 32.  Bits shifted out of the top
279 |  * are lost.  The result goes to `z0Ptr' (high word) and `z1Ptr' (low word).
280 |  */
281 | void shortShift64Left( uint a0, uint a1,
282 |                        int count,
283 |                        inout uint z0Ptr,
284 |                        inout uint z1Ptr )
285 | {
286 |     uint carried = ( count == 0 ) ? 0u : a1>>( ( - count ) & 31 );
287 | 
288 |     z0Ptr = ( a0<<count ) | carried;
289 |     z1Ptr = a1<<count;
290 | }
291 | 
292 | /* Counts the 0 bits above the most significant 1 bit of `a'.  A zero input
293 |  * yields 32.
294 |  */
295 | uint
296 | countLeadingZeros32( uint a )
297 | {
298 |     uint zeros = 0u;
299 |     if ( a == 0u )
300 |         return 32u;
301 |     /* Binary search: test the top 16, 8, 4, 2 and 1 bits in turn. */
302 |     if ( ( a>>16 ) == 0u ) { zeros += 16u; a <<= 16; }
303 |     if ( ( a>>24 ) == 0u ) { zeros += 8u; a <<= 8; }
304 |     if ( ( a>>28 ) == 0u ) { zeros += 4u; a <<= 4; }
305 |     if ( ( a>>30 ) == 0u ) { zeros += 2u; a <<= 2; }
306 |     if ( ( a>>31 ) == 0u ) { zeros += 1u; }
307 |     return zeros;
308 | }
309 | 
310 | /* Normalizes, rounds and packs an abstract floating-point value with sign
311 |  * `zSign', exponent `zExp' and 64-bit significand `zFrac0':`zFrac1'.  The
312 |  * significand is first shifted so that its leading 1 sits at bit 20 of the
313 |  * high word (11 leading zeros), with `zExp' adjusted by the same amount;
314 |  * bits pushed out on a right shift are kept in an extra word.  The result
315 |  * is then handed to `roundAndPackFloat64'.  In all cases, `zExp' must be 1
316 |  * less than the "true" floating-point exponent.
317 |  */
318 | 
319 | uvec2
320 | normalizeRoundAndPackFloat64( uint zSign,
321 |                               uint zExp,
322 |                               uint zFrac0,
323 |                               uint zFrac1 )
324 | {
325 |     uint zFrac2 = 0u;
326 |     int shiftCount;
327 | 
328 |     /* An empty high word means at least 32 leading zeros: promote the low word. */
329 |     if ( zFrac0 == 0u ) {
330 |         zExp -= 32u;
331 |         zFrac0 = zFrac1;
332 |         zFrac1 = 0u;
333 |     }
334 |     shiftCount = int ( countLeadingZeros32( zFrac0 ) ) - 11;
335 |     if ( shiftCount < 0 ) {
336 |         shift64ExtraRightJamming(
337 |             zFrac0, zFrac1, 0u, - shiftCount, zFrac0, zFrac1, zFrac2 );
338 |     } else {
339 |         shortShift64Left( zFrac0, zFrac1, shiftCount, zFrac0, zFrac1 );
340 |     }
341 |     zExp -= uint ( shiftCount );
342 |     return roundAndPackFloat64( zSign, zExp, zFrac0, zFrac1, zFrac2 );
343 | }
344 | 
345 | /* Returns true if `a' (`a.x' = high word, `a.y' = low word) is a NaN, i.e.
346 |  * its exponent field is all ones and its fraction is nonzero.
347 |  */
348 | bool
349 | float64_is_nan( uvec2 a )
350 | {
351 |     return ( 0xFFE00000u <= ( a.x<<1 ) ) &&
352 |         ( ( a.y != 0u ) || ( ( a.x & 0x000FFFFFu ) != 0u ) );
353 | }
354 | 
355 | /* Returns true if `a' (`a.x' = high word, `a.y' = low word) is a signaling
356 |  * NaN: exponent all ones, bit 19 of `a.x' clear, rest of fraction nonzero.
357 |  */
358 | bool
359 | float64_is_signaling_nan( uvec2 a )
360 | {
361 |     return ( ( ( a.x>>19 ) & 0xFFFu ) == 0xFFEu ) &&
362 |         ( ( a.y != 0u ) || ( ( a.x & 0x0007FFFFu ) != 0u ) );
363 | }
364 | 
365 | /* Takes two double-precision floating-point values `a' and `b', one of which
366 |  * is a NaN, and returns the appropriate NaN result with its quiet bit (bit 19
367 |  * of the high word `.x') set.  No exception is raised for signaling NaNs.
368 |  */
369 | uvec2
370 | propagateFloat64NaN( uvec2 a, uvec2 b )
371 | {
372 |     bool aIsNaN;
373 |     bool aIsSignalingNaN;
374 |     bool bIsNaN;
375 |     bool bIsSignalingNaN;
376 | 
377 |     aIsNaN = float64_is_nan( a );
378 |     aIsSignalingNaN = float64_is_signaling_nan( a );
379 |     bIsNaN = float64_is_nan( b );
380 |     bIsSignalingNaN = float64_is_signaling_nan( b );
381 |     a.x |= 0x00080000u;   /* quiet both candidates: the quiet bit lives in the high word */
382 |     b.x |= 0x00080000u;
383 |     if ( aIsNaN ) {
384 |         return ( aIsSignalingNaN && bIsNaN ) ? b : a;   /* prefer b only when a was signaling and b is a NaN too */
385 |     } else {
386 |         return b;
387 |     }
388 | }
389 | 
390 | /* Fraction of `a': the low 20 bits of `a.x' followed by all of `a.y'. */
391 | uvec2
392 | extractFloat64Frac( uvec2 a )
393 | {
394 |     return a & uvec2( 0x000FFFFFu, 0xFFFFFFFFu );
395 | }
396 | 
397 | /* Exponent field of `a': bits 20 through 30 of the high word `a.x'. */
398 | uint
399 | extractFloat64Exp( uvec2 a )
400 | {
401 |     return ( a.x<<1 )>>21;
402 | }
403 | 
404 | /* Sign of `a': 1 when bit 31 of the high word `a.x' is set, else 0. */
405 | uint
406 | extractFloat64Sign( uvec2 a )
407 | {
408 |     return uint ( a.x >= 0x80000000u );
409 | }
410 | 
411 | /* Returns the result of adding the absolute values of the double-precision
412 |  * floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
413 |  * before being returned.  `zSign' is ignored if the result is a NaN.  The
414 |  * addition is performed according to the IEEE Standard for Floating-Point
415 |  * Arithmetic.
416 |  */
417 | uvec2
418 | addFloat64Fracs( uvec2 a, uvec2 b, uint zSign )
419 | {
420 |     uvec2 aFrac;
421 |     uvec2 bFrac;
422 |     uint aExp;
423 |     uint bExp;
424 |     uint zExp;
425 |     uint zFrac0;
426 |     uint zFrac1;
427 |     uint zFrac2;
428 |     int expDiff;
429 | 
430 |     aFrac = extractFloat64Frac( a );
431 |     aExp = extractFloat64Exp( a );
432 |     bFrac = extractFloat64Frac( b );
433 |     bExp = extractFloat64Exp( b );
434 |     expDiff = int ( aExp ) - int ( bExp ); /* > 0: a has the larger exponent; < 0: b does */
435 |     if ( 0 < expDiff ) {
436 |         if ( aExp == 0x7FFu ) { /* a's exponent is all ones: NaN if its fraction is nonzero, else a is returned as is */
437 |             if ( ( aFrac.x | aFrac.y ) != 0u ) {
438 |                 return propagateFloat64NaN( a, b );
439 |             }
440 |             return a;
441 |         }
442 |         if ( bExp == 0u ) { /* zero exponent field: no leading 1 is added and the shift is one bit shorter */
443 |             --expDiff;
444 |         } else {
445 |             bFrac.x |= 0x00100000u; /* make b's leading 1 (bit 20 of the high word) explicit */
446 |         }
447 |         shift64ExtraRightJamming(
448 |             bFrac.x, bFrac.y, 0u, expDiff, bFrac.x, bFrac.y, zFrac2 ); /* align b; shifted-out bits land in zFrac2 */
449 |         zExp = aExp;
450 |     } else if ( expDiff < 0 ) {
451 |         if ( bExp == 0x7FFu ) { /* b's exponent is all ones: NaN if its fraction is nonzero, else a signed infinity */
452 |             if ( ( bFrac.x | bFrac.y ) != 0u ) {
453 |                 return propagateFloat64NaN( a, b );
454 |             }
455 |             return packFloat64( zSign, 0x7FFu, 0u, 0u );
456 |         }
457 |         if ( aExp == 0u ) { /* mirror of the case above, with a as the smaller operand */
458 |             ++expDiff;
459 |         } else {
460 |             aFrac.x |= 0x00100000u;
461 |         }
462 |         shift64ExtraRightJamming(
463 |             aFrac.x, aFrac.y, 0u, - expDiff, aFrac.x, aFrac.y, zFrac2 ); /* align a; shifted-out bits land in zFrac2 */
464 |         zExp = bExp;
465 |     } else {
466 |         if ( aExp == 0x7FFu ) { /* equal exponents, both all ones */
467 |             if ( ( aFrac.x | aFrac.y | bFrac.x | bFrac.y ) != 0u ) {
468 |                 return propagateFloat64NaN( a, b );
469 |             }
470 |             return a;
471 |         }
472 |         add64( aFrac.x, aFrac.y, bFrac.x, bFrac.y, zFrac0, zFrac1 );
473 |         if ( aExp == 0u ) { /* both exponent fields are zero: the plain fraction sum is packed with exponent 0 */
474 |             return packFloat64( zSign, 0u, zFrac0, zFrac1 );
475 |         }
476 |         zFrac2 = 0u;
477 |         zFrac0 |= 0x00200000u; /* the two leading 1s (bit 20 each) add up to bit 21 */
478 |         zExp = aExp;
479 |         shift64ExtraRightJamming(
480 |             zFrac0, zFrac1, zFrac2, 1, zFrac0, zFrac1, zFrac2 ); /* shift right by one bit; the lost bit goes to zFrac2 */
481 |         return roundAndPackFloat64( zSign, zExp, zFrac0, zFrac1, zFrac2 );
482 |     }
483 |     aFrac.x |= 0x00100000u; /* NOTE(review): on the b-larger path this appears to stand in for b's leading 1 (bit 20 of the shifted a is clear) - confirm against SoftFloat */
484 |     add64( aFrac.x, aFrac.y, bFrac.x, bFrac.y, zFrac0, zFrac1 );
485 |     --zExp; /* roundAndPackFloat64 expects an exponent 1 below the true one */
486 |     if ( zFrac0 < 0x00200000u ) { /* the sum did not carry into bit 21: no extra shift needed */
487 |         return roundAndPackFloat64( zSign, zExp, zFrac0, zFrac1, zFrac2 );
488 |     }
489 |     ++zExp;
490 |     shift64ExtraRightJamming(
491 |         zFrac0, zFrac1, zFrac2, 1, zFrac0, zFrac1, zFrac2 ); /* carry into bit 21: shift right by one, exponent already bumped */
492 |     return roundAndPackFloat64( zSign, zExp, zFrac0, zFrac1, zFrac2 );
493 | }
494 | 
495 | /* Returns the result of subtracting the absolute values of the double-
496 |  * precision floating-point values `a' and `b'.  If `zSign' is 1, the
497 |  * difference is negated before being returned.  `zSign' is ignored if the
498 |  * result is a NaN.  The subtraction is performed according to the IEEE
499 |  * Standard for Floating-Point Arithmetic.
500 |  */
501 | uvec2
502 | subFloat64Fracs( uvec2 a, uvec2 b, uint zSign )
503 | {
504 |     uvec2 aFrac;
505 |     uvec2 bFrac;
506 |     uvec2 z; /* NOTE(review): never read or written in this function */
507 |     uint aExp;
508 |     uint bExp;
509 |     uint zExp;
510 |     uint zFrac0;
511 |     uint zFrac1;
512 |     int expDiff;
513 | 
514 |     aFrac = extractFloat64Frac( a );
515 |     aExp = extractFloat64Exp( a );
516 |     bFrac = extractFloat64Frac( b );
517 |     bExp = extractFloat64Exp( b );
518 |     expDiff = int ( aExp ) - int ( bExp ); /* > 0: a has the larger exponent; < 0: b does */
519 |     shortShift64Left( aFrac.x, aFrac.y, 10, aFrac.x, aFrac.y ); /* pre-shift both fractions left by 10 bits ... */
520 |     shortShift64Left( bFrac.x, bFrac.y, 10, bFrac.x, bFrac.y ); /* ... which the `zExp - 10u' arguments below account for */
521 |     if ( 0 < expDiff ) {
522 |         if ( aExp == 0x7FFu ) { /* a's exponent is all ones: NaN if its fraction is nonzero, else a is returned as is */
523 |             if ( ( aFrac.x | aFrac.y ) != 0u ) {
524 |                 return propagateFloat64NaN( a, b );
525 |             }
526 |         return a;
527 |         }
528 |         if ( bExp == 0u ) { /* zero exponent field: no leading 1 is added and the shift is one bit shorter */
529 |             --expDiff;
530 |         } else {
531 |             bFrac.x |= 0x40000000u; /* leading 1, at bit 30 after the 10-bit pre-shift */
532 |         }
533 |         shift64RightJamming( bFrac, expDiff, bFrac.x, bFrac.y ); /* align b to a's exponent */
534 |         aFrac.x |= 0x40000000u;
535 |         sub64( aFrac.x, aFrac.y, bFrac.x, bFrac.y, zFrac0, zFrac1 );
536 |         zExp = aExp;
537 |         --zExp;
538 |         return normalizeRoundAndPackFloat64(
539 |             zSign, zExp - 10u, zFrac0, zFrac1 );
540 |     }
541 |     if ( expDiff < 0 ) {
542 |         if ( bExp == 0x7FFu ) { /* b's exponent is all ones: NaN if its fraction is nonzero, else infinity with the flipped sign */
543 |             if ( ( bFrac.x | bFrac.y ) != 0u ) {
544 |                 return propagateFloat64NaN( a, b );
545 |             }
546 |             return packFloat64( zSign ^ 1u, 0x7FFu, 0u, 0u );
547 |         }
548 |         if ( aExp == 0u ) { /* mirror of the case above, with a as the smaller operand */
549 |             ++expDiff;
550 |         } else {
551 |             aFrac.x |= 0x40000000u;
552 |         }
553 |         shift64RightJamming( aFrac, - expDiff, aFrac.x, aFrac.y ); /* align a to b's exponent */
554 |         bFrac.x |= 0x40000000u;
555 |         sub64( bFrac.x, bFrac.y, aFrac.x, aFrac.y, zFrac0, zFrac1 );
556 |         zExp = bExp;
557 |         zSign ^= 1u; /* b is the larger magnitude, so the result takes the opposite sign */
558 |         --zExp;
559 |         return normalizeRoundAndPackFloat64(
560 |             zSign, zExp - 10u, zFrac0, zFrac1 );
561 |     }
562 |     if ( aExp == 0x7FFu ) { /* equal exponents, both all ones */
563 |         if ( ( aFrac.x | aFrac.y | bFrac.x | bFrac.y ) != 0u ) {
564 |             return propagateFloat64NaN( a, b );
565 |         }
566 |         return uvec2( 0xFFFFFFFFu, 0xFFFFFFFFu ); /* both fractions zero: an all-ones bit pattern (a NaN encoding) is returned */
567 |     }
568 |     if ( aExp == 0u ) { /* both exponent fields are zero: continue as if both were 1 */
569 |         aExp = 1u;
570 |         bExp = 1u;
571 |     }
572 |     if ( bFrac.x < aFrac.x ) { /* equal exponents: compare high words first, then low words */
573 |         sub64( aFrac.x, aFrac.y, bFrac.x, bFrac.y, zFrac0, zFrac1 );
574 |         zExp = aExp;
575 |         --zExp;
576 |         return normalizeRoundAndPackFloat64(
577 |             zSign, zExp - 10u, zFrac0, zFrac1 );
578 |     }
579 |     if ( aFrac.x < bFrac.x ) {
580 |     sub64( bFrac.x, bFrac.y, aFrac.x, aFrac.y, zFrac0, zFrac1 );
581 |     zExp = bExp;
582 |     zSign ^= 1u;
583 |     --zExp;
584 |     return normalizeRoundAndPackFloat64( zSign, zExp - 10u, zFrac0, zFrac1 );
585 |     }
586 |     if ( bFrac.y < aFrac.y ) {
587 |         sub64( aFrac.x, aFrac.y, bFrac.x, bFrac.y, zFrac0, zFrac1 );
588 |         zExp = aExp;
589 |         --zExp;
590 |         return normalizeRoundAndPackFloat64(
591 |             zSign, zExp - 10u, zFrac0, zFrac1 );
592 |     }
593 |     if ( aFrac.y < bFrac.y ) {
594 |         sub64( bFrac.x, bFrac.y, aFrac.x, aFrac.y, zFrac0, zFrac1 );
595 |         zExp = bExp;
596 |         zSign ^= 1u;
597 |         --zExp;
598 |         return normalizeRoundAndPackFloat64( zSign, zExp - 10u, zFrac0, zFrac1 );
599 |     }
600 |     return packFloat64( /* equal magnitudes: a zero whose sign bit is set only in round-down mode */
601 |         uint ( float_rounding_mode == float_round_down ), 0u, 0u, 0u );
602 | }
603 | 
604 | /* Adds the double-precision values `a' and `b' (each stored as a high word
605 |  * `.x' and a low word `.y').  The sign bits decide which magnitude routine
606 |  * does the work: `addFloat64Fracs' when they agree, `subFloat64Fracs' when
607 |  * they differ.  In both cases the sign of `a' is passed on as the
608 |  * provisional result sign.
609 |  */
610 | uvec2
611 | add_fp64( uvec2 a, uvec2 b )
612 | {
613 |     uint aSign = extractFloat64Sign( a );
614 |     uint bSign = extractFloat64Sign( b );
615 |     bool sameSign = ( aSign == bSign );
616 | 
617 |     /* Like signs: the magnitudes add.  Unlike signs: they subtract. */
618 |     if ( ! sameSign )
619 |         return subFloat64Fracs( a, b, aSign );
620 |     return addFloat64Fracs( a, b, aSign );
621 | }
622 | 
623 | uniform uvec2 a;
624 | uniform uvec2 b;
625 | uniform uvec2 expected;
626 | 
627 | void main()
628 | {
629 |     /* Pass/fail colour: green when add_fp64(a, b) matches `expected',
630 |      * red when it does not.
631 |      */
632 |     bool pass = ( add_fp64( a, b ) == expected );
633 |     vec4 verdict = pass ? vec4(0.0, 1.0, 0.0, 1.0) : vec4(1.0, 0.0, 0.0, 1.0);
634 |     gl_FragColor = verdict;
635 | }
636 | 
637 | [test]
638 | # A bunch of tests to run.  The 'uniform' lines set the uniforms.  The
639 | # 'draw rect' line draws a rectangle that covers the whole window.
640 | # The 'probe all' line verifies that every pixel contains the expected
641 | # color.
642 | 
643 | # Try +0.0 and +0.0
644 | uniform uvec2 a        0x00000000 0x00000000
645 | uniform uvec2 b        0x00000000 0x00000000
646 | uniform uvec2 expected 0x00000000 0x00000000
647 | draw rect -1 -1 2 2
648 | probe all rgba 0.0 1.0 0.0 1.0
649 | 
650 | # Try +2.0 and +2.0
651 | uniform uvec2 a        0x40000000 0x00000000
652 | uniform uvec2 b        0x40000000 0x00000000
653 | uniform uvec2 expected 0x40100000 0x00000000
654 | draw rect -1 -1 2 2
655 | probe all rgba 0.0 1.0 0.0 1.0
656 | 
657 | # Try +0.0 and +50.0
658 | uniform uvec2 a        0x00000000 0x00000000
659 | uniform uvec2 b        0x40490000 0x00000000
660 | uniform uvec2 expected 0x40490000 0x00000000
661 | draw rect -1 -1 2 2
662 | probe all rgba 0.0 1.0 0.0 1.0
663 | 
664 | # Try +0.0 and -50.0
665 | uniform uvec2 a        0x00000000 0x00000000
666 | uniform uvec2 b        0xC0490000 0x00000000
667 | uniform uvec2 expected 0xC0490000 0x00000000
668 | draw rect -1 -1 2 2
669 | probe all rgba 0.0 1.0 0.0 1.0
670 | 
671 | # Try +1.5 and +50.0
672 | uniform uvec2 a        0x3FF80000 0x00000000
673 | uniform uvec2 b        0x40490000 0x00000000
674 | uniform uvec2 expected 0x4049C000 0x00000000
675 | draw rect -1 -1 2 2
676 | probe all rgba 0.0 1.0 0.0 1.0
677 | 
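678 | # Try +1.5 and +1.000002 (exact tie: IEEE round-to-nearest-even gives low word 0x0C6F7A0C; the expectation below is the truncated sum)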
679 | uniform uvec2 a        0x3FF80000 0x00000000
680 | uniform uvec2 b        0x3FF00002 0x18DEF417
681 | uniform uvec2 expected 0x40040001 0x0C6F7A0B
682 | draw rect -1 -1 2 2
683 | probe all rgba 0.0 1.0 0.0 1.0
684 | 
685 | # Try +1.5 and +INF
686 | uniform uvec2 a        0x3FF80000 0x00000000
687 | uniform uvec2 b        0x7FF00000 0x00000000
688 | uniform uvec2 expected 0x7FF00000 0x00000000
689 | draw rect -1 -1 2 2
690 | probe all rgba 0.0 1.0 0.0 1.0
691 | 
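692 | # Try +1.5 and NaN (b is a signaling NaN; the expectation below has bit 19 set in the low word, whereas a quieted IEEE result would be 0x7FF80000 0x00000001)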
693 | uniform uvec2 a        0x3FF80000 0x00000000
694 | uniform uvec2 b        0x7FF00000 0x00000001
695 | uniform uvec2 expected 0x7FF00000 0x00080001
696 | draw rect -1 -1 2 2
697 | probe all rgba 0.0 1.0 0.0 1.0
698 | 


--------------------------------------------------------------------------------
/div_fp64.shader_test:
--------------------------------------------------------------------------------
  1 | # Divide two doubles 'a' and 'b'
  2 | # IEEE 754 compliant
  3 | 
  4 | [require]
  5 | GLSL >= 1.30
  6 | 
  7 | [vertex shader]
  8 | #version 130
  9 | 
 10 | void main()
 11 | {
 12 |     gl_Position = gl_Vertex; /* forward the incoming vertex position unchanged */
 13 | }
 14 | 
 15 | [fragment shader]
 16 | #version 130
 17 | 
 18 | /* Software IEEE floating-point rounding mode: the four selectable mode codes and the variable holding the active one. */
 19 | const uint float_round_nearest_even = 0u;
 20 | const uint float_round_to_zero      = 1u;
 21 | const uint float_round_down         = 2u;
 22 | const uint float_round_up           = 3u;
 23 | uint float_rounding_mode = float_round_nearest_even; /* active mode; starts out as nearest-even */
 24 | 
 25 | /* Adds the 64-bit value formed by concatenating `a0' and `a1' to the 64-bit
 26 |  * value formed by concatenating `b0' and `b1'.  Addition is modulo 2^64, so
 27 |  * any carry out is lost.  The result is broken into two 32-bit pieces which
 28 |  * are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
 29 |  */
 30 | void
 31 | add64( uint a0, uint a1,
 32 |        uint b0, uint b1,
 33 |        inout uint z0Ptr,
 34 |        inout uint z1Ptr )
 35 | {
 36 |     /* Low words first: an unsigned wrap-around leaves the sum smaller
 37 |      * than its first operand, which is how the carry is detected. */
 38 |     uint lowSum = a1 + b1;
 39 |     uint carry = ( lowSum < a1 ) ? 1u : 0u;
 40 | 
 41 |     z0Ptr = a0 + b0 + carry;
 42 |     z1Ptr = lowSum;
 43 | }
 44 | 
 45 | /* Subtracts the 64-bit value formed by concatenating `b0' and `b1' from the
 46 |  * 64-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
 47 |  * 2^64, so any borrow out (carry out) is lost.  The result is broken into two
 48 |  * 32-bit pieces which are stored at the locations pointed to by `z0Ptr' and
 49 |  * `z1Ptr'.
 50 |  */
 51 | void
 52 | sub64( uint a0, uint a1, uint b0, uint b1, inout uint z0Ptr, inout uint z1Ptr )
 53 | {
 54 |     /* A borrow leaves the high word exactly when a1 is below b1. */
 55 |     uint borrow = ( a1 < b1 ) ? 1u : 0u;
 56 |     z0Ptr = a0 - b0 - borrow;
 57 |     z1Ptr = a1 - b1;
 58 | }
 59 | 
 60 | /* Adds the 96-bit value formed by concatenating `a0', `a1', and `a2' to the
 61 |  * 96-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
 62 |  * modulo 2^96, so any carry out is lost.  The result is broken into three
 63 |  * 32-bit pieces which are stored at the locations pointed to by `z0Ptr',
 64 |  * `z1Ptr', and `z2Ptr'.
 65 |  */
 66 | void
 67 | add96( uint a0, uint a1, uint a2,
 68 |        uint b0, uint b1, uint b2,
 69 |        inout uint z0Ptr,
 70 |        inout uint z1Ptr,
 71 |        inout uint z2Ptr )
 72 | {
 73 |     /* Word-wise sums, least significant first; a wrapped sum is smaller than its first operand. */
 74 |     uint low = a2 + b2;
 75 |     uint mid = a1 + b1;
 76 |     uint high = a0 + b0;
 77 |     uint carryIntoMid = ( low < a2 ) ? 1u : 0u;
 78 |     uint carryIntoHigh = ( mid < a1 ) ? 1u : 0u;
 79 | 
 80 |     /* Adding the incoming carry to the middle word can itself wrap (only
 81 |      * when that word was all ones); the wrap carries into the high word.
 82 |      */
 83 |     mid += carryIntoMid;
 84 |     high += ( mid < carryIntoMid ) ? 1u : 0u;
 85 |     high += carryIntoHigh;
 86 | 
 87 |     z0Ptr = high;
 88 |     z1Ptr = mid;
 89 |     z2Ptr = low;
 90 | }
 91 | 
 92 | /* Subtracts the 96-bit value formed by concatenating `b0', `b1', and `b2' from
 93 |  * the 96-bit value formed by concatenating `a0', `a1', and `a2'.  Subtraction
 94 |  * is modulo 2^96, so any borrow out (carry out) is lost.  The result is broken
 95 |  * into three 32-bit pieces which are stored at the locations pointed to by
 96 |  * `z0Ptr', `z1Ptr', and `z2Ptr'.
 97 |  */
 98 | void
 99 | sub96( uint a0, uint a1, uint a2,
100 |        uint b0, uint b1, uint b2,
101 |        inout uint z0Ptr,
102 |        inout uint z1Ptr,
103 |        inout uint z2Ptr )
104 | {
105 |     /* Word-wise differences, least significant first.  Each may wrap. */
106 |     uint low = a2 - b2;
107 |     uint mid = a1 - b1;
108 |     uint high = a0 - b0;
109 |     uint borrowFromMid = ( a2 < b2 ) ? 1u : 0u;
110 |     uint borrowFromHigh = ( a1 < b1 ) ? 1u : 0u;
111 | 
112 |     /* Taking the low word's borrow out of the middle word underflows only
113 |      * when the middle word is zero; test that before modifying it.
114 |      */
115 |     high -= ( mid < borrowFromMid ) ? 1u : 0u;
116 |     mid -= borrowFromMid;
117 |     high -= borrowFromHigh;
118 | 
119 |     z0Ptr = high;
120 |     z1Ptr = mid;
121 |     z2Ptr = low;
122 | }
123 | 
124 | /* Returns true if the 64-bit value formed by concatenating `a0' and `a1' is less
125 |  * than or equal to the 64-bit value formed by concatenating `b0' and `b1'.
126 |  * Otherwise, returns false.
127 |  */
128 | bool
129 | le64( uint a0, uint a1, uint b0, uint b1 )
130 | {
131 |     return ( a0 != b0 ) ? ( a0 < b0 ) : ( a1 <= b1 );  /* high words decide unless they tie */
132 | }
133 | 
134 | /* Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the
135 |  * number of bits given in `count'.  Any bits shifted off are lost.  The value
136 |  * of `count' can be arbitrarily large; in particular, if `count' is greater
137 |  * than 64, the result will be 0.  The result is broken into two 32-bit pieces
138 |  * which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
139 |  */
140 | void
141 | shift64Right( uint a0, uint a1,
142 |               int count,
143 |               inout uint z0Ptr,
144 |               inout uint z1Ptr )
145 | {
146 |     /* Defaults cover count >= 64, where every bit has been shifted out. */
147 |     uint high = 0u;
148 |     uint low = 0u;
149 | 
150 |     if ( count == 0 ) {
151 |         /* Nothing moves; the cross-word term below only suits 1..31. */
152 |         high = a0;
153 |         low = a1;
154 |     } else if ( count < 32 ) {
155 |         /* Bits leaving the high word enter the top of the low word. */
156 |         high = a0>>count;
157 |         low = ( a0<<( ( - count ) & 31 ) ) | ( a1>>count );
158 |     } else if ( count < 64 ) {
159 |         /* Only the high word survives, moved down into the low word. */
160 |         low = a0>>( count & 31 );
161 |     }
162 |     z0Ptr = high;
163 |     z1Ptr = low;
164 | }
165 | 
166 | /* Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' right
167 |  * by 32 _plus_ the number of bits given in `count'.  The shifted result is
168 |  * at most 64 nonzero bits; these are broken into two 32-bit pieces which are
169 |  * stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
170 |  * off form a third 32-bit result as follows:  The _last_ bit shifted off is
171 |  * the most-significant bit of the extra result, and the other 31 bits of the
172 |  * extra result are all zero if and only if _all_but_the_last_ bits shifted off
173 |  * were all zero.  This extra result is stored in the location pointed to by
174 |  * `z2Ptr'.  The value of `count' can be arbitrarily large.
175 |  *     (This routine makes more sense if `a0', `a1', and `a2' are considered
176 |  * to form a fixed-point value with binary point between `a1' and `a2'.  This
177 |  * fixed-point value is shifted right by the number of bits given in `count',
178 |  * and the integer part of the result is returned at the locations pointed to
179 |  * by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
180 |  * corrupted as described above, and is returned at the location pointed to by
181 |  * `z2Ptr'.)
182 |  */
183 | void
184 | shift64ExtraRightJamming( uint a0, uint a1, uint a2,
185 |                           int count,
186 |                           inout uint z0Ptr,
187 |                           inout uint z1Ptr,
188 |                           inout uint z2Ptr )
189 | {
190 |     uint z0;
191 |     uint z1;
192 |     uint z2;
193 |     int negCount = ( - count ) & 31; /* 32 - count, reduced modulo 32 */
194 | 
195 |     if ( count == 0 ) { /* nothing moves, nothing is discarded */
196 |         z2 = a2;
197 |         z1 = a1;
198 |         z0 = a0;
199 |     } else {
200 |         if ( count < 32 ) { /* bits cross a single word boundary */
201 |             z2 = a1<<negCount;
202 |             z1 = ( a0<<negCount ) | ( a1>>count );
203 |             z0 = a0>>count;
204 |         } else {
205 |             if ( count == 32 ) { /* whole-word move; the old a2 survives only through the jam below */
206 |                 z2 = a1;
207 |                 z1 = a0;
208 |             } else {
209 |                 a2 |= a1; /* a1 is shifted entirely past the extra word: keep it only as "nonzero" */
210 |                 if ( count < 64 ) {
211 |                     z2 = a0<<negCount;
212 |                     z1 = a0>>( count & 31 );
213 |                 } else {
214 |                     z2 = ( count == 64 ) ? a0 : uint ( a0 != 0u ); /* past 64, a0 survives only as a sticky bit */
215 |                     z1 = 0u;
216 |                 }
217 |             }
218 |             z0 = 0u;
219 |         }
220 |         z2 |= uint ( a2 != 0u ); /* jam: set bit 0 when any discarded lower bits were nonzero */
221 |     }
222 |     z2Ptr = z2;
223 |     z1Ptr = z1;
224 |     z0Ptr = z0;
225 | }
226 | 
227 | /* Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is
228 |  * equal to the 64-bit value formed by concatenating `b0' and `b1'.  Otherwise,
229 |  * returns 0.
230 |  */
231 | bool
232 | eq64( uint a0, uint a1, uint b0, uint b1 )
233 | {
234 |     return uvec2( a0, a1 ) == uvec2( b0, b1 );  /* vector == is true only when every component matches */
235 | }
236 | 
237 | /* Packs the sign `zSign', the exponent `zExp', and the significand formed by
238 |  * the concatenation of `zFrac0' and `zFrac1' into a double-precision floating-
239 |  * point value, returning the result.  After being shifted into the proper
240 |  * positions, the three fields `zSign', `zExp', and `zFrac0' are simply added
241 |  * together to form the most significant 32 bits of the result.  This means
242 |  * that any integer portion of `zFrac0' will be added into the exponent.  Since
243 |  * a properly normalized significand will have an integer portion equal to 1,
244 |  * the `zExp' input should be 1 less than the desired result exponent whenever
245 |  * `zFrac0' and `zFrac1' concatenated form a complete, normalized significand.
246 |  */
247 | uvec2
248 | packFloat64( uint zSign, uint zExp, uint zFrac0, uint zFrac1 )
249 | {
250 |     /* The fields are summed, not OR-ed, so that a significand carrying
251 |      * its integer bit bumps the exponent field by one.
252 |      */
253 |     uint highWord = ( zSign<<31 ) + ( zExp<<20 ) + zFrac0;
254 |     return uvec2( highWord, zFrac1 );
255 | }
256 | 
257 | /* Takes an abstract floating-point value having sign `zSign', exponent `zExp',
258 |  * and extended significand formed by the concatenation of `zFrac0', `zFrac1',
259 |  * and `zFrac2', and returns the proper double-precision floating-point value
260 |  * corresponding to the abstract input.  Ordinarily, the abstract value is
261 |  * simply rounded and packed into the double-precision format, with the inexact
262 |  * exception raised if the abstract input cannot be represented exactly.
263 |  * However, if the abstract value is too large, the overflow and inexact
264 |  * exceptions are raised and an infinity or maximal finite value is returned.
265 |  * If the abstract value is too small, the input value is rounded to a
266 |  * subnormal number, and the underflow and inexact exceptions are raised if the
267 |  * abstract input cannot be represented exactly as a subnormal double-precision
268 |  * floating-point number.
269 |  *     The input significand must be normalized or smaller.  If the input
270 |  * significand is not normalized, `zExp' must be 0; in that case, the result
271 |  * returned is a subnormal number, and it must not require rounding.  In the
272 |  * usual case that the input significand is normalized, `zExp' must be 1 less
273 |  * than the "true" floating-point exponent.  The handling of underflow and
274 |  * overflow follows the IEEE Standard for Floating-Point Arithmetic.
275 |  */
276 | uvec2
277 | roundAndPackFloat64( uint zSign,
278 |                      uint zExp,
279 |                      uint zFrac0,
280 |                      uint zFrac1,
281 |                      uint zFrac2 )
282 | {
283 |     /* zExp and zFrac2 are uints that carry signed meaning: an exponent
284 |      * below zero arrives wrapped to a huge value, and bit 31 of zFrac2 is
285 |      * the round bit.  Both are read through int ( ... ), because a
286 |      * `< 0u' test on a uint is never true. */
287 |     uint roundingMode = float_rounding_mode;
288 |     uint roundNearestEven = uint ( roundingMode == float_round_nearest_even );
289 |     uint increment = uint ( int ( zFrac2 ) < 0 );
290 |     if ( roundNearestEven == 0u ) {
291 |         if ( roundingMode == float_round_to_zero ) {
292 |             increment = 0u;
293 |         } else {
294 |             if ( zSign != 0u ) {
295 |                 increment = uint ( ( roundingMode == float_round_down ) &&
296 |                         ( zFrac2 != 0u ) );
297 |             } else {
298 |                 increment = uint ( ( roundingMode == float_round_up ) &&
299 |                         ( zFrac2 != 0u ) );
300 |             }
301 |         }
302 |     }
303 |     if ( 0x7FDu <= zExp ) { /* unsigned test: also catches wrapped negative exponents */
304 |         if ( ( 0x7FD < int ( zExp ) ) ||
305 |             ( ( zExp == 0x7FDu ) &&
306 |                 eq64( 0x001FFFFFu, 0xFFFFFFFFu, zFrac0, zFrac1 ) &&
307 |                    ( increment != 0u ) ) ) {
308 |             if ( ( roundingMode == float_round_to_zero ) ||
309 |                 ( ( zSign != 0u ) && ( roundingMode == float_round_up ) ) ||
310 |                     ( ( zSign == 0u ) && ( roundingMode == float_round_down ) ) ) {
311 |                 return packFloat64( zSign, 0x7FEu, 0x000FFFFFu, 0xFFFFFFFFu ); /* largest finite value */
312 |             }
313 |             return packFloat64( zSign, 0x7FFu, 0u, 0u ); /* overflow to infinity */
314 |         }
315 |         if ( int ( zExp ) < 0 ) { /* result is subnormal: shift right by -zExp, keeping sticky bits */
316 |             shift64ExtraRightJamming(
317 |                 zFrac0, zFrac1, zFrac2, int ( - zExp ), zFrac0, zFrac1, zFrac2 );
318 |             zExp = 0u;
319 |             if ( roundNearestEven != 0u ) {
320 |                 increment = uint ( int ( zFrac2 ) < 0 );
321 |             } else {
322 |                 if ( zSign != 0u ) {
323 |                     increment = uint ( ( roundingMode == float_round_down ) &&
324 |                             ( zFrac2 != 0u ) );
325 |                 } else {
326 |                     increment = uint ( ( roundingMode == float_round_up ) &&
327 |                             ( zFrac2 != 0u ) );
328 |                 }
329 |             }
330 |         }
331 |     }
332 |     if ( increment != 0u ) {
333 |         add64( zFrac0, zFrac1, 0u, 1u, zFrac0, zFrac1 );
334 |         zFrac1 &= ~ ( uint ( zFrac2 + zFrac2 == 0u ) & roundNearestEven ); /* exact tie: clear bit 0 (round to even) */
335 |     } else {
336 |         if ( ( zFrac0 | zFrac1 ) == 0u )
337 |             zExp = 0u;
338 |     }
339 |     return packFloat64( zSign, zExp, zFrac0, zFrac1 );
340 | }
341 | 
342 | /* Multiplies `a' by `b' to obtain a 64-bit product.  The product is broken
343 |  * into two 32-bit pieces which are stored at the locations pointed to by
344 |  * `z0Ptr' and `z1Ptr'.
345 |  */
346 | void
347 | mul32To64( uint a, uint b, inout uint z0Ptr, inout uint z1Ptr )
348 | {
349 |     /* Split each operand into 16-bit halves so that every partial product
350 |      * fits in 32 bits.  The low halves must be masked explicitly: a uint
351 |      * assignment does not truncate the way a 16-bit variable would.
352 |      */
353 |     uint aLow = a & 0xFFFFu;
354 |     uint aHigh = a>>16;
355 |     uint bLow = b & 0xFFFFu;
356 |     uint bHigh = b>>16;
357 |     uint z1 = aLow * bLow;
358 |     uint zMiddleA = aLow * bHigh;
359 |     uint zMiddleB = aHigh * bLow;
360 |     uint z0 = aHigh * bHigh;
361 | 
362 |     /* Sum the two cross terms; a wrap here is worth 2^48, i.e. bit 16 of
363 |      * the high word.
364 |      */
365 |     zMiddleA += zMiddleB;
366 |     z0 += ( ( uint ( zMiddleA < zMiddleB ) )<<16 ) + ( zMiddleA>>16 );
367 |     /* Low 16 bits of the cross sum land in the top half of the low word. */
368 |     zMiddleA <<= 16;
369 |     z1 += zMiddleA;
370 |     z0 += uint ( z1 < zMiddleA );
371 |     z1Ptr = z1;
372 |     z0Ptr = z0;
373 | }
374 | 
375 | /* Shifts the 64-bit value formed by concatenating `a0' and `a1' left by the
376 |  * number of bits given in `count'.  Any bits shifted off are lost.  The value
377 |  * of `count' must be less than 32.  The result is broken into two 32-bit
378 |  * pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
379 |  */
380 | void
381 | shortShift64Left( uint a0, uint a1, int count,
382 |                   inout uint z0Ptr, inout uint z1Ptr )
383 | {
384 |     uint high = a0;
385 |     if ( count != 0 )  /* with count 0 the cross term would OR in all of a1 */
386 |         high = ( a0<<count ) | ( a1>>( ( - count ) & 31 ) );
387 |     z0Ptr = high;
388 |     z1Ptr = a1<<count;
389 | }
390 | 
391 | /* Returns the number of leading 0 bits before the most-significant 1 bit of
392 |  * `a'.  If `a' is zero, 32 is returned.
393 |  */
394 | uint
395 | countLeadingZeros32( uint a )
396 | {
397 |     uint zeros = 0u;
398 |     if ( a == 0u )
399 |         return 32u;
400 |     /* Binary search: when the upper part of the window is empty, count it and slide the rest up. */
401 |     if ( a < 0x00010000u ) { zeros += 16u; a <<= 16; }
402 |     if ( a < 0x01000000u ) { zeros += 8u; a <<= 8; }
403 |     if ( a < 0x10000000u ) { zeros += 4u; a <<= 4; }
404 |     if ( a < 0x40000000u ) { zeros += 2u; a <<= 2; }
405 |     if ( a < 0x80000000u ) { zeros += 1u; }
406 |     return zeros;
407 | }
408 | 
409 | /* Normalizes the subnormal double-precision floating-point value represented
410 |  * by the denormalized significand formed by the concatenation of `aFrac0' and
411 |  * `aFrac1'.  The normalized exponent is stored at the location pointed to by
412 |  * `zExpPtr'.  The most significant 21 bits of the normalized significand are
413 |  * stored at the location pointed to by `zFrac0Ptr', and the least significant
414 |  * 32 bits of the normalized significand are stored at the location pointed to
415 |  * by `zFrac1Ptr'.
416 |  */
417 | void
418 | normalizeFloat64Subnormal( uint aFrac0, uint aFrac1,
419 |                            inout uint zExpPtr,
420 |                            inout uint zFrac0Ptr,
421 |                            inout uint zFrac1Ptr )
422 | {
423 |     int shiftCount;
424 |     if ( aFrac0 != 0u ) {  /* leading one is in the high word: a plain left shift suffices */
425 |         shiftCount = int ( countLeadingZeros32( aFrac0 ) ) - 11;
426 |         shortShift64Left( aFrac0, aFrac1, shiftCount, zFrac0Ptr, zFrac1Ptr );
427 |         zExpPtr = 1u - uint ( shiftCount );
428 |         return;
429 |     }
430 |     /* Leading one is in the low word; a negative count means it sits above bit 20. */
431 |     shiftCount = int ( countLeadingZeros32( aFrac1 ) ) - 11;
432 |     if ( shiftCount < 0 ) {
433 |         zFrac0Ptr = aFrac1>>( - shiftCount );
434 |         zFrac1Ptr = aFrac1<<( shiftCount & 31 );
435 |     } else {
436 |         zFrac0Ptr = aFrac1<<shiftCount;
437 |         zFrac1Ptr = 0u;
438 |     }
439 |     zExpPtr = uint ( - shiftCount - 31 );
440 | }
441 | 
442 | /* Returns 1 if the double-precision floating-point value `a' is a NaN;
443 |  * otherwise returns 0.
444 |  */
445 | bool
446 | float64_is_nan( uvec2 a )
447 | {   /* a.x holds sign, exponent and the upper fraction bits; a.y the lower 32 fraction bits */
448 |     return ( 0xFFE00000u <= ( a.x<<1 ) ) &&   /* exponent field all ones (sign shifted out) */
449 |         ( ( a.y != 0u ) || ( ( a.x & 0x000FFFFFu ) != 0u ) );   /* nonzero fraction */
450 | }
451 | 
452 | /* Returns 1 if the double-precision floating-point value `a' is a signaling
453 |  * NaN; otherwise returns 0.
454 |  */
455 | bool
456 | float64_is_signaling_nan( uvec2 a )
457 | {   /* a.x holds sign, exponent and the upper fraction bits; a.y the lower 32 fraction bits */
458 |     return ( ( ( a.x>>19 ) & 0xFFFu ) == 0xFFEu ) &&   /* exponent all ones, quiet bit (bit 19) clear */
459 |         ( ( a.y != 0u ) || ( ( a.x & 0x0007FFFFu ) != 0u ) );   /* some other fraction bit set */
460 | }
461 | 
462 | /* Takes two double-precision floating-point values `a' and `b', one of which
463 |  * is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is
464 |  * a signaling NaN, the invalid exception is raised.
465 |  */
466 | uvec2
467 | propagateFloat64NaN( uvec2 a, uvec2 b )
468 | {
469 |     /* Classify both operands before their bits are touched. */
470 |     bool aIsNaN = float64_is_nan( a );
471 |     bool aIsSignalingNaN = float64_is_signaling_nan( a );
472 |     bool bIsNaN = float64_is_nan( b );
473 | 
474 |     /* NOTE(review): this sets bit 19 of the .y word, and the expected
475 |      * values in [test] rely on it.  Everywhere else in this file .x is the
476 |      * sign/exponent word, where the IEEE quiet bit would live -- confirm
477 |      * which word was intended before changing either side.
478 |      */
479 |     a.y |= 0x00080000u;
480 |     b.y |= 0x00080000u;
481 | 
482 |     if ( ! aIsNaN )
483 |         return b;
484 |     return ( aIsSignalingNaN && bIsNaN ) ? b : a;
485 | }
486 | 
487 | /* Returns the fraction bits of the double-precision floating-point value `a'.*/
488 | uvec2
489 | extractFloat64Frac( uvec2 a )
490 | {
491 |     return a & uvec2( 0x000FFFFFu, 0xFFFFFFFFu );  /* drop sign and exponent, keep all 52 fraction bits */
492 | }
493 | 
494 | /* Returns the exponent bits of the double-precision floating-point value `a'.*/
495 | uint
496 | extractFloat64Exp( uvec2 a )
497 | {
498 |     return ( a.x<<1 )>>21;  /* shift the sign bit out, then the 20 fraction bits */
499 | }
500 | 
501 | /* Returns the sign bit of the double-precision floating-point value `a'.*/
502 | uint
503 | extractFloat64Sign( uvec2 a )
504 | {
505 |     return ( a.x >= 0x80000000u ) ? 1u : 0u;  /* bit 31 of the high word */
506 | }
507 | 
508 | /* Returns an approximation to the 32-bit integer quotient obtained by dividing
509 |  * `b' into the 64-bit value formed by concatenating `a0' and `a1'.  The
510 |  * divisor `b' must be at least 2^31.  If q is the exact quotient truncated
511 |  * toward zero, the approximation returned lies between q and q + 2 inclusive.
512 |  * If the exact quotient q is larger than 32 bits, the maximum positive 32-bit
513 |  * unsigned integer is returned.
514 |  */
515 | uint
516 | estimateDiv64To32( uint a0, uint a1, uint b )
517 | {
518 |     uint b0 = b>>16;  /* upper 16 bits of the divisor */
519 |     uint b1 = b<<16;  /* lower 16 bits of the divisor, moved to the top of a word */
520 |     uint rem0 = 0u;
521 |     uint rem1 = 0u;
522 |     uint term0 = 0u;
523 |     uint term1 = 0u;
524 |     uint z;
525 |     if ( b <= a0 )
526 |         return 0xFFFFFFFFu;  /* quotient would not fit in 32 bits */
527 |     /* First guess at the upper 16 quotient bits, from the top of the divisor. */
528 |     z = ( b0<<16 <= a0 ) ? 0xFFFF0000u : ( a0 / b0 )<<16;
529 |     mul32To64( b, z, term0, term1 );
530 |     sub64( a0, a1, term0, term1, rem0, rem1 );
531 |     /* rem0:rem1 is a two's-complement remainder held in uints, so its sign
532 |      * has to be read through int ( ... ); `rem0 < 0u' can never be true. */
533 |     while ( int ( rem0 ) < 0 ) {
534 |         z -= 0x10000u;
535 |         add64( rem0, rem1, b0, b1, rem0, rem1 );  /* give back b * 2^16 */
536 |     }
537 |     rem0 = ( rem0<<16 ) | ( rem1>>16 );
538 |     z |= ( b0<<16 <= rem0 ) ? 0xFFFFu : rem0 / b0;
539 |     return z;
540 | }
541 | 
542 | /* Multiplies the 64-bit value formed by concatenating `a0' and `a1' by `b'
543 |  * to obtain a 96-bit product.  The product is broken into three 32-bit pieces
544 |  * which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
545 |  * `z2Ptr'.
546 |  */
547 | void
548 | mul64By32To96( uint a0, uint a1,
549 |                uint b,
550 |                inout uint z0Ptr,
551 |                inout uint z1Ptr,
552 |                inout uint z2Ptr )
553 | {
554 |     uint lowHi = 0u;
555 |     uint lowLo = 0u;
556 |     uint highHi = 0u;
557 |     uint highLo = 0u;
558 | 
559 |     /* Two 32x32 partial products, then fold the upper half of the low
560 |      * product into the high product, which sits 32 bits further left. */
561 |     mul32To64( a1, b, lowHi, lowLo );
562 |     mul32To64( a0, b, highHi, highLo );
563 |     add64( highHi, highLo, 0u, lowHi, z0Ptr, z1Ptr );
564 |     z2Ptr = lowLo;
565 | }
566 | 
567 | /* Returns the result of dividing the double-precision floating-point value
568 |  * `a' by the corresponding value `b'. The operation is performed according to
569 |  * the IEEE Standard for Floating-Point Arithmetic.
570 |  */
571 | uvec2
572 | div_fp64( uvec2 a, uvec2 b )
573 | {
574 |     uint aSign;
575 |     uint bSign;
576 |     uint zSign;
577 |     uint aExp;
578 |     uint bExp;
579 |     uint zExp;
580 |     uvec2 aFrac;
581 |     uvec2 bFrac;
582 |     uint zFrac0 = 0u;
583 |     uint zFrac1 = 0u;
584 |     uint zFrac2 = 0u;
585 |     uint rem0 = 0u, rem1 = 0u, rem2 = 0u, rem3 = 0u;
586 |     uint term0 = 0u, term1 = 0u, term2 = 0u, term3 = 0u;
587 | 
588 |     aFrac = extractFloat64Frac( a );
589 |     aExp = extractFloat64Exp( a );
590 |     aSign = extractFloat64Sign( a );
591 |     bFrac = extractFloat64Frac( b );
592 |     bExp = extractFloat64Exp( b );
593 |     bSign = extractFloat64Sign( b );
594 |     zSign = aSign ^ bSign;
595 |     /* Special operands first: NaN, infinity and zero never reach the divider. */
596 |     if ( aExp == 0x7FFu ) {
597 |         if ( ( aFrac.x | aFrac.y ) != 0u )
598 |             return propagateFloat64NaN( a, b );
599 |         if ( bExp == 0x7FFu ) {
600 |             if ( ( bFrac.x | bFrac.y ) != 0u )
601 |                 return propagateFloat64NaN( a, b );
602 |             return uvec2( 0xFFFFFFFFu, 0xFFFFFFFFu );  /* inf / inf: invalid, NaN */
603 |         }
604 |         return packFloat64( zSign, 0x7FFu, 0u, 0u );  /* inf / finite: signed infinity */
605 |     }
606 |     if ( bExp == 0x7FFu ) {
607 |         if ( ( bFrac.x | bFrac.y ) != 0u )
608 |             return propagateFloat64NaN( a, b );
609 |         return packFloat64( zSign, 0u, 0u, 0u );  /* finite / inf: signed zero */
610 |     }
611 |     if ( bExp == 0u ) {
612 |         if ( ( bFrac.x | bFrac.y ) == 0u ) {
613 |             if ( ( aExp | aFrac.x | aFrac.y ) == 0u ) {
614 |                 return uvec2( 0xFFFFFFFFu, 0xFFFFFFFFu );  /* 0 / 0: invalid, NaN */
615 |             }
616 |             return packFloat64( zSign, 0x7FFu, 0u, 0u );  /* nonzero / 0: signed infinity */
617 |         }
618 |         normalizeFloat64Subnormal( bFrac.x, bFrac.y, bExp, bFrac.x, bFrac.y );
619 |     }
620 |     if ( aExp == 0u ) {
621 |         if ( ( aFrac.x | aFrac.y ) == 0u )
622 |             return packFloat64( zSign, 0u, 0u, 0u );
623 |         normalizeFloat64Subnormal( aFrac.x, aFrac.y, aExp, aFrac.x, aFrac.y );
624 |     }
625 |     zExp = aExp - bExp + 0x3FDu;
626 |     shortShift64Left( aFrac.x | 0x00100000u, aFrac.y, 11, aFrac.x, aFrac.y );
627 |     shortShift64Left( bFrac.x | 0x00100000u, bFrac.y, 11, bFrac.x, bFrac.y );
628 |     /* Halve the dividend when it is not below the divisor, and account for it in the exponent. */
629 |     if ( le64( bFrac.x, bFrac.y, aFrac.x, aFrac.y ) ) {
630 |         shift64Right( aFrac.x, aFrac.y, 1, aFrac.x, aFrac.y );
631 |         ++zExp;
632 |     }
633 |     /* Upper 32 quotient bits.  estimateDiv64To32 may overshoot, leaving a
634 |      * negative two's-complement remainder; its sign must be read through
635 |      * int ( ... ), because `rem0 < 0u' on a uint is never true. */
636 |     zFrac0 = estimateDiv64To32( aFrac.x, aFrac.y, bFrac.x );
637 |     mul64By32To96( bFrac.x, bFrac.y, zFrac0, term0, term1, term2 );
638 |     sub96( aFrac.x, aFrac.y, 0u, term0, term1, term2, rem0, rem1, rem2 );
639 |     while ( int ( rem0 ) < 0 ) {
640 |         --zFrac0;
641 |         add96( rem0, rem1, rem2, 0u, bFrac.x, bFrac.y, rem0, rem1, rem2 );
642 |     }
643 |     /* Lower 32 quotient bits, corrected the same way; a nonzero remainder becomes a sticky bit. */
644 |     zFrac1 = estimateDiv64To32( rem1, rem2, bFrac.x );
645 |     if ( ( zFrac1 & 0x3FFu ) <= 4u ) {
646 |         mul64By32To96( bFrac.x, bFrac.y, zFrac1, term1, term2, term3 );
647 |         sub96( rem1, rem2, 0u, term1, term2, term3, rem1, rem2, rem3 );
648 |         while ( int ( rem1 ) < 0 ) {
649 |             --zFrac1;
650 |             add96( rem1, rem2, rem3, 0u, bFrac.x, bFrac.y, rem1, rem2, rem3 );
651 |         }
652 |         zFrac1 |= uint ( ( ( rem1 | rem2 | rem3 ) != 0u ) );
653 |     }
654 |     shift64ExtraRightJamming( zFrac0, zFrac1, 0u, 11, zFrac0, zFrac1, zFrac2 );
655 |     return roundAndPackFloat64( zSign, zExp, zFrac0, zFrac1, zFrac2 );
656 | }
657 | 
658 | uniform uvec2 a;        /* dividend: .x is the high word, .y the low word */
659 | uniform uvec2 b;        /* divisor, same layout */
660 | uniform uvec2 expected; /* bit pattern the quotient has to match */
661 | 
662 | void main()
663 | {
664 |     /* Green when div_fp64 reproduces the expected bit pattern, red otherwise. */
665 |     const vec4 green = vec4(0.0, 1.0, 0.0, 1.0);
666 |     const vec4 red = vec4(1.0, 0.0, 0.0, 1.0);
667 |     bool matches = ( div_fp64(a,b) == expected );
668 | 
669 |     gl_FragColor = matches ? green : red;
670 | }
671 | 
672 | [test]
673 | # A bunch of tests to run.  The 'uniform' lines set the uniforms.  The
674 | # 'draw rect' line draws a rectangle that covers the whole window.
675 | # The 'probe all' line verifies that every pixel contains the expected
676 | # color.
677 | 
678 | # Try +0.0 and +0.0
679 | uniform uvec2 a        0x00000000 0x00000000
680 | uniform uvec2 b        0x00000000 0x00000000
681 | uniform uvec2 expected 0xFFFFFFFF 0xFFFFFFFF
682 | draw rect -1 -1 2 2
683 | probe all rgba 0.0 1.0 0.0 1.0
684 | 
685 | # Try +0.0 and -0.0
686 | uniform uvec2 a        0x00000000 0x00000000
687 | uniform uvec2 b        0x80000000 0x00000000
688 | uniform uvec2 expected 0xFFFFFFFF 0xFFFFFFFF
689 | draw rect -1 -1 2 2
690 | probe all rgba 0.0 1.0 0.0 1.0
691 | 
692 | # Try +0.0 and +50.0
693 | uniform uvec2 a        0x00000000 0x00000000
694 | uniform uvec2 b        0x40490000 0x00000000
695 | uniform uvec2 expected 0x00000000 0x00000000
696 | draw rect -1 -1 2 2
697 | probe all rgba 0.0 1.0 0.0 1.0
698 | 
699 | # Try +50.0 and +1.0
700 | uniform uvec2 a        0x40490000 0x00000000
701 | uniform uvec2 b        0x3FF00000 0x00000000
702 | uniform uvec2 expected 0x40490000 0x00000000
703 | draw rect -1 -1 2 2
704 | probe all rgba 0.0 1.0 0.0 1.0
705 | 
706 | # Try +2.0 and +2.0
707 | uniform uvec2 a        0x40000000 0x00000000
708 | uniform uvec2 b        0x40000000 0x00000000
709 | uniform uvec2 expected 0x3FF00000 0x00000000
710 | draw rect -1 -1 2 2
711 | probe all rgba 0.0 1.0 0.0 1.0
712 | 
713 | # Try +1.5 and +INF
714 | uniform uvec2 a        0x3FF80000 0x00000000
715 | uniform uvec2 b        0x7FF00000 0x00000000
716 | uniform uvec2 expected 0x00000000 0x00000000
717 | draw rect -1 -1 2 2
718 | probe all rgba 0.0 1.0 0.0 1.0
719 | 
720 | # Try +0.0 and +INF
721 | uniform uvec2 a        0x00000000 0x00000000
722 | uniform uvec2 b        0x7FF00000 0x00000000
723 | uniform uvec2 expected 0x00000000 0x00000000
724 | draw rect -1 -1 2 2
725 | probe all rgba 0.0 1.0 0.0 1.0
726 | 
727 | # Try +1.5 and NaN
728 | uniform uvec2 a        0x3FF80000 0x00000000
729 | uniform uvec2 b        0x7FF00000 0x00000001
730 | uniform uvec2 expected 0x7FF00000 0x00080001
731 | draw rect -1 -1 2 2
732 | probe all rgba 0.0 1.0 0.0 1.0
733 | 
734 | # Try +0.0 and NaN
735 | uniform uvec2 a        0x00000000 0x00000000
736 | uniform uvec2 b        0x7FF00000 0x00000001
737 | uniform uvec2 expected 0x7FF00000 0x00080001
738 | draw rect -1 -1 2 2
739 | probe all rgba 0.0 1.0 0.0 1.0
740 | 


--------------------------------------------------------------------------------
/eq_fp64.shader_test:
--------------------------------------------------------------------------------
  1 | # Check whether two doubles are equal
  2 | # IEEE 754 compliant
  3 | 
  4 | [require]
  5 | GLSL >= 1.30
  6 | 
  7 | [vertex shader]
  8 | #version 130
  9 | 
 10 | void main()
 11 | {
 12 |     gl_Position = gl_Vertex; /* pass the incoming vertex position through unchanged */
 13 | }
 14 | 
 15 | [fragment shader]
 16 | #version 130
 17 | 
 18 | /* Returns the fraction bits of the double-precision floating-point value `a'.*/
 19 | uvec2
 20 | extractFloat64Frac( uvec2 a )
 21 | {
 22 |     return a & uvec2( 0x000FFFFFu, 0xFFFFFFFFu );  /* drop sign and exponent, keep all 52 fraction bits */
 23 | }
 24 | 
 25 | /* Returns the exponent bits of the double-precision floating-point value `a'.*/
 26 | uint
 27 | extractFloat64Exp( uvec2 a )
 28 | {
 29 |     return ( a.x<<1 )>>21;  /* shift the sign bit out, then the 20 fraction bits */
 30 | }
 31 | 
 32 | /* Returns true if the double-precision floating-point value `a' is equal to the
 33 |  * corresponding value `b', and false otherwise.  The comparison is performed
 34 |  * according to the IEEE Standard for Floating-Point Arithmetic.
 35 |  */
 36 | bool
 37 | eq_fp64( uvec2 a, uvec2 b )
 38 | {
 39 |     uvec2 aFrac = extractFloat64Frac( a );
 40 |     uvec2 bFrac = extractFloat64Frac( b );
 41 | 
 42 |     /* A NaN has an all-ones exponent and a nonzero fraction; it compares
 43 |      * unequal to everything, itself included.
 44 |      */
 45 |     if ( ( extractFloat64Exp( a ) == 0x7FFu ) && ( ( aFrac.x | aFrac.y ) != 0u ) )
 46 |         return false;
 47 |     if ( ( extractFloat64Exp( b ) == 0x7FFu ) && ( ( bFrac.x | bFrac.y ) != 0u ) )
 48 |         return false;
 49 | 
 50 |     /* Identical bit patterns are equal. */
 51 |     if ( a == b )
 52 |         return true;
 53 | 
 54 |     /* Otherwise only +0 and -0 match: both low words zero and nothing but
 55 |      * the sign bit set in either high word. */
 56 |     return ( a.y == 0u ) && ( b.y == 0u ) && ( ( ( a.x | b.x )<<1 ) == 0u );
 57 | }
 58 | 
 59 | uniform uvec2 a;       /* first operand: .x is the high word, .y the low word */
 60 | uniform uvec2 b;       /* second operand, same layout */
 61 | uniform bool expected; /* answer eq_fp64 has to give */
 62 | 
 63 | void main()
 64 | {
 65 |     /* Paint the fragment green when eq_fp64 agrees with the expected
 66 |      * answer, red when it does not. */
 67 |     if ( eq_fp64(a,b) == expected )
 68 |         gl_FragColor = vec4(0.0, 1.0, 0.0, 1.0);
 69 |     else
 70 |         gl_FragColor = vec4(1.0, 0.0, 0.0, 1.0);
 71 | }
 72 | 
 73 | [test]
 74 | # A bunch of tests to run.  The 'uniform' lines set the uniforms.  The
 75 | # 'draw rect' line draws a rectangle that covers the whole window.
 76 | # The 'probe all' line verifies that every pixel contains the expected
 77 | # color.
 78 | 
 79 | # Try +0.0 and +0.0
 80 | uniform uvec2 a        0x00000000 0x00000000
 81 | uniform uvec2 b        0x00000000 0x00000000
 82 | uniform int expected   1
 83 | draw rect -1 -1 2 2
 84 | probe all rgba 0.0 1.0 0.0 1.0
 85 | 
 86 | # Try -0.0 and +0.0 (IEEE 754: zeros of either sign compare equal)
 87 | uniform uvec2 a        0x80000000 0x00000000
 88 | uniform uvec2 b        0x00000000 0x00000000
 89 | uniform int expected   1
 90 | draw rect -1 -1 2 2
 91 | probe all rgba 0.0 1.0 0.0 1.0
 92 | 
 93 | # Try 0.1 and 0.0
 94 | uniform uvec2 a        0x3FB99999 0x9999999A
 95 | uniform uvec2 b        0x00000000 0x00000000
 96 | uniform int expected   0
 97 | draw rect -1 -1 2 2
 98 | probe all rgba 0.0 1.0 0.0 1.0
 99 | 
100 | # Try 1 bit set and 0.0
101 | uniform uvec2 a        0x00000000 0x00000001
102 | uniform uvec2 b        0x00000000 0x00000000
103 | uniform int expected   0
104 | draw rect -1 -1 2 2
105 | probe all rgba 0.0 1.0 0.0 1.0
106 | 
107 | # Try +Inf and +Inf
108 | uniform uvec2 a       0x7FF00000 0x00000000
109 | uniform uvec2 b       0x7FF00000 0x00000000
110 | uniform int expected  1
111 | draw rect -1 -1 2 2
112 | probe all rgba 0.0 1.0 0.0 1.0
113 | 
114 | # Try +Inf and -Inf
115 | uniform uvec2 a       0x7FF00000 0x00000000
116 | uniform uvec2 b       0xFFF00000 0x00000000
117 | uniform int expected  0
118 | draw rect -1 -1 2 2
119 | probe all rgba 0.0 1.0 0.0 1.0
120 | 
121 | # Try 0 and NaN
122 | uniform uvec2 a      0x00000000 0x00000000
123 | uniform uvec2 b      0x7FF00000 0x00000001
124 | uniform int expected 0
125 | draw rect -1 -1 2 2
126 | probe all rgba 0.0 1.0 0.0 1.0
127 | 
128 | # Try +Inf and NaN
129 | uniform uvec2 a      0x7FF00000 0x00000000
130 | uniform uvec2 b      0x7FF00000 0x00000001
131 | uniform int expected 0
132 | draw rect -1 -1 2 2
133 | probe all rgba 0.0 1.0 0.0 1.0
134 | 


--------------------------------------------------------------------------------
/fp32-to-fp64-conversion.shader_test:
--------------------------------------------------------------------------------
  1 | # Conversion from float to double 
  2 | # IEEE 754 compliant
  3 | 
  4 | [require]
  5 | GLSL >= 1.30
  6 | 
  7 | [vertex shader]
  8 | #version 130
  9 | /* Pass-through vertex stage: copies the incoming vertex to gl_Position. */
 10 | void main()
 11 | {
 12 |     gl_Position = gl_Vertex;
 13 | }
 14 | 
 15 | [fragment shader]
 16 | #version 130
 17 | 
 18 | /* Assembles a double-precision value from its sign `zSign', exponent `zExp'
 19 |  * and a significand split into `zFrac0' (upper part) and `zFrac1' (lower 32
 20 |  * bits).  The returned uvec2 carries the most significant word in `.x' and
 21 |  * the least significant word in `.y'.
 22 |  *     The upper word is built by ADDING the shifted fields rather than
 23 |  * OR-ing them, so any bit of `zFrac0' at or above bit 20 carries into the
 24 |  * exponent.  When the significand still holds its leading integer 1, the
 25 |  * caller must therefore pass an exponent 1 less than the intended one and
 26 |  * let that carry supply the difference.
 27 |  */
 28 | uvec2
 29 | packFloat64( uint zSign, uint zExp, uint zFrac0, uint zFrac1 )
 30 | {
 31 |     uint signField = zSign<<31;
 32 |     uint expField = zExp<<20;
 33 | 
 34 |     /* Addition (not OR) is deliberate: see the header comment. */
 35 |     return uvec2( signField + expField + zFrac0, zFrac1 );
 36 | }
 37 | 
 38 | /* Counts the zero bits above the highest set bit of `a'.  The search halves
 39 |  * the window at each step (16, 8, 4, 2, 1), shifting `a' left whenever the
 40 |  * upper part of the window is empty.  A zero input yields 32.
 41 |  */
 42 | uint
 43 | countLeadingZeros32( uint a )
 44 | {
 45 |     uint zeros = 0u;
 46 | 
 47 |     if ( a == 0u ) return 32u;
 48 |     if ( ( a>>16 ) == 0u ) { zeros += 16u; a <<= 16; }
 49 |     if ( ( a>>24 ) == 0u ) { zeros += 8u; a <<= 8; }
 50 |     if ( ( a>>28 ) == 0u ) { zeros += 4u; a <<= 4; }
 51 |     if ( ( a>>30 ) == 0u ) { zeros += 2u; a <<= 2; }
 52 |     if ( ( a>>31 ) == 0u ) { zeros += 1u; }
 53 |     return zeros;
 54 | }
 55 | 
 56 | /* Renormalizes the subnormal single-precision significand `aFrac': shifts it
 57 |  * left until its leading 1 reaches bit 23 and stores it in `zFracPtr', then
 58 |  * stores the matching exponent (1 minus the shift distance) in `zExpPtr'.
 59 |  * The exponent is unsigned, so it wraps modulo 2^32 when the shift exceeds 1.
 60 |  */
 61 | void
 62 | normalizeFloat32Subnormal( uint aFrac,
 63 |                            inout uint zExpPtr,
 64 |                            inout uint zFracPtr )
 65 | {
 66 |     /* 8 = 32 - 24: the zero bits left above a normalized significand. */
 67 |     uint lead = countLeadingZeros32( aFrac ) - 8u;
 68 | 
 69 |     zFracPtr = aFrac<<lead;
 70 |     zExpPtr = 1u - lead;
 71 | }
 72 | 
 73 | /* Returns the 23-bit fraction field (bits 22..0) of the binary32 word `a'. */
 74 | uint
 75 | extractFloat32Frac( uint a )
 76 | {
 77 |     return a & ( ( 1u<<23 ) - 1u );
 78 | }
 79 | 
 80 | /* Returns the 8-bit exponent field (bits 30..23) of the binary32 word `a'. */
 81 | uint
 82 | extractFloat32Exp( uint a )
 83 | {
 84 |     return ( a<<1 )>>24;
 85 | }
 86 | 
 87 | /* Returns the sign (bit 31) of the binary32 word `a' as 0u or 1u. */
 88 | uint
 89 | extractFloat32Sign( uint a )
 90 | {
 91 |     return uint ( a >= 0x80000000u );
 92 | }
 93 | 
 94 | /* Converts the single-precision bit pattern `a' to double precision.  The
 95 |  * result is a uvec2 with the most significant word in `.x' and the least
 96 |  * significant word in `.y'.  Widening never rounds:
 97 |  *   - NaN:       sign kept, fraction bits moved to the top of the wider
 98 |  *                fraction field;
 99 |  *   - infinity:  infinity of the same sign;
100 |  *   - zero:      zero of the same sign;
101 |  *   - subnormal: renormalized, then packed like any other finite value.
102 |  */
103 | uvec2
104 | fp32_to_fp64( uint a )
105 | {
106 |     uint aFrac;
107 |     uint aExp;
108 |     uint aSign;
109 | 
110 |     aFrac = extractFloat32Frac( a );
111 |     aExp = extractFloat32Exp( a );
112 |     aSign = extractFloat32Sign( a );
113 | 
114 |     if ( aExp == 0xFFu ) {
115 |         if ( aFrac != 0u ) {
116 |             /* NaN */
117 |             return uvec2(
118 |                 ( ( aSign<<31 ) | 0x7FF00000u | ( aFrac>>3 ) ),
119 |                 ( aFrac<<29 )
120 |             );
121 |         }
122 |         /* Inf */
123 |         return packFloat64( aSign, 0x7FFu, 0u, 0u );
124 |     }
125 | 
126 |     if ( aExp == 0u ) {
127 |         if ( aFrac == 0u ) {
128 |             /* Zero */
129 |             return packFloat64( aSign, 0u, 0u, 0u );
130 |         }
131 |         /* Subnormal: move the leading 1 up to bit 23.  That bit lands on the
132 |          * exponent field in the pack below and adds 1 to it, hence the
133 |          * decrement.  aExp is unsigned and wraps here; adding 0x380 below
134 |          * brings it back into range.
135 |          */
136 |         normalizeFloat32Subnormal( aFrac, aExp, aFrac );
137 |         --aExp;
138 |     }
139 | 
140 |     /* 0x380 = 1023 - 127: rebias from single to double precision. */
141 |     return packFloat64( aSign, aExp + 0x380u, aFrac>>3, aFrac<<29 );
142 | }
143 | 
144 | uniform uint a;
145 | uniform uvec2 expected;
146 | 
147 | void main()
148 | {
149 |     /* Generate green if the expected value is produced, red
150 |      * otherwise.
151 |      */
152 |     gl_FragColor = fp32_to_fp64(a) == expected
153 |         ? vec4(0.0, 1.0, 0.0, 1.0)
154 |         : vec4(1.0, 0.0, 0.0, 1.0);
155 | }
156 | 
157 | [test]
158 | # A bunch of tests to run.  The 'uniform' lines set the uniforms.  The
159 | # 'draw rect' line draws a rectangle that covers the whole window.
160 | # The 'probe all' line verifies that every pixel contains the expected
161 | # color.
162 | 
163 | # Try +0.0
164 | uniform uint a         0x00000000
165 | uniform uvec2 expected 0x00000000 0x00000000
166 | draw rect -1 -1 2 2
167 | probe all rgba 0.0 1.0 0.0 1.0
168 | 
169 | # Try -0.0
170 | uniform uint a         0x80000000
171 | uniform uvec2 expected 0x80000000 0x00000000
172 | draw rect -1 -1 2 2
173 | probe all rgba 0.0 1.0 0.0 1.0
174 | 
175 | # Try +Inf
176 | uniform uint a         0x7F800000
177 | uniform uvec2 expected 0x7FF00000 0x00000000
178 | draw rect -1 -1 2 2
179 | probe all rgba 0.0 1.0 0.0 1.0
180 | 
181 | # Try -Inf
182 | uniform uint a         0xFF800000
183 | uniform uvec2 expected 0xFFF00000 0x00000000
184 | draw rect -1 -1 2 2
185 | probe all rgba 0.0 1.0 0.0 1.0
186 | 
187 | # Denormal: 0x20 * 2^-149 = 2^-144, a normal double (exponent 0x36F)
188 | uniform uint a         0x00000020
189 | uniform uvec2 expected 0x36F00000 0x00000000
190 | draw rect -1 -1 2 2
191 | probe all rgba 0.0 1.0 0.0 1.0
192 | 
193 | # Smallest denormal: 2^-149 (exponent 0x36A)
194 | uniform uint a         0x00000001
195 | uniform uvec2 expected 0x36A00000 0x00000000
196 | draw rect -1 -1 2 2
197 | probe all rgba 0.0 1.0 0.0 1.0
198 | 
199 | # Try 50
200 | uniform uint a         0x42480000
201 | uniform uvec2 expected 0x40490000 0x00000000
202 | draw rect -1 -1 2 2
203 | probe all rgba 0.0 1.0 0.0 1.0
204 | 


--------------------------------------------------------------------------------
/fp64-to-fp32-conversion.shader_test:
--------------------------------------------------------------------------------
  1 | # Conversion from double to float 
  2 | # IEEE 754 compliant
  3 | 
  4 | [require]
  5 | GLSL >= 1.30
  6 | 
  7 | [vertex shader]
  8 | #version 130
  9 | /* Pass-through vertex stage: copies the incoming vertex to gl_Position. */
 10 | void main()
 11 | {
 12 |     gl_Position = gl_Vertex;
 13 | }
 14 | 
 15 | [fragment shader]
 16 | #version 130
 17 | 
 18 | /* Software IEEE rounding modes; the mode defaults to round-to-nearest-even. */
 19 | const uint float_round_nearest_even = 0u;
 20 | const uint float_round_to_zero      = 1u;
 21 | const uint float_round_down         = 2u;
 22 | const uint float_round_up           = 3u;
 23 | uint float_rounding_mode = float_round_nearest_even;
 24 | 
 25 | /* Assembles a single-precision value from sign `zSign', exponent `zExp' and
 26 |  * significand `zFrac'.  The fields are shifted into place (sign to bit 31,
 27 |  * exponent to bit 23) and then ADDED, not OR-ed, so any bit of `zFrac' at
 28 |  * or above bit 23 carries into the exponent field.  When `zFrac' still
 29 |  * contains its leading integer 1, the caller must therefore pass an
 30 |  * exponent 1 less than the intended result exponent.
 31 |  */
 32 | uint
 33 | packFloat32( uint zSign, uint zExp, uint zFrac )
 34 | {
 35 |     uint hiFields = ( zSign<<31 ) + ( zExp<<23 );
 36 | 
 37 |     return hiFields + zFrac;
 38 | }
 39 | 
 40 | /* Right-shifts `a' by `count' bits with a "sticky" least significant bit:
 41 |  * when any 1 bit falls off the low end, bit 0 of the result is forced to 1
 42 |  * so later rounding can still tell the value was inexact.  `count' may be
 43 |  * arbitrarily large; from 32 upwards the result is just 0 or 1 according to
 44 |  * whether `a' is zero or not.  The result is written to `zPtr'.
 45 |  */
 46 | void
 47 | shift32RightJamming( uint a, int count, inout uint zPtr )
 48 | {
 49 |     if ( count == 0 ) {
 50 |         zPtr = a;
 51 |         return;
 52 |     }
 53 |     if ( count >= 32 ) {
 54 |         zPtr = uint ( a != 0u );
 55 |         return;
 56 |     }
 57 |     /* ( -count ) & 31 == 32 - count here: isolates the bits shifted out. */
 58 |     uint lost = a<<( ( - count ) & 31 );
 59 |     zPtr = ( a>>count ) | uint ( lost != 0u );
 60 | }
 61 | 
 62 | /* Right-shifts the 64-bit value `a' (`a.x' = upper word, `a.y' = lower word)
 63 |  * by `count' bits, keeping a "sticky" least significant bit: whenever a 1
 64 |  * bit is shifted out, bit 0 of the result is forced to 1 so that later
 65 |  * rounding still sees the value as inexact.  `count' may be arbitrarily
 66 |  * large; from 64 upwards the result is 0 or 1 according to whether `a' is
 67 |  * zero or nonzero.  The upper word of the result is written to `z0Ptr' and
 68 |  * the lower word to `z1Ptr'.
 69 |  */
 70 | void
 71 | shift64RightJamming( uvec2 a,
 72 |                      int count,
 73 |                      inout uint z0Ptr,
 74 |                      inout uint z1Ptr )
 75 | {
 76 |     /* Left-shift distance that exposes the bits about to be lost: equals
 77 |      * 32 - count for 0 < count < 32 and 64 - count for 32 < count < 64.
 78 |      */
 79 |     int back = ( - count ) & 31;
 80 |     uint hi = 0u;
 81 |     uint lo;
 82 | 
 83 |     if ( count == 0 ) {
 84 |         hi = a.x;
 85 |         lo = a.y;
 86 |     } else if ( count < 32 ) {
 87 |         /* Both words contribute; the bits leaving a.y become the sticky. */
 88 |         hi = a.x>>count;
 89 |         lo = ( a.x<<back ) | ( a.y>>count ) | uint ( ( a.y<<back ) != 0u );
 90 |     } else if ( count == 32 ) {
 91 |         /* The upper word moves down whole; a.y is entirely shifted out. */
 92 |         lo = a.x | uint ( a.y != 0u );
 93 |     } else if ( count < 64 ) {
 94 |         /* Only part of a.x survives; its lost bits and a.y are sticky. */
 95 |         lo = ( a.x>>( count & 31 ) ) |
 96 |             uint ( ( ( a.x<<back ) | a.y ) != 0u );
 97 |     } else {
 98 |         lo = uint ( ( a.x | a.y ) != 0u );
 99 |     }
100 |     z1Ptr = lo;
101 |     z0Ptr = hi;
102 | }
103 | 
104 | /* Rounds the abstract floating-point value with sign `zSign', exponent
105 |  * `zExp' and significand `zFrac' according to `float_rounding_mode' and
106 |  * packs it into single-precision format.
107 |  *     `zFrac' has its binary point between bits 30 and 29, which is 7 bits
108 |  * to the left of the usual location; the low 7 bits are the rounding bits.
109 |  * `zFrac' must be normalized or smaller.  If it is not normalized, `zExp'
110 |  * must be 0; the result is then a subnormal number and must not require
111 |  * rounding.  In the usual case that `zFrac' is normalized, `zExp' must be
112 |  * 1 less than the "true" floating-point exponent.
113 |  *     `zExp' is declared uint but carries a signed quantity: callers form
114 |  * negative exponents by unsigned wrap-around (e.g. `aExp - 0x381u').  It is
115 |  * reinterpreted as int before any range test, because an unsigned compare
116 |  * would take every negative exponent for an overflow.
117 |  *     On overflow the result is an infinity, or the largest finite value
118 |  * when the rounding increment is 0 for this sign and mode.  On underflow
119 |  * the significand is shifted right with jamming, giving a subnormal number
120 |  * or zero.  No exception flags are kept by this implementation.
121 |  */
122 | uint
123 | roundAndPackFloat32( uint zSign, uint zExp, uint zFrac )
124 | {
125 |     uint roundingMode;
126 |     uint roundNearestEven;
127 |     uint roundIncrement;
128 |     uint roundBits;
129 |     int sExp = int ( zExp );    /* signed view of the exponent */
130 | 
131 |     roundingMode = float_rounding_mode;
132 |     roundNearestEven = uint ( roundingMode == float_round_nearest_even );
133 |     roundIncrement = 0x40u;
134 |     if ( roundNearestEven == 0u ) {
135 |         if ( roundingMode == float_round_to_zero ) {
136 |             roundIncrement = 0u;
137 |         } else {
138 |             roundIncrement = 0x7Fu;
139 |             if ( zSign != 0u ) {
140 |                 if ( roundingMode == float_round_up ) {
141 |                       roundIncrement = 0u;
142 |                 }
143 |             } else {
144 |                 if ( roundingMode == float_round_down ) {
145 |                       roundIncrement = 0u;
146 |                 }
147 |             }
148 |         }
149 |     }
150 |     roundBits = zFrac & 0x7Fu;
151 |     if ( ( sExp < 0 ) || ( 0xFD <= sExp ) ) {
152 |         /* A negative int sum means rounding carries into bit 31: overflow. */
153 |         if ( ( 0xFD < sExp ) ||
154 |             ( ( sExp == 0xFD ) && ( int ( zFrac + roundIncrement ) < 0 ) ) ) {
155 |             return packFloat32(
156 |                 zSign, 0xFFu, 0u ) - uint ( roundIncrement == 0u );
157 |         }
158 |         if ( sExp < 0 ) {
159 |             shift32RightJamming( zFrac, - sExp, zFrac );
160 |             sExp = 0;
161 |             roundBits = zFrac & 0x7Fu;
162 |         }
163 |     }
164 |     zFrac = ( zFrac + roundIncrement )>>7;
165 |     zFrac &= ~ ( uint ( ( roundBits ^ 0x40u ) == 0u ) & roundNearestEven );
166 |     if ( zFrac == 0u ) {
167 |         sExp = 0;
168 |     }
169 |     return packFloat32( zSign, uint ( sExp ), zFrac );
170 | }
171 | 
172 | 
173 | /* Returns the 52-bit fraction of `a': low 20 bits of `a.x', all of `a.y'. */
174 | uvec2
175 | extractFloat64Frac( uvec2 a )
176 | {
177 |     return a & uvec2( 0x000FFFFFu, 0xFFFFFFFFu );
178 | }
179 | 
180 | /* Returns the 11-bit exponent field of `a' (bits 30..20 of `a.x'). */
181 | uint
182 | extractFloat64Exp( uvec2 a )
183 | {
184 |     return ( a.x<<1 )>>21;
185 | }
186 | 
187 | /* Returns the sign of `a' (bit 31 of `a.x') as 0u or 1u. */
188 | uint
189 | extractFloat64Sign( uvec2 a )
190 | {
191 |     return uint ( a.x >= 0x80000000u );
192 | }
193 | 
194 | /* Narrows the double-precision value `a' (`a.x' = upper word, `a.y' = lower
195 |  * word) to single precision.  NaNs come back quieted with the top payload
196 |  * bits kept, infinities keep their sign, and every other value is rounded
197 |  * by roundAndPackFloat32.
198 |  */
199 | uint
200 | fp64_to_fp32( uvec2 a )
201 | {
202 |     uvec2 frac = extractFloat64Frac( a );
203 |     uint biasedExp = extractFloat64Exp( a );
204 |     uint sign = extractFloat64Sign( a );
205 |     uint unusedHigh;
206 |     uint sig;
207 | 
208 |     if ( biasedExp == 0x7FFu ) {
209 |         if ( ( frac.x | frac.y ) == 0u ) {
210 |             return packFloat32( sign, 0xFFu, 0u );
211 |         }
212 |         /* 0x7FC00000 = all-ones exponent plus the quiet bit. */
213 |         return ( sign<<31 ) | 0x7FC00000u |
214 |             ( ( frac.x & 0x000FFFFFu )<<3 ) | ( frac.y>>29 );
215 |     }
216 |     /* Drop 22 low bits; anything shifted out is folded into sticky bit 0. */
217 |     shift64RightJamming( frac, 22, unusedHigh, sig );
218 |     if ( biasedExp != 0u ) {
219 |         sig |= 0x40000000u;    /* implicit leading 1 of a normal number */
220 |     }
221 |     return roundAndPackFloat32( sign, biasedExp - 0x381u, sig );
222 | }
223 | 
224 | uniform uvec2 a;        /* input double: .x = upper word, .y = lower word */
225 | uniform uint expected;  /* single-precision bit pattern the test expects */
226 | 
227 | void main()
228 | {
229 |     /* Green when the conversion matches the expected bits, red otherwise;
230 |      * the test script probes every pixel for green.
231 |      */
232 |     bool pass = ( fp64_to_fp32(a) == expected );
233 | 
234 |     gl_FragColor = pass ? vec4(0.0, 1.0, 0.0, 1.0) : vec4(1.0, 0.0, 0.0, 1.0);
235 | }
236 | 
237 | [test]
238 | # A bunch of tests to run.  The 'uniform' lines set the uniforms.  The
239 | # 'draw rect' line draws a rectangle that covers the whole window.
240 | # The 'probe all' line verifies that every pixel contains the expected
241 | # color.
242 | 
243 | # Try +Inf
244 | uniform uvec2 a       0x7FF00000 0x00000000
245 | uniform uint expected 0x7F800000
246 | draw rect -1 -1 2 2
247 | probe all rgba 0.0 1.0 0.0 1.0
248 | 
249 | # Try -Inf
250 | uniform uvec2 a       0xFFF00000 0x00000000
251 | uniform uint expected 0xFF800000
252 | draw rect -1 -1 2 2
253 | probe all rgba 0.0 1.0 0.0 1.0
254 | 
255 | # Try 50
256 | uniform uvec2 a       0x40490000 0x00000000
257 | uniform uint expected 0x42480000
258 | draw rect -1 -1 2 2
259 | probe all rgba 0.0 1.0 0.0 1.0
260 | 
261 | # Try -50
262 | uniform uvec2 a       0xC0490000 0x00000000
263 | uniform uint expected 0xC2480000
264 | draw rect -1 -1 2 2
265 | probe all rgba 0.0 1.0 0.0 1.0
266 | 
267 | # Try 1
268 | uniform uvec2 a       0x3FF00000 0x00000000
269 | uniform uint expected 0x3F800000
270 | draw rect -1 -1 2 2
271 | probe all rgba 0.0 1.0 0.0 1.0
272 | 
273 | # Try 0.4
274 | uniform uvec2 a       0x3FD99999 0x9999999A
275 | uniform uint expected 0x3ECCCCCD
276 | draw rect -1 -1 2 2
277 | probe all rgba 0.0 1.0 0.0 1.0
278 | 


--------------------------------------------------------------------------------
/le_fp64.shader_test:
--------------------------------------------------------------------------------
  1 | # Check if double 'a' is less than or equal than 'b' 
  2 | # IEEE 754 compliant
  3 | 
  4 | [require]
  5 | GLSL >= 1.30
  6 | 
  7 | [vertex shader]
  8 | #version 130
  9 | /* Pass-through vertex stage: copies the incoming vertex to gl_Position. */
 10 | void main()
 11 | {
 12 |     gl_Position = gl_Vertex;
 13 | }
 14 | 
 15 | [fragment shader]
 16 | #version 130
 17 | 
 18 | /* Returns the 52-bit fraction of `a': low 20 bits of `a.x', all of `a.y'. */
 19 | uvec2
 20 | extractFloat64Frac( uvec2 a )
 21 | {
 22 |     return a & uvec2( 0x000FFFFFu, 0xFFFFFFFFu );
 23 | }
 24 | 
 25 | /* Returns the 11-bit exponent field of `a' (bits 30..20 of `a.x'). */
 26 | uint
 27 | extractFloat64Exp( uvec2 a )
 28 | {
 29 |     return ( a.x<<1 )>>21;
 30 | }
 31 | 
 32 | /* Returns the sign of `a' (bit 31 of `a.x') as 0u or 1u. */
 33 | uint
 34 | extractFloat64Sign( uvec2 a )
 35 | {
 36 |     return uint ( a.x >= 0x80000000u );
 37 | }
 38 | 
 39 | /* Unsigned 64-bit "less than or equal": compares a0:a1 against b0:b1, where
 40 |  * the first word of each pair is the more significant one.  The upper words
 41 |  * decide unless they are equal, in which case the lower words decide.
 42 |  */
 43 | bool
 44 | le64( uint a0, uint a1, uint b0, uint b1 )
 45 | {
 46 |     return ( a0 != b0 ) ? ( a0 < b0 ) : ( a1 <= b1 );
 47 | }
 48 | 
 49 | /* IEEE "a <= b" on two double-precision values (`.x' = upper word, `.y' =
 50 |  * lower word).  The result is false whenever either operand is a NaN;
 51 |  * +0.0 and -0.0 compare as equal.
 52 |  */
 53 | bool
 54 | le_fp64( uvec2 a, uvec2 b )
 55 | {
 56 |     uvec2 fracA = extractFloat64Frac( a );
 57 |     uvec2 fracB = extractFloat64Frac( b );
 58 | 
 59 |     /* NaN = all-ones exponent with a nonzero fraction; unordered => false. */
 60 |     if ( ( extractFloat64Exp( a ) == 0x7FFu ) &&
 61 |          ( ( fracA.x | fracA.y ) != 0u ) ) {
 62 |         return false;
 63 |     }
 64 |     if ( ( extractFloat64Exp( b ) == 0x7FFu ) &&
 65 |          ( ( fracB.x | fracB.y ) != 0u ) ) {
 66 |         return false;
 67 |     }
 68 | 
 69 |     uint signA = extractFloat64Sign( a );
 70 |     uint signB = extractFloat64Sign( b );
 71 |     if ( signA != signB ) {
 72 |         /* Opposite signs: true when `a' is the negative one, or when both
 73 |          * are zeros (every bit except the sign is clear). */
 74 |         bool bothZero = ( ( ( ( a.x | b.x )<<1 ) | a.y | b.y ) == 0u );
 75 |         return ( signA != 0u ) || bothZero;
 76 |     }
 77 |     /* Same sign: bit patterns order as integers, reversed if negative. */
 78 |     if ( signA != 0u ) {
 79 |         return le64( b.x, b.y, a.x, a.y );
 80 |     }
 81 |     return le64( a.x, a.y, b.x, b.y );
 82 | }
 83 | 
 84 | uniform uvec2 a;        /* left operand:  .x = upper word, .y = lower word */
 85 | uniform uvec2 b;        /* right operand: same layout */
 86 | uniform bool expected;  /* outcome the test script expects */
 87 | 
 88 | void main()
 89 | {
 90 |     /* Green when le_fp64 agrees with the expected outcome, red otherwise;
 91 |      * the test script probes every pixel for green.
 92 |      */
 93 |     bool pass = ( le_fp64(a,b) == expected );
 94 | 
 95 |     gl_FragColor = pass ? vec4(0.0, 1.0, 0.0, 1.0) : vec4(1.0, 0.0, 0.0, 1.0);
 96 | }
 97 | 
 98 | [test]
 99 | # A bunch of tests to run.  The 'uniform' lines set the uniforms.  The
100 | # 'draw rect' line draws a rectangle that covers the whole window.
101 | # The 'probe all' line verifies that every pixel contains the expected
102 | # color.
103 | 
104 | # Try +0.0 and +0.0
105 | uniform uvec2 a        0x00000000 0x00000000
106 | uniform uvec2 b        0x00000000 0x00000000
107 | uniform int expected   1
108 | draw rect -1 -1 2 2
109 | probe all rgba 0.0 1.0 0.0 1.0
110 | 
111 | # Try 0.1 and 0.0
112 | uniform uvec2 a        0x3FB99999 0x9999999A
113 | uniform uvec2 b        0x00000000 0x00000000
114 | uniform int expected   0
115 | draw rect -1 -1 2 2
116 | probe all rgba 0.0 1.0 0.0 1.0
117 | 
118 | # Try 0.0 and 0.1
119 | uniform uvec2 a        0x00000000 0x00000000
120 | uniform uvec2 b        0x3FB99999 0x9999999A
121 | uniform int expected   1
122 | draw rect -1 -1 2 2
123 | probe all rgba 0.0 1.0 0.0 1.0
124 | 
125 | # Try 1 bit set and 0.0
126 | uniform uvec2 a        0x00000000 0x00000001
127 | uniform uvec2 b        0x00000000 0x00000000
128 | uniform int expected   0
129 | draw rect -1 -1 2 2
130 | probe all rgba 0.0 1.0 0.0 1.0
131 | 
132 | # Try 0.0 and 1 bit set
133 | uniform uvec2 a        0x00000000 0x00000000
134 | uniform uvec2 b        0x00000000 0x00000001
135 | uniform int expected   1
136 | draw rect -1 -1 2 2
137 | probe all rgba 0.0 1.0 0.0 1.0
138 | 
139 | # Try 1 bit set and 1 bit set
140 | uniform uvec2 a        0x00000000 0x00000001
141 | uniform uvec2 b        0x00000000 0x00000001
142 | uniform int expected   1
143 | draw rect -1 -1 2 2
144 | probe all rgba 0.0 1.0 0.0 1.0
145 | 
146 | # Try +Inf and +Inf
147 | uniform uvec2 a       0x7FF00000 0x00000000
148 | uniform uvec2 b       0x7FF00000 0x00000000
149 | uniform int expected  1
150 | draw rect -1 -1 2 2
151 | probe all rgba 0.0 1.0 0.0 1.0
152 | 
153 | # Try +Inf and -Inf
154 | uniform uvec2 a       0x7FF00000 0x00000000
155 | uniform uvec2 b       0xFFF00000 0x00000000
156 | uniform int expected  0
157 | draw rect -1 -1 2 2
158 | probe all rgba 0.0 1.0 0.0 1.0
159 | 
160 | # Try -Inf and +Inf
161 | uniform uvec2 a       0xFFF00000 0x00000000
162 | uniform uvec2 b       0x7FF00000 0x00000000
163 | uniform int expected  1
164 | draw rect -1 -1 2 2
165 | probe all rgba 0.0 1.0 0.0 1.0
166 | 
167 | # Try 0 and NaN
168 | uniform uvec2 a      0x00000000 0x00000000
169 | uniform uvec2 b      0x7FF00000 0x00000001
170 | uniform int expected 0
171 | draw rect -1 -1 2 2
172 | probe all rgba 0.0 1.0 0.0 1.0
173 | 


--------------------------------------------------------------------------------
/lt_fp64.shader_test:
--------------------------------------------------------------------------------
  1 | # Check if double 'a' is less than 'b' 
  2 | # IEEE 754 compliant
  3 | 
  4 | [require]
  5 | GLSL >= 1.30
  6 | 
  7 | [vertex shader]
  8 | #version 130
  9 | /* Pass-through vertex stage: copies the incoming vertex to gl_Position. */
 10 | void main()
 11 | {
 12 |     gl_Position = gl_Vertex;
 13 | }
 14 | 
 15 | [fragment shader]
 16 | #version 130
 17 | 
 18 | /* Returns the 52-bit fraction of `a': low 20 bits of `a.x', all of `a.y'. */
 19 | uvec2
 20 | extractFloat64Frac( uvec2 a )
 21 | {
 22 |     return a & uvec2( 0x000FFFFFu, 0xFFFFFFFFu );
 23 | }
 24 | 
 25 | /* Returns the 11-bit exponent field of `a' (bits 30..20 of `a.x'). */
 26 | uint
 27 | extractFloat64Exp( uvec2 a )
 28 | {
 29 |     return ( a.x<<1 )>>21;
 30 | }
 31 | 
 32 | /* Returns the sign of `a' (bit 31 of `a.x') as 0u or 1u. */
 33 | uint
 34 | extractFloat64Sign( uvec2 a )
 35 | {
 36 |     return uint ( a.x >= 0x80000000u );
 37 | }
 38 | 
 39 | /* Unsigned 64-bit "less than": compares a0:a1 against b0:b1, where the
 40 |  * first word of each pair is the more significant one.  The upper words
 41 |  * decide unless they are equal, in which case the lower words decide.
 42 |  */
 43 | bool
 44 | lt64( uint a0, uint a1, uint b0, uint b1 )
 45 | {
 46 |     return ( a0 != b0 ) ? ( a0 < b0 ) : ( a1 < b1 );
 47 | }
 48 | 
 49 | /* IEEE "a < b" on two double-precision values (`.x' = upper word, `.y' =
 50 |  * lower word).  The result is false whenever either operand is a NaN;
 51 |  * +0.0 and -0.0 compare as equal, so neither is less than the other.
 52 |  */
 53 | bool
 54 | lt_fp64( uvec2 a, uvec2 b )
 55 | {
 56 |     uvec2 fracA = extractFloat64Frac( a );
 57 |     uvec2 fracB = extractFloat64Frac( b );
 58 | 
 59 |     /* NaN = all-ones exponent with a nonzero fraction; unordered => false. */
 60 |     if ( ( extractFloat64Exp( a ) == 0x7FFu ) &&
 61 |          ( ( fracA.x | fracA.y ) != 0u ) ) {
 62 |         return false;
 63 |     }
 64 |     if ( ( extractFloat64Exp( b ) == 0x7FFu ) &&
 65 |          ( ( fracB.x | fracB.y ) != 0u ) ) {
 66 |         return false;
 67 |     }
 68 | 
 69 |     uint signA = extractFloat64Sign( a );
 70 |     uint signB = extractFloat64Sign( b );
 71 |     if ( signA != signB ) {
 72 |         /* Opposite signs: true only when `a' is the negative one and the
 73 |          * operands are not both zeros. */
 74 |         bool bothZero = ( ( ( ( a.x | b.x )<<1 ) | a.y | b.y ) == 0u );
 75 |         return ( signA != 0u ) && ! bothZero;
 76 |     }
 77 |     /* Same sign: bit patterns order as integers, reversed if negative. */
 78 |     if ( signA != 0u ) {
 79 |         return lt64( b.x, b.y, a.x, a.y );
 80 |     }
 81 |     return lt64( a.x, a.y, b.x, b.y );
 82 | }
 83 | 
 84 | uniform uvec2 a;        /* left operand:  .x = upper word, .y = lower word */
 85 | uniform uvec2 b;        /* right operand: same layout */
 86 | uniform bool expected;  /* outcome the test script expects */
 87 | 
 88 | void main()
 89 | {
 90 |     /* Green when lt_fp64 agrees with the expected outcome, red otherwise;
 91 |      * the test script probes every pixel for green.
 92 |      */
 93 |     bool pass = ( lt_fp64(a,b) == expected );
 94 | 
 95 |     gl_FragColor = pass ? vec4(0.0, 1.0, 0.0, 1.0) : vec4(1.0, 0.0, 0.0, 1.0);
 96 | }
 97 | 
 98 | [test]
 99 | # A bunch of tests to run.  The 'uniform' lines set the uniforms.  The
100 | # 'draw rect' line draws a rectangle that covers the whole window.
101 | # The 'probe all' line verifies that every pixel contains the expected
102 | # color.
103 | 
104 | # Try +0.0 and +0.0
105 | uniform uvec2 a        0x00000000 0x00000000
106 | uniform uvec2 b        0x00000000 0x00000000
107 | uniform int expected   0
108 | draw rect -1 -1 2 2
109 | probe all rgba 0.0 1.0 0.0 1.0
110 | 
111 | # Try 0.1 and 0.0
112 | uniform uvec2 a        0x3FB99999 0x9999999A
113 | uniform uvec2 b        0x00000000 0x00000000
114 | uniform int expected   0
115 | draw rect -1 -1 2 2
116 | probe all rgba 0.0 1.0 0.0 1.0
117 | 
118 | # Try 0.0 and 0.1
119 | uniform uvec2 a        0x00000000 0x00000000
120 | uniform uvec2 b        0x3FB99999 0x9999999A
121 | uniform int expected   1
122 | draw rect -1 -1 2 2
123 | probe all rgba 0.0 1.0 0.0 1.0
124 | 
125 | # Try 1 bit set and 0.0
126 | uniform uvec2 a        0x00000000 0x00000001
127 | uniform uvec2 b        0x00000000 0x00000000
128 | uniform int expected   0
129 | draw rect -1 -1 2 2
130 | probe all rgba 0.0 1.0 0.0 1.0
131 | 
132 | # Try 0.0 and 1 bit set
133 | uniform uvec2 a        0x00000000 0x00000000
134 | uniform uvec2 b        0x00000000 0x00000001
135 | uniform int expected   1
136 | draw rect -1 -1 2 2
137 | probe all rgba 0.0 1.0 0.0 1.0
138 | 
139 | # Try 1 bit set and 1 bit set
140 | uniform uvec2 a        0x00000000 0x00000001
141 | uniform uvec2 b        0x00000000 0x00000001
142 | uniform int expected   0
143 | draw rect -1 -1 2 2
144 | probe all rgba 0.0 1.0 0.0 1.0
145 | 
146 | # Try +Inf and +Inf
147 | uniform uvec2 a       0x7FF00000 0x00000000
148 | uniform uvec2 b       0x7FF00000 0x00000000
149 | uniform int expected  0
150 | draw rect -1 -1 2 2
151 | probe all rgba 0.0 1.0 0.0 1.0
152 | 
153 | # Try +Inf and -Inf
154 | uniform uvec2 a       0x7FF00000 0x00000000
155 | uniform uvec2 b       0xFFF00000 0x00000000
156 | uniform int expected  0
157 | draw rect -1 -1 2 2
158 | probe all rgba 0.0 1.0 0.0 1.0
159 | 
160 | # Try -Inf and +Inf
161 | uniform uvec2 a       0xFFF00000 0x00000000
162 | uniform uvec2 b       0x7FF00000 0x00000000
163 | uniform int expected  1
164 | draw rect -1 -1 2 2
165 | probe all rgba 0.0 1.0 0.0 1.0
166 | 
167 | # Try 0 and NaN
168 | uniform uvec2 a      0x00000000 0x00000000
169 | uniform uvec2 b      0x7FF00000 0x00000001
170 | uniform int expected 0
171 | draw rect -1 -1 2 2
172 | probe all rgba 0.0 1.0 0.0 1.0
173 | 


--------------------------------------------------------------------------------
/mul_fp64.shader_test:
--------------------------------------------------------------------------------
  1 | # Multiply two double 'a' and 'b' 
  2 | # IEEE 754 compliant
  3 | 
  4 | [require]
  5 | GLSL >= 1.30
  6 | 
  7 | [vertex shader]
  8 | #version 130
  9 | /* Pass-through vertex stage: copies the incoming vertex to gl_Position. */
 10 | void main()
 11 | {
 12 |     gl_Position = gl_Vertex;
 13 | }
 14 | 
 15 | [fragment shader]
 16 | #version 130
 17 | 
 18 | /* Software IEEE rounding modes; the mode defaults to round-to-nearest-even. */
 19 | const uint float_round_nearest_even = 0u;
 20 | const uint float_round_to_zero      = 1u;
 21 | const uint float_round_down         = 2u;
 22 | const uint float_round_up           = 3u;
 23 | uint float_rounding_mode = float_round_nearest_even;
 24 | 
 25 | /* 64-bit addition on word pairs: a0:a1 + b0:b1, with the first word of each
 26 |  * pair being the more significant one.  The sum wraps modulo 2^64 (a carry
 27 |  * out of the upper word is discarded).  The upper word of the sum is written
 28 |  * to `z0Ptr' and the lower word to `z1Ptr'.
 29 |  */
 30 | void
 31 | add64( uint a0,
 32 |        uint a1,
 33 |        uint b0,
 34 |        uint b1,
 35 |        inout uint z0Ptr,
 36 |        inout uint z1Ptr )
 37 | {
 38 |     uint lowSum = a1 + b1;
 39 |     /* The low-word addition wrapped exactly when the sum fell below a1. */
 40 |     uint carry = ( lowSum < a1 ) ? 1u : 0u;
 41 |     z1Ptr = lowSum;
 42 |     z0Ptr = a0 + b0 + carry;
 43 | }
 44 | 
 45 | /* Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' right
 46 |  * by 32 _plus_ the number of bits given in `count'.  The shifted result is
 47 |  * at most 64 nonzero bits; these are broken into two 32-bit pieces which are
 48 |  * stored at the locations pointed to by `z0Ptr' (upper word) and `z1Ptr'
 49 |  * (lower word).  The bits shifted off form a third 32-bit result as follows:
 50 |  * the _last_ bit shifted off is the most-significant bit of the extra
 51 |  * result, and the other 31 bits of the extra result are all zero if and
 52 |  * only if _all_but_the_last_ bits shifted off were all zero.  This extra
 53 |  * result is stored in the location pointed to by `z2Ptr'.  The value of
 54 |  * `count' can be arbitrarily large.
 55 |  *     (This routine makes more sense if `a0', `a1', and `a2' are considered
 56 |  * to form a fixed-point value with binary point between `a1' and `a2'.  This
 57 |  * fixed-point value is shifted right by the number of bits given in `count',
 58 |  * and the integer part of the result is returned through `z0Ptr' and
 59 |  * `z1Ptr'.  The fractional part of the result may be slightly corrupted as
 60 |  * described above, and is returned through `z2Ptr'.)
 61 |  */
 62 | void
 63 | shift64ExtraRightJamming( uint a0, uint a1, uint a2,
 64 |                           int count,
 65 |                           inout uint z0Ptr,
 66 |                           inout uint z1Ptr,
 67 |                           inout uint z2Ptr )
 68 | {
 69 |     uint z0;
 70 |     uint z1;
 71 |     uint z2;
 72 |     int negCount = ( - count ) & 31;    /* left shift exposing lost bits */
 73 | 
 74 |     if ( count == 0 ) {
 75 |         z2 = a2;
 76 |         z1 = a1;
 77 |         z0 = a0;
 78 |     } else {
 79 |         if ( count < 32 ) {
 80 |             z2 = a1<<negCount;
 81 |             z1 = ( a0<<negCount ) | ( a1>>count );
 82 |             z0 = a0>>count;
 83 |         } else {
 84 |             if ( count == 32 ) {
 85 |                 z2 = a1;
 86 |                 z1 = a0;
 87 |             } else {
 88 |                 a2 |= a1;    /* all of a1 is lost too: fold it into a2 */
 89 |                 if ( count < 64 ) {
 90 |                     z2 = a0<<negCount;
 91 |                     z1 = a0>>( count & 31 );
 92 |                 } else {
 93 |                     z2 = ( count == 64 ) ? a0 : uint ( a0 != 0u );
 94 |                     z1 = 0u;
 95 |                 }
 96 |             }
 97 |             z0 = 0u;
 98 |         }
 99 |         z2 |= uint ( a2 != 0u );    /* jam: record any nonzero lost bits */
100 |     }
101 |     z2Ptr = z2;
102 |     z1Ptr = z1;
103 |     z0Ptr = z0;
104 | }
105 | 
106 | /* Returns true when the 64-bit value formed by `a0' (upper word) and `a1'
107 |  * (lower word) equals the one formed by `b0' and `b1', i.e. when both word
108 |  * pairs match; returns false otherwise.
109 |  */
110 | bool
111 | eq64( uint a0, uint a1, uint b0, uint b1 )
112 | {
113 |     return uvec2( a0, a1 ) == uvec2( b0, b1 );
114 | }
115 | 
116 | /* Assembles a double-precision value from the sign `zSign', the exponent
117 |  * `zExp' and the significand `zFrac0':`zFrac1', and returns it as a uvec2
118 |  * whose `x' component is the most significant word.  The high word is the
119 |  * sum (not the bitwise OR) of `zSign<<31', `zExp<<20' and `zFrac0', so any
120 |  * part of `zFrac0' at or above bit 20 is added into the exponent field.  A
121 |  * normalized significand has its integer bit at bit 20; callers passing one
122 |  * must therefore supply a `zExp' that is 1 less than the exponent wanted in
123 |  * the result.  `zFrac1' becomes the low word unchanged.  No range checking
124 |  * is done on any argument.
125 |  */
126 | uvec2
127 | packFloat64( uint zSign, uint zExp, uint zFrac0, uint zFrac1 )
128 | {
129 |     uint signBits = zSign<<31;
130 |     uint expBits = zExp<<20;
131 | 
132 |     /* Addition, not OR: an integer bit in `zFrac0' bumps the exponent. */
133 |     return uvec2( signBits + expBits + zFrac0, zFrac1 );
134 | }
135 | 
136 | /* Takes an abstract floating-point value having sign `zSign', exponent `zExp',
137 |  * and extended significand formed by the concatenation of `zFrac0', `zFrac1',
138 |  * and `zFrac2', and returns the double-precision value obtained by rounding
139 |  * it according to `float_rounding_mode' and packing the result.
140 |  *     `zExp' is a two's-complement signed exponent carried in a uint; it is
141 |  * reinterpreted as an int wherever its sign matters.  In the usual case that
142 |  * the input significand is normalized, `zExp' must be 1 less than the "true"
143 |  * floating-point exponent (see packFloat64).  `zFrac2' holds the bits below
144 |  * the significand proper: its top bit is the round bit and the remaining
145 |  * bits are sticky bits.
146 |  *     If the value is too large, an infinity or the largest finite value is
147 |  * returned, depending on the sign and the rounding mode.  If `zExp' is
148 |  * negative, the significand is shifted right (with jamming) by `-zExp' bits
149 |  * and the result is rounded to a subnormal number.  No exception flags are
150 |  * raised; this function does not track them.
151 |  *     The input significand must be normalized or smaller.  If it is not
152 |  * normalized, `zExp' must be 0; in that case the result is a subnormal
153 |  * number, and it must not require rounding.
154 |  */
155 | uvec2
156 | roundAndPackFloat64( uint zSign,
157 |                      uint zExp,
158 |                      uint zFrac0,
159 |                      uint zFrac1,
160 |                      uint zFrac2 )
161 | {
162 |     uint roundingMode = float_rounding_mode;
163 |     uint roundNearestEven = uint ( roundingMode == float_round_nearest_even );
164 |     /* Round bit = top bit of `zFrac2'; a `< 0u' test on a uint never fires. */
165 |     uint increment = zFrac2>>31;
166 | 
167 |     if ( roundNearestEven == 0u ) {
168 |         if ( roundingMode == float_round_to_zero ) {
169 |             increment = 0u;
170 |         } else if ( zSign != 0u ) {
171 |             increment = uint ( ( roundingMode == float_round_down ) &&
172 |                     ( zFrac2 != 0u ) );
173 |         } else {
174 |             increment = uint ( ( roundingMode == float_round_up ) &&
175 |                     ( zFrac2 != 0u ) );
176 |         }
177 |     }
178 |     /* Unsigned on purpose: negative exponents wrap to large values and so
179 |      * enter this block together with exponents of 0x7FD and above.
180 |      */
181 |     if ( 0x7FDu <= zExp ) {
182 |         /* Signed test, so a negative exponent is not taken for overflow. */
183 |         if ( ( 0x7FD < int ( zExp ) ) ||
184 |             ( ( zExp == 0x7FDu ) &&
185 |                 eq64( 0x001FFFFFu, 0xFFFFFFFFu, zFrac0, zFrac1 ) &&
186 |                    ( increment != 0u ) ) ) {
187 |             if ( ( roundingMode == float_round_to_zero ) ||
188 |                 ( ( zSign != 0u ) && ( roundingMode == float_round_up ) ) ||
189 |                     ( ( zSign == 0u ) && ( roundingMode == float_round_down ) ) ) {
190 |                 return packFloat64( zSign, 0x7FEu, 0x000FFFFFu, 0xFFFFFFFFu );
191 |             }
192 |             return packFloat64( zSign, 0x7FFu, 0u, 0u );
193 |         }
194 |         if ( int ( zExp ) < 0 ) {
195 |             /* Subnormal: denormalize, then redo the rounding decision. */
196 |             shift64ExtraRightJamming(
197 |                 zFrac0, zFrac1, zFrac2, - int ( zExp ), zFrac0, zFrac1, zFrac2 );
198 |             zExp = 0u;
199 |             if ( roundNearestEven != 0u ) {
200 |                 increment = zFrac2>>31;
201 |             } else if ( zSign != 0u ) {
202 |                 increment = uint ( ( roundingMode == float_round_down ) &&
203 |                         ( zFrac2 != 0u ) );
204 |             } else {
205 |                 increment = uint ( ( roundingMode == float_round_up ) &&
206 |                         ( zFrac2 != 0u ) );
207 |             }
208 |         }
209 |     }
210 | 
211 |     if ( increment != 0u ) {
212 |         add64( zFrac0, zFrac1, 0u, 1u, zFrac0, zFrac1 );
213 |         /* Exact tie under nearest-even: clear the low bit (round to even). */
214 |         zFrac1 &= ~ ( uint ( zFrac2 + zFrac2 == 0u ) & roundNearestEven );
215 |     } else if ( ( zFrac0 | zFrac1 ) == 0u ) {
216 |         zExp = 0u;
217 |     }
218 |     return packFloat64( zSign, zExp, zFrac0, zFrac1 );
219 | }
220 | 
221 | /* Multiplies `a' by `b' to obtain a 64-bit product.  The product is broken
222 |  * into two 32-bit pieces: the most significant word is stored in `z0Ptr' and
223 |  * the least significant word in `z1Ptr'.
224 |  */
225 | void
226 | mul32To64( uint a, uint b, inout uint z0Ptr, inout uint z1Ptr )
227 | {
228 |     /* Split each operand into 16-bit halves so that every partial product
229 |      * fits in 32 bits.  The low halves must be masked: without the mask the
230 |      * upper 16 bits leak into the partial products and wrap modulo 2^32.
231 |      */
232 |     uint aLow = a & 0x0000FFFFu;
233 |     uint aHigh = a>>16;
234 |     uint bLow = b & 0x0000FFFFu;
235 |     uint bHigh = b>>16;
236 |     uint z1 = aLow * bLow;
237 |     uint zMiddleA = aLow * bHigh;
238 |     uint zMiddleB = aHigh * bLow;
239 |     uint z0 = aHigh * bHigh;
240 | 
241 |     /* Sum the two cross terms; a wrap here is a carry worth 2^16 in `z0'. */
242 |     zMiddleA += zMiddleB;
243 |     z0 += ( ( uint ( zMiddleA < zMiddleB ) )<<16 ) + ( zMiddleA>>16 );
244 | 
245 |     /* Add the low half of the cross terms into `z1', carrying into `z0'. */
246 |     zMiddleA <<= 16;
247 |     z1 += zMiddleA;
248 |     z0 += uint ( z1 < zMiddleA );
249 | 
250 |     z1Ptr = z1;
251 |     z0Ptr = z0;
252 | }
253 | 
254 | /* Computes the full 128-bit product of the 64-bit values `a0':`a1' and
255 |  * `b0':`b1'.  The result is returned as four 32-bit words, from the most
256 |  * significant (`z0Ptr') to the least significant (`z3Ptr').  The product is
257 |  * accumulated from four mul32To64 partial products combined with add64.
258 |  */
259 | void
260 | mul64To128( uint a0, uint a1, uint b0, uint b1,
261 |             inout uint z0Ptr,
262 |             inout uint z1Ptr,
263 |             inout uint z2Ptr,
264 |             inout uint z3Ptr )
265 | {
266 |     uint w0 = 0u;
267 |     uint w1 = 0u;
268 |     uint w2 = 0u;
269 |     uint w3 = 0u;
270 |     uint partHi = 0u;
271 |     uint partLo = 0u;
272 | 
273 |     mul32To64( a1, b1, w2, w3 );              /* low word  x low word  */
274 |     mul32To64( a1, b0, w1, partLo );          /* low word  x high word */
275 |     add64( w1, partLo, 0u, w2, w1, w2 );
276 |     mul32To64( a0, b0, w0, partHi );          /* high word x high word */
277 |     add64( w0, partHi, 0u, w1, w0, w1 );
278 |     mul32To64( a0, b1, partHi, partLo );      /* high word x low word  */
279 |     add64( partHi, partLo, 0u, w2, partHi, w2 );
280 |     add64( w0, w1, 0u, partHi, w0, w1 );
281 |     z3Ptr = w3;
282 |     z2Ptr = w2;
283 |     z1Ptr = w1;
284 |     z0Ptr = w0;
285 | }
286 | 
287 | /* Shifts the 64-bit value `a0':`a1' left by `count' bits; bits shifted out
288 |  * of the top are lost.  `count' must be less than 32.  The high word of the
289 |  * result is stored in `z0Ptr' and the low word in `z1Ptr'.  A zero count is
290 |  * handled apart because `( - count ) & 31' is then 0 rather than 32.
291 |  */
292 | void
293 | shortShift64Left( uint a0, uint a1,
294 |                   int count,
295 |                   inout uint z0Ptr,
296 |                   inout uint z1Ptr )
297 | {
298 |     z1Ptr = a1<<count;
299 |     z0Ptr = a0;
300 |     if ( count != 0 ) z0Ptr = ( a0<<count ) | ( a1>>( ( - count ) & 31 ) );
301 | }
302 | 
303 | /* Counts the zero bits above the most significant 1 bit of `a'.  Returns 32
304 |  * when `a' is zero.
305 |  */
306 | uint
307 | countLeadingZeros32( uint a )
308 | {
309 |     uint zeros = 0u;
310 |     if ( a == 0u ) return 32u;
311 |     /* Binary search: test the top 16, 8, 4, 2 and finally 1 bit(s) of `a'. */
312 |     for ( int width = 16; width > 0; width >>= 1 ) {
313 |         if ( ( a>>( 32 - width ) ) == 0u ) {
314 |             zeros += uint ( width );
315 |             a <<= width;
316 |         }
317 |     }
318 |     return zeros;
319 | }
320 | 
321 | /* Normalizes the subnormal double-precision value whose denormalized
322 |  * significand is `aFrac0':`aFrac1'.  The significand is shifted left until
323 |  * its leading 1 reaches bit 20 of the high word.  The high word of the result
324 |  * is stored in `zFrac0Ptr', the low word in `zFrac1Ptr', and the matching
325 |  * exponent in `zExpPtr'.  For a genuine subnormal input that exponent is zero
326 |  * or negative; it is stored as a two's-complement value in a uint.
327 |  */
328 | void
329 | normalizeFloat64Subnormal( uint aFrac0, uint aFrac1,
330 |                            inout uint zExpPtr,
331 |                            inout uint zFrac0Ptr,
332 |                            inout uint zFrac1Ptr )
333 | {
334 |     /* The leading 1 is in `aFrac0' unless that word is empty. */
335 |     uint top = ( aFrac0 != 0u ) ? aFrac0 : aFrac1;
336 |     int shiftCount = int ( countLeadingZeros32( top ) ) - 11;
337 | 
338 |     if ( aFrac0 != 0u ) {
339 |         shortShift64Left( aFrac0, aFrac1, shiftCount, zFrac0Ptr, zFrac1Ptr );
340 |         zExpPtr = 1u - uint ( shiftCount );
341 |         return;
342 |     }
343 |     /* Only `aFrac1' is populated; a negative count splits it over both words. */
344 |     if ( shiftCount < 0 ) {
345 |         zFrac0Ptr = aFrac1>>( - shiftCount );
346 |         zFrac1Ptr = aFrac1<<( shiftCount & 31 );
347 |     } else {
348 |         zFrac0Ptr = aFrac1<<shiftCount;
349 |         zFrac1Ptr = 0u;
350 |     }
351 |     zExpPtr = uint ( - shiftCount - 31 );
352 | }
353 | 
354 | /* Returns true if `a' is a NaN: exponent field all ones and a nonzero
355 |  * fraction.  `a.x' is the most significant word, as in extractFloat64Exp.
356 |  */
357 | bool
358 | float64_is_nan( uvec2 a )
359 | {
360 |     return ( 0xFFE00000u <= ( a.x<<1 ) ) &&
361 |         ( ( a.y != 0u ) || ( ( a.x & 0x000FFFFFu ) != 0u ) );
362 | }
363 | 
364 | /* Returns true if `a' is a signaling NaN: exponent all ones, quiet bit (bit
365 |  * 19 of the high word `a.x') clear, and a nonzero remaining fraction.
366 |  */
367 | bool
368 | float64_is_signaling_nan( uvec2 a )
369 | {
370 |     return ( ( ( a.x>>19 ) & 0xFFFu ) == 0xFFEu ) &&
371 |         ( ( a.y != 0u ) || ( ( a.x & 0x0007FFFFu ) != 0u ) );
372 | }
373 | 
374 | /* Takes two double-precision floating-point values `a' and `b', at least one
375 |  * of which is expected to be a NaN, and picks the operand to return: `b' if
376 |  * `a' is not a NaN or if `a' is a signaling NaN and `b' is a NaN, else `a'.
377 |  * Bit 19 of the `y' word of both operands is set beforehand.
378 |  * NOTE(review): the extractFloat64* helpers treat `x' as the high word, so
379 |  * the quiet bit would be expected in `x'; the [test] vectors of this file
380 |  * currently expect it in `y' -- confirm before changing.
381 |  */
382 | uvec2
383 | propagateFloat64NaN( uvec2 a, uvec2 b )
384 | {
385 |     bool aIsNaN = float64_is_nan( a );
386 |     /* A signaling `a' gives way to `b' whenever `b' is a NaN as well. */
387 |     bool preferB = float64_is_signaling_nan( a ) && float64_is_nan( b );
388 | 
389 |     /* The classification above is done before the operands are modified. */
390 |     a.y |= 0x00080000u;
391 |     b.y |= 0x00080000u;
392 | 
393 |     if ( ! aIsNaN ) {
394 |         return b;
395 |     }
396 |     return preferB ? b : a;
397 | }
398 | 
399 | /* Returns the fraction bits of `a': the low 20 bits of `a.x', and `a.y'. */
400 | uvec2
401 | extractFloat64Frac( uvec2 a )
402 | {
403 |     return a & uvec2( 0x000FFFFFu, 0xFFFFFFFFu );
404 | }
405 | 
406 | /* Returns the 11-bit exponent field of `a' (bits 20 to 30 of `a.x'). */
407 | uint
408 | extractFloat64Exp( uvec2 a )
409 | {
410 |     return ( a.x<<1 )>>21;
411 | }
412 | 
413 | /* Returns the sign bit of `a' (bit 31 of `a.x') as 0 or 1. */
414 | uint
415 | extractFloat64Sign( uvec2 a )
416 | {
417 |     return uint ( ( a.x & 0x80000000u ) != 0u );
418 | }
419 | 
420 | /* Returns the product of the double-precision floating-point values `a' and
421 |  * `b'.  A NaN operand is handed to propagateFloat64NaN, infinity times zero
422 |  * yields the all-ones NaN pattern, and every other result is rounded and
423 |  * packed by roundAndPackFloat64.
424 |  */
425 | uvec2
426 | mul_fp64( uvec2 a, uvec2 b )
427 | {
428 |     uvec2 aFrac = extractFloat64Frac( a );
429 |     uvec2 bFrac = extractFloat64Frac( b );
430 |     uint aExp = extractFloat64Exp( a );
431 |     uint bExp = extractFloat64Exp( b );
432 |     uint zSign = extractFloat64Sign( a ) ^ extractFloat64Sign( b );
433 |     bool aIsZero = ( aExp | aFrac.x | aFrac.y ) == 0u;
434 |     bool bIsZero = ( bExp | bFrac.x | bFrac.y ) == 0u;
435 |     uint zExp;
436 |     uint zFrac0;
437 |     uint zFrac1;
438 |     uint zFrac2;
439 |     uint zFrac3;
440 | 
441 |     /* Operands with an all-ones exponent: NaN or infinity. */
442 |     if ( ( aExp == 0x7FFu ) || ( bExp == 0x7FFu ) ) {
443 |         bool aIsNaN = ( aExp == 0x7FFu ) && ( ( aFrac.x | aFrac.y ) != 0u );
444 |         bool bIsNaN = ( bExp == 0x7FFu ) && ( ( bFrac.x | bFrac.y ) != 0u );
445 | 
446 |         if ( aIsNaN || bIsNaN ) {
447 |             return propagateFloat64NaN( a, b );
448 |         }
449 |         /* Infinity times zero: return the all-ones pattern. */
450 |         if ( aIsZero || bIsZero ) {
451 |             return uvec2(0xFFFFFFFFu, 0xFFFFFFFFu);
452 |         }
453 |         return packFloat64( zSign, 0x7FFu, 0u, 0u );
454 |     }
455 | 
456 |     /* A zero operand gives a zero carrying the product's sign. */
457 |     if ( aIsZero || bIsZero ) {
458 |         return packFloat64( zSign, 0u, 0u, 0u );
459 |     }
460 | 
461 |     /* Subnormal operands are normalized so that the code below can treat
462 |      * every significand alike.
463 |      */
464 |     if ( aExp == 0u ) {
465 |         normalizeFloat64Subnormal( aFrac.x, aFrac.y, aExp, aFrac.x, aFrac.y );
466 |     }
467 |     if ( bExp == 0u ) {
468 |         normalizeFloat64Subnormal( bFrac.x, bFrac.y, bExp, bFrac.x, bFrac.y );
469 |     }
470 | 
471 |     zExp = uint ( int ( aExp + bExp ) - 0x400 );
472 |     /* `a' gets its integer bit made explicit.  `b' is shifted without one;
473 |      * the add64 below adds `a' once to the product to make up for it.
474 |      */
475 |     aFrac.x |= 0x00100000u;
476 |     shortShift64Left( bFrac.x, bFrac.y, 12, bFrac.x, bFrac.y );
477 |     mul64To128(
478 |         aFrac.x, aFrac.y, bFrac.x, bFrac.y, zFrac0, zFrac1, zFrac2, zFrac3 );
479 |     add64( zFrac0, zFrac1, aFrac.x, aFrac.y, zFrac0, zFrac1 );
480 | 
481 |     /* Fold the lowest product word into a sticky bit. */
482 |     zFrac2 |= uint ( zFrac3 != 0u );
483 |     if ( 0x00200000u <= zFrac0 ) {
484 |         /* The significand grew past bit 20: shift it back by one bit. */
485 |         shift64ExtraRightJamming(
486 |             zFrac0, zFrac1, zFrac2, 1, zFrac0, zFrac1, zFrac2 );
487 |         ++zExp;
488 |     }
489 | 
490 |     return roundAndPackFloat64( zSign, zExp, zFrac0, zFrac1, zFrac2 );
491 | }
492 | 
493 | uniform uvec2 a;
494 | uniform uvec2 b;
495 | uniform uvec2 expected;
496 | 
497 | void main()
498 | {
499 |     /* Green when mul_fp64 returns the expected bit pattern, red otherwise. */
500 |     bool pass = mul_fp64(a,b) == expected;
501 | 
502 |     gl_FragColor = pass
503 |         ? vec4(0.0, 1.0, 0.0, 1.0)
504 |         : vec4(1.0, 0.0, 0.0, 1.0);
505 | }
506 | 
507 | [test]
508 | # A bunch of tests to run.  The 'uniform' lines set the uniforms.  The
509 | # 'draw rect' line draws a rectangle that covers the whole window.
510 | # The 'probe all' line verifies that every pixel contains the expected
511 | # color.
512 | 
513 | # Try +0.0 and +0.0
514 | uniform uvec2 a        0x00000000 0x00000000
515 | uniform uvec2 b        0x00000000 0x00000000
516 | uniform uvec2 expected 0x00000000 0x00000000
517 | draw rect -1 -1 2 2
518 | probe all rgba 0.0 1.0 0.0 1.0
519 | 
520 | # Try +0.0 and -0.0
521 | uniform uvec2 a        0x00000000 0x00000000
522 | uniform uvec2 b        0x80000000 0x00000000
523 | uniform uvec2 expected 0x80000000 0x00000000
524 | draw rect -1 -1 2 2
525 | probe all rgba 0.0 1.0 0.0 1.0
526 | 
527 | # Try +0.0 and +50.0
528 | uniform uvec2 a        0x00000000 0x00000000
529 | uniform uvec2 b        0x40490000 0x00000000
530 | uniform uvec2 expected 0x00000000 0x00000000
531 | draw rect -1 -1 2 2
532 | probe all rgba 0.0 1.0 0.0 1.0
533 | 
534 | # Try +1.0 and +50.0
535 | uniform uvec2 a        0x3FF00000 0x00000000
536 | uniform uvec2 b        0x40490000 0x00000000
537 | uniform uvec2 expected 0x40490000 0x00000000
538 | draw rect -1 -1 2 2
539 | probe all rgba 0.0 1.0 0.0 1.0
540 | 
541 | # Try +2.0 and +2.0
542 | uniform uvec2 a        0x40000000 0x00000000
543 | uniform uvec2 b        0x40000000 0x00000000
544 | uniform uvec2 expected 0x40100000 0x00000000
545 | draw rect -1 -1 2 2
546 | probe all rgba 0.0 1.0 0.0 1.0
547 | 
548 | # Try +1.5 and +INF
549 | uniform uvec2 a        0x3FF80000 0x00000000
550 | uniform uvec2 b        0x7FF00000 0x00000000
551 | uniform uvec2 expected 0x7FF00000 0x00000000
552 | draw rect -1 -1 2 2
553 | probe all rgba 0.0 1.0 0.0 1.0
554 | 
555 | # Try +0.0 and +INF
556 | uniform uvec2 a        0x00000000 0x00000000
557 | uniform uvec2 b        0x7FF00000 0x00000000
558 | uniform uvec2 expected 0xFFFFFFFF 0xFFFFFFFF
559 | draw rect -1 -1 2 2
560 | probe all rgba 0.0 1.0 0.0 1.0
561 | 
562 | # Try +1.5 and NaN
563 | uniform uvec2 a        0x3FF80000 0x00000000
564 | uniform uvec2 b        0x7FF00000 0x00000001
565 | uniform uvec2 expected 0x7FF00000 0x00080001
566 | draw rect -1 -1 2 2
567 | probe all rgba 0.0 1.0 0.0 1.0
568 | 
569 | # Try +0.0 and NaN
570 | uniform uvec2 a        0x00000000 0x00000000
571 | uniform uvec2 b        0x7FF00000 0x00000001
572 | uniform uvec2 expected 0x7FF00000 0x00080001
573 | draw rect -1 -1 2 2
574 | probe all rgba 0.0 1.0 0.0 1.0
575 | 


--------------------------------------------------------------------------------
/neg_fp64.shader_test:
--------------------------------------------------------------------------------
 1 | # Negate value of a double
 2 | # IEEE 754 compliant
 3 | 
 4 | [require]
 5 | GLSL >= 1.30
 6 | 
 7 | [vertex shader]
 8 | #version 130
 9 | 
10 | void main()
11 | {
12 |     gl_Position = gl_Vertex;   /* pass the vertex position through unchanged */
13 | }
14 | 
15 | [fragment shader]
16 | #version 130
17 | 
18 | /* Negates the double-precision value `a' by flipping its sign bit (bit 31
19 |  * of the high word `a.x').  Every other bit is returned unchanged.
20 |  */
21 | uvec2
22 | neg_fp64( uvec2 a )
23 | {
24 |     uint flipped = a.x ^ 0x80000000u;
25 |     return uvec2( flipped, a.y );
26 | }
27 | 
28 | uniform uvec2 a;
29 | uniform uvec2 expected;
30 | 
31 | void main()
32 | {
33 |     /* Green when neg_fp64 returns the expected bit pattern, red otherwise. */
34 |     bool pass = neg_fp64(a) == expected;
35 | 
36 |     gl_FragColor = pass
37 |         ? vec4(0.0, 1.0, 0.0, 1.0)
38 |         : vec4(1.0, 0.0, 0.0, 1.0);
39 | }
40 | 
41 | [test]
42 | # A bunch of tests to run.  The 'uniform' lines set the uniforms.  The
43 | # 'draw rect' line draws a rectangle that covers the whole window.
44 | # The 'probe all' line verifies that every pixel contains the expected
45 | # color.
46 | 
47 | # Try +0.0
48 | uniform uvec2 a        0x00000000 0x00000000
49 | uniform uvec2 expected 0x80000000 0x00000000
50 | draw rect -1 -1 2 2
51 | probe all rgba 0.0 1.0 0.0 1.0
52 | 
53 | # Try -0.0
54 | uniform uvec2 a        0x80000000 0x00000000
55 | uniform uvec2 expected 0x00000000 0x00000000
56 | draw rect -1 -1 2 2
57 | probe all rgba 0.0 1.0 0.0 1.0
58 | 


--------------------------------------------------------------------------------