├── L2_by_pi_bits.asm
├── Lsincos_array.asm
├── Lsincosf_array.asm
├── _chgsign.c
├── _chgsignf.c
├── _copysign.c
├── _copysignf.c
├── _finite.c
├── _finitef.c
├── acos.c
├── acosf.c
├── asin.c
├── asinf.c
├── atan.c
├── atan2.c
├── atan2f.c
├── atanf.c
├── cabs.c
├── cabsf.c
├── ceil.c
├── ceilf.c
├── cos.asm
├── cosf.asm
├── cosh.c
├── coshf.c
├── exp.asm
├── exp2.c
├── exp_special.c
├── expf.asm
├── floor.c
├── floorf.c
├── fm.inc
├── fma3_available.c
├── fmod.asm
├── fmodf.asm
├── hypot.c
├── hypotf.c
├── libm.h
├── libm_errno.h
├── libm_inlines.h
├── libm_new.h
├── libm_util.h
├── log.asm
├── log10.asm
├── log10_128_lead_tail_table.asm
├── log10_256_lead_tail_table.asm
├── log_128_lead_tail_table.asm
├── log_256_lead_tail_table.asm
├── log_F_inv_dword_table.asm
├── log_F_inv_qword_table.asm
├── log_special.c
├── logb.c
├── logbf.c
├── logf.asm
├── modf.c
├── modff.c
├── pow.asm
├── pow_special.c
├── remainder.c
├── remainder_piby2.c
├── remainder_piby2_forAsm.asm
├── remainder_piby2_forFMA3.asm
├── remainder_piby2f.c
├── remainder_piby2f_forAsm.asm
├── remainder_piby2f_forC.asm
├── remainderf.c
├── simd.h
├── sin.asm
├── sincos_special.c
├── sinf.asm
├── sinh.c
├── sinhf.c
├── sqrt.c
├── sqrtf.c
├── tan.asm
├── tan.c
├── tanf.asm
├── tanf.c
├── tanh.c
├── tanhf.c
├── two_to_jby64_head_tail_table.asm
└── two_to_jby64_table.asm


/L2_by_pi_bits.asm:
--------------------------------------------------------------------------------
 1 | ;
 2 | ; MIT License
 3 | ; -----------
 4 | ; 
 5 | ; Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
 6 | ; 
 7 | ; Permission is hereby granted, free of charge, to any person obtaining a copy
 8 | ; of this Software and associated documentation files (the "Software"), to deal
 9 | ; in the Software without restriction, including without limitation the rights
10 | ; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | ; copies of the Software, and to permit persons to whom the Software is
12 | ; furnished to do so, subject to the following conditions:
13 | ; 
14 | ; The above copyright notice and this permission notice shall be included in
15 | ; all copies or substantial portions of the Software.
16 | ; 
17 | ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | ; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 | ; THE SOFTWARE.
24 | ;
25 | ;;
26 | ;; Defines __L_2_by_pi_bits array
27 | ;; Used in trigonometric argument reduction
28 | ;;
29 | 
30 | .const                                  ; constant (read-only) data segment
31 | ; 158 bytes in total: 19 rows of 8 bytes followed by a final row of 6 bytes.
32 | ALIGN 16                                ; place the table on a 16-byte boundary
33 | PUBLIC __L_2_by_pi_bits                 ; make the symbol visible to other modules
34 | __L_2_by_pi_bits DB 224, 241,  27, 193,  12,  88,  33, 116
35 |                 DB  53, 126, 196, 126, 237, 175, 169,  75
36 |                 DB  74,  41, 222, 231,  28, 244, 236, 197
37 |                 DB 151, 175,  31, 235, 158, 212, 181, 168
38 |                 DB 127, 121, 154, 253,  24,  61, 221,  38
39 |                 DB  44, 159,  60, 251, 217, 180, 125, 180
40 |                 DB  41, 104,  45,  70, 188, 188,  63,  96
41 |                 DB  22, 120, 255,  95, 226, 127, 236, 160
42 |                 DB 228, 247,  46, 126,  17, 114, 210, 231
43 |                 DB  76,  13, 230,  88,  71, 230,   4, 249
44 |                 DB 125, 209, 154, 192, 113, 166,  19,  18
45 |                 DB 237, 186, 212, 215,   8, 162, 251, 156
46 |                 DB 166, 196, 114, 172, 119, 248, 115,  72
47 |                 DB  70,  39, 168, 187,  36,  25, 128,  75
48 |                 DB  55,   9, 233, 184, 145, 220, 134,  21
49 |                 DB 239, 122, 175, 142,  69, 249,   7,  65
50 |                 DB  14, 241, 100,  86, 138, 109,   3, 119
51 |                 DB 211, 212,  71,  95, 157, 240, 167,  84
52 |                 DB  16,  57, 185,  13, 230, 139,   2,   0   ; read backwards from the 2: 02 8B E6 0D B9 39 10 54 ... are the leading hex digits of 8/pi (2.8BE60DB9391054...), so the bits of 2/pi are stored least-significant byte first
53 |                 DB   0,   0,   0,   0,   0,   0            ; final row: 6 zero bytes; NOTE(review): binary-point position is decided by the consumer -- confirm there
54 | END
55 | 


--------------------------------------------------------------------------------
/Lsincos_array.asm:
--------------------------------------------------------------------------------
 1 | ;;
 2 | ;
 3 | ; MIT License
 4 | ; -----------
 5 | ; 
 6 | ; Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
 7 | ; 
 8 | ; Permission is hereby granted, free of charge, to any person obtaining a copy
 9 | ; of this Software and associated documentation files (the "Software"), to deal
10 | ; in the Software without restriction, including without limitation the rights
11 | ; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 | ; copies of the Software, and to permit persons to whom the Software is
13 | ; furnished to do so, subject to the following conditions:
14 | ; 
15 | ; The above copyright notice and this permission notice shall be included in
16 | ; all copies or substantial portions of the Software.
17 | ; 
18 | ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 | ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 | ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 | ; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 | ; THE SOFTWARE.
25 | ;
26 | ;; Defines __Lcosarray and __Lsinarray arrays.
27 | ;; Used in sin.asm and cos.asm
28 | ;;
29 | 
30 | .const                                  ; constant (read-only) data segment
31 | ; Layout: every coefficient is a quadword followed by a zero quadword, so each entry fills one 16-byte slot.
32 | ALIGN 16                                ; cosine table starts on a 16-byte boundary
33 | PUBLIC __Lcosarray                      ; exported: cosine polynomial coefficients c1..c6
34 | __Lcosarray DQ    03fa5555555555555h                          ; 0.0416667           c1
35 |             DQ    0                                           ; zero upper half of the c1 slot
36 |             DQ    0bf56c16c16c16967h                          ; -0.00138889         c2
37 |             DQ    0                                           ; zero upper half of the c2 slot
38 |             DQ    03EFA01A019F4EC91h                          ; 2.48016e-005        c3
39 |             DQ    0                                           ; zero upper half of the c3 slot
40 |             DQ    0bE927E4FA17F667Bh                          ; -2.75573e-007       c4
41 |             DQ    0                                           ; zero upper half of the c4 slot
42 |             DQ    03E21EEB690382EECh                          ; 2.08761e-009        c5
43 |             DQ    0                                           ; zero upper half of the c5 slot
44 |             DQ    0bDA907DB47258AA7h                          ; -1.13826e-011       c6
45 |             DQ    0                                           ; zero upper half of the c6 slot
46 | ; Sine table uses the same slot layout as the cosine table above.
47 | ALIGN 16                                ; sine table starts on a 16-byte boundary
48 | PUBLIC __Lsinarray                      ; exported: sine polynomial coefficients s1..s6
49 | __Lsinarray DQ    0bfc5555555555555h                          ; -0.166667           s1
50 |             DQ    0                                           ; zero upper half of the s1 slot
51 |             DQ    03f81111111110bb3h                          ; 0.00833333          s2
52 |             DQ    0                                           ; zero upper half of the s2 slot
53 |             DQ    0bf2a01a019e83e5ch                          ; -0.000198413        s3
54 |             DQ    0                                           ; zero upper half of the s3 slot
55 |             DQ    03ec71de3796cde01h                          ; 2.75573e-006        s4
56 |             DQ    0                                           ; zero upper half of the s4 slot
57 |             DQ    0be5ae600b42fdfa7h                          ; -2.50511e-008       s5
58 |             DQ    0                                           ; zero upper half of the s5 slot
59 |             DQ    03de5e0b2f9a43bb8h                          ; 1.59181e-010        s6
60 |             DQ    0                                           ; zero upper half of the s6 slot
61 | 
62 | END
63 | 


--------------------------------------------------------------------------------
/Lsincosf_array.asm:
--------------------------------------------------------------------------------
 1 | ;;
 2 | ;
 3 | ; MIT License
 4 | ; -----------
 5 | ; 
 6 | ; Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
 7 | ; 
 8 | ; Permission is hereby granted, free of charge, to any person obtaining a copy
 9 | ; of this Software and associated documentation files (the "Software"), to deal
10 | ; in the Software without restriction, including without limitation the rights
11 | ; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 | ; copies of the Software, and to permit persons to whom the Software is
13 | ; furnished to do so, subject to the following conditions:
14 | ; 
15 | ; The above copyright notice and this permission notice shall be included in
16 | ; all copies or substantial portions of the Software.
17 | ; 
18 | ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 | ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 | ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 | ; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 | ; THE SOFTWARE.
25 | ;
26 | ;; Defines __Lcosfarray and __Lsinfarray arrays.
27 | ;; Used in sin.asm and cos.asm
28 | ;; These coefficients are actually from Taylor series.
29 | ;;
30 | 
31 | .const                                  ; constant (read-only) data segment
32 | ; Layout: coefficients are consecutive quadwords (8 bytes each) with no padding between entries.
33 | ALIGN 16                                ; cosine table starts on a 16-byte boundary
34 | PUBLIC __Lcosfarray                     ; exported: Taylor coefficients c0..c4 for cosine
35 | __Lcosfarray DQ    0bfe0000000000000h                 ; -0.5              c0 = -1/2!
36 |     DQ    03fa5555555555555h                          ; 0.0416667         c1 =  1/4!
37 |     DQ    0bf56c16c16c16c16h                          ; -0.00138889       c2 = -1/6!
38 |     DQ    03EFA01A01A01A019h                          ; 2.48016e-005      c3 =  1/8!
39 |     DQ    0be927e4fb7789f5ch                          ; -2.75573e-007     c4 = -1/10!
40 | ; Sine table: same packed layout as the cosine table.
41 | ALIGN 16                                ; sine table starts on a 16-byte boundary
42 | PUBLIC __Lsinfarray                     ; exported: Taylor coefficients s1..s4 for sine
43 | __Lsinfarray DQ    0bfc5555555555555h                 ; -0.166667         s1 = -1/3!
44 |     DQ    03f81111111111111h                          ; 0.00833333        s2 =  1/5!
45 |     DQ    0bf2a01a01a01a01ah                          ; -0.000198413      s3 = -1/7!
46 |     DQ    03ec71de3a556c734h                          ; 2.75573e-006      s4 =  1/9!
47 | 
48 | END
49 | 


--------------------------------------------------------------------------------
/_chgsign.c:
--------------------------------------------------------------------------------
 1 | 
 2 | /*******************************************************************************
 3 | MIT License
 4 | -----------
 5 | 
 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
 7 | 
 8 | Permission is hereby granted, free of charge, to any person obtaining a copy
 9 | of this Software and associated documentation files (the "Software"), to deal
10 | in the Software without restriction, including without limitation the rights
11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 | copies of the Software, and to permit persons to whom the Software is
13 | furnished to do so, subject to the following conditions:
14 | 
15 | The above copyright notice and this permission notice shall be included in
16 | all copies or substantial portions of the Software.
17 | 
18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 | THE SOFTWARE.
25 | *******************************************************************************/
26 | 
27 | #include "libm.h"
28 | #include "libm_util.h"
29 | 
30 | double FN_PROTOTYPE(_chgsign)(double x)
31 | {
32 |   /* Returns x with its sign reversed, by flipping the sign bit of the
33 |      raw bit pattern.  NaNs are not considered special; their sign bits
34 |      are handled the same as for any other number */
35 |   unsigned long u;        /* bit pattern of x; NOTE(review): assumes unsigned long is 64 bits wide in this build -- confirm */
36 |   GET_BITS_DP64(x, u);    /* u = bits of x (macro presumably supplied by libm_util.h) */
37 |   u ^= SIGNBIT_DP64;      /* XOR toggles only the bit(s) set in the sign mask */
38 |   PUT_BITS_DP64(u, x);    /* x = double carrying the modified bit pattern */
39 |   return x;
40 | }
41 | 
42 | 


--------------------------------------------------------------------------------
/_chgsignf.c:
--------------------------------------------------------------------------------
 1 | 
 2 | /*******************************************************************************
 3 | MIT License
 4 | -----------
 5 | 
 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
 7 | 
 8 | Permission is hereby granted, free of charge, to any person obtaining a copy
 9 | of this Software and associated documentation files (the "Software"), to deal
10 | in the Software without restriction, including without limitation the rights
11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 | copies of the Software, and to permit persons to whom the Software is
13 | furnished to do so, subject to the following conditions:
14 | 
15 | The above copyright notice and this permission notice shall be included in
16 | all copies or substantial portions of the Software.
17 | 
18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 | THE SOFTWARE.
25 | *******************************************************************************/
26 | 
27 | #include "libm.h"
28 | #include "libm_util.h"
29 | 
30 | float FN_PROTOTYPE(_chgsignf)(float x)
31 | {
32 |   /* Returns x with its sign reversed, by flipping the sign bit of the
33 |      raw 32-bit pattern.  NaNs are not considered special; their sign bits
34 |      are handled the same as for any other number */
35 |   unsigned int u;         /* bit pattern of x */
36 |   GET_BITS_SP32(x, u);    /* u = bits of x (macro presumably supplied by libm_util.h) */
37 |   u ^= SIGNBIT_SP32;      /* XOR toggles only the bit(s) set in the sign mask */
38 |   PUT_BITS_SP32(u, x);    /* x = float carrying the modified bit pattern */
39 |   return x;
40 | }
41 | 


--------------------------------------------------------------------------------
/_copysign.c:
--------------------------------------------------------------------------------
 1 | 
 2 | /*******************************************************************************
 3 | MIT License
 4 | -----------
 5 | 
 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
 7 | 
 8 | Permission is hereby granted, free of charge, to any person obtaining a copy
 9 | of this Software and associated documentation files (the "Software"), to deal
10 | in the Software without restriction, including without limitation the rights
11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 | copies of the Software, and to permit persons to whom the Software is
13 | furnished to do so, subject to the following conditions:
14 | 
15 | The above copyright notice and this permission notice shall be included in
16 | all copies or substantial portions of the Software.
17 | 
18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 | THE SOFTWARE.
25 | *******************************************************************************/
26 | 
27 | #include "libm.h"
28 | #include "libm_util.h"
29 | 
30 | /* Returns the absolute value of x with the sign of y, i.e. x with its
31 |    sign bit made equal to the sign bit of y.  NaNs are not considered
32 |    special; their sign bits are handled the same as for any other number. */
33 | 
34 | double FN_PROTOTYPE(_copysign)(double x, double y)
35 | {
36 |   /* NOTE(review): assumes unsigned long is 64 bits wide in this build -- confirm */
37 |   unsigned long ux, uy;
38 |   GET_BITS_DP64(x, ux);                    /* ux = bits of x */
39 |   GET_BITS_DP64(y, uy);                    /* uy = bits of y */
40 |   if ((ux ^ uy) & SIGNBIT_DP64)            /* nonzero only when the two sign bits differ */
41 |     PUT_BITS_DP64(ux ^ SIGNBIT_DP64, x);   /* flip the sign of x so that it matches y */
42 |   return x;                                /* x is returned untouched when the signs already agree */
43 | 
44 | }
45 | 


--------------------------------------------------------------------------------
/_copysignf.c:
--------------------------------------------------------------------------------
 1 | 
 2 | /*******************************************************************************
 3 | MIT License
 4 | -----------
 5 | 
 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
 7 | 
 8 | Permission is hereby granted, free of charge, to any person obtaining a copy
 9 | of this Software and associated documentation files (the "Software"), to deal
10 | in the Software without restriction, including without limitation the rights
11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 | copies of the Software, and to permit persons to whom the Software is
13 | furnished to do so, subject to the following conditions:
14 | 
15 | The above copyright notice and this permission notice shall be included in
16 | all copies or substantial portions of the Software.
17 | 
18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 | THE SOFTWARE.
25 | *******************************************************************************/
26 | 
27 | #include "libm.h"
28 | #include "libm_util.h"
29 | 
30 |   /* Returns the absolute value of x with the sign of y, i.e. x with its
31 |      sign bit made equal to the sign bit of y.  NaNs are not considered
32 |      special; their sign bits are handled the same as for any other number. */
33 | 
34 | float FN_PROTOTYPE(_copysignf)(float x, float y)
35 | {
36 |   unsigned int ux, uy;                     /* 32-bit patterns of x and y */
37 |   GET_BITS_SP32(x, ux);                    /* ux = bits of x */
38 |   GET_BITS_SP32(y, uy);                    /* uy = bits of y */
39 |   if ((ux ^ uy) & SIGNBIT_SP32)            /* nonzero only when the two sign bits differ */
40 |      PUT_BITS_SP32(ux ^ SIGNBIT_SP32, x);  /* flip the sign of x so that it matches y */
41 |   return x;                                /* x is returned untouched when the signs already agree */
42 | }
43 | 


--------------------------------------------------------------------------------
/_finite.c:
--------------------------------------------------------------------------------
 1 | 
 2 | /*******************************************************************************
 3 | MIT License
 4 | -----------
 5 | 
 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
 7 | 
 8 | Permission is hereby granted, free of charge, to any person obtaining a copy
 9 | of this Software and associated documentation files (the "Software"), to deal
10 | in the Software without restriction, including without limitation the rights
11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 | copies of the Software, and to permit persons to whom the Software is
13 | furnished to do so, subject to the following conditions:
14 | 
15 | The above copyright notice and this permission notice shall be included in
16 | all copies or substantial portions of the Software.
17 | 
18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 | THE SOFTWARE.
25 | *******************************************************************************/
26 | 
27 | #include "libm.h"
28 | #include "libm_util.h"
29 | 
30 | /* Returns 0 if x is infinite or NaN, otherwise returns 1 */
31 | 
32 | int FN_PROTOTYPE(_finite)(double x)
33 | {
34 |   /* Branch-free test: clear the sign bit to get the pattern of |x|, then subtract the +Inf pattern. */
35 |   /* The unsigned subtraction wraps (bit 63 set) only when |x| is below +Inf; Inf and NaN leave bit 63 clear. */
36 |   unsigned long ux;       /* NOTE(review): the shift by 63 assumes a 64-bit unsigned long -- confirm for this build */
37 |   GET_BITS_DP64(x, ux);   /* ux = bits of x */
38 |   return (int)(((ux & ~SIGNBIT_DP64) - PINFBITPATT_DP64) >> 63);
39 | }
40 | 


--------------------------------------------------------------------------------
/_finitef.c:
--------------------------------------------------------------------------------
 1 | 
 2 | /*******************************************************************************
 3 | MIT License
 4 | -----------
 5 | 
 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
 7 | 
 8 | Permission is hereby granted, free of charge, to any person obtaining a copy
 9 | of this Software and associated documentation files (the "Software"), to deal
10 | in the Software without restriction, including without limitation the rights
11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 | copies of the Software, and to permit persons to whom the Software is
13 | furnished to do so, subject to the following conditions:
14 | 
15 | The above copyright notice and this permission notice shall be included in
16 | all copies or substantial portions of the Software.
17 | 
18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 | THE SOFTWARE.
25 | *******************************************************************************/
26 | 
27 | #include "libm.h"
28 | #include "libm_util.h"
29 | 
30 | /* Returns 0 if x is infinite or NaN, otherwise returns 1 */
31 | 
32 | int FN_PROTOTYPE(_finitef)(float x)
33 | {
34 |   /* Branch-free test: clear the sign bit to get the pattern of |x|, then subtract the +Inf pattern. */
35 |   /* The unsigned subtraction wraps (bit 31 set) only when |x| is below +Inf; Inf and NaN leave bit 31 clear. */
36 |   unsigned int ux;        /* 32-bit pattern of x */
37 |   GET_BITS_SP32(x, ux);   /* ux = bits of x */
38 |   return (int)(((ux & ~SIGNBIT_SP32) - PINFBITPATT_SP32) >> 31);
39 | 
40 | }
41 | 


--------------------------------------------------------------------------------
/acos.c:
--------------------------------------------------------------------------------
  1 | 
  2 | /*******************************************************************************
  3 | MIT License
  4 | -----------
  5 | 
  6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
  7 | 
  8 | Permission is hereby granted, free of charge, to any person obtaining a copy
  9 | of this Software and associated documentation files (the "Software"), to deal
 10 | in the Software without restriction, including without limitation the rights
 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 12 | copies of the Software, and to permit persons to whom the Software is
 13 | furnished to do so, subject to the following conditions:
 14 | 
 15 | The above copyright notice and this permission notice shall be included in
 16 | all copies or substantial portions of the Software.
 17 | 
 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 24 | THE SOFTWARE.
 25 | *******************************************************************************/
 26 | 
 27 | #include "libm.h"
 28 | #include "libm_util.h"
 29 | 
 30 | #define USE_VAL_WITH_FLAGS
 31 | #define USE_NAN_WITH_FLAGS
 32 | #define USE_HANDLE_ERROR
 33 | #include "libm_inlines.h"
 34 | #undef USE_NAN_WITH_FLAGS
 35 | #undef USE_VAL_WITH_FLAGS
 36 | #undef USE_HANDLE_ERROR
 37 | 
 38 | #include "libm_errno.h"
 39 | 
 40 | 
 41 | #pragma function(acos)
 42 | 
 43 | double FN_PROTOTYPE(acos)(double x)
 44 | {
 45 |   /* Computes arccos(x) for a double argument.
 46 |      The argument is first reduced by noting that arccos(x)
 47 |      is invalid for abs(x) > 1. For denormal and small
 48 |      arguments arccos(x) = pi/2 to machine accuracy.
 49 |      Remaining argument ranges are handled as follows.
 50 |      For abs(x) < 0.5 use
 51 |      arccos(x) = pi/2 - arcsin(x)
 52 |      = pi/2 - (x + x^3*R(x^2))
 53 |      where R(x^2) is a rational minimax approximation to
 54 |      (arcsin(x) - x)/x^3.
 55 |      For abs(x) >= 0.5 exploit the identities:
 56 |      arccos(|x|) = 2*arcsin(sqrt((1-|x|)/2)),  arccos(-|x|) = pi - arccos(|x|)
 57 |      together with the above rational approximation, and
 58 |      reconstruct the terms carefully.
 59 |   */
 60 | 
 61 |   /* Some constants and split constants.
 62 |      piby2_head + piby2_tail together carry pi/2 beyond double precision. */
 63 |   static const double
 64 |     pi         = 3.1415926535897933e+00, /* 0x400921fb54442d18 */
 65 |     piby2      = 1.5707963267948965580e+00, /* 0x3ff921fb54442d18 */
 66 |     piby2_head = 1.5707963267948965580e+00, /* 0x3ff921fb54442d18 */
 67 |     piby2_tail = 6.12323399573676603587e-17; /* 0x3c91a62633145c07 */
 68 | 
 69 |   double u, y, s=0.0, r;          /* u = r*R(r); y = |x| or sqrt(r); s = sqrt(r); r = reduced argument */
 70 |   int xexp, xnan, transform=0;    /* unbiased exponent, NaN flag, and flag for the |x| >= 0.5 path */
 71 |   /* Split x into raw bits, magnitude bits, sign bit, NaN test and unbiased exponent. */
 72 |   unsigned long ux, aux, xneg;    /* NOTE(review): these hold 64-bit patterns -- confirm unsigned long is 64 bits wide in this build */
 73 |   GET_BITS_DP64(x, ux);           /* ux = bits of x */
 74 |   aux = ux & ~SIGNBIT_DP64;       /* bits of |x| */
 75 |   xneg = (ux & SIGNBIT_DP64);     /* nonzero when the sign bit of x is set */
 76 |   xnan = (aux > PINFBITPATT_DP64); /* magnitude pattern above +Inf means NaN */
 77 |   xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
 78 | 
 79 |   /* Special cases: NaN, tiny |x|, and |x| >= 1 */
 80 | 
 81 |   if (xnan)
 82 |     { /* NaN input: hand back the same pattern with mantissa bit 51 set (the IEEE-754 quiet bit) */
 83 |       return _handle_error("acos", OP_ACOS, ux|0x0008000000000000, _DOMAIN,
 84 |                           0, EDOM, x, 0.0, 1);
 85 |     }
 86 |   else if (xexp < -56)
 87 |     { /* |x| < 2^-56 (includes zero and denormals): x small enough that arccos(x) = pi/2 */
 88 |       return val_with_flags(piby2, AMD_F_INEXACT);
 89 |     }
 90 |   else if (xexp >= 0)
 91 |     { /* abs(x) >= 1.0: only +1 and -1 are in the domain, everything else is a domain error */
 92 |       if (x == 1.0)
 93 |         return 0.0;
 94 |       else if (x == -1.0)
 95 |         return val_with_flags(pi, AMD_F_INEXACT);
 96 |       else
 97 |         return _handle_error("acos", OP_ACOS, INDEFBITPATT_DP64, _DOMAIN,
 98 |                             AMD_F_INVALID, EDOM, x, 0.0, 1);
 99 |     }
100 | 
101 |   if (xneg) y = -x;               /* y = |x| */
102 |   else y = x;
103 | 
104 |   transform = (xexp >= -1); /* abs(x) >= 0.5 */
105 | 
106 |   if (transform)
107 |     { /* Transform: r = (1-|x|)/2 and y = s = sqrt(r), which lies in (0,0.5] */
108 |       r = 0.5*(1.0 - y);
109 |       /* s = sqrt(r) through the VC++ SSE2 intrinsics */
110 |       _mm_store_sd(&s, _mm_sqrt_sd(_mm_setzero_pd(), _mm_load_sd(&r)));
111 |       y = s;
112 |     }
113 |   else
114 |     r = y*y;                      /* untransformed path: r = x^2 */
115 | 
116 |   /* u = r*R(r): rational approximation used for arguments in [0.0, 0.5], so arcsin(y) = y + y*u */
117 | 
118 |   u = r*(0.227485835556935010735943483075 +
119 |          (-0.445017216867635649900123110649 +
120 |           (0.275558175256937652532686256258 +
121 |            (-0.0549989809235685841612020091328 +
122 |             (0.00109242697235074662306043804220 +
123 |              0.0000482901920344786991880522822991*r)*r)*r)*r)*r)/
124 |     (1.36491501334161032038194214209 +
125 |      (-3.28431505720958658909889444194 +
126 |       (2.76568859157270989520376345954 +
127 |        (-0.943639137032492685763471240072 +
128 | 	0.105869422087204370341222318533*r)*r)*r)*r);
129 | 
130 |   if (transform)
131 |     { /* Reconstruct acos carefully in transformed region; for x < 0 the piby2_tail term offsets the rounding error of the constant pi (= 2*piby2_head) */
132 |       if (xneg) return pi - 2.0*(s+(y*u - piby2_tail));
133 |       else
134 | 	{ /* x > 0: result is 2*(s + s*u), with s split into head s1 plus correction c */
135 | 	  double c, s1;
136 | 	  unsigned long us;
137 | 	  GET_BITS_DP64(s, us);
138 | 	  PUT_BITS_DP64(0xffffffff00000000 & us, s1); /* s1 = s with its low 32 bits cleared */
139 | 	  c = (r-s1*s1)/(s+s1);   /* c is approximately s - s1, derived from r so that s1 + c tracks sqrt(r) more closely than s alone */
140 |           return 2.0*s1 + (2.0*c+2.0*y*u);
141 | 	}
142 |     }
143 |   else /* |x| < 0.5: pi/2 - (x + x*u), with pi/2 supplied as head + tail */
144 |     return piby2_head - (x - (piby2_tail - x*u));
145 | }
146 | 


--------------------------------------------------------------------------------
/acosf.c:
--------------------------------------------------------------------------------
  1 | 
  2 | /*******************************************************************************
  3 | MIT License
  4 | -----------
  5 | 
  6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
  7 | 
  8 | Permission is hereby granted, free of charge, to any person obtaining a copy
  9 | of this Software and associated documentation files (the "Software"), to deal
 10 | in the Software without restriction, including without limitation the rights
 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 12 | copies of the Software, and to permit persons to whom the Software is
 13 | furnished to do so, subject to the following conditions:
 14 | 
 15 | The above copyright notice and this permission notice shall be included in
 16 | all copies or substantial portions of the Software.
 17 | 
 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 24 | THE SOFTWARE.
 25 | *******************************************************************************/
 26 | 
 27 | #include "libm.h"
 28 | #include "libm_util.h"
 29 | 
 30 | #define USE_VALF_WITH_FLAGS
 31 | #define USE_NANF_WITH_FLAGS
 32 | #define USE_HANDLE_ERRORF
 33 | #include "libm_inlines.h"
 34 | #undef USE_NANF_WITH_FLAGS
 35 | #undef USE_VALF_WITH_FLAGS
 36 | #undef USE_HANDLE_ERRORF
 37 | 
 38 | #include "libm_errno.h"
 39 | 
 40 | // Disable "C4163: not available as intrinsic function" warning that older
 41 | // compilers may issue here.
 42 | #pragma warning(disable:4163)
 43 | #pragma function(acosf)
 44 | 
 45 | 
 46 | float FN_PROTOTYPE(acosf)(float x)
 47 | {
 48 |   /* Computes arccos(x) for a float argument.
 49 |      The argument is first reduced by noting that arccos(x)
 50 |      is invalid for abs(x) > 1. For denormal and small
 51 |      arguments arccos(x) = pi/2 to machine accuracy.
 52 |      Remaining argument ranges are handled as follows.
 53 |      For abs(x) < 0.5 use
 54 |      arccos(x) = pi/2 - arcsin(x)
 55 |      = pi/2 - (x + x^3*R(x^2))
 56 |      where R(x^2) is a rational minimax approximation to
 57 |      (arcsin(x) - x)/x^3.
 58 |      For abs(x) >= 0.5 exploit the identities:
 59 |      arccos(|x|) = 2*arcsin(sqrt((1-|x|)/2)),  arccos(-|x|) = pi - arccos(|x|)
 60 |      together with the above rational approximation, and
 61 |      reconstruct the terms carefully.
 62 |   */
 63 | 
 64 |   /* Some constants and split constants.
 65 |      pi and the head/tail split of pi/2 are kept in double; only piby2 is a float. */
 66 |   static const float
 67 |     piby2      = 1.5707963705e+00F; /* 0x3fc90fdb */
 68 |   static const double
 69 |     pi         = 3.1415926535897933e+00, /* 0x400921fb54442d18 */
 70 |     piby2_head = 1.5707963267948965580e+00, /* 0x3ff921fb54442d18 */
 71 |     piby2_tail = 6.12323399573676603587e-17; /* 0x3c91a62633145c07 */
 72 | 
 73 |   float u, y, s = 0.0F, r;          /* u = r*R(r); y = |x| or sqrt(r); s = sqrt(r); r = reduced argument */
 74 |   int xexp, xnan, transform = 0;    /* unbiased exponent, NaN flag, and flag for the |x| >= 0.5 path */
 75 | 
 76 |   unsigned int ux, aux, xneg;       /* raw bits of x, bits of |x|, and the isolated sign bit */
 77 |   /* Split x into raw bits, magnitude bits, sign bit, NaN test and unbiased exponent. */
 78 |   GET_BITS_SP32(x, ux);             /* ux = bits of x */
 79 |   aux = ux & ~SIGNBIT_SP32;         /* bits of |x| */
 80 |   xneg = (ux & SIGNBIT_SP32);       /* nonzero when the sign bit of x is set */
 81 |   xnan = (aux > PINFBITPATT_SP32);  /* magnitude pattern above +Inf means NaN */
 82 |   xexp = (int)((ux & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;
 83 | 
 84 |   /* Special cases: NaN, tiny |x|, and |x| >= 1 */
 85 | 
 86 |   if (xnan)
 87 |     { /* NaN input: hand back the same pattern with mantissa bit 22 set (the IEEE-754 quiet bit) */
 88 |       return _handle_errorf("acosf", OP_ACOS, ux|0x00400000, _DOMAIN, 0,
 89 |                            EDOM, x, 0.0F, 1);
 90 |     }
 91 |   else if (xexp < -26)
 92 |     /* |x| < 2^-26 (includes zero and denormals): x small enough that arccos(x) = pi/2 */
 93 |     return valf_with_flags(piby2, AMD_F_INEXACT);
 94 |   else if (xexp >= 0)
 95 |     { /* abs(x) >= 1.0: only +1 and -1 are in the domain, everything else is a domain error */
 96 |       if (x == 1.0F)
 97 |         return 0.0F;
 98 |       else if (x == -1.0F)
 99 |         return valf_with_flags((float)pi, AMD_F_INEXACT);
100 |       else
101 |         return _handle_errorf("acosf", OP_ACOS, INDEFBITPATT_SP32, _DOMAIN,
102 |                              AMD_F_INVALID, EDOM, x, 0.0F, 1);
103 |     }
104 | 
105 |   if (xneg) y = -x;                 /* y = |x| */
106 |   else y = x;
107 | 
108 |   transform = (xexp >= -1); /* abs(x) >= 0.5 */
109 | 
110 |   if (transform)
111 |     { /* Transform: r = (1-|x|)/2 and y = s = sqrt(r), which lies in (0,0.5] */
112 |       r = 0.5F*(1.0F - y);
113 |       /* s = sqrt(r) through the VC++ SSE intrinsics */
114 |       _mm_store_ss(&s, _mm_sqrt_ss(_mm_load_ss(&r)));
115 |       y = s;
116 |     }
117 |   else
118 |     r = y*y;                        /* untransformed path: r = x^2 */
119 | 
120 |   /* u = r*R(r): rational approximation used for arguments in [0.0, 0.5], so arcsin(y) = y + y*u */
121 | 
122 |   u=r*(0.184161606965100694821398249421F +
123 |        (-0.0565298683201845211985026327361F +
124 | 	(-0.0133819288943925804214011424456F -
125 | 	 0.00396137437848476485201154797087F*r)*r)*r)/
126 |     (1.10496961524520294485512696706F -
127 |      0.836411276854206731913362287293F*r);
128 | 
129 |   if (transform)
130 |     {
131 |       /* Reconstruct acos carefully in transformed region; the x < 0 case is evaluated in double and rounded to float at the end */
132 |       if (xneg)
133 |         return (float)(pi - 2.0*(s+(y*u - piby2_tail)));
134 |       else
135 | 	{ /* x > 0: result is 2*(s + s*u), with s split into head s1 plus correction c */
136 | 	  float c, s1;
137 | 	  unsigned int us;
138 | 	  GET_BITS_SP32(s, us);
139 | 	  PUT_BITS_SP32(0xffff0000 & us, s1); /* s1 = s with its low 16 bits cleared */
140 | 	  c = (r-s1*s1)/(s+s1);   /* c is approximately s - s1, derived from r so that s1 + c tracks sqrt(r) more closely than s alone */
141 |           return 2.0F*s1 + (2.0F*c+2.0F*y*u);
142 | 	}
143 |     }
144 |   else /* |x| < 0.5: pi/2 - (x + x*u) evaluated in double with pi/2 as head + tail, then rounded to float */
145 |     return (float)(piby2_head - (x - (piby2_tail - x*u)));
146 | }
147 | 


--------------------------------------------------------------------------------
/asin.c:
--------------------------------------------------------------------------------
  1 | 
  2 | /*******************************************************************************
  3 | MIT License
  4 | -----------
  5 | 
  6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
  7 | 
  8 | Permission is hereby granted, free of charge, to any person obtaining a copy
  9 | of this Software and associated documentation files (the "Software"), to deal
 10 | in the Software without restriction, including without limitation the rights
 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 12 | copies of the Software, and to permit persons to whom the Software is
 13 | furnished to do so, subject to the following conditions:
 14 | 
 15 | The above copyright notice and this permission notice shall be included in
 16 | all copies or substantial portions of the Software.
 17 | 
 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 24 | THE SOFTWARE.
 25 | *******************************************************************************/
 26 | 
 27 | #include "libm.h"
 28 | #include "libm_util.h"
 29 | 
 30 | #define USE_VAL_WITH_FLAGS
 31 | #define USE_NAN_WITH_FLAGS
 32 | #define USE_HANDLE_ERROR
 33 | #include "libm_inlines.h"
 34 | #undef USE_NAN_WITH_FLAGS
 35 | #undef USE_VAL_WITH_FLAGS
 36 | #undef USE_HANDLE_ERROR
 37 | 
 38 | #include "libm_errno.h"
 39 | 
 40 | #pragma function(asin)
 41 | 
 42 | double FN_PROTOTYPE(asin)(double x)
 43 | {
 44 |   /* Computes arcsin(x) for a double-precision argument x.
 45 |      The argument is first reduced by noting that arcsin(x)
 46 |      is invalid for abs(x) > 1 and arcsin(-x) = -arcsin(x).
 47 |      For denormal and small arguments arcsin(x) = x to machine
 48 |      accuracy. Remaining argument ranges are handled as follows.
 49 |      For abs(x) < 0.5 use
 50 |      arcsin(x) = x + x^3*R(x^2)
 51 |      where R(x^2) is a rational minimax approximation to
 52 |      (arcsin(x) - x)/x^3.
 53 |      For 0.5 <= abs(x) < 1 exploit the identity:
 54 |       arcsin(x) = pi/2 - 2*arcsin(sqrt((1-x)/2))
 55 |      together with the above rational approximation, and
 56 |      reconstruct the terms carefully. NaN and abs(x) > 1 go to _handle_error.
 57 |     */
 58 | 
 59 |   /* Some constants and split constants. */
 60 | 
 61 |   static const double
 62 |     piby2_tail  = 6.1232339957367660e-17, /* 0x3c91a62633145c07 */
 63 |     hpiby2_head = 7.8539816339744831e-01, /* 0x3fe921fb54442d18 */
 64 |     piby2       = 1.5707963267948965e+00; /* 0x3ff921fb54442d18 */
 65 |   double u, v, y, s=0.0, r;
 66 |   int xexp, xnan, transform=0;
 67 | 
 68 |   unsigned long ux, aux, xneg; /* NOTE(review): the 64-bit masks below assume a 64-bit unsigned long - confirm */
 69 |   GET_BITS_DP64(x, ux);
 70 |   aux = ux & ~SIGNBIT_DP64; /* bit pattern of x with the sign bit cleared */
 71 |   xneg = (ux & SIGNBIT_DP64); /* nonzero when the sign bit of x is set */
 72 |   xnan = (aux > PINFBITPATT_DP64); /* magnitude bits above the +infinity pattern */
 73 |   xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64; /* unbiased exponent */
 74 | 
 75 |   /* Special cases */
 76 | 
 77 |   if (xnan)
 78 |     { /* 0x0008000000000000 is bit 51, the top mantissa bit of a double */
 79 |       return _handle_error("asin", OP_ASIN, ux|0x0008000000000000, _DOMAIN,
 80 |                           0, EDOM, x, 0.0, 1);
 81 |     }
 82 |   else if (xexp < -28)
 83 |     { /* abs(x) < 2^-28: small enough that arcsin(x) = x */
 84 |       return val_with_flags(x, AMD_F_INEXACT);
 85 |     }
 86 |   else if (xexp >= 0)
 87 |     { /* abs(x) >= 1.0 */
 88 |       if (x == 1.0)
 89 |         return val_with_flags(piby2, AMD_F_INEXACT);
 90 |       else if (x == -1.0)
 91 |         return val_with_flags(-piby2, AMD_F_INEXACT);
 92 |       else /* abs(x) > 1.0 (including infinity): domain error */
 93 |         return _handle_error("asin", OP_ASIN, INDEFBITPATT_DP64, _DOMAIN,
 94 |                             AMD_F_INVALID, EDOM, x, 0.0, 1);
 95 |     }
 96 | 
 97 |   if (xneg) y = -x; /* y = abs(x); the sign is restored on return */
 98 |   else y = x;
 99 | 
100 |   transform = (xexp >= -1); /* abs(x) >= 0.5 */
101 | 
102 |   if (transform)
103 |     { /* Transform y into the range [0,0.5) */
104 |       r = 0.5*(1.0 - y); /* r = (1 - abs(x))/2, the argument of the sqrt in the identity */
105 |       /* VC++ intrinsic call: s = sqrt(r) */
106 |       _mm_store_sd(&s, _mm_sqrt_sd(_mm_setzero_pd(), _mm_load_sd(&r)));
107 |       y = s;
108 |     }
109 |   else
110 |     r = y*y; /* in both branches r ends up as (roughly) y*y */
111 | 
112 |   /* Use a rational approximation for [0.0, 0.5]: u = r*P(r)/Q(r) */
113 | 
114 |   u = r*(0.227485835556935010735943483075 +
115 |          (-0.445017216867635649900123110649 +
116 |           (0.275558175256937652532686256258 +
117 |            (-0.0549989809235685841612020091328 +
118 |             (0.00109242697235074662306043804220 +
119 |              0.0000482901920344786991880522822991*r)*r)*r)*r)*r)/
120 |     (1.36491501334161032038194214209 +
121 |      (-3.28431505720958658909889444194 +
122 |       (2.76568859157270989520376345954 +
123 |        (-0.943639137032492685763471240072 +
124 |         0.105869422087204370341222318533*r)*r)*r)*r);
125 | 
126 |   if (transform)
127 |     { /* Reconstruct asin carefully in transformed region */
128 |         {
129 |           double c, s1, p, q;
130 |           unsigned long us;
131 |           GET_BITS_DP64(s, us);
132 |           PUT_BITS_DP64(0xffffffff00000000 & us, s1); /* s1 = s with its low 32 bits cleared */
133 |           c = (r-s1*s1)/(s+s1); /* approximates s - s1, since r is about s*s */
134 |           p = 2.0*s*u - (piby2_tail-2.0*c);
135 |           q = hpiby2_head - 2.0*s1;
136 |           v = hpiby2_head - (p-q); /* algebraically (2*hpiby2_head + piby2_tail) - 2*(s1 + c + s*u) */
137 |         }
138 |     }
139 |   else
140 |     {
141 |       /* Use a temporary variable to prevent VC++ rearranging
142 |             y + y*u
143 |          into
144 |             y * (1 + u)
145 |          and getting an incorrectly rounded result */
146 |       double tmp;
147 |       tmp = y * u;
148 |       v = y + tmp;
149 |     }
150 | 
151 |   if (xneg) return -v; /* arcsin(-x) = -arcsin(x) */
152 |   else return v;
153 | }
154 | 


--------------------------------------------------------------------------------
/asinf.c:
--------------------------------------------------------------------------------
  1 | 
  2 | /*******************************************************************************
  3 | MIT License
  4 | -----------
  5 | 
  6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
  7 | 
  8 | Permission is hereby granted, free of charge, to any person obtaining a copy
  9 | of this Software and associated documentation files (the "Software"), to deal
 10 | in the Software without restriction, including without limitation the rights
 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 12 | copies of the Software, and to permit persons to whom the Software is
 13 | furnished to do so, subject to the following conditions:
 14 | 
 15 | The above copyright notice and this permission notice shall be included in
 16 | all copies or substantial portions of the Software.
 17 | 
 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 24 | THE SOFTWARE.
 25 | *******************************************************************************/
 26 | 
 27 | #include "libm.h"
 28 | #include "libm_util.h"
 29 | 
 30 | #define USE_VALF_WITH_FLAGS
 31 | #define USE_NANF_WITH_FLAGS
 32 | #define USE_HANDLE_ERRORF
 33 | #include "libm_inlines.h"
 34 | #undef USE_NANF_WITH_FLAGS
 35 | #undef USE_VALF_WITH_FLAGS
 36 | #undef USE_HANDLE_ERRORF
 37 | 
 38 | #include "libm_errno.h"
 39 | 
 40 | // Disable "C4163: not available as intrinsic function" warning that older
 41 | // compilers may issue here.
 42 | #pragma warning(disable:4163)
 43 | #pragma function(asinf)
 44 | 
 45 | 
 46 | float FN_PROTOTYPE(asinf)(float x)
 47 | {
 48 |   /* Computes arcsin(x) for a single-precision argument x.
 49 |      The argument is first reduced by noting that arcsin(x)
 50 |      is invalid for abs(x) > 1 and arcsin(-x) = -arcsin(x).
 51 |      For denormal and small arguments arcsin(x) = x to machine
 52 |      accuracy. Remaining argument ranges are handled as follows.
 53 |      For abs(x) < 0.5 use
 54 |      arcsin(x) = x + x^3*R(x^2)
 55 |      where R(x^2) is a rational minimax approximation to
 56 |      (arcsin(x) - x)/x^3.
 57 |      For 0.5 <= abs(x) < 1 exploit the identity:
 58 |       arcsin(x) = pi/2 - 2*arcsin(sqrt((1-x)/2))
 59 |      together with the above rational approximation, and
 60 |      reconstruct the terms carefully. NaN and abs(x) > 1 go to _handle_errorf.
 61 |     */
 62 | 
 63 |   /* Some constants and split constants. */
 64 | 
 65 |   static const float
 66 |     piby2_tail  = 7.5497894159e-08F, /* 0x33a22168 */
 67 |     hpiby2_head = 7.8539812565e-01F, /* 0x3f490fda */
 68 |     piby2       = 1.5707963705e+00F; /* 0x3fc90fdb */
 69 |   float u, v, y, s = 0.0F, r;
 70 |   int xexp, xnan, transform = 0;
 71 | 
 72 |   unsigned int ux, aux, xneg;
 73 |   GET_BITS_SP32(x, ux);
 74 |   aux = ux & ~SIGNBIT_SP32; /* bit pattern of x with the sign bit cleared */
 75 |   xneg = (ux & SIGNBIT_SP32); /* nonzero when the sign bit of x is set */
 76 |   xnan = (aux > PINFBITPATT_SP32); /* magnitude bits above the +infinity pattern */
 77 |   xexp = (int)((ux & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32; /* unbiased exponent */
 78 | 
 79 |   /* Special cases */
 80 | 
 81 |   if (xnan)
 82 |     { /* 0x00400000 is bit 22, the top mantissa bit of a float */
 83 |       return _handle_errorf("asinf", OP_ASIN, ux|0x00400000, _DOMAIN, 0,
 84 |                            EDOM, x, 0.0F, 1);
 85 |     }
 86 |   else if (xexp < -14)
 87 |     /* abs(x) < 2^-14: small enough that arcsin(x) = x */
 88 |     return valf_with_flags(x, AMD_F_INEXACT);
 89 |   else if (xexp >= 0)
 90 |     {
 91 |       /* abs(x) >= 1.0 */
 92 |       if (x == 1.0F)
 93 |         return valf_with_flags(piby2, AMD_F_INEXACT);
 94 |       else if (x == -1.0F)
 95 |         return valf_with_flags(-piby2, AMD_F_INEXACT);
 96 |       else /* abs(x) > 1.0 (including infinity): domain error */
 97 |         return _handle_errorf("asinf", OP_ASIN, INDEFBITPATT_SP32, _DOMAIN,
 98 |                              AMD_F_INVALID, EDOM, x, 0.0F, 1);
 99 |     }
100 | 
101 |   if (xneg) y = -x; /* y = abs(x); the sign is restored on return */
102 |   else y = x;
103 | 
104 |   transform = (xexp >= -1); /* abs(x) >= 0.5 */
105 | 
106 |   if (transform)
107 |     { /* Transform y into the range [0,0.5) */
108 |       r = 0.5F*(1.0F - y); /* r = (1 - abs(x))/2, the argument of the sqrt in the identity */
109 |       /* VC++ intrinsic call: s = sqrt(r) */
110 |       _mm_store_ss(&s, _mm_sqrt_ss(_mm_load_ss(&r)));
111 |       y = s;
112 |     }
113 |   else
114 |     r = y*y; /* in both branches r ends up as (roughly) y*y */
115 | 
116 |   /* Use a rational approximation for [0.0, 0.5]: u = r*P(r)/Q(r) */
117 | 
118 |   u=r*(0.184161606965100694821398249421F +
119 |        (-0.0565298683201845211985026327361F +
120 | 	(-0.0133819288943925804214011424456F -
121 | 	 0.00396137437848476485201154797087F*r)*r)*r)/
122 |     (1.10496961524520294485512696706F -
123 |      0.836411276854206731913362287293F*r);
124 | 
125 |   if (transform)
126 |     {
127 |       /* Reconstruct asin carefully in transformed region */
128 |       float c, s1, p, q;
129 |       unsigned int us;
130 |       GET_BITS_SP32(s, us);
131 |       PUT_BITS_SP32(0xffff0000 & us, s1); /* s1 = s with its low 16 bits cleared */
132 |       c = (r-s1*s1)/(s+s1); /* approximates s - s1, since r is about s*s */
133 |       p = 2.0F*s*u - (piby2_tail-2.0F*c);
134 |       q = hpiby2_head - 2.0F*s1;
135 |       v = hpiby2_head - (p-q); /* algebraically (2*hpiby2_head + piby2_tail) - 2*(s1 + c + s*u) */
136 |     }
137 |   else
138 |     {
139 |       /* Use a temporary variable to prevent VC++ rearranging
140 |             y + y*u
141 |          into
142 |             y * (1 + u)
143 |          and getting an incorrectly rounded result */
144 |       float tmp;
145 |       tmp = y * u;
146 |       v = y + tmp;
147 |     }
148 | 
149 |   if (xneg) return -v; /* arcsin(-x) = -arcsin(x) */
150 |   else return v;
151 | }
152 | 


--------------------------------------------------------------------------------
/atan.c:
--------------------------------------------------------------------------------
  1 | 
  2 | /*******************************************************************************
  3 | MIT License
  4 | -----------
  5 | 
  6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
  7 | 
  8 | Permission is hereby granted, free of charge, to any person obtaining a copy
  9 | of this Software and associated documentation files (the "Software"), to deal
 10 | in the Software without restriction, including without limitation the rights
 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 12 | copies of the Software, and to permit persons to whom the Software is
 13 | furnished to do so, subject to the following conditions:
 14 | 
 15 | The above copyright notice and this permission notice shall be included in
 16 | all copies or substantial portions of the Software.
 17 | 
 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 24 | THE SOFTWARE.
 25 | *******************************************************************************/
 26 | 
 27 | #include "libm.h"
 28 | #include "libm_util.h"
 29 | 
 30 | #define USE_VAL_WITH_FLAGS
 31 | #define USE_NAN_WITH_FLAGS
 32 | #define USE_HANDLE_ERROR
 33 | #include "libm_inlines.h"
 34 | #undef USE_VAL_WITH_FLAGS
 35 | #undef USE_NAN_WITH_FLAGS
 36 | #undef USE_HANDLE_ERROR
 37 | 
 38 | #include "libm_errno.h"
 39 | 
 40 | #pragma function(atan)
 41 | 
 42 | double FN_PROTOTYPE(atan)(double x)
 43 | {
 44 |   /* Computes arctan(x): reduce abs(x) around a breakpoint, then apply a rational core. */
 45 |   /* Some constants and split constants. */
 46 | 
 47 |   static const double piby2 = 1.5707963267948966e+00; /* 0x3ff921fb54442d18 */
 48 |   double chi, clo, v, s, q, z;
 49 | 
 50 |   /* Find properties of argument x: aux = bits of abs(x), v = abs(x). */
 51 | 
 52 |   unsigned long ux, aux, xneg;
 53 |   GET_BITS_DP64(x, ux);
 54 |   aux = ux & ~SIGNBIT_DP64;
 55 |   xneg = (ux != aux);
 56 | 
 57 |   if (xneg) v = -x;
 58 |   else v = x;
 59 | 
 60 |   /* Argument reduction to range [-7/16,7/16] */
 61 | 
 62 |   if (aux > 0x4003800000000000) /* v > 39./16. */
 63 |     {
 64 |       /* The hex constants are IEEE bit patterns, so they are compared with aux, not v. */
 65 |       if (aux > PINFBITPATT_DP64)
 66 |         {
 67 |           /* x is NaN: pass its bits on with bit 51 (top mantissa bit) set */
 68 |           return _handle_error("atan", OP_ATAN, ux|0x0008000000000000, _DOMAIN, 0,
 69 |                               EDOM, x, 0.0, 1);
 70 |         }
 71 |       else if (aux > 0x4370000000000000)
 72 |         { /* abs(x) > 2^56 => arctan(1/x) is
 73 |              insignificant compared to piby2 */
 74 |           if (xneg)
 75 |             return val_with_flags(-piby2, AMD_F_INEXACT);
 76 |           else
 77 |             return val_with_flags(piby2, AMD_F_INEXACT);
 78 |         }
 79 | 
 80 |       x = -1.0/v;
 81 |       /* (chi + clo) = arctan(infinity) */
 82 |       chi = 1.57079632679489655800e+00; /* 0x3ff921fb54442d18 */
 83 |       clo = 6.12323399573676480327e-17; /* 0x3c91a62633145c06 */
 84 |     }
 85 |   else if (aux > 0x3ff3000000000000) /* 39./16. > v > 19./16. */
 86 |     {
 87 |       x = (v-1.5)/(1.0+1.5*v);
 88 |       /* (chi + clo) = arctan(1.5) */
 89 |       chi = 9.82793723247329054082e-01; /* 0x3fef730bd281f69b */
 90 |       clo = 1.39033110312309953701e-17; /* 0x3c7007887af0cbbc */
 91 |     }
 92 |   else if (aux > 0x3fe6000000000000) /* 19./16. > v > 11./16. */
 93 |     {
 94 |       x = (v-1.0)/(1.0+v);
 95 |       /* (chi + clo) = arctan(1.) */
 96 |       chi = 7.85398163397448278999e-01; /* 0x3fe921fb54442d18 */
 97 |       clo = 3.06161699786838240164e-17; /* 0x3c81a62633145c06 */
 98 |     }
 99 |   else if (aux > 0x3fdc000000000000) /* 11./16. > v > 7./16. */
100 |     {
101 |       x = (2.0*v-1.0)/(2.0+v);
102 |       /* (chi + clo) = arctan(0.5) */
103 |       chi = 4.63647609000806093515e-01; /* 0x3fddac670561bb4f */
104 |       clo = 2.26987774529616809294e-17; /* 0x3c7a2b7f222f65e0 */
105 |     }
106 |   else  /* v < 7./16. */
107 |     {
108 |       x = v;
109 |       chi = 0.0;
110 |       clo = 0.0;
111 |     }
112 | 
113 |   /* Core approximation: Remez(4,4) on [-7/16,7/16] */
114 | 
115 |   s = x*x;
116 |   q = x*s*
117 |        (0.268297920532545909e0 +
118 |         (0.447677206805497472e0 +
119 |          (0.220638780716667420e0 +
120 |           (0.304455919504853031e-1 +
121 |             0.142316903342317766e-3*s)*s)*s)*s)/
122 |        (0.804893761597637733e0 +
123 |         (0.182596787737507063e1 +
124 |          (0.141254259931958921e1 +
125 |           (0.424602594203847109e0 +
126 |             0.389525873944742195e-1*s)*s)*s)*s);
127 |   /* z = chi + clo + x - q, with the small terms combined before chi is added. */
128 |   z = chi - ((q - clo) - x);
129 | 
130 |   if (xneg) z = -z; /* arctan(-x) = -arctan(x) */
131 |   return z;
132 | }
133 | 


--------------------------------------------------------------------------------
/atanf.c:
--------------------------------------------------------------------------------
  1 | 
  2 | /*******************************************************************************
  3 | MIT License
  4 | -----------
  5 | 
  6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
  7 | 
  8 | Permission is hereby granted, free of charge, to any person obtaining a copy
  9 | of this Software and associated documentation files (the "Software"), to deal
 10 | in the Software without restriction, including without limitation the rights
 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 12 | copies of the Software, and to permit persons to whom the Software is
 13 | furnished to do so, subject to the following conditions:
 14 | 
 15 | The above copyright notice and this permission notice shall be included in
 16 | all copies or substantial portions of the Software.
 17 | 
 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 24 | THE SOFTWARE.
 25 | *******************************************************************************/
 26 | 
 27 | #include "libm.h"
 28 | #include "libm_util.h"
 29 | 
 30 | #define USE_VALF_WITH_FLAGS
 31 | #define USE_NAN_WITH_FLAGS
 32 | #define USE_HANDLE_ERRORF
 33 | #include "libm_inlines.h"
 34 | #undef USE_VALF_WITH_FLAGS
 35 | #undef USE_NAN_WITH_FLAGS
 36 | #undef USE_HANDLE_ERRORF
 37 | 
 38 | #include "libm_errno.h"
 39 | 
 40 | // Disable "C4163: not available as intrinsic function" warning that older
 41 | // compilers may issue here.
 42 | #pragma warning(disable:4163)
 43 | #pragma function(atanf)
 44 | 
 45 | float FN_PROTOTYPE(atanf)(float fx)
 46 | {
 47 |   /* Computes arctan(fx) in double precision and rounds the result to float. */
 48 |   /* Some constants and split constants. */
 49 | 
 50 |   static const double piby2 = 1.5707963267948966e+00; /* 0x3ff921fb54442d18 */
 51 | 
 52 |   double c, v, s, q, z;
 53 |   unsigned int xnan;
 54 | 
 55 |   double x = fx;
 56 | 
 57 |   /* Find properties of argument fx: aux = bits of abs(x), v = abs(x). */
 58 | 
 59 |   unsigned long ux, aux, xneg;
 60 | 
 61 |   GET_BITS_DP64(x, ux);
 62 |   aux = ux & ~SIGNBIT_DP64;
 63 |   xneg = ux & SIGNBIT_DP64;
 64 | 
 65 |   v = x;
 66 |   if (xneg) v = -x;
 67 | 
 68 |   /* Argument reduction to range [-7/16,7/16] */
 69 | 
 70 |   if (aux < 0x3fdc000000000000) /* v < 7./16. */
 71 |     {
 72 |       x = v;
 73 |       c = 0.0;
 74 |     }
 75 |   else if (aux < 0x3fe6000000000000) /* v < 11./16. */
 76 |     {
 77 |       x = (2.0*v-1.0)/(2.0+v);
 78 |       /* c = arctan(0.5) */
 79 |       c = 4.63647609000806093515e-01; /* 0x3fddac670561bb4f */
 80 |     }
 81 |   else if (aux < 0x3ff3000000000000) /* v < 19./16. */
 82 |     {
 83 |       x = (v-1.0)/(1.0+v);
 84 |       /* c = arctan(1.) */
 85 |       c = 7.85398163397448278999e-01; /* 0x3fe921fb54442d18 */
 86 |     }
 87 |   else if (aux < 0x4003800000000000) /* v < 39./16. */
 88 |     {
 89 |       x = (v-1.5)/(1.0+1.5*v);
 90 |       /* c = arctan(1.5) */
 91 |       c = 9.82793723247329054082e-01; /* 0x3fef730bd281f69b */
 92 |     }
 93 |   else
 94 |     {
 95 |       /* The hex constants are double bit patterns, so they are compared with aux, not v. */
 96 |       xnan = (aux > PINFBITPATT_DP64);
 97 | 
 98 |       if (xnan)
 99 |         {
100 |           /* x is NaN: pass the float bits on with bit 22 (top mantissa bit) set */
101 |           unsigned int uhx;
102 |           GET_BITS_SP32(fx, uhx);
103 |           return _handle_errorf("atanf", OP_ATAN, uhx|0x00400000, _DOMAIN,
104 |                                0, EDOM, fx, 0.0F, 1);
105 |         }
106 |       else if (aux > 0x4190000000000000)
107 |         { /* abs(x) > 2^26 (0x4190000000000000 as a double) => arctan(1/x) is
108 |              insignificant compared to piby2 */
109 |           if (xneg)
110 |             return valf_with_flags((float)-piby2, AMD_F_INEXACT);
111 |           else
112 |             return valf_with_flags((float)piby2, AMD_F_INEXACT);
113 |         }
114 | 
115 |       x = -1.0/v;
116 |       /* c = arctan(infinity) */
117 |       c = 1.57079632679489655800e+00; /* 0x3ff921fb54442d18 */
118 |     }
119 | 
120 |   /* Core approximation: Remez(2,2) on [-7/16,7/16] */
121 | 
122 |   s = x*x;
123 |   q = x*s*
124 |     (0.296528598819239217902158651186e0 +
125 |      (0.192324546402108583211697690500e0 +
126 |        0.470677934286149214138357545549e-2*s)*s)/
127 |     (0.889585796862432286486651434570e0 +
128 |      (0.111072499995399550138837673349e1 +
129 |        0.299309699959659728404442796915e0*s)*s);
130 |   /* z = c + x - q, evaluated in double before the final rounding to float. */
131 |   z = c - (q - x);
132 | 
133 |   if (xneg) z = -z; /* arctan(-x) = -arctan(x) */
134 |   return (float)z;
135 | }
136 | 


--------------------------------------------------------------------------------
/cabs.c:
--------------------------------------------------------------------------------
 1 | 
 2 | /*******************************************************************************
 3 | MIT License
 4 | -----------
 5 | 
 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
 7 | 
 8 | Permission is hereby granted, free of charge, to any person obtaining a copy
 9 | of this Software and associated documentation files (the "Software"), to deal
10 | in the Software without restriction, including without limitation the rights
11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 | copies of the Software, and to permit persons to whom the Software is
13 | furnished to do so, subject to the following conditions:
14 | 
15 | The above copyright notice and this permission notice shall be included in
16 | all copies or substantial portions of the Software.
17 | 
18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 | THE SOFTWARE.
25 | *******************************************************************************/
26 | 
27 | #include "libm.h"
28 | 
29 | double __cdecl _cabs(COMPLEX z)
30 | {
31 |   /* Magnitude of the complex value z, taken as the hypotenuse of z.x and z.y. */
32 |   const double magnitude = _hypot(z.x, z.y);
33 |   return magnitude;
34 | }
35 | 


--------------------------------------------------------------------------------
/cabsf.c:
--------------------------------------------------------------------------------
 1 | 
 2 | /*******************************************************************************
 3 | MIT License
 4 | -----------
 5 | 
 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
 7 | 
 8 | Permission is hereby granted, free of charge, to any person obtaining a copy
 9 | of this Software and associated documentation files (the "Software"), to deal
10 | in the Software without restriction, including without limitation the rights
11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 | copies of the Software, and to permit persons to whom the Software is
13 | furnished to do so, subject to the following conditions:
14 | 
15 | The above copyright notice and this permission notice shall be included in
16 | all copies or substantial portions of the Software.
17 | 
18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 | THE SOFTWARE.
25 | *******************************************************************************/
26 | 
27 | #include "libm.h"
28 | 
29 | float _hypotf(float, float);
30 | float _cabsf(COMPLEX z)
31 | {
32 |   /* Magnitude of the complex value z; z.x and z.y are narrowed to float first. */
33 |   const float re = (float)z.x, im = (float)z.y;
34 |   return _hypotf(re, im);
35 | }
36 | 


--------------------------------------------------------------------------------
/ceil.c:
--------------------------------------------------------------------------------
 1 | 
 2 | /*******************************************************************************
 3 | MIT License
 4 | -----------
 5 | 
 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
 7 | 
 8 | Permission is hereby granted, free of charge, to any person obtaining a copy
 9 | of this Software and associated documentaon files (the "Software"), to deal
10 | in the Software without restriction, including without limitation the rights
11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 | copies of the Software, and to permit persons to whom the Software is
13 | furnished to do so, subject to the following conditions:
14 | 
15 | The above copyright notice and this permission notice shall be included in
16 | all copies or substantial portions of the Software.
17 | 
18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 | THE SOFTWARE.
25 | *******************************************************************************/
26 | 
27 | #include "libm.h"
28 | #include "libm_util.h"
29 | 
30 | #include "libm_errno.h"
31 | #define USE_HANDLE_ERROR
32 | #include "libm_inlines.h"
33 | #undef USE_HANDLE_ERROR
34 | 
35 | // Disable "C4163: not available as intrinsic function" warning that older
36 | // compilers may issue here.
37 | #pragma warning(disable:4163)
38 | #pragma function(ceil)
39 | 
40 | double FN_PROTOTYPE(ceil)(double x)
41 | {
42 |   /* Rounds x up to the nearest integral value. The work is done on the
43 |      bit pattern of x; the only floating-point operation is the final
44 |      + 1.0 applied to positive non-integers. */
45 |   double result;
46 |   long unbiased, negative;
47 |   unsigned long bits, magnitude, kept, fraction;
48 | 
49 |   GET_BITS_DP64(x, bits);
50 |   magnitude = bits & (~SIGNBIT_DP64);
51 |   negative = (bits != magnitude);
52 | 
53 |   /* NaN: defer to the error handler, passing the bits with bit 51 set. */
54 |   if (magnitude > 0x7ff0000000000000)
55 |     return _handle_error("ceil", OP_CEIL, bits|0x0008000000000000, _DOMAIN, 0,
56 |                         EDOM, x, 0.0, 1);
57 | 
58 |   /* Infinity, or abs(x) >= 2^53: x is returned unchanged. */
59 |   if (magnitude >= 0x4340000000000000)
60 |     return x;
61 | 
62 |   if (magnitude < 0x3ff0000000000000)
63 |     {
64 |       /* abs(x) < 1.0 */
65 |       if (magnitude == 0x0000000000000000)
66 |         return x;                           /* +0 or -0 passes through */
67 |       if (!negative)
68 |         return 1.0;                         /* 0 < x < 1 */
69 |       PUT_BITS_DP64(SIGNBIT_DP64, result);  /* -1 < x < 0 yields -0.0 */
70 |       return result;
71 |     }
72 | 
73 |   /* 1.0 <= abs(x) < 2^53: build a mask of the mantissa bits that lie
74 |      below the binary point and clear them. */
75 |   unbiased = ((bits & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
76 |   fraction = 1;
77 |   fraction = (fraction << (EXPSHIFTBITS_DP64 - unbiased)) - 1;
78 |   kept = (bits & ~fraction);
79 |   PUT_BITS_DP64(kept, result);
80 | 
81 |   /* Truncation already rounded a negative x upward, and an x that lost
82 |      no bits was integral to begin with. */
83 |   if (negative || (kept == bits))
84 |     return result;
85 | 
86 |   /* Positive x with a fractional part: step up to the next integer. */
87 |   return result + 1.0;
88 | }
89 | 


--------------------------------------------------------------------------------
/ceilf.c:
--------------------------------------------------------------------------------
 1 | 
 2 | /*******************************************************************************
 3 | MIT License
 4 | -----------
 5 | 
 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
 7 | 
 8 | Permission is hereby granted, free of charge, to any person obtaining a copy
 9 | of this Software and associated documentation files (the "Software"), to deal
10 | in the Software without restriction, including without limitation the rights
11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 | copies of the Software, and to permit persons to whom the Software is
13 | furnished to do so, subject to the following conditions:
14 | 
15 | The above copyright notice and this permission notice shall be included in
16 | all copies or substantial portions of the Software.
17 | 
18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 | THE SOFTWARE.
25 | *******************************************************************************/
26 | 
27 | #include "libm.h"
28 | #include "libm_util.h"
29 | 
30 | #include "libm_errno.h"
31 | #define USE_HANDLE_ERRORF
32 | #include "libm_inlines.h"
33 | #undef USE_HANDLE_ERRORF
34 | 
35 | // Disable "C4163: not available as intrinsic function" warning that older
36 | // compilers may issue here.
37 | #pragma warning(disable:4163)
38 | #pragma function(ceilf)
39 | 
40 | float FN_PROTOTYPE(ceilf)(float x)
41 | {
42 |   float r;
43 |   int rexp, xneg;
44 |   unsigned int ux, ax, ur, mask;
45 |   /* Computes ceil(x) in single precision by editing the bit pattern of x. */
46 |   GET_BITS_SP32(x, ux);
47 |   ax = ux & (~SIGNBIT_SP32);  /* bits of abs(x) */
48 |   xneg = (ux != ax);          /* true when the sign bit of x was set */
49 | 
50 |   if (ax >= 0x4b800000)
51 |     {
52 |       /* abs(x) is either NaN, infinity, or >= 2^24 */
53 |       if (ax > 0x7f800000)
54 |         /* x is NaN: set the quiet bit (0x00400000, top mantissa bit) before handing it on */
55 |         return _handle_errorf("ceilf", OP_CEIL, ux|0x00400000, _DOMAIN, 0, EDOM, x,
56 |             0.0F, 1);
57 |       else
58 |         return x;
59 |     }
60 |   else if (ax < 0x3f800000) /* abs(x) < 1.0 */
61 |     {
62 |       if (ax == 0x00000000)
63 |         /* x is +zero or -zero; return the same zero */
64 |         return x;
65 |       else if (xneg) /* -1.0 < x < 0.0 */
66 |       {
67 |         PUT_BITS_SP32(SIGNBIT_SP32, r);  /* return -0.0 */
68 |         return r;
69 |       }
70 |       else /* 0.0 < x < 1.0 */
71 |         return 1.0F;
72 |     }
73 |   else
74 |     {
75 |       rexp = ((ux & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;
76 |       /* Mask out the bits of r that we don't want (those below the binary point) */
77 |       mask = (1 << (EXPSHIFTBITS_SP32 - rexp)) - 1;
78 |       ur = (ux & ~mask);
79 |       PUT_BITS_SP32(ur, r);
80 |       /* Truncation already rounds a negative x up; an unchanged x was integral. */
81 |       if (xneg || (ux == ur)) return r;
82 |       else
83 |         /* We threw some bits away and x was positive */
84 |         return r + 1.0F;
85 |     }
86 | }
87 | 


--------------------------------------------------------------------------------
/exp2.c:
--------------------------------------------------------------------------------
  1 | 
  2 | /*******************************************************************************
  3 | MIT License
  4 | -----------
  5 | 
  6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
  7 | 
  8 | Permission is hereby granted, free of charge, to any person obtaining a copy
  9 | of this Software and associated documentation files (the "Software"), to deal
 10 | in the Software without restriction, including without limitation the rights
 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 12 | copies of the Software, and to permit persons to whom the Software is
 13 | furnished to do so, subject to the following conditions:
 14 | 
 15 | The above copyright notice and this permission notice shall be included in
 16 | all copies or substantial portions of the Software.
 17 | 
 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 24 | THE SOFTWARE.
 25 | *******************************************************************************/
 26 | 
 27 | #include "libm.h"
 28 | #include "libm_util.h"
 29 | 
 30 | #define USE_SPLITEXP
 31 | #define USE_SCALEDOUBLE_1
 32 | #define USE_SCALEDOUBLE_2
 33 | #define USE_ZERO_WITH_FLAGS
 34 | #define USE_INFINITY_WITH_FLAGS
 35 | #define USE_HANDLE_ERROR
 36 | 
 37 | #include "libm_inlines.h"
 38 | #undef USE_ZERO_WITH_FLAGS
 39 | #undef USE_SPLITEXP
 40 | #undef USE_SCALEDOUBLE_1
 41 | #undef USE_SCALEDOUBLE_2
 42 | #undef USE_INFINITY_WITH_FLAGS
 43 | #undef USE_HANDLE_ERROR
 44 | 
 45 | #include "libm_errno.h"
 46 | 
 47 | /* Computes 2**x in double precision.  exp2 is only provided for use by powf
 48 |    under Windows, so give it a leading underscore. */
 49 | double FN_PROTOTYPE(_exp2)(double x)
 50 | {
 51 |   static const double
 52 |     max_exp2_arg = 1024.0,  /* 0x4090000000000000 */
 53 |     min_exp2_arg = -1074.0, /* 0xc090c80000000000 */
 54 |     log2 = 6.931471805599453094178e-01, /* ln(2); 0x3fe62e42fefa39ef */
 55 |     log2_lead = 6.93147167563438415527E-01, /* leading part of ln(2); 0x3fe62e42f8000000 */
 56 |     log2_tail = 1.29965068938898869640E-08, /* ln(2) - log2_lead; 0x3e4be8e7bcd5e4f1 */
 57 |     one_by_32_lead = 0.03125; /* 1/32 */
 58 | 
 59 |   double y, z1, z2, z, hx, tx, y1, y2;
 60 |   int m;
 61 |   unsigned long ux, ax; /* NOTE(review): must hold 64 bits for the constants below - confirm in the project headers */
 62 | 
 63 |   /*
 64 |     Computation of exp2(x).
 65 | 
 66 |     We compute the values m, z1, and z2 such that
 67 |     exp2(x) = 2**m * (z1 + z2),  where exp2(x) is 2**x.
 68 | 
 69 |     Computations needed in order to obtain m, z1, and z2
 70 |     involve three steps.
 71 | 
 72 |     First, we reduce the argument x to the form
 73 |     x = n/32 + remainder,
 74 |     where n has the value of an integer and |remainder| <= 1/64.
 75 |     The value of n = x * 32 rounded to the nearest integer and
 76 |     the remainder = x - n/32.
 77 | 
 78 |     Second, we approximate exp2(r1 + r2) - 1 where r1 is the leading
 79 |     part of the remainder and r2 is the trailing part of the remainder.
 80 | 
 81 |     Third, we reconstruct exp2(x) so that
 82 |     exp2(x) = 2**m * (z1 + z2).
 83 |   */
 84 | 
 85 | 
 86 |   GET_BITS_DP64(x, ux);
 87 |   ax = ux & (~SIGNBIT_DP64); /* bit pattern of abs(x) */
 88 | 
 89 |   if (ax >= 0x4090000000000000) /* abs(x) >= 1024.0 */
 90 |     {
 91 |       if(ax >= 0x7ff0000000000000)
 92 |         {
 93 |           /* x is either NaN or infinity */
 94 |           if (ux & MANTBITS_DP64)
 95 |             /* x is NaN; it is reported with bit 0x0008000000000000 set */
 96 |             return _handle_error("exp2", OP_EXP, ux|0x0008000000000000, _DOMAIN,
 97 |                                 0, EDOM, x, 0.0, 1);
 98 |           else if (ux & SIGNBIT_DP64)
 99 |             /* x is negative infinity; return 0.0 with no flags. */
100 |             return 0.0;
101 |           else
102 |             /* x is positive infinity */
103 |             return x;
104 |         }
105 |       if (x > max_exp2_arg)
106 |         /* Return +infinity with overflow flag */
107 |         return _handle_error("exp2", OP_EXP, PINFBITPATT_DP64, _OVERFLOW,
108 |                             AMD_F_OVERFLOW | AMD_F_INEXACT, ERANGE, x, 0.0, 1);
109 |       else if (x < min_exp2_arg)
110 |         /* x is negative. Return +zero with underflow and inexact flags */
111 |         return _handle_error("exp2", OP_EXP, 0, _UNDERFLOW,
112 |                             AMD_F_UNDERFLOW | AMD_F_INEXACT, ERANGE, x, 0.0, 1);
113 |     } /* x == 1024.0 and -1074.0 <= x <= -1024.0 were not returned above and continue below */
114 | 
115 | 
116 |   /* Handle small arguments separately */
117 |   if (ax < 0x3fb7154764ee6c2f)   /* abs(x) < 1/(16*ln(2)), roughly 0.09 */
118 |     {
119 |       if (ax < 0x3c00000000000000)   /* abs(x) < 2^(-63) */
120 |         return 1.0 + x; /* Raises inexact if x is non-zero */
121 |       else
122 |         {
123 |           /* Split x into hx (head) and tx (tail). */
124 |           unsigned long u;
125 |           hx = x;
126 |           GET_BITS_DP64(hx, u);
127 |           u &= 0xfffffffff8000000; /* clear the low 27 bits to form the head */
128 |           PUT_BITS_DP64(u, hx);
129 |           tx = x - hx;
130 |           /* Carefully multiply x by log2. y1 is the most significant
131 |              part of the result, and y2 the least significant part */
132 |           y1 = x * log2_lead;
133 |           y2 = (((hx * log2_lead - y1) + hx * log2_tail) +
134 |                   tx * log2_lead) + tx * log2_tail;
135 | 
136 |           y = y1 + y2; /* y approximates x * ln(2) */
137 | 		z = (9.99564649780173690e-1 +
138 | 		     (1.61251249355268050e-5 +
139 | 		      (2.37986978239838493e-2 +
140 | 		        2.68724774856111190e-7*y)*y)*y)/
141 | 		    (9.99564649780173692e-1 +
142 | 		     (-4.99766199765151309e-1 +
143 | 		      (1.070876894098586184e-1 +
144 | 		       (-1.189773642681502232e-2 +
145 | 			 5.9480622371960190616e-4*y)*y)*y)*y);
146 |           z = ((z * y1) + (z * y2)) + 1.0; /* result = 1 + ratio * (y1 + y2), with the ratio computed above */
147 |         }
148 |     }
149 |   else
150 |     {
151 |       /* Find m, z1 and z2 such that exp2(x) = 2**m * (z1 + z2) */
152 | 
153 |       splitexp(x, log2, 32.0, one_by_32_lead, 0.0, &m, &z1, &z2);
154 | 
155 |       /* Scale (z1 + z2) by 2.0**m; the helper is chosen by whether m lies
              strictly between EMIN_DP64 and EMAX_DP64 */
156 |       if (m > EMIN_DP64 && m < EMAX_DP64)
157 | 	z = scaleDouble_1((z1+z2),m);
158 |       else
159 | 	z = scaleDouble_2((z1+z2),m); /* presumably the variant for exponents at or beyond the limits - confirm in libm_inlines.h */
160 |     }
161 |   return z;
162 | }
163 | 


--------------------------------------------------------------------------------
/exp_special.c:
--------------------------------------------------------------------------------
  1 | 
  2 | /*******************************************************************************
  3 | MIT License
  4 | -----------
  5 | 
  6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
  7 | 
  8 | Permission is hereby granted, free of charge, to any person obtaining a copy
  9 | of this Software and associated documentation files (the "Software"), to deal
 10 | in the Software without restriction, including without limitation the rights
 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 12 | copies of the Software, and to permit persons to whom the Software is
 13 | furnished to do so, subject to the following conditions:
 14 | 
 15 | The above copyright notice and this permission notice shall be included in
 16 | all copies or substantial portions of the Software.
 17 | 
 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 24 | THE SOFTWARE.
 25 | *******************************************************************************/
 26 | 
 27 | #include <fpieee.h>
 28 | #include <excpt.h>
 29 | #include <float.h>
 30 | #include <math.h>
 31 | #include <errno.h>
 32 | 
 33 | #include "libm_new.h"
 34 | 
 35 | // y = expf(x)
 36 | // y = exp(x)
 37 | 
 38 | // these codes and the ones in the related .asm files have to match
 39 | #define EXP_X_NAN       1
 40 | #define EXP_Y_ZERO      2
 41 | #define EXP_Y_INF       3
 42 | 
 43 | /*
 44 |  * Special-case exit used by the expf assembly routine.
 45 |  *
 46 |  *   x    - the original argument
 47 |  *   y    - the result chosen by the caller; always returned unchanged
 48 |  *   code - one of the EXP_* selectors above
 49 |  *
 50 |  * Each recognised code reports the condition through _handle_errorf (whose
 51 |  * return value is ignored here); any other code reports nothing and simply
 52 |  * falls through to the return.
 53 |  */
 54 | float _expf_special(float x, float y, U32 code)
 55 | {
 56 |     /* y's bits go in element 0 of the f32 view of a zeroed 64-bit union. */
 57 |     UT64 ym;
 58 |     ym.u64 = 0;
 59 |     ym.f32[0] = y;
 60 | 
 61 |     if (code == EXP_X_NAN)
 62 |         _handle_errorf("expf", _FpCodeExp, ym.u64, _DOMAIN, 0, EDOM, x, 0.0, 1);
 63 |     else if (code == EXP_Y_ZERO)
 64 |         _handle_errorf("expf", _FpCodeExp, ym.u64, _UNDERFLOW,
 65 |                        AMD_F_INEXACT|AMD_F_UNDERFLOW, ERANGE, x, 0.0, 1);
 66 |     else if (code == EXP_Y_INF)
 67 |         _handle_errorf("expf", _FpCodeExp, ym.u64, _OVERFLOW,
 68 |                        AMD_F_INEXACT|AMD_F_OVERFLOW, ERANGE, x, 0.0, 1);
 69 | 
 70 |     return y;
 71 | }
 72 | 
 73 | /*
 74 |  * Double-precision counterpart of _expf_special, reporting under the name
 75 |  * "exp".  y is handed to _handle_error as its 64-bit pattern and is always
 76 |  * returned unchanged; codes other than the three EXP_* values report nothing.
 77 |  */
 78 | double _exp_special(double x, double y, U32 code)
 79 | {
 80 |     UT64 ym;
 81 |     ym.f64 = y;
 82 | 
 83 |     if (code == EXP_X_NAN)
 84 |         _handle_error("exp", _FpCodeExp, ym.u64, _DOMAIN, 0, EDOM, x, 0.0, 1);
 85 |     else if (code == EXP_Y_ZERO)
 86 |         _handle_error("exp", _FpCodeExp, ym.u64, _UNDERFLOW,
 87 |                       AMD_F_INEXACT|AMD_F_UNDERFLOW, ERANGE, x, 0.0, 1);
 88 |     else if (code == EXP_Y_INF)
 89 |         _handle_error("exp", _FpCodeExp, ym.u64, _OVERFLOW,
 90 |                       AMD_F_INEXACT|AMD_F_OVERFLOW, ERANGE, x, 0.0, 1);
 91 | 
 92 |     return y;
 93 | }
 94 | 


--------------------------------------------------------------------------------
/expf.asm:
--------------------------------------------------------------------------------
  1 | ;
  2 | ; MIT License
  3 | ; -----------
  4 | ; 
  5 | ; Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
  6 | ; 
  7 | ; Permission is hereby granted, free of charge, to any person obtaining a copy
  8 | ; of this Software and associated documentation files (the "Software"), to deal
  9 | ; in the Software without restriction, including without limitation the rights
 10 | ; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 11 | ; copies of the Software, and to permit persons to whom the Software is
 12 | ; furnished to do so, subject to the following conditions:
 13 | ; 
 14 | ; The above copyright notice and this permission notice shall be included in
 15 | ; all copies or substantial portions of the Software.
 16 | ; 
 17 | ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 18 | ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 19 | ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 20 | ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 21 | ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 22 | ; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 23 | ; THE SOFTWARE.
 24 | ;
 25 | ; expf.asm
 26 | ;
 27 | ; An implementation of the expf libm function.
 28 | ;
 29 | ; Prototype:
 30 | ;
 31 | ;     float expf(float x);
 32 | ;
 33 | 
 34 | ;
 35 | ;   Algorithm:
 36 | ;       Similar to the one presented in exp.asm
 37 | ;
 38 | ; If FMA3 hardware is available, an FMA3 implementation of expf will be used.
 39 | 
 40 | 
 41 | .const
 42 | ALIGN 16
 43 | 
 44 | __real_inf                      DD 7f800000h            ; +infinity (single precision)
 45 |                                 DD 0
 46 |                                 DQ 0
 47 | 
 48 | __real_ninf                     DD 0ff800000h           ; -infinity (single precision)
 49 |                                 DD 0
 50 |                                 DQ 0
 51 | 
 52 | __real_qnanbit                  DD 00400000h            ; quiet bit of a single-precision NaN
 53 |                                 DD 0
 54 |                                 DQ 0
 55 | 
 56 | __real_zero                     DD 00000000h            ; +0.0
 57 |                                 DD 0
 58 |                                 DQ 0
 59 | 
 60 | __real_p8192                    DQ 40c0000000000000h    ; 8192.0 = 64*128, overflow threshold for x*(64/ln(2))
 61 |                                 DQ 0
 62 | __real_m9600                    DQ 0c0c2c00000000000h   ; -9600.0 = 64*(-150), underflow threshold for x*(64/ln(2))
 63 |                                 DQ 0
 64 | 
 65 | __real_64_by_log2               DQ 40571547652b82feh ; 64/ln(2)
 66 |                                 DQ 0
 67 | __real_log2_by_64               DQ 3f862e42fefa39efh ; ln(2)/64
 68 |                                 DQ 0
 69 | 
 70 | __real_1_by_6                   DQ 3fc5555555555555h ; 1/6
 71 |                                 DQ 0
 72 | __real_1_by_2                   DQ 3fe0000000000000h ; 1/2
 73 |                                 DQ 0
 74 | 
 75 | ; these codes and the ones in the corresponding .c file have to match
 76 | __flag_x_nan            DD 00000001         ; EXP_X_NAN
 77 | __flag_y_zero           DD 00000002         ; EXP_Y_ZERO
 78 | __flag_y_inf            DD 00000003         ; EXP_Y_INF
 79 | 
 80 | EXTRN __two_to_jby64_table:QWORD            ; read below as [table + j*8] with j = n AND 3fh
 81 | EXTRN __use_fma3_lib:DWORD                  ; non-zero selects the FMA3 code path
 82 | 
 83 | fname           TEXTEQU <expf>
 84 | fname_special   TEXTEQU <_expf_special>
 85 | 
 86 | ; define local variable storage offsets
 87 | 
 88 | ; make room for fname_special to save things
 89 | dummy_space     EQU    020h                 ; NOTE(review): not referenced elsewhere in this file - confirm it is still needed
 90 | stack_size      EQU    038h                 ; bytes reserved by StackAllocate in the procedure below
 91 | 
 92 | include fm.inc
 93 | 
 94 | ; external function
 95 | EXTERN fname_special:PROC
 96 | 
 97 | .code
 98 | 
 99 | ALIGN 16
100 | PUBLIC fname
101 | fname PROC FRAME
102 |     StackAllocate stack_size
103 |     .ENDPROLOG
104 |     ; In:  xmm0 = x (single precision).  Out: xmm0 = expf(x).
105 |     ; Do this to avoid possible exceptions from a NaN argument.
106 |     movd        edx, xmm0
107 |     btr         edx,31                ; edx <-- bits of |x| (sign bit cleared)
108 |     cmp         edx, DWORD PTR __real_inf
109 |     jge         Lexpf_x_is_inf_or_nan
110 | 
111 |     cmp          DWORD PTR __use_fma3_lib, 0
112 |     jne          Lexpf_fma3
113 | 
114 | Lexpf_sse2:
115 | 
116 |     cvtss2sd    xmm0, xmm0            ; work in double precision from here on
117 | 
118 |     ; x * (64/ln(2))
119 |     movsd       xmm3, QWORD PTR __real_64_by_log2
120 |     mulsd       xmm3, xmm0
121 | 
122 |     ; x <= 128*ln(2), ( x * (64/ln(2)) ) <= 64*128
123 |     ; x > -150*ln(2), ( x * (64/ln(2)) ) > 64*(-150)
124 |     comisd      xmm3, QWORD PTR __real_p8192
125 |     jae         Lexpf_y_is_inf
126 | 
127 |     comisd      xmm3, QWORD PTR __real_m9600
128 |     jb          Lexpf_y_is_zero
129 | 
130 |     ; n = int( x * (64/ln(2)) )
131 |     cvtpd2dq    xmm4, xmm3
132 |     lea         r10, __two_to_jby64_table
133 |     cvtdq2pd    xmm1, xmm4
134 | 
135 |     ; r = x - n * ln(2)/64
136 |     movsd       xmm2, QWORD PTR __real_log2_by_64
137 |     mulsd       xmm2, xmm1
138 |     movd        ecx, xmm4             ; ecx <-- n
139 |     mov         rax, 3fh
140 |     and         eax, ecx              ; eax <-- j = n AND 63 (table index)
141 |     subsd       xmm0, xmm2
142 |     movsd       xmm1, xmm0            ; xmm1 <-- copy of r
143 | 
144 |     ; m = (n - j) / 64
145 |     sub         ecx, eax
146 |     sar         ecx, 6
147 | 
148 |     ; q
149 |     movsd       xmm3, QWORD PTR __real_1_by_6
150 |     mulsd       xmm3, xmm0
151 |     mulsd       xmm0, xmm0            ; xmm0 <-- r^2
152 |     addsd       xmm3, QWORD PTR __real_1_by_2
153 |     mulsd       xmm0, xmm3
154 |     addsd       xmm0, xmm1            ; xmm0 <-- q = r^2*(r/6 + 1/2) + r
155 | 
156 |     add         rcx, 1023             ; biased exponent of 2^m ...
157 |     shl         rcx, 52               ; ... moved into the exponent field of a double
158 | 
159 |     ; (f)*(1+q)
160 |     movsd       xmm2, QWORD PTR [r10+rax*8]
161 |     mulsd       xmm0, xmm2
162 |     addsd       xmm0, xmm2
163 | 
164 |     movd        xmm1, rcx
165 |     mulsd       xmm0, xmm1            ; scale by 2^m
166 |     cvtsd2ss    xmm0, xmm0
167 | 
168 | Lexpf_final_check:
169 |     StackDeallocate stack_size
170 |     ret
171 | 
172 | ALIGN 16
173 | Lexpf_y_is_zero:
174 |     bts         edx, 31               ; x < 0 on this path: restore the sign bit cleared on entry
175 |     movss       xmm1, DWORD PTR __real_zero
176 |     movd        xmm0, edx             ; xmm0 <-- original x (xmm0 held the widened copy)
177 |     mov         r8d, DWORD PTR __flag_y_zero
178 | 
179 |     call        fname_special
180 |     jmp         Lexpf_finish
181 | 
182 | ALIGN 16
183 | Lexpf_y_is_inf:
184 |     ; x > 0 on this path, so edx (bits of |x|) already equals the bits of x
185 |     movss       xmm1, DWORD PTR __real_inf
186 |     movd        xmm0, edx
187 |     mov         r8d, DWORD PTR __flag_y_inf
188 | 
189 |     call        fname_special
190 |     jmp         Lexpf_finish
191 | 
192 | ALIGN 16
193 | Lexpf_x_is_inf_or_nan:
194 |     movd        edx, xmm0             ; reload x with its sign; edx only held |x|
195 |     cmp         edx, DWORD PTR __real_inf
196 |     je          Lexpf_finish          ; x = +inf: return x, still in xmm0
197 | 
198 |     cmp         edx, DWORD PTR __real_ninf
199 |     je          Lexpf_process_zero    ; x = -inf: return +0
200 | 
201 |     or          edx, DWORD PTR __real_qnanbit
202 |     movd        xmm1, edx             ; y <-- x with the quiet bit set
203 |     mov         r8d, DWORD PTR __flag_x_nan
204 |     call        fname_special
205 |     jmp         Lexpf_finish
206 | 
207 | ALIGN 16
208 | Lexpf_process_zero:
209 |     movss       xmm0, DWORD PTR __real_zero
210 |     jmp         Lexpf_final_check
211 | 
212 | ALIGN 16
213 | Lexpf_finish:
214 |     StackDeallocate stack_size
215 |     ret
216 | 
217 | 
218 | ALIGN 16
219 | Lexpf_fma3:
220 | 
221 |     vcvtss2sd    xmm0, xmm0, xmm0
222 | 
223 |     ; x * (64/ln(2))
224 |     vmulsd      xmm3, xmm0, QWORD PTR __real_64_by_log2
225 | 
226 |     ; x <= 128*ln(2), ( x * (64/ln(2)) ) <= 64*128
227 |     ; x > -150*ln(2), ( x * (64/ln(2)) ) > 64*(-150)
228 |     vcomisd     xmm3, QWORD PTR __real_p8192
229 |     jae         Lexpf_fma3_y_is_inf
230 | 
231 |     vucomisd    xmm3, QWORD PTR __real_m9600
232 |     jb          Lexpf_fma3_y_is_zero
233 | 
234 |     ; n = int( x * (64/ln(2)) )
235 |     vcvtpd2dq   xmm4, xmm3
236 |     lea         r10, __two_to_jby64_table
237 |     vcvtdq2pd   xmm1, xmm4
238 | 
239 |     ; r = x - n * ln(2)/64
240 |     vfnmadd231sd xmm0, xmm1, QWORD PTR __real_log2_by_64
241 |     vmovd        ecx, xmm4                ; ecx <-- n
242 |     mov          rax, 3fh
243 |     and          eax, ecx                 ; eax <-- j = n AND 63 (table index)
244 |     vmovapd      xmm1, xmm0               ; xmm1 <-- copy of r
245 | 
246 |     ; m = (n - j) / 64
247 |     sub          ecx, eax
248 |     sar          ecx, 6
249 | 
250 |     ; q
251 |     vmovsd       xmm3, QWORD PTR __real_1_by_6
252 |     vmulsd       xmm0, xmm0, xmm0         ; xmm0 <-- r^2
253 |     vfmadd213sd  xmm3, xmm1, QWORD PTR __real_1_by_2 ; xmm3 <-- r/6 + 1/2
254 |     vfmadd213sd  xmm0, xmm3, xmm1         ; xmm0 <-- q = r^2*(r/6 + 1/2) + r
255 | 
256 |     add         rcx, 1023                 ; biased exponent of 2^m ...
257 |     shl         rcx, 52                   ; ... moved into the exponent field of a double
258 | 
259 |     ; (f)*(1+q)
260 |     vmovsd       xmm2, QWORD PTR [r10+rax*8]
261 |     vfmadd213sd  xmm0, xmm2, xmm2         ; xmm0 <-- f*q + f
262 | 
263 |     vmovq        xmm2,rcx
264 |     vmulsd       xmm0, xmm0, xmm2         ; scale by 2^m
265 |     vcvtsd2ss    xmm0, xmm0, xmm0
266 | 
267 | Lexpf_fma3_final_check:
268 |     StackDeallocate stack_size
269 |     ret
270 | 
271 | ALIGN 16
272 | Lexpf_fma3_y_is_zero:
273 |     bts          edx, 31                  ; x < 0 on this path: restore the sign bit cleared on entry
274 |     vmovss       xmm1, DWORD PTR __real_zero
275 |     vmovd        xmm0, edx                ; xmm0 <-- original x (xmm0 held the widened copy)
276 |     mov          r8d, DWORD PTR __flag_y_zero
277 | 
278 |     call         fname_special
279 |     jmp          Lexpf_fma3_finish
280 | 
281 | ALIGN 16
282 | Lexpf_fma3_y_is_inf:
283 |     ; x > 0 on this path, so edx (bits of |x|) already equals the bits of x
284 |     vmovss       xmm1, DWORD PTR __real_inf
285 |     vmovd        xmm0, edx
286 |     mov          r8d, DWORD PTR __flag_y_inf
287 | 
288 |     call         fname_special
289 |     jmp          Lexpf_fma3_finish
290 | 
291 | ALIGN 16
292 | Lexpf_fma3_process_zero:                  ; no jump in this procedure targets this label
293 |     vmovss       xmm0, DWORD PTR __real_zero
294 |     jmp          Lexpf_fma3_final_check
295 | 
296 | ALIGN 16
297 | Lexpf_fma3_finish:
298 |     StackDeallocate stack_size
299 |     ret
300 | 
301 | fname endp
302 | 
303 | END
304 | 


--------------------------------------------------------------------------------
/floor.c:
--------------------------------------------------------------------------------
 1 | 
 2 | /*******************************************************************************
 3 | MIT License
 4 | -----------
 5 | 
 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
 7 | 
 8 | Permission is hereby granted, free of charge, to any person obtaining a copy
 9 | of this Software and associated documentation files (the "Software"), to deal
10 | in the Software without restriction, including without limitation the rights
11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 | copies of the Software, and to permit persons to whom the Software is
13 | furnished to do so, subject to the following conditions:
14 | 
15 | The above copyright notice and this permission notice shall be included in
16 | all copies or substantial portions of the Software.
17 | 
18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 | THE SOFTWARE.
25 | *******************************************************************************/
26 | 
27 | #include "libm.h"
28 | #include "libm_util.h"
29 | 
30 | #include "libm_errno.h"
31 | #define USE_HANDLE_ERROR
32 | #include "libm_inlines.h"
33 | #undef USE_HANDLE_ERROR
34 | 
35 | #pragma function(floor)
36 | 
37 | /*
38 |  * floor: largest integral value not greater than x, computed from the IEEE
39 |  * double-precision bit pattern of x.  NaN arguments are reported through
40 |  * _handle_error (_DOMAIN / EDOM) with bit 0x0008000000000000 set in the result.
41 |  */
42 | double FN_PROTOTYPE(floor)(double x)
43 | {
44 |   double result;
45 |   long exponent, negative;
46 |   unsigned long bits, magnitude, kept, fraction;
47 | 
48 |   GET_BITS_DP64(x, bits);
49 |   magnitude = bits & (~SIGNBIT_DP64);
50 |   negative = (bits != magnitude);
51 | 
52 |   /* NaN: the magnitude lies above the infinity pattern */
53 |   if (magnitude > 0x7ff0000000000000)
54 |     return _handle_error("floor", OP_FLOOR, bits|0x0008000000000000, _DOMAIN,
55 |                         0, EDOM, x, 0.0, 1);
56 | 
57 |   /* infinity, abs(x) >= 2^53, and +/-zero all come back unchanged */
58 |   if (magnitude >= 0x4340000000000000 || magnitude == 0x0000000000000000)
59 |     return x;
60 | 
61 |   /* 0 < abs(x) < 1.0 */
62 |   if (magnitude < 0x3ff0000000000000)
63 |     return negative ? -1.0 : 0.0;
64 | 
65 |   /* 1.0 <= abs(x) < 2^53: clear the fraction bits, i.e. truncate toward zero */
66 |   exponent = ((bits & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
67 |   fraction = 1;
68 |   fraction = (fraction << (EXPSHIFTBITS_DP64 - exponent)) - 1;
69 |   kept = (bits & ~fraction);
70 |   result = x;
71 |   PUT_BITS_DP64(kept, result);
72 | 
73 |   /* Truncation moved a negative non-integer up; step back down by one */
74 |   if (negative && (kept != bits))
75 |     result = result - 1.0;
76 |   return result;
77 | }
78 | 


--------------------------------------------------------------------------------
/floorf.c:
--------------------------------------------------------------------------------
 1 | 
 2 | /*******************************************************************************
 3 | MIT License
 4 | -----------
 5 | 
 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
 7 | 
 8 | Permission is hereby granted, free of charge, to any person obtaining a copy
 9 | of this Software and associated documentation files (the "Software"), to deal
10 | in the Software without restriction, including without limitation the rights
11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 | copies of the Software, and to permit persons to whom the Software is
13 | furnished to do so, subject to the following conditions:
14 | 
15 | The above copyright notice and this permission notice shall be included in
16 | all copies or substantial portions of the Software.
17 | 
18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 | THE SOFTWARE.
25 | *******************************************************************************/
26 | 
27 | #include "libm.h"
28 | #include "libm_util.h"
29 | 
30 | #include "libm_errno.h"
31 | #define USE_HANDLE_ERRORF
32 | #include "libm_inlines.h"
33 | #undef USE_HANDLE_ERRORF
34 | 
35 | // Disable "C4163: not available as intrinsic function" warning that older
36 | // compilers may issue here.
37 | #pragma warning(disable:4163)
38 | #pragma function(floorf)
39 | 
40 | /*
41 |  * floorf: largest integral value not greater than x, computed from the IEEE
42 |  * single-precision bit pattern of x.  NaN arguments are reported through
43 |  * _handle_errorf (_DOMAIN / EDOM) with bit 0x00400000 set in the result.
44 |  */
45 | float FN_PROTOTYPE(floorf)(float x)
46 | {
47 |   float result;
48 |   int exponent, negative;
49 |   unsigned int bits, magnitude, kept, fraction;
50 | 
51 |   GET_BITS_SP32(x, bits);
52 |   magnitude = bits & (~SIGNBIT_SP32);
53 |   negative = (bits != magnitude);
54 | 
55 |   /* NaN: the magnitude lies above the infinity pattern */
56 |   if (magnitude > 0x7f800000)
57 |     return _handle_errorf("floorf", OP_FLOOR, bits|0x00400000, _DOMAIN,
58 |                          0, EDOM, x, 0.0F, 1);
59 | 
60 |   /* infinity, abs(x) >= 2^24, and +/-zero all come back unchanged */
61 |   if (magnitude >= 0x4b800000 || magnitude == 0x00000000)
62 |     return x;
63 | 
64 |   /* 0 < abs(x) < 1.0 */
65 |   if (magnitude < 0x3f800000)
66 |     return negative ? -1.0F : 0.0F;
67 | 
68 |   /* 1.0 <= abs(x) < 2^24: clear the fraction bits, i.e. truncate toward zero */
69 |   exponent = ((bits & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;
70 |   fraction = (1 << (EXPSHIFTBITS_SP32 - exponent)) - 1;
71 |   kept = (bits & ~fraction);
72 |   PUT_BITS_SP32(kept, result);
73 | 
74 |   /* Truncation moved a negative non-integer up; step back down by one */
75 |   if (negative && (bits != kept))
76 |     result = result - 1.0F;
77 |   return result;
78 | }
79 | 


--------------------------------------------------------------------------------
/fm.inc:
--------------------------------------------------------------------------------
 1 | StackAllocate   MACRO size
 2 |     if          size ne 0               ;; a zero size emits nothing at all
 3 |     sub         rsp, size               ;; reserve size bytes below the current stack top
 4 |     .ALLOCSTACK size                    ;; unwind directive paired with the allocation
 5 |     endif
 6 | ENDM
 7 | ; StackDeallocate size: add size back to rsp; emits nothing when size is 0.
 8 | StackDeallocate   MACRO size
 9 |     if          size ne 0
10 |     add         rsp, size
11 |     endif
12 | ENDM
13 | ; SaveReg reg64, offset: store reg64 at [rsp+offset] and emit the .SAVEREG directive.
14 | SaveReg         MACRO reg64, offset
15 |     mov         QWORD PTR [rsp+offset], reg64
16 |     .SAVEREG    reg64, offset
17 | ENDM
18 | ; RestoreReg reg64, offset: reload reg64 from [rsp+offset].
19 | RestoreReg      MACRO reg64, offset
20 |     mov         reg64, QWORD PTR [rsp+offset]
21 | ENDM
22 | ; SaveXmm xmmreg, offset: movdqa store of xmmreg at [offset+rsp] plus the .SAVEXMM128 directive.
23 | SaveXmm         MACRO xmmreg, offset
24 |     movdqa      XMMWORD PTR [offset+rsp], xmmreg
25 |     .SAVEXMM128 xmmreg, offset
26 | ENDM
27 | ; RestoreXmm xmmreg, offset: movdqa reload of xmmreg from [offset+rsp].
28 | RestoreXmm      MACRO xmmreg, offset
29 |     movdqa      xmmreg, XMMWORD PTR [offset+rsp]
30 | ENDM
31 | ; AVXSaveXmm xmmreg, offset: same as SaveXmm but using the VEX-encoded vmovdqa.
32 | AVXSaveXmm      MACRO xmmreg, offset
33 |     vmovdqa     XMMWORD PTR [offset+rsp], xmmreg
34 |     .SAVEXMM128 xmmreg, offset
35 | ENDM
36 | ; AVXRestoreXmm xmmreg, offset: same as RestoreXmm but using the VEX-encoded vmovdqa.
37 | AVXRestoreXmm   MACRO xmmreg, offset
38 |     vmovdqa     xmmreg, XMMWORD PTR [offset+rsp]
39 | ENDM
40 | 


--------------------------------------------------------------------------------
/fma3_available.c:
--------------------------------------------------------------------------------
 1 | 
 2 | /*******************************************************************************
 3 | MIT License
 4 | -----------
 5 | 
 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
 7 | 
 8 | Permission is hereby granted, free of charge, to any person obtaining a copy
 9 | of this Software and associated documentation files (the "Software"), to deal
10 | in the Software without restriction, including without limitation the rights
11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 | copies of the Software, and to permit persons to whom the Software is
13 | furnished to do so, subject to the following conditions:
14 | 
15 | The above copyright notice and this permission notice shall be included in
16 | all copies or substantial portions of the Software.
17 | 
18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 | THE SOFTWARE.
25 | *******************************************************************************/
26 | 
27 | #ifdef TEST_STANDALONE
28 | #include <stdio.h>
29 | #pragma section (".CRT$XIC",long,read)
30 | typedef void (__cdecl *_PIFV)(void);
31 | #else
32 | #include <sect_attribs.h>
33 | #include <windows.h>
34 | #include <cruntime.h>
35 | #include <internal.h>
36 | #endif
37 | 
38 | #define _CRTALLOC(x) __declspec(allocate(x))
39 | 
40 | int __fma3_is_available = 0;
41 | int __use_fma3_lib = 0;
42 | 
43 | /* Requests the FMA3 code paths on (non-zero) or off (0); returns the setting in effect. */
44 | int __cdecl _set_FMA3_enable(int flag)
45 | {
46 |     if (__fma3_is_available != 0) { __use_fma3_lib = flag; } /* ignored when FMA3 is unavailable */
47 |     return __use_fma3_lib;
48 | }
49 | 
50 | int __fma3_lib_init(void);
51 | 
52 | _CRTALLOC(".CRT$XIC") static _PIFV init_fma3 = __fma3_lib_init;
53 | 
54 | int __fma3_lib_init(void)
55 | {
56 |     int regs[4]; // regs[2] receives ECX
57 | 
58 |     // CPUID leaf 1: bit 12 of ECX is taken as the FMA3 indicator.
59 |     // NOTE(review): OS support for AVX state (OSXSAVE/XGETBV) is not checked here - confirm it is guaranteed elsewhere.
60 |     __cpuid(regs, 1);
61 |     __fma3_is_available = (regs[2] & (1 << 12)) ? 1 : 0;
62 | 
63 |     // The FMA3 paths are enabled by default whenever they are available.
64 |     __use_fma3_lib = __fma3_is_available;
65 |     return 0;
66 | }
67 | 


--------------------------------------------------------------------------------
/fmod.asm:
--------------------------------------------------------------------------------
  1 | ;
  2 | ; MIT License
  3 | ; -----------
  4 | ; 
  5 | ; Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
  6 | ; 
  7 | ; Permission is hereby granted, free of charge, to any person obtaining a copy
  8 | ; of this Software and associated documentation files (the "Software"), to deal
  9 | ; in the Software without restriction, including without limitation the rights
 10 | ; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 11 | ; copies of the Software, and to permit persons to whom the Software is
 12 | ; furnished to do so, subject to the following conditions:
 13 | ; 
 14 | ; The above copyright notice and this permission notice shall be included in
 15 | ; all copies or substantial portions of the Software.
 16 | ; 
 17 | ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 18 | ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 19 | ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 20 | ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 21 | ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 22 | ; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 23 | ; THE SOFTWARE.
 24 | ;
 25 | ; $Workfile: fmod.asm $
 26 | ; $Revision: 4 $
 27 | ;     $Date: 9/15/04 16:43 $
 28 | ;
 29 | ;
 30 | ; This is an optimized version of fmod.
 31 | ;
 32 | ; Define _CRTBLD_C9X to make it compliant with C90 and on.
 33 | ;
 34 | ; If building the OS CRTL (_NTSUBSET_ defined), abort.
 35 | 
 36 | ;		.ERRDEF _NTSUBSET_, "x87 code cannot be used in kernel mode"
 37 | 
 38 | DOMAIN		EQU	1			; _DOMAIN
 39 | EDOM		EQU	33			; EDOM
 40 | FPCODEFMOD	EQU	22			; _FpCodeFmod
 41 | INVALID		EQU	8			; AMD_F_INVALID
 42 | 
 43 | FPIND		EQU	0fff8000000000000h	; indefinite
 44 | FPSNAN		EQU	07ff7ffffffffffffh	; SNAN
 45 | FPQNAN		EQU	07fffffffffffffffh	; QNAN
 46 | 
 47 | X87SW		RECORD	X87SW_B: 1,
 48 | 			X87SW_C3: 1,
 49 | 			X87SW_TOP: 3,
 50 | 			X87SW_C: 3,
 51 | 			X87SW_ES: 1,
 52 | 			X87SW_SF: 1,
 53 | 			X87SW_PE: 1,
 54 | 			X87SW_E: 5
 55 | 
 56 | X87XAM		EQU	MASK X87SW_C3 OR MASK X87SW_C AND NOT (1 SHL (X87SW_C + 1))
 57 | X87XAM_INF	EQU	5 SHL X87SW_C
 58 | X87XAM_NAN	EQU	1 SHL X87SW_C
 59 | X87XAM_BAD	EQU	MASK X87SW_E AND NOT 2
 60 | 
 61 | 		EXTRN	_handle_error: PROC	; double _handle_error (char *fname, int opcode, unsigned long long value, int type, int flags, int error, double arg1, double arg2, int nargs)
 62 | 
 63 | 		.const
 64 | 
 65 | @fmodz		DB	"fmod", 0
 66 | 
 67 | 		.CODE
 68 | 
 69 | ; double fmod [double, double] ----------------------------------
 70 | 
 71 | fmod		PROC	FRAME
 72 | 						; frame: [rsp+0..31] scratch (result, status words), [rsp+32..71] stack arguments for _handle_error
 73 | 		sub	rsp, 40 + 32
 74 | 
 75 | 		.ALLOCSTACK 40 + 32
 76 | 		.ENDPROLOG
 77 | 						; spill X and Y; these slots double as arg1/arg2 of _handle_error
 78 | 		movsd	QWORD PTR 24 [rsp + 32], xmm1	; Y
 79 | 		movsd	QWORD PTR 16 [rsp + 32], xmm0	; X
 80 | 						; load Y first, then X, so that st(0) = X and st(1) = Y
 81 | 		DB	0ddh, 44h, 24h, 38h	; fld	QWORD PTR 24 [rsp + 32]
 82 | 		DB	0ddh, 44h, 24h, 30h	; fld	QWORD PTR 16 [rsp + 32]
 83 | 
 84 | 		DB	0d9h, 0e5h		; fxam (X)
 85 | 		DB	09bh, 0ddh, 07ch, 024h, 010h ; fstsw 16 [rsp]
 86 | 
 87 | 		movzx	ecx, WORD PTR 16 [rsp]	; status word after fxam (X)
 88 | 		and	ecx, X87XAM		; ecx = class bits of X (C3 and the C field without bit X87SW_C + 1)
 89 | 
 90 | 		fnclex			; clear exception flags
 91 | 						; in preparation for fprem
 92 | 
 93 | @@:
 94 | 		DB	0d9h, 0f8h		; fprem
 95 | 
 96 | 		DB	09bh, 0dfh, 0e0h 	; fstsw	ax
 97 | 		test	ax, 4 SHL X87SW_C	; bit 2 of the C field set => reduction not finished
 98 | 		jnz	@b			; do it again in case of partial result
 99 | 
100 | 		DB	0ddh, 01ch, 024h	; fstp	QWORD PTR [rsp]
101 | 		movlpd	xmm0, QWORD PTR [rsp]		; result
102 | 						; the remainder has been popped, so st(0) is now Y
103 | 		DB	0d9h, 0e5h		; fxam (Y)
104 | 		DB	09bh, 0ddh, 07ch, 024h, 008h ; fstsw 8 [rsp]
105 | 
106 | 		movzx	edx, WORD PTR 8 [rsp]	; status word after fxam (Y)
107 | 		and	edx, X87XAM		; edx = class bits of Y
108 | 
109 | 		DB	0ddh, 0d8h		; fstp	st(0)
110 | 
111 | 		cmp	edx, X87XAM_NAN		; fmod (x, NAN) = QNAN
112 | 		je	@error
113 | 
114 | 		cmp	ecx, X87XAM_NAN		; fmod (NAN, y) = QNAN
115 | 		je	@error
116 | 
117 | 		and	eax, X87XAM_BAD		; ax still holds the status word read after the last fprem
118 | 		jnz	@raise			; handle error
119 | 
120 | 		IFNDEF	_CRTBLD_C9X		; Not C90
121 | 		cmp	edx, X87XAM_INF		; fmod (x, infinity) = ???
122 | 		je	@raise
123 | 		ELSE				; C90
124 | 						; fmod (x, infinity) = x (as x87 already does)
125 | 		ENDIF
126 | 
127 | @exit:
128 | 		add	rsp, 40 + 32		; release the frame; the result is in xmm0
129 | 		ret
130 | 
131 | 		ALIGN	16
132 | 
133 | @raise:
134 | 		mov	eax, INVALID		; raise exception
135 | 		mov	r8, FPIND		; value = indefinite
136 | 		jmp	@f
137 | 
138 | @error:
139 | 		xor	eax, eax		; no exception
140 | 		movd	r8, xmm0		; value = the bits of the fprem result loaded into xmm0 above
141 | 		jmp	@f
142 | 
143 | @@:
144 | 		lea	rcx, [@fmodz]		; fname
145 | 		mov	edx, FPCODEFMOD		; opcode
146 | ;		mov	r8, INDEF		; value
147 | 		mov	r9d, DOMAIN		; type
148 | 		mov	DWORD PTR  0 [rsp + 32], eax ; flags
149 | 		mov	DWORD PTR  8 [rsp + 32], EDOM ; error
150 | 		mov	DWORD PTR 32 [rsp + 32], 2 ; nargs
151 | 		call	_handle_error		; (char *fname, int opcode, unsigned long long value, int type, int flags, int error, double arg1, double arg2, int nargs)
152 | 
153 | 		DB	09bh, 0dbh, 0e2h	; fclex
154 | 		jmp	@exit			; xmm0 is not touched after the call
155 | 
156 | fmod		ENDP
157 | 
158 | ; ---------------------------------------------------------------
159 | 
160 | 		END
161 | 


--------------------------------------------------------------------------------
/fmodf.asm:
--------------------------------------------------------------------------------
  1 | ;
  2 | ; MIT License
  3 | ; -----------
  4 | ; 
  5 | ; Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
  6 | ; 
  7 | ; Permission is hereby granted, free of charge, to any person obtaining a copy
  8 | ; of this Software and associated documentation files (the "Software"), to deal
  9 | ; in the Software without restriction, including without limitation the rights
 10 | ; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 11 | ; copies of the Software, and to permit persons to whom the Software is
 12 | ; furnished to do so, subject to the following conditions:
 13 | ; 
 14 | ; The above copyright notice and this permission notice shall be included in
 15 | ; all copies or substantial portions of the Software.
 16 | ; 
 17 | ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 18 | ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 19 | ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 20 | ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 21 | ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 22 | ; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 23 | ; THE SOFTWARE.
 24 | ;
 25 | ; $Workfile: fmodf.asm $
 26 | ; $Revision: 4 $
 27 | ;     $Date: 9/15/04 16:43 $
 28 | ;
 29 | ;
 30 | ; This is an optimized version of fmodf.
 31 | ;
 32 | ; Define _CRTBLD_C9X to make it compliant with C90 and on.
 33 | ;
 34 | ; If building the OS CRTL (_NTSUBSET_ defined), abort.
 35 | 
 36 | 		.ERRDEF _NTSUBSET_, "x87 code cannot be used in kernel mode"
 37 | 
 38 | DOMAIN		EQU	1			; _DOMAIN
 39 | EDOM		EQU	33			; EDOM
 40 | FPCODEFMOD	EQU	22			; _FpCodeFmod
 41 | INVALID		EQU	8			; AMD_F_INVALID
 42 | 
 43 | FPIND		EQU	0ffc00000h		; indefinite
 44 | FPSNAN		EQU	07fbfffffh		; SNAN
 45 | FPQNAN		EQU	07fffffffh		; QNAN
 46 | 
 47 | X87SW		RECORD	X87SW_B: 1,
 48 | 			X87SW_C3: 1,
 49 | 			X87SW_TOP: 3,
 50 | 			X87SW_C: 3,
 51 | 			X87SW_ES: 1,
 52 | 			X87SW_SF: 1,
 53 | 			X87SW_PE: 1,
 54 | 			X87SW_E: 5
 55 | 
 56 | X87XAM		EQU	MASK X87SW_C3 OR MASK X87SW_C AND NOT (1 SHL (X87SW_C + 1))
 57 | X87XAM_INF	EQU	5 SHL X87SW_C
 58 | X87XAM_NAN	EQU	1 SHL X87SW_C
 59 | X87XAM_BAD	EQU	MASK X87SW_E AND NOT 2
 60 | 
 61 | 		EXTRN	_handle_errorf: PROC	; float _handle_errorf (char *fname, int opcode, unsigned long long value, int type, int flags, int error, float arg1, float arg2, int nargs)
 62 | 
 63 | 		.CONST
 64 | 
 65 | @fmodfz 	DB	"fmodf", 0
 66 | 
 67 | 		.CODE
 68 | 
 69 | ; float fmodf [float, float] ------------------------------------
 70 | 
 71 | fmodf		PROC	FRAME
 72 | 						; frame: [rsp+0..31] scratch (result, status words), [rsp+32..71] stack arguments for _handle_errorf
 73 | 		sub	rsp, 40 + 32
 74 | 
 75 | 		.ALLOCSTACK 40 + 32
 76 | 		.ENDPROLOG
 77 | 						; spill X and Y; these slots double as arg1/arg2 of _handle_errorf
 78 | 		movss	DWORD PTR 24 [rsp + 32], xmm1	; Y
 79 | 		movss	DWORD PTR 16 [rsp + 32], xmm0	; X
 80 | 						; load Y first, then X, so that st(0) = X and st(1) = Y
 81 | 		DB	0d9h, 44h, 24h, 38h	; fld	DWORD PTR 24 [rsp + 32]
 82 | 		DB	0d9h, 44h, 24h, 30h	; fld	DWORD PTR 16 [rsp + 32]
 83 | 
 84 | 		DB	0d9h, 0e5h		; fxam (X)
 85 | 		DB	09bh, 0ddh, 07ch, 024h, 010h ; fstsw 16 [rsp]
 86 | 
 87 | 		movzx	ecx, WORD PTR 16 [rsp]	; status word after fxam (X)
 88 | 		and	ecx, X87XAM		; ecx = class bits of X (C3 and the C field without bit X87SW_C + 1)
 89 | 
 90 | 		fnclex				; clear exception flags
 91 | 							; in preparation for fprem
 92 | 
 93 | @@:
 94 | 		DB	0d9h, 0f8h		; fprem
 95 | 
 96 | 		DB	9bh, 0dfh, 0e0h 	; fstsw	ax
 97 | 		test	ax, 00400h		; same bit as 4 SHL X87SW_C: reduction not finished
 98 | 		jnz	@b			; do it again in case of partial result
 99 | 
100 | 		DB	0d9h, 1ch, 24h		; fstp	DWORD PTR [rsp]
101 | 		movss	xmm0, DWORD PTR [rsp]	; result
102 | 						; the remainder has been popped, so st(0) is now Y
103 | 		DB	0d9h, 0e5h		; fxam (Y)
104 | 		DB	09bh, 0ddh, 07ch, 024h, 008h ; fstsw 8 [rsp]
105 | 
106 | 		movzx	edx, WORD PTR 8 [rsp]	; status word after fxam (Y)
107 | 		and	edx, X87XAM		; edx = class bits of Y
108 | 
109 | 		DB	0ddh, 0d8h		; fstp	st(0)
110 | 
111 | 		cmp	edx, X87XAM_NAN		; fmod (x, NAN) = QNAN
112 | 		je	@error
113 | 
114 | 		cmp	ecx, X87XAM_NAN		; fmod (NAN, y) = QNAN
115 | 		je	@error
116 | 
117 | 		and	eax, X87XAM_BAD		; ax still holds the status word read after the last fprem
118 | 		jnz	@raise			; handle error
119 | 
120 | 		IFNDEF	_CRTBLD_C9X		; Not C90
121 | 		cmp	edx, X87XAM_INF		; fmod (x, infinity) = ???
122 | 		je	@raise
123 | 		ELSE				; C90
124 | 						; fmod (x, infinity) = x (as x87 already does)
125 | 		ENDIF
126 | 
127 | @exit:
128 | 		add	rsp, 40 + 32		; release the frame; the result is in xmm0
129 | 		ret
130 | 
131 | 		ALIGN	16
132 | 
133 | @raise:
134 | 		mov	eax, INVALID		; raise exception
135 | 		mov	r8d, FPIND		; value = indefinite
136 | 		jmp	@f
137 | 
138 | @error:
139 | 		xor	eax, eax		; no exception
140 | 		movd	r8d, xmm0		; value = the bits of the fprem result loaded into xmm0 above
141 | 		jmp	@f
142 | 
143 | @@:
144 | 		lea	rcx, [@fmodfz]		; fname
145 | 		mov	edx, FPCODEFMOD		; opcode
146 | ;		mov	r8d, [rsp]		; value
147 | 		mov	r9d, DOMAIN		; type
148 | 		mov	DWORD PTR  0 [rsp + 32], eax ; flags
149 | 		mov	DWORD PTR  8 [rsp + 32], EDOM ; error
150 | 		mov	DWORD PTR 32 [rsp + 32], 2 ; nargs
151 | 		call	_handle_errorf		; (char *fname, int opcode, unsigned long long value, int type, int flags, int error, float arg1, float arg2, int nargs)
152 | 
153 | 		DB	9Bh, 0DBh, 0E2h		; fclex
154 | 		jmp	@exit			; xmm0 is not touched after the call
155 | 
156 | fmodf		ENDP
157 | 
158 | ; ---------------------------------------------------------------
159 | 
160 | 		END
161 | 


--------------------------------------------------------------------------------
/hypot.c:
--------------------------------------------------------------------------------
  1 | 
  2 | /*******************************************************************************
  3 | MIT License
  4 | -----------
  5 | 
  6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
  7 | 
  8 | Permission is hereby granted, free of charge, to any person obtaining a copy
  9 | of this Software and associated documentation files (the "Software"), to deal
 10 | in the Software without restriction, including without limitation the rights
 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 12 | copies of the Software, and to permit persons to whom the Software is
 13 | furnished to do so, subject to the following conditions:
 14 | 
 15 | The above copyright notice and this permission notice shall be included in
 16 | all copies or substantial portions of the Software.
 17 | 
 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 24 | THE SOFTWARE.
 25 | *******************************************************************************/
 26 | 
 27 | #include "libm.h"
 28 | #include "libm_util.h"
 29 | 
 30 | #define FAST_BUT_GREATER_THAN_ONE_ULP  /* Helps speed by trading off a little
 31 |                                           accuracy */
 32 | #define USE_SCALEDOUBLE_1
 33 | #define USE_INFINITY_WITH_FLAGS
 34 | #define USE_HANDLE_ERROR
 35 | #include "libm_inlines.h"
 36 | #undef USE_SCALEDOUBLE_1
 37 | #undef USE_INFINITY_WITH_FLAGS
 38 | #undef USE_HANDLE_ERROR
 39 | 
 40 | #include "libm_errno.h"
 41 | /* _hypot(x, y): sqrt(x*x + y*y), rescaling the operands so the intermediate sum neither overflows nor underflows.
 42 |    If the final result overflows, _handle_error is called with the caller's original x and y. */
 43 | double FN_PROTOTYPE(_hypot)(double x, double y)
 44 | {
 45 |   /* Returns sqrt(x*x + y*y) with no overflow or underflow unless
 46 |      the result warrants it */
 47 | 
 48 |   const double large = 1.79769313486231570815e+308; /* 0x7fefffffffffffff */
 49 | 
 50 | #ifdef FAST_BUT_GREATER_THAN_ONE_ULP
 51 |   double r, retval;
 52 |   unsigned long xexp, yexp, ux, uy;
 53 | #else  
 54 |   double u, r, retval, hx, tx, x2, hy, ty, y2, hs, ts;
 55 |   unsigned long xexp, yexp, ux, uy, ut;
 56 | #endif
 57 |   int dexp, expadjust;
 58 |   const double x_orig = x, y_orig = y; /* x and y are overwritten (abs, scaling) below; keep the caller's operands for error reporting */
 59 |   GET_BITS_DP64(x, ux);
 60 |   ux &= ~SIGNBIT_DP64;
 61 |   GET_BITS_DP64(y, uy);
 62 |   uy &= ~SIGNBIT_DP64;
 63 |   xexp = (ux >> EXPSHIFTBITS_DP64);
 64 |   yexp = (uy >> EXPSHIFTBITS_DP64);
 65 | 
 66 |   if (xexp == BIASEDEMAX_DP64 + 1 || yexp == BIASEDEMAX_DP64 + 1)
 67 |     {
 68 |       /* One or both of the arguments are NaN or infinity. The
 69 |          result will also be NaN or infinity. */
 70 |       retval = x*x + y*y;
 71 |       if (((xexp == BIASEDEMAX_DP64 + 1) && !(ux & MANTBITS_DP64)) ||
 72 |           ((yexp == BIASEDEMAX_DP64 + 1) && !(uy & MANTBITS_DP64)))
 73 |         /* x or y is infinity. ISO C99 defines that we must
 74 |            return +infinity, even if the other argument is NaN.
 75 |            Note that the computation of x*x + y*y above will already
 76 |            have raised invalid if either x or y is a signalling NaN. */
 77 |         return infinity_with_flags(0);
 78 |       else
 79 |         /* One or both of x or y is NaN, and neither is infinity.
 80 |            Raise invalid if it's a signalling NaN */
 81 |         return retval;
 82 |     }
 83 | 
 84 |   /* Set x = abs(x) and y = abs(y) */
 85 |   PUT_BITS_DP64(ux, x);
 86 |   PUT_BITS_DP64(uy, y);
 87 | 
 88 |   /* The difference in exponents between x and y */
 89 |   dexp = (int)(xexp - yexp);
 90 |   expadjust = 0;
 91 | 
 92 |   if (ux == 0)
 93 |     /* x is zero */
 94 |     return y;
 95 |   else if (uy == 0)
 96 |     /* y is zero */
 97 |     return x;
 98 |   else if (dexp > MANTLENGTH_DP64 + 1 || dexp < -MANTLENGTH_DP64 - 1)
 99 |     /* One of x and y is insignificant compared to the other */
100 |     return x + y; /* Raise inexact */
101 |   else if (xexp > EXPBIAS_DP64 + 500 || yexp > EXPBIAS_DP64 + 500)
102 |     {
103 |       /* Danger of overflow; scale down by 2**600. */
104 |       expadjust = 600;
105 |       ux -= 0x2580000000000000;
106 |       PUT_BITS_DP64(ux, x);
107 |       uy -= 0x2580000000000000;
108 |       PUT_BITS_DP64(uy, y);
109 |     }
110 |   else if (xexp < EXPBIAS_DP64 - 500 || yexp < EXPBIAS_DP64 - 500)
111 |     {
112 |       /* Danger of underflow; scale up by 2**600. */
113 |       expadjust = -600;
114 |       if (xexp == 0)
115 |         {
116 |           /* x is denormal - handle by adding 601 to the exponent
117 |            and then subtracting a correction for the implicit bit */
118 |           PUT_BITS_DP64(ux + 0x2590000000000000, x);
119 |           x -= 9.23297861778573578076e-128; /* 0x2590000000000000 */
120 |           GET_BITS_DP64(x, ux);
121 |         }
122 |       else
123 |         {
124 |           /* x is normal - just increase the exponent by 600 */
125 |           ux += 0x2580000000000000;
126 |           PUT_BITS_DP64(ux, x);
127 |         }
128 |       if (yexp == 0)
129 |         {
130 |           PUT_BITS_DP64(uy + 0x2590000000000000, y);
131 |           y -= 9.23297861778573578076e-128; /* 0x2590000000000000 */
132 |           GET_BITS_DP64(y, uy);
133 |         }
134 |       else
135 |         {
136 |           uy += 0x2580000000000000;
137 |           PUT_BITS_DP64(uy, y);
138 |         }
139 |     }
140 | 
141 | 
142 | #ifdef FAST_BUT_GREATER_THAN_ONE_ULP
143 |   /* Not awful, but results in accuracy loss larger than 1 ulp */
144 |   r = x*x + y*y;
145 | #else
146 |   /* Slower but more accurate */
147 | 
148 |   /* Sort so that x is greater than y */
149 |   if (x < y)
150 |     {
151 |       u = y;
152 |       y = x;
153 |       x = u;
154 |       ut = ux;
155 |       ux = uy;
156 |       uy = ut;
157 |     }
158 | 
159 |   /* Split x into hx and tx, head and tail */
160 |   PUT_BITS_DP64(ux & 0xfffffffff8000000, hx);
161 |   tx = x - hx;
162 | 
163 |   PUT_BITS_DP64(uy & 0xfffffffff8000000, hy);
164 |   ty = y - hy;
165 | 
166 |   /* Compute r = x*x + y*y with extra precision */
167 |   x2 = x*x;
168 |   y2 = y*y;
169 |   hs = x2 + y2;
170 | 
171 |   if (dexp == 0)
172 |     /* We take most care when x and y have equal exponents,
173 |        i.e. are almost the same size */
174 |     ts = (((x2 - hs) + y2) +
175 |           ((hx * hx - x2) + 2 * hx * tx) + tx * tx) +
176 |       ((hy * hy - y2) + 2 * hy * ty) + ty * ty;
177 |   else
178 |     ts = (((x2 - hs) + y2) +
179 |           ((hx * hx - x2) + 2 * hx * tx) + tx * tx);
180 | 
181 |   r = hs + ts;
182 | #endif
183 | 
184 |   /* The sqrt can introduce another half ulp error. */
185 |   /* VC++ intrinsic call */
186 |   _mm_store_sd(&retval, _mm_sqrt_sd(_mm_setzero_pd(), _mm_load_sd(&r)));
187 | 
188 |   /* If necessary scale the result back. This may lead to
189 |      overflow but if so that's the correct result. */
190 |   retval = scaleDouble_1(retval, expadjust);
191 | 
192 |   if (retval > large)
193 |     /* The result overflowed. Deal with errno. Report the caller's operands, not the scaled absolute values. */
194 |     return _handle_error("_hypot", OP_HYPOT, PINFBITPATT_DP64, _OVERFLOW,
195 |                         AMD_F_OVERFLOW | AMD_F_INEXACT, ERANGE, x_orig, y_orig, 2);
196 | 
197 |   return retval;
198 | }
199 | 


--------------------------------------------------------------------------------
/hypotf.c:
--------------------------------------------------------------------------------
  1 | 
  2 | /*******************************************************************************
  3 | MIT License
  4 | -----------
  5 | 
  6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
  7 | 
  8 | Permission is hereby granted, free of charge, to any person obtaining a copy
  9 | of this Software and associated documentation files (the "Software"), to deal
 10 | in the Software without restriction, including without limitation the rights
 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 12 | copies of the Software, and to permit persons to whom the Software is
 13 | furnished to do so, subject to the following conditions:
 14 | 
 15 | The above copyright notice and this permission notice shall be included in
 16 | all copies or substantial portions of the Software.
 17 | 
 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 24 | THE SOFTWARE.
 25 | *******************************************************************************/
 26 | 
 27 | #include "libm.h"
 28 | #include "libm_util.h"
 29 | 
 30 | #ifdef USE_SOFTWARE_SQRT
 31 | #define USE_SQRTF_AMD_INLINE
 32 | #endif
 33 | #define USE_INFINITYF_WITH_FLAGS
 34 | #define USE_HANDLE_ERRORF
 35 | #include "libm_inlines.h"
 36 | #ifdef USE_SOFTWARE_SQRT
 37 | #undef USE_SQRTF_AMD_INLINE
 38 | #endif
 39 | #undef USE_INFINITYF_WITH_FLAGS
 40 | #undef USE_HANDLE_ERRORF
 41 | 
 42 | #include "libm_errno.h"
 43 | 
 44 | 
 45 | float FN_PROTOTYPE(_hypotf)(float x, float y)
 46 | {
 47 |   /* Returns sqrt(x*x + y*y) with no overflow or underflow unless
 48 |      the result warrants it */
 49 | 
 50 |     /* Do intermediate computations in double precision
 51 |        and use sqrt instruction from chip if available. */
 52 |     double dx = x, dy = y, dr, retval;
 53 | 
 54 |     /* The largest finite float, stored as a double */
 55 |     const double large = 3.40282346638528859812e+38; /* 0x47efffffe0000000 */
 56 | 
 57 |   /* avx/avy: sign-stripped bits read via GET_BITS_DP64 (macro defined elsewhere); ux/uy: those bits shifted down to the exponent field */
 58 |   unsigned long ux, uy, avx, avy;
 59 | 
 60 |   GET_BITS_DP64(x, avx);
 61 |   avx &= ~SIGNBIT_DP64;
 62 |   GET_BITS_DP64(y, avy);
 63 |   avy &= ~SIGNBIT_DP64;
 64 |   ux = (avx >> EXPSHIFTBITS_DP64);
 65 |   uy = (avy >> EXPSHIFTBITS_DP64);
 66 | 
 67 |   if (ux == BIASEDEMAX_DP64 + 1 || uy == BIASEDEMAX_DP64 + 1)
 68 |     {
 69 |       retval = x*x + y*y;
 70 |       /* One or both of the arguments are NaN or infinity. The
 71 |          result will also be NaN or infinity. */
 72 |       if (((ux == BIASEDEMAX_DP64 + 1) && !(avx & MANTBITS_DP64)) ||
 73 |           ((uy == BIASEDEMAX_DP64 + 1) && !(avy & MANTBITS_DP64)))
 74 |         /* x or y is infinity. ISO C99 defines that we must
 75 |            return +infinity, even if the other argument is NaN.
 76 |            Note that the computation of x*x + y*y above will already
 77 |            have raised invalid if either x or y is a signalling NaN. */
 78 |         return infinityf_with_flags(0);
 79 |       else
 80 |         /* One or both of x or y is NaN, and neither is infinity.
 81 |            Raise invalid if it's a signalling NaN */
 82 |         return (float)retval;
 83 |     }
 84 |     /* Sum of squares computed in double precision */
 85 |     dr = (dx*dx + dy*dy);
 86 | 
 87 | #if USE_SOFTWARE_SQRT
 88 |     retval = sqrtf_amd_inline(r); /* NOTE(review): no variable named r is declared in this function; this branch looks stale (dr?) - confirm before enabling USE_SOFTWARE_SQRT */
 89 | #else
 90 |   /* VC++ intrinsic call */
 91 |   _mm_store_sd(&retval, _mm_sqrt_sd(_mm_setzero_pd(), _mm_load_sd(&dr)));
 92 | #endif
 93 |     /* A result above the largest finite float is reported as overflow (ERANGE) */
 94 |     if (retval > large)
 95 |       return _handle_errorf("_hypotf", OP_HYPOT, PINFBITPATT_SP32, _OVERFLOW,
 96 |                            AMD_F_OVERFLOW | AMD_F_INEXACT, ERANGE, x, y, 2);
 97 |     else
 98 |       return (float)retval;
 99 | }
100 | 


--------------------------------------------------------------------------------
/libm.h:
--------------------------------------------------------------------------------
 1 | /***********************************************************************************/
 2 | /** MIT License **/
 3 | /** ----------- **/
 4 | /** **/  
 5 | /** Copyright (c) 2002-2019 Advanced Micro Devices, Inc. **/
 6 | /** **/
 7 | /** Permission is hereby granted, free of charge, to any person obtaining a copy **/
 8 | /** of this Software and associated documentation files (the "Software"), to deal **/
 9 | /** in the Software without restriction, including without limitation the rights **/
10 | /** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell **/
11 | /** copies of the Software, and to permit persons to whom the Software is **/
12 | /** furnished to do so, subject to the following conditions: **/
13 | /** **/ 
14 | /** The above copyright notice and this permission notice shall be included in **/
15 | /** all copies or substantial portions of the Software. **/
16 | /** **/
17 | /** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR **/
18 | /** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, **/
19 | /** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE **/
20 | /** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER **/
21 | /** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, **/
22 | /** OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN **/
23 | /** THE SOFTWARE. **/
24 | /***********************************************************************************/
25 | 
26 | #ifndef LIBM_AMD_H_INCLUDED
27 | #define LIBM_AMD_H_INCLUDED 1
28 | 
29 | #define FN_PROTOTYPE(fname) fname
30 | 
31 | #include <math.h>
32 | #include <fpieee.h>
33 | 
34 | #ifndef IS_64BIT
35 | #define IS_64BIT
36 | #endif
37 | 
38 | #ifndef _COMPLEX_DEFINED
39 | struct _complex /* declared here only when _COMPLEX_DEFINED has not been set already */
40 | {
41 |   double x, y; /* real and imaginary parts */
42 | };
43 | #define _COMPLEX_DEFINED
44 | #endif
45 | #define COMPLEX struct _complex
46 | 
47 | extern void __remainder_piby2(double x, double *r, double *rr, int *region);
48 | 
49 | #endif /* LIBM_AMD_H_INCLUDED */
50 | 


--------------------------------------------------------------------------------
/libm_errno.h:
--------------------------------------------------------------------------------
 1 | /***********************************************************************************/
 2 | /** MIT License **/
 3 | /** ----------- **/
 4 | /** **/  
 5 | /** Copyright (c) 2002-2019 Advanced Micro Devices, Inc. **/
 6 | /** **/
 7 | /** Permission is hereby granted, free of charge, to any person obtaining a copy **/
 8 | /** of this Software and associated documentation files (the "Software"), to deal **/
 9 | /** in the Software without restriction, including without limitation the rights **/
10 | /** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell **/
11 | /** copies of the Software, and to permit persons to whom the Software is **/
12 | /** furnished to do so, subject to the following conditions: **/
13 | /** **/ 
14 | /** The above copyright notice and this permission notice shall be included in **/
15 | /** all copies or substantial portions of the Software. **/
16 | /** **/
17 | /** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR **/
18 | /** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, **/
19 | /** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE **/
20 | /** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER **/
21 | /** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, **/
22 | /** OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN **/
23 | /** THE SOFTWARE. **/
24 | /***********************************************************************************/
25 | 
26 | #ifndef LIBM_ERRNO_AMD_H_INCLUDED
27 | #define LIBM_ERRNO_AMD_H_INCLUDED 1
28 | 
29 | #include <stdio.h>
30 | #include <errno.h>
31 | #ifndef __set_errno
32 | #define __set_errno(x) errno = (x)
33 | #endif
34 | 
35 | #endif /* LIBM_ERRNO_AMD_H_INCLUDED */
36 | 


--------------------------------------------------------------------------------
/libm_new.h:
--------------------------------------------------------------------------------
  1 | 
  2 | /***********************************************************************************/
  3 | /** MIT License **/
  4 | /** ----------- **/
  5 | /** **/  
  6 | /** Copyright (c) 2002-2019 Advanced Micro Devices, Inc. **/
  7 | /** **/
  8 | /** Permission is hereby granted, free of charge, to any person obtaining a copy **/
  9 | /** of this Software and associated documentation files (the "Software"), to deal **/
 10 | /** in the Software without restriction, including without limitation the rights **/
 11 | /** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell **/
 12 | /** copies of the Software, and to permit persons to whom the Software is **/
 13 | /** furnished to do so, subject to the following conditions: **/
 14 | /** **/ 
 15 | /** The above copyright notice and this permission notice shall be included in **/
 16 | /** all copies or substantial portions of the Software. **/
 17 | /** **/
 18 | /** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR **/
 19 | /** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, **/
 20 | /** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE **/
 21 | /** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER **/
 22 | /** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, **/
 23 | /** OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN **/
 24 | /** THE SOFTWARE. **/
 25 | /***********************************************************************************/
 26 | 
 27 | #ifndef __LIBM_NEW_H__
 28 | #define __LIBM_NEW_H__
 29 | 
 30 | // Defines, protos, etc for *new* math funcs updated by AMD 11/2008
 31 | // Old files will continue to include libm_util.h, libm.h, libm_inlines.h
 32 | // until such time as these have all been refreshed w/ new versions.
 33 | 
 34 | typedef float F32;
 35 | typedef unsigned int U32;
 36 | 
 37 | typedef double F64;
 38 | typedef unsigned long long U64;
 39 | 
 40 | union UT32_ /* overlays an F32 (float) with a U32 (unsigned int) */
 41 | {
 42 |     F32 f32; /* floating-point view */
 43 |     U32 u32; /* integer view of the same storage */
 44 | };
 45 | 
 46 | union UT64_ /* overlays an F64 (double) with a U64 and with pairs of F32 / U32 */
 47 | {
 48 |     F64 f64; /* floating-point view */
 49 |     U64 u64; /* integer view of the same storage */
 50 |     /* the same storage addressed as two-element arrays */
 51 |     F32 f32[2];
 52 |     U32 u32[2];
 53 | };
 54 | 
 55 | typedef union UT32_ UT32;
 56 | typedef union UT64_ UT64;
 57 | 
 58 | #define SIGN_MASK_32        0x80000000
 59 | #define MANTISSA_MASK_32    0x007fffff
 60 | #define EXPONENT_MASK_32    0x7f800000
 61 | #define QNAN_MASK_32        0x00400000
 62 | 
 63 | #define INF_POS_32          0x7f800000
 64 | #define INF_NEG_32          0xff800000
 65 | #define QNAN_POS_32         0x7fc00000
 66 | #define QNAN_NEG_32         0xffc00000
 67 | #define IND_32              0xffc00000
 68 | 
 69 | #define EXPONENT_FULL_32    0x7f800000
 70 | #define SIGN_SET_32         0x80000000
 71 | #define QNAN_SET_32         0x00400000
 72 | 
 73 | #define INF_POS_64          0x7ff0000000000000
 74 | #define INF_NEG_64          0xfff0000000000000
 75 | 
 76 | #define MANTISSA_MASK_64    0x000fffffffffffff
 77 | #define SIGN_MASK_64        0x8000000000000000
 78 | #define IND_64              0xfff8000000000000
 79 | #define QNAN_MASK_64        0x0008000000000000
 80 | 
 81 | // constants for 'flags' argument of _handle_error and _handle_errorf
 82 | #define AMD_F_INEXACT     0x00000010
 83 | #define AMD_F_OVERFLOW    0x00000001
 84 | #define AMD_F_UNDERFLOW   0x00000002
 85 | #define AMD_F_DIVBYZERO   0x00000004
 86 | #define AMD_F_INVALID     0x00000008
 87 | 
 88 | // define the Microsoft specific error handling routine
 89 | 
 90 | // Note to maintainers: 
 91 | // These prototypes may appear, at first glance, to differ from the versions
 92 | // declared in libm_inlines.h and defined in libm_error.c.  The third 
 93 | // parameter appears to have changed type from unsigned long to unsigned long
 94 | // long.  In fact they are the same because in both of the aforementioned 
 95 | // files, long has been #defined to __int64 in a most cowardly fashion.  This
 96 | // disgusts me.  The buck stops here. - MAS
 97 | 
 98 | double _handle_error(
 99 |         char *fname,              /* name string supplied by the caller, e.g. "_hypot" */
100 |         int opcode,               /* operation code, e.g. OP_HYPOT */
101 |         unsigned long long value, /* 64-bit bit pattern, e.g. PINFBITPATT_DP64 (presumably the result to report - confirm) */
102 |         int type,                 /* error type, e.g. _OVERFLOW */
103 |         int flags,                /* AMD_F_* constants defined above */
104 |         int error,                /* errno-style code, e.g. ERANGE */
105 |         double arg1,              /* first operand of the failing call */
106 |         double arg2,              /* second operand of the failing call */
107 |         int nargs                 /* operand count passed by the caller (presumably how many of arg1/arg2 are valid) */
108 |         );
109 | float _handle_errorf(
110 |         char *fname,              /* name string supplied by the caller, e.g. "_hypotf" */
111 |         int opcode,               /* operation code, e.g. OP_HYPOT */
112 |         unsigned long long value, /* bit pattern, e.g. PINFBITPATT_SP32 (presumably the result to report - confirm) */
113 |         int type,                 /* error type, e.g. _OVERFLOW */
114 |         int flags,                /* AMD_F_* constants defined above */
115 |         int error,                /* errno-style code, e.g. ERANGE */
116 |         float arg1,               /* first operand of the failing call */
117 |         float arg2,               /* second operand of the failing call */
118 |         int nargs                 /* operand count passed by the caller (presumably how many of arg1/arg2 are valid) */
119 |         );
120 | 
121 | #endif // __LIBM_NEW_H
122 | 
123 | 


--------------------------------------------------------------------------------
/libm_util.h:
--------------------------------------------------------------------------------
  1 | /***********************************************************************************/
  2 | /** MIT License **/
  3 | /** ----------- **/
  4 | /** **/  
  5 | /** Copyright (c) 2002-2019 Advanced Micro Devices, Inc. **/
  6 | /** **/
  7 | /** Permission is hereby granted, free of charge, to any person obtaining a copy **/
  8 | /** of this Software and associated documentation files (the "Software"), to deal **/
  9 | /** in the Software without restriction, including without limitation the rights **/
 10 | /** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell **/
 11 | /** copies of the Software, and to permit persons to whom the Software is **/
 12 | /** furnished to do so, subject to the following conditions: **/
 13 | /** **/ 
 14 | /** The above copyright notice and this permission notice shall be included in **/
 15 | /** all copies or substantial portions of the Software. **/
 16 | /** **/
 17 | /** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR **/
 18 | /** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, **/
 19 | /** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE **/
 20 | /** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER **/
 21 | /** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, **/
 22 | /** OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN **/
 23 | /** THE SOFTWARE. **/
 24 | /***********************************************************************************/
 25 | 
 26 | #ifndef LIBM_UTIL_AMD_H_INCLUDED
 27 | #define LIBM_UTIL_AMD_H_INCLUDED 1
 28 | 
 29 | #define inline __inline /* spell 'inline' with the compiler-specific __inline keyword */
 30 | #undef long /* drop any earlier macro definition of 'long' before installing the one below */
 31 | #define long __int64 /* from here on, every 'long' in an including file means __int64 */
 32 | 
 33 | #include "emmintrin.h"
 34 | #include "float.h"
 35 | 
 36 | 
 37 | 
 38 | /* Compile-time verification that type long (a macro for __int64 in this header) is the same
 39 |    size as type double: the array bound below evaluates to -1, an invalid size, if they differ. */
 40 | void check_long_against_double_size(int machine_is_64_bit[(sizeof(long) == sizeof(double))?1:-1]); 
 41 | 
 42 | 
 43 | /* Definitions for double functions on 64 bit machines */
 44 | #define SIGNBIT_DP64      0x8000000000000000 /* bit 63: sign */
 45 | #define EXPBITS_DP64      0x7ff0000000000000 /* bits 62..52: biased exponent field */
 46 | #define MANTBITS_DP64     0x000fffffffffffff /* bits 51..0: stored mantissa */
 47 | #define ONEEXPBITS_DP64   0x3ff0000000000000 /* bit pattern of 1.0 */
 48 | #define TWOEXPBITS_DP64   0x4000000000000000 /* bit pattern of 2.0 */
 49 | #define HALFEXPBITS_DP64  0x3fe0000000000000 /* bit pattern of 0.5 */
 50 | #define IMPBIT_DP64       0x0010000000000000 /* bit 52: position of the implicit leading 1 */
 51 | #define QNANBITPATT_DP64  0x7ff8000000000000 /* quiet NaN: all exponent bits plus top mantissa bit */
 52 | #define INDEFBITPATT_DP64 0xfff8000000000000 /* the same quiet NaN with the sign bit set */
 53 | #define PINFBITPATT_DP64  0x7ff0000000000000 /* +infinity */
 54 | #define NINFBITPATT_DP64  0xfff0000000000000 /* -infinity */
 55 | #define EXPBIAS_DP64      1023 /* exponent bias */
 56 | #define EXPSHIFTBITS_DP64 52 /* right shift that moves the exponent field down to bit 0 */
 57 | #define BIASEDEMIN_DP64   1 /* smallest biased exponent of a normal number */
 58 | #define EMIN_DP64         -1022 /* = BIASEDEMIN_DP64 - EXPBIAS_DP64 */
 59 | #define BIASEDEMAX_DP64   2046 /* largest biased exponent of a finite number */
 60 | #define EMAX_DP64         1023 /* = BIASEDEMAX_DP64 - EXPBIAS_DP64 */
 61 | #define LAMBDA_DP64       1.0e300 /* NOTE(review): large constant; its users are not in this header - confirm purpose */
 62 | #define MANTLENGTH_DP64   53 /* mantissa length in bits, counting the implicit bit */
 63 | #define BASEDIGITS_DP64   15 /* NOTE(review): presumably decimal digits of precision - confirm */
 64 | 
 65 | 
 66 | /* These definitions, used by float functions,
 67 |    are for both 32 and 64 bit machines */
 68 | #define SIGNBIT_SP32      0x80000000 /* bit 31: sign */
 69 | #define EXPBITS_SP32      0x7f800000 /* bits 30..23: biased exponent field */
 70 | #define MANTBITS_SP32     0x007fffff /* bits 22..0: stored mantissa */
 71 | #define ONEEXPBITS_SP32   0x3f800000 /* bit pattern of 1.0F */
 72 | #define TWOEXPBITS_SP32   0x40000000 /* bit pattern of 2.0F */
 73 | #define HALFEXPBITS_SP32  0x3f000000 /* bit pattern of 0.5F */
 74 | #define IMPBIT_SP32       0x00800000 /* bit 23: position of the implicit leading 1 */
 75 | #define QNANBITPATT_SP32  0x7fc00000 /* quiet NaN: all exponent bits plus top mantissa bit */
 76 | #define INDEFBITPATT_SP32 0xffc00000 /* the same quiet NaN with the sign bit set */
 77 | #define PINFBITPATT_SP32  0x7f800000 /* +infinity */
 78 | #define NINFBITPATT_SP32  0xff800000 /* -infinity */
 79 | #define EXPBIAS_SP32      127 /* exponent bias */
 80 | #define EXPSHIFTBITS_SP32 23 /* right shift that moves the exponent field down to bit 0 */
 81 | #define BIASEDEMIN_SP32   1 /* smallest biased exponent of a normal number */
 82 | #define EMIN_SP32         -126 /* = BIASEDEMIN_SP32 - EXPBIAS_SP32 */
 83 | #define BIASEDEMAX_SP32   254 /* largest biased exponent of a finite number */
 84 | #define EMAX_SP32         127 /* = BIASEDEMAX_SP32 - EXPBIAS_SP32 */
 85 | #define LAMBDA_SP32       1.0e30 /* NOTE(review): large constant; its users are not in this header - confirm purpose */
 86 | #define MANTLENGTH_SP32   24 /* mantissa length in bits, counting the implicit bit */
 87 | #define BASEDIGITS_SP32   7 /* NOTE(review): presumably decimal digits of precision - confirm */
 88 | /* Floating-point class codes, 1..10. NOTE(review): the routine that produces them is not in this header. */
 89 | #define CLASS_SIGNALLING_NAN 1
 90 | #define CLASS_QUIET_NAN 2
 91 | #define CLASS_NEGATIVE_INFINITY 3
 92 | #define CLASS_NEGATIVE_NORMAL_NONZERO 4
 93 | #define CLASS_NEGATIVE_DENORMAL 5
 94 | #define CLASS_NEGATIVE_ZERO 6
 95 | #define CLASS_POSITIVE_ZERO 7
 96 | #define CLASS_POSITIVE_DENORMAL 8
 97 | #define CLASS_POSITIVE_NORMAL_NONZERO 9
 98 | #define CLASS_POSITIVE_INFINITY 10
 99 | 
100 | #define OLD_BITS_SP32(x) (*((unsigned int *)&x)) /* reads the storage of lvalue x as unsigned int through a pointer cast */
101 | #define OLD_BITS_DP64(x) (*((unsigned long *)&x)) /* same for a double; 'long' is __int64 in this header */
102 | 
103 | /* Alternatives to the above macros which don't have problems when using high optimization levels
104 |    on gcc. GET copies float x into bit pattern ux; PUT copies bit pattern ux into float x; both go through a volatile union. */
105 | #define GET_BITS_SP32(x, ux) \
106 |   { \
107 |     volatile union {float f; unsigned int i;} _bitsy; \
108 |     _bitsy.f = (x); \
109 |     ux = _bitsy.i; \
110 |   }
111 | #define PUT_BITS_SP32(ux, x) \
112 |   { \
113 |     volatile union {float f; unsigned int i;} _bitsy; \
114 |     _bitsy.i = (ux); \
115 |      x = _bitsy.f; \
116 |   }
117 | /* Double versions: GET copies double x into 'unsigned long' pattern ux, PUT does the reverse ('long' is __int64 in this header). */
118 | #define GET_BITS_DP64(x, ux) \
119 |   { \
120 |     volatile union {double d; unsigned long i;} _bitsy; \
121 |     _bitsy.d = (x); \
122 |     ux = _bitsy.i; \
123 |   }
124 | #define PUT_BITS_DP64(ux, x) \
125 |   { \
126 |     volatile union {double d; unsigned long i;} _bitsy; \
127 |     _bitsy.i = (ux); \
128 |     x = _bitsy.d; \
129 |   }
130 | 
131 | 
132 | /* Processor-dependent floating-point status flags (single-bit masks; callers OR them together) */
133 | #define AMD_F_OVERFLOW  0x00000001
134 | #define AMD_F_UNDERFLOW 0x00000002
135 | #define AMD_F_DIVBYZERO 0x00000004
136 | #define AMD_F_INVALID   0x00000008
137 | #define AMD_F_INEXACT   0x00000010
138 | 
139 | /* Processor-dependent floating-point precision-control flags (values of the two-bit field 0x00000300) */
140 | #define AMD_F_EXTENDED 0x00000300
141 | #define AMD_F_DOUBLE   0x00000200
142 | #define AMD_F_SINGLE   0x00000000
143 | 
144 | /* Processor-dependent floating-point rounding-control flags (values of the two-bit field 0x00006000) */
145 | #define AMD_F_RC_NEAREST 0x00000000
146 | #define AMD_F_RC_DOWN    0x00002000
147 | #define AMD_F_RC_UP      0x00004000
148 | #define AMD_F_RC_ZERO    0x00006000
149 | 
150 | #endif /* LIBM_UTIL_AMD_H_INCLUDED */
151 | 


--------------------------------------------------------------------------------
/log_F_inv_dword_table.asm:
--------------------------------------------------------------------------------
  1 | ;;
  2 | ;
  3 | ; MIT License
  4 | ; -----------
  5 | ; 
  6 | ; Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
  7 | ; 
  8 | ; Permission is hereby granted, free of charge, to any person obtaining a copy
  9 | ; of this Software and associated documentation files (the "Software"), to deal
 10 | ; in the Software without restriction, including without limitation the rights
 11 | ; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 12 | ; copies of the Software, and to permit persons to whom the Software is
 13 | ; furnished to do so, subject to the following conditions:
 14 | ; 
 15 | ; The above copyright notice and this permission notice shall be included in
 16 | ; all copies or substantial portions of the Software.
 17 | ; 
 18 | ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 19 | ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 20 | ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 21 | ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 22 | ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 23 | ; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 24 | ; THE SOFTWARE.
 25 | ;
 26 | ;; Defines __log_F_inv_dword: 129 single-precision entries, 16-byte aligned, in the .const segment
 27 | ;; Used in log10f and logf
 28 | ;; Entry j (j = 0..128) holds the IEEE single bit pattern of 1/F with F = 0.5 + j/256, running from 2.0 down to 1.0
 29 | 
 30 | .const
 31 | 
 32 | ALIGN 16
 33 | PUBLIC __log_F_inv_dword
 34 | __log_F_inv_dword   DD 40000000h ; j = 0:   1/0.5 = 2.0
 35 |                     DD 3ffe03f8h
 36 |                     DD 3ffc0fc1h
 37 |                     DD 3ffa232dh
 38 |                     DD 3ff83e10h
 39 |                     DD 3ff6603eh
 40 |                     DD 3ff4898dh
 41 |                     DD 3ff2b9d6h
 42 |                     DD 3ff0f0f1h
 43 |                     DD 3fef2eb7h
 44 |                     DD 3fed7304h
 45 |                     DD 3febbdb3h
 46 |                     DD 3fea0ea1h
 47 |                     DD 3fe865ach
 48 |                     DD 3fe6c2b4h
 49 |                     DD 3fe52598h
 50 |                     DD 3fe38e39h
 51 |                     DD 3fe1fc78h
 52 |                     DD 3fe07038h
 53 |                     DD 3fdee95ch
 54 |                     DD 3fdd67c9h
 55 |                     DD 3fdbeb62h
 56 |                     DD 3fda740eh
 57 |                     DD 3fd901b2h
 58 |                     DD 3fd79436h
 59 |                     DD 3fd62b81h
 60 |                     DD 3fd4c77bh
 61 |                     DD 3fd3680dh
 62 |                     DD 3fd20d21h
 63 |                     DD 3fd0b6a0h
 64 |                     DD 3fcf6475h
 65 |                     DD 3fce168ah
 66 |                     DD 3fcccccdh ; j = 32:  1/0.625 = 1.6
 67 |                     DD 3fcb8728h
 68 |                     DD 3fca4588h
 69 |                     DD 3fc907dah
 70 |                     DD 3fc7ce0ch
 71 |                     DD 3fc6980ch
 72 |                     DD 3fc565c8h
 73 |                     DD 3fc43730h
 74 |                     DD 3fc30c31h
 75 |                     DD 3fc1e4bch
 76 |                     DD 3fc0c0c1h
 77 |                     DD 3fbfa030h
 78 |                     DD 3fbe82fah
 79 |                     DD 3fbd6910h
 80 |                     DD 3fbc5264h
 81 |                     DD 3fbb3ee7h
 82 |                     DD 3fba2e8ch
 83 |                     DD 3fb92144h
 84 |                     DD 3fb81703h
 85 |                     DD 3fb70fbbh
 86 |                     DD 3fb60b61h
 87 |                     DD 3fb509e7h
 88 |                     DD 3fb40b41h
 89 |                     DD 3fb30f63h
 90 |                     DD 3fb21643h
 91 |                     DD 3fb11fd4h
 92 |                     DD 3fb02c0bh
 93 |                     DD 3faf3adeh
 94 |                     DD 3fae4c41h
 95 |                     DD 3fad602bh
 96 |                     DD 3fac7692h
 97 |                     DD 3fab8f6ah
 98 |                     DD 3faaaaabh ; j = 64:  1/0.75 = 1.3333...
 99 |                     DD 3fa9c84ah
100 |                     DD 3fa8e83fh
101 |                     DD 3fa80a81h
102 |                     DD 3fa72f05h
103 |                     DD 3fa655c4h
104 |                     DD 3fa57eb5h
105 |                     DD 3fa4a9cfh
106 |                     DD 3fa3d70ah
107 |                     DD 3fa3065eh
108 |                     DD 3fa237c3h
109 |                     DD 3fa16b31h
110 |                     DD 3fa0a0a1h
111 |                     DD 3f9fd80ah
112 |                     DD 3f9f1166h
113 |                     DD 3f9e4cadh
114 |                     DD 3f9d89d9h
115 |                     DD 3f9cc8e1h
116 |                     DD 3f9c09c1h
117 |                     DD 3f9b4c70h
118 |                     DD 3f9a90e8h
119 |                     DD 3f99d723h
120 |                     DD 3f991f1ah
121 |                     DD 3f9868c8h
122 |                     DD 3f97b426h
123 |                     DD 3f97012eh
124 |                     DD 3f964fdah
125 |                     DD 3f95a025h
126 |                     DD 3f94f209h
127 |                     DD 3f944581h
128 |                     DD 3f939a86h
129 |                     DD 3f92f114h
130 |                     DD 3f924925h
131 |                     DD 3f91a2b4h
132 |                     DD 3f90fdbch
133 |                     DD 3f905a38h
134 |                     DD 3f8fb824h
135 |                     DD 3f8f177ah
136 |                     DD 3f8e7835h
137 |                     DD 3f8dda52h
138 |                     DD 3f8d3dcbh
139 |                     DD 3f8ca29ch
140 |                     DD 3f8c08c1h
141 |                     DD 3f8b7034h
142 |                     DD 3f8ad8f3h
143 |                     DD 3f8a42f8h
144 |                     DD 3f89ae41h
145 |                     DD 3f891ac7h
146 |                     DD 3f888889h
147 |                     DD 3f87f781h
148 |                     DD 3f8767abh
149 |                     DD 3f86d905h
150 |                     DD 3f864b8ah
151 |                     DD 3f85bf37h
152 |                     DD 3f853408h
153 |                     DD 3f84a9fah
154 |                     DD 3f842108h
155 |                     DD 3f839930h
156 |                     DD 3f83126fh
157 |                     DD 3f828cc0h
158 |                     DD 3f820821h
159 |                     DD 3f81848eh
160 |                     DD 3f810204h
161 |                     DD 3f808081h
162 |                     DD 3f800000h ; j = 128: 1/1.0 = 1.0
163 | 
164 | END
165 | 


--------------------------------------------------------------------------------
/log_special.c:
--------------------------------------------------------------------------------
  1 | 
  2 | /*******************************************************************************
  3 | MIT License
  4 | -----------
  5 | 
  6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
  7 | 
  8 | Permission is hereby granted, free of charge, to any person obtaining a copy
  9 | of this Software and associated documentation files (the "Software"), to deal
 10 | in the Software without restriction, including without limitation the rights
 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 12 | copies of the Software, and to permit persons to whom the Software is
 13 | furnished to do so, subject to the following conditions:
 14 | 
 15 | The above copyright notice and this permission notice shall be included in
 16 | all copies or substantial portions of the Software.
 17 | 
 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 24 | THE SOFTWARE.
 25 | *******************************************************************************/
 26 | 
 27 | #include <fpieee.h>
 28 | #include <excpt.h>
 29 | #include <float.h>
 30 | #include <math.h>
 31 | #include <errno.h>
 32 | 
 33 | #include "libm_new.h"
 34 | 
 35 | // y = log10f(x)
 36 | // y = log10(x)
 37 | // y = logf(x)
 38 | // y = log(x)
 39 | 
 40 | // these codes and the ones in the related .asm files have to match
 41 | #define LOG_X_ZERO      1   // reported below as _SING with AMD_F_DIVBYZERO and ERANGE
 42 | #define LOG_X_NEG       2   // reported below as _DOMAIN with AMD_F_INVALID and EDOM
 43 | #define LOG_X_NAN       3   // reported below as _DOMAIN with EDOM (flags depend on the routine)
 44 | // Shared reporter for the float log routines: passes name, op and the bits of y to _handle_errorf with the type/flags/errno chosen by `code`, then returns y itself.
 45 | static float _logf_special_common(float x, float y, U32 code, unsigned int op, char *name)
 46 | {
 47 |     switch(code)   // a code other than the three LOG_X_* values reports nothing
 48 |     {
 49 |     case LOG_X_ZERO:
 50 |         {
 51 |             UT64 ym; ym.u64 = 0; ym.f32[0] = y;   // zero the 64-bit container, then store y in its f32[0] slot
 52 |             _handle_errorf(name, op, ym.u64, _SING, AMD_F_DIVBYZERO, ERANGE, x, 0.0, 1);
 53 |         }
 54 |         break;
 55 | 
 56 |     case LOG_X_NEG:
 57 |         {
 58 |             UT64 ym; ym.u64 = 0; ym.f32[0] = y;
 59 |             _handle_errorf(name, op, ym.u64, _DOMAIN, AMD_F_INVALID, EDOM, x, 0.0, 1);
 60 |         }
 61 |         break;
 62 | 
 63 |     case LOG_X_NAN:
 64 |         {
 65 |             unsigned int is_snan;
 66 |             UT32 xm; UT64 ym;
 67 |             xm.f32 = x;
 68 |             is_snan = (((xm.u32 & QNAN_MASK_32) == QNAN_SET_32) ? 0 : 1);   // 0 when the masked bits of x equal QNAN_SET_32, else 1
 69 |             ym.u64 = 0; ym.f32[0] = y;
 70 | 
 71 |             if(is_snan)
 72 |             {
 73 |                 _handle_errorf(name, op, ym.u64, _DOMAIN, AMD_F_INVALID, EDOM, x, 0.0, 1);   // signalling NaN: AMD_F_INVALID is passed
 74 |             }
 75 |             else
 76 |             {
 77 |                 _handle_errorf(name, op, ym.u64, _DOMAIN, 0, EDOM, x, 0.0, 1);   // quiet NaN: no status flag is passed
 78 |             }
 79 |         }
 80 |         break;
 81 |     }
 82 | 
 83 |     return y;   // the value returned by _handle_errorf is not used
 84 | }
 85 | // logf special-case entry: forwards to _logf_special_common with op _FpCodeLog and name "logf"; returns y.
 86 | float _logf_special(float x, float y, U32 code)
 87 | {
 88 |     return _logf_special_common(x, y, code, _FpCodeLog, "logf");
 89 | }
 90 | // log10f special-case entry: forwards to _logf_special_common with op _FpCodeLog10 and name "log10f"; returns y.
 91 | float _log10f_special(float x, float y, U32 code)
 92 | {
 93 |     return _logf_special_common(x, y, code, _FpCodeLog10, "log10f");
 94 | }
 95 | // Shared reporter for the double log routines: passes name, op and the bits of y to _handle_error with the type/flags/errno chosen by `code`, then returns y itself.
 96 | static double _log_special_common(double x, double y, U32 code, unsigned int op, char *name)
 97 | {
 98 |     switch(code)   // a code other than the three LOG_X_* values reports nothing
 99 |     {
100 |     case LOG_X_ZERO:
101 |         {
102 |             UT64 ym; ym.f64 = y;   // view the double result as its 64-bit pattern
103 |             _handle_error(name, op, ym.u64, _SING, AMD_F_DIVBYZERO, ERANGE, x, 0.0, 1);
104 |         }
105 |         break;
106 | 
107 |     case LOG_X_NEG:
108 |         {
109 |             UT64 ym; ym.f64 = y;
110 |             _handle_error(name, op, ym.u64, _DOMAIN, AMD_F_INVALID, EDOM, x, 0.0, 1);
111 |         }
112 |         break;
113 | 
114 |     case LOG_X_NAN:   // NOTE(review): no signalling/quiet distinction here, unlike _logf_special_common; flags are always 0 - confirm intended
115 |         {
116 |             UT64 ym; ym.f64 = y;
117 |             _handle_error(name, op, ym.u64, _DOMAIN, 0, EDOM, x, 0.0, 1);
118 |         }
119 |         break;
120 |     }
121 | 
122 |     return y;   // the value returned by _handle_error is not used
123 | }
124 | // log special-case entry: forwards to _log_special_common with op _FpCodeLog and name "log"; returns y.
125 | double _log_special(double x, double y, U32 code)
126 | {
127 |     return _log_special_common(x, y, code, _FpCodeLog, "log");
128 | }
129 | // log10 special-case entry: forwards to _log_special_common with op _FpCodeLog10 and name "log10"; returns y.
130 | double _log10_special(double x, double y, U32 code)
131 | {
132 |     return _log_special_common(x, y, code, _FpCodeLog10, "log10");
133 | }
134 | 


--------------------------------------------------------------------------------
/logb.c:
--------------------------------------------------------------------------------
 1 | 
 2 | /*******************************************************************************
 3 | MIT License
 4 | -----------
 5 | 
 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
 7 | 
 8 | Permission is hereby granted, free of charge, to any person obtaining a copy
 9 | of this Software and associated documentation files (the "Software"), to deal
10 | in the Software without restriction, including without limitation the rights
11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 | copies of the Software, and to permit persons to whom the Software is
13 | furnished to do so, subject to the following conditions:
14 | 
15 | The above copyright notice and this permission notice shall be included in
16 | all copies or substantial portions of the Software.
17 | 
18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 | THE SOFTWARE.
25 | *******************************************************************************/
26 | 
27 | #include "libm.h"
28 | #include "libm_util.h"
29 | 
30 | #define USE_INFINITY_WITH_FLAGS
31 | #define USE_HANDLE_ERROR
32 | #include "libm_inlines.h"
33 | #undef USE_INFINITY_WITH_FLAGS
34 | #undef USE_HANDLE_ERROR
35 | 
36 | #include "libm_errno.h"
37 | 
38 | double _logb(double x)
39 | {
40 |   /* Returns the unbiased binary exponent of x as a double; zero and NaN are reported through _handle_error. */
41 |   unsigned long ux;
42 |   long u;
43 |   GET_BITS_DP64(x, ux);
44 |   u = ((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64; /* exponent field minus the bias */
45 |   if ((ux & ~SIGNBIT_DP64) == 0)
46 |     /* x is +/-zero. Return -infinity with div-by-zero flag. */
47 |     return _handle_error("_logb", OP_LOGB, NINFBITPATT_DP64, _SING,
48 |                         AMD_F_DIVBYZERO, ERANGE, x, 0.0, 1);
49 |   else if (EMIN_DP64 <= u && u <= EMAX_DP64)
50 |     /* x is a normal number */
51 |     return (double)u;
52 |   else if (u > EMAX_DP64)
53 |     {
54 |       /* x is infinity or NaN */
55 |       if ((ux & MANTBITS_DP64) == 0)
56 |         /* x is +/-infinity. For VC++, return infinity of same sign. */
57 |         return x;
58 |       else
59 |         /* x is NaN, result is NaN: the bits of x with mantissa bit 51 forced on are passed as the value */
60 |         return _handle_error("_logb", OP_LOGB, ux|0x0008000000000000, _DOMAIN,
61 |                             0, EDOM, x, 0.0, 1);
62 |     }
63 |   else
64 |     {
65 |       /* x is denormalized. */
66 | #ifdef FOLLOW_IEEE754_LOGB
67 |       /* Return the value of the minimum exponent to ensure that
68 |          the relationship between logb and scalb, defined in
69 |          IEEE 754, holds. */
70 |       return EMIN_DP64;
71 | #else
72 |       /* Follow the rule set by IEEE 854 for logb: shift the mantissa up to the implicit-bit position, lowering the exponent once per shift */
73 |       ux &= MANTBITS_DP64;
74 |       u = EMIN_DP64;
75 |       while (ux < IMPBIT_DP64)
76 |         {
77 |           ux <<= 1;
78 |           u--;
79 |         }
80 |       return (double)u;
81 | #endif
82 |     }
83 | 
84 | }
85 | 


--------------------------------------------------------------------------------
/logbf.c:
--------------------------------------------------------------------------------
 1 | 
 2 | /*******************************************************************************
 3 | MIT License
 4 | -----------
 5 | 
 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
 7 | 
 8 | Permission is hereby granted, free of charge, to any person obtaining a copy
 9 | of this Software and associated documentation files (the "Software"), to deal
10 | in the Software without restriction, including without limitation the rights
11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 | copies of the Software, and to permit persons to whom the Software is
13 | furnished to do so, subject to the following conditions:
14 | 
15 | The above copyright notice and this permission notice shall be included in
16 | all copies or substantial portions of the Software.
17 | 
18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 | THE SOFTWARE.
25 | *******************************************************************************/
26 | 
27 | #include "libm.h"
28 | #include "libm_util.h"
29 | 
30 | #define USE_INFINITYF_WITH_FLAGS
31 | #define USE_HANDLE_ERRORF
32 | #include "libm_inlines.h"
33 | #undef USE_INFINITYF_WITH_FLAGS
34 | #undef USE_HANDLE_ERRORF
35 | 
36 | #include "libm_errno.h"
37 | /* Returns the unbiased binary exponent of x as a float; zero and NaN are reported through _handle_errorf. */
38 | float _logbf(float x)
39 | {
40 |   unsigned int ux;
41 |   int u;
42 |   GET_BITS_SP32(x, ux);
43 |   u = ((ux & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32; /* exponent field minus the bias */
44 |   if ((ux & ~SIGNBIT_SP32) == 0)
45 |     /* x is +/-zero. Return -infinity with div-by-zero flag. */
46 |     return _handle_errorf("_logbf", OP_LOGB, NINFBITPATT_SP32, _SING,
47 |                          AMD_F_DIVBYZERO, ERANGE, x, 0.0F, 1);
48 |   else if (EMIN_SP32 <= u && u <= EMAX_SP32)
49 |     /* x is a normal number */
50 |     return (float)u;
51 |   else if (u > EMAX_SP32)
52 |     {
53 |       /* x is infinity or NaN */
54 |       if ((ux & MANTBITS_SP32) == 0)
55 |         /* x is +/-infinity. For VC++, return infinity of same sign. */
56 |         return x;
57 |       else
58 |         /* x is NaN, result is NaN: the bits of x with mantissa bit 22 forced on are passed as the value */
59 |         return _handle_errorf("_logbf", OP_LOGB, ux|0x00400000, _DOMAIN,
60 |                              0, EDOM, x, 0.0F, 1);
61 |     }
62 |   else
63 |     {
64 |       /* x is denormalized. */
65 | #ifdef FOLLOW_IEEE754_LOGB
66 |       /* Return the value of the minimum exponent to ensure that
67 |          the relationship between logb and scalb, defined in
68 |          IEEE 754, holds. */
69 |       return EMIN_SP32;
70 | #else
71 |       /* Follow the rule set by IEEE 854 for logb: shift the mantissa up to the implicit-bit position, lowering the exponent once per shift */
72 |       ux &= MANTBITS_SP32;
73 |       u = EMIN_SP32;
74 |       while (ux < IMPBIT_SP32)
75 |         {
76 |           ux <<= 1;
77 |           u--;
78 |         }
79 |       return (float)u;
80 | #endif
81 |     }
82 | }
83 | 


--------------------------------------------------------------------------------
/modf.c:
--------------------------------------------------------------------------------
 1 | 
 2 | /*******************************************************************************
 3 | MIT License
 4 | -----------
 5 | 
 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
 7 | 
 8 | Permission is hereby granted, free of charge, to any person obtaining a copy
 9 | of this Software and associated documentation files (the "Software"), to deal
10 | in the Software without restriction, including without limitation the rights
11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 | copies of the Software, and to permit persons to whom the Software is
13 | furnished to do so, subject to the following conditions:
14 | 
15 | The above copyright notice and this permission notice shall be included in
16 | all copies or substantial portions of the Software.
17 | 
18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 | THE SOFTWARE.
25 | *******************************************************************************/
26 | 
27 | #include "libm.h"
28 | #include "libm_util.h"
29 | 
30 | double modf(double x, double *iptr)
31 | {
32 |   /* modf splits the argument x into integer and fraction parts, each with
33 |      the same sign as x; the integer part goes to *iptr, the fraction is returned. */
34 |   long xexp;
35 |   unsigned long ux, ax, mask;
36 | 
37 |   GET_BITS_DP64(x, ux);
38 |   ax = ux & (~SIGNBIT_DP64);
39 | 
40 |   if (ax >= 0x4340000000000000)
41 |     {
42 |       /* abs(x) is either NaN, infinity, or >= 2^53 */
43 |       if (ax > 0x7ff0000000000000)
44 |         {
45 |           /* x is NaN */
46 |           *iptr = x;
47 |           return x + x; /* Raise invalid if it is a signalling NaN */
48 |         }
49 |       else
50 |         {
51 |           /* x is infinity or large. Return zero with the sign of x */
52 |           *iptr = x;
53 |           PUT_BITS_DP64(ux & SIGNBIT_DP64, x);
54 |           return x;
55 |         }
56 |     }
57 |   else if (ax < 0x3ff0000000000000)
58 |     {
59 |       /* abs(x) < 1.0. Set iptr to zero with the sign of x
60 |          and return x. */
61 |       PUT_BITS_DP64(ux & SIGNBIT_DP64, *iptr);
62 |       return x;
63 |     }
64 |   else
65 |     {
66 |       xexp = ((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;
67 |       /* 1.0 <= abs(x) < 2^53: clear the (52 - xexp) fraction bits of x to get the integer part */
68 |       mask = 1;
69 |       mask = (mask << (EXPSHIFTBITS_DP64 - xexp)) - 1;
70 |       PUT_BITS_DP64(ux & ~mask, *iptr);
71 |       x -= *iptr;
72 |       if (x == 0.0) /* x was integral: x - x is +0.0 (or -0.0 when rounding down), so stamp the sign of x on the zero */
73 |         PUT_BITS_DP64(ux & SIGNBIT_DP64, x);
74 |       return x;
75 |     }
76 | }
77 | 


--------------------------------------------------------------------------------
/modff.c:
--------------------------------------------------------------------------------
 1 | 
 2 | /*******************************************************************************
 3 | MIT License
 4 | -----------
 5 | 
 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
 7 | 
 8 | Permission is hereby granted, free of charge, to any person obtaining a copy
 9 | of this Software and associated documentation files (the "Software"), to deal
10 | in the Software without restriction, including without limitation the rights
11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 | copies of the Software, and to permit persons to whom the Software is
13 | furnished to do so, subject to the following conditions:
14 | 
15 | The above copyright notice and this permission notice shall be included in
16 | all copies or substantial portions of the Software.
17 | 
18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 | THE SOFTWARE.
25 | *******************************************************************************/
26 | 
27 | #include "libm.h"
28 | #include "libm_util.h"
29 | 
30 | float modff(float x, float *iptr)
31 | {
32 |   /* modff splits the argument x into integer and fraction parts, each with
33 |      the same sign as x; the integer part goes to *iptr, the fraction is returned. */
34 |   unsigned int ux, mask;
35 |   int xexp;
36 | 
37 |   GET_BITS_SP32(x, ux);
38 |   xexp = ((ux & (~SIGNBIT_SP32)) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;
39 | 
40 |   if (xexp < 0)
41 |     {
42 |       /* abs(x) < 1.0. Set iptr to zero with the sign of x and return x. */
43 |       PUT_BITS_SP32(ux & SIGNBIT_SP32, *iptr);
44 |       return x;
45 |     }
46 |   else if (xexp < EXPSHIFTBITS_SP32)
47 |     {
48 |       /* 1.0 <= abs(x) < 2**23: clear the (23 - xexp) fraction bits of x to get the integer part */
49 |       mask = (1 << (EXPSHIFTBITS_SP32 - xexp)) - 1;
50 |       PUT_BITS_SP32(ux & ~mask, *iptr);
51 |       x -= *iptr;
52 |       if (x == 0.0F) /* x was integral: x - x is +0.0 (or -0.0 when rounding down), so stamp the sign of x on the zero */
53 |         PUT_BITS_SP32(ux & SIGNBIT_SP32, x);
54 |       return x;
55 |     }
56 |   else if ((ux & (~SIGNBIT_SP32)) > 0x7f800000)
57 |     {
58 |       /* x is NaN */
59 |       *iptr = x;
60 |       return x + x; /* Raise invalid if it is a signalling NaN */
61 |     }
62 |   else
63 |     {
64 |       /* x is infinity or abs(x) >= 2**23 (no fraction bits). Set iptr to x
65 |          and return zero with the sign of x. */
66 |       *iptr = x;
67 |       PUT_BITS_SP32(ux & SIGNBIT_SP32, x);
68 |       return x;
69 |     }
70 | }
71 | 


--------------------------------------------------------------------------------
/pow_special.c:
--------------------------------------------------------------------------------
  1 | 
  2 | /*******************************************************************************
  3 | MIT License
  4 | -----------
  5 | 
  6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
  7 | 
  8 | Permission is hereby granted, free of charge, to any person obtaining a copy
  9 | of this Software and associated documentation files (the "Software"), to deal
 10 | in the Software without restriction, including without limitation the rights
 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 12 | copies of the Software, and to permit persons to whom the Software is
 13 | furnished to do so, subject to the following conditions:
 14 | 
 15 | The above copyright notice and this permission notice shall be included in
 16 | all copies or substantial portions of the Software.
 17 | 
 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 24 | THE SOFTWARE.
 25 | *******************************************************************************/
 26 | 
 27 | #include <fpieee.h>
 28 | #include <excpt.h>
 29 | #include <float.h>
 30 | #include <math.h>
 31 | #include <errno.h>
 32 | 
 33 | #include "libm_new.h"
 34 | 
 35 | // these codes and the ones in the related .asm files have to match
 36 | #define POW_X_ONE_Y_SNAN            1   // _powf_special: AMD_F_INVALID only, type 0, errno 0
 37 | #define POW_X_ZERO_Z_INF            2   // reported as _SING with AMD_F_DIVBYZERO and ERANGE
 38 | #define POW_X_NAN                   3   // codes 3..6 share one report: _DOMAIN with AMD_F_INVALID and EDOM
 39 | #define POW_Y_NAN                   4
 40 | #define POW_X_NAN_Y_NAN             5
 41 | #define POW_X_NEG_Y_NOTINT          6
 42 | #define POW_Z_ZERO                  7   // _powf_special: _UNDERFLOW with AMD_F_INEXACT|AMD_F_UNDERFLOW and ERANGE
 43 | #define POW_Z_DENORMAL              8   // no case for this code in _powf_special
 44 | #define POW_Z_INF                   9   // _powf_special: _OVERFLOW with AMD_F_INEXACT|AMD_F_OVERFLOW and ERANGE
 45 | // powf special-case reporter: passes the bits of result z and the arguments x, y to _handle_errorf with the type/flags/errno chosen by `code`, then returns z itself.
 46 | float _powf_special(float x, float y, float z, U32 code)
 47 | {
 48 |     switch(code)   // codes without a case below (e.g. POW_Z_DENORMAL) report nothing
 49 |     {
 50 |     case POW_X_ONE_Y_SNAN:
 51 |         {
 52 |             UT64 zm; zm.u64 = 0; zm.f32[0] = z;   // zero the 64-bit container, then store z in its f32[0] slot
 53 |             _handle_errorf("powf", _FpCodePow, zm.u64, 0, AMD_F_INVALID, 0, x, y, 2);   // flag only: type 0 and errno 0
 54 |         }
 55 |         break;
 56 | 
 57 |     case POW_X_ZERO_Z_INF:
 58 |         {
 59 |             UT64 zm; zm.u64 = 0; zm.f32[0] = z;
 60 |             _handle_errorf("powf", _FpCodePow, zm.u64, _SING, AMD_F_DIVBYZERO, ERANGE, x, y, 2);
 61 |         }
 62 |         break;
 63 | 
 64 |     case POW_X_NAN:
 65 |     case POW_Y_NAN:
 66 |     case POW_X_NAN_Y_NAN:   
 67 |     case POW_X_NEG_Y_NOTINT:   // all four codes are reported identically as a domain error
 68 |         {
 69 |             UT64 zm; zm.u64 = 0; zm.f32[0] = z;
 70 |             _handle_errorf("powf", _FpCodePow, zm.u64, _DOMAIN, AMD_F_INVALID, EDOM, x, y, 2);
 71 |         }
 72 |         break;
 73 | 
 74 |     case POW_Z_ZERO:
 75 |         {
 76 |             UT64 zm; zm.u64 = 0; zm.f32[0] = z;
 77 |             _handle_errorf("powf", _FpCodePow, zm.u64, _UNDERFLOW, AMD_F_INEXACT|AMD_F_UNDERFLOW, ERANGE, x, y, 2);
 78 |         }
 79 |         break;
 80 | 
 81 |     case POW_Z_INF:
 82 |         {
 83 |             UT64 zm; zm.u64 = 0; zm.f32[0] = z;
 84 |             _handle_errorf("powf", _FpCodePow, zm.u64, _OVERFLOW, AMD_F_INEXACT|AMD_F_OVERFLOW, ERANGE, x, y, 2);
 85 |         }
 86 |         break;
 87 |     }
 88 | 
 89 |     return z;   // the value returned by _handle_errorf is not used
 90 | }
 91 | 
 92 | double _pow_special(double x, double y, double z, U32 code)
 93 | {
 94 |     switch(code)
 95 |     {
 96 |     case POW_X_ZERO_Z_INF:
 97 |         {
 98 |             UT64 zm; zm.f64 = z;
 99 |             _handle_error("pow", _FpCodePow, zm.u64, _SING, AMD_F_DIVBYZERO, ERANGE, x, y, 2);
100 |         }
101 |         break;
102 | 
103 |     case POW_X_NAN:
104 |     case POW_Y_NAN:
105 |     case POW_X_NAN_Y_NAN:
106 |     case POW_X_NEG_Y_NOTINT:
107 |         {
108 |             UT64 zm; zm.f64 = z;
109 |             _handle_error("pow", _FpCodePow, zm.u64, _DOMAIN, AMD_F_INVALID, EDOM, x, y, 2);
110 |         }
111 |         break;
112 | 
113 |     case POW_Z_ZERO:
114 |     case POW_Z_DENORMAL:
115 |         {
116 |             UT64 zm; zm.f64 = z;
117 |             _handle_error("pow", _FpCodePow, zm.u64, _UNDERFLOW, AMD_F_INEXACT|AMD_F_UNDERFLOW, ERANGE, x, y, 2);
118 |         }
119 |         break;
120 | 
121 |     case POW_Z_INF:
122 |         {
123 |             UT64 zm; zm.f64 = z;
124 |             _handle_error("pow", _FpCodePow, zm.u64, _OVERFLOW, AMD_F_INEXACT|AMD_F_OVERFLOW, ERANGE, x, y, 2);
125 |         }
126 |         break;
127 |     }
128 | 
129 |     return z;
130 | }
131 | 


--------------------------------------------------------------------------------
/remainder_piby2f.c:
--------------------------------------------------------------------------------
  1 | 
  2 | /*******************************************************************************
  3 | MIT License
  4 | -----------
  5 | 
  6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
  7 | 
  8 | Permission is hereby granted, free of charge, to any person obtaining a copy
   9 | of this Software and associated documentation files (the "Software"), to deal
 10 | in the Software without restriction, including without limitation the rights
 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 12 | copies of the Software, and to permit persons to whom the Software is
 13 | furnished to do so, subject to the following conditions:
 14 | 
 15 | The above copyright notice and this permission notice shall be included in
 16 | all copies or substantial portions of the Software.
 17 | 
 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 24 | THE SOFTWARE.
 25 | *******************************************************************************/
 26 | 
 27 | #include "libm.h"
 28 | #include "libm_util.h"
 29 | 
 30 | 
  31 | /* Given the bit pattern ux of a positive argument x, reduce x to the
  32 |    range [-pi/4,pi/4] using extra precision, and store the result in *r.
  33 |    *region receives the number of multiples of pi/2 that were subtracted
  34 |    from x to put it in the range [-pi/4,pi/4], mod 4 (a value in 0..3). */
  35 | void __remainder_piby2f(unsigned long ux, double *r, int *region)
  36 | {
  37 |       /* NOTE(review): ux is decoded with the 64-bit *_DP64 masks, so unsigned long presumably holds the full bit image of a double here - confirm. */
  38 |       /* bitsper (defined below) is the number of bits of 2/pi held in each pibits[] entry. */
  39 |       /* This method simulates multi-precision floating-point
  40 |          arithmetic and is accurate for all 1 <= x < infinity */
  41 | #define bitsper 36
  42 |       unsigned long res[10]; /* chunks of the product; only res[0]..res[4] are assigned below */
  43 |       unsigned long u, carry, mask, mant, nextbits;
  44 |       int first, last, i, rexp, xexp, resexp, ltb, determ, bc;
  45 |       double dx;
  46 |       static const double
  47 |         piby2 = 1.57079632679489655800e+00; /* 0x3ff921fb54442d18 */
  48 |       static unsigned long pibits[] =
  49 |       {
  50 |         0LL,
  51 |         5215LL, 13000023176LL, 11362338026LL, 67174558139LL,
  52 |         34819822259LL, 10612056195LL, 67816420731LL, 57840157550LL,
  53 |         19558516809LL, 50025467026LL, 25186875954LL, 18152700886LL
  54 |       };
  55 | 
  56 |       /* Split ux into an unbiased exponent and a mantissa with IMPBIT_DP64 OR-ed in; the shift drops the low 29 mantissa bits (24 bits remain if IMPBIT_DP64 is bit 52 - TODO confirm). */
  57 |       xexp = (int)(((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64);
  58 |       ux = ((ux & MANTBITS_DP64) | IMPBIT_DP64) >> 29;
  59 | 
  60 | 
  61 |       /* Now ux is the mantissa bit pattern of x as a long integer */
  62 |       mask = 1;
  63 |       mask = (mask << bitsper) - 1;
  64 | 
  65 |       /* Set first and last to the positions of the first
  66 |          and last chunks of 2/pi that we need */
  67 |       first = xexp / bitsper;
  68 |       resexp = xexp - first * bitsper;
  69 |       /* 120 is the theoretical maximum number of bits (actually
  70 |          115 for IEEE single precision) that we need to extract
  71 |          from the middle of 2/pi to compute the reduced argument
  72 |          accurately enough for our purposes */
  73 |       last = first + 120 / bitsper;
  74 | 
  75 |       /* 120 / 36 is 3 in integer arithmetic, so the four chunks first .. first + 3 are multiplied. */
  76 |       /* Do a long multiplication of the bits of 2/pi by the integer mantissa
  77 |          (the #if 0 loop is compiled out; as written it would read carry before it is set) */
  78 | #if 0
  79 |       for (i = last; i >= first; i--)
  80 |         {
  81 |           u = pibits[i] * ux + carry;
  82 |           res[i - first] = u & mask;
  83 |           carry = u >> bitsper;
  84 |         }
  85 |       res[last - first + 1] = 0;
  86 | #else
  87 |       /* Unroll the loop. This is only correct because we know
  88 |          that bitsper is fixed as 36. */
  89 |       res[4] = 0;
  90 |       u = pibits[last] * ux;
  91 |       res[3] = u & mask;
  92 |       carry = u >> bitsper;
  93 |       u = pibits[last - 1] * ux + carry;
  94 |       res[2] = u & mask;
  95 |       carry = u >> bitsper;
  96 |       u = pibits[last - 2] * ux + carry;
  97 |       res[1] = u & mask;
  98 |       carry = u >> bitsper;
  99 |       u = pibits[first] * ux + carry; /* first == last - 3 */
 100 |       res[0] = u & mask;
 101 | #endif
 102 | 
 103 |       /* ltb takes three bits of the product: bit 0 is the 0.5 bit of the fraction, bits 1..2 are the integer part mod 4. */
 104 |       /* Reconstruct the result */
 105 |       ltb = (int)((((res[0] << bitsper) | res[1])
 106 |                    >> (bitsper - 1 - resexp)) & 7);
 107 | 
 108 |       /* determ says whether the fractional part is >= 0.5 */
 109 |       determ = ltb & 1;
 110 | 
 111 |       i = 1;
 112 |       if (determ)
 113 |         {
 114 |           /* The mantissa is >= 0.5. We want to subtract it
 115 |              from 1.0 by negating all the bits */
 116 |           *region = ((ltb >> 1) + 1) & 3; /* count one more multiple of pi/2; the remainder is made negative below */
 117 |           mant = 1;
 118 |           mant = ~(res[1]) & ((mant << (bitsper - resexp)) - 1);
 119 |           while (mant < 0x0000000000010000) /* below 2^16: pull in the next (complemented) chunk */
 120 |             {
 121 |               i++;
 122 |               mant = (mant << bitsper) | (~(res[i]) & mask);
 123 |             }
 124 |           nextbits = (~(res[i+1]) & mask);
 125 |         }
 126 |       else
 127 |         {
 128 |           *region = (ltb >> 1);
 129 |           mant = 1;
 130 |           mant = res[1] & ((mant << (bitsper - resexp)) - 1);
 131 |           while (mant < 0x0000000000010000) /* below 2^16: pull in the next chunk */
 132 |             {
 133 |               i++;
 134 |               mant = (mant << bitsper) | res[i];
 135 |             }
 136 |           nextbits = res[i+1];
 137 |         }
 138 | 
 139 | 
 140 |       /* Normalize the mantissa. The shift value 6 here, determined by
 141 |          trial and error, seems to give optimal speed. */
 142 |       bc = 0;
 143 |       while (mant < 0x0000400000000000) /* below 2^46, so a 6-bit shift stays below 2^52 */
 144 |         {
 145 |           bc += 6;
 146 |           mant <<= 6;
 147 |         }
 148 |       while (mant < 0x0010000000000000) /* single-bit steps until bit 52 is set */
 149 |         {
 150 |           bc++;
 151 |           mant <<= 1;
 152 |         }
 153 |       mant |= nextbits >> (bitsper - bc); /* fill the bc vacated low bits from the following chunk */
 154 | 
 155 |       rexp = 52 + resexp - bc - i * bitsper;
 156 | 
 157 | 
 158 |       /* Put the result exponent rexp onto the mantissa pattern */
 159 |       u = ((unsigned long)rexp + EXPBIAS_DP64) << EXPSHIFTBITS_DP64;
 160 |       ux = (mant & MANTBITS_DP64) | u;
 161 |       if (determ)
 162 |         /* If we complemented the mantissa, the result is negative */
 163 |         ux |= SIGNBIT_DP64;
 164 |       PUT_BITS_DP64(ux, dx);
 165 | 
 166 | 
 167 |       /* dx is a double precision version of the fractional part of
 168 |          x * 2 / pi. Multiply dx by pi/2 in double precision
 169 |          to get the reduced argument r. */
 170 |       *r = dx * piby2;
 171 |   return;
 172 | 
 173 | }
174 | 


--------------------------------------------------------------------------------
/remainder_piby2f_forAsm.asm:
--------------------------------------------------------------------------------
  1 | ;
  2 | ; MIT License
  3 | ; -----------
  4 | ; 
  5 | ; Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
  6 | ; 
  7 | ; Permission is hereby granted, free of charge, to any person obtaining a copy
   8 | ; of this Software and associated documentation files (the "Software"), to deal
  9 | ; in the Software without restriction, including without limitation the rights
 10 | ; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 11 | ; copies of the Software, and to permit persons to whom the Software is
 12 | ; furnished to do so, subject to the following conditions:
 13 | ; 
 14 | ; The above copyright notice and this permission notice shall be included in
 15 | ; all copies or substantial portions of the Software.
 16 | ; 
 17 | ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 18 | ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 19 | ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 20 | ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 21 | ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 22 | ; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 23 | ; THE SOFTWARE.
 24 | ;
 25 | ; An implementation of the remainder by pi/2 function
 26 | ; This is a service routine for use by trig functions coded in asm
 27 | ;
 28 | ; On input,
 29 | ;   xmm0 = x;   Note that we assume x >= pi/4
  30 | ; On output
 31 | ;   xmm0 = r
 32 | ;   eax  = region
 33 | 
 34 | .const
 35 | 
 36 | ALIGN 16
 37 | L__piby2 DQ 03ff921fb54442d18h
 38 | EXTRN __L_2_by_pi_bits:BYTE
 39 | 
 40 | 
 41 | fname TEXTEQU <__remainder_piby2d2f_forAsm>
 42 | 
 43 | stack_size      EQU     000h
 44 | include fm.inc
 45 | 
 46 | .code
 47 | PUBLIC fname
  48 | fname PROC FRAME                         ; __remainder_piby2d2f_forAsm: in xmm0 = x, out xmm0 = r, rax = region
  49 |     StackAllocate stack_size
  50 |     .ENDPROLOG
  51 |     ; r9 <-- address of the external byte table __L_2_by_pi_bits
  52 |     lea    r9,__L_2_by_pi_bits
  53 |  
  54 |     ;get the unbiased exponent and the mantissa part of x
  55 |     ;x is assumed positive (sign bit clear), so xexp = (x >> 52) - 1023 needs no masking
  56 |     movd         r11,xmm0
  57 |     mov          rcx,r11 
  58 |     shr          r11,52
  59 |     sub          r11,1023                 ; r11 <-- xexp = exponent of input x 
  60 |     ; NOTE(review): the shr by 3 below is a logical shift, so a negative xexp (x < 1.0) would give a huge table offset - confirm callers never pass such x
  61 |     ;calculate the last byte from which to start multiplication
  62 |     ;last = 134 - (xexp >> 3)
  63 |     mov          r10,r11
  64 |     shr          r10,3
  65 |     sub          r10,134                   ;r10 = -last
  66 |     neg          r10                       ;r10 = last
  67 | 
  68 |     ;load 64 bits of 2_by_pi
  69 |     mov          rax,[r9 + r10]
  70 |  
  71 |     ;mantissa of x = ((x << 12) >> 12) | implied bit
  72 |     shl          rcx,12
  73 |     shr          rcx,12                   ;rcx = mantissa part of input x 
  74 |     bts          rcx,52                   ;add the implied bit as well 
  75 | 
  76 |     ;load next 128 bits of 2_by_pi 
  77 |     add          r10,8                    ;increment to next 8 bytes of 2_by_pi
  78 |     movdqu       xmm0,[r9 + r10] 
  79 |     ; the low 192 bits of the product end up in r10:r9:r8 (most significant first); rdx of the third mul is not used
  80 |     ;do three 64-bit multiplications with mant of x 
  81 |     mul          rcx
  82 |     mov          r8,rax                   ;r8 = last 64 bits of mul = res1[2] 
  83 |     mov          r10,rdx                  ;r10 <-- carry
  84 |     movd         rax,xmm0
  85 |     mul rcx
  86 |     ;resexp = xexp & 7 
  87 |     and          r11,7                    ;r11 = resexp = xexp & 7 = last 3 bits
  88 |     psrldq       xmm0,8 
  89 |     add          rax,r10                  ; add the previous carry
  90 |     adc          rdx,0
  91 |     mov          r9,rax                   ;r9 = next 64 bits of mul = res1[1]
  92 |     mov          r10,rdx                  ;r10 <-- carry
  93 |     movd         rax,xmm0
  94 |     mul          rcx
  95 |     add          r10,rax                  ;r10 = most sig 64 bits = res1[0]
  96 |  
  97 |  ;find the region 
  98 |  ;rax = most sig bits >> (54 - resexp)
  99 |  ;  the last bit shifted out is left in CF by shr; it is the
 100 |  ;  0.5 bit of the fractional part (point_five)
 101 |  ;point_five = CF after the shift
 102 |  ;region = (rax + point_five) & 3; 
 103 |     mov          rcx,54
 104 |     mov          rax,r10
 105 |     sub          rcx,r11
 106 |     xor          rdx,rdx          ;rdx = sign of x(i.e first part of x * 2bypi) 
 107 |     shr          rax,cl 
 108 |     jnc          L__no_point_five
 109 |     ;;if there is carry.. then complement the result of multiplication and set the sign bit in rdx
 110 |     not          r10
 111 |     not          r9
 112 |     not          r8
 113 |     mov          rdx,08000000000000000h
 114 |     ; NOT and MOV leave the flags untouched, so the adc below still sees the CF produced by shr
 115 | ALIGN  16 
 116 | L__no_point_five:
 117 |     adc          rax,0
 118 |     and          rax,3
 119 |     ; Until / unless we find a better place to save it, we're putting
 120 |     ; the region in xmm1.
 121 |     movd         xmm1, rax
 122 | 
 123 |  ;calculate the number of integer bits and zero them out
 124 |     mov          rcx,r11 
 125 |     add          rcx,10 ;rcx = no. of integer bits
 126 |     shl          r10,cl
 127 |     shr          r10,cl ;r10 contains only mant bits
 128 |     sub          rcx,64 ;form the exponent
 129 |     mov          r11,rcx
 130 |     ; bsr sets ZF when its source is zero: in that case move the next 64 bits up and lower the exponent by 64
 131 |  ;find the highest set bit
 132 |     bsr          rcx,r10
 133 |     jnz          L__form_mantissa
 134 |     mov          r10,r9
 135 |     mov          r9,r8
 136 |     bsr          rcx,r10 ;rcx = hsb
 137 |     sub          r11,64
 138 |  
 139 | ALIGN  16 
 140 | L__form_mantissa:
 141 |     add          r11,rcx ;for exp of x
 142 |     sub          rcx,52 ;rcx = no. of bits to shift in r10 
 143 |     cmp          rcx,0
 144 |     jl           L__hsb_below_52
 145 |     je           L__form_numbers
 146 |  ;hsb above 52
 147 |     mov          r8,r10 ;previous contents of r8 not required
 148 |     shr          r10,cl ;r10 = mantissa of x with hsb at 52
 149 |     jmp          L__form_numbers
 150 |  
 151 | ALIGN  16 
 152 | L__hsb_below_52:
 153 |     neg          rcx ;rcx = 52 - hsb = left shift needed
 154 |     mov          rax,r9
 155 |     shl          r10,cl ;move hsb up to bit 52
 156 |     shl          r9,cl
 157 |     sub          rcx,64
 158 |     neg          rcx ;rcx = 64 - left shift
 159 |     shr          rax,cl ;rax = top bits of the old r9
 160 |     or           r10,rax ;fill the vacated low bits of r10
 161 |  
 162 | ALIGN  16
 163 | L__form_numbers:
 164 |     add          r11,1023 ;bias the exponent
 165 |     btr          r10,52                   ;remove the implied bit
 166 |     mov          rcx,r11
 167 |     or           r10,rdx                  ;put the sign 
 168 |     shl          rcx,52
 169 |     or           r10,rcx ;x is in r10
 170 |     movd         xmm0,r10                 ; xmm0 = x
 171 |     movd         rax, xmm1                ; rax <-- region
 172 | 
 173 |     ; At this point xmm0 has a double precision version of the fractional part
 174 |     ; of x * 2/pi.  To get the reduced argument r, we multiply that by pi/2.
 175 |     mulsd        xmm0,L__piby2
 176 |     StackDeallocate stack_size 
 177 |     ret 
 178 |  
 179 | fname        endp
180 | END
181 | 


--------------------------------------------------------------------------------
/remainder_piby2f_forC.asm:
--------------------------------------------------------------------------------
  1 | ;
  2 | ; MIT License
  3 | ; -----------
  4 | ; 
  5 | ; Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
  6 | ; 
  7 | ; Permission is hereby granted, free of charge, to any person obtaining a copy
   8 | ; of this Software and associated documentation files (the "Software"), to deal
  9 | ; in the Software without restriction, including without limitation the rights
 10 | ; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 11 | ; copies of the Software, and to permit persons to whom the Software is
 12 | ; furnished to do so, subject to the following conditions:
 13 | ; 
 14 | ; The above copyright notice and this permission notice shall be included in
 15 | ; all copies or substantial portions of the Software.
 16 | ; 
 17 | ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 18 | ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 19 | ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 20 | ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 21 | ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 22 | ; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 23 | ; THE SOFTWARE.
 24 | ;
 25 | ; An implementation of the remainder by pi/2 function
 26 | ; This is a service routine for use by trig functions coded in C
 27 | ;
 28 | 
 29 | fname TEXTEQU <__remainder_piby2d2f_forC>
 30 | 
 31 | save_rdi        EQU      20h
 32 | save_rsi        EQU      30h
 33 | stack_size      EQU     088h
 34 | include fm.inc
 35 | 
 36 | .code
 37 | PUBLIC fname
  38 | fname PROC FRAME                         ; __remainder_piby2d2f_forC: stores r and region through the two pointer arguments
  39 |     StackAllocate stack_size
  40 |     SaveReg rdi,save_rdi
  41 |     SaveReg rsi,save_rsi 
  42 |     .ENDPROLOG
  43 |  ; incoming registers: rcx = bit pattern of x, rdx = address that receives r, r8 = address that receives region
  44 |  mov rdi, rcx
  45 |  mov rsi, rdx
  46 |  mov rdx, r8
  47 | 
  48 |  ;get the unbiased exponent and the mantissa part of x
  49 |  movd    xmm0,rdi
  50 |  lea    r9,L__2_by_pi_bits
  51 |  
  52 |  ;xexp = (x >> 52) - 1023
  53 |  movd   r11,xmm0
  54 |  mov    rcx,r11 
  55 |  shr    r11,52
  56 |  sub    r11,1023 ;r11 = xexp = exponent of input x 
  57 |  ; NOTE(review): the shr by 3 below is a logical shift, so a negative xexp (x < 1.0) would give a huge table offset - confirm callers never pass such x
  58 |  ;calculate the last byte from which to start multiplication
  59 |  ;last = 134 - (xexp >> 3) 
  60 |  mov    r10,r11
  61 |  shr    r10,3
  62 |  sub    r10,134 ;r10 = -last
  63 |  neg    r10 ;r10 = last
  64 |  ; 24 table bytes are read in total: 8 at offset last, then 16 at offset last + 8
  65 |  ;load 64 bits of 2_by_pi
  66 |  mov    rax,[r9 + r10]
  67 |  mov    rdi,rdx ; save address of region since mul modifies rdx
  68 |  
  69 |  ;mantissa of x = ((x << 12) >> 12) | implied bit
  70 |  shl    rcx,12
  71 |  shr    rcx,12 ;rcx = mantissa part of input x 
  72 |  bts    rcx,52 ;add the implied bit as well 
  73 | 
  74 |  ;load next 128 bits of 2_by_pi 
  75 |  add    r10,8 ;increment to next 8 bytes of 2_by_pi
  76 |  movdqu xmm0,[r9 + r10] 
  77 |  ; the low 192 bits of the product end up in r10:r9:r8 (most significant first); rdx of the third mul is not used
  78 |  ;do three 64-bit multiplications with mant of x 
  79 |  mul rcx
  80 |  mov    r8,rax ;r8 = last 64 bits of multiplication = res1[2] 
  81 |  mov    r10,rdx ;r10 = carry
  82 |  movd   rax,xmm0
  83 |  mul rcx
  84 |  ;resexp = xexp & 7 
  85 |  and    r11,7 ;r11 = resexp = xexp & 7 = last 3 bits
  86 |  psrldq xmm0,8 
  87 |  add    rax,r10 ; add the previous carry
  88 |  adc    rdx,0
  89 |  mov    r9,rax ;r9 = next 64 bits of multiplication = res1[1]
  90 |  mov    r10,rdx ;r10 = carry
  91 |  movd   rax,xmm0
  92 |  mul    rcx
  93 |  add    r10,rax ;r10 = most significant 64 bits = res1[0]
  94 |  
  95 |  ;find the region 
  96 |  ;rax = most sig bits >> (54 - resexp); the last bit shifted out is left in CF by shr and is the 0.5 bit of the fractional part
  97 |  ;point_five = CF after the shift
  98 |  ;region = (rax + point_five) & 3; 
  99 |  mov    rcx,54
 100 |  mov    rax,r10
 101 |  sub    rcx,r11
 102 |  xor    rdx,rdx ;rdx = sign of x(i.e first part of x * 2bypi) 
 103 |  shr    rax,cl 
 104 |  jnc    L__no_point_five
 105 |  ;;if there is carry.. then complement the result of multiplication and set the sign bit in rdx
 106 |  not r10
 107 |  not r9
 108 |  not r8
 109 |  mov    rdx,08000000000000000h
 110 |  ; NOT and MOV leave the flags untouched, so the adc below still sees the CF produced by shr
 111 | ALIGN  16 
 112 | L__no_point_five:
 113 |  adc    rax,0
 114 |  and    rax,3
 115 |  mov    DWORD PTR[rdi],eax ;store region to memory
 116 | 
 117 |  ;calculate the number of integer bits and zero them out
 118 |  mov    rcx,r11 
 119 |  add    rcx,10 ;rcx = no. of integer bits
 120 |  shl    r10,cl
 121 |  shr    r10,cl ;r10 contains only mant bits
 122 |  sub    rcx,64 ;form the exponent
 123 |  mov    r11,rcx
 124 |  ; bsr sets ZF when its source is zero: in that case move the next 64 bits up and lower the exponent by 64
 125 |  ;find the highest set bit
 126 |  bsr    rcx,r10
 127 |  jnz L__form_mantissa
 128 |  mov    r10,r9
 129 |  mov    r9,r8
 130 |  bsr    rcx,r10 ;rcx = hsb
 131 |  sub    r11,64
 132 |  
 133 |  
 134 | ALIGN  16 
 135 | L__form_mantissa:
 136 |  add    r11,rcx ;for exp of x
 137 |  sub    rcx,52 ;rcx = no. of bits to shift in r10 
 138 |  cmp    rcx,0
 139 |  jl L__hsb_below_52
 140 |  je L__form_numbers
 141 |  ;hsb above 52
 142 |  mov    r8,r10 ;previous contents of r8 not required
 143 |  shr    r10,cl ;r10 = mantissa of x with hsb at 52
 144 |  jmp L__form_numbers
 145 |  
 146 | ALIGN  16 
 147 | L__hsb_below_52:
 148 |  neg rcx ;rcx = 52 - hsb = left shift needed
 149 |  mov    rax,r9
 150 |  shl    r10,cl ;move hsb up to bit 52
 151 |  shl    r9,cl
 152 |  sub    rcx,64
 153 |  neg rcx ;rcx = 64 - left shift
 154 |  shr    rax,cl ;rax = top bits of the old r9
 155 |  or    r10,rax ;fill the vacated low bits of r10
 156 |  
 157 | ALIGN  16
 158 | L__form_numbers:
 159 |  add    r11,1023 ;bias the exponent
 160 |  btr    r10,52 ;remove the implied bit
 161 |  mov    rcx,r11
 162 |  or    r10,rdx ;put the sign 
 163 |  shl    rcx,52
 164 |  or    r10,rcx ;x is in r10
 165 |  ; r10 now holds the fractional part of x * 2/pi as a double; multiplying by pi/2 gives r, stored through rsi
 166 |  movd    xmm0,r10 ;xmm0 = x
 167 |  mulsd    xmm0,L__piby2
 168 |  movsd    QWORD PTR[rsi],xmm0
 169 |  RestoreReg rsi,save_rsi
 170 |  RestoreReg rdi,save_rdi
 171 |  StackDeallocate stack_size 
 172 |  ret 
 173 |  
 174 | fname        endp
175 | 
176 | .const
177 | ALIGN 16
178 | L__piby2 DQ 03ff921fb54442d18h
179 | 
180 | ALIGN 16
181 | L__2_by_pi_bits DB 224
182 |   DB 241
183 |   DB 27
184 |   DB 193
185 |   DB 12
186 |   DB 88
187 |   DB 33
188 |   DB 116
189 |   DB 53
190 |   DB 126
191 |   DB 196
192 |   DB 126
193 |   DB 237
194 |   DB 175
195 |   DB 169
196 |   DB 75
197 |   DB 74
198 |   DB 41
199 |   DB 222
200 |   DB 231
201 |   DB 28
202 |   DB 244
203 |   DB 236
204 |   DB 197
205 |   DB 151
206 |   DB 175
207 |   DB 31
208 |   DB 235
209 |   DB 158
210 |   DB 212
211 |   DB 181
212 |   DB 168
213 |   DB 127
214 |   DB 121
215 |   DB 154
216 |   DB 253
217 |   DB 24
218 |   DB 61
219 |   DB 221
220 |   DB 38
221 |   DB 44
222 |   DB 159
223 |   DB 60
224 |   DB 251
225 |   DB 217
226 |   DB 180
227 |   DB 125
228 |   DB 180
229 |   DB 41
230 |   DB 104
231 |   DB 45
232 |   DB 70
233 |   DB 188
234 |   DB 188
235 |   DB 63
236 |   DB 96
237 |   DB 22
238 |   DB 120
239 |   DB 255
240 |   DB 95
241 |   DB 226
242 |   DB 127
243 |   DB 236
244 |   DB 160
245 |   DB 228
246 |   DB 247
247 |   DB 46
248 |   DB 126
249 |   DB 17
250 |   DB 114
251 |   DB 210
252 |   DB 231
253 |   DB 76
254 |   DB 13
255 |   DB 230
256 |   DB 88
257 |   DB 71
258 |   DB 230
259 |   DB 4
260 |   DB 249
261 |   DB 125
262 |   DB 209
263 |   DB 154
264 |   DB 192
265 |   DB 113
266 |   DB 166
267 |   DB 19
268 |   DB 18
269 |   DB 237
270 |   DB 186
271 |   DB 212
272 |   DB 215
273 |   DB 8
274 |   DB 162
275 |   DB 251
276 |   DB 156
277 |   DB 166
278 |   DB 196
279 |   DB 114
280 |   DB 172
281 |   DB 119
282 |   DB 248
283 |   DB 115
284 |   DB 72
285 |   DB 70
286 |   DB 39
287 |   DB 168
288 |   DB 187
289 |   DB 36
290 |   DB 25
291 |   DB 128
292 |   DB 75
293 |   DB 55
294 |   DB 9
295 |   DB 233
296 |   DB 184
297 |   DB 145
298 |   DB 220
299 |   DB 134
300 |   DB 21
301 |   DB 239
302 |   DB 122
303 |   DB 175
304 |   DB 142
305 |   DB 69
306 |   DB 249
307 |   DB 7
308 |   DB 65
309 |   DB 14
310 |   DB 241
311 |   DB 100
312 |   DB 86
313 |   DB 138
314 |   DB 109
315 |   DB 3
316 |   DB 119
317 |   DB 211
318 |   DB 212
319 |   DB 71
320 |   DB 95
321 |   DB 157
322 |   DB 240
323 |   DB 167
324 |   DB 84
325 |   DB 16
326 |   DB 57
327 |   DB 185
328 |   DB 13
329 |   DB 230
330 |   DB 139
331 |   DB 2
332 |   DB 0
333 |   DB 0
334 |   DB 0
335 |   DB 0
336 |   DB 0
337 |   DB 0
338 |   DB 0
339 | 
340 | END
341 | 
342 | 


--------------------------------------------------------------------------------
/remainderf.c:
--------------------------------------------------------------------------------
  1 | 
  2 | /*******************************************************************************
  3 | MIT License
  4 | -----------
  5 | 
  6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
  7 | 
  8 | Permission is hereby granted, free of charge, to any person obtaining a copy
   9 | of this Software and associated documentation files (the "Software"), to deal
 10 | in the Software without restriction, including without limitation the rights
 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 12 | copies of the Software, and to permit persons to whom the Software is
 13 | furnished to do so, subject to the following conditions:
 14 | 
 15 | The above copyright notice and this permission notice shall be included in
 16 | all copies or substantial portions of the Software.
 17 | 
 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 24 | THE SOFTWARE.
 25 | *******************************************************************************/
 26 | 
 27 | #include "libm.h"
 28 | #include "libm_util.h"
 29 | 
 30 | #define USE_NANF_WITH_FLAGS
 31 | #define USE_SCALEDOUBLE_1
 32 | #define USE_GET_FPSW_INLINE
 33 | #define USE_SET_FPSW_INLINE
 34 | #define USE_HANDLE_ERRORF
 35 | #include "libm_inlines.h"
 36 | #undef USE_NANF_WITH_FLAGS
 37 | #undef USE_SCALEDOUBLE_1
 38 | #undef USE_GET_FPSW_INLINE
 39 | #undef USE_SET_FPSW_INLINE
 40 | #undef USE_HANDLE_ERRORF
 41 | 
 42 | #if !defined(_CRTBLD_C9X)
 43 | #define _CRTBLD_C9X
 44 | #endif
 45 | 
 46 | #include "libm_errno.h"
 47 | 
 48 | // Disable "C4163: not available as intrinsic function" warning that older
 49 | // compilers may issue here.
 50 | #pragma warning(disable:4163)
 51 | #pragma function(remainderf,fmodf)
 52 | 
 53 | 
 54 | #undef _FUNCNAME
 55 | #if defined(COMPILING_FMOD)
 56 | float fmodf(float x, float y)
 57 | #define _FUNCNAME "fmodf"
 58 | #define _OPERATION OP_FMOD
 59 | #else
 60 | float remainderf(float x, float y)
 61 | #define _FUNCNAME "remainderf"
 62 | #define _OPERATION OP_REM
 63 | #endif
 64 | {
 65 |   double dx, dy, scale, w, t;
 66 |   int i, ntimes, xexp, yexp;
 67 |   unsigned long ux, uy, ax, ay;
 68 | 
 69 |   unsigned int sw;
 70 | 
 71 |   dx = x;
 72 |   dy = y;
 73 | 
 74 | 
 75 |   GET_BITS_DP64(dx, ux);
 76 |   GET_BITS_DP64(dy, uy);
 77 |   ax = ux & ~SIGNBIT_DP64;
 78 |   ay = uy & ~SIGNBIT_DP64;
 79 |   xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64);
 80 |   yexp = (int)((uy & EXPBITS_DP64) >> EXPSHIFTBITS_DP64);
 81 | 
 82 |   if (xexp < 1 || xexp > BIASEDEMAX_DP64 ||
 83 |       yexp < 1 || yexp > BIASEDEMAX_DP64)
 84 |     {
 85 |       /* x or y is zero, NaN or infinity (neither x nor y can be
 86 |          denormalized because we promoted from float to double) */
 87 |       if (xexp > BIASEDEMAX_DP64)
 88 |         {
 89 |           /* x is NaN or infinity */
 90 |           if (ux & MANTBITS_DP64)
 91 |             {
 92 |               /* x is NaN */
 93 |               unsigned int ufx;
 94 |               GET_BITS_SP32(x, ufx);
 95 |               return _handle_errorf(_FUNCNAME, _OPERATION, ufx|0x00400000, _DOMAIN, 0,
 96 |                                    EDOM, x, y, 2);
 97 |             }
 98 |           else
 99 |             {
100 |               /* x is infinity; result is NaN */
101 |               return _handle_errorf(_FUNCNAME, _OPERATION, INDEFBITPATT_SP32, _DOMAIN,
102 |                                    AMD_F_INVALID, EDOM, x, y, 2);
103 |             }
104 |         }
105 |       else if (yexp > BIASEDEMAX_DP64)
106 |         {
107 |           /* y is NaN or infinity */
108 |           if (uy & MANTBITS_DP64)
109 |             {
110 |               /* y is NaN */
111 |               unsigned int ufy;
112 |               GET_BITS_SP32(y, ufy);
113 |               return _handle_errorf(_FUNCNAME, _OPERATION, ufy|0x00400000, _DOMAIN, 0,
114 |                                    EDOM, x, y, 2);
115 |             }
116 |           else
117 |             {
118 | #ifdef _CRTBLD_C9X
119 |               /* C99 return for y = +-inf is x */
120 |               return x;
121 | #else
122 |               /* y is infinity; result is indefinite */
123 |               return _handle_errorf(_FUNCNAME, _OPERATION, INDEFBITPATT_SP32, _DOMAIN,
124 |                                    AMD_F_INVALID, EDOM, x, y, 2);
125 | #endif
126 |             }
127 |         }
128 |       else if (xexp < 1)
129 |         {
130 |           /* x must be zero (cannot be denormalized) */
131 |           if (yexp < 1)
132 |             {
133 |               /* y must be zero (cannot be denormalized) */
134 |               return _handle_errorf(_FUNCNAME, _OPERATION, INDEFBITPATT_SP32, _DOMAIN,
135 |                                    AMD_F_INVALID, EDOM, x, y, 2);
136 |             }
137 |           else
138 |               /* C99 return for x = 0 must preserve sign */
139 |               return x;
140 |         }
141 |       else
142 |         {
143 |           /* y must be zero */
144 |           return _handle_errorf(_FUNCNAME, _OPERATION, INDEFBITPATT_SP32, _DOMAIN,
145 |                                AMD_F_INVALID, EDOM, x, y, 2);
146 |         }
147 |     }
148 |   else if (ax == ay)
149 |     {
150 |       /* abs(x) == abs(y); return zero with the sign of x */
151 |       PUT_BITS_DP64(ux & SIGNBIT_DP64, dx);
152 |       return (float)dx;
153 |     }
154 | 
155 |   /* Set dx = abs(x), dy = abs(y) */
156 |   PUT_BITS_DP64(ax, dx);
157 |   PUT_BITS_DP64(ay, dy);
158 | 
159 |   if (ax < ay)
160 |     {
161 |       /* abs(x) < abs(y) */
162 | #if !defined(COMPILING_FMOD)
163 |       if (dx > 0.5*dy)
164 |         dx -= dy;
165 | #endif
166 |       return (float)(x < 0.0? -dx : dx);
167 |     }
168 | 
169 |   /* Save the current floating-point status word. We need
170 |      to do this because the remainder function is always
171 |      exact for finite arguments, but our algorithm causes
172 |      the inexact flag to be raised. We therefore need to
173 |      restore the entry status before exiting. */
174 |   sw = get_fpsw_inline();
175 | 
176 |   /* Set ntimes to the number of times we need to do a
177 |      partial remainder. If the exponent of x is an exact multiple
178 |      of 24 larger than the exponent of y, and the mantissa of x is
179 |      less than the mantissa of y, ntimes will be one too large
180 |      but it doesn't matter - it just means that we'll go round
181 |      the loop below one extra time. */
182 |   if (xexp <= yexp)
183 |     {
184 |       ntimes = 0;
185 |       w = dy;
186 |       scale = 1.0;
187 |     }
188 |   else
189 |     {
190 |       ntimes = (xexp - yexp) / 24;
191 | 
192 |       /* Set w = y * 2^(24*ntimes) */
193 |       PUT_BITS_DP64((unsigned long)(ntimes * 24 + EXPBIAS_DP64) << EXPSHIFTBITS_DP64,
194 |                     scale);
195 |       w = scale * dy;
196 |       /* Set scale = 2^(-24) */
197 |       PUT_BITS_DP64((unsigned long)(-24 + EXPBIAS_DP64) << EXPSHIFTBITS_DP64,
198 |                     scale);
199 |     }
200 | 
201 | 
202 |   /* Each time round the loop we compute a partial remainder.
203 |      This is done by subtracting a large multiple of w
204 |      from x each time, where w is a scaled up version of y.
205 |      The subtraction can be performed exactly when performed
206 |      in double precision, and the result at each stage can
207 |      fit exactly in a single precision number. */
208 |   for (i = 0; i < ntimes; i++)
209 |     {
210 |       /* t is the integer multiple of w that we will subtract.
211 |          We use a truncated value for t. */
212 |       t = (double)((int)(dx / w));
213 |       dx -= w * t;
214 |       /* Scale w down by 2^(-24) for the next iteration */
215 |       w *= scale;
216 |     }
217 | 
218 |   /* One more time */
219 | #if defined(COMPILING_FMOD)
220 |   t = (double)((int)(dx / w));
221 |   dx -= w * t;
222 | #else
223 |  {
224 |   unsigned int todd;
225 |   /* Variable todd says whether the integer t is odd or not */
226 |   t = (double)((int)(dx / w));
227 |   todd = ((int)(dx / w)) & 1;
228 |   dx -= w * t;
229 | 
230 |   /* At this point, dx lies in the range [0,dy) */
231 |   /* For the remainder function, we need to adjust dx
232 |      so that it lies in the range (-y/2, y/2] by carefully
233 |      subtracting w (== dy == y) if necessary. */
234 |   if (dx > 0.5 * w || ((dx == 0.5 * w) && todd))
235 |     dx -= w;
236 |  }
237 | #endif
238 | 
239 |   /* **** N.B. for some reason this breaks the 32 bit version
240 |      of remainder when compiling with optimization. */
241 |   /* Restore the entry status flags */
242 |   set_fpsw_inline(sw);
243 | 
244 |   /* Set the result sign according to input argument x */
245 |   return (float)(x < 0.0? -dx : dx);
246 | 
247 | }
248 | 


--------------------------------------------------------------------------------
/sincos_special.c:
--------------------------------------------------------------------------------
  1 | 
  2 | /*******************************************************************************
  3 | MIT License
  4 | -----------
  5 | 
  6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
  7 | 
  8 | Permission is hereby granted, free of charge, to any person obtaining a copy
  9 | of this Software and associated documentation files (the "Software"), to deal
 10 | in the Software without restriction, including without limitation the rights
 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 12 | copies of the Software, and to permit persons to whom the Software is
 13 | furnished to do so, subject to the following conditions:
 14 | 
 15 | The above copyright notice and this permission notice shall be included in
 16 | all copies or substantial portions of the Software.
 17 | 
 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 24 | THE SOFTWARE.
 25 | *******************************************************************************/
 26 | 
 27 | #include <fpieee.h>
 28 | #include <excpt.h>
 29 | #include <float.h>
 30 | #include <math.h>
 31 | #include <errno.h>
 32 | 
 33 | #include "libm_new.h"
 34 | 
 35 | double _sincos_special(double x, char *name, unsigned int operation)
 36 | {
 37 |     UT64 xu;
 38 | 	unsigned int is_snan;
 39 | 
 40 | 	xu.f64 = x;
 41 | 
 42 |     if((xu.u64 & INF_POS_64) == INF_POS_64)
 43 |     {
 44 |         // x is Inf or NaN
 45 |         if((xu.u64 & MANTISSA_MASK_64) == 0x0)
 46 |         {
 47 |             // x is Inf
 48 | 			xu.u64 = IND_64;
 49 | 			_handle_error(name, operation, xu.u64, _DOMAIN, AMD_F_INVALID, EDOM, x, 0, 1);
 50 | 		}
 51 | 		else 
 52 | 		{
 53 | 			// x is NaN
 54 |             is_snan = (((xu.u64 & QNAN_MASK_64) == QNAN_MASK_64) ? 0 : 1);
 55 | 			if(is_snan)
 56 | 			{
 57 | 				xu.u64 |= QNAN_MASK_64;
 58 | 			}
 59 | 			_handle_error(name, operation, xu.u64, _DOMAIN, 0, EDOM, x, 0, 1);
 60 | 		}
 61 | 	}
 62 | 
 63 | 	return xu.f64;
 64 | }
 65 | 
 66 | float _sincosf_special(float x, char *name, unsigned int operation)
 67 | {
 68 |     UT64 xu;
 69 | 	unsigned int is_snan;
 70 | 
 71 | 	xu.u64    = 0;
 72 | 	xu.f32[0] = x;
 73 | 
 74 |     if((xu.u32[0] & INF_POS_32) == INF_POS_32)
 75 |     {
 76 |         // x is Inf or NaN
 77 |         if((xu.u32[0] & MANTISSA_MASK_32) == 0x0)
 78 |         {
 79 |             // x is Inf	
 80 | 			xu.u32[0] = IND_32; 
 81 | 			_handle_errorf(name, operation, xu.u64, _DOMAIN, AMD_F_INVALID, EDOM, x, 0, 1);
 82 | 		}
 83 | 		else
 84 | 		{
 85 | 			// x is NaN
 86 |             is_snan = (((xu.u32[0] & QNAN_MASK_32) == QNAN_MASK_32) ? 0 : 1);
 87 | 			if(is_snan) 
 88 | 			{
 89 | 				xu.u32[0] |= QNAN_SET_32;
 90 | 				_handle_errorf(name, operation, xu.u64, _DOMAIN, AMD_F_INVALID, EDOM, x, 0, 1);
 91 | 			}
 92 | 			else 
 93 | 			{
 94 | 				_handle_errorf(name, operation, xu.u64, _DOMAIN, 0, EDOM, x, 0, 1);
 95 | 			}
 96 | 		}
 97 | 	}
 98 | 
 99 | 	return xu.f32[0];
100 | }
101 | 
102 | float _sinf_special(float x)
103 | {
104 | 	return _sincosf_special(x, "sinf", _FpCodeSin);
105 | }
106 | 
107 | double _sin_special(double x)
108 | {
109 | 	return _sincos_special(x, "sin", _FpCodeSin);
110 | }
111 | 
112 | float _cosf_special(float x)
113 | {
114 | 	return _sincosf_special(x, "cosf", _FpCodeCos);
115 | }
116 | 
117 | double _cos_special(double x)
118 | {
119 | 	return _sincos_special(x, "cos", _FpCodeCos);
120 | }
121 | 
122 | double _tan_special(double x)
123 | {
124 |         return  _sincos_special(x, "tan",_FpCodeTan);
125 | }
126 | 
127 | float _tanf_special(float x)
128 | {
129 |         return  _sincosf_special(x, "tanf",_FpCodeTan);
130 | }
131 | 


--------------------------------------------------------------------------------
/sqrt.c:
--------------------------------------------------------------------------------
 1 | 
 2 | /*******************************************************************************
 3 | MIT License
 4 | -----------
 5 | 
 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
 7 | 
 8 | Permission is hereby granted, free of charge, to any person obtaining a copy
 9 | of this Software and associated documentation files (the "Software"), to deal
10 | in the Software without restriction, including without limitation the rights
11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 | copies of the Software, and to permit persons to whom the Software is
13 | furnished to do so, subject to the following conditions:
14 | 
15 | The above copyright notice and this permission notice shall be included in
16 | all copies or substantial portions of the Software.
17 | 
18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 | THE SOFTWARE.
25 | *******************************************************************************/
26 | 
27 | #include "libm.h"
28 | #include "libm_util.h"
29 | 
30 | #if USE_SOFTWARE_SQRT
31 | #define USE_SQRT_AMD_INLINE
32 | #endif
33 | #define USE_NAN_WITH_FLAGS
34 | #define USE_HANDLE_ERROR
35 | #include "libm_inlines.h"
36 | #if USE_SOFTWARE_SQRT
37 | #undef USE_SQRT_AMD_INLINE
38 | #endif
39 | #undef USE_NAN_WITH_FLAGS
40 | #undef USE_HANDLE_ERROR
41 | 
42 | #include "libm_errno.h"
43 | 
44 | #pragma function(sqrt)
45 | 
46 | double sqrt(double x)
47 | {
48 | #if USE_SOFTWARE_SQRT
49 |   return sqrt_amd_inline(x);
50 | #else
51 |   double r;
52 |   unsigned long ux;
53 |   GET_BITS_DP64(x, ux);
54 | 
55 |   /* Check for special cases for Microsoft error handling */
56 |   if ((ux & PINFBITPATT_DP64) == PINFBITPATT_DP64)
57 |     {
58 |       /* x is infinity, or NaN */
59 |       if (ux & MANTBITS_DP64)
60 |         {
61 |           /* NaN  of some sort */
62 |           /* If it's a signaling NaN, convert to QNaN */
63 |           return _handle_error("sqrt", OP_SQRT, ux|0x0008000000000000, 
64 |                                   _DOMAIN, 0,EDOM, x, 0.0, 1);
65 |         }
66 |       else
67 |         {
68 |           /* +/-infinity  */
69 |           if (ux & SIGNBIT_DP64)
70 |             {
71 |               /* - infinity */
72 |                 return _handle_error("sqrt", OP_SQRT, INDEFBITPATT_DP64, 
73 |                                     _DOMAIN, AMD_F_INVALID, EDOM, x, 0.0, 1);
74 |             }
75 |           /* positive infinite is not a problem */
76 |         }
77 |     }
78 |   if ((ux & SIGNBIT_DP64)&&(ux & ~SIGNBIT_DP64))   /* if x < zero */
79 |     {
80 |         return _handle_error("sqrt", OP_SQRT, INDEFBITPATT_DP64, 
81 |                             _DOMAIN, AMD_F_INVALID, EDOM, x, 0.0, 1);
82 |     }
83 | 
84 |       /* VC++ intrinsic call */
85 |   _mm_store_sd(&r, _mm_sqrt_sd(_mm_setzero_pd(), _mm_load_sd(&x)));
86 |   return r;
87 | #endif
88 | }
89 | 


--------------------------------------------------------------------------------
/sqrtf.c:
--------------------------------------------------------------------------------
 1 | 
 2 | /*******************************************************************************
 3 | MIT License
 4 | -----------
 5 | 
 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
 7 | 
 8 | Permission is hereby granted, free of charge, to any person obtaining a copy
 9 | of this Software and associated documentation files (the "Software"), to deal
10 | in the Software without restriction, including without limitation the rights
11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 | copies of the Software, and to permit persons to whom the Software is
13 | furnished to do so, subject to the following conditions:
14 | 
15 | The above copyright notice and this permission notice shall be included in
16 | all copies or substantial portions of the Software.
17 | 
18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 | THE SOFTWARE.
25 | *******************************************************************************/
26 | 
27 | #include "libm.h"
28 | #include "libm_util.h"
29 | 
30 | #if USE_SOFTWARE_SQRT
31 | #define USE_SQRTF_AMD_INLINE
32 | #endif
33 | #define USE_NANF_WITH_FLAGS
34 | #define USE_HANDLE_ERRORF
35 | #include "libm_inlines.h"
36 | #if USE_SOFTWARE_SQRT
37 | #undef USE_SQRTF_AMD_INLINE
38 | #endif
39 | #undef USE_NANF_WITH_FLAGS
40 | #undef USE_HANDLE_ERRORF
41 | 
42 | #include "libm_errno.h"
43 | 
44 | // Disable "C4163: not available as intrinsic function" warning that older
45 | // compilers may issue here.
46 | #pragma warning(disable:4163)
47 | #pragma function(sqrtf)
48 | 
49 | 
50 | float sqrtf(float x)
51 | {
52 | #if USE_SOFTWARE_SQRT
53 |   return sqrtf_amd_inline(x);
54 | #else
55 |   float r;
56 |   unsigned int ux;
57 |   GET_BITS_SP32(x, ux);
58 |   /* Check for special cases for Microsoft error handling */
59 |   if ((ux & PINFBITPATT_SP32) == PINFBITPATT_SP32)
60 |     {
61 |       /* x is infinity, or NaN */
62 |       if (ux & MANTBITS_SP32)
63 |         {
64 |           /* NaN  of some sort */
65 |           /* If it's a signaling NaN, convert to QNaN */
66 |             return _handle_errorf("sqrtf", OP_SQRT, ux|0x00400000, _DOMAIN, 0,
67 |                                EDOM, x, 0.0F, 1);
68 |         }
69 |       else
70 |         {
71 |           /* +/-infinity  */
72 |           if (ux & SIGNBIT_SP32)
73 |             {
74 |               /* - infinity */
75 |                 return _handle_errorf("sqrtf", OP_SQRT, INDEFBITPATT_SP32, 
76 |                             _DOMAIN, AMD_F_INVALID, EDOM, x, 0.0F, 1);
77 |             }
78 |           /* positive infinite is not a problem */
79 |         }
80 |     }
81 |   if ((ux & SIGNBIT_SP32)&&(ux & ~SIGNBIT_SP32))  /* if x < zero */
82 |     {
83 |         return _handle_errorf("sqrtf", OP_SQRT, INDEFBITPATT_SP32, 
84 |                     _DOMAIN, AMD_F_INVALID, EDOM, x, 0.0F, 1);
85 |     }
86 | 
87 |       /* VC++ intrinsic call */
88 |       _mm_store_ss(&r, _mm_sqrt_ss(_mm_load_ss(&x)));
89 |   return r;
90 | #endif
91 | }
92 | 


--------------------------------------------------------------------------------
/tan.c:
--------------------------------------------------------------------------------
  1 | 
  2 | /*******************************************************************************
  3 | MIT License
  4 | -----------
  5 | 
  6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
  7 | 
  8 | Permission is hereby granted, free of charge, to any person obtaining a copy
  9 | of this Software and associated documentation files (the "Software"), to deal
 10 | in the Software without restriction, including without limitation the rights
 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 12 | copies of the Software, and to permit persons to whom the Software is
 13 | furnished to do so, subject to the following conditions:
 14 | 
 15 | The above copyright notice and this permission notice shall be included in
 16 | all copies or substantial portions of the Software.
 17 | 
 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 24 | THE SOFTWARE.
 25 | *******************************************************************************/
 26 | 
 27 | #include "libm.h"
 28 | #include "libm_util.h"
 29 | 
 30 | #define USE_NAN_WITH_FLAGS
 31 | #define USE_VAL_WITH_FLAGS
 32 | #define USE_HANDLE_ERROR
 33 | #include "libm_inlines.h"
 34 | #undef USE_NAN_WITH_FLAGS
 35 | #undef USE_VAL_WITH_FLAGS
 36 | #undef USE_HANDLE_ERROR
 37 | 
 38 | #include "libm_errno.h"
 39 | 
 40 | /* tan(x + xx) approximation valid on the interval [-pi/4,pi/4].
 41 |    If recip is true return -1/tan(x + xx) instead. */
 42 | static inline double tan_piby4(double x, double xx, int recip)
 43 | {
 44 |   double r, t1, t2, xl;
 45 |   int transform = 0;
 46 |   static const double
 47 |      piby4_lead = 7.85398163397448278999e-01, /* 0x3fe921fb54442d18 */
 48 |      piby4_tail = 3.06161699786838240164e-17; /* 0x3c81a62633145c06 */
 49 | 
 50 |   /* In order to maintain relative precision transform using the identity:
 51 |      tan(pi/4-x) = (1-tan(x))/(1+tan(x)) for arguments close to pi/4.
 52 |      Similarly use tan(x-pi/4) = (tan(x)-1)/(tan(x)+1) close to -pi/4. */
 53 | 
 54 |   if (x > 0.68)
 55 |     {
 56 |       transform = 1;
 57 |       x = piby4_lead - x;
 58 |       xl = piby4_tail - xx;
 59 |       x += xl;
 60 |       xx = 0.0;
 61 |     }
 62 |   else if (x < -0.68)
 63 |     {
 64 |       transform = -1;
 65 |       x = piby4_lead + x;
 66 |       xl = piby4_tail + xx;
 67 |       x += xl;
 68 |       xx = 0.0;
 69 |     }
 70 | 
 71 |   /* Core Remez [2,3] approximation to tan(x+xx) on the
 72 |      interval [0,0.68]. */
 73 | 
 74 |   r = x*x + 2.0 * x * xx;
 75 |   t1 = x;
 76 |   t2 = xx + x*r*
 77 |     (0.372379159759792203640806338901e0 +
 78 |      (-0.229345080057565662883358588111e-1 +
 79 |       0.224044448537022097264602535574e-3*r)*r)/
 80 |     (0.111713747927937668539901657944e1 +
 81 |      (-0.515658515729031149329237816945e0 +
 82 |       (0.260656620398645407524064091208e-1 -
 83 |        0.232371494088563558304549252913e-3*r)*r)*r);
 84 | 
 85 |   /* Reconstruct tan(x) in the transformed case. */
 86 | 
 87 |   if (transform)
 88 |     {
 89 |       double t;
 90 |       t = t1 + t2;
 91 |       if (recip)
 92 |          return transform*(2*t/(t-1) - 1.0);
 93 |       else
 94 |          return transform*(1.0 - 2*t/(1+t));
 95 |     }
 96 | 
 97 |   if (recip)
 98 |     {
 99 |       /* Compute -1.0/(t1 + t2) accurately */
100 |       double trec, trec_top, z1, z2, t;
101 |       unsigned long u;
102 |       t = t1 + t2;
103 |       GET_BITS_DP64(t, u);
104 |       u &= 0xffffffff00000000;
105 |       PUT_BITS_DP64(u, z1);
106 |       z2 = t2 - (z1 - t1);
107 |       trec = -1.0 / t;
108 |       GET_BITS_DP64(trec, u);
109 |       u &= 0xffffffff00000000;
110 |       PUT_BITS_DP64(u, trec_top);
111 |       return trec_top + trec * ((1.0 + trec_top * z1) + trec_top * z2);
112 | 
113 |     }
114 |   else
115 |     return t1 + t2;
116 | }
117 | 
118 | #pragma function(tan)
119 | 
120 | double tan(double x)
121 | {
122 |   double r, rr;
123 |   int region, xneg;
124 | 
125 |   unsigned long ux, ax;
126 |   GET_BITS_DP64(x, ux);
127 |   ax = (ux & ~SIGNBIT_DP64);
128 |   if (ax <= 0x3fe921fb54442d18) /* abs(x) <= pi/4 */
129 |     {
130 |       if (ax < 0x3f20000000000000) /* abs(x) < 2.0^(-13) */
131 |         {
132 |           if (ax < 0x3e40000000000000) /* abs(x) < 2.0^(-27) */
133 | 	    {
134 | 	      if (ax == 0x0000000000000000) return x;
135 |               else return val_with_flags(x, AMD_F_INEXACT);
136 | 	    }
137 |           else
138 |             {
139 |               /* Using a temporary variable prevents 64-bit VC++ from
140 |                  rearranging
141 |                     x + x*x*x*0.333333333333333333;
142 |                  into
143 |                     x * (1 + x*x*0.333333333333333333);
144 |                  The latter results in an incorrectly rounded answer. */
145 |               double tmp;
146 |               tmp = x*x*x*0.333333333333333333;
147 |               return x + tmp;
148 |             }
149 |         }
150 |       else
151 |         return tan_piby4(x, 0.0, 0);
152 |     }
153 |   else if ((ux & EXPBITS_DP64) == EXPBITS_DP64)
154 |     {
155 |       /* x is either NaN or infinity */
156 |       if (ux & MANTBITS_DP64)
157 |         /* x is NaN */
158 |         return _handle_error("tan", OP_TAN, ux|0x0008000000000000, _DOMAIN, 0,
159 |                             EDOM, x, 0.0, 1);
160 |       else
161 |         /* x is infinity. Return a NaN */
162 |         return _handle_error("tan", OP_TAN, INDEFBITPATT_DP64, _DOMAIN, AMD_F_INVALID,
163 |                             EDOM, x, 0.0, 1);
164 |     }
165 |   xneg = (ax != ux);
166 | 
167 | 
168 |   if (xneg)
169 |     x = -x;
170 | 
171 |   if (x < 5.0e5)
172 |     {
173 |       /* For these size arguments we can just carefully subtract the
174 |          appropriate multiple of pi/2, using extra precision where
175 |          x is close to an exact multiple of pi/2 */
176 |       static const double
177 |         twobypi =  6.36619772367581382433e-01, /* 0x3fe45f306dc9c883 */
178 |         piby2_1  =  1.57079632673412561417e+00, /* 0x3ff921fb54400000 */
179 |         piby2_1tail =  6.07710050650619224932e-11, /* 0x3dd0b4611a626331 */
180 |         piby2_2  =  6.07710050630396597660e-11, /* 0x3dd0b4611a600000 */
181 |         piby2_2tail =  2.02226624879595063154e-21, /* 0x3ba3198a2e037073 */
182 |         piby2_3  =  2.02226624871116645580e-21, /* 0x3ba3198a2e000000 */
183 |         piby2_3tail =  8.47842766036889956997e-32; /* 0x397b839a252049c1 */
184 |       double t, rhead, rtail;
185 |       int npi2;
186 |       unsigned long uy, xexp, expdiff;
187 |       xexp  = ax >> EXPSHIFTBITS_DP64;
188 |       /* How many pi/2 is x a multiple of? */
189 |       if (ax <= 0x400f6a7a2955385e) /* 5pi/4 */
190 |         {
191 |           if (ax <= 0x4002d97c7f3321d2) /* 3pi/4 */
192 |             npi2 = 1;
193 |           else
194 |             npi2 = 2;
195 |         }
196 |       else if (ax <= 0x401c463abeccb2bb) /* 9pi/4 */
197 |         {
198 |           if (ax <= 0x4015fdbbe9bba775) /* 7pi/4 */
199 |             npi2 = 3;
200 |           else
201 |             npi2 = 4;
202 |         }
203 |       else
204 |         npi2  = (int)(x * twobypi + 0.5);
205 |       /* Subtract the multiple from x to get an extra-precision remainder */
206 |       rhead  = x - npi2 * piby2_1;
207 |       rtail  = npi2 * piby2_1tail;
208 |       GET_BITS_DP64(rhead, uy);
209 |       expdiff = xexp - ((uy & EXPBITS_DP64) >> EXPSHIFTBITS_DP64);
210 |       if (expdiff > 15)
211 |         {
212 |           /* The remainder is pretty small compared with x, which
213 |              implies that x is a near multiple of pi/2
214 |              (x matches the multiple to at least 15 bits) */
215 |           t  = rhead;
216 |           rtail  = npi2 * piby2_2;
217 |           rhead  = t - rtail;
218 |           rtail  = npi2 * piby2_2tail - ((t - rhead) - rtail);
219 |           if (expdiff > 48)
220 |             {
221 |               /* x matches a pi/2 multiple to at least 48 bits */
222 |               t  = rhead;
223 |               rtail  = npi2 * piby2_3;
224 |               rhead  = t - rtail;
225 |               rtail  = npi2 * piby2_3tail - ((t - rhead) - rtail);
226 |             }
227 |         }
228 |       r = rhead - rtail;
229 |       rr = (rhead - r) - rtail;
230 |       region = npi2 & 3;
231 |     }
232 |   else
233 |     {
234 |       /* Reduce x into range [-pi/4,pi/4] */
235 |       __remainder_piby2(x, &r, &rr, &region);
236 |     }
237 | 
238 |   if (xneg)
239 |     return -tan_piby4(r, rr, region & 1);
240 |   else
241 |     return tan_piby4(r, rr, region & 1);
242 | }
243 | 


--------------------------------------------------------------------------------
/tanf.c:
--------------------------------------------------------------------------------
  1 | 
  2 | /*******************************************************************************
  3 | MIT License
  4 | -----------
  5 | 
  6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
  7 | 
  8 | Permission is hereby granted, free of charge, to any person obtaining a copy
  9 | of this Software and associated documentation files (the "Software"), to deal
 10 | in the Software without restriction, including without limitation the rights
 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 12 | copies of the Software, and to permit persons to whom the Software is
 13 | furnished to do so, subject to the following conditions:
 14 | 
 15 | The above copyright notice and this permission notice shall be included in
 16 | all copies or substantial portions of the Software.
 17 | 
 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 24 | THE SOFTWARE.
 25 | *******************************************************************************/
 26 | 
 27 | #include "libm.h"
 28 | #include "libm_util.h"
 29 | 
 30 | #define USE_REMAINDER_PIBY2F_INLINE
 31 | #define USE_VALF_WITH_FLAGS
 32 | #define USE_NANF_WITH_FLAGS
 33 | #define USE_HANDLE_ERRORF
 34 | #include "libm_inlines.h"
 35 | #undef USE_VALF_WITH_FLAGS
 36 | #undef USE_NANF_WITH_FLAGS
 37 | #undef USE_REMAINDER_PIBY2F_INLINE
 38 | #undef USE_HANDLE_ERRORF
 39 | 
 40 | #include "libm_errno.h"
 41 | 
 42 | // Disable "C4163: not available as intrinsic function" warning that older
 43 | // compilers may issue here.
 44 | #pragma warning(disable:4163)
 45 | #pragma function(tanf)
 46 | 
 47 | /* tan(x) approximation valid on the interval [-pi/4,pi/4].
 48 |    If recip is true return -1/tan(x) instead. */
 49 | static inline double tanf_piby4(double x, int recip)
 50 | {
 51 |   double r, t;
 52 | 
 53 |   /* Core Remez [1,2] approximation to tan(x) on the
 54 |      interval [0,pi/4]. */
 55 |   r = x*x;
 56 |   t = x + x*r*
 57 |     (0.385296071263995406715129e0 -
 58 |      0.172032480471481694693109e-1 * r) /
 59 |     (0.115588821434688393452299e+1 +
 60 |      (-0.51396505478854532132342e0 +
 61 |       0.1844239256901656082986661e-1 * r) * r);
 62 | 
 63 |   if (recip)
 64 |     return -1.0 / t;
 65 |   else
 66 |     return t;
 67 | }
 68 | 
 69 | 
 70 | float tanf(float x)
 71 | {
 72 |   double r, dx;
 73 |   int region, xneg;
 74 | 
 75 |   unsigned long ux, ax;
 76 | 
 77 |   dx = x;
 78 | 
 79 |   GET_BITS_DP64(dx, ux);
 80 |   ax = (ux & ~SIGNBIT_DP64);
 81 | 
 82 |   if (ax <= 0x3fe921fb54442d18) /* abs(x) <= pi/4 */
 83 |     {
 84 |       if (ax < 0x3f80000000000000) /* abs(x) < 2.0^(-7) */
 85 |         {
 86 |           if (ax < 0x3f20000000000000) /* abs(x) < 2.0^(-13) */
 87 |             {
 88 |               if (ax == 0x0000000000000000)
 89 |                 return x;
 90 |               else
 91 |                 return valf_with_flags(x, AMD_F_INEXACT);
 92 |             }
 93 |           else
 94 |             return (float)(dx + dx*dx*dx*0.333333333333333333);
 95 |         }
 96 |       else
 97 |         return (float)tanf_piby4(x, 0);
 98 |     }
 99 |   else if ((ux & EXPBITS_DP64) == EXPBITS_DP64)
100 |     {
101 |       /* x is either NaN or infinity */
102 |       if (ux & MANTBITS_DP64)
103 |         {
104 |           /* x is NaN */
105 |           unsigned int ufx;
106 |           GET_BITS_SP32(x, ufx);
107 |           return _handle_errorf("tanf", OP_TAN, ufx|0x00400000, _DOMAIN, 0,
108 |                                EDOM, x, 0.0F, 1);
109 |         }
110 |       else
111 |         {
112 |           /* x is infinity. Return a NaN */
113 |           return _handle_errorf("tanf", OP_TAN, INDEFBITPATT_SP32, _DOMAIN, AMD_F_INVALID,
114 |                                EDOM, x, 0.0F, 1);
115 |         }
116 |     }
117 | 
118 |   xneg = (int)(ux >> 63);
119 | 
120 |   if (xneg)
121 |     dx = -dx;
122 | 
123 |   if (dx < 5.0e5)
124 |     {
125 |       /* For these size arguments we can just carefully subtract the
126 |          appropriate multiple of pi/2, using extra precision where
127 |          dx is close to an exact multiple of pi/2 */
128 |       static const double
129 |         twobypi =  6.36619772367581382433e-01, /* 0x3fe45f306dc9c883 */
130 |         piby2_1  =  1.57079632673412561417e+00, /* 0x3ff921fb54400000 */
131 |         piby2_1tail =  6.07710050650619224932e-11, /* 0x3dd0b4611a626331 */
132 |         piby2_2  =  6.07710050630396597660e-11, /* 0x3dd0b4611a600000 */
133 |         piby2_2tail =  2.02226624879595063154e-21, /* 0x3ba3198a2e037073 */
134 |         piby2_3  =  2.02226624871116645580e-21, /* 0x3ba3198a2e000000 */
135 |         piby2_3tail =  8.47842766036889956997e-32; /* 0x397b839a252049c1 */
136 |       double t, rhead, rtail;
137 |       int npi2;
138 |       unsigned long uy, xexp, expdiff;
139 |       xexp  = ax >> EXPSHIFTBITS_DP64;
140 |       /* How many pi/2 is dx a multiple of? */
141 |       if (ax <= 0x400f6a7a2955385e) /* 5pi/4 */
142 |         {
143 |           if (ax <= 0x4002d97c7f3321d2) /* 3pi/4 */
144 |             npi2 = 1;
145 |           else
146 |             npi2 = 2;
147 |         }
148 |       else if (ax <= 0x401c463abeccb2bb) /* 9pi/4 */
149 |         {
150 |           if (ax <= 0x4015fdbbe9bba775) /* 7pi/4 */
151 |             npi2 = 3;
152 |           else
153 |             npi2 = 4;
154 |         }
155 |       else
156 |         npi2  = (int)(dx * twobypi + 0.5);
157 |       /* Subtract the multiple from dx to get an extra-precision remainder */
158 |       rhead  = dx - npi2 * piby2_1;
159 |       rtail  = npi2 * piby2_1tail;
160 |       GET_BITS_DP64(rhead, uy);
161 |       expdiff = xexp - ((uy & EXPBITS_DP64) >> EXPSHIFTBITS_DP64);
162 |       if (expdiff > 15)
163 |         {
164 |           /* The remainder is pretty small compared with dx, which
165 |              implies that dx is a near multiple of pi/2
166 |              (dx matches the multiple to at least 15 bits) */
167 |           t  = rhead;
168 |           rtail  = npi2 * piby2_2;
169 |           rhead  = t - rtail;
170 |           rtail  = npi2 * piby2_2tail - ((t - rhead) - rtail);
171 |           if (expdiff > 48)
172 |             {
173 |               /* dx matches a pi/2 multiple to at least 48 bits */
174 |               t  = rhead;
175 |               rtail  = npi2 * piby2_3;
176 |               rhead  = t - rtail;
177 |               rtail  = npi2 * piby2_3tail - ((t - rhead) - rtail);
178 |             }
179 |         }
180 |       r = rhead - rtail;
181 |       region = npi2 & 3;
182 |     }
183 |   else
184 |     {
185 |       /* Reduce x into range [-pi/4,pi/4] */
186 |       __remainder_piby2f_inline(ax, &r, &region);
187 |     }
188 | 
189 |   if (xneg)
190 |     return (float)-tanf_piby4(r, region & 1);
191 |   else
192 |     return (float)tanf_piby4(r, region & 1);
193 | }
194 | 


--------------------------------------------------------------------------------
/tanh.c:
--------------------------------------------------------------------------------
  1 | 
  2 | /*******************************************************************************
  3 | MIT License
  4 | -----------
  5 | 
  6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
  7 | 
  8 | Permission is hereby granted, free of charge, to any person obtaining a copy
  9 | of this Software and associated documentation files (the "Software"), to deal
 10 | in the Software without restriction, including without limitation the rights
 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 12 | copies of the Software, and to permit persons to whom the Software is
 13 | furnished to do so, subject to the following conditions:
 14 | 
 15 | The above copyright notice and this permission notice shall be included in
 16 | all copies or substantial portions of the Software.
 17 | 
 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 24 | THE SOFTWARE.
 25 | *******************************************************************************/
 26 | 
 27 | #include "libm.h"
 28 | #include "libm_util.h"
 29 | 
 30 | #define USE_HANDLE_ERROR
 31 | #define USE_SPLITEXP
 32 | #define USE_SCALEDOUBLE_2
 33 | #define USE_VAL_WITH_FLAGS
 34 | #include "libm_inlines.h"
 35 | #undef USE_SPLITEXP
 36 | #undef USE_SCALEDOUBLE_2
 37 | #undef USE_VAL_WITH_FLAGS
 38 | #undef USE_HANDLE_ERROR
 39 | 
 40 | #include "libm_errno.h"
 41 | 
 42 | 
 43 | #pragma function(tanh)
 44 | double tanh(double x)
 45 | {
 46 |   /* Returns the hyperbolic tangent of x, computed on |x| with the sign restored.
 47 |     The definition of tanh(x) is sinh(x)/cosh(x), which is also equivalent
 48 |     to the following three formulae:
 49 |     1.  (exp(x) - exp(-x))/(exp(x) + exp(-x))
 50 |     2.  (1 - (2/(exp(2*x) + 1 )))
 51 |     3.  (exp(2*x) - 1)/(exp(2*x) + 1)
 52 |     but computationally, some formulae are better on some ranges: rational
 53 |     fits for |x| <= 1, formula 2 for 1 < |x| <= 20, and +/-1 beyond that. */
 54 |   static const double
 55 |     thirtytwo_by_log2 = 4.61662413084468283841e+01, /* 0x40471547652b82fe */
 56 |     log2_by_32_lead = 2.16608493356034159660e-02, /* 0x3f962e42fe000000 */
 57 |     log2_by_32_tail = 5.68948749532545630390e-11, /* 0x3dcf473de6af278e */
 58 |     large_threshold = 20.0; /* 0x4034000000000000 */
 59 |   /* Must be 64 bits wide: unsigned long is only 32 bits on LLP64 (Windows). */
 60 |   unsigned long long ux, aux, xneg;
 61 |   double y, z, p, z1, z2;
 62 |   int m;
 63 | 
 64 |   /* Special cases */
 65 |   /* ux = raw bits of x; aux = ux with SIGNBIT_DP64 cleared, i.e. bits of |x|. */
 66 |   GET_BITS_DP64(x, ux);
 67 |   aux = ux & ~SIGNBIT_DP64;
 68 |   if (aux < 0x3e30000000000000) /* |x| < 2^-28: small enough that tanh(x) = x */
 69 |     {
 70 |       if (aux == 0)
 71 |         return x; /* with no inexact */
 72 |       else
 73 |         return val_with_flags(x, AMD_F_INEXACT);
 74 |     }
 75 |   else if (aux > 0x7ff0000000000000) /* |x| is NaN */
 76 |         return _handle_error("tanh", OP_TANH, ux|0x0008000000000000, _DOMAIN, 
 77 |                         0, EDOM, x, 0.0, 1);
 78 |   /* +/-Inf is not special-cased: it takes the large_threshold branch below. */
 79 | 
 80 |   xneg = (aux != ux); /* nonzero iff the sign bit of x is set */
 81 | 
 82 |   y = x;
 83 |   if (xneg) y = -x;
 84 | 
 85 |   if (y > large_threshold)
 86 |     {
 87 |       /* If x is large then exp(-x) is negligible and
 88 |          formula 1 reduces to plus or minus 1.0 */
 89 |       z = 1.0;
 90 |     }
 91 |   else if (y <= 1.0)
 92 |     {
 93 |       double y2;
 94 |       y2 = y*y;
 95 |       if (y < 0.9)
 96 |         {
 97 |           /* Use a [3,3] Remez approximation on [0,0.9]. */
 98 |           z = y + y*y2*
 99 |             (-0.274030424656179760118928e0 +
100 |              (-0.176016349003044679402273e-1 +
101 |               (-0.200047621071909498730453e-3 -
102 |                0.142077926378834722618091e-7*y2)*y2)*y2)/
103 |             (0.822091273968539282568011e0 +
104 |              (0.381641414288328849317962e0 +
105 |               (0.201562166026937652780575e-1 +
106 |                0.2091140262529164482568557e-3*y2)*y2)*y2);
107 |         }
108 |       else
109 |         {
110 |           /* Use a [3,3] Remez approximation on [0.9,1]. */
111 |           z = y + y*y2*
112 |             (-0.227793870659088295252442e0 +
113 |              (-0.146173047288731678404066e-1 +
114 |               (-0.165597043903549960486816e-3 -
115 |                0.115475878996143396378318e-7*y2)*y2)*y2)/
116 |             (0.683381611977295894959554e0 +
117 |              (0.317204558977294374244770e0 +
118 |               (0.167358775461896562588695e-1 +
119 |                0.173076050126225961768710e-3*y2)*y2)*y2);
120 |         }
121 |     }
122 |   else
123 |     {
124 |       /* Compute p = exp(2*y) + 1. The code is basically inlined
125 |          from exp_amd. */
126 | 
127 |       splitexp(2*y, 1.0, thirtytwo_by_log2, log2_by_32_lead,
128 | 	       log2_by_32_tail, &m, &z1, &z2);
129 |       p = scaleDouble_2(z1 + z2, m) + 1.0;
130 | 
131 |       /* Now reconstruct tanh from p. */
132 |       z = (1.0 - 2.0/p);
133 |     }
134 |   /* z is tanh(|x|); tanh is odd, so negate it for negative x. */
135 |   if (xneg) z = - z;
136 |   return z;
137 | }
138 | 


--------------------------------------------------------------------------------
/tanhf.c:
--------------------------------------------------------------------------------
  1 | 
  2 | /*******************************************************************************
  3 | MIT License
  4 | -----------
  5 | 
  6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
  7 | 
  8 | Permission is hereby granted, free of charge, to any person obtaining a copy
  9 | of this Software and associated documentation files (the "Software"), to deal
 10 | in the Software without restriction, including without limitation the rights
 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 12 | copies of the Software, and to permit persons to whom the Software is
 13 | furnished to do so, subject to the following conditions:
 14 | 
 15 | The above copyright notice and this permission notice shall be included in
 16 | all copies or substantial portions of the Software.
 17 | 
 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 24 | THE SOFTWARE.
 25 | *******************************************************************************/
 26 | 
 27 | #include "libm.h"
 28 | #include "libm_util.h"
 29 | 
 30 | #define USE_HANDLE_ERRORF
 31 | #define USE_SPLITEXPF
 32 | #define USE_SCALEFLOAT_2
 33 | #define USE_VALF_WITH_FLAGS
 34 | #include "libm_inlines.h"
 35 | #undef USE_SPLITEXPF
 36 | #undef USE_SCALEFLOAT_2
 37 | #undef USE_VALF_WITH_FLAGS
 38 | #undef USE_HANDLE_ERRORF
 39 | 
 40 | #include "libm_errno.h"
 41 | 
 42 | // Disable "C4163: not available as intrinsic function" warning that older
 43 | // compilers may issue here.
 44 | #pragma warning(disable:4163)
 45 | #pragma function(tanhf)
 46 | 
 47 | float tanhf(float x)
 48 | {
 49 |   /* Returns the single-precision hyperbolic tangent of x.
 50 |     The definition of tanh(x) is sinh(x)/cosh(x), which is also equivalent
 51 |     to the following three formulae:
 52 |     1.  (exp(x) - exp(-x))/(exp(x) + exp(-x))
 53 |     2.  (1 - (2/(exp(2*x) + 1 )))
 54 |     3.  (exp(2*x) - 1)/(exp(2*x) + 1)
 55 |     but computationally, some formulae are better on some ranges: rational
 56 |     fits for |x| <= 1, formula 2 for 1 < |x| <= 10, and +/-1 beyond that. */
 57 |   static const float
 58 |     thirtytwo_by_log2 =  4.6166240692e+01F, /* 0x4238aa3b */
 59 |     log2_by_32_lead =  2.1659851074e-02F, /* 0x3cb17000 */
 60 |     log2_by_32_tail =  9.9831822808e-07F, /* 0x3585fdf4 */
 61 |     large_threshold = 10.0F; /* 0x41200000 */
 62 |   /* ux/aux hold 32-bit patterns of x; xneg is a +1.0F/-1.0F sign factor. */
 63 |   unsigned int ux, aux;
 64 |   float y, z, p, z1, z2, xneg;
 65 |   int m;
 66 | 
 67 |   /* Special cases */
 68 |   /* ux = raw bits of x; aux = ux with SIGNBIT_SP32 cleared, i.e. bits of |x|. */
 69 |   GET_BITS_SP32(x, ux);
 70 |   aux = ux & ~SIGNBIT_SP32;
 71 |   if (aux < 0x39000000) /* |x| < 2^-13: small enough that tanh(x) = x */
 72 |     {
 73 |       if (aux == 0)
 74 |         return x; /* with no inexact */
 75 |       else
 76 |         return valf_with_flags(x, AMD_F_INEXACT); /* presumably returns x and raises inexact -- confirm in libm_inlines.h */
 77 |     }
 78 |   else if (aux > 0x7f800000) /* |x| is NaN */
 79 |   {
 80 |       unsigned int ufx; /* re-read of the same bits already held in ux */
 81 |       GET_BITS_SP32(x, ufx);
 82 |       return _handle_errorf("tanhf", OP_TANH, ufx|0x00400000, _DOMAIN, 0, /* 0x00400000: top mantissa bit (quiet-NaN bit) */
 83 |                            EDOM, x, 0.0F, 1);
 84 |   }
 85 |   /* +/-Inf is not special-cased: it takes the large_threshold branch below. */
 86 | 
 87 |   xneg = 1.0F - 2.0F * (aux != ux); /* +1.0F if the sign bit is clear, -1.0F if set */
 88 | 
 89 |   y = xneg * x; /* y = |x| */
 90 | 
 91 |   if (y > large_threshold)
 92 |     {
 93 |       /* If x is large then exp(-x) is negligible and
 94 |          formula 1 reduces to plus or minus 1.0 */
 95 |       z = 1.0F;
 96 |     }
 97 |   else if (y <= 1.0F)
 98 |     {
 99 |       float y2;
100 |       y2 = y*y;
101 | 
102 |       if (y < 0.9F)
103 |         {
104 |           /* Use a [2,1] Remez approximation on [0,0.9]. */
105 |           z = y + y*y2*
106 |             (-0.28192806108402678e0F +
107 |              (-0.14628356048797849e-2F +
108 |               0.4891631088530669873e-4F*y2)*y2)/
109 |             (0.845784192581041099e0F +
110 |              0.3427017942262751343e0F*y2);
111 |         }
112 |       else
113 |         {
114 |           /* Use a [2,1] Remez approximation on [0.9,1]. */
115 |           z = y + y*y2*
116 |             (-0.24069858695196524e0F +
117 |              (-0.12325644183611929e-2F +
118 |               0.3827534993599483396e-4F*y2)*y2)/
119 |             (0.72209738473684982e0F +
120 |              0.292529068698052819e0F*y2);
121 |         }
122 |     }
123 |   else
124 |     {
125 |       /* Compute p = exp(2*y) + 1. The code is basically inlined
126 |          from exp_amd. */
127 | 
128 |       splitexpf(2*y, 1.0F, thirtytwo_by_log2, log2_by_32_lead,
129 | 	       log2_by_32_tail, &m, &z1, &z2);
130 |       p = scaleFloat_2(z1 + z2, m) + 1.0F;
131 |       /* Now reconstruct tanh from p. */
132 |       z = (1.0F - 2.0F/p);
133 |     }
134 |   /* z is tanh(|x|); multiplying by xneg restores the sign of x. */
135 |   return xneg * z;
136 | }
137 | 


--------------------------------------------------------------------------------
/two_to_jby64_head_tail_table.asm:
--------------------------------------------------------------------------------
  1 | ;;
  2 | ;
  3 | ; MIT License
  4 | ; -----------
  5 | ; 
  6 | ; Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
  7 | ; 
  8 | ; Permission is hereby granted, free of charge, to any person obtaining a copy
  9 | ; of this Software and associated documentation files (the "Software"), to deal
 10 | ; in the Software without restriction, including without limitation the rights
 11 | ; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 12 | ; copies of the Software, and to permit persons to whom the Software is
 13 | ; furnished to do so, subject to the following conditions:
 14 | ; 
 15 | ; The above copyright notice and this permission notice shall be included in
 16 | ; all copies or substantial portions of the Software.
 17 | ; 
 18 | ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 19 | ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 20 | ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 21 | ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 22 | ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 23 | ; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 24 | ; THE SOFTWARE.
 25 | ;
 26 | ;; Defines the __two_to_jby64_head_table and __two_to_jby64_tail_table tables
 27 | ;; (64 doubles each; head[j] + tail[j] ~= 2^(j/64), j = 0..63). Used in exp and pow
 28 | ;;
 29 | 
 30 | .const
 31 | 
 32 | ALIGN 16
 33 | PUBLIC __two_to_jby64_head_table 
 34 | __two_to_jby64_head_table DQ 3ff0000000000000h
 35 |                           DQ 3ff02c9a30000000h
 36 |                           DQ 3ff059b0d0000000h
 37 |                           DQ 3ff0874510000000h
 38 |                           DQ 3ff0b55860000000h
 39 |                           DQ 3ff0e3ec30000000h
 40 |                           DQ 3ff11301d0000000h
 41 |                           DQ 3ff1429aa0000000h
 42 |                           DQ 3ff172b830000000h
 43 |                           DQ 3ff1a35be0000000h
 44 |                           DQ 3ff1d48730000000h
 45 |                           DQ 3ff2063b80000000h
 46 |                           DQ 3ff2387a60000000h
 47 |                           DQ 3ff26b4560000000h
 48 |                           DQ 3ff29e9df0000000h
 49 |                           DQ 3ff2d285a0000000h
 50 |                           DQ 3ff306fe00000000h
 51 |                           DQ 3ff33c08b0000000h
 52 |                           DQ 3ff371a730000000h
 53 |                           DQ 3ff3a7db30000000h
 54 |                           DQ 3ff3dea640000000h
 55 |                           DQ 3ff4160a20000000h
 56 |                           DQ 3ff44e0860000000h
 57 |                           DQ 3ff486a2b0000000h
 58 |                           DQ 3ff4bfdad0000000h
 59 |                           DQ 3ff4f9b270000000h
 60 |                           DQ 3ff5342b50000000h
 61 |                           DQ 3ff56f4730000000h
 62 |                           DQ 3ff5ab07d0000000h
 63 |                           DQ 3ff5e76f10000000h
 64 |                           DQ 3ff6247eb0000000h
 65 |                           DQ 3ff6623880000000h
 66 |                           DQ 3ff6a09e60000000h
 67 |                           DQ 3ff6dfb230000000h
 68 |                           DQ 3ff71f75e0000000h
 69 |                           DQ 3ff75feb50000000h
 70 |                           DQ 3ff7a11470000000h
 71 |                           DQ 3ff7e2f330000000h
 72 |                           DQ 3ff8258990000000h
 73 |                           DQ 3ff868d990000000h
 74 |                           DQ 3ff8ace540000000h
 75 |                           DQ 3ff8f1ae90000000h
 76 |                           DQ 3ff93737b0000000h
 77 |                           DQ 3ff97d8290000000h
 78 |                           DQ 3ff9c49180000000h
 79 |                           DQ 3ffa0c6670000000h
 80 |                           DQ 3ffa5503b0000000h
 81 |                           DQ 3ffa9e6b50000000h
 82 |                           DQ 3ffae89f90000000h
 83 |                           DQ 3ffb33a2b0000000h
 84 |                           DQ 3ffb7f76f0000000h
 85 |                           DQ 3ffbcc1e90000000h
 86 |                           DQ 3ffc199bd0000000h
 87 |                           DQ 3ffc67f120000000h
 88 |                           DQ 3ffcb720d0000000h
 89 |                           DQ 3ffd072d40000000h
 90 |                           DQ 3ffd5818d0000000h
 91 |                           DQ 3ffda9e600000000h
 92 |                           DQ 3ffdfc9730000000h
 93 |                           DQ 3ffe502ee0000000h
 94 |                           DQ 3ffea4afa0000000h
 95 |                           DQ 3ffefa1be0000000h
 96 |                           DQ 3fff507650000000h
 97 |                           DQ 3fffa7c180000000h
 98 | 
 99 | ALIGN 16
100 | PUBLIC __two_to_jby64_tail_table
101 | __two_to_jby64_tail_table DQ 0000000000000000h
102 |                           DQ 3e6cef00c1dcdef9h
103 |                           DQ 3e48ac2ba1d73e2ah
104 |                           DQ 3e60eb37901186beh
105 |                           DQ 3e69f3121ec53172h
106 |                           DQ 3e469e8d10103a17h
107 |                           DQ 3df25b50a4ebbf1ah
108 |                           DQ 3e6d525bbf668203h
109 |                           DQ 3e68faa2f5b9bef9h
110 |                           DQ 3e66df96ea796d31h
111 |                           DQ 3e368b9aa7805b80h
112 |                           DQ 3e60c519ac771dd6h
113 |                           DQ 3e6ceac470cd83f5h
114 |                           DQ 3e5789f37495e99ch
115 |                           DQ 3e547f7b84b09745h
116 |                           DQ 3e5b900c2d002475h
117 |                           DQ 3e64636e2a5bd1abh
118 |                           DQ 3e4320b7fa64e430h
119 |                           DQ 3e5ceaa72a9c5154h
120 |                           DQ 3e53967fdba86f24h
121 |                           DQ 3e682468446b6824h
122 |                           DQ 3e3f72e29f84325bh
123 |                           DQ 3e18624b40c4dbd0h
124 |                           DQ 3e5704f3404f068eh
125 |                           DQ 3e54d8a89c750e5eh
126 |                           DQ 3e5a74b29ab4cf62h
127 |                           DQ 3e5a753e077c2a0fh
128 |                           DQ 3e5ad49f699bb2c0h
129 |                           DQ 3e6a90a852b19260h
130 |                           DQ 3e56b48521ba6f93h
131 |                           DQ 3e0d2ac258f87d03h
132 |                           DQ 3e42a91124893ecfh
133 |                           DQ 3e59fcef32422cbeh
134 |                           DQ 3e68ca345de441c5h
135 |                           DQ 3e61d8bee7ba46e1h
136 |                           DQ 3e59099f22fdba6ah
137 |                           DQ 3e4f580c36bea881h
138 |                           DQ 3e5b3d398841740ah
139 |                           DQ 3e62999c25159f11h
140 |                           DQ 3e668925d901c83bh
141 |                           DQ 3e415506dadd3e2ah
142 |                           DQ 3e622aee6c57304eh
143 |                           DQ 3e29b8bc9e8a0387h
144 |                           DQ 3e6fbc9c9f173d24h
145 |                           DQ 3e451f8480e3e235h
146 |                           DQ 3e66bbcac96535b5h
147 |                           DQ 3e41f12ae45a1224h
148 |                           DQ 3e55e7f6fd0fac90h
149 |                           DQ 3e62b5a75abd0e69h
150 |                           DQ 3e609e2bf5ed7fa1h
151 |                           DQ 3e47daf237553d84h
152 |                           DQ 3e12f074891ee83dh
153 |                           DQ 3e6b0aa538444196h
154 |                           DQ 3e6cafa29694426fh
155 |                           DQ 3e69df20d22a0797h
156 |                           DQ 3e640f12f71a1e45h
157 |                           DQ 3e69f7490e4bb40bh
158 |                           DQ 3e4ed9942b84600dh
159 |                           DQ 3e4bdcdaf5cb4656h
160 |                           DQ 3e5e2cffd89cf44ch
161 |                           DQ 3e452486cc2c7b9dh
162 |                           DQ 3e6cc2b44eee3fa4h
163 |                           DQ 3e66dc8a80ce9f09h
164 |                           DQ 3e39e90d82e90a7eh
165 | END
166 | 


--------------------------------------------------------------------------------
/two_to_jby64_table.asm:
--------------------------------------------------------------------------------
  1 | ;;
  2 | ;
  3 | ; MIT License
  4 | ; -----------
  5 | ; 
  6 | ; Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
  7 | ; 
  8 | ; Permission is hereby granted, free of charge, to any person obtaining a copy
  9 | ; of this Software and associated documentation files (the "Software"), to deal
 10 | ; in the Software without restriction, including without limitation the rights
 11 | ; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 12 | ; copies of the Software, and to permit persons to whom the Software is
 13 | ; furnished to do so, subject to the following conditions:
 14 | ; 
 15 | ; The above copyright notice and this permission notice shall be included in
 16 | ; all copies or substantial portions of the Software.
 17 | ; 
 18 | ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 19 | ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 20 | ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 21 | ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 22 | ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 23 | ; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 24 | ; THE SOFTWARE.
 25 | ;
 26 | ;; Defines the __two_to_jby64_table table: 64 doubles, entry j = 2^(j/64), j = 0..63
 27 | ;; Used by exp and expf
 28 | ;;
 29 | 
 30 | .const
 31 | 
 32 | ALIGN 16
 33 | PUBLIC __two_to_jby64_table 
 34 | __two_to_jby64_table DQ 3ff0000000000000h
 35 |                      DQ 3ff02c9a3e778061h
 36 |                      DQ 3ff059b0d3158574h
 37 |                      DQ 3ff0874518759bc8h
 38 |                      DQ 3ff0b5586cf9890fh
 39 |                      DQ 3ff0e3ec32d3d1a2h
 40 |                      DQ 3ff11301d0125b51h
 41 |                      DQ 3ff1429aaea92de0h
 42 |                      DQ 3ff172b83c7d517bh
 43 |                      DQ 3ff1a35beb6fcb75h
 44 |                      DQ 3ff1d4873168b9aah
 45 |                      DQ 3ff2063b88628cd6h
 46 |                      DQ 3ff2387a6e756238h
 47 |                      DQ 3ff26b4565e27cddh
 48 |                      DQ 3ff29e9df51fdee1h
 49 |                      DQ 3ff2d285a6e4030bh
 50 |                      DQ 3ff306fe0a31b715h
 51 |                      DQ 3ff33c08b26416ffh
 52 |                      DQ 3ff371a7373aa9cbh
 53 |                      DQ 3ff3a7db34e59ff7h
 54 |                      DQ 3ff3dea64c123422h
 55 |                      DQ 3ff4160a21f72e2ah
 56 |                      DQ 3ff44e086061892dh
 57 |                      DQ 3ff486a2b5c13cd0h
 58 |                      DQ 3ff4bfdad5362a27h
 59 |                      DQ 3ff4f9b2769d2ca7h
 60 |                      DQ 3ff5342b569d4f82h
 61 |                      DQ 3ff56f4736b527dah
 62 |                      DQ 3ff5ab07dd485429h
 63 |                      DQ 3ff5e76f15ad2148h
 64 |                      DQ 3ff6247eb03a5585h
 65 |                      DQ 3ff6623882552225h
 66 |                      DQ 3ff6a09e667f3bcdh
 67 |                      DQ 3ff6dfb23c651a2fh
 68 |                      DQ 3ff71f75e8ec5f74h
 69 |                      DQ 3ff75feb564267c9h
 70 |                      DQ 3ff7a11473eb0187h
 71 |                      DQ 3ff7e2f336cf4e62h
 72 |                      DQ 3ff82589994cce13h
 73 |                      DQ 3ff868d99b4492edh
 74 |                      DQ 3ff8ace5422aa0dbh
 75 |                      DQ 3ff8f1ae99157736h
 76 |                      DQ 3ff93737b0cdc5e5h
 77 |                      DQ 3ff97d829fde4e50h
 78 |                      DQ 3ff9c49182a3f090h
 79 |                      DQ 3ffa0c667b5de565h
 80 |                      DQ 3ffa5503b23e255dh
 81 |                      DQ 3ffa9e6b5579fdbfh
 82 |                      DQ 3ffae89f995ad3adh
 83 |                      DQ 3ffb33a2b84f15fbh
 84 |                      DQ 3ffb7f76f2fb5e47h
 85 |                      DQ 3ffbcc1e904bc1d2h
 86 |                      DQ 3ffc199bdd85529ch
 87 |                      DQ 3ffc67f12e57d14bh
 88 |                      DQ 3ffcb720dcef9069h
 89 |                      DQ 3ffd072d4a07897ch
 90 |                      DQ 3ffd5818dcfba487h
 91 |                      DQ 3ffda9e603db3285h
 92 |                      DQ 3ffdfc97337b9b5fh
 93 |                      DQ 3ffe502ee78b3ff6h
 94 |                      DQ 3ffea4afa2a490dah
 95 |                      DQ 3ffefa1bee615a27h
 96 |                      DQ 3fff50765b6e4540h
 97 |                      DQ 3fffa7c1819e90d8h
 98 | 
 99 | END
100 | 


--------------------------------------------------------------------------------