├── L2_by_pi_bits.asm ├── Lsincos_array.asm ├── Lsincosf_array.asm ├── _chgsign.c ├── _chgsignf.c ├── _copysign.c ├── _copysignf.c ├── _finite.c ├── _finitef.c ├── acos.c ├── acosf.c ├── asin.c ├── asinf.c ├── atan.c ├── atan2.c ├── atan2f.c ├── atanf.c ├── cabs.c ├── cabsf.c ├── ceil.c ├── ceilf.c ├── cos.asm ├── cosf.asm ├── cosh.c ├── coshf.c ├── exp.asm ├── exp2.c ├── exp_special.c ├── expf.asm ├── floor.c ├── floorf.c ├── fm.inc ├── fma3_available.c ├── fmod.asm ├── fmodf.asm ├── hypot.c ├── hypotf.c ├── libm.h ├── libm_errno.h ├── libm_inlines.h ├── libm_new.h ├── libm_util.h ├── log.asm ├── log10.asm ├── log10_128_lead_tail_table.asm ├── log10_256_lead_tail_table.asm ├── log_128_lead_tail_table.asm ├── log_256_lead_tail_table.asm ├── log_F_inv_dword_table.asm ├── log_F_inv_qword_table.asm ├── log_special.c ├── logb.c ├── logbf.c ├── logf.asm ├── modf.c ├── modff.c ├── pow.asm ├── pow_special.c ├── remainder.c ├── remainder_piby2.c ├── remainder_piby2_forAsm.asm ├── remainder_piby2_forFMA3.asm ├── remainder_piby2f.c ├── remainder_piby2f_forAsm.asm ├── remainder_piby2f_forC.asm ├── remainderf.c ├── simd.h ├── sin.asm ├── sincos_special.c ├── sinf.asm ├── sinh.c ├── sinhf.c ├── sqrt.c ├── sqrtf.c ├── tan.asm ├── tan.c ├── tanf.asm ├── tanf.c ├── tanh.c ├── tanhf.c ├── two_to_jby64_head_tail_table.asm └── two_to_jby64_table.asm /L2_by_pi_bits.asm: -------------------------------------------------------------------------------- 1 | ; 2 | ; MIT License 3 | ; ----------- 4 | ; 5 | ; Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 
6 | ; 7 | ; Permission is hereby granted, free of charge, to any person obtaining a copy 8 | ; of this Software and associated documentation files (the "Software"), to deal 9 | ; in the Software without restriction, including without limitation the rights 10 | ; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | ; copies of the Software, and to permit persons to whom the Software is 12 | ; furnished to do so, subject to the following conditions: 13 | ; 14 | ; The above copyright notice and this permission notice shall be included in 15 | ; all copies or substantial portions of the Software. 16 | ; 17 | ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | ; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | ; THE SOFTWARE. 
24 | ; 25 | ;; 26 | ;; Defines __L_2_by_pi_bits array 27 | ;; Used in trigonometric argument reduction 28 | ;; The table is 158 bytes (19 rows of 8 followed by a final row of 6), 16-byte aligned, in the .const segment. Read as one little-endian integer (most significant byte last) its top non-zero bytes are 02h 8Bh E6h 0Dh B9h 39h 10h 54h ..., i.e. the hex expansion of 2/pi (0.A2F9836E4E441529...h) shifted right by six bits; the last seven bytes are zero. 29 | 30 | .const 31 | 32 | ALIGN 16 33 | PUBLIC __L_2_by_pi_bits 34 | __L_2_by_pi_bits DB 224, 241, 27, 193, 12, 88, 33, 116 35 | DB 53, 126, 196, 126, 237, 175, 169, 75 36 | DB 74, 41, 222, 231, 28, 244, 236, 197 37 | DB 151, 175, 31, 235, 158, 212, 181, 168 38 | DB 127, 121, 154, 253, 24, 61, 221, 38 39 | DB 44, 159, 60, 251, 217, 180, 125, 180 40 | DB 41, 104, 45, 70, 188, 188, 63, 96 41 | DB 22, 120, 255, 95, 226, 127, 236, 160 42 | DB 228, 247, 46, 126, 17, 114, 210, 231 43 | DB 76, 13, 230, 88, 71, 230, 4, 249 44 | DB 125, 209, 154, 192, 113, 166, 19, 18 45 | DB 237, 186, 212, 215, 8, 162, 251, 156 46 | DB 166, 196, 114, 172, 119, 248, 115, 72 47 | DB 70, 39, 168, 187, 36, 25, 128, 75 48 | DB 55, 9, 233, 184, 145, 220, 134, 21 49 | DB 239, 122, 175, 142, 69, 249, 7, 65 50 | DB 14, 241, 100, 86, 138, 109, 3, 119 51 | DB 211, 212, 71, 95, 157, 240, 167, 84 52 | DB 16, 57, 185, 13, 230, 139, 2, 0 53 | DB 0, 0, 0, 0, 0, 0 54 | END 55 | -------------------------------------------------------------------------------- /Lsincos_array.asm: -------------------------------------------------------------------------------- 1 | ;; 2 | ; 3 | ; MIT License 4 | ; ----------- 5 | ; 6 | ; Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 7 | ; 8 | ; Permission is hereby granted, free of charge, to any person obtaining a copy 9 | ; of this Software and associated documentation files (the "Software"), to deal 10 | ; in the Software without restriction, including without limitation the rights 11 | ; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | ; copies of the Software, and to permit persons to whom the Software is 13 | ; furnished to do so, subject to the following conditions: 14 | ; 15 | ; The above copyright notice and this permission notice shall be included in 16 | ; all copies or substantial portions of the Software. 
17 | ; 18 | ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | ; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | ; THE SOFTWARE. 25 | ; 26 | ;; Defines __Lcosarray and __Lsinarray arrays. 27 | ;; Used in sin.asm and cos.asm 28 | ;; Each table holds six double-precision bit patterns (c1..c6 / s1..s6), and every coefficient is followed by a zero quadword, so each entry occupies one 16-byte slot of the 16-byte aligned table. The values are close to the Taylor coefficients 1/4!, -1/6!, ..., -1/14! (cosine) and -1/3!, 1/5!, ..., 1/13! (sine). NOTE(review): the zero padding presumably lets consumers fetch one coefficient per aligned 128-bit load - confirm in sin.asm / cos.asm. 29 | 30 | .const 31 | 32 | ALIGN 16 33 | PUBLIC __Lcosarray 34 | __Lcosarray DQ 03fa5555555555555h ; 0.0416667 c1 35 | DQ 0 36 | DQ 0bf56c16c16c16967h ; -0.00138889 c2 37 | DQ 0 38 | DQ 03EFA01A019F4EC91h ; 2.48016e-005 c3 39 | DQ 0 40 | DQ 0bE927E4FA17F667Bh ; -2.75573e-007 c4 41 | DQ 0 42 | DQ 03E21EEB690382EECh ; 2.08761e-009 c5 43 | DQ 0 44 | DQ 0bDA907DB47258AA7h ; -1.13826e-011 c6 45 | DQ 0 46 | 47 | ALIGN 16 48 | PUBLIC __Lsinarray 49 | __Lsinarray DQ 0bfc5555555555555h ; -0.166667 s1 50 | DQ 0 51 | DQ 03f81111111110bb3h ; 0.00833333 s2 52 | DQ 0 53 | DQ 0bf2a01a019e83e5ch ; -0.000198413 s3 54 | DQ 0 55 | DQ 03ec71de3796cde01h ; 2.75573e-006 s4 56 | DQ 0 57 | DQ 0be5ae600b42fdfa7h ; -2.50511e-008 s5 58 | DQ 0 59 | DQ 03de5e0b2f9a43bb8h ; 1.59181e-010 s6 60 | DQ 0 61 | 62 | END 63 | -------------------------------------------------------------------------------- /Lsincosf_array.asm: -------------------------------------------------------------------------------- 1 | ;; 2 | ; 3 | ; MIT License 4 | ; ----------- 5 | ; 6 | ; Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 
7 | ; 8 | ; Permission is hereby granted, free of charge, to any person obtaining a copy 9 | ; of this Software and associated documentation files (the "Software"), to deal 10 | ; in the Software without restriction, including without limitation the rights 11 | ; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | ; copies of the Software, and to permit persons to whom the Software is 13 | ; furnished to do so, subject to the following conditions: 14 | ; 15 | ; The above copyright notice and this permission notice shall be included in 16 | ; all copies or substantial portions of the Software. 17 | ; 18 | ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | ; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | ; THE SOFTWARE. 25 | ; 26 | ;; Defines __Lcosfarray and __Lsinfarray arrays. 27 | ;; Used by the single-precision sine/cosine code (presumably sinf.asm and cosf.asm; confirm against the consumers) 28 | ;; These coefficients are actually from Taylor series. 
29 | ;; __Lcosfarray holds five and __Lsinfarray four contiguous double-precision (DQ) bit patterns with no padding between entries; each table starts on a 16-byte boundary. 30 | 31 | .const 32 | 33 | ALIGN 16 34 | PUBLIC __Lcosfarray 35 | __Lcosfarray DQ 0bfe0000000000000h ; -0.5 c0 = -1/2! 36 | DQ 03fa5555555555555h ; 0.0416667 c1 = 1/4! 37 | DQ 0bf56c16c16c16c16h ; -0.00138889 c2 = -1/6! 38 | DQ 03EFA01A01A01A019h ; 2.48016e-005 c3 = 1/8! 39 | DQ 0be927e4fb7789f5ch ; -2.75573e-007 c4 = -1/10! 40 | 41 | ALIGN 16 42 | PUBLIC __Lsinfarray 43 | __Lsinfarray DQ 0bfc5555555555555h ; -0.166667 s1 = -1/3! 44 | DQ 03f81111111111111h ; 0.00833333 s2 = 1/5! 45 | DQ 0bf2a01a01a01a01ah ; -0.000198413 s3 = -1/7! 46 | DQ 03ec71de3a556c734h ; 2.75573e-006 s4 = 1/9! 47 | 48 | END 49 | -------------------------------------------------------------------------------- /_chgsign.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************************************************* 3 | MIT License 4 | ----------- 5 | 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this Software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 25 | *******************************************************************************/ 26 | 27 | #include "libm.h" 28 | #include "libm_util.h" 29 | 30 | double FN_PROTOTYPE(_chgsign)(double x) 31 | { 32 | /* Returns x with its sign reversed. 33 | NaNs are not considered special; their sign bits are handled 34 | the same as for any other number. The sign is flipped on the integer bit pattern fetched with GET_BITS_DP64 and written back with PUT_BITS_DP64 (NOTE(review): both are assumed to transfer the raw 64-bit IEEE-754 pattern - confirm in the included headers). */ 35 | unsigned long long u; /* must be 64 bits wide: plain unsigned long is only 32 bits on LLP64 targets such as 64-bit Windows and would drop the upper half of the pattern, including the sign bit */ 36 | GET_BITS_DP64(x, u); 37 | u ^= SIGNBIT_DP64; /* toggle the sign bit, leave every other bit alone */ 38 | PUT_BITS_DP64(u, x); 39 | return x; 40 | } 41 | 42 | -------------------------------------------------------------------------------- /_chgsignf.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************************************************* 3 | MIT License 4 | ----------- 5 | 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this Software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 25 | *******************************************************************************/ 26 | 27 | #include "libm.h" 28 | #include "libm_util.h" 29 | 30 | float FN_PROTOTYPE(_chgsignf)(float x) 31 | { 32 | /* Flips the sign of x and returns the result. 33 | A NaN gets no special treatment: its sign bit is toggled 34 | exactly like that of any other value. */ 35 | unsigned int xw; /* 32-bit pattern of x */ 36 | GET_BITS_SP32(x, xw); 37 | xw = xw ^ SIGNBIT_SP32; /* invert the sign bit only */ 38 | PUT_BITS_SP32(xw, x); 39 | return x; 40 | } 41 | -------------------------------------------------------------------------------- /_copysign.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************************************************* 3 | MIT License 4 | ----------- 5 | 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this Software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 25 | *******************************************************************************/ 26 | 27 | #include "libm.h" 28 | #include "libm_util.h" 29 | 30 | /* Returns the absolute value of x with the sign of y. 31 | NaNs are not considered special; their sign bits are handled 32 | the same as for any other number. x is returned unchanged when the two sign bits already agree. */ 33 | 34 | double FN_PROTOTYPE(_copysign)(double x, double y) 35 | { 36 | 37 | unsigned long long ux, uy; /* must be 64 bits wide: plain unsigned long is only 32 bits on LLP64 targets such as 64-bit Windows, which would lose the sign bits compared below */ 38 | GET_BITS_DP64(x, ux); 39 | GET_BITS_DP64(y, uy); 40 | if ((ux ^ uy) & SIGNBIT_DP64) /* the sign bits differ */ 41 | PUT_BITS_DP64(ux ^ SIGNBIT_DP64, x); /* so flip the sign of x */ 42 | return x; 43 | 44 | } 45 | -------------------------------------------------------------------------------- /_copysignf.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************************************************* 3 | MIT License 4 | ----------- 5 | 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this Software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 
17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 25 | *******************************************************************************/ 26 | 27 | #include "libm.h" 28 | #include "libm_util.h" 29 | 30 | /* Gives back the magnitude of x carrying the sign of y. 31 | A NaN is treated like any other value: only its sign bit 32 | is looked at or changed. */ 33 | 34 | float FN_PROTOTYPE(_copysignf)(float x, float y) 35 | { 36 | unsigned int xw, yw; /* 32-bit patterns of x and y */ 37 | GET_BITS_SP32(x, xw); 38 | GET_BITS_SP32(y, yw); 39 | if (((xw ^ yw) & SIGNBIT_SP32) != 0) /* signs disagree */ 40 | PUT_BITS_SP32(xw ^ SIGNBIT_SP32, x); 41 | return x; 42 | } 43 | -------------------------------------------------------------------------------- /_finite.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************************************************* 3 | MIT License 4 | ----------- 5 | 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 
7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this Software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 25 | *******************************************************************************/ 26 | 27 | #include "libm.h" 28 | #include "libm_util.h" 29 | 30 | /* Returns 0 if x is infinite or NaN, otherwise returns 1 */ 31 | 32 | int FN_PROTOTYPE(_finite)(double x) 33 | { 34 | 35 | 36 | unsigned long long ux; /* must be 64 bits wide: plain unsigned long is only 32 bits on LLP64 targets such as 64-bit Windows and would lose the exponent field tested below */ 37 | GET_BITS_DP64(x, ux); 38 | return (int)(((ux & ~SIGNBIT_DP64) - PINFBITPATT_DP64) >> 63); /* the unsigned subtraction wraps, setting bit 63, exactly when abs(x) is below the +infinity pattern */ 39 | } 40 | -------------------------------------------------------------------------------- /_finitef.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************************************************* 3 | MIT License 4 | ----------- 5 | 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 
7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this Software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 25 | *******************************************************************************/ 26 | 27 | #include "libm.h" 28 | #include "libm_util.h" 29 | 30 | /* Yields 1 for a finite x and 0 when x is an infinity or a NaN */ 31 | 32 | int FN_PROTOTYPE(_finitef)(float x) 33 | { 34 | 35 | 36 | unsigned int xw; /* 32-bit pattern of x */ 37 | GET_BITS_SP32(x, xw); 38 | return (int)(((xw & ~SIGNBIT_SP32) - PINFBITPATT_SP32) >> 31); /* top bit of (abs pattern - infinity pattern) */ 39 | 40 | } 41 | -------------------------------------------------------------------------------- /acos.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************************************************* 3 | MIT License 4 | ----------- 5 | 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 
7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this Software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 25 | *******************************************************************************/ 26 | 27 | #include "libm.h" 28 | #include "libm_util.h" 29 | 30 | #define USE_VAL_WITH_FLAGS 31 | #define USE_NAN_WITH_FLAGS 32 | #define USE_HANDLE_ERROR 33 | #include "libm_inlines.h" 34 | #undef USE_NAN_WITH_FLAGS 35 | #undef USE_VAL_WITH_FLAGS 36 | #undef USE_HANDLE_ERROR 37 | 38 | #include "libm_errno.h" 39 | 40 | 41 | #pragma function(acos) 42 | 43 | double FN_PROTOTYPE(acos)(double x) 44 | { 45 | /* Computes arccos(x). 46 | The argument is first reduced by noting that arccos(x) 47 | is invalid for abs(x) > 1. For denormal and small 48 | arguments arccos(x) = pi/2 to machine accuracy. 49 | Remaining argument ranges are handled as follows. 
50 | For abs(x) <= 0.5 use 51 | arccos(x) = pi/2 - arcsin(x) 52 | = pi/2 - (x + x^3*R(x^2)) 53 | where R(x^2) is a rational minimax approximation to 54 | (arcsin(x) - x)/x^3. 55 | For abs(x) > 0.5 exploit the identities: 56 | arccos(x) = 2*arcsin(sqrt((1-x)/2)) for x > 0 and arccos(x) = pi - 2*arcsin(sqrt((1+x)/2)) for x < 0 57 | together with the above rational approximation, and 58 | reconstruct the terms carefully. Special cases: a NaN argument and abs(x) > 1 are both reported through _handle_error with _DOMAIN / EDOM; x == 1 returns 0.0 and x == -1 returns pi. 59 | */ 60 | 61 | /* Some constants and split constants. */ 62 | 63 | static const double 64 | pi = 3.1415926535897933e+00, /* 0x400921fb54442d18 */ 65 | piby2 = 1.5707963267948965580e+00, /* 0x3ff921fb54442d18 */ 66 | piby2_head = 1.5707963267948965580e+00, /* 0x3ff921fb54442d18 */ 67 | piby2_tail = 6.12323399573676603587e-17; /* 0x3c91a62633145c07 */ 68 | 69 | double u, y, s=0.0, r; 70 | int xexp, xnan, transform=0; 71 | 72 | unsigned long long ux, aux, xneg; /* must be 64 bits wide: with a 32-bit unsigned long (VC++) the sign bit kept in xneg and the exponent field in ux would be truncated away */ 73 | GET_BITS_DP64(x, ux); 74 | aux = ux & ~SIGNBIT_DP64; 75 | xneg = (ux & SIGNBIT_DP64); 76 | xnan = (aux > PINFBITPATT_DP64); 77 | xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64; 78 | 79 | /* Special cases */ 80 | 81 | if (xnan) 82 | { 83 | return _handle_error("acos", OP_ACOS, ux|0x0008000000000000, _DOMAIN, 84 | 0, EDOM, x, 0.0, 1); 85 | } 86 | else if (xexp < -56) 87 | { /* x small enough that arccos(x) = pi/2 */ 88 | return val_with_flags(piby2, AMD_F_INEXACT); 89 | } 90 | else if (xexp >= 0) 91 | { /* abs(x) >= 1.0 */ 92 | if (x == 1.0) 93 | return 0.0; 94 | else if (x == -1.0) 95 | return val_with_flags(pi, AMD_F_INEXACT); 96 | else 97 | return _handle_error("acos", OP_ACOS, INDEFBITPATT_DP64, _DOMAIN, 98 | AMD_F_INVALID, EDOM, x, 0.0, 1); 99 | } 100 | 101 | if (xneg) y = -x; 102 | else y = x; 103 | 104 | transform = (xexp >= -1); /* abs(x) >= 0.5 */ 105 | 106 | if (transform) 107 | { /* Transform y into the range [0,0.5): r = (1-abs(x))/2 and y = s = sqrt(r) */ 108 | r = 0.5*(1.0 - y); 109 | /* VC++ intrinsic call */ 110 | _mm_store_sd(&s, _mm_sqrt_sd(_mm_setzero_pd(), _mm_load_sd(&r))); 111 | y = s; 112 | } 113 | else 114 | r = y*y; 115 | 116 | /* Use a rational 
approximation for [0.0, 0.5]; u = r*R(r), so that arcsin(t) = t + t*u when r = t*t */ 117 | 118 | u = r*(0.227485835556935010735943483075 + 119 | (-0.445017216867635649900123110649 + 120 | (0.275558175256937652532686256258 + 121 | (-0.0549989809235685841612020091328 + 122 | (0.00109242697235074662306043804220 + 123 | 0.0000482901920344786991880522822991*r)*r)*r)*r)*r)/ 124 | (1.36491501334161032038194214209 + 125 | (-3.28431505720958658909889444194 + 126 | (2.76568859157270989520376345954 + 127 | (-0.943639137032492685763471240072 + 128 | 0.105869422087204370341222318533*r)*r)*r)*r); 129 | 130 | if (transform) 131 | { /* Reconstruct acos carefully in transformed region */ 132 | if (xneg) return pi - 2.0*(s+(y*u - piby2_tail)); 133 | else 134 | { 135 | double c, s1; 136 | unsigned long long us; /* 64 bits required: the mask below keeps the upper 32 bits of the pattern of s */ 137 | GET_BITS_DP64(s, us); 138 | PUT_BITS_DP64(0xffffffff00000000 & us, s1); /* s1 = s with the low 32 mantissa bits cleared, so s1*s1 is exact */ 139 | c = (r-s1*s1)/(s+s1); /* correction term, approximately s - s1 */ 140 | return 2.0*s1 + (2.0*c+2.0*y*u); 141 | } 142 | } 143 | else 144 | return piby2_head - (x - (piby2_tail - x*u)); 145 | } 146 | -------------------------------------------------------------------------------- /acosf.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************************************************* 3 | MIT License 4 | ----------- 5 | 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this Software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 
17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 25 | *******************************************************************************/ 26 | 27 | #include "libm.h" 28 | #include "libm_util.h" 29 | 30 | #define USE_VALF_WITH_FLAGS 31 | #define USE_NANF_WITH_FLAGS 32 | #define USE_HANDLE_ERRORF 33 | #include "libm_inlines.h" 34 | #undef USE_NANF_WITH_FLAGS 35 | #undef USE_VALF_WITH_FLAGS 36 | #undef USE_HANDLE_ERRORF 37 | 38 | #include "libm_errno.h" 39 | 40 | // Disable "C4163: not available as intrinsic function" warning that older 41 | // compilers may issue here. 42 | #pragma warning(disable:4163) 43 | #pragma function(acosf) 44 | 45 | 46 | float FN_PROTOTYPE(acosf)(float x) 47 | { 48 | /* Computes arccos(x). 49 | The argument is first reduced by noting that arccos(x) 50 | is invalid for abs(x) > 1. For denormal and small 51 | arguments arccos(x) = pi/2 to machine accuracy. 52 | Remaining argument ranges are handled as follows. 53 | For abs(x) <= 0.5 use 54 | arccos(x) = pi/2 - arcsin(x) 55 | = pi/2 - (x + x^3*R(x^2)) 56 | where R(x^2) is a rational minimax approximation to 57 | (arcsin(x) - x)/x^3. 58 | For abs(x) > 0.5 exploit the identities: 59 | arccos(x) = 2*arcsin(sqrt((1-x)/2)) for x > 0 and arccos(x) = pi - 2*arcsin(sqrt((1+x)/2)) for x < 0 60 | together with the above rational approximation, and 61 | reconstruct the terms carefully. Special cases: a NaN argument and abs(x) > 1 are both reported through _handle_errorf with _DOMAIN / EDOM; x == 1 returns 0 and x == -1 returns pi rounded to float. 62 | */ 63 | 64 | /* Some constants and split constants. 
*/ 65 | 66 | static const float 67 | piby2 = 1.5707963705e+00F; /* 0x3fc90fdb */ 68 | static const double 69 | pi = 3.1415926535897933e+00, /* 0x400921fb54442d18 */ 70 | piby2_head = 1.5707963267948965580e+00, /* 0x3ff921fb54442d18 */ 71 | piby2_tail = 6.12323399573676603587e-17; /* 0x3c91a62633145c07 */ 72 | 73 | float u, y, s = 0.0F, r; 74 | int xexp, xnan, transform = 0; 75 | 76 | unsigned int ux, aux, xneg; 77 | 78 | GET_BITS_SP32(x, ux); 79 | aux = ux & ~SIGNBIT_SP32; 80 | xneg = (ux & SIGNBIT_SP32); 81 | xnan = (aux > PINFBITPATT_SP32); 82 | xexp = (int)((ux & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32; 83 | 84 | /* Special cases */ 85 | 86 | if (xnan) 87 | { 88 | return _handle_errorf("acosf", OP_ACOS, ux|0x00400000, _DOMAIN, 0, 89 | EDOM, x, 0.0F, 1); 90 | } 91 | else if (xexp < -26) 92 | /* x small enough that arccos(x) = pi/2 */ 93 | return valf_with_flags(piby2, AMD_F_INEXACT); 94 | else if (xexp >= 0) 95 | { /* abs(x) >= 1.0 */ 96 | if (x == 1.0F) 97 | return 0.0F; 98 | else if (x == -1.0F) 99 | return valf_with_flags((float)pi, AMD_F_INEXACT); 100 | else 101 | return _handle_errorf("acosf", OP_ACOS, INDEFBITPATT_SP32, _DOMAIN, 102 | AMD_F_INVALID, EDOM, x, 0.0F, 1); 103 | } 104 | 105 | if (xneg) y = -x; 106 | else y = x; 107 | 108 | transform = (xexp >= -1); /* abs(x) >= 0.5 */ 109 | 110 | if (transform) 111 | { /* Transform y into the range [0,0.5): r = (1-abs(x))/2 and y = s = sqrt(r) */ 112 | r = 0.5F*(1.0F - y); 113 | /* VC++ intrinsic call */ 114 | _mm_store_ss(&s, _mm_sqrt_ss(_mm_load_ss(&r))); 115 | y = s; 116 | } 117 | else 118 | r = y*y; 119 | 120 | /* Use a rational approximation for [0.0, 0.5]; u = r*R(r), so that arcsin(t) = t + t*u when r = t*t */ 121 | 122 | u=r*(0.184161606965100694821398249421F + 123 | (-0.0565298683201845211985026327361F + 124 | (-0.0133819288943925804214011424456F - 125 | 0.00396137437848476485201154797087F*r)*r)*r)/ 126 | (1.10496961524520294485512696706F - 127 | 0.836411276854206731913362287293F*r); 128 | 129 | if (transform) 130 | { 131 | /* Reconstruct acos carefully in transformed region */ 132 
| if (xneg) 133 | return (float)(pi - 2.0*(s+(y*u - piby2_tail))); /* evaluated in double (pi and piby2_tail are doubles), rounded to float once at the end */ 134 | else 135 | { 136 | float c, s1; 137 | unsigned int us; 138 | GET_BITS_SP32(s, us); 139 | PUT_BITS_SP32(0xffff0000 & us, s1); /* s1 = s with the low 16 bits of its pattern cleared */ 140 | c = (r-s1*s1)/(s+s1); /* correction term, approximately s - s1 */ 141 | return 2.0F*s1 + (2.0F*c+2.0F*y*u); 142 | } 143 | } 144 | else 145 | return (float)(piby2_head - (x - (piby2_tail - x*u))); 146 | } 147 | -------------------------------------------------------------------------------- /asin.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************************************************* 3 | MIT License 4 | ----------- 5 | 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this Software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 
25 | *******************************************************************************/ 26 | 27 | #include "libm.h" 28 | #include "libm_util.h" 29 | 30 | #define USE_VAL_WITH_FLAGS 31 | #define USE_NAN_WITH_FLAGS 32 | #define USE_HANDLE_ERROR 33 | #include "libm_inlines.h" 34 | #undef USE_NAN_WITH_FLAGS 35 | #undef USE_VAL_WITH_FLAGS 36 | #undef USE_HANDLE_ERROR 37 | 38 | #include "libm_errno.h" 39 | 40 | #pragma function(asin) 41 | 42 | double FN_PROTOTYPE(asin)(double x) 43 | { 44 | /* Computes arcsin(x). 45 | The argument is first reduced by noting that arcsin(x) 46 | is invalid for abs(x) > 1 and arcsin(-x) = -arcsin(x). 47 | For denormal and small arguments arcsin(x) = x to machine 48 | accuracy. Remaining argument ranges are handled as follows. 49 | For abs(x) <= 0.5 use 50 | arcsin(x) = x + x^3*R(x^2) 51 | where R(x^2) is a rational minimax approximation to 52 | (arcsin(x) - x)/x^3. 53 | For abs(x) > 0.5 exploit the identity (for x > 0): 54 | arcsin(x) = pi/2 - 2*arcsin(sqrt((1-x)/2)) 55 | together with the above rational approximation, and 56 | reconstruct the terms carefully. Special cases: a NaN argument and abs(x) > 1 are both reported through _handle_error with _DOMAIN / EDOM; x == 1 and x == -1 return pi/2 and -pi/2. 57 | */ 58 | 59 | /* Some constants and split constants. 
*/ 60 | 61 | static const double 62 | piby2_tail = 6.1232339957367660e-17, /* 0x3c91a62633145c07 */ 63 | hpiby2_head = 7.8539816339744831e-01, /* 0x3fe921fb54442d18 */ 64 | piby2 = 1.5707963267948965e+00; /* 0x3ff921fb54442d18 */ 65 | double u, v, y, s=0.0, r; 66 | int xexp, xnan, transform=0; 67 | 68 | unsigned long long ux, aux, xneg; /* must be 64 bits wide: with a 32-bit unsigned long (VC++) the sign bit kept in xneg and the exponent field in ux would be truncated away */ 69 | GET_BITS_DP64(x, ux); 70 | aux = ux & ~SIGNBIT_DP64; 71 | xneg = (ux & SIGNBIT_DP64); 72 | xnan = (aux > PINFBITPATT_DP64); 73 | xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64; 74 | 75 | /* Special cases */ 76 | 77 | if (xnan) 78 | { 79 | return _handle_error("asin", OP_ASIN, ux|0x0008000000000000, _DOMAIN, 80 | 0, EDOM, x, 0.0, 1); 81 | } 82 | else if (xexp < -28) 83 | { /* x small enough that arcsin(x) = x */ 84 | return val_with_flags(x, AMD_F_INEXACT); 85 | } 86 | else if (xexp >= 0) 87 | { /* abs(x) >= 1.0 */ 88 | if (x == 1.0) 89 | return val_with_flags(piby2, AMD_F_INEXACT); 90 | else if (x == -1.0) 91 | return val_with_flags(-piby2, AMD_F_INEXACT); 92 | else 93 | return _handle_error("asin", OP_ASIN, INDEFBITPATT_DP64, _DOMAIN, 94 | AMD_F_INVALID, EDOM, x, 0.0, 1); 95 | } 96 | 97 | if (xneg) y = -x; 98 | else y = x; 99 | 100 | transform = (xexp >= -1); /* abs(x) >= 0.5 */ 101 | 102 | if (transform) 103 | { /* Transform y into the range [0,0.5): r = (1-abs(x))/2 and y = s = sqrt(r) */ 104 | r = 0.5*(1.0 - y); 105 | /* VC++ intrinsic call */ 106 | _mm_store_sd(&s, _mm_sqrt_sd(_mm_setzero_pd(), _mm_load_sd(&r))); 107 | y = s; 108 | } 109 | else 110 | r = y*y; 111 | 112 | /* Use a rational approximation for [0.0, 0.5]; u = r*R(r), so that arcsin(t) = t + t*u when r = t*t */ 113 | 114 | u = r*(0.227485835556935010735943483075 + 115 | (-0.445017216867635649900123110649 + 116 | (0.275558175256937652532686256258 + 117 | (-0.0549989809235685841612020091328 + 118 | (0.00109242697235074662306043804220 + 119 | 0.0000482901920344786991880522822991*r)*r)*r)*r)*r)/ 120 | (1.36491501334161032038194214209 + 121 | (-3.28431505720958658909889444194 + 122 | (2.76568859157270989520376345954 + 123 | 
(-0.943639137032492685763471240072 + 124 | 0.105869422087204370341222318533*r)*r)*r)*r); 125 | 126 | if (transform) 127 | { /* Reconstruct asin carefully in transformed region */ 128 | { 129 | double c, s1, p, q; 130 | unsigned long long us; /* 64 bits required: the mask below keeps the upper 32 bits of the pattern of s */ 131 | GET_BITS_DP64(s, us); 132 | PUT_BITS_DP64(0xffffffff00000000 & us, s1); /* s1 = s with the low 32 mantissa bits cleared, so s1*s1 is exact */ 133 | c = (r-s1*s1)/(s+s1); /* correction term, approximately s - s1 */ 134 | p = 2.0*s*u - (piby2_tail-2.0*c); 135 | q = hpiby2_head - 2.0*s1; 136 | v = hpiby2_head - (p-q); /* hpiby2_head is pi/4, so two of them supply the pi/2 of the identity */ 137 | } 138 | } 139 | else 140 | { 141 | /* Use a temporary variable to prevent VC++ rearranging 142 | y + y*u 143 | into 144 | y * (1 + u) 145 | and getting an incorrectly rounded result */ 146 | double tmp; 147 | tmp = y * u; 148 | v = y + tmp; 149 | } 150 | 151 | if (xneg) return -v; 152 | else return v; 153 | } 154 | -------------------------------------------------------------------------------- /asinf.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************************************************* 3 | MIT License 4 | ----------- 5 | 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this Software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*******************************************************************************/

#include "libm.h"
#include "libm_util.h"

#define USE_VALF_WITH_FLAGS
#define USE_NANF_WITH_FLAGS
#define USE_HANDLE_ERRORF
#include "libm_inlines.h"
#undef USE_NANF_WITH_FLAGS
#undef USE_VALF_WITH_FLAGS
#undef USE_HANDLE_ERRORF

#include "libm_errno.h"

// Disable "C4163: not available as intrinsic function" warning that older
// compilers may issue here.
#pragma warning(disable:4163)
#pragma function(asinf)


float FN_PROTOTYPE(asinf)(float x)
{
  /* Computes arcsin(x) in single precision.
     The argument is first reduced by noting that arcsin(x)
     is invalid for abs(x) > 1 and arcsin(-x) = -arcsin(x).
     For denormal and small arguments arcsin(x) = x to machine
     accuracy. Remaining argument ranges are handled as follows.
     For abs(x) < 0.5 use
     arcsin(x) = x + x^3*R(x^2)
     where R(x^2) is a rational minimax approximation to
     (arcsin(x) - x)/x^3.
     For abs(x) >= 0.5 exploit the identity:
     arcsin(x) = pi/2 - 2*arcsin(sqrt((1-x)/2))
     together with the above rational approximation, and
     reconstruct the terms carefully.
  */

  /* Some constants and split constants. */

  static const float
    piby2_tail  = 7.5497894159e-08F, /* 0x33a22168 */
    hpiby2_head = 7.8539812565e-01F, /* 0x3f490fda */
    piby2       = 1.5707963705e+00F; /* 0x3fc90fdb */
  float u, v, y, s = 0.0F, r;
  int xexp, xnan, transform = 0;

  /* Decompose the raw 32-bit pattern of x. */
  unsigned int ux, aux, xneg;
  GET_BITS_SP32(x, ux);
  aux = ux & ~SIGNBIT_SP32;          /* bits of abs(x) */
  xneg = (ux & SIGNBIT_SP32);        /* non-zero iff sign bit set */
  xnan = (aux > PINFBITPATT_SP32);   /* above +inf pattern => NaN */
  xexp = (int)((ux & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;

  /* Special cases */

  if (xnan)
    {
      /* Hand the input pattern with the top mantissa bit set
         (quiet form of the NaN) to the error handler. */
      return _handle_errorf("asinf", OP_ASIN, ux|0x00400000, _DOMAIN, 0,
                            EDOM, x, 0.0F, 1);
    }
  else if (xexp < -14)
    /* x small enough that arcsin(x) = x */
    return valf_with_flags(x, AMD_F_INEXACT);
  else if (xexp >= 0)
    {
      /* abs(x) >= 1.0 */
      if (x == 1.0F)
        return valf_with_flags(piby2, AMD_F_INEXACT);
      else if (x == -1.0F)
        return valf_with_flags(-piby2, AMD_F_INEXACT);
      else
        /* abs(x) > 1 (including infinities): domain error */
        return _handle_errorf("asinf", OP_ASIN, INDEFBITPATT_SP32, _DOMAIN,
                              AMD_F_INVALID, EDOM, x, 0.0F, 1);
    }

  /* Work on y = abs(x); the sign is restored on return. */
  if (xneg) y = -x;
  else y = x;

  transform = (xexp >= -1); /* abs(x) >= 0.5 */

  if (transform)
    { /* Transform y into the range [0,0.5) */
      r = 0.5F*(1.0F - y);
      /* VC++ intrinsic call: s = sqrt(r) */
      _mm_store_ss(&s, _mm_sqrt_ss(_mm_load_ss(&r)));
      y = s;
    }
  else
    r = y*y;

  /* Use a rational approximation for [0.0, 0.5].
     The evaluation order below is deliberate; do not re-associate. */

  u=r*(0.184161606965100694821398249421F +
       (-0.0565298683201845211985026327361F +
        (-0.0133819288943925804214011424456F -
         0.00396137437848476485201154797087F*r)*r)*r)/
    (1.10496961524520294485512696706F -
     0.836411276854206731913362287293F*r);

  if (transform)
    {
      /* Reconstruct asin carefully in transformed region */
      float c, s1, p, q;
      unsigned int us;
      GET_BITS_SP32(s, us);
      /* s1 = s with the low 16 bits cleared (head of s);
         c is the matching correction so that s1 + c ~= sqrt(r). */
      PUT_BITS_SP32(0xffff0000 & us, s1);
      c = (r-s1*s1)/(s+s1);
      p = 2.0F*s*u - (piby2_tail-2.0F*c);
      q = hpiby2_head - 2.0F*s1;
      v = hpiby2_head - (p-q);
    }
  else
    {
      /* Use a temporary variable to prevent VC++ rearranging
            y + y*u
         into
            y * (1 + u)
         and getting an incorrectly rounded result */
      float tmp;
      tmp = y * u;
      v = y + tmp;
    }

  if (xneg) return -v;
  else return v;
}

--------------------------------------------------------------------------------
/atan.c:
--------------------------------------------------------------------------------

/*******************************************************************************
MIT License
-----------

Copyright (c) 2002-2019 Advanced Micro Devices, Inc.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this Software and associated documentaon files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
25 | *******************************************************************************/ 26 | 27 | #include "libm.h" 28 | #include "libm_util.h" 29 | 30 | #define USE_VAL_WITH_FLAGS 31 | #define USE_NAN_WITH_FLAGS 32 | #define USE_HANDLE_ERROR 33 | #include "libm_inlines.h" 34 | #undef USE_VAL_WITH_FLAGS 35 | #undef USE_NAN_WITH_FLAGS 36 | #undef USE_HANDLE_ERROR 37 | 38 | #include "libm_errno.h" 39 | 40 | #pragma function(atan) 41 | 42 | double FN_PROTOTYPE(atan)(double x) 43 | { 44 | 45 | /* Some constants and split constants. */ 46 | 47 | static double piby2 = 1.5707963267948966e+00; /* 0x3ff921fb54442d18 */ 48 | double chi, clo, v, s, q, z; 49 | 50 | /* Find properties of argument x. */ 51 | 52 | unsigned long ux, aux, xneg; 53 | GET_BITS_DP64(x, ux); 54 | aux = ux & ~SIGNBIT_DP64; 55 | xneg = (ux != aux); 56 | 57 | if (xneg) v = -x; 58 | else v = x; 59 | 60 | /* Argument reduction to range [-7/16,7/16] */ 61 | 62 | if (aux > 0x4003800000000000) /* v > 39./16. */ 63 | { 64 | 65 | if (aux > PINFBITPATT_DP64) 66 | { 67 | /* x is NaN */ 68 | return _handle_error("atan", OP_ATAN, ux|0x0008000000000000, _DOMAIN, 0, 69 | EDOM, x, 0.0, 1); 70 | } 71 | else if (v > 0x4370000000000000) 72 | { /* abs(x) > 2^56 => arctan(1/x) is 73 | insignificant compared to piby2 */ 74 | if (xneg) 75 | return val_with_flags(-piby2, AMD_F_INEXACT); 76 | else 77 | return val_with_flags(piby2, AMD_F_INEXACT); 78 | } 79 | 80 | x = -1.0/v; 81 | /* (chi + clo) = arctan(infinity) */ 82 | chi = 1.57079632679489655800e+00; /* 0x3ff921fb54442d18 */ 83 | clo = 6.12323399573676480327e-17; /* 0x3c91a62633145c06 */ 84 | } 85 | else if (aux > 0x3ff3000000000000) /* 39./16. > v > 19./16. */ 86 | { 87 | x = (v-1.5)/(1.0+1.5*v); 88 | /* (chi + clo) = arctan(1.5) */ 89 | chi = 9.82793723247329054082e-01; /* 0x3fef730bd281f69b */ 90 | clo = 1.39033110312309953701e-17; /* 0x3c7007887af0cbbc */ 91 | } 92 | else if (aux > 0x3fe6000000000000) /* 19./16. > v > 11./16. 
*/ 93 | { 94 | x = (v-1.0)/(1.0+v); 95 | /* (chi + clo) = arctan(1.) */ 96 | chi = 7.85398163397448278999e-01; /* 0x3fe921fb54442d18 */ 97 | clo = 3.06161699786838240164e-17; /* 0x3c81a62633145c06 */ 98 | } 99 | else if (aux > 0x3fdc000000000000) /* 11./16. > v > 7./16. */ 100 | { 101 | x = (2.0*v-1.0)/(2.0+v); 102 | /* (chi + clo) = arctan(0.5) */ 103 | chi = 4.63647609000806093515e-01; /* 0x3fddac670561bb4f */ 104 | clo = 2.26987774529616809294e-17; /* 0x3c7a2b7f222f65e0 */ 105 | } 106 | else /* v < 7./16. */ 107 | { 108 | x = v; 109 | chi = 0.0; 110 | clo = 0.0; 111 | } 112 | 113 | /* Core approximation: Remez(4,4) on [-7/16,7/16] */ 114 | 115 | s = x*x; 116 | q = x*s* 117 | (0.268297920532545909e0 + 118 | (0.447677206805497472e0 + 119 | (0.220638780716667420e0 + 120 | (0.304455919504853031e-1 + 121 | 0.142316903342317766e-3*s)*s)*s)*s)/ 122 | (0.804893761597637733e0 + 123 | (0.182596787737507063e1 + 124 | (0.141254259931958921e1 + 125 | (0.424602594203847109e0 + 126 | 0.389525873944742195e-1*s)*s)*s)*s); 127 | 128 | z = chi - ((q - clo) - x); 129 | 130 | if (xneg) z = -z; 131 | return z; 132 | } 133 | -------------------------------------------------------------------------------- /atanf.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************************************************* 3 | MIT License 4 | ----------- 5 | 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 
7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this Software and associated documentaon files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 25 | *******************************************************************************/ 26 | 27 | #include "libm.h" 28 | #include "libm_util.h" 29 | 30 | #define USE_VALF_WITH_FLAGS 31 | #define USE_NAN_WITH_FLAGS 32 | #define USE_HANDLE_ERRORF 33 | #include "libm_inlines.h" 34 | #undef USE_VALF_WITH_FLAGS 35 | #undef USE_NAN_WITH_FLAGS 36 | #undef USE_HANDLE_ERRORF 37 | 38 | #include "libm_errno.h" 39 | 40 | // Disable "C4163: not available as intrinsic function" warning that older 41 | // compilers may issue here. 42 | #pragma warning(disable:4163) 43 | #pragma function(atanf) 44 | 45 | float FN_PROTOTYPE(atanf)(float fx) 46 | { 47 | 48 | /* Some constants and split constants. 
*/ 49 | 50 | static double piby2 = 1.5707963267948966e+00; /* 0x3ff921fb54442d18 */ 51 | 52 | double c, v, s, q, z; 53 | unsigned int xnan; 54 | 55 | double x = fx; 56 | 57 | /* Find properties of argument fx. */ 58 | 59 | unsigned long ux, aux, xneg; 60 | 61 | GET_BITS_DP64(x, ux); 62 | aux = ux & ~SIGNBIT_DP64; 63 | xneg = ux & SIGNBIT_DP64; 64 | 65 | v = x; 66 | if (xneg) v = -x; 67 | 68 | /* Argument reduction to range [-7/16,7/16] */ 69 | 70 | if (aux < 0x3fdc000000000000) /* v < 7./16. */ 71 | { 72 | x = v; 73 | c = 0.0; 74 | } 75 | else if (aux < 0x3fe6000000000000) /* v < 11./16. */ 76 | { 77 | x = (2.0*v-1.0)/(2.0+v); 78 | /* c = arctan(0.5) */ 79 | c = 4.63647609000806093515e-01; /* 0x3fddac670561bb4f */ 80 | } 81 | else if (aux < 0x3ff3000000000000) /* v < 19./16. */ 82 | { 83 | x = (v-1.0)/(1.0+v); 84 | /* c = arctan(1.) */ 85 | c = 7.85398163397448278999e-01; /* 0x3fe921fb54442d18 */ 86 | } 87 | else if (aux < 0x4003800000000000) /* v < 39./16. */ 88 | { 89 | x = (v-1.5)/(1.0+1.5*v); 90 | /* c = arctan(1.5) */ 91 | c = 9.82793723247329054082e-01; /* 0x3fef730bd281f69b */ 92 | } 93 | else 94 | { 95 | 96 | xnan = (aux > PINFBITPATT_DP64); 97 | 98 | if (xnan) 99 | { 100 | /* x is NaN */ 101 | unsigned int uhx; 102 | GET_BITS_SP32(fx, uhx); 103 | return _handle_errorf("atanf", OP_ATAN, uhx|0x00400000, _DOMAIN, 104 | 0, EDOM, fx, 0.0F, 1); 105 | } 106 | else if (v > 0x4c80000000000000) 107 | { /* abs(x) > 2^26 => arctan(1/x) is 108 | insignificant compared to piby2 */ 109 | if (xneg) 110 | return valf_with_flags((float)-piby2, AMD_F_INEXACT); 111 | else 112 | return valf_with_flags((float)piby2, AMD_F_INEXACT); 113 | } 114 | 115 | x = -1.0/v; 116 | /* c = arctan(infinity) */ 117 | c = 1.57079632679489655800e+00; /* 0x3ff921fb54442d18 */ 118 | } 119 | 120 | /* Core approximation: Remez(2,2) on [-7/16,7/16] */ 121 | 122 | s = x*x; 123 | q = x*s* 124 | (0.296528598819239217902158651186e0 + 125 | (0.192324546402108583211697690500e0 + 126 | 
0.470677934286149214138357545549e-2*s)*s)/ 127 | (0.889585796862432286486651434570e0 + 128 | (0.111072499995399550138837673349e1 + 129 | 0.299309699959659728404442796915e0*s)*s); 130 | 131 | z = c - (q - x); 132 | 133 | if (xneg) z = -z; 134 | return (float)z; 135 | } 136 | -------------------------------------------------------------------------------- /cabs.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************************************************* 3 | MIT License 4 | ----------- 5 | 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this Software and associated documentaon files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 25 | *******************************************************************************/ 26 | 27 | #include "libm.h" 28 | 29 | double __cdecl _cabs(COMPLEX z) 30 | { 31 | /* Returns the absolute value of a complex number z 32 | with real part a and complex part b. 
*/ 33 | return _hypot(z.x, z.y); 34 | } 35 | -------------------------------------------------------------------------------- /cabsf.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************************************************* 3 | MIT License 4 | ----------- 5 | 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this Software and associated documentaon files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 25 | *******************************************************************************/ 26 | 27 | #include "libm.h" 28 | 29 | float _hypotf(float,float); 30 | float _cabsf(COMPLEX z) 31 | { 32 | /* Returns the absolute value of a complex number z 33 | with real part a and complex part b. 
*/ 34 | return _hypotf((float)z.x, (float)z.y); 35 | } 36 | -------------------------------------------------------------------------------- /ceil.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************************************************* 3 | MIT License 4 | ----------- 5 | 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this Software and associated documentaon files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 25 | *******************************************************************************/ 26 | 27 | #include "libm.h" 28 | #include "libm_util.h" 29 | 30 | #include "libm_errno.h" 31 | #define USE_HANDLE_ERROR 32 | #include "libm_inlines.h" 33 | #undef USE_HANDLE_ERROR 34 | 35 | // Disable "C4163: not available as intrinsic function" warning that older 36 | // compilers may issue here. 
37 | #pragma warning(disable:4163) 38 | #pragma function(ceil) 39 | 40 | double FN_PROTOTYPE(ceil)(double x) 41 | { 42 | double r; 43 | long rexp, xneg; 44 | unsigned long ux, ax, ur, mask; 45 | 46 | GET_BITS_DP64(x, ux); 47 | ax = ux & (~SIGNBIT_DP64); 48 | xneg = (ux != ax); 49 | 50 | if (ax >= 0x4340000000000000) 51 | { 52 | /* abs(x) is either NaN, infinity, or >= 2^53 */ 53 | if (ax > 0x7ff0000000000000) 54 | /* x is NaN */ 55 | return _handle_error("ceil", OP_CEIL, ux|0x0008000000000000, _DOMAIN, 0, 56 | EDOM, x, 0.0, 1); 57 | else 58 | return x; 59 | } 60 | else if (ax < 0x3ff0000000000000) /* abs(x) < 1.0 */ 61 | { 62 | if (ax == 0x0000000000000000) 63 | /* x is +zero or -zero; return the same zero */ 64 | return x; 65 | else if (xneg) /* x < 0.0 */ 66 | { 67 | PUT_BITS_DP64(SIGNBIT_DP64, r); /* return -0.0 */ 68 | return r; 69 | } 70 | else 71 | return 1.0; 72 | } 73 | else 74 | { 75 | rexp = ((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64; 76 | /* Mask out the bits of r that we don't want */ 77 | mask = 1; 78 | mask = (mask << (EXPSHIFTBITS_DP64 - rexp)) - 1; 79 | ur = (ux & ~mask); 80 | PUT_BITS_DP64(ur, r); 81 | if (xneg || (ur == ux)) 82 | return r; 83 | else 84 | /* We threw some bits away and x was positive */ 85 | return r + 1.0; 86 | } 87 | 88 | } 89 | -------------------------------------------------------------------------------- /ceilf.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************************************************* 3 | MIT License 4 | ----------- 5 | 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 
7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this Software and associated documentaon files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 25 | *******************************************************************************/ 26 | 27 | #include "libm.h" 28 | #include "libm_util.h" 29 | 30 | #include "libm_errno.h" 31 | #define USE_HANDLE_ERRORF 32 | #include "libm_inlines.h" 33 | #undef USE_HANDLE_ERRORF 34 | 35 | // Disable "C4163: not available as intrinsic function" warning that older 36 | // compilers may issue here. 
37 | #pragma warning(disable:4163) 38 | #pragma function(ceilf) 39 | 40 | float FN_PROTOTYPE(ceilf)(float x) 41 | { 42 | float r; 43 | int rexp, xneg; 44 | unsigned int ux, ax, ur, mask; 45 | 46 | GET_BITS_SP32(x, ux); 47 | ax = ux & (~SIGNBIT_SP32); 48 | xneg = (ux != ax); 49 | 50 | if (ax >= 0x4b800000) 51 | { 52 | /* abs(x) is either NaN, infinity, or >= 2^24 */ 53 | if (ax > 0x7f800000) 54 | /* x is NaN */ 55 | return _handle_errorf("ceilf", OP_CEIL, ux, _DOMAIN, 0, EDOM, x, 56 | 0.0F, 1); 57 | else 58 | return x; 59 | } 60 | else if (ax < 0x3f800000) /* abs(x) < 1.0 */ 61 | { 62 | if (ax == 0x00000000) 63 | /* x is +zero or -zero; return the same zero */ 64 | return x; 65 | else if (xneg) /* x < 0.0 */ 66 | { 67 | PUT_BITS_SP32(SIGNBIT_SP32, r); /* return -0.0 */ 68 | return r; 69 | } 70 | else 71 | return 1.0F; 72 | } 73 | else 74 | { 75 | rexp = ((ux & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32; 76 | /* Mask out the bits of r that we don't want */ 77 | mask = (1 << (EXPSHIFTBITS_SP32 - rexp)) - 1; 78 | ur = (ux & ~mask); 79 | PUT_BITS_SP32(ur, r); 80 | 81 | if (xneg || (ux == ur)) return r; 82 | else 83 | /* We threw some bits away and x was positive */ 84 | return r + 1.0F; 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /exp2.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************************************************* 3 | MIT License 4 | ----------- 5 | 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 
7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this Software and associated documentaon files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 25 | *******************************************************************************/ 26 | 27 | #include "libm.h" 28 | #include "libm_util.h" 29 | 30 | #define USE_SPLITEXP 31 | #define USE_SCALEDOUBLE_1 32 | #define USE_SCALEDOUBLE_2 33 | #define USE_ZERO_WITH_FLAGS 34 | #define USE_INFINITY_WITH_FLAGS 35 | #define USE_HANDLE_ERROR 36 | 37 | #include "libm_inlines.h" 38 | #undef USE_ZERO_WITH_FLAGS 39 | #undef USE_SPLITEXP 40 | #undef USE_SCALEDOUBLE_1 41 | #undef USE_SCALEDOUBLE_2 42 | #undef USE_INFINITY_WITH_FLAGS 43 | #undef USE_HANDLE_ERROR 44 | 45 | #include "libm_errno.h" 46 | 47 | /* exp2 is only provided for use by powf under Windows, so give 48 | it a leading underscore. 
*/ 49 | double FN_PROTOTYPE(_exp2)(double x) 50 | { 51 | static const double 52 | max_exp2_arg = 1024.0, /* 0x4090000000000000 */ 53 | min_exp2_arg = -1074.0, /* 0xc090c80000000000 */ 54 | log2 = 6.931471805599453094178e-01, /* 0x3fe62e42fefa39ef */ 55 | log2_lead = 6.93147167563438415527E-01, /* 0x3fe62e42f8000000 */ 56 | log2_tail = 1.29965068938898869640E-08, /* 0x3e4be8e7bcd5e4f1 */ 57 | one_by_32_lead = 0.03125; 58 | 59 | double y, z1, z2, z, hx, tx, y1, y2; 60 | int m; 61 | unsigned long ux, ax; 62 | 63 | /* 64 | Computation of exp2(x). 65 | 66 | We compute the values m, z1, and z2 such that 67 | exp2(x) = 2**m * (z1 + z2), where exp2(x) is 2**x. 68 | 69 | Computations needed in order to obtain m, z1, and z2 70 | involve three steps. 71 | 72 | First, we reduce the argument x to the form 73 | x = n/32 + remainder, 74 | where n has the value of an integer and |remainder| <= 1/64. 75 | The value of n = x * 32 rounded to the nearest integer and 76 | the remainder = x - n/32. 77 | 78 | Second, we approximate exp2(r1 + r2) - 1 where r1 is the leading 79 | part of the remainder and r2 is the trailing part of the remainder. 80 | 81 | Third, we reconstruct exp2(x) so that 82 | exp2(x) = 2**m * (z1 + z2). 83 | */ 84 | 85 | 86 | GET_BITS_DP64(x, ux); 87 | ax = ux & (~SIGNBIT_DP64); 88 | 89 | if (ax >= 0x4090000000000000) /* abs(x) >= 1024.0 */ 90 | { 91 | if(ax >= 0x7ff0000000000000) 92 | { 93 | /* x is either NaN or infinity */ 94 | if (ux & MANTBITS_DP64) 95 | /* x is NaN */ 96 | return _handle_error("exp2", OP_EXP, ux|0x0008000000000000, _DOMAIN, 97 | 0, EDOM, x, 0.0, 1); 98 | else if (ux & SIGNBIT_DP64) 99 | /* x is negative infinity; return 0.0 with no flags. 
*/ 100 | return 0.0; 101 | else 102 | /* x is positive infinity */ 103 | return x; 104 | } 105 | if (x > max_exp2_arg) 106 | /* Return +infinity with overflow flag */ 107 | return _handle_error("exp2", OP_EXP, PINFBITPATT_DP64, _OVERFLOW, 108 | AMD_F_OVERFLOW | AMD_F_INEXACT, ERANGE, x, 0.0, 1); 109 | else if (x < min_exp2_arg) 110 | /* x is negative. Return +zero with underflow and inexact flags */ 111 | return _handle_error("exp2", OP_EXP, 0, _UNDERFLOW, 112 | AMD_F_UNDERFLOW | AMD_F_INEXACT, ERANGE, x, 0.0, 1); 113 | } 114 | 115 | 116 | /* Handle small arguments separately */ 117 | if (ax < 0x3fb7154764ee6c2f) /* abs(x) < 1/(16*log2) */ 118 | { 119 | if (ax < 0x3c00000000000000) /* abs(x) < 2^(-63) */ 120 | return 1.0 + x; /* Raises inexact if x is non-zero */ 121 | else 122 | { 123 | /* Split x into hx (head) and tx (tail). */ 124 | unsigned long u; 125 | hx = x; 126 | GET_BITS_DP64(hx, u); 127 | u &= 0xfffffffff8000000; 128 | PUT_BITS_DP64(u, hx); 129 | tx = x - hx; 130 | /* Carefully multiply x by log2. 
y1 is the most significant 131 | part of the result, and y2 the least significant part */ 132 | y1 = x * log2_lead; 133 | y2 = (((hx * log2_lead - y1) + hx * log2_tail) + 134 | tx * log2_lead) + tx * log2_tail; 135 | 136 | y = y1 + y2; 137 | z = (9.99564649780173690e-1 + 138 | (1.61251249355268050e-5 + 139 | (2.37986978239838493e-2 + 140 | 2.68724774856111190e-7*y)*y)*y)/ 141 | (9.99564649780173692e-1 + 142 | (-4.99766199765151309e-1 + 143 | (1.070876894098586184e-1 + 144 | (-1.189773642681502232e-2 + 145 | 5.9480622371960190616e-4*y)*y)*y)*y); 146 | z = ((z * y1) + (z * y2)) + 1.0; 147 | } 148 | } 149 | else 150 | { 151 | /* Find m, z1 and z2 such that exp2(x) = 2**m * (z1 + z2) */ 152 | 153 | splitexp(x, log2, 32.0, one_by_32_lead, 0.0, &m, &z1, &z2); 154 | 155 | /* Scale (z1 + z2) by 2.0**m */ 156 | if (m > EMIN_DP64 && m < EMAX_DP64) 157 | z = scaleDouble_1((z1+z2),m); 158 | else 159 | z = scaleDouble_2((z1+z2),m); 160 | } 161 | return z; 162 | } 163 | -------------------------------------------------------------------------------- /exp_special.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************************************************* 3 | MIT License 4 | ----------- 5 | 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this Software and associated documentaon files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 
17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 25 | *******************************************************************************/ 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | 33 | #include "libm_new.h" 34 | 35 | // y = expf(x) 36 | // y = exp(x) 37 | 38 | // these codes and the ones in the related .asm files have to match 39 | #define EXP_X_NAN 1 40 | #define EXP_Y_ZERO 2 41 | #define EXP_Y_INF 3 42 | 43 | /* Special-case exit for expf(x): reports the condition selected by code 44 | through _handle_errorf and returns the caller-supplied result y as is. 45 | A code other than the three EXP_* values reports nothing. */ 46 | float _expf_special(float x, float y, U32 code) 47 | { 48 | UT64 bits; 49 | 50 | /* bit pattern of y in element 0 of an otherwise zeroed 64-bit word */ 51 | bits.u64 = 0; 52 | bits.f32[0] = y; 53 | 54 | if (code == EXP_X_NAN) 55 | _handle_errorf("expf", _FpCodeExp, bits.u64, _DOMAIN, 0, EDOM, x, 0.0, 1); 56 | else if (code == EXP_Y_ZERO) 57 | _handle_errorf("expf", _FpCodeExp, bits.u64, _UNDERFLOW, AMD_F_INEXACT|AMD_F_UNDERFLOW, ERANGE, x, 0.0, 1); 58 | else if (code == EXP_Y_INF) 59 | _handle_errorf("expf", _FpCodeExp, bits.u64, _OVERFLOW, AMD_F_INEXACT|AMD_F_OVERFLOW, ERANGE, x, 0.0, 1); 60 | 61 | return y; 62 | } 63 |
64 | /* Special-case exit for exp(x): double-precision version of the routine above, 65 | reporting through _handle_error and returning y unchanged. */ 66 | double _exp_special(double x, double y, U32 code) 67 | { 68 | UT64 bits; 69 | 70 | bits.f64 = y; 71 | 72 | if (code == EXP_X_NAN) 73 | _handle_error("exp", _FpCodeExp, bits.u64, _DOMAIN, 0, EDOM, x, 0.0, 1); 74 | else if (code == EXP_Y_ZERO) 75 | _handle_error("exp", _FpCodeExp, bits.u64, _UNDERFLOW, AMD_F_INEXACT|AMD_F_UNDERFLOW, ERANGE, x, 0.0, 1); 76 | else if (code == EXP_Y_INF) 77 | _handle_error("exp", _FpCodeExp, bits.u64, _OVERFLOW, AMD_F_INEXACT|AMD_F_OVERFLOW, ERANGE, x, 0.0, 1); 78 | 79 | return y; 80 | } 81 | -------------------------------------------------------------------------------- /expf.asm: -------------------------------------------------------------------------------- 1 | ; 2 | ; MIT License 3 | ; ----------- 4 | ; 5 | ; Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 6 | ; 7 | ; Permission is hereby granted, free of charge, to any person obtaining a copy 8 | ; of this Software and associated documentaon files (the "Software"), to deal 9 | ; in the Software without restriction, including without limitation the rights 10 | ; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | ; copies of the Software, and to permit persons to whom the Software is 12 | ; furnished to do so, subject to the following conditions: 13 | ; 14 | ; The above copyright notice and this permission notice shall be included in 15 | ; all copies or substantial portions of the Software. 16 | ; 17 | ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | ; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | ; THE SOFTWARE. 24 | ; 25 | ; expf.asm 26 | ; 27 | ; An implementation of the expf libm function. 28 | ; 29 | ; Prototype: 30 | ; 31 | ; float expf(float x); 32 | ; 33 | 34 | ; 35 | ; Algorithm: 36 | ; Similar to one presented in exp.asm 37 | ; 38 | ; If FMA3 hardware is available, an FMA3 implementation of expf will be used.
39 | 40 | 41 | .const 42 | ALIGN 16 43 | 44 | __real_inf DD 7f800000h 45 | DD 0 46 | DQ 0 47 | 48 | __real_ninf DD 0ff800000h 49 | DD 0 50 | DQ 0 51 | 52 | __real_qnanbit DD 00400000h 53 | DD 0 54 | DQ 0 55 | 56 | __real_zero DD 00000000h 57 | DD 0 58 | DQ 0 59 | 60 | __real_p8192 DQ 40c0000000000000h ; 8192.0 = 64*128 61 | DQ 0 62 | __real_m9600 DQ 0c0c2c00000000000h ; -9600.0 = 64*(-150) 63 | DQ 0 64 | 65 | __real_64_by_log2 DQ 40571547652b82feh ; 64/ln(2) 66 | DQ 0 67 | __real_log2_by_64 DQ 3f862e42fefa39efh ; log2_by_64 68 | DQ 0 69 | 70 | __real_1_by_6 DQ 3fc5555555555555h ; 1/6 71 | DQ 0 72 | __real_1_by_2 DQ 3fe0000000000000h ; 1/2 73 | DQ 0 74 | 75 | ; these codes and the ones in the corresponding .c file have to match 76 | __flag_x_nan DD 00000001 77 | __flag_y_zero DD 00000002 78 | __flag_y_inf DD 00000003 79 | 80 | EXTRN __two_to_jby64_table:QWORD 81 | EXTRN __use_fma3_lib:DWORD 82 | 83 | fname TEXTEQU 84 | fname_special TEXTEQU <_expf_special> 85 | 86 | ; define local variable storage offsets 87 | 88 | ; make room for fname_special to save things 89 | dummy_space EQU 020h 90 | stack_size EQU 038h 91 | 92 | include fm.inc 93 | 94 | ; external function 95 | EXTERN fname_special:PROC 96 | 97 | .code 98 | ; float expf(float x): x arrives in xmm0, the result is returned in xmm0. Special cases call fname_special with (x in xmm0, y in xmm1, code in r8d). 99 | ALIGN 16 100 | PUBLIC fname 101 | fname PROC FRAME 102 | StackAllocate stack_size 103 | .ENDPROLOG 104 | ; Classify x from its integer bit pattern to avoid possible exceptions from a NaN argument. 105 | ; edx keeps the original bits of x (sign included) for the special-case paths; eax gets the bits of |x| for the range test.
106 | movd edx, xmm0 107 | mov eax, edx 108 | btr eax, 31 ; eax = bits of |x|, edx left untouched 109 | cmp eax, DWORD PTR __real_inf 110 | jge Lexpf_x_is_inf_or_nan ; |x| bits are non-negative, so the signed compare is safe 111 | cmp DWORD PTR __use_fma3_lib, 0 112 | jne Lexpf_fma3 113 | 114 | Lexpf_sse2: 115 | 116 | cvtss2sd xmm0, xmm0 117 | 118 | ; x * (64/ln(2)) 119 | movsd xmm3, QWORD PTR __real_64_by_log2 120 | mulsd xmm3, xmm0 121 | 122 | ; x <= 128*ln(2), ( x * (64/ln(2)) ) <= 64*128 123 | ; x > -150*ln(2), ( x * (64/ln(2)) ) > 64*(-150) 124 | comisd xmm3, QWORD PTR __real_p8192 125 | jae Lexpf_y_is_inf 126 | 127 | comisd xmm3, QWORD PTR __real_m9600 128 | jb Lexpf_y_is_zero 129 | 130 | ; n = int( x * (64/ln(2)) ) 131 | cvtpd2dq xmm4, xmm3 132 | lea r10, __two_to_jby64_table 133 | cvtdq2pd xmm1, xmm4 134 | 135 | ; r = x - n * ln(2)/64 136 | movsd xmm2, QWORD PTR __real_log2_by_64 137 | mulsd xmm2, xmm1 138 | movd ecx, xmm4 139 | mov rax, 3fh 140 | and eax, ecx ; j = n & 63, index into the 2^(j/64) table 141 | subsd xmm0, xmm2 142 | movsd xmm1, xmm0 143 | 144 | ; m = (n - j) / 64 145 | sub ecx, eax 146 | sar ecx, 6 147 | 148 | ; q 149 | movsd xmm3, QWORD PTR __real_1_by_6 150 | mulsd xmm3, xmm0 151 | mulsd xmm0, xmm0 152 | addsd xmm3, QWORD PTR __real_1_by_2 153 | mulsd xmm0, xmm3 154 | addsd xmm0, xmm1 155 | ; build 2^m as a double: biased exponent (m + 1023) shifted into bits 52 and up 156 | add rcx, 1023 157 | shl rcx, 52 158 | 159 | ; (f)*(1+q) 160 | movsd xmm2, QWORD PTR [r10+rax*8] 161 | mulsd xmm0, xmm2 162 | addsd xmm0, xmm2 163 | 164 | movd xmm1, rcx 165 | mulsd xmm0, xmm1 166 | cvtsd2ss xmm0, xmm0 167 | 168 | Lexpf_final_check: 169 | StackDeallocate stack_size 170 | ret 171 | 172 | ALIGN 16 173 | Lexpf_y_is_zero: 174 | 175 | movss xmm1, DWORD PTR __real_zero 176 | movd xmm0, edx ; xmm0 was widened to double above; restore the original float x 177 | mov r8d, DWORD PTR __flag_y_zero 178 | 179 | call fname_special 180 | jmp Lexpf_finish 181 | 182 | ALIGN 16 183 | Lexpf_y_is_inf: 184 | 185 | movss xmm1, DWORD PTR __real_inf 186 | movd xmm0, edx ; restore the original float x 187 | mov r8d, DWORD PTR __flag_y_inf 188 | 189 | call fname_special 190 | jmp Lexpf_finish 191 | 192 | ALIGN 16 193 | Lexpf_x_is_inf_or_nan: 194 | ; edx still holds the signed bits of x and xmm0 still holds x as a float 195 | cmp edx, DWORD PTR __real_inf ; expf(+inf) = +inf, return x itself 196 | je Lexpf_finish
197 | 198 | cmp edx, DWORD PTR __real_ninf ; expf(-inf) = +0 199 | je Lexpf_process_zero 200 | 201 | or edx, DWORD PTR __real_qnanbit ; x is NaN: y = x with the quiet bit set 202 | movd xmm1, edx 203 | mov r8d, DWORD PTR __flag_x_nan 204 | call fname_special 205 | jmp Lexpf_finish 206 | 207 | ALIGN 16 208 | Lexpf_process_zero: 209 | movss xmm0, DWORD PTR __real_zero 210 | jmp Lexpf_final_check 211 | 212 | ALIGN 16 213 | Lexpf_finish: 214 | StackDeallocate stack_size 215 | ret 216 | 217 | ; FMA3 variant of the main path; same algorithm, entered with edx = original bits of x 218 | ALIGN 16 219 | Lexpf_fma3: 220 | 221 | vcvtss2sd xmm0, xmm0, xmm0 222 | 223 | ; x * (64/ln(2)) 224 | vmulsd xmm3, xmm0, QWORD PTR __real_64_by_log2 225 | 226 | ; x <= 128*ln(2), ( x * (64/ln(2)) ) <= 64*128 227 | ; x > -150*ln(2), ( x * (64/ln(2)) ) > 64*(-150) 228 | vcomisd xmm3, QWORD PTR __real_p8192 229 | jae Lexpf_fma3_y_is_inf 230 | 231 | vucomisd xmm3, QWORD PTR __real_m9600 232 | jb Lexpf_fma3_y_is_zero 233 | 234 | ; n = int( x * (64/ln(2)) ) 235 | vcvtpd2dq xmm4, xmm3 236 | lea r10, __two_to_jby64_table 237 | vcvtdq2pd xmm1, xmm4 238 | 239 | ; r = x - n * ln(2)/64 240 | vfnmadd231sd xmm0, xmm1, QWORD PTR __real_log2_by_64 241 | vmovd ecx, xmm4 242 | mov rax, 3fh 243 | and eax, ecx 244 | vmovapd xmm1, xmm0 ; xmm1 <-- copy of r 245 | 246 | ; m = (n - j) / 64 247 | sub ecx, eax 248 | sar ecx, 6 249 | 250 | ; q 251 | vmovsd xmm3, QWORD PTR __real_1_by_6 252 | vmulsd xmm0, xmm0, xmm0 ; xmm0 <-- r^2 253 | vfmadd213sd xmm3, xmm1, QWORD PTR __real_1_by_2 ; xmm3 <-- r/6 + 1/2 254 | vfmadd213sd xmm0, xmm3, xmm1 ; xmm0 <-- q = r^2*(r/6 + 1/2) + r 255 | 256 | add rcx, 1023 257 | shl rcx, 52 258 | 259 | ; (f)*(1+q) 260 | vmovsd xmm2, QWORD PTR [r10+rax*8] 261 | vfmadd213sd xmm0, xmm2, xmm2 262 | 263 | vmovq xmm2,rcx 264 | vmulsd xmm0, xmm0, xmm2 265 | vcvtsd2ss xmm0, xmm0, xmm0 266 | 267 | Lexpf_fma3_final_check: 268 | StackDeallocate stack_size 269 | ret 270 | 271 | ALIGN 16 272 | Lexpf_fma3_y_is_zero: 273 | 274 | vmovss xmm1, DWORD PTR __real_zero 275 | vmovd xmm0, edx ; restore the original float x 276 | mov r8d, DWORD PTR __flag_y_zero 277 | 278 | call fname_special
279 | jmp Lexpf_fma3_finish 280 | 281 | ALIGN 16 282 | Lexpf_fma3_y_is_inf: 283 | 284 | vmovss xmm1, DWORD PTR __real_inf 285 | vmovd xmm0, edx ; restore the original float x 286 | mov r8d, DWORD PTR __flag_y_inf 287 | 288 | call fname_special 289 | jmp Lexpf_fma3_finish 290 | ; NOTE: nothing in this file branches to the label below; -inf is handled in Lexpf_x_is_inf_or_nan before the FMA3 dispatch 291 | ALIGN 16 292 | Lexpf_fma3_process_zero: 293 | vmovss xmm0, DWORD PTR __real_zero 294 | jmp Lexpf_fma3_final_check 295 | 296 | ALIGN 16 297 | Lexpf_fma3_finish: 298 | StackDeallocate stack_size 299 | ret 300 | 301 | fname endp 302 | 303 | END 304 | -------------------------------------------------------------------------------- /floor.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************************************************* 3 | MIT License 4 | ----------- 5 | 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this Software and associated documentaon files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE.
25 | *******************************************************************************/ 26 | 27 | #include "libm.h" 28 | #include "libm_util.h" 29 | 30 | #include "libm_errno.h" 31 | #define USE_HANDLE_ERROR 32 | #include "libm_inlines.h" 33 | #undef USE_HANDLE_ERROR 34 | 35 | #pragma function(floor) 36 | 37 | double FN_PROTOTYPE(floor)(double x) 38 | { 39 | double r; 40 | long rexp, xneg; 41 | 42 | 43 | unsigned long ux, ax, ur, mask; 44 | 45 | GET_BITS_DP64(x, ux); 46 | ax = ux & (~SIGNBIT_DP64); 47 | xneg = (ux != ax); 48 | 49 | if (ax >= 0x4340000000000000) 50 | { 51 | /* abs(x) is either NaN, infinity, or >= 2^53 */ 52 | if (ax > 0x7ff0000000000000) 53 | /* x is NaN */ 54 | return _handle_error("floor", OP_FLOOR, ux|0x0008000000000000, _DOMAIN, 55 | 0, EDOM, x, 0.0, 1); 56 | else 57 | return x; 58 | } 59 | else if (ax < 0x3ff0000000000000) /* abs(x) < 1.0 */ 60 | { 61 | if (ax == 0x0000000000000000) 62 | /* x is +zero or -zero; return the same zero */ 63 | return x; 64 | else if (xneg) /* x < 0.0 */ 65 | return -1.0; 66 | else 67 | return 0.0; 68 | } 69 | else 70 | { 71 | r = x; 72 | rexp = ((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64; 73 | /* Mask out the bits of r that we don't want */ 74 | mask = 1; 75 | mask = (mask << (EXPSHIFTBITS_DP64 - rexp)) - 1; 76 | ur = (ux & ~mask); 77 | PUT_BITS_DP64(ur, r); 78 | if (xneg && (ur != ux)) 79 | /* We threw some bits away and x was negative */ 80 | return r - 1.0; 81 | else 82 | return r; 83 | } 84 | 85 | } 86 | -------------------------------------------------------------------------------- /floorf.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************************************************* 3 | MIT License 4 | ----------- 5 | 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 
7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this Software and associated documentaon files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 25 | *******************************************************************************/ 26 | 27 | #include "libm.h" 28 | #include "libm_util.h" 29 | 30 | #include "libm_errno.h" 31 | #define USE_HANDLE_ERRORF 32 | #include "libm_inlines.h" 33 | #undef USE_HANDLE_ERRORF 34 | 35 | // Disable "C4163: not available as intrinsic function" warning that older 36 | // compilers may issue here. 
37 | #pragma warning(disable:4163) 38 | #pragma function(floorf) 39 | 40 | float FN_PROTOTYPE(floorf)(float x) 41 | { 42 | float r; 43 | int rexp, xneg; 44 | unsigned int ux, ax, ur, mask; 45 | 46 | GET_BITS_SP32(x, ux); 47 | ax = ux & (~SIGNBIT_SP32); 48 | xneg = (ux != ax); 49 | 50 | if (ax >= 0x4b800000) 51 | { 52 | /* abs(x) is either NaN, infinity, or >= 2^24 */ 53 | if (ax > 0x7f800000) 54 | /* x is NaN */ 55 | return _handle_errorf("floorf", OP_FLOOR, ux|0x00400000, _DOMAIN, 56 | 0, EDOM, x, 0.0F, 1); 57 | else 58 | return x; 59 | } 60 | else if (ax < 0x3f800000) /* abs(x) < 1.0 */ 61 | { 62 | if (ax == 0x00000000) 63 | /* x is +zero or -zero; return the same zero */ 64 | return x; 65 | else if (xneg) /* x < 0.0 */ 66 | return -1.0F; 67 | else 68 | return 0.0F; 69 | } 70 | else 71 | { 72 | rexp = ((ux & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32; 73 | /* Mask out the bits of r that we don't want */ 74 | mask = (1 << (EXPSHIFTBITS_SP32 - rexp)) - 1; 75 | ur = (ux & ~mask); 76 | PUT_BITS_SP32(ur, r); 77 | if (xneg && (ux != ur)) 78 | /* We threw some bits away and x was negative */ 79 | return r - 1.0F; 80 | else 81 | return r; 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /fm.inc: -------------------------------------------------------------------------------- 1 | StackAllocate MACRO size 2 | if size ne 0 3 | sub rsp, size 4 | .ALLOCSTACK size 5 | endif 6 | ENDM 7 | 8 | StackDeallocate MACRO size 9 | if size ne 0 10 | add rsp, size 11 | endif 12 | ENDM 13 | 14 | SaveReg MACRO reg64, offset 15 | mov QWORD PTR [rsp+offset], reg64 16 | .SAVEREG reg64, offset 17 | ENDM 18 | 19 | RestoreReg MACRO reg64, offset 20 | mov reg64, QWORD PTR [rsp+offset] 21 | ENDM 22 | 23 | SaveXmm MACRO xmmreg, offset 24 | movdqa XMMWORD PTR [offset+rsp], xmmreg 25 | .SAVEXMM128 xmmreg, offset 26 | ENDM 27 | 28 | RestoreXmm MACRO xmmreg, offset 29 | movdqa xmmreg, XMMWORD PTR [offset+rsp] 30 | ENDM 31 | 32 | AVXSaveXmm MACRO 
xmmreg, offset ; VEX-encoded twin of SaveXmm: store xmmreg at [rsp+offset] and record the save for the unwinder 33 | vmovdqa XMMWORD PTR [offset+rsp], xmmreg ; aligned store: rsp+offset must be 16-byte aligned 34 | .SAVEXMM128 xmmreg, offset 35 | ENDM 36 | ; AVXRestoreXmm: VEX-encoded twin of RestoreXmm, reloads xmmreg from [rsp+offset] (aligned load) 37 | AVXRestoreXmm MACRO xmmreg, offset 38 | vmovdqa xmmreg, XMMWORD PTR [offset+rsp] 39 | ENDM 40 | -------------------------------------------------------------------------------- /fma3_available.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************************************************* 3 | MIT License 4 | ----------- 5 | 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this Software and associated documentaon files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE.
25 | *******************************************************************************/ 26 | 27 | #ifdef TEST_STANDALONE 28 | #include 29 | #pragma section (".CRT$XIC",long,read) 30 | typedef int (__cdecl *_PIFV)(void); /* must match __fma3_lib_init, which returns int */ 31 | #else 32 | #include 33 | #include 34 | #include 35 | #include 36 | #endif 37 | #include <immintrin.h> /* _xgetbv */ 38 | 39 | #define _CRTALLOC(x) __declspec(allocate(x)) 40 | 41 | int __fma3_is_available = 0; /* set by __fma3_lib_init: CPU has FMA3 and the OS has enabled AVX state */ 42 | int __use_fma3_lib = 0; /* run-time switch for the FMA3 code paths */ 43 | 44 | /* Request that the FMA3 code paths be used (flag != 0) or avoided (flag == 0). 45 | The request is honoured only when FMA3 was detected as usable; the value 46 | returned is the setting in effect after the call. */ 47 | int __cdecl _set_FMA3_enable(int flag) 48 | { 49 | if (__fma3_is_available) __use_fma3_lib = flag; 50 | return __use_fma3_lib; 51 | } 52 | 53 | int __fma3_lib_init(void); 54 | 55 | /* NOTE(review): the pointer is placed in .CRT$XIC, presumably so the CRT start-up code calls it once - confirm */ 56 | _CRTALLOC(".CRT$XIC") static _PIFV init_fma3 = __fma3_lib_init; 57 | 58 | /* Detect whether FMA3 instructions can be executed and initialise both flags. 59 | The CPUID feature bit alone is not enough: the VEX-encoded instructions fault 60 | unless the OS has enabled XMM and YMM state in XCR0. Always returns 0. */ 61 | int __fma3_lib_init(void) 62 | { 63 | int CPUID[4]; /* CPUID[2] is ECX */ 64 | 65 | __fma3_is_available = 0; 66 | __cpuid(CPUID, 1); 67 | /* ECX bit 12 = FMA, bit 27 = OSXSAVE; XGETBV may only be executed when OSXSAVE is set */ 68 | if ((CPUID[2] & (1 << 12)) && (CPUID[2] & (1 << 27))) { 69 | /* XCR0 bit 1 (SSE state) and bit 2 (AVX state) must both be enabled by the OS */ 70 | if ((_xgetbv(0) & 0x6) == 0x6) { 71 | __fma3_is_available = 1; 72 | } 73 | } 74 | 75 | __use_fma3_lib = __fma3_is_available; 76 | return 0; 77 | } 78 | -------------------------------------------------------------------------------- /fmod.asm: -------------------------------------------------------------------------------- 1 | ; 2 | ; MIT License 3 | ; ----------- 4 | ; 5 | ; Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 6 | ; 7 | ; Permission is hereby granted, free of charge, to any person obtaining a copy 8 | ; of this Software and associated documentaon files (the "Software"), to deal 9 | ; in the Software without restriction, including without limitation the rights 10 | ; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | ; copies of the Software, and to permit persons to whom the Software is 12 | ; furnished to do so, subject to the following conditions: 13 | ; 14 | ; The above copyright notice and this permission notice shall be included in 15 | ; all copies or substantial portions of the Software.
16 | ; 17 | ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | ; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | ; THE SOFTWARE. 24 | ; 25 | ; $Workfile: fmod.asm $ 26 | ; $Revision: 4 $ 27 | ; $Date: 9/15/04 16:43 $ 28 | ; 29 | ; 30 | ; This is an optimized version of fmod. 31 | ; 32 | ; Define _CRTBLD_C9X to make it compliant with C90 and on. 33 | ; 34 | ; If building the OS CRTL (_NTSUBSET_ defined), abort. 35 | 36 | ; .ERRDEF _NTSUBSET_, "x87 code cannot be used in kernel mode" 37 | 38 | DOMAIN EQU 1 ; _DOMAIN 39 | EDOM EQU 33 ; EDOM 40 | FPCODEFMOD EQU 22 ; _FpCodeFmod 41 | INVALID EQU 8 ; AMD_F_INVALID 42 | 43 | FPIND EQU 0fff8000000000000h ; indefinite 44 | FPSNAN EQU 07ff7ffffffffffffh ; SNAN 45 | FPQNAN EQU 07fffffffffffffffh ; QNAN 46 | ; x87 status word as a MASM record, most significant field first (B, C3, TOP, C2..C0, ES, SF, PE, five exception flags) 47 | X87SW RECORD X87SW_B: 1, 48 | X87SW_C3: 1, 49 | X87SW_TOP: 3, 50 | X87SW_C: 3, 51 | X87SW_ES: 1, 52 | X87SW_SF: 1, 53 | X87SW_PE: 1, 54 | X87SW_E: 5 55 | ; X87XAM keeps C3, C2 and C0 (C1 is masked out); X87XAM_INF / X87XAM_NAN are the fxam classes compared against; X87XAM_BAD is every low exception flag except bit 1 56 | X87XAM EQU MASK X87SW_C3 OR MASK X87SW_C AND NOT (1 SHL (X87SW_C + 1)) 57 | X87XAM_INF EQU 5 SHL X87SW_C 58 | X87XAM_NAN EQU 1 SHL X87SW_C 59 | X87XAM_BAD EQU MASK X87SW_E AND NOT 2 60 | 61 | EXTRN _handle_error: PROC ; float _handle_error (char *fname, int opcode, unsigned long long value, int type, int flags, int error, double arg1, double arg2, int nargs) 62 | 63 | .const 64 | 65 | @fmodz DB "fmod", 0 66 | 67 | .CODE 68 | 69 | ; double fmod [double, double] ---------------------------------- 70 | ; In: xmm0 = X, xmm1 = Y. Out: xmm0 = result. Uses x87 fprem; ecx / edx hold the fxam class of X / Y, eax the status word after fprem. 71 | fmod PROC FRAME 72 | ; 72-byte frame: [rsp+0] result scratch, [rsp+8] / [rsp+16] status-word scratch, [rsp+32] flags, [rsp+40] error, [rsp+48] X, [rsp+56] Y, [rsp+64] nargs 73 | sub rsp, 40 + 32 74 | 75 | .ALLOCSTACK 40 + 32 76 | .ENDPROLOG 77 | ; X and Y are spilled straight into the arg1 / arg2 stack slots that _handle_error will read 78 | movsd QWORD PTR 24 [rsp + 32], xmm1 ; Y 79 | movsd QWORD PTR 16 [rsp + 32], xmm0
; X 80 | ; push Y then X, so st(0) = X and st(1) = Y as fprem expects 81 | DB 0ddh, 44h, 24h, 38h ; fld QWORD PTR 24 [rsp + 32] 82 | DB 0ddh, 44h, 24h, 30h ; fld QWORD PTR 16 [rsp + 32] 83 | 84 | DB 0d9h, 0e5h ; fxam (X) 85 | DB 09bh, 0ddh, 07ch, 024h, 010h ; fstsw 16 [rsp] 86 | 87 | movzx ecx, WORD PTR 16 [rsp] 88 | and ecx, X87XAM ; ecx = class of X 89 | 90 | fnclex ; clear exception flags 91 | ; in preparation for fprem 92 | 93 | @@: 94 | DB 0d9h, 0f8h ; fprem 95 | 96 | DB 09bh, 0dfh, 0e0h ; fstsw ax 97 | test ax, 4 SHL X87SW_C ; C2 set = reduction incomplete 98 | jnz @b ; do it again in case of partial result 99 | 100 | DB 0ddh, 01ch, 024h ; fstp QWORD PTR [rsp] 101 | movlpd xmm0, QWORD PTR [rsp] ; result 102 | ; Y is now st(0): classify it, then pop it to leave the x87 stack empty 103 | DB 0d9h, 0e5h ; fxam (Y) 104 | DB 09bh, 0ddh, 07ch, 024h, 008h ; fstsw 8 [rsp] 105 | 106 | movzx edx, WORD PTR 8 [rsp] 107 | and edx, X87XAM ; edx = class of Y 108 | 109 | DB 0ddh, 0d8h ; fstp st(0) 110 | 111 | cmp edx, X87XAM_NAN ; fmod (x, NAN) = QNAN 112 | je @error 113 | 114 | cmp ecx, X87XAM_NAN ; fmod (NAN, y) = QNAN 115 | je @error 116 | ; eax still holds the status word stored after the last fprem 117 | and eax, X87XAM_BAD 118 | jnz @raise ; handle error 119 | 120 | IFNDEF _CRTBLD_C9X ; Not C90 121 | cmp edx, X87XAM_INF ; fmod (x, infinity) = ???
122 | je @raise 123 | ELSE ; C90 124 | ; fmod (x, infinity) = x (as x87 already does) 125 | ENDIF 126 | 127 | @exit: 128 | add rsp, 40 + 32 129 | ret 130 | 131 | ALIGN 16 132 | ; @raise: report a domain error with the INVALID flag and the indefinite NaN as the value 133 | @raise: 134 | mov eax, INVALID ; raise exception 135 | mov r8, FPIND 136 | jmp @f 137 | ; @error: a NaN operand - report a domain error with no flag, passing the fprem result as the value 138 | @error: 139 | xor eax, eax ; no exception 140 | movd r8, xmm0 141 | jmp @f 142 | ; common tail: r8 = value and eax = flags were set above; arg1 / arg2 are already in their stack slots 143 | @@: 144 | lea rcx, [@fmodz] ; fname 145 | mov edx, FPCODEFMOD ; opcode 146 | ; mov r8, INDEF ; value 147 | mov r9d, DOMAIN ; type 148 | mov DWORD PTR 0 [rsp + 32], eax ; flags 149 | mov DWORD PTR 8 [rsp + 32], EDOM ; error 150 | mov DWORD PTR 32 [rsp + 32], 2 ; nargs 151 | call _handle_error ; (char *fname, int opcode, unsigned long long value, int type, int flags, int error, double arg1, double arg2, int nargs) 152 | ; whatever _handle_error left in xmm0 is returned to the caller 153 | DB 09bh, 0dbh, 0e2h ; fclex 154 | jmp @exit 155 | 156 | fmod ENDP 157 | 158 | ; --------------------------------------------------------------- 159 | 160 | END 161 | -------------------------------------------------------------------------------- /fmodf.asm: -------------------------------------------------------------------------------- 1 | ; 2 | ; MIT License 3 | ; ----------- 4 | ; 5 | ; Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 6 | ; 7 | ; Permission is hereby granted, free of charge, to any person obtaining a copy 8 | ; of this Software and associated documentaon files (the "Software"), to deal 9 | ; in the Software without restriction, including without limitation the rights 10 | ; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | ; copies of the Software, and to permit persons to whom the Software is 12 | ; furnished to do so, subject to the following conditions: 13 | ; 14 | ; The above copyright notice and this permission notice shall be included in 15 | ; all copies or substantial portions of the Software.
16 | ; 17 | ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | ; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | ; THE SOFTWARE. 24 | ; 25 | ; $Workfile: fmodf.asm $ 26 | ; $Revision: 4 $ 27 | ; $Date: 9/15/04 16:43 $ 28 | ; 29 | ; 30 | ; This is an optimized version of fmodf. 31 | ; 32 | ; Define _CRTBLD_C9X to make it compliant with C90 and on. 33 | ; 34 | ; If building the OS CRTL (_NTSUBSET_ defined), abort. 35 | 36 | .ERRDEF _NTSUBSET_, "x87 code cannot be used in kernel mode" 37 | 38 | DOMAIN EQU 1 ; _DOMAIN 39 | EDOM EQU 33 ; EDOM 40 | FPCODEFMOD EQU 22 ; _FpCodeFmod 41 | INVALID EQU 8 ; AMD_F_INVALID 42 | 43 | FPIND EQU 0ffc00000h ; indefinite 44 | FPSNAN EQU 07fbfffffh ; SNAN 45 | FPQNAN EQU 07fffffffh ; QNAN 46 | ; x87 status word as a MASM record, most significant field first (B, C3, TOP, C2..C0, ES, SF, PE, five exception flags) 47 | X87SW RECORD X87SW_B: 1, 48 | X87SW_C3: 1, 49 | X87SW_TOP: 3, 50 | X87SW_C: 3, 51 | X87SW_ES: 1, 52 | X87SW_SF: 1, 53 | X87SW_PE: 1, 54 | X87SW_E: 5 55 | ; X87XAM keeps C3, C2 and C0 (C1 is masked out); X87XAM_INF / X87XAM_NAN are the fxam classes compared against; X87XAM_BAD is every low exception flag except bit 1 56 | X87XAM EQU MASK X87SW_C3 OR MASK X87SW_C AND NOT (1 SHL (X87SW_C + 1)) 57 | X87XAM_INF EQU 5 SHL X87SW_C 58 | X87XAM_NAN EQU 1 SHL X87SW_C 59 | X87XAM_BAD EQU MASK X87SW_E AND NOT 2 60 | 61 | EXTRN _handle_errorf: PROC ; float _handle_errorf (char *fname, int opcode, unsigned long value, int type, int flags, int error, float arg1, float arg2, int nargs) 62 | 63 | .CONST 64 | 65 | @fmodfz DB "fmodf", 0 66 | 67 | .CODE 68 | 69 | ; float fmodf [float, float] ------------------------------------ 70 | ; In: xmm0 = X, xmm1 = Y. Out: xmm0 = result. Uses x87 fprem; ecx / edx hold the fxam class of X / Y, eax the status word after fprem. 71 | fmodf PROC FRAME 72 | ; 72-byte frame: [rsp+0] result scratch, [rsp+8] / [rsp+16] status-word scratch, [rsp+32] flags, [rsp+40] error, [rsp+48] X, [rsp+56] Y, [rsp+64] nargs 73 | sub rsp, 40 + 32 74 | 75 | .ALLOCSTACK 40 + 32 76 | .ENDPROLOG 77 | ; Y and X are spilled straight into the arg2 / arg1 stack slots that _handle_errorf will read 78 | movss DWORD PTR 24 [rsp + 32], xmm1 79 | movss DWORD PTR 16 [rsp + 32], xmm0 80 | ; push Y then X, so st(0) = X and st(1) = Y as fprem expects 81 | DB 0d9h, 44h, 24h, 38h
; fld DWORD PTR 24 [rsp + 32] 82 | DB 0d9h, 44h, 24h, 30h ; fld DWORD PTR 16 [rsp + 32] 83 | 84 | DB 0d9h, 0e5h ; fxam (X) 85 | DB 09bh, 0ddh, 07ch, 024h, 010h ; fstsw 16 [rsp] 86 | 87 | movzx ecx, WORD PTR 16 [rsp] 88 | and ecx, X87XAM ; ecx = class of X 89 | 90 | fnclex ; clear exception flags 91 | ; in preparation for fprem 92 | 93 | @@: 94 | DB 0d9h, 0f8h ; fprem 95 | 96 | DB 9bh, 0dfh, 0e0h ; fstsw ax 97 | test ax, 00400h ; C2 set = reduction incomplete 98 | jnz @b ; do it again in case of partial result 99 | 100 | DB 0d9h, 1ch, 24h ; fstp DWORD PTR [rsp] 101 | movss xmm0, DWORD PTR [rsp] ; result 102 | ; Y is now st(0): classify it, then pop it to leave the x87 stack empty 103 | DB 0d9h, 0e5h ; fxam (Y) 104 | DB 09bh, 0ddh, 07ch, 024h, 008h ; fstsw 8 [rsp] 105 | 106 | movzx edx, WORD PTR 8 [rsp] 107 | and edx, X87XAM ; edx = class of Y 108 | 109 | DB 0ddh, 0d8h ; fstp st(0) 110 | 111 | cmp edx, X87XAM_NAN ; fmod (x, NAN) = QNAN 112 | je @error 113 | 114 | cmp ecx, X87XAM_NAN ; fmod (NAN, y) = QNAN 115 | je @error 116 | ; eax still holds the status word stored after the last fprem 117 | and eax, X87XAM_BAD 118 | jnz @raise ; handle error 119 | 120 | IFNDEF _CRTBLD_C9X ; Not C90 121 | cmp edx, X87XAM_INF ; fmod (x, infinity) = ???
122 | je @raise 123 | ELSE ; C90 124 | ; fmod (x, infinity) = x (as x87 already does) 125 | ENDIF 126 | 127 | @exit: 128 | add rsp, 40 + 32 129 | ret 130 | 131 | ALIGN 16 132 | ; @raise: report a domain error with the INVALID flag and the indefinite NaN as the value 133 | @raise: 134 | mov eax, INVALID ; raise exception 135 | mov r8d, FPIND 136 | jmp @f 137 | ; @error: a NaN operand - report a domain error with no flag, passing the fprem result as the value 138 | @error: 139 | xor eax, eax ; no exception 140 | movd r8d, xmm0 141 | jmp @f 142 | ; common tail: r8d = value and eax = flags were set above; arg1 / arg2 are already in their stack slots 143 | @@: 144 | lea rcx, [@fmodfz] ; fname 145 | mov edx, FPCODEFMOD ; opcode 146 | ; mov r8d, [rsp] ; value 147 | mov r9d, DOMAIN ; type 148 | mov DWORD PTR 0 [rsp + 32], eax ; flags 149 | mov DWORD PTR 8 [rsp + 32], EDOM ; error 150 | mov DWORD PTR 32 [rsp + 32], 2 ; nargs 151 | call _handle_errorf ; (char *fname, int opcode, unsigned long value, int type, int flags, int error, float arg1, float arg2, int nargs) 152 | ; whatever _handle_errorf left in xmm0 is returned to the caller 153 | DB 9Bh, 0DBh, 0E2h ; fclex 154 | jmp @exit 155 | 156 | fmodf ENDP 157 | 158 | ; --------------------------------------------------------------- 159 | 160 | END 161 | -------------------------------------------------------------------------------- /hypot.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************************************************* 3 | MIT License 4 | ----------- 5 | 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this Software and associated documentaon files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software.
17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 25 | *******************************************************************************/ 26 | 27 | #include "libm.h" 28 | #include "libm_util.h" 29 | 30 | #define FAST_BUT_GREATER_THAN_ONE_ULP /* Helps speed by trading off a little 31 | accuracy */ 32 | #define USE_SCALEDOUBLE_1 33 | #define USE_INFINITY_WITH_FLAGS 34 | #define USE_HANDLE_ERROR 35 | #include "libm_inlines.h" 36 | #undef USE_SCALEDOUBLE_1 37 | #undef USE_INFINITY_WITH_FLAGS 38 | #undef USE_HANDLE_ERROR 39 | 40 | #include "libm_errno.h" 41 | 42 |
/*
 * _hypot(x, y): Euclidean length sqrt(x*x + y*y).
 *
 * When either exponent is more than 500 away from the bias, both operands
 * are rescaled by 2**600 so that the sum of squares neither overflows nor
 * underflows; the square root is scaled back with scaleDouble_1().
 *
 * Cases settled before any arithmetic on the magnitudes:
 *   - either argument infinite       -> +infinity, even if the other is NaN
 *   - otherwise either argument NaN  -> the NaN produced by x*x + y*y
 *   - either argument zero           -> |other argument|
 *   - exponents differ by more than MANTLENGTH_DP64 + 1 -> |x| + |y|
 *
 * If the scaled-back result exceeds the largest finite double, the
 * overflow is reported through _handle_error() and that call's return
 * value is returned.
 */
double FN_PROTOTYPE(_hypot)(double x, double y)
{
  /* Returns sqrt(x*x + y*y) with no overflow or underflow unless
     the result warrants it */

  const double large = 1.79769313486231570815e+308; /* 0x7fefffffffffffff */

  /* The arguments exactly as the caller passed them.  x and y are
     overwritten below (sign cleared, possibly rescaled by 2**600 and, in
     the accurate path, swapped), so the error report must not use them. */
  const double arg_x = x, arg_y = y;

#ifdef FAST_BUT_GREATER_THAN_ONE_ULP
  double r, retval;
  unsigned long xexp, yexp, ux, uy;
#else
  double u, r, retval, hx, tx, x2, hy, ty, y2, hs, ts;
  unsigned long xexp, yexp, ux, uy, ut;
#endif
  int dexp, expadjust;

  /* ux, uy: bit patterns of |x|, |y|; xexp, yexp: their biased exponents */
  GET_BITS_DP64(x, ux);
  ux &= ~SIGNBIT_DP64;
  GET_BITS_DP64(y, uy);
  uy &= ~SIGNBIT_DP64;
  xexp = (ux >> EXPSHIFTBITS_DP64);
  yexp = (uy >> EXPSHIFTBITS_DP64);

  if (xexp == BIASEDEMAX_DP64 + 1 || yexp == BIASEDEMAX_DP64 + 1)
    {
      /* One or both of the arguments are NaN or infinity. The
         result will also be NaN or infinity. */
      retval = x*x + y*y;
      if (((xexp == BIASEDEMAX_DP64 + 1) && !(ux & MANTBITS_DP64)) ||
          ((yexp == BIASEDEMAX_DP64 + 1) && !(uy & MANTBITS_DP64)))
        /* x or y is infinity. ISO C99 defines that we must
           return +infinity, even if the other argument is NaN.
           Note that the computation of x*x + y*y above will already
           have raised invalid if either x or y is a signalling NaN. */
        return infinity_with_flags(0);
      else
        /* One or both of x or y is NaN, and neither is infinity.
           Raise invalid if it's a signalling NaN */
        return retval;
    }

  /* Set x = abs(x) and y = abs(y) */
  PUT_BITS_DP64(ux, x);
  PUT_BITS_DP64(uy, y);

  /* The difference in exponents between x and y */
  dexp = (int)(xexp - yexp);
  expadjust = 0;

  if (ux == 0)
    /* x is zero */
    return y;
  else if (uy == 0)
    /* y is zero */
    return x;
  else if (dexp > MANTLENGTH_DP64 + 1 || dexp < -MANTLENGTH_DP64 - 1)
    /* One of x and y is insignificant compared to the other */
    return x + y; /* Raise inexact */
  else if (xexp > EXPBIAS_DP64 + 500 || yexp > EXPBIAS_DP64 + 500)
    {
      /* Danger of overflow; scale down by 2**600.
         0x258 == 600, positioned in the exponent field. */
      expadjust = 600;
      ux -= 0x2580000000000000;
      PUT_BITS_DP64(ux, x);
      uy -= 0x2580000000000000;
      PUT_BITS_DP64(uy, y);
    }
  else if (xexp < EXPBIAS_DP64 - 500 || yexp < EXPBIAS_DP64 - 500)
    {
      /* Danger of underflow; scale up by 2**600. */
      expadjust = -600;
      if (xexp == 0)
        {
          /* x is denormal - handle by adding 601 to the exponent
             and then subtracting a correction for the implicit bit */
          PUT_BITS_DP64(ux + 0x2590000000000000, x);
          x -= 9.23297861778573578076e-128; /* 0x2590000000000000 */
          GET_BITS_DP64(x, ux);
        }
      else
        {
          /* x is normal - just increase the exponent by 600 */
          ux += 0x2580000000000000;
          PUT_BITS_DP64(ux, x);
        }
      if (yexp == 0)
        {
          /* y is denormal - same treatment as x above */
          PUT_BITS_DP64(uy + 0x2590000000000000, y);
          y -= 9.23297861778573578076e-128; /* 0x2590000000000000 */
          GET_BITS_DP64(y, uy);
        }
      else
        {
          /* y is normal - just increase the exponent by 600 */
          uy += 0x2580000000000000;
          PUT_BITS_DP64(uy, y);
        }
    }


#ifdef FAST_BUT_GREATER_THAN_ONE_ULP
  /* Not awful, but results in accuracy loss larger than 1 ulp */
  r = x*x + y*y;
#else
  /* Slower but more accurate */

  /* Sort so that x is greater than y */
  if (x < y)
    {
      u = y;
      y = x;
      x = u;
      ut = ux;
      ux = uy;
      uy = ut;
    }

  /* Split x into hx and tx, head and tail */
  PUT_BITS_DP64(ux & 0xfffffffff8000000, hx);
  tx = x - hx;

  PUT_BITS_DP64(uy & 0xfffffffff8000000, hy);
  ty = y - hy;

  /* Compute r = x*x + y*y with extra precision */
  x2 = x*x;
  y2 = y*y;
  hs = x2 + y2;

  if (dexp == 0)
    /* We take most care when x and y have equal exponents,
       i.e. are almost the same size */
    ts = (((x2 - hs) + y2) +
          ((hx * hx - x2) + 2 * hx * tx) + tx * tx) +
      ((hy * hy - y2) + 2 * hy * ty) + ty * ty;
  else
    ts = (((x2 - hs) + y2) +
          ((hx * hx - x2) + 2 * hx * tx) + tx * tx);

  r = hs + ts;
#endif

  /* The sqrt can introduce another half ulp error. */
  /* VC++ intrinsic call */
  _mm_store_sd(&retval, _mm_sqrt_sd(_mm_setzero_pd(), _mm_load_sd(&r)));

  /* If necessary scale the result back. This may lead to
     overflow but if so that's the correct result. */
  retval = scaleDouble_1(retval, expadjust);

  if (retval > large)
    /* The result overflowed. Deal with errno.  Report the caller's own
       arguments, not the sign-stripped / rescaled working copies. */
    return _handle_error("_hypot", OP_HYPOT, PINFBITPATT_DP64, _OVERFLOW,
                         AMD_F_OVERFLOW | AMD_F_INEXACT, ERANGE, arg_x, arg_y, 2);

  return retval;
}
199 | -------------------------------------------------------------------------------- /hypotf.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************************************************* 3 | MIT License 4 | ----------- 5 | 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this Software and associated documentaon files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 25 | *******************************************************************************/ 26 | 27 | #include "libm.h" 28 | #include "libm_util.h" 29 | 30 | #ifdef USE_SOFTWARE_SQRT 31 | #define USE_SQRTF_AMD_INLINE 32 | #endif 33 | #define USE_INFINITYF_WITH_FLAGS 34 | #define USE_HANDLE_ERRORF 35 | #include "libm_inlines.h" 36 | #ifdef USE_SOFTWARE_SQRT 37 | #undef USE_SQRTF_AMD_INLINE 38 | #endif 39 | #undef USE_INFINITYF_WITH_FLAGS 40 | #undef USE_HANDLE_ERRORF 41 | 42 | #include "libm_errno.h" 43 | 44 |
/*
 * _hypotf(x, y): single-precision sqrt(x*x + y*y).
 *
 * The sum of squares and the square root are evaluated in double
 * precision and the result is narrowed to float on return.
 *
 *   - either argument infinite       -> +infinity, even if the other is NaN
 *   - otherwise either argument NaN  -> the NaN produced by x*x + y*y
 *   - result larger than the largest finite float -> reported through
 *     _handle_errorf(), whose return value is returned
 */
float FN_PROTOTYPE(_hypotf)(float x, float y)
{
  /* Returns sqrt(x*x + y*y) with no overflow or underflow unless
     the result warrants it */

    /* Do intermediate computations in double precision
       and use sqrt instruction from chip if available. */
    double dx = x, dy = y, dr, retval;

    /* The largest finite float, stored as a double */
    const double large = 3.40282346638528859812e+38; /* 0x47efffffe0000000 */


    unsigned long ux, uy, avx, avy;

    /* GET_BITS_DP64 stores its first argument into a double, so x and y
       are inspected in double-precision layout: avx/avy are the bit
       patterns of |x|/|y|, ux/uy their biased double exponents. */
    GET_BITS_DP64(x, avx);
    avx &= ~SIGNBIT_DP64;
    GET_BITS_DP64(y, avy);
    avy &= ~SIGNBIT_DP64;
    ux = (avx >> EXPSHIFTBITS_DP64);
    uy = (avy >> EXPSHIFTBITS_DP64);

    if (ux == BIASEDEMAX_DP64 + 1 || uy == BIASEDEMAX_DP64 + 1)
      {
        retval = x*x + y*y;
        /* One or both of the arguments are NaN or infinity. The
           result will also be NaN or infinity. */
        if (((ux == BIASEDEMAX_DP64 + 1) && !(avx & MANTBITS_DP64)) ||
            ((uy == BIASEDEMAX_DP64 + 1) && !(avy & MANTBITS_DP64)))
          /* x or y is infinity. ISO C99 defines that we must
             return +infinity, even if the other argument is NaN.
             Note that the computation of x*x + y*y above will already
             have raised invalid if either x or y is a signalling NaN. */
          return infinityf_with_flags(0);
        else
          /* One or both of x or y is NaN, and neither is infinity.
             Raise invalid if it's a signalling NaN */
          return (float)retval;
      }

    dr = (dx*dx + dy*dy);

    /* Tested with #ifdef, like the two USE_SOFTWARE_SQRT guards in this
       file's preamble, so a valueless definition of the macro works. */
#ifdef USE_SOFTWARE_SQRT
    /* The operand is dr; the previous spelling `r` named no variable in
       this function, so this configuration could not compile.
       NOTE(review): sqrtf_amd_inline is enabled via USE_SQRTF_AMD_INLINE
       and is presumably a float routine, in which case dr is narrowed on
       the call and sums above the float range would not survive --
       confirm whether a double-precision inline sqrt was intended. */
    retval = sqrtf_amd_inline(dr);
#else
    /* VC++ intrinsic call */
    _mm_store_sd(&retval, _mm_sqrt_sd(_mm_setzero_pd(), _mm_load_sd(&dr)));
#endif

    if (retval > large)
      /* Too large for a float: report overflow with the original arguments */
      return _handle_errorf("_hypotf", OP_HYPOT, PINFBITPATT_SP32, _OVERFLOW,
                            AMD_F_OVERFLOW | AMD_F_INEXACT, ERANGE, x, y, 2);
    else
      return (float)retval;
}
100 | -------------------------------------------------------------------------------- /libm.h: -------------------------------------------------------------------------------- 1 | /***********************************************************************************/ 2 | /** MIT License **/ 3 | /** ----------- **/ 4 | /** **/ 5 | /** Copyright (c) 2002-2019 Advanced Micro Devices, Inc. **/ 6 | /** **/ 7 | /** Permission is hereby granted, free of charge, to any person obtaining a copy **/ 8 | /** of this Software and associated documentaon files (the "Software"), to deal **/ 9 | /** in the Software without restriction, including without limitation the rights **/ 10 | /** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell **/ 11 | /** copies of the Software, and to permit persons to whom the Software is **/ 12 | /** furnished to do so, subject to the following conditions: **/ 13 | /** **/ 14 | /** The above copyright notice and this permission notice shall be included in **/ 15 | /** all copies or substantial portions of the Software.
**/ 16 | /** **/ 17 | /** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR **/ 18 | /** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, **/ 19 | /** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE **/ 20 | /** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER **/ 21 | /** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, **/ 22 | /** OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN **/ 23 | /** THE SOFTWARE. **/ 24 | /***********************************************************************************/ 25 | 26 | #ifndef LIBM_AMD_H_INCLUDED 27 | #define LIBM_AMD_H_INCLUDED 1 28 | 29 | #define FN_PROTOTYPE(fname) fname 30 | 31 | #include 32 | #include 33 | 34 | #ifndef IS_64BIT 35 | #define IS_64BIT 36 | #endif 37 | 38 | #ifndef _COMPLEX_DEFINED 39 | struct _complex 40 | { 41 | double x, y; /* real and imaginary parts */ 42 | }; 43 | #define _COMPLEX_DEFINED 44 | #endif 45 | #define COMPLEX struct _complex 46 | 47 | extern void __remainder_piby2(double x, double *r, double *rr, int *region); 48 | 49 | #endif /* LIBM_AMD_H_INCLUDED */ 50 | -------------------------------------------------------------------------------- /libm_errno.h: -------------------------------------------------------------------------------- 1 | /***********************************************************************************/ 2 | /** MIT License **/ 3 | /** ----------- **/ 4 | /** **/ 5 | /** Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 
**/ 6 | /** **/ 7 | /** Permission is hereby granted, free of charge, to any person obtaining a copy **/ 8 | /** of this Software and associated documentaon files (the "Software"), to deal **/ 9 | /** in the Software without restriction, including without limitation the rights **/ 10 | /** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell **/ 11 | /** copies of the Software, and to permit persons to whom the Software is **/ 12 | /** furnished to do so, subject to the following conditions: **/ 13 | /** **/ 14 | /** The above copyright notice and this permission notice shall be included in **/ 15 | /** all copies or substantial portions of the Software. **/ 16 | /** **/ 17 | /** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR **/ 18 | /** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, **/ 19 | /** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE **/ 20 | /** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER **/ 21 | /** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, **/ 22 | /** OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN **/ 23 | /** THE SOFTWARE. **/ 24 | /***********************************************************************************/ 25 | 26 | #ifndef LIBM_ERRNO_AMD_H_INCLUDED 27 | #define LIBM_ERRNO_AMD_H_INCLUDED 1 28 | 29 | #include 30 | #include 31 | #ifndef __set_errno 32 | #define __set_errno(x) errno = (x) 33 | #endif 34 | 35 | #endif /* LIBM_ERRNO_AMD_H_INCLUDED */ 36 | -------------------------------------------------------------------------------- /libm_new.h: -------------------------------------------------------------------------------- 1 | 2 | /***********************************************************************************/ 3 | /** MIT License **/ 4 | /** ----------- **/ 5 | /** **/ 6 | /** Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 
**/ 7 | /** **/ 8 | /** Permission is hereby granted, free of charge, to any person obtaining a copy **/ 9 | /** of this Software and associated documentaon files (the "Software"), to deal **/ 10 | /** in the Software without restriction, including without limitation the rights **/ 11 | /** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell **/ 12 | /** copies of the Software, and to permit persons to whom the Software is **/ 13 | /** furnished to do so, subject to the following conditions: **/ 14 | /** **/ 15 | /** The above copyright notice and this permission notice shall be included in **/ 16 | /** all copies or substantial portions of the Software. **/ 17 | /** **/ 18 | /** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR **/ 19 | /** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, **/ 20 | /** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE **/ 21 | /** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER **/ 22 | /** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, **/ 23 | /** OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN **/ 24 | /** THE SOFTWARE. **/ 25 | /***********************************************************************************/ 26 | 27 | #ifndef __LIBM_NEW_H__ 28 | #define __LIBM_NEW_H__ 29 | 30 | // Defines, protos, etc for *new* math funcs updated by AMD 11/2008 31 | // Old files will continue to include libm_util.h, libm.h, libm_inlines.h 32 | // until such time as these have all been refreshed w/ new versions. 
33 | 34 | typedef float F32; 35 | typedef unsigned int U32; 36 | 37 | typedef double F64; 38 | typedef unsigned long long U64; 39 | 40 | union UT32_ 41 | { 42 | F32 f32; 43 | U32 u32; 44 | }; 45 | 46 | union UT64_ 47 | { 48 | F64 f64; 49 | U64 u64; 50 | 51 | F32 f32[2]; 52 | U32 u32[2]; 53 | }; 54 | 55 | typedef union UT32_ UT32; 56 | typedef union UT64_ UT64; 57 | 58 | #define SIGN_MASK_32 0x80000000 59 | #define MANTISSA_MASK_32 0x007fffff 60 | #define EXPONENT_MASK_32 0x7f800000 61 | #define QNAN_MASK_32 0x00400000 62 | 63 | #define INF_POS_32 0x7f800000 64 | #define INF_NEG_32 0xff800000 65 | #define QNAN_POS_32 0x7fc00000 66 | #define QNAN_NEG_32 0xffc00000 67 | #define IND_32 0xffc00000 68 | 69 | #define EXPONENT_FULL_32 0x7f800000 70 | #define SIGN_SET_32 0x80000000 71 | #define QNAN_SET_32 0x00400000 72 | 73 | #define INF_POS_64 0x7ff0000000000000 74 | #define INF_NEG_64 0xfff0000000000000 75 | 76 | #define MANTISSA_MASK_64 0x000fffffffffffff 77 | #define SIGN_MASK_64 0x8000000000000000 78 | #define IND_64 0xfff8000000000000 79 | #define QNAN_MASK_64 0x0008000000000000 80 | 81 | // constants for 'flags' argument of _handle_error and _handle_errorf 82 | #define AMD_F_INEXACT 0x00000010 83 | #define AMD_F_OVERFLOW 0x00000001 84 | #define AMD_F_UNDERFLOW 0x00000002 85 | #define AMD_F_DIVBYZERO 0x00000004 86 | #define AMD_F_INVALID 0x00000008 87 | 88 | // define the Microsoft specific error handling routine 89 | 90 | // Note to maintainers: 91 | // These prototypes may appear, at first glance, to differ from the versions 92 | // declared in libm_inlines.h and defined in libm_error.c. The third 93 | // parameter appears to have changed type from unsigned long to unsigned long 94 | // long. In fact they are the same because in both of the aforementioned 95 | // files, long has been #defined to __int64 in a most cowardly fashion. This 96 | // disgusts me. The buck stops here.
- MAS 97 | 98 | double _handle_error( 99 | char *fname, 100 | int opcode, 101 | unsigned long long value, 102 | int type, 103 | int flags, 104 | int error, 105 | double arg1, 106 | double arg2, 107 | int nargs 108 | ); 109 | float _handle_errorf( 110 | char *fname, 111 | int opcode, 112 | unsigned long long value, 113 | int type, 114 | int flags, 115 | int error, 116 | float arg1, 117 | float arg2, 118 | int nargs 119 | ); 120 | 121 | #endif // __LIBM_NEW_H__ 122 | 123 | -------------------------------------------------------------------------------- /libm_util.h: -------------------------------------------------------------------------------- 1 | /***********************************************************************************/ 2 | /** MIT License **/ 3 | /** ----------- **/ 4 | /** **/ 5 | /** Copyright (c) 2002-2019 Advanced Micro Devices, Inc. **/ 6 | /** **/ 7 | /** Permission is hereby granted, free of charge, to any person obtaining a copy **/ 8 | /** of this Software and associated documentaon files (the "Software"), to deal **/ 9 | /** in the Software without restriction, including without limitation the rights **/ 10 | /** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell **/ 11 | /** copies of the Software, and to permit persons to whom the Software is **/ 12 | /** furnished to do so, subject to the following conditions: **/ 13 | /** **/ 14 | /** The above copyright notice and this permission notice shall be included in **/ 15 | /** all copies or substantial portions of the Software. **/ 16 | /** **/ 17 | /** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR **/ 18 | /** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, **/ 19 | /** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE **/ 20 | /** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER **/ 21 | /** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, **/ 22 | /** OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN **/ 23 | /** THE SOFTWARE. **/ 24 | /***********************************************************************************/ 25 | 26 | #ifndef LIBM_UTIL_AMD_H_INCLUDED 27 | #define LIBM_UTIL_AMD_H_INCLUDED 1 28 | 29 | #define inline __inline 30 | #undef long 31 | #define long __int64 32 | 33 | #include "emmintrin.h" 34 | #include "float.h" 35 | 36 | 37 | 38 | /* Compile-time verification that type long is the same size 39 | as type double (i.e. we are really on a 64-bit machine) */ 40 | void check_long_against_double_size(int machine_is_64_bit[(sizeof(long) == sizeof(double))?1:-1]); 41 | 42 | 43 | /* Definitions for double functions on 64 bit machines */ 44 | #define SIGNBIT_DP64 0x8000000000000000 45 | #define EXPBITS_DP64 0x7ff0000000000000 46 | #define MANTBITS_DP64 0x000fffffffffffff 47 | #define ONEEXPBITS_DP64 0x3ff0000000000000 48 | #define TWOEXPBITS_DP64 0x4000000000000000 49 | #define HALFEXPBITS_DP64 0x3fe0000000000000 50 | #define IMPBIT_DP64 0x0010000000000000 51 | #define QNANBITPATT_DP64 0x7ff8000000000000 52 | #define INDEFBITPATT_DP64 0xfff8000000000000 53 | #define PINFBITPATT_DP64 0x7ff0000000000000 54 | #define NINFBITPATT_DP64 0xfff0000000000000 55 | #define EXPBIAS_DP64 1023 56 | #define EXPSHIFTBITS_DP64 52 57 | #define BIASEDEMIN_DP64 1 58 | #define EMIN_DP64 -1022 59 | #define BIASEDEMAX_DP64 2046 60 | #define EMAX_DP64 1023 61 | #define LAMBDA_DP64 1.0e300 62 | #define MANTLENGTH_DP64 53 63 | #define BASEDIGITS_DP64 15 64 | 65 | 66 | /* These definitions, used by float functions, 67 | are for both 32 and 64 bit machines */ 68 | #define SIGNBIT_SP32 0x80000000 69 | #define EXPBITS_SP32 0x7f800000 70 | #define MANTBITS_SP32 0x007fffff 71 | #define ONEEXPBITS_SP32 
0x3f800000 72 | #define TWOEXPBITS_SP32 0x40000000 73 | #define HALFEXPBITS_SP32 0x3f000000 74 | #define IMPBIT_SP32 0x00800000 75 | #define QNANBITPATT_SP32 0x7fc00000 76 | #define INDEFBITPATT_SP32 0xffc00000 77 | #define PINFBITPATT_SP32 0x7f800000 78 | #define NINFBITPATT_SP32 0xff800000 79 | #define EXPBIAS_SP32 127 80 | #define EXPSHIFTBITS_SP32 23 81 | #define BIASEDEMIN_SP32 1 82 | #define EMIN_SP32 -126 83 | #define BIASEDEMAX_SP32 254 84 | #define EMAX_SP32 127 85 | #define LAMBDA_SP32 1.0e30 86 | #define MANTLENGTH_SP32 24 87 | #define BASEDIGITS_SP32 7 88 | 89 | #define CLASS_SIGNALLING_NAN 1 90 | #define CLASS_QUIET_NAN 2 91 | #define CLASS_NEGATIVE_INFINITY 3 92 | #define CLASS_NEGATIVE_NORMAL_NONZERO 4 93 | #define CLASS_NEGATIVE_DENORMAL 5 94 | #define CLASS_NEGATIVE_ZERO 6 95 | #define CLASS_POSITIVE_ZERO 7 96 | #define CLASS_POSITIVE_DENORMAL 8 97 | #define CLASS_POSITIVE_NORMAL_NONZERO 9 98 | #define CLASS_POSITIVE_INFINITY 10 99 | 100 | #define OLD_BITS_SP32(x) (*((unsigned int *)&x)) 101 | #define OLD_BITS_DP64(x) (*((unsigned long *)&x)) 102 | 103 | /* Alternatives to the above functions which don't have 104 | problems when using high optimization levels on gcc */ 105 | #define GET_BITS_SP32(x, ux) \ 106 | { \ 107 | volatile union {float f; unsigned int i;} _bitsy; \ 108 | _bitsy.f = (x); \ 109 | ux = _bitsy.i; \ 110 | } 111 | #define PUT_BITS_SP32(ux, x) \ 112 | { \ 113 | volatile union {float f; unsigned int i;} _bitsy; \ 114 | _bitsy.i = (ux); \ 115 | x = _bitsy.f; \ 116 | } 117 | 118 | #define GET_BITS_DP64(x, ux) \ 119 | { \ 120 | volatile union {double d; unsigned long i;} _bitsy; \ 121 | _bitsy.d = (x); \ 122 | ux = _bitsy.i; \ 123 | } 124 | #define PUT_BITS_DP64(ux, x) \ 125 | { \ 126 | volatile union {double d; unsigned long i;} _bitsy; \ 127 | _bitsy.i = (ux); \ 128 | x = _bitsy.d; \ 129 | } 130 | 131 | 132 | /* Processor-dependent floating-point status flags */ 133 | #define AMD_F_OVERFLOW 0x00000001 134 | #define AMD_F_UNDERFLOW 
0x00000002 135 | #define AMD_F_DIVBYZERO 0x00000004 136 | #define AMD_F_INVALID 0x00000008 137 | #define AMD_F_INEXACT 0x00000010 138 | 139 | /* Processor-dependent floating-point precision-control flags */ 140 | #define AMD_F_EXTENDED 0x00000300 141 | #define AMD_F_DOUBLE 0x00000200 142 | #define AMD_F_SINGLE 0x00000000 143 | 144 | /* Processor-dependent floating-point rounding-control flags */ 145 | #define AMD_F_RC_NEAREST 0x00000000 146 | #define AMD_F_RC_DOWN 0x00002000 147 | #define AMD_F_RC_UP 0x00004000 148 | #define AMD_F_RC_ZERO 0x00006000 149 | 150 | #endif /* LIBM_UTIL_AMD_H_INCLUDED */ 151 | -------------------------------------------------------------------------------- /log_F_inv_dword_table.asm: -------------------------------------------------------------------------------- 1 | ;; 2 | ; 3 | ; MIT License 4 | ; ----------- 5 | ; 6 | ; Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 7 | ; 8 | ; Permission is hereby granted, free of charge, to any person obtaining a copy 9 | ; of this Software and associated documentaon files (the "Software"), to deal 10 | ; in the Software without restriction, including without limitation the rights 11 | ; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | ; copies of the Software, and to permit persons to whom the Software is 13 | ; furnished to do so, subject to the following conditions: 14 | ; 15 | ; The above copyright notice and this permission notice shall be included in 16 | ; all copies or substantial portions of the Software. 17 | ; 18 | ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 21 | ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | ; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | ; THE SOFTWARE. 25 | ; 26 | ;; Defines __log_F_inv_dword 27 | ;; Used in log10f and logf 28 | ;; 29 | 30 | .const 31 | 32 | ALIGN 16 33 | PUBLIC __log_F_inv_dword 34 | __log_F_inv_dword DD 40000000h 35 | DD 3ffe03f8h 36 | DD 3ffc0fc1h 37 | DD 3ffa232dh 38 | DD 3ff83e10h 39 | DD 3ff6603eh 40 | DD 3ff4898dh 41 | DD 3ff2b9d6h 42 | DD 3ff0f0f1h 43 | DD 3fef2eb7h 44 | DD 3fed7304h 45 | DD 3febbdb3h 46 | DD 3fea0ea1h 47 | DD 3fe865ach 48 | DD 3fe6c2b4h 49 | DD 3fe52598h 50 | DD 3fe38e39h 51 | DD 3fe1fc78h 52 | DD 3fe07038h 53 | DD 3fdee95ch 54 | DD 3fdd67c9h 55 | DD 3fdbeb62h 56 | DD 3fda740eh 57 | DD 3fd901b2h 58 | DD 3fd79436h 59 | DD 3fd62b81h 60 | DD 3fd4c77bh 61 | DD 3fd3680dh 62 | DD 3fd20d21h 63 | DD 3fd0b6a0h 64 | DD 3fcf6475h 65 | DD 3fce168ah 66 | DD 3fcccccdh 67 | DD 3fcb8728h 68 | DD 3fca4588h 69 | DD 3fc907dah 70 | DD 3fc7ce0ch 71 | DD 3fc6980ch 72 | DD 3fc565c8h 73 | DD 3fc43730h 74 | DD 3fc30c31h 75 | DD 3fc1e4bch 76 | DD 3fc0c0c1h 77 | DD 3fbfa030h 78 | DD 3fbe82fah 79 | DD 3fbd6910h 80 | DD 3fbc5264h 81 | DD 3fbb3ee7h 82 | DD 3fba2e8ch 83 | DD 3fb92144h 84 | DD 3fb81703h 85 | DD 3fb70fbbh 86 | DD 3fb60b61h 87 | DD 3fb509e7h 88 | DD 3fb40b41h 89 | DD 3fb30f63h 90 | DD 3fb21643h 91 | DD 3fb11fd4h 92 | DD 3fb02c0bh 93 | DD 3faf3adeh 94 | DD 3fae4c41h 95 | DD 3fad602bh 96 | DD 3fac7692h 97 | DD 3fab8f6ah 98 | DD 3faaaaabh 99 | DD 3fa9c84ah 100 | DD 3fa8e83fh 101 | DD 3fa80a81h 102 | DD 3fa72f05h 103 | DD 3fa655c4h 104 | DD 3fa57eb5h 105 | DD 3fa4a9cfh 106 | DD 3fa3d70ah 107 | DD 3fa3065eh 108 | DD 3fa237c3h 109 | DD 3fa16b31h 110 | DD 3fa0a0a1h 111 | DD 3f9fd80ah 112 | DD 3f9f1166h 113 | DD 3f9e4cadh 114 | DD 3f9d89d9h 115 | DD 3f9cc8e1h 116 | DD 3f9c09c1h 117 | DD 3f9b4c70h 118 | DD 
3f9a90e8h 119 | DD 3f99d723h 120 | DD 3f991f1ah 121 | DD 3f9868c8h 122 | DD 3f97b426h 123 | DD 3f97012eh 124 | DD 3f964fdah 125 | DD 3f95a025h 126 | DD 3f94f209h 127 | DD 3f944581h 128 | DD 3f939a86h 129 | DD 3f92f114h 130 | DD 3f924925h 131 | DD 3f91a2b4h 132 | DD 3f90fdbch 133 | DD 3f905a38h 134 | DD 3f8fb824h 135 | DD 3f8f177ah 136 | DD 3f8e7835h 137 | DD 3f8dda52h 138 | DD 3f8d3dcbh 139 | DD 3f8ca29ch 140 | DD 3f8c08c1h 141 | DD 3f8b7034h 142 | DD 3f8ad8f3h 143 | DD 3f8a42f8h 144 | DD 3f89ae41h 145 | DD 3f891ac7h 146 | DD 3f888889h 147 | DD 3f87f781h 148 | DD 3f8767abh 149 | DD 3f86d905h 150 | DD 3f864b8ah 151 | DD 3f85bf37h 152 | DD 3f853408h 153 | DD 3f84a9fah 154 | DD 3f842108h 155 | DD 3f839930h 156 | DD 3f83126fh 157 | DD 3f828cc0h 158 | DD 3f820821h 159 | DD 3f81848eh 160 | DD 3f810204h 161 | DD 3f808081h 162 | DD 3f800000h 163 | 164 | END 165 | -------------------------------------------------------------------------------- /log_special.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************************************************* 3 | MIT License 4 | ----------- 5 | 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this Software and associated documentaon files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 
17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 25 | *******************************************************************************/ 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | 33 | #include "libm_new.h" 34 | 35 | // y = log10f(x) 36 | // y = log10(x) 37 | // y = logf(x) 38 | // y = log(x) 39 | 40 | // these codes and the ones in the related .asm files have to match 41 | #define LOG_X_ZERO 1 42 | #define LOG_X_NEG 2 43 | #define LOG_X_NAN 3 44 |
/*
 * Shared special-case reporter for the single-precision log routines.
 *
 *   x     the argument that triggered the special case
 *   y     the result already chosen by the caller; always returned as-is
 *   code  LOG_X_ZERO, LOG_X_NEG or LOG_X_NAN (any other value: no report)
 *   op    operation code forwarded to _handle_errorf
 *   name  function name forwarded to _handle_errorf
 *
 * The bits of y are forwarded in the low 32 bits of the handler's 64-bit
 * `value` argument, with the upper half zeroed.
 */
static float _logf_special_common(float x, float y, U32 code, unsigned int op, char *name)
{
    UT64 result_bits;
    int  kind, fp_flags, err;

    if (code == LOG_X_ZERO)
    {
        /* log(0): singularity, divide-by-zero flag, range error */
        kind     = _SING;
        fp_flags = AMD_F_DIVBYZERO;
        err      = ERANGE;
    }
    else if (code == LOG_X_NEG)
    {
        /* log(negative): invalid operation, domain error */
        kind     = _DOMAIN;
        fp_flags = AMD_F_INVALID;
        err      = EDOM;
    }
    else if (code == LOG_X_NAN)
    {
        /* NaN input: the invalid flag is requested only when the quiet
           bit of x is clear, i.e. for a signalling NaN */
        UT32 arg_bits;

        arg_bits.f32 = x;
        kind     = _DOMAIN;
        fp_flags = ((arg_bits.u32 & QNAN_MASK_32) == QNAN_SET_32) ? 0 : AMD_F_INVALID;
        err      = EDOM;
    }
    else
    {
        /* unrecognised code: nothing to report */
        return y;
    }

    result_bits.u64    = 0;
    result_bits.f32[0] = y;
    _handle_errorf(name, op, result_bits.u64, kind, fp_flags, err, x, 0.0, 1);

    return y;
}

/* Special-case entry point for logf */
float _logf_special(float x, float y, U32 code)
{
    return _logf_special_common(x, y, code, _FpCodeLog, "logf");
}

/* Special-case entry point for log10f */
float _log10f_special(float x, float y, U32 code)
{
    return _logf_special_common(x, y, code, _FpCodeLog10, "log10f");
}

/*
 * Shared special-case reporter for the double-precision log routines.
 * Same contract as _logf_special_common, except that the full 64-bit
 * pattern of y is forwarded and a NaN input is always reported with no
 * exception flags.
 */
static double _log_special_common(double x, double y, U32 code, unsigned int op, char *name)
{
    UT64 result_bits;
    int  kind, fp_flags, err;

    if (code == LOG_X_ZERO)
    {
        /* log(0): singularity, divide-by-zero flag, range error */
        kind     = _SING;
        fp_flags = AMD_F_DIVBYZERO;
        err      = ERANGE;
    }
    else if (code == LOG_X_NEG)
    {
        /* log(negative): invalid operation, domain error */
        kind     = _DOMAIN;
        fp_flags = AMD_F_INVALID;
        err      = EDOM;
    }
    else if (code == LOG_X_NAN)
    {
        /* NaN input: domain error, no flags */
        kind     = _DOMAIN;
        fp_flags = 0;
        err      = EDOM;
    }
    else
    {
        /* unrecognised code: nothing to report */
        return y;
    }

    result_bits.f64 = y;
    _handle_error(name, op, result_bits.u64, kind, fp_flags, err, x, 0.0, 1);

    return y;
}

/* Special-case entry point for log */
double _log_special(double x, double y, U32 code)
{
    return _log_special_common(x, y, code, _FpCodeLog, "log");
}

/* Special-case entry point for log10 */
double _log10_special(double x, double y, U32 code)
{
    return _log_special_common(x, y, code, _FpCodeLog10, "log10");
}
134 | -------------------------------------------------------------------------------- /logb.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************************************************* 3 | MIT License 4 | ----------- 5 | 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this Software and associated documentaon files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 25 | *******************************************************************************/ 26 | 27 | #include "libm.h" 28 | #include "libm_util.h" 29 | 30 | #define USE_INFINITY_WITH_FLAGS 31 | #define USE_HANDLE_ERROR 32 | #include "libm_inlines.h" 33 | #undef USE_INFINITY_WITH_FLAGS 34 | #undef USE_HANDLE_ERROR 35 | 36 | #include "libm_errno.h" 37 |
/*
 * _logb(x): the unbiased binary exponent of x, returned as a double.
 *
 *   +/-0       -> result of _handle_error (requested value -infinity,
 *                 div-by-zero flag, ERANGE)
 *   +/-inf     -> x itself
 *   NaN        -> result of _handle_error (requested value: x with the
 *                 quiet bit set, no flags, EDOM)
 *   normal     -> exponent field minus EXPBIAS_DP64
 *   denormal   -> EMIN_DP64 when FOLLOW_IEEE754_LOGB is defined, otherwise
 *                 the exponent the value would have once normalised
 */
double _logb(double x)
{
  unsigned long bits;
  long exponent;

  GET_BITS_DP64(x, bits);

  /* Everything but the sign bit clear: x is +/-zero.
     Return -infinity with div-by-zero flag. */
  if ((bits & ~SIGNBIT_DP64) == 0)
    return _handle_error("_logb", OP_LOGB, NINFBITPATT_DP64, _SING,
                         AMD_F_DIVBYZERO, ERANGE, x, 0.0, 1);

  exponent = ((bits & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64;

  if (exponent > EMAX_DP64)
    {
      /* Exponent field is all ones: x is infinity or NaN */
      if ((bits & MANTBITS_DP64) != 0)
        /* x is NaN, result is NaN */
        return _handle_error("_logb", OP_LOGB, bits|0x0008000000000000, _DOMAIN,
                             0, EDOM, x, 0.0, 1);

      /* x is +/-infinity. For VC++, return infinity of same sign. */
      return x;
    }

  if (exponent >= EMIN_DP64)
    /* x is a normal number */
    return (double)exponent;

  /* Only denormalized values reach this point. */
#ifdef FOLLOW_IEEE754_LOGB
  /* Return the value of the minimum exponent to ensure that
     the relationship between logb and scalb, defined in
     IEEE 754, holds. */
  return EMIN_DP64;
#else
  /* Follow the rule set by IEEE 854 for logb: shift the mantissa left
     until the implicit-bit position is occupied, lowering the exponent
     by one for every shift. */
  bits &= MANTBITS_DP64;
  for (exponent = EMIN_DP64; bits < IMPBIT_DP64; bits <<= 1)
    exponent--;
  return (double)exponent;
#endif
}
85 | -------------------------------------------------------------------------------- /logbf.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************************************************* 3 | MIT License 4 | ----------- 5 | 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this Software and associated documentaon files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 25 | *******************************************************************************/ 26 | 27 | #include "libm.h" 28 | #include "libm_util.h" 29 | 30 | #define USE_INFINITYF_WITH_FLAGS 31 | #define USE_HANDLE_ERRORF 32 | #include "libm_inlines.h" 33 | #undef USE_INFINITYF_WITH_FLAGS 34 | #undef USE_HANDLE_ERRORF 35 | 36 | #include "libm_errno.h" 37 | 38 | float _logbf(float x) 39 | { 40 | unsigned int ux; 41 | int u; 42 | GET_BITS_SP32(x, ux); 43 | u = ((ux & EXPBITS_SP32) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32; 44 | if ((ux & ~SIGNBIT_SP32) == 0) 45 | /* x is +/-zero. Return -infinity with div-by-zero flag. 
*/ 46 | return _handle_errorf("_logbf", OP_LOGB, NINFBITPATT_SP32, _SING, 47 | AMD_F_DIVBYZERO, ERANGE, x, 0.0F, 1); 48 | else if (EMIN_SP32 <= u && u <= EMAX_SP32) 49 | /* x is a normal number */ 50 | return (float)u; 51 | else if (u > EMAX_SP32) 52 | { 53 | /* x is infinity or NaN */ 54 | if ((ux & MANTBITS_SP32) == 0) 55 | /* x is +/-infinity. For VC++, return infinity of same sign. */ 56 | return x; 57 | else 58 | /* x is NaN, result is NaN */ 59 | return _handle_errorf("_logbf", OP_LOGB, ux|0x00400000, _DOMAIN, 60 | 0, EDOM, x, 0.0F, 1); 61 | } 62 | else 63 | { 64 | /* x is denormalized. */ 65 | #ifdef FOLLOW_IEEE754_LOGB 66 | /* Return the value of the minimum exponent to ensure that 67 | the relationship between logb and scalb, defined in 68 | IEEE 754, holds. */ 69 | return EMIN_SP32; 70 | #else 71 | /* Follow the rule set by IEEE 854 for logb */ 72 | ux &= MANTBITS_SP32; 73 | u = EMIN_SP32; 74 | while (ux < IMPBIT_SP32) 75 | { 76 | ux <<= 1; 77 | u--; 78 | } 79 | return (float)u; 80 | #endif 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /modf.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************************************************* 3 | MIT License 4 | ----------- 5 | 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this Software and associated documentaon files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 
17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 25 | *******************************************************************************/ 26 | 27 | #include "libm.h" 28 | #include "libm_util.h" 29 | 30 | double modf(double x, double *iptr) 31 | { 32 | /* modf splits the argument x into integer and fraction parts, 33 | each with the same sign as x. */ 34 | 35 | 36 | long xexp; 37 | unsigned long ux, ax, mask; 38 | 39 | GET_BITS_DP64(x, ux); 40 | ax = ux & (~SIGNBIT_DP64); 41 | 42 | if (ax >= 0x4340000000000000) 43 | { 44 | /* abs(x) is either NaN, infinity, or >= 2^53 */ 45 | if (ax > 0x7ff0000000000000) 46 | { 47 | /* x is NaN */ 48 | *iptr = x; 49 | return x + x; /* Raise invalid if it is a signalling NaN */ 50 | } 51 | else 52 | { 53 | /* x is infinity or large. Return zero with the sign of x */ 54 | *iptr = x; 55 | PUT_BITS_DP64(ux & SIGNBIT_DP64, x); 56 | return x; 57 | } 58 | } 59 | else if (ax < 0x3ff0000000000000) 60 | { 61 | /* abs(x) < 1.0. Set iptr to zero with the sign of x 62 | and return x. 
*/ 63 | PUT_BITS_DP64(ux & SIGNBIT_DP64, *iptr); 64 | return x; 65 | } 66 | else 67 | { 68 | xexp = ((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64; 69 | /* Mask out the bits of x that we don't want */ 70 | mask = 1; 71 | mask = (mask << (EXPSHIFTBITS_DP64 - xexp)) - 1; 72 | PUT_BITS_DP64(ux & ~mask, *iptr); 73 | return x - *iptr; 74 | } 75 | 76 | } 77 | -------------------------------------------------------------------------------- /modff.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************************************************* 3 | MIT License 4 | ----------- 5 | 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this Software and associated documentaon files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 
25 | *******************************************************************************/ 26 | 27 | #include "libm.h" 28 | #include "libm_util.h" 29 | 30 | float modff(float x, float *iptr) 31 | { 32 | /* modff splits the argument x into integer and fraction parts, 33 | each with the same sign as x. */ 34 | 35 | unsigned int ux, mask; 36 | int xexp; 37 | 38 | GET_BITS_SP32(x, ux); 39 | xexp = ((ux & (~SIGNBIT_SP32)) >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32; 40 | 41 | if (xexp < 0) 42 | { 43 | /* abs(x) < 1.0. Set iptr to zero with the sign of x 44 | and return x. */ 45 | PUT_BITS_SP32(ux & SIGNBIT_SP32, *iptr); 46 | return x; 47 | } 48 | else if (xexp < EXPSHIFTBITS_SP32) 49 | { 50 | /* x lies between 1.0 and 2**(24) */ 51 | /* Mask out the bits of x that we don't want */ 52 | mask = (1 << (EXPSHIFTBITS_SP32 - xexp)) - 1; 53 | PUT_BITS_SP32(ux & ~mask, *iptr); 54 | return x - *iptr; 55 | } 56 | else if ((ux & (~SIGNBIT_SP32)) > 0x7f800000) 57 | { 58 | /* x is NaN */ 59 | *iptr = x; 60 | return x + x; /* Raise invalid if it is a signalling NaN */ 61 | } 62 | else 63 | { 64 | /* x is infinity or large. Set iptr to x and return zero 65 | with the sign of x. */ 66 | *iptr = x; 67 | PUT_BITS_SP32(ux & SIGNBIT_SP32, x); 68 | return x; 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /pow_special.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************************************************* 3 | MIT License 4 | ----------- 5 | 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 
7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this Software and associated documentaon files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 
25 | *******************************************************************************/ 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | 33 | #include "libm_new.h" 34 | 35 | // these codes and the ones in the related .asm files have to match 36 | #define POW_X_ONE_Y_SNAN 1 37 | #define POW_X_ZERO_Z_INF 2 38 | #define POW_X_NAN 3 39 | #define POW_Y_NAN 4 40 | #define POW_X_NAN_Y_NAN 5 41 | #define POW_X_NEG_Y_NOTINT 6 42 | #define POW_Z_ZERO 7 43 | #define POW_Z_DENORMAL 8 44 | #define POW_Z_INF 9 45 | 46 | float _powf_special(float x, float y, float z, U32 code) 47 | { 48 | switch(code) 49 | { 50 | case POW_X_ONE_Y_SNAN: 51 | { 52 | UT64 zm; zm.u64 = 0; zm.f32[0] = z; 53 | _handle_errorf("powf", _FpCodePow, zm.u64, 0, AMD_F_INVALID, 0, x, y, 2); 54 | } 55 | break; 56 | 57 | case POW_X_ZERO_Z_INF: 58 | { 59 | UT64 zm; zm.u64 = 0; zm.f32[0] = z; 60 | _handle_errorf("powf", _FpCodePow, zm.u64, _SING, AMD_F_DIVBYZERO, ERANGE, x, y, 2); 61 | } 62 | break; 63 | 64 | case POW_X_NAN: 65 | case POW_Y_NAN: 66 | case POW_X_NAN_Y_NAN: 67 | case POW_X_NEG_Y_NOTINT: 68 | { 69 | UT64 zm; zm.u64 = 0; zm.f32[0] = z; 70 | _handle_errorf("powf", _FpCodePow, zm.u64, _DOMAIN, AMD_F_INVALID, EDOM, x, y, 2); 71 | } 72 | break; 73 | 74 | case POW_Z_ZERO: 75 | { 76 | UT64 zm; zm.u64 = 0; zm.f32[0] = z; 77 | _handle_errorf("powf", _FpCodePow, zm.u64, _UNDERFLOW, AMD_F_INEXACT|AMD_F_UNDERFLOW, ERANGE, x, y, 2); 78 | } 79 | break; 80 | 81 | case POW_Z_INF: 82 | { 83 | UT64 zm; zm.u64 = 0; zm.f32[0] = z; 84 | _handle_errorf("powf", _FpCodePow, zm.u64, _OVERFLOW, AMD_F_INEXACT|AMD_F_OVERFLOW, ERANGE, x, y, 2); 85 | } 86 | break; 87 | } 88 | 89 | return z; 90 | } 91 | 92 | double _pow_special(double x, double y, double z, U32 code) 93 | { 94 | switch(code) 95 | { 96 | case POW_X_ZERO_Z_INF: 97 | { 98 | UT64 zm; zm.f64 = z; 99 | _handle_error("pow", _FpCodePow, zm.u64, _SING, AMD_F_DIVBYZERO, ERANGE, x, y, 2); 100 | } 101 | break; 102 | 103 | case POW_X_NAN: 104 | 
case POW_Y_NAN: 105 | case POW_X_NAN_Y_NAN: 106 | case POW_X_NEG_Y_NOTINT: 107 | { 108 | UT64 zm; zm.f64 = z; 109 | _handle_error("pow", _FpCodePow, zm.u64, _DOMAIN, AMD_F_INVALID, EDOM, x, y, 2); 110 | } 111 | break; 112 | 113 | case POW_Z_ZERO: 114 | case POW_Z_DENORMAL: 115 | { 116 | UT64 zm; zm.f64 = z; 117 | _handle_error("pow", _FpCodePow, zm.u64, _UNDERFLOW, AMD_F_INEXACT|AMD_F_UNDERFLOW, ERANGE, x, y, 2); 118 | } 119 | break; 120 | 121 | case POW_Z_INF: 122 | { 123 | UT64 zm; zm.f64 = z; 124 | _handle_error("pow", _FpCodePow, zm.u64, _OVERFLOW, AMD_F_INEXACT|AMD_F_OVERFLOW, ERANGE, x, y, 2); 125 | } 126 | break; 127 | } 128 | 129 | return z; 130 | } 131 | -------------------------------------------------------------------------------- /remainder_piby2f.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************************************************* 3 | MIT License 4 | ----------- 5 | 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this Software and associated documentaon files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*******************************************************************************/

#include "libm.h"
#include "libm_util.h"


/* Given positive argument x, reduce it to the range [-pi/4,pi/4] using
   extra precision, and return the result in r.
   Return value "region" tells how many lots of pi/2 were subtracted
   from x to put it in the range [-pi/4,pi/4], mod 4.

   ux      x, passed as a bit pattern: the exponent and mantissa are
           extracted below with the DP64 (double-precision) field masks.
   r       receives the reduced argument.
   region  receives the region number (0..3).

   NOTE(review): ux, res[], mant and friends carry values wider than 32 bits
   (36-bit chunks, 64-bit patterns), and several pibits[] entries exceed
   2^32.  This assumes "unsigned long" is at least 64 bits wide, which does
   not hold on LLP64 targets.  The type is part of this function's
   signature, so it is left untouched here - confirm against the prototype
   in libm.h before widening. */
void __remainder_piby2f(unsigned long ux, double *r, int *region)
{


  /* This method simulates multi-precision floating-point
     arithmetic and is accurate for all 1 <= x < infinity */
#define bitsper 36
  unsigned long res[10];
  unsigned long u, carry, mask, mant, nextbits;
  int first, last, i, rexp, xexp, resexp, ltb, determ, bc;
  double dx;
  static const double
    piby2 = 1.57079632679489655800e+00; /* 0x3ff921fb54442d18 */
  /* Bits of 2/pi, stored in chunks of bitsper (36) bits each */
  static unsigned long pibits[] =
  {
    0LL,
    5215LL, 13000023176LL, 11362338026LL, 67174558139LL,
    34819822259LL, 10612056195LL, 67816420731LL, 57840157550LL,
    19558516809LL, 50025467026LL, 25186875954LL, 18152700886LL
  };


  xexp = (int)(((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64);
  /* Keep only the leading mantissa bits (implicit bit included); the low
     29 bits are discarded. */
  ux = ((ux & MANTBITS_DP64) | IMPBIT_DP64) >> 29;


  /* Now ux is the mantissa bit pattern of x as a long integer */
  mask = 1;
  mask = (mask << bitsper) - 1;

  /* Set first and last to the positions of the first
     and last chunks of 2/pi that we need */
  first = xexp / bitsper;
  resexp = xexp - first * bitsper;
  /* 120 is the theoretical maximum number of bits (actually
     115 for IEEE single precision) that we need to extract
     from the middle of 2/pi to compute the reduced argument
     accurately enough for our purposes */
  last = first + 120 / bitsper;


  /* Do a long multiplication of the bits of 2/pi by the
     integer mantissa */
#if 0
  /* Disabled reference loop.  Note that it reads carry before anything
     has been assigned to it. */
  for (i = last; i >= first; i--)
    {
      u = pibits[i] * ux + carry;
      res[i - first] = u & mask;
      carry = u >> bitsper;
    }
  res[last - first + 1] = 0;
#else
  /* Unroll the loop. This is only correct because we know
     that bitsper is fixed as 36. */
  res[4] = 0;
  u = pibits[last] * ux;
  res[3] = u & mask;
  carry = u >> bitsper;
  u = pibits[last - 1] * ux + carry;
  res[2] = u & mask;
  carry = u >> bitsper;
  u = pibits[last - 2] * ux + carry;
  res[1] = u & mask;
  carry = u >> bitsper;
  u = pibits[first] * ux + carry;
  res[0] = u & mask;
#endif


  /* Reconstruct the result */
  ltb = (int)((((res[0] << bitsper) | res[1])
               >> (bitsper - 1 - resexp)) & 7);

  /* determ says whether the fractional part is >= 0.5 */
  determ = ltb & 1;

  i = 1;
  if (determ)
    {
      /* The mantissa is >= 0.5. We want to subtract it
         from 1.0 by negating all the bits */
      *region = ((ltb >> 1) + 1) & 3;
      mant = 1;
      mant = ~(res[1]) & ((mant << (bitsper - resexp)) - 1);
      /* Pull in further (negated) chunks until enough bits are gathered */
      while (mant < 0x0000000000010000)
        {
          i++;
          mant = (mant << bitsper) | (~(res[i]) & mask);
        }
      nextbits = (~(res[i+1]) & mask);
    }
  else
    {
      *region = (ltb >> 1);
      mant = 1;
      mant = res[1] & ((mant << (bitsper - resexp)) - 1);
      /* Pull in further chunks until enough bits are gathered */
      while (mant < 0x0000000000010000)
        {
          i++;
          mant = (mant << bitsper) | res[i];
        }
      nextbits = res[i+1];
    }


  /* Normalize the mantissa. The shift value 6 here, determined by
     trial and error, seems to give optimal speed. */
  bc = 0;
  while (mant < 0x0000400000000000)
    {
      bc += 6;
      mant <<= 6;
    }
  while (mant < 0x0010000000000000)
    {
      bc++;
      mant <<= 1;
    }
  /* Fill the bc low bits vacated by the shifts from the next chunk */
  mant |= nextbits >> (bitsper - bc);

  rexp = 52 + resexp - bc - i * bitsper;


  /* Put the result exponent rexp onto the mantissa pattern */
  u = ((unsigned long)rexp + EXPBIAS_DP64) << EXPSHIFTBITS_DP64;
  ux = (mant & MANTBITS_DP64) | u;
  if (determ)
    /* If we negated the mantissa we negate x too */
    ux |= SIGNBIT_DP64;
  PUT_BITS_DP64(ux, dx);


  /* x is a double precision version of the fractional part of
     x * 2 / pi.  Multiply x by pi/2 in double precision
     to get the reduced argument r. */
  *r = dx * piby2;
  return;

}

--------------------------------------------------------------------------------
/remainder_piby2f_forAsm.asm:
--------------------------------------------------------------------------------
;
; MIT License
; -----------
;
; Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
;
; Permission is hereby granted, free of charge, to any person obtaining a copy
; of this Software and associated documentaon files (the "Software"), to deal
; in the Software without restriction, including without limitation the rights
; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
; copies of the Software, and to permit persons to whom the Software is
; furnished to do so, subject to the following conditions:
;
; The above copyright notice and this permission notice shall be included in
; all copies or substantial portions of the Software.
;
; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
; THE SOFTWARE.
;
; An implementation of the remainder by pi/2 function
; This is a service routine for use by trig functions coded in asm
;
; On input,
;   xmm0 = x;   Note that we assume x >= pi/4
; On output
;   xmm0 = r
;   eax  = region
;
; Registers written by this routine: rax, rcx, rdx, r8, r9, r10, r11,
; xmm0, xmm1 (xmm1 is used as scratch storage for the region).

.const

ALIGN 16
L__piby2            DQ 03ff921fb54442d18h
EXTRN __L_2_by_pi_bits:BYTE


fname   TEXTEQU <__remainder_piby2d2f_forAsm>

stack_size      EQU     000h
include fm.inc

.code
PUBLIC fname
fname PROC FRAME
    StackAllocate stack_size
    .ENDPROLOG

    lea r9,__L_2_by_pi_bits

    ;get the unbiased exponent and the mantissa part of x
    ;Since x >= pi/4, xexp = (x >> 52) - 1023
    movd r11,xmm0
    mov rcx,r11
    shr r11,52
    sub r11,1023 ; r11 <-- xexp = exponent of input x

    ;calculate the last byte from which to start multiplication
    ;last = 134 - (xexp >> 3)
    mov r10,r11
    shr r10,3
    sub r10,134 ;r10 = -last
    neg r10 ;r10 = last

    ;load 64 bits of 2_by_pi
    mov rax,[r9 + r10]

    ;mantissa of x = ((x << 12) >> 12) | implied bit
    shl rcx,12
    shr rcx,12 ;rcx = mantissa part of input x
    bts rcx,52 ;add the implied bit as well

    ;load next 128 bits of 2_by_pi
    add r10,8 ;increment to next 8 bytes of 2_by_pi
    movdqu xmm0,[r9 + r10]

    ;do three 64-bit multiplications with mant of x
    ;(each mul leaves its 128-bit product in rdx:rax)
    mul rcx
    mov r8,rax ;r8 = last 64 bits of mul = res1[2]
    mov r10,rdx ;r10 <-- carry
    movd rax,xmm0
    mul rcx
    ;resexp = xexp & 7
    and r11,7 ;r11 = resexp = xexp & 7 = last 3 bits
    psrldq xmm0,8
    add rax,r10 ; add the previous carry
    adc rdx,0
    mov r9,rax ;r9 = next 64 bits of mul = res1[1]
    mov r10,rdx ;r10 <-- carry
    movd rax,xmm0
    mul rcx
    add r10,rax ;r10 = most sig 64 bits = res1[0]

    ;find the region
    ;last three bits ltb = most sig bits >> (54 - resexp)
    ; decimal point in last 18 bits ==> 8 lsb's in first 64 bits and
    ; 8 msb's in next 64 bits
    ;point_five = ltb & 01h;
    ;region = ((ltb >> 1) + point_five) & 3;
    mov rcx,54
    mov rax,r10
    sub rcx,r11
    xor rdx,rdx ;rdx = sign of x(i.e first part of x * 2bypi)
    shr rax,cl ;CF <-- last bit shifted out = point_five
    jnc L__no_point_five
    ;;if there is carry.. then negate the result of multiplication
    ;;(not and mov leave the flags alone, so CF from the shr above is
    ;; still intact when the adc below executes)
    not r10
    not r9
    not r8
    mov rdx,08000000000000000h

ALIGN 16
L__no_point_five:
    adc rax,0 ;adds point_five (CF from the shr above)
    and rax,3
    ; Until / unless we find a better place to save it, we're putting
    ; the region in xmm1.
    movd xmm1, rax

    ;calculate the number of integer bits and zero them out
    mov rcx,r11
    add rcx,10 ;rcx = no. of integer bits
    shl r10,cl
    shr r10,cl ;r10 contains only mant bits
    sub rcx,64 ;form the exponent
    mov r11,rcx

    ;find the highest set bit
    bsr rcx,r10
    jnz L__form_mantissa
    ;bsr set ZF, i.e. r10 was zero: move on to the next 64 bits
    mov r10,r9
    mov r9,r8
    bsr rcx,r10 ;rcx = hsb
    sub r11,64

ALIGN 16
L__form_mantissa:
    add r11,rcx ;for exp of x
    sub rcx,52 ;rcx = no. of bits to shift in r10
    cmp rcx,0
    jl L__hsb_below_52
    je L__form_numbers
    ;hsb above 52
    mov r8,r10 ;previous contents of r8 not required
    shr r10,cl ;r10 = mantissa of x with hsb at 52
    jmp L__form_numbers

ALIGN 16
L__hsb_below_52:
    ;shift r10 left by -rcx bits and fill the vacated low bits from the
    ;top of r9
    neg rcx
    mov rax,r9
    shl r10,cl
    shl r9,cl
    sub rcx,64
    neg rcx
    shr rax,cl
    or r10,rax

ALIGN 16
L__form_numbers:
    add r11,1023
    btr r10,52 ;remove the implied bit
    mov rcx,r11
    or r10,rdx ;put the sign
    shl rcx,52
    or r10,rcx ;x is in r10
    movd xmm0,r10 ; xmm0 = x
    movd rax, xmm1 ; rax <-- region

    ; At this point xmm0 has a double precision version of the fractional part
    ; of x * 2/pi. To get the reduced argument r, we multiply that by pi/2.
    mulsd xmm0,L__piby2
    StackDeallocate stack_size
    ret

fname endp
END

--------------------------------------------------------------------------------
/remainder_piby2f_forC.asm:
--------------------------------------------------------------------------------
;
; MIT License
; -----------
;
; Copyright (c) 2002-2019 Advanced Micro Devices, Inc.
;
; Permission is hereby granted, free of charge, to any person obtaining a copy
; of this Software and associated documentaon files (the "Software"), to deal
; in the Software without restriction, including without limitation the rights
; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
; copies of the Software, and to permit persons to whom the Software is
; furnished to do so, subject to the following conditions:
;
; The above copyright notice and this permission notice shall be included in
; all copies or substantial portions of the Software.
;
; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
; THE SOFTWARE.
;
; An implementation of the remainder by pi/2 function
; This is a service routine for use by trig functions coded in C
;
; On input,
;   rcx = bit pattern of x (moved into xmm0 below)
;   rdx = address that receives r (stored as a QWORD)
;   r8  = address that receives region (stored as a DWORD)
; rdi and rsi are saved in the prologue and restored before returning.

fname   TEXTEQU <__remainder_piby2d2f_forC>

save_rdi        EQU     20h
save_rsi        EQU     30h
stack_size      EQU     088h
include fm.inc

.code
PUBLIC fname
fname PROC FRAME
    StackAllocate stack_size
    SaveReg rdi,save_rdi
    SaveReg rsi,save_rsi
    .ENDPROLOG

    mov rdi, rcx ;rdi = bit pattern of x
    mov rsi, rdx ;rsi = address of r
    mov rdx, r8 ;rdx = address of region

    ;get the unbiased exponent and the mantissa part of x
    movd xmm0,rdi
    lea r9,L__2_by_pi_bits

    ;xexp = (x >> 52) - 1023
    movd r11,xmm0
    mov rcx,r11
    shr r11,52
    sub r11,1023 ;r11 = xexp = exponent of input x

    ;calculate the last byte from which to start multiplication
    ;last = 134 - (xexp >> 3)
    mov r10,r11
    shr r10,3
    sub r10,134 ;r10 = -last
    neg r10 ;r10 = last

    ;load 64 bits of 2_by_pi
    mov rax,[r9 + r10]
    mov rdi,rdx ; save address of region since mul modifies rdx

    ;mantissa of x = ((x << 12) >> 12) | implied bit
    shl rcx,12
    shr rcx,12 ;rcx = mantissa part of input x
    bts rcx,52 ;add the implied bit as well

    ;load next 128 bits of 2_by_pi
    add r10,8 ;increment to next 8 bytes of 2_by_pi
    movdqu xmm0,[r9 + r10]

    ;do three 64-bit multiplications with mant of x
    ;(each mul leaves its 128-bit product in rdx:rax)
    mul rcx
    mov r8,rax ;r8 = last 64 bits of multiplication = res1[2]
    mov r10,rdx ;r10 = carry
    movd rax,xmm0
    mul rcx
    ;resexp = xexp & 7
    and r11,7 ;r11 = resexp = xexp & 7 = last 3 bits
    psrldq xmm0,8
    add rax,r10 ; add the previous carry
    adc rdx,0
    mov r9,rax ;r9 = next 64 bits of multiplication = res1[1]
    mov r10,rdx ;r10 = carry
    movd rax,xmm0
    mul rcx
    add r10,rax ;r10 = most significant 64 bits = res1[0]

    ;find the region
    ;last three bits ltb = most sig bits >> (54 - resexp)
    ; decimal point in last 18 bits == 8 lsb's in first 64 bits and
    ; 8 msb's in next 64 bits
    ;point_five = ltb & 01h;
    ;region = ((ltb >> 1) + point_five) & 3;
    mov rcx,54
    mov rax,r10
    sub rcx,r11
    xor rdx,rdx ;rdx = sign of x(i.e first part of x * 2bypi)
    shr rax,cl ;CF <-- last bit shifted out = point_five
    jnc L__no_point_five
    ;;if there is carry.. then negate the result of multiplication
    ;;(not and mov leave the flags alone, so CF from the shr above is
    ;; still intact when the adc below executes)
    not r10
    not r9
    not r8
    mov rdx,08000000000000000h

ALIGN 16
L__no_point_five:
    adc rax,0 ;adds point_five (CF from the shr above)
    and rax,3
    mov DWORD PTR[rdi],eax ;store region to memory

    ;calculate the number of integer bits and zero them out
    mov rcx,r11
    add rcx,10 ;rcx = no. of integer bits
    shl r10,cl
    shr r10,cl ;r10 contains only mant bits
    sub rcx,64 ;form the exponent
    mov r11,rcx

    ;find the highest set bit
    bsr rcx,r10
    jnz L__form_mantissa
    ;bsr set ZF, i.e. r10 was zero: move on to the next 64 bits
    mov r10,r9
    mov r9,r8
    bsr rcx,r10 ;rcx = hsb
    sub r11,64


ALIGN 16
L__form_mantissa:
    add r11,rcx ;for exp of x
    sub rcx,52 ;rcx = no. of bits to shift in r10
    cmp rcx,0
    jl L__hsb_below_52
    je L__form_numbers
    ;hsb above 52
    mov r8,r10 ;previous contents of r8 not required
    shr r10,cl ;r10 = mantissa of x with hsb at 52
    jmp L__form_numbers

ALIGN 16
L__hsb_below_52:
    ;shift r10 left by -rcx bits and fill the vacated low bits from the
    ;top of r9
    neg rcx
    mov rax,r9
    shl r10,cl
    shl r9,cl
    sub rcx,64
    neg rcx
    shr rax,cl
    or r10,rax

ALIGN 16
L__form_numbers:
    add r11,1023
    btr r10,52 ;remove the implied bit
    mov rcx,r11
    or r10,rdx ;put the sign
    shl rcx,52
    or r10,rcx ;x is in r10

    movd xmm0,r10 ;xmm0 = x
    mulsd xmm0,L__piby2 ;r = x * pi/2
    movsd QWORD PTR[rsi],xmm0 ;store r to memory
    RestoreReg rsi,save_rsi
    RestoreReg rdi,save_rdi
    StackDeallocate stack_size
    ret

fname endp

.const
ALIGN 16
L__piby2 DQ 03ff921fb54442d18h

; Byte table read by the routine above as "2_by_pi" bits (one byte per DB,
; indexed from the byte offset "last" computed in the code).
ALIGN 16
L__2_by_pi_bits DB 224
                DB 241
                DB 27
                DB 193
                DB 12
                DB 88
                DB 33
                DB 116
                DB 53
                DB 126
                DB 196
                DB 126
                DB 237
                DB 175
                DB 169
                DB 75
                DB 74
                DB 41
                DB 222
                DB 231
                DB 28
                DB 244
                DB 236
                DB 197
                DB 151
                DB 175
                DB 31
                DB 235
                DB 158
                DB 212
                DB 181
                DB 168
                DB 127
                DB 121
                DB 154
                DB 253
                DB 24
                DB 61
                DB 221
                DB 38
                DB 44
                DB 159
                DB 60
                DB 251
                DB 217
                DB 180
                DB 125
                DB 180
                DB 41
                DB 104
                DB 45
                DB 70
                DB 188
                DB 188
                DB 63
                DB 96
                DB 22
                DB 120
                DB 255
                DB 95
                DB 226
                DB 127
                DB 236
                DB 160
                DB 228
                DB 247
                DB 46
                DB 126
                DB 17
                DB 114
                DB 210
                DB 231
                DB 76
                DB 13
                DB 230
                DB 88
                DB 71
                DB 230
                DB 4
                DB 249
                DB 125
                DB 209
                DB 154
DB 192 265 | DB 113 266 | DB 166 267 | DB 19 268 | DB 18 269 | DB 237 270 | DB 186 271 | DB 212 272 | DB 215 273 | DB 8 274 | DB 162 275 | DB 251 276 | DB 156 277 | DB 166 278 | DB 196 279 | DB 114 280 | DB 172 281 | DB 119 282 | DB 248 283 | DB 115 284 | DB 72 285 | DB 70 286 | DB 39 287 | DB 168 288 | DB 187 289 | DB 36 290 | DB 25 291 | DB 128 292 | DB 75 293 | DB 55 294 | DB 9 295 | DB 233 296 | DB 184 297 | DB 145 298 | DB 220 299 | DB 134 300 | DB 21 301 | DB 239 302 | DB 122 303 | DB 175 304 | DB 142 305 | DB 69 306 | DB 249 307 | DB 7 308 | DB 65 309 | DB 14 310 | DB 241 311 | DB 100 312 | DB 86 313 | DB 138 314 | DB 109 315 | DB 3 316 | DB 119 317 | DB 211 318 | DB 212 319 | DB 71 320 | DB 95 321 | DB 157 322 | DB 240 323 | DB 167 324 | DB 84 325 | DB 16 326 | DB 57 327 | DB 185 328 | DB 13 329 | DB 230 330 | DB 139 331 | DB 2 332 | DB 0 333 | DB 0 334 | DB 0 335 | DB 0 336 | DB 0 337 | DB 0 338 | DB 0 339 | 340 | END 341 | 342 | -------------------------------------------------------------------------------- /remainderf.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************************************************* 3 | MIT License 4 | ----------- 5 | 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this Software and associated documentaon files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 
17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 25 | *******************************************************************************/ 26 | 27 | #include "libm.h" 28 | #include "libm_util.h" 29 | 30 | #define USE_NANF_WITH_FLAGS 31 | #define USE_SCALEDOUBLE_1 32 | #define USE_GET_FPSW_INLINE 33 | #define USE_SET_FPSW_INLINE 34 | #define USE_HANDLE_ERRORF 35 | #include "libm_inlines.h" 36 | #undef USE_NANF_WITH_FLAGS 37 | #undef USE_SCALEDOUBLE_1 38 | #undef USE_GET_FPSW_INLINE 39 | #undef USE_SET_FPSW_INLINE 40 | #undef USE_HANDLE_ERRORF 41 | 42 | #if !defined(_CRTBLD_C9X) 43 | #define _CRTBLD_C9X 44 | #endif 45 | 46 | #include "libm_errno.h" 47 | 48 | // Disable "C4163: not available as intrinsic function" warning that older 49 | // compilers may issue here. 
50 | #pragma warning(disable:4163) 51 | #pragma function(remainderf,fmodf) 52 | 53 | 54 | #undef _FUNCNAME 55 | #if defined(COMPILING_FMOD) 56 | float fmodf(float x, float y) 57 | #define _FUNCNAME "fmodf" 58 | #define _OPERATION OP_FMOD 59 | #else 60 | float remainderf(float x, float y) 61 | #define _FUNCNAME "remainderf" 62 | #define _OPERATION OP_REM 63 | #endif 64 | { 65 | double dx, dy, scale, w, t; 66 | int i, ntimes, xexp, yexp; 67 | unsigned long ux, uy, ax, ay; 68 | 69 | unsigned int sw; 70 | 71 | dx = x; 72 | dy = y; 73 | 74 | 75 | GET_BITS_DP64(dx, ux); 76 | GET_BITS_DP64(dy, uy); 77 | ax = ux & ~SIGNBIT_DP64; 78 | ay = uy & ~SIGNBIT_DP64; 79 | xexp = (int)((ux & EXPBITS_DP64) >> EXPSHIFTBITS_DP64); 80 | yexp = (int)((uy & EXPBITS_DP64) >> EXPSHIFTBITS_DP64); 81 | 82 | if (xexp < 1 || xexp > BIASEDEMAX_DP64 || 83 | yexp < 1 || yexp > BIASEDEMAX_DP64) 84 | { 85 | /* x or y is zero, NaN or infinity (neither x nor y can be 86 | denormalized because we promoted from float to double) */ 87 | if (xexp > BIASEDEMAX_DP64) 88 | { 89 | /* x is NaN or infinity */ 90 | if (ux & MANTBITS_DP64) 91 | { 92 | /* x is NaN */ 93 | unsigned int ufx; 94 | GET_BITS_SP32(x, ufx); 95 | return _handle_errorf(_FUNCNAME, _OPERATION, ufx|0x00400000, _DOMAIN, 0, 96 | EDOM, x, y, 2); 97 | } 98 | else 99 | { 100 | /* x is infinity; result is NaN */ 101 | return _handle_errorf(_FUNCNAME, _OPERATION, INDEFBITPATT_SP32, _DOMAIN, 102 | AMD_F_INVALID, EDOM, x, y, 2); 103 | } 104 | } 105 | else if (yexp > BIASEDEMAX_DP64) 106 | { 107 | /* y is NaN or infinity */ 108 | if (uy & MANTBITS_DP64) 109 | { 110 | /* y is NaN */ 111 | unsigned int ufy; 112 | GET_BITS_SP32(y, ufy); 113 | return _handle_errorf(_FUNCNAME, _OPERATION, ufy|0x00400000, _DOMAIN, 0, 114 | EDOM, x, y, 2); 115 | } 116 | else 117 | { 118 | #ifdef _CRTBLD_C9X 119 | /* C99 return for y = +-inf is x */ 120 | return x; 121 | #else 122 | /* y is infinity; result is indefinite */ 123 | return _handle_errorf(_FUNCNAME, _OPERATION, 
INDEFBITPATT_SP32, _DOMAIN, 124 | AMD_F_INVALID, EDOM, x, y, 2); 125 | #endif 126 | } 127 | } 128 | else if (xexp < 1) 129 | { 130 | /* x must be zero (cannot be denormalized) */ 131 | if (yexp < 1) 132 | { 133 | /* y must be zero (cannot be denormalized) */ 134 | return _handle_errorf(_FUNCNAME, _OPERATION, INDEFBITPATT_SP32, _DOMAIN, 135 | AMD_F_INVALID, EDOM, x, y, 2); 136 | } 137 | else 138 | /* C99 return for x = 0 must preserve sign */ 139 | return x; 140 | } 141 | else 142 | { 143 | /* y must be zero */ 144 | return _handle_errorf(_FUNCNAME, _OPERATION, INDEFBITPATT_SP32, _DOMAIN, 145 | AMD_F_INVALID, EDOM, x, y, 2); 146 | } 147 | } 148 | else if (ax == ay) 149 | { 150 | /* abs(x) == abs(y); return zero with the sign of x */ 151 | PUT_BITS_DP64(ux & SIGNBIT_DP64, dx); 152 | return (float)dx; 153 | } 154 | 155 | /* Set dx = abs(x), dy = abs(y) */ 156 | PUT_BITS_DP64(ax, dx); 157 | PUT_BITS_DP64(ay, dy); 158 | 159 | if (ax < ay) 160 | { 161 | /* abs(x) < abs(y) */ 162 | #if !defined(COMPILING_FMOD) 163 | if (dx > 0.5*dy) 164 | dx -= dy; 165 | #endif 166 | return (float)(x < 0.0? -dx : dx); 167 | } 168 | 169 | /* Save the current floating-point status word. We need 170 | to do this because the remainder function is always 171 | exact for finite arguments, but our algorithm causes 172 | the inexact flag to be raised. We therefore need to 173 | restore the entry status before exiting. */ 174 | sw = get_fpsw_inline(); 175 | 176 | /* Set ntimes to the number of times we need to do a 177 | partial remainder. If the exponent of x is an exact multiple 178 | of 24 larger than the exponent of y, and the mantissa of x is 179 | less than the mantissa of y, ntimes will be one too large 180 | but it doesn't matter - it just means that we'll go round 181 | the loop below one extra time. 
*/ 182 | if (xexp <= yexp) 183 | { 184 | ntimes = 0; 185 | w = dy; 186 | scale = 1.0; 187 | } 188 | else 189 | { 190 | ntimes = (xexp - yexp) / 24; 191 | 192 | /* Set w = y * 2^(24*ntimes) */ 193 | PUT_BITS_DP64((unsigned long)(ntimes * 24 + EXPBIAS_DP64) << EXPSHIFTBITS_DP64, 194 | scale); 195 | w = scale * dy; 196 | /* Set scale = 2^(-24) */ 197 | PUT_BITS_DP64((unsigned long)(-24 + EXPBIAS_DP64) << EXPSHIFTBITS_DP64, 198 | scale); 199 | } 200 | 201 | 202 | /* Each time round the loop we compute a partial remainder. 203 | This is done by subtracting a large multiple of w 204 | from x each time, where w is a scaled up version of y. 205 | The subtraction can be performed exactly when performed 206 | in double precision, and the result at each stage can 207 | fit exactly in a single precision number. */ 208 | for (i = 0; i < ntimes; i++) 209 | { 210 | /* t is the integer multiple of w that we will subtract. 211 | We use a truncated value for t. */ 212 | t = (double)((int)(dx / w)); 213 | dx -= w * t; 214 | /* Scale w down by 2^(-24) for the next iteration */ 215 | w *= scale; 216 | } 217 | 218 | /* One more time */ 219 | #if defined(COMPILING_FMOD) 220 | t = (double)((int)(dx / w)); 221 | dx -= w * t; 222 | #else 223 | { 224 | unsigned int todd; 225 | /* Variable todd says whether the integer t is odd or not */ 226 | t = (double)((int)(dx / w)); 227 | todd = ((int)(dx / w)) & 1; 228 | dx -= w * t; 229 | 230 | /* At this point, dx lies in the range [0,dy) */ 231 | /* For the remainder function, we need to adjust dx 232 | so that it lies in the range (-y/2, y/2] by carefully 233 | subtracting w (== dy == y) if necessary. */ 234 | if (dx > 0.5 * w || ((dx == 0.5 * w) && todd)) 235 | dx -= w; 236 | } 237 | #endif 238 | 239 | /* **** N.B. for some reason this breaks the 32 bit version 240 | of remainder when compiling with optimization. 
*/ 241 | /* Restore the entry status flags */ 242 | set_fpsw_inline(sw); 243 | 244 | /* Set the result sign according to input argument x */ 245 | return (float)(x < 0.0? -dx : dx); 246 | 247 | } 248 | -------------------------------------------------------------------------------- /sincos_special.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************************************************* 3 | MIT License 4 | ----------- 5 | 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this Software and associated documentaon files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 
25 | *******************************************************************************/ 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | 33 | #include "libm_new.h" 34 | 35 | double _sincos_special(double x, char *name, unsigned int operation) 36 | { 37 | UT64 xu; 38 | unsigned int is_snan; 39 | 40 | xu.f64 = x; 41 | 42 | if((xu.u64 & INF_POS_64) == INF_POS_64) 43 | { 44 | // x is Inf or NaN 45 | if((xu.u64 & MANTISSA_MASK_64) == 0x0) 46 | { 47 | // x is Inf 48 | xu.u64 = IND_64; 49 | _handle_error(name, operation, xu.u64, _DOMAIN, AMD_F_INVALID, EDOM, x, 0, 1); 50 | } 51 | else 52 | { 53 | // x is NaN 54 | is_snan = (((xu.u64 & QNAN_MASK_64) == QNAN_MASK_64) ? 0 : 1); 55 | if(is_snan) 56 | { 57 | xu.u64 |= QNAN_MASK_64; 58 | } 59 | _handle_error(name, operation, xu.u64, _DOMAIN, 0, EDOM, x, 0, 1); 60 | } 61 | } 62 | 63 | return xu.f64; 64 | } 65 | 66 | float _sincosf_special(float x, char *name, unsigned int operation) 67 | { 68 | UT64 xu; 69 | unsigned int is_snan; 70 | 71 | xu.u64 = 0; 72 | xu.f32[0] = x; 73 | 74 | if((xu.u32[0] & INF_POS_32) == INF_POS_32) 75 | { 76 | // x is Inf or NaN 77 | if((xu.u32[0] & MANTISSA_MASK_32) == 0x0) 78 | { 79 | // x is Inf 80 | xu.u32[0] = IND_32; 81 | _handle_errorf(name, operation, xu.u64, _DOMAIN, AMD_F_INVALID, EDOM, x, 0, 1); 82 | } 83 | else 84 | { 85 | // x is NaN 86 | is_snan = (((xu.u32[0] & QNAN_MASK_32) == QNAN_MASK_32) ? 
0 : 1); 87 | if(is_snan) 88 | { 89 | xu.u32[0] |= QNAN_SET_32; 90 | _handle_errorf(name, operation, xu.u64, _DOMAIN, AMD_F_INVALID, EDOM, x, 0, 1); 91 | } 92 | else 93 | { 94 | _handle_errorf(name, operation, xu.u64, _DOMAIN, 0, EDOM, x, 0, 1); 95 | } 96 | } 97 | } 98 | 99 | return xu.f32[0]; 100 | } 101 | 102 | float _sinf_special(float x) 103 | { 104 | return _sincosf_special(x, "sinf", _FpCodeSin); 105 | } 106 | 107 | double _sin_special(double x) 108 | { 109 | return _sincos_special(x, "sin", _FpCodeSin); 110 | } 111 | 112 | float _cosf_special(float x) 113 | { 114 | return _sincosf_special(x, "cosf", _FpCodeCos); 115 | } 116 | 117 | double _cos_special(double x) 118 | { 119 | return _sincos_special(x, "cos", _FpCodeCos); 120 | } 121 | 122 | double _tan_special(double x) 123 | { 124 | return _sincos_special(x, "tan",_FpCodeTan); 125 | } 126 | 127 | float _tanf_special(float x) 128 | { 129 | return _sincosf_special(x, "tanf",_FpCodeTan); 130 | } 131 | -------------------------------------------------------------------------------- /sqrt.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************************************************* 3 | MIT License 4 | ----------- 5 | 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this Software and associated documentaon files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 
17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 25 | *******************************************************************************/ 26 | 27 | #include "libm.h" 28 | #include "libm_util.h" 29 | 30 | #if USE_SOFTWARE_SQRT 31 | #define USE_SQRT_AMD_INLINE 32 | #endif 33 | #define USE_NAN_WITH_FLAGS 34 | #define USE_HANDLE_ERROR 35 | #include "libm_inlines.h" 36 | #if USE_SOFTWARE_SQRT 37 | #undef USE_SQRT_AMD_INLINE 38 | #endif 39 | #undef USE_NAN_WITH_FLAGS 40 | #undef USE_HANDLE_ERROR 41 | 42 | #include "libm_errno.h" 43 | 44 | #pragma function(sqrt) 45 | 46 | double sqrt(double x) 47 | { 48 | #if USE_SOFTWARE_SQRT 49 | return sqrt_amd_inline(x); 50 | #else 51 | double r; 52 | unsigned long ux; 53 | GET_BITS_DP64(x, ux); 54 | 55 | /* Check for special cases for Microsoft error handling */ 56 | if ((ux & PINFBITPATT_DP64) == PINFBITPATT_DP64) 57 | { 58 | /* x is infinity, or NaN */ 59 | if (ux & MANTBITS_DP64) 60 | { 61 | /* NaN of some sort */ 62 | /* If it's a signaling NaN, convert to QNaN */ 63 | return _handle_error("sqrt", OP_SQRT, ux|0x0008000000000000, 64 | _DOMAIN, 0,EDOM, x, 0.0, 1); 65 | } 66 | else 67 | { 68 | /* +/-infinity */ 69 | if (ux & SIGNBIT_DP64) 70 | { 71 | /* - infinity */ 72 | return _handle_error("sqrt", OP_SQRT, INDEFBITPATT_DP64, 73 | _DOMAIN, AMD_F_INVALID, EDOM, x, 0.0, 1); 74 | } 75 | /* positive infinite is not a problem */ 76 | } 77 | } 78 | if ((ux & SIGNBIT_DP64)&&(ux & ~SIGNBIT_DP64)) /* if x < zero */ 79 | { 80 | return _handle_error("sqrt", OP_SQRT, INDEFBITPATT_DP64, 81 | _DOMAIN, 
AMD_F_INVALID, EDOM, x, 0.0, 1); 82 | } 83 | 84 | /* VC++ intrinsic call */ 85 | _mm_store_sd(&r, _mm_sqrt_sd(_mm_setzero_pd(), _mm_load_sd(&x))); 86 | return r; 87 | #endif 88 | } 89 | -------------------------------------------------------------------------------- /sqrtf.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************************************************* 3 | MIT License 4 | ----------- 5 | 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this Software and associated documentaon files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 
25 | *******************************************************************************/ 26 | 27 | #include "libm.h" 28 | #include "libm_util.h" 29 | 30 | #if USE_SOFTWARE_SQRT 31 | #define USE_SQRTF_AMD_INLINE 32 | #endif 33 | #define USE_NANF_WITH_FLAGS 34 | #define USE_HANDLE_ERRORF 35 | #include "libm_inlines.h" 36 | #if USE_SOFTWARE_SQRT 37 | #undef USE_SQRTF_AMD_INLINE 38 | #endif 39 | #undef USE_NANF_WITH_FLAGS 40 | #undef USE_HANDLE_ERRORF 41 | 42 | #include "libm_errno.h" 43 | 44 | // Disable "C4163: not available as intrinsic function" warning that older 45 | // compilers may issue here. 46 | #pragma warning(disable:4163) 47 | #pragma function(sqrtf) 48 | 49 | 50 | float sqrtf(float x) 51 | { 52 | #if USE_SOFTWARE_SQRT 53 | return sqrtf_amd_inline(x); 54 | #else 55 | float r; 56 | unsigned int ux; 57 | GET_BITS_SP32(x, ux); 58 | /* Check for special cases for Microsoft error handling */ 59 | if ((ux & PINFBITPATT_SP32) == PINFBITPATT_SP32) 60 | { 61 | /* x is infinity, or NaN */ 62 | if (ux & MANTBITS_SP32) 63 | { 64 | /* NaN of some sort */ 65 | /* If it's a signaling NaN, convert to QNaN */ 66 | return _handle_errorf("sqrtf", OP_SQRT, ux|0x00400000, _DOMAIN, 0, 67 | EDOM, x, 0.0F, 1); 68 | } 69 | else 70 | { 71 | /* +/-infinity */ 72 | if (ux & SIGNBIT_SP32) 73 | { 74 | /* - infinity */ 75 | return _handle_errorf("sqrtf", OP_SQRT, INDEFBITPATT_SP32, 76 | _DOMAIN, AMD_F_INVALID, EDOM, x, 0.0F, 1); 77 | } 78 | /* positive infinite is not a problem */ 79 | } 80 | } 81 | if ((ux & SIGNBIT_SP32)&&(ux & ~SIGNBIT_SP32)) /* if x < zero */ 82 | { 83 | return _handle_errorf("sqrtf", OP_SQRT, INDEFBITPATT_SP32, 84 | _DOMAIN, AMD_F_INVALID, EDOM, x, 0.0F, 1); 85 | } 86 | 87 | /* VC++ intrinsic call */ 88 | _mm_store_ss(&r, _mm_sqrt_ss(_mm_load_ss(&x))); 89 | return r; 90 | #endif 91 | } 92 | -------------------------------------------------------------------------------- /tan.c: -------------------------------------------------------------------------------- 1 | 2 | 
/******************************************************************************* 3 | MIT License 4 | ----------- 5 | 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this Software and associated documentaon files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 25 | *******************************************************************************/ 26 | 27 | #include "libm.h" 28 | #include "libm_util.h" 29 | 30 | #define USE_NAN_WITH_FLAGS 31 | #define USE_VAL_WITH_FLAGS 32 | #define USE_HANDLE_ERROR 33 | #include "libm_inlines.h" 34 | #undef USE_NAN_WITH_FLAGS 35 | #undef USE_VAL_WITH_FLAGS 36 | #undef USE_HANDLE_ERROR 37 | 38 | #include "libm_errno.h" 39 | 40 | /* tan(x + xx) approximation valid on the interval [-pi/4,pi/4]. 41 | If recip is true return -1/tan(x + xx) instead. 
*/ 42 | static inline double tan_piby4(double x, double xx, int recip) 43 | { 44 | double r, t1, t2, xl; 45 | int transform = 0; 46 | static const double 47 | piby4_lead = 7.85398163397448278999e-01, /* 0x3fe921fb54442d18 */ 48 | piby4_tail = 3.06161699786838240164e-17; /* 0x3c81a62633145c06 */ 49 | 50 | /* In order to maintain relative precision transform using the identity: 51 | tan(pi/4-x) = (1-tan(x))/(1+tan(x)) for arguments close to pi/4. 52 | Similarly use tan(x-pi/4) = (tan(x)-1)/(tan(x)+1) close to -pi/4. */ 53 | 54 | if (x > 0.68) 55 | { 56 | transform = 1; 57 | x = piby4_lead - x; 58 | xl = piby4_tail - xx; 59 | x += xl; 60 | xx = 0.0; 61 | } 62 | else if (x < -0.68) 63 | { 64 | transform = -1; 65 | x = piby4_lead + x; 66 | xl = piby4_tail + xx; 67 | x += xl; 68 | xx = 0.0; 69 | } 70 | 71 | /* Core Remez [2,3] approximation to tan(x+xx) on the 72 | interval [0,0.68]. */ 73 | 74 | r = x*x + 2.0 * x * xx; 75 | t1 = x; 76 | t2 = xx + x*r* 77 | (0.372379159759792203640806338901e0 + 78 | (-0.229345080057565662883358588111e-1 + 79 | 0.224044448537022097264602535574e-3*r)*r)/ 80 | (0.111713747927937668539901657944e1 + 81 | (-0.515658515729031149329237816945e0 + 82 | (0.260656620398645407524064091208e-1 - 83 | 0.232371494088563558304549252913e-3*r)*r)*r); 84 | 85 | /* Reconstruct tan(x) in the transformed case. 
*/ 86 | 87 | if (transform) 88 | { 89 | double t; 90 | t = t1 + t2; 91 | if (recip) 92 | return transform*(2*t/(t-1) - 1.0); 93 | else 94 | return transform*(1.0 - 2*t/(1+t)); 95 | } 96 | 97 | if (recip) 98 | { 99 | /* Compute -1.0/(t1 + t2) accurately */ 100 | double trec, trec_top, z1, z2, t; 101 | unsigned long u; 102 | t = t1 + t2; 103 | GET_BITS_DP64(t, u); 104 | u &= 0xffffffff00000000; 105 | PUT_BITS_DP64(u, z1); 106 | z2 = t2 - (z1 - t1); 107 | trec = -1.0 / t; 108 | GET_BITS_DP64(trec, u); 109 | u &= 0xffffffff00000000; 110 | PUT_BITS_DP64(u, trec_top); 111 | return trec_top + trec * ((1.0 + trec_top * z1) + trec_top * z2); 112 | 113 | } 114 | else 115 | return t1 + t2; 116 | } 117 | 118 | #pragma function(tan) 119 | 120 | double tan(double x) 121 | { 122 | double r, rr; 123 | int region, xneg; 124 | 125 | unsigned long ux, ax; 126 | GET_BITS_DP64(x, ux); 127 | ax = (ux & ~SIGNBIT_DP64); 128 | if (ax <= 0x3fe921fb54442d18) /* abs(x) <= pi/4 */ 129 | { 130 | if (ax < 0x3f20000000000000) /* abs(x) < 2.0^(-13) */ 131 | { 132 | if (ax < 0x3e40000000000000) /* abs(x) < 2.0^(-27) */ 133 | { 134 | if (ax == 0x0000000000000000) return x; 135 | else return val_with_flags(x, AMD_F_INEXACT); 136 | } 137 | else 138 | { 139 | /* Using a temporary variable prevents 64-bit VC++ from 140 | rearranging 141 | x + x*x*x*0.333333333333333333; 142 | into 143 | x * (1 + x*x*0.333333333333333333); 144 | The latter results in an incorrectly rounded answer. */ 145 | double tmp; 146 | tmp = x*x*x*0.333333333333333333; 147 | return x + tmp; 148 | } 149 | } 150 | else 151 | return tan_piby4(x, 0.0, 0); 152 | } 153 | else if ((ux & EXPBITS_DP64) == EXPBITS_DP64) 154 | { 155 | /* x is either NaN or infinity */ 156 | if (ux & MANTBITS_DP64) 157 | /* x is NaN */ 158 | return _handle_error("tan", OP_TAN, ux|0x0008000000000000, _DOMAIN, 0, 159 | EDOM, x, 0.0, 1); 160 | else 161 | /* x is infinity. 
Return a NaN */ 162 | return _handle_error("tan", OP_TAN, INDEFBITPATT_DP64, _DOMAIN, AMD_F_INVALID, 163 | EDOM, x, 0.0, 1); 164 | } 165 | xneg = (ax != ux); 166 | 167 | 168 | if (xneg) 169 | x = -x; 170 | 171 | if (x < 5.0e5) 172 | { 173 | /* For these size arguments we can just carefully subtract the 174 | appropriate multiple of pi/2, using extra precision where 175 | x is close to an exact multiple of pi/2 */ 176 | static const double 177 | twobypi = 6.36619772367581382433e-01, /* 0x3fe45f306dc9c883 */ 178 | piby2_1 = 1.57079632673412561417e+00, /* 0x3ff921fb54400000 */ 179 | piby2_1tail = 6.07710050650619224932e-11, /* 0x3dd0b4611a626331 */ 180 | piby2_2 = 6.07710050630396597660e-11, /* 0x3dd0b4611a600000 */ 181 | piby2_2tail = 2.02226624879595063154e-21, /* 0x3ba3198a2e037073 */ 182 | piby2_3 = 2.02226624871116645580e-21, /* 0x3ba3198a2e000000 */ 183 | piby2_3tail = 8.47842766036889956997e-32; /* 0x397b839a252049c1 */ 184 | double t, rhead, rtail; 185 | int npi2; 186 | unsigned long uy, xexp, expdiff; 187 | xexp = ax >> EXPSHIFTBITS_DP64; 188 | /* How many pi/2 is x a multiple of? 
*/ 189 | if (ax <= 0x400f6a7a2955385e) /* 5pi/4 */ 190 | { 191 | if (ax <= 0x4002d97c7f3321d2) /* 3pi/4 */ 192 | npi2 = 1; 193 | else 194 | npi2 = 2; 195 | } 196 | else if (ax <= 0x401c463abeccb2bb) /* 9pi/4 */ 197 | { 198 | if (ax <= 0x4015fdbbe9bba775) /* 7pi/4 */ 199 | npi2 = 3; 200 | else 201 | npi2 = 4; 202 | } 203 | else 204 | npi2 = (int)(x * twobypi + 0.5); 205 | /* Subtract the multiple from x to get an extra-precision remainder */ 206 | rhead = x - npi2 * piby2_1; 207 | rtail = npi2 * piby2_1tail; 208 | GET_BITS_DP64(rhead, uy); 209 | expdiff = xexp - ((uy & EXPBITS_DP64) >> EXPSHIFTBITS_DP64); 210 | if (expdiff > 15) 211 | { 212 | /* The remainder is pretty small compared with x, which 213 | implies that x is a near multiple of pi/2 214 | (x matches the multiple to at least 15 bits) */ 215 | t = rhead; 216 | rtail = npi2 * piby2_2; 217 | rhead = t - rtail; 218 | rtail = npi2 * piby2_2tail - ((t - rhead) - rtail); 219 | if (expdiff > 48) 220 | { 221 | /* x matches a pi/2 multiple to at least 48 bits */ 222 | t = rhead; 223 | rtail = npi2 * piby2_3; 224 | rhead = t - rtail; 225 | rtail = npi2 * piby2_3tail - ((t - rhead) - rtail); 226 | } 227 | } 228 | r = rhead - rtail; 229 | rr = (rhead - r) - rtail; 230 | region = npi2 & 3; 231 | } 232 | else 233 | { 234 | /* Reduce x into range [-pi/4,pi/4] */ 235 | __remainder_piby2(x, &r, &rr, ®ion); 236 | } 237 | 238 | if (xneg) 239 | return -tan_piby4(r, rr, region & 1); 240 | else 241 | return tan_piby4(r, rr, region & 1); 242 | } 243 | -------------------------------------------------------------------------------- /tanf.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************************************************* 3 | MIT License 4 | ----------- 5 | 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 
7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this Software and associated documentaon files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 25 | *******************************************************************************/ 26 | 27 | #include "libm.h" 28 | #include "libm_util.h" 29 | 30 | #define USE_REMAINDER_PIBY2F_INLINE 31 | #define USE_VALF_WITH_FLAGS 32 | #define USE_NANF_WITH_FLAGS 33 | #define USE_HANDLE_ERRORF 34 | #include "libm_inlines.h" 35 | #undef USE_VALF_WITH_FLAGS 36 | #undef USE_NANF_WITH_FLAGS 37 | #undef USE_REMAINDER_PIBY2F_INLINE 38 | #undef USE_HANDLE_ERRORF 39 | 40 | #include "libm_errno.h" 41 | 42 | // Disable "C4163: not available as intrinsic function" warning that older 43 | // compilers may issue here. 44 | #pragma warning(disable:4163) 45 | #pragma function(tanf) 46 | 47 | /* tan(x) approximation valid on the interval [-pi/4,pi/4]. 48 | If recip is true return -1/tan(x) instead. 
*/ 49 | static inline double tanf_piby4(double x, int recip) 50 | { 51 | double r, t; 52 | 53 | /* Core Remez [1,2] approximation to tan(x) on the 54 | interval [0,pi/4]. */ 55 | r = x*x; 56 | t = x + x*r* 57 | (0.385296071263995406715129e0 - 58 | 0.172032480471481694693109e-1 * r) / 59 | (0.115588821434688393452299e+1 + 60 | (-0.51396505478854532132342e0 + 61 | 0.1844239256901656082986661e-1 * r) * r); 62 | 63 | if (recip) 64 | return -1.0 / t; 65 | else 66 | return t; 67 | } 68 | 69 | 70 | float tanf(float x) 71 | { 72 | double r, dx; 73 | int region, xneg; 74 | 75 | unsigned long ux, ax; 76 | 77 | dx = x; 78 | 79 | GET_BITS_DP64(dx, ux); 80 | ax = (ux & ~SIGNBIT_DP64); 81 | 82 | if (ax <= 0x3fe921fb54442d18) /* abs(x) <= pi/4 */ 83 | { 84 | if (ax < 0x3f80000000000000) /* abs(x) < 2.0^(-7) */ 85 | { 86 | if (ax < 0x3f20000000000000) /* abs(x) < 2.0^(-13) */ 87 | { 88 | if (ax == 0x0000000000000000) 89 | return x; 90 | else 91 | return valf_with_flags(x, AMD_F_INEXACT); 92 | } 93 | else 94 | return (float)(dx + dx*dx*dx*0.333333333333333333); 95 | } 96 | else 97 | return (float)tanf_piby4(x, 0); 98 | } 99 | else if ((ux & EXPBITS_DP64) == EXPBITS_DP64) 100 | { 101 | /* x is either NaN or infinity */ 102 | if (ux & MANTBITS_DP64) 103 | { 104 | /* x is NaN */ 105 | unsigned int ufx; 106 | GET_BITS_SP32(x, ufx); 107 | return _handle_errorf("tanf", OP_TAN, ufx|0x00400000, _DOMAIN, 0, 108 | EDOM, x, 0.0F, 1); 109 | } 110 | else 111 | { 112 | /* x is infinity. 
Return a NaN */ 113 | return _handle_errorf("tanf", OP_TAN, INDEFBITPATT_SP32, _DOMAIN, AMD_F_INVALID, 114 | EDOM, x, 0.0F, 1); 115 | } 116 | } 117 | 118 | xneg = (int)(ux >> 63); 119 | 120 | if (xneg) 121 | dx = -dx; 122 | 123 | if (dx < 5.0e5) 124 | { 125 | /* For these size arguments we can just carefully subtract the 126 | appropriate multiple of pi/2, using extra precision where 127 | dx is close to an exact multiple of pi/2 */ 128 | static const double 129 | twobypi = 6.36619772367581382433e-01, /* 0x3fe45f306dc9c883 */ 130 | piby2_1 = 1.57079632673412561417e+00, /* 0x3ff921fb54400000 */ 131 | piby2_1tail = 6.07710050650619224932e-11, /* 0x3dd0b4611a626331 */ 132 | piby2_2 = 6.07710050630396597660e-11, /* 0x3dd0b4611a600000 */ 133 | piby2_2tail = 2.02226624879595063154e-21, /* 0x3ba3198a2e037073 */ 134 | piby2_3 = 2.02226624871116645580e-21, /* 0x3ba3198a2e000000 */ 135 | piby2_3tail = 8.47842766036889956997e-32; /* 0x397b839a252049c1 */ 136 | double t, rhead, rtail; 137 | int npi2; 138 | unsigned long uy, xexp, expdiff; 139 | xexp = ax >> EXPSHIFTBITS_DP64; 140 | /* How many pi/2 is dx a multiple of? 
*/ 141 | if (ax <= 0x400f6a7a2955385e) /* 5pi/4 */ 142 | { 143 | if (ax <= 0x4002d97c7f3321d2) /* 3pi/4 */ 144 | npi2 = 1; 145 | else 146 | npi2 = 2; 147 | } 148 | else if (ax <= 0x401c463abeccb2bb) /* 9pi/4 */ 149 | { 150 | if (ax <= 0x4015fdbbe9bba775) /* 7pi/4 */ 151 | npi2 = 3; 152 | else 153 | npi2 = 4; 154 | } 155 | else 156 | npi2 = (int)(dx * twobypi + 0.5); 157 | /* Subtract the multiple from dx to get an extra-precision remainder */ 158 | rhead = dx - npi2 * piby2_1; 159 | rtail = npi2 * piby2_1tail; 160 | GET_BITS_DP64(rhead, uy); 161 | expdiff = xexp - ((uy & EXPBITS_DP64) >> EXPSHIFTBITS_DP64); 162 | if (expdiff > 15) 163 | { 164 | /* The remainder is pretty small compared with dx, which 165 | implies that dx is a near multiple of pi/2 166 | (dx matches the multiple to at least 15 bits) */ 167 | t = rhead; 168 | rtail = npi2 * piby2_2; 169 | rhead = t - rtail; 170 | rtail = npi2 * piby2_2tail - ((t - rhead) - rtail); 171 | if (expdiff > 48) 172 | { 173 | /* dx matches a pi/2 multiple to at least 48 bits */ 174 | t = rhead; 175 | rtail = npi2 * piby2_3; 176 | rhead = t - rtail; 177 | rtail = npi2 * piby2_3tail - ((t - rhead) - rtail); 178 | } 179 | } 180 | r = rhead - rtail; 181 | region = npi2 & 3; 182 | } 183 | else 184 | { 185 | /* Reduce x into range [-pi/4,pi/4] */ 186 | __remainder_piby2f_inline(ax, &r, ®ion); 187 | } 188 | 189 | if (xneg) 190 | return (float)-tanf_piby4(r, region & 1); 191 | else 192 | return (float)tanf_piby4(r, region & 1); 193 | } 194 | -------------------------------------------------------------------------------- /tanh.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************************************************* 3 | MIT License 4 | ----------- 5 | 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 
7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this Software and associated documentaon files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 25 | *******************************************************************************/ 26 | 27 | #include "libm.h" 28 | #include "libm_util.h" 29 | 30 | #define USE_HANDLE_ERROR 31 | #define USE_SPLITEXP 32 | #define USE_SCALEDOUBLE_2 33 | #define USE_VAL_WITH_FLAGS 34 | #include "libm_inlines.h" 35 | #undef USE_SPLITEXP 36 | #undef USE_SCALEDOUBLE_2 37 | #undef USE_VAL_WITH_FLAGS 38 | #undef USE_HANDLE_ERROR 39 | 40 | #include "libm_errno.h" 41 | 42 | 43 | #pragma function(tanh) 44 | double tanh(double x) 45 | { 46 | /* 47 | The definition of tanh(x) is sinh(x)/cosh(x), which is also equivalent 48 | to the following three formulae: 49 | 1. (exp(x) - exp(-x))/(exp(x) + exp(-x)) 50 | 2. (1 - (2/(exp(2*x) + 1 ))) 51 | 3. (exp(2*x) - 1)/(exp(2*x) + 1) 52 | but computationally, some formulae are better on some ranges. 
53 | */ 54 | static const double 55 | thirtytwo_by_log2 = 4.61662413084468283841e+01, /* 0x40471547652b82fe */ 56 | log2_by_32_lead = 2.16608493356034159660e-02, /* 0x3f962e42fe000000 */ 57 | log2_by_32_tail = 5.68948749532545630390e-11, /* 0x3dcf473de6af278e */ 58 | large_threshold = 20.0; /* 0x4034000000000000 */ 59 | 60 | unsigned long ux, aux, xneg; 61 | double y, z, p, z1, z2; 62 | int m; 63 | 64 | /* Special cases */ 65 | 66 | GET_BITS_DP64(x, ux); 67 | aux = ux & ~SIGNBIT_DP64; 68 | if (aux < 0x3e30000000000000) /* |x| small enough that tanh(x) = x */ 69 | { 70 | if (aux == 0) 71 | return x; /* with no inexact */ 72 | else 73 | return val_with_flags(x, AMD_F_INEXACT); 74 | } 75 | else if (aux > 0x7ff0000000000000) /* |x| is NaN */ 76 | return _handle_error("tanh", OP_TANH, ux|0x0008000000000000, _DOMAIN, 77 | 0, EDOM, x, 0.0, 1); 78 | // return x + x; 79 | 80 | xneg = (aux != ux); 81 | 82 | y = x; 83 | if (xneg) y = -x; 84 | 85 | if (y > large_threshold) 86 | { 87 | /* If x is large then exp(-x) is negligible and 88 | formula 1 reduces to plus or minus 1.0 */ 89 | z = 1.0; 90 | } 91 | else if (y <= 1.0) 92 | { 93 | double y2; 94 | y2 = y*y; 95 | if (y < 0.9) 96 | { 97 | /* Use a [3,3] Remez approximation on [0,0.9]. */ 98 | z = y + y*y2* 99 | (-0.274030424656179760118928e0 + 100 | (-0.176016349003044679402273e-1 + 101 | (-0.200047621071909498730453e-3 - 102 | 0.142077926378834722618091e-7*y2)*y2)*y2)/ 103 | (0.822091273968539282568011e0 + 104 | (0.381641414288328849317962e0 + 105 | (0.201562166026937652780575e-1 + 106 | 0.2091140262529164482568557e-3*y2)*y2)*y2); 107 | } 108 | else 109 | { 110 | /* Use a [3,3] Remez approximation on [0.9,1]. 
*/ 111 | z = y + y*y2* 112 | (-0.227793870659088295252442e0 + 113 | (-0.146173047288731678404066e-1 + 114 | (-0.165597043903549960486816e-3 - 115 | 0.115475878996143396378318e-7*y2)*y2)*y2)/ 116 | (0.683381611977295894959554e0 + 117 | (0.317204558977294374244770e0 + 118 | (0.167358775461896562588695e-1 + 119 | 0.173076050126225961768710e-3*y2)*y2)*y2); 120 | } 121 | } 122 | else 123 | { 124 | /* Compute p = exp(2*y) + 1. The code is basically inlined 125 | from exp_amd. */ 126 | 127 | splitexp(2*y, 1.0, thirtytwo_by_log2, log2_by_32_lead, 128 | log2_by_32_tail, &m, &z1, &z2); 129 | p = scaleDouble_2(z1 + z2, m) + 1.0; 130 | 131 | /* Now reconstruct tanh from p. */ 132 | z = (1.0 - 2.0/p); 133 | } 134 | 135 | if (xneg) z = - z; 136 | return z; 137 | } 138 | -------------------------------------------------------------------------------- /tanhf.c: -------------------------------------------------------------------------------- 1 | 2 | /******************************************************************************* 3 | MIT License 4 | ----------- 5 | 6 | Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this Software and associated documentaon files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 25 | *******************************************************************************/ 26 | 27 | #include "libm.h" 28 | #include "libm_util.h" 29 | 30 | #define USE_HANDLE_ERRORF 31 | #define USE_SPLITEXPF 32 | #define USE_SCALEFLOAT_2 33 | #define USE_VALF_WITH_FLAGS 34 | #include "libm_inlines.h" 35 | #undef USE_SPLITEXPF 36 | #undef USE_SCALEFLOAT_2 37 | #undef USE_VALF_WITH_FLAGS 38 | #undef USE_HANDLE_ERRORF 39 | 40 | #include "libm_errno.h" 41 | 42 | // Disable "C4163: not available as intrinsic function" warning that older 43 | // compilers may issue here. 44 | #pragma warning(disable:4163) 45 | #pragma function(tanhf) 46 | 47 | float tanhf(float x) 48 | { 49 | /* 50 | The definition of tanh(x) is sinh(x)/cosh(x), which is also equivalent 51 | to the following three formulae: 52 | 1. (exp(x) - exp(-x))/(exp(x) + exp(-x)) 53 | 2. (1 - (2/(exp(2*x) + 1 ))) 54 | 3. (exp(2*x) - 1)/(exp(2*x) + 1) 55 | but computationally, some formulae are better on some ranges. 
56 | */ 57 | static const float 58 | thirtytwo_by_log2 = 4.6166240692e+01F, /* 0x4238aa3b */ 59 | log2_by_32_lead = 2.1659851074e-02F, /* 0x3cb17000 */ 60 | log2_by_32_tail = 9.9831822808e-07F, /* 0x3585fdf4 */ 61 | large_threshold = 10.0F; /* 0x41200000 */ 62 | 63 | unsigned int ux, aux; 64 | float y, z, p, z1, z2, xneg; 65 | int m; 66 | 67 | /* Special cases */ 68 | 69 | GET_BITS_SP32(x, ux); 70 | aux = ux & ~SIGNBIT_SP32; 71 | if (aux < 0x39000000) /* |x| small enough that tanh(x) = x */ 72 | { 73 | if (aux == 0) 74 | return x; /* with no inexact */ 75 | else 76 | return valf_with_flags(x, AMD_F_INEXACT); 77 | } 78 | else if (aux > 0x7f800000) /* |x| is NaN */ 79 | { 80 | unsigned int ufx; 81 | GET_BITS_SP32(x, ufx); 82 | return _handle_errorf("tanhf", OP_TANH, ufx|0x00400000, _DOMAIN, 0, 83 | EDOM, x, 0.0F, 1); 84 | } 85 | // return x + x; 86 | 87 | xneg = 1.0F - 2.0F * (aux != ux); 88 | 89 | y = xneg * x; 90 | 91 | if (y > large_threshold) 92 | { 93 | /* If x is large then exp(-x) is negligible and 94 | formula 1 reduces to plus or minus 1.0 */ 95 | z = 1.0F; 96 | } 97 | else if (y <= 1.0F) 98 | { 99 | float y2; 100 | y2 = y*y; 101 | 102 | if (y < 0.9F) 103 | { 104 | /* Use a [2,1] Remez approximation on [0,0.9]. */ 105 | z = y + y*y2* 106 | (-0.28192806108402678e0F + 107 | (-0.14628356048797849e-2F + 108 | 0.4891631088530669873e-4F*y2)*y2)/ 109 | (0.845784192581041099e0F + 110 | 0.3427017942262751343e0F*y2); 111 | } 112 | else 113 | { 114 | /* Use a [2,1] Remez approximation on [0.9,1]. */ 115 | z = y + y*y2* 116 | (-0.24069858695196524e0F + 117 | (-0.12325644183611929e-2F + 118 | 0.3827534993599483396e-4F*y2)*y2)/ 119 | (0.72209738473684982e0F + 120 | 0.292529068698052819e0F*y2); 121 | } 122 | } 123 | else 124 | { 125 | /* Compute p = exp(2*y) + 1. The code is basically inlined 126 | from exp_amd. 
*/ 127 | 128 | splitexpf(2*y, 1.0F, thirtytwo_by_log2, log2_by_32_lead, 129 | log2_by_32_tail, &m, &z1, &z2); 130 | p = scaleFloat_2(z1 + z2, m) + 1.0F; 131 | /* Now reconstruct tanh from p. */ 132 | z = (1.0F - 2.0F/p); 133 | } 134 | 135 | return xneg * z; 136 | } 137 | -------------------------------------------------------------------------------- /two_to_jby64_head_tail_table.asm: -------------------------------------------------------------------------------- 1 | ;; 2 | ; 3 | ; MIT License 4 | ; ----------- 5 | ; 6 | ; Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 7 | ; 8 | ; Permission is hereby granted, free of charge, to any person obtaining a copy 9 | ; of this Software and associated documentaon files (the "Software"), to deal 10 | ; in the Software without restriction, including without limitation the rights 11 | ; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | ; copies of the Software, and to permit persons to whom the Software is 13 | ; furnished to do so, subject to the following conditions: 14 | ; 15 | ; The above copyright notice and this permission notice shall be included in 16 | ; all copies or substantial portions of the Software. 17 | ; 18 | ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | ; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | ; THE SOFTWARE. 
;
;; Defines __two_to_jby64_head_table and __two_to_jby64_tail_table tables
;; Used in exp and pow
;;
;; Both tables hold 64 quadwords (index j = 0..63), each the raw IEEE-754
;; binary64 bit pattern of a constant. Four entries are emitted per DQ line;
;; the trailing comment gives the index range of that line.
;;

.const

; Head table: entry 0 is 1.0 (3ff0000000000000h) and the entries increase up
; to just below 2.0. Every entry has its low 28 bits clear.
; NOTE(review): going by the table name, entry j is presumably the leading
; part of 2^(j/64) - confirm against the exp/pow code that indexes it.
ALIGN 16
PUBLIC __two_to_jby64_head_table
__two_to_jby64_head_table \
    DQ 3ff0000000000000h, 3ff02c9a30000000h, 3ff059b0d0000000h, 3ff0874510000000h ; j =  0.. 3
    DQ 3ff0b55860000000h, 3ff0e3ec30000000h, 3ff11301d0000000h, 3ff1429aa0000000h ; j =  4.. 7
    DQ 3ff172b830000000h, 3ff1a35be0000000h, 3ff1d48730000000h, 3ff2063b80000000h ; j =  8..11
    DQ 3ff2387a60000000h, 3ff26b4560000000h, 3ff29e9df0000000h, 3ff2d285a0000000h ; j = 12..15
    DQ 3ff306fe00000000h, 3ff33c08b0000000h, 3ff371a730000000h, 3ff3a7db30000000h ; j = 16..19
    DQ 3ff3dea640000000h, 3ff4160a20000000h, 3ff44e0860000000h, 3ff486a2b0000000h ; j = 20..23
    DQ 3ff4bfdad0000000h, 3ff4f9b270000000h, 3ff5342b50000000h, 3ff56f4730000000h ; j = 24..27
    DQ 3ff5ab07d0000000h, 3ff5e76f10000000h, 3ff6247eb0000000h, 3ff6623880000000h ; j = 28..31
    DQ 3ff6a09e60000000h, 3ff6dfb230000000h, 3ff71f75e0000000h, 3ff75feb50000000h ; j = 32..35
    DQ 3ff7a11470000000h, 3ff7e2f330000000h, 3ff8258990000000h, 3ff868d990000000h ; j = 36..39
    DQ 3ff8ace540000000h, 3ff8f1ae90000000h, 3ff93737b0000000h, 3ff97d8290000000h ; j = 40..43
    DQ 3ff9c49180000000h, 3ffa0c6670000000h, 3ffa5503b0000000h, 3ffa9e6b50000000h ; j = 44..47
    DQ 3ffae89f90000000h, 3ffb33a2b0000000h, 3ffb7f76f0000000h, 3ffbcc1e90000000h ; j = 48..51
    DQ 3ffc199bd0000000h, 3ffc67f120000000h, 3ffcb720d0000000h, 3ffd072d40000000h ; j = 52..55
    DQ 3ffd5818d0000000h, 3ffda9e600000000h, 3ffdfc9730000000h, 3ffe502ee0000000h ; j = 56..59
    DQ 3ffea4afa0000000h, 3ffefa1be0000000h, 3fff507650000000h, 3fffa7c180000000h ; j = 60..63

; Tail table: entry 0 is zero; every other entry is a small positive double
; (biased exponent fields 3dfh..3e6h).
; NOTE(review): presumably the low-order remainder paired with the head
; entry of the same index, so that head[j] + tail[j] ~ 2^(j/64) - confirm.
ALIGN 16
PUBLIC __two_to_jby64_tail_table
__two_to_jby64_tail_table \
    DQ 0000000000000000h, 3e6cef00c1dcdef9h, 3e48ac2ba1d73e2ah, 3e60eb37901186beh ; j =  0.. 3
    DQ 3e69f3121ec53172h, 3e469e8d10103a17h, 3df25b50a4ebbf1ah, 3e6d525bbf668203h ; j =  4.. 7
    DQ 3e68faa2f5b9bef9h, 3e66df96ea796d31h, 3e368b9aa7805b80h, 3e60c519ac771dd6h ; j =  8..11
    DQ 3e6ceac470cd83f5h, 3e5789f37495e99ch, 3e547f7b84b09745h, 3e5b900c2d002475h ; j = 12..15
    DQ 3e64636e2a5bd1abh, 3e4320b7fa64e430h, 3e5ceaa72a9c5154h, 3e53967fdba86f24h ; j = 16..19
    DQ 3e682468446b6824h, 3e3f72e29f84325bh, 3e18624b40c4dbd0h, 3e5704f3404f068eh ; j = 20..23
    DQ 3e54d8a89c750e5eh, 3e5a74b29ab4cf62h, 3e5a753e077c2a0fh, 3e5ad49f699bb2c0h ; j = 24..27
    DQ 3e6a90a852b19260h, 3e56b48521ba6f93h, 3e0d2ac258f87d03h, 3e42a91124893ecfh ; j = 28..31
    DQ 3e59fcef32422cbeh, 3e68ca345de441c5h, 3e61d8bee7ba46e1h, 3e59099f22fdba6ah ; j = 32..35
    DQ 3e4f580c36bea881h, 3e5b3d398841740ah, 3e62999c25159f11h, 3e668925d901c83bh ; j = 36..39
    DQ 3e415506dadd3e2ah, 3e622aee6c57304eh, 3e29b8bc9e8a0387h, 3e6fbc9c9f173d24h ; j = 40..43
    DQ 3e451f8480e3e235h, 3e66bbcac96535b5h, 3e41f12ae45a1224h, 3e55e7f6fd0fac90h ; j = 44..47
    DQ 3e62b5a75abd0e69h, 3e609e2bf5ed7fa1h, 3e47daf237553d84h, 3e12f074891ee83dh ; j = 48..51
    DQ 3e6b0aa538444196h, 3e6cafa29694426fh, 3e69df20d22a0797h, 3e640f12f71a1e45h ; j = 52..55
    DQ 3e69f7490e4bb40bh, 3e4ed9942b84600dh, 3e4bdcdaf5cb4656h, 3e5e2cffd89cf44ch ; j = 56..59
    DQ 3e452486cc2c7b9dh, 3e6cc2b44eee3fa4h, 3e66dc8a80ce9f09h, 3e39e90d82e90a7eh ; j = 60..63
END

--------------------------------------------------------------------------------
/two_to_jby64_table.asm:
--------------------------------------------------------------------------------
;;
;
; MIT License
; -----------
;
; Copyright (c) 2002-2019 Advanced
Micro Devices, Inc. 7 | ; 8 | ; Permission is hereby granted, free of charge, to any person obtaining a copy 9 | ; of this Software and associated documentation files (the "Software"), to deal 10 | ; in the Software without restriction, including without limitation the rights 11 | ; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | ; copies of the Software, and to permit persons to whom the Software is 13 | ; furnished to do so, subject to the following conditions: 14 | ; 15 | ; The above copyright notice and this permission notice shall be included in 16 | ; all copies or substantial portions of the Software. 17 | ; 18 | ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | ; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | ; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | ; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | ; THE SOFTWARE. 
;
;; Defines __two_to_jby64_table table
;; Used by exp and expf
;;
;; 64 quadwords (index j = 0..63), each the raw IEEE-754 binary64 bit
;; pattern of a constant in [1.0, 2.0): entry 0 is 1.0 (3ff0000000000000h)
;; and entry 32 is sqrt(2) (3ff6a09e667f3bcdh), consistent with entry j
;; holding 2^(j/64) as the table name suggests. Four entries are emitted per
;; DQ line; the trailing comment gives the index range of that line.
;;

.const

ALIGN 16
PUBLIC __two_to_jby64_table
__two_to_jby64_table \
    DQ 3ff0000000000000h, 3ff02c9a3e778061h, 3ff059b0d3158574h, 3ff0874518759bc8h ; j =  0.. 3
    DQ 3ff0b5586cf9890fh, 3ff0e3ec32d3d1a2h, 3ff11301d0125b51h, 3ff1429aaea92de0h ; j =  4.. 7
    DQ 3ff172b83c7d517bh, 3ff1a35beb6fcb75h, 3ff1d4873168b9aah, 3ff2063b88628cd6h ; j =  8..11
    DQ 3ff2387a6e756238h, 3ff26b4565e27cddh, 3ff29e9df51fdee1h, 3ff2d285a6e4030bh ; j = 12..15
    DQ 3ff306fe0a31b715h, 3ff33c08b26416ffh, 3ff371a7373aa9cbh, 3ff3a7db34e59ff7h ; j = 16..19
    DQ 3ff3dea64c123422h, 3ff4160a21f72e2ah, 3ff44e086061892dh, 3ff486a2b5c13cd0h ; j = 20..23
    DQ 3ff4bfdad5362a27h, 3ff4f9b2769d2ca7h, 3ff5342b569d4f82h, 3ff56f4736b527dah ; j = 24..27
    DQ 3ff5ab07dd485429h, 3ff5e76f15ad2148h, 3ff6247eb03a5585h, 3ff6623882552225h ; j = 28..31
    DQ 3ff6a09e667f3bcdh, 3ff6dfb23c651a2fh, 3ff71f75e8ec5f74h, 3ff75feb564267c9h ; j = 32..35
    DQ 3ff7a11473eb0187h, 3ff7e2f336cf4e62h, 3ff82589994cce13h, 3ff868d99b4492edh ; j = 36..39
    DQ 3ff8ace5422aa0dbh, 3ff8f1ae99157736h, 3ff93737b0cdc5e5h, 3ff97d829fde4e50h ; j = 40..43
    DQ 3ff9c49182a3f090h, 3ffa0c667b5de565h, 3ffa5503b23e255dh, 3ffa9e6b5579fdbfh ; j = 44..47
    DQ 3ffae89f995ad3adh, 3ffb33a2b84f15fbh, 3ffb7f76f2fb5e47h, 3ffbcc1e904bc1d2h ; j = 48..51
    DQ 3ffc199bdd85529ch, 3ffc67f12e57d14bh, 3ffcb720dcef9069h, 3ffd072d4a07897ch ; j = 52..55
    DQ 3ffd5818dcfba487h, 3ffda9e603db3285h, 3ffdfc97337b9b5fh, 3ffe502ee78b3ff6h ; j = 56..59
    DQ 3ffea4afa2a490dah, 3ffefa1bee615a27h, 3fff50765b6e4540h, 3fffa7c1819e90d8h ; j = 60..63

END

--------------------------------------------------------------------------------