├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── MAINTAINERS.md ├── Makefile ├── README.md ├── config.mk.dist ├── contributor-agreement.pdf ├── fp ├── Dir.mk ├── README.md ├── armv6-m │ ├── faddsub.S │ ├── fcmp.h │ ├── fcmp_bool_eq.S │ ├── fcmp_bool_ge.S │ ├── fcmp_bool_gt.S │ ├── fcmp_bool_le.S │ ├── fcmp_bool_lt.S │ ├── fcmp_bool_un.S │ ├── fcmp_flags.S │ ├── fcmp_flags_eq.S │ ├── fcmp_flags_rev.S │ └── fmul.S ├── at32 │ ├── f2iz.S │ ├── f2lz.S │ ├── f2uiz.S │ ├── f2ulz.S │ ├── faddsub.S │ ├── fcmp.h │ ├── fcmp_bool_eq.S │ ├── fcmp_bool_ge.S │ ├── fcmp_bool_gt.S │ ├── fcmp_bool_le.S │ ├── fcmp_bool_lt.S │ ├── fcmp_bool_un.S │ ├── fcmp_flags.S │ ├── fcmp_flags_eq.S │ ├── fcmp_flags_rev.S │ ├── fdiv.S │ ├── fmul.S │ ├── i2f.S │ ├── l2f.S │ └── ui2f.S ├── common │ ├── fnan2.c │ ├── fnorm2.c │ └── funder.c ├── include │ └── endian.h └── test │ ├── test-f2iz.c │ ├── test-f2lz.c │ ├── test-f2uiz.c │ ├── test-f2ulz.c │ ├── test-faddsub.c │ ├── test-fcmp.c │ ├── test-fdiv.c │ ├── test-fmul.c │ ├── test-i2f.c │ ├── test-l2f.c │ ├── test-ui2f.c │ └── test-ul2f.c ├── math ├── Dir.mk ├── README.md ├── aarch64 │ ├── advsimd │ │ ├── acos.c │ │ ├── acosf.c │ │ ├── acosh.c │ │ ├── acoshf.c │ │ ├── acospi.c │ │ ├── acospif.c │ │ ├── asin.c │ │ ├── asinf.c │ │ ├── asinh.c │ │ ├── asinhf.c │ │ ├── asinpi.c │ │ ├── asinpif.c │ │ ├── atan.c │ │ ├── atan2.c │ │ ├── atan2f.c │ │ ├── atan2pi.c │ │ ├── atan2pif.c │ │ ├── atanf.c │ │ ├── atanh.c │ │ ├── atanhf.c │ │ ├── atanpi.c │ │ ├── atanpif.c │ │ ├── cbrt.c │ │ ├── cbrtf.c │ │ ├── cexpi.c │ │ ├── cexpif.c │ │ ├── cos.c │ │ ├── cosf.c │ │ ├── cosh.c │ │ ├── coshf.c │ │ ├── cospi.c │ │ ├── cospif.c │ │ ├── erf.c │ │ ├── erfc.c │ │ ├── erfcf.c │ │ ├── erff.c │ │ ├── exp.c │ │ ├── exp10.c │ │ ├── exp10f.c │ │ ├── exp2.c │ │ ├── exp2f.c │ │ ├── exp2f_1u.c │ │ ├── exp2m1f.c │ │ ├── expf.c │ │ ├── expf_1u.c │ │ ├── expm1.c │ │ ├── expm1f.c │ │ ├── finite_pow.h │ │ ├── hypot.c │ │ ├── hypotf.c │ │ ├── log.c │ │ ├── log10.c │ │ ├── log10f.c 
│ │ ├── log1p.c │ │ ├── log1pf.c │ │ ├── log2.c │ │ ├── log2f.c │ │ ├── logf.c │ │ ├── modf.c │ │ ├── modff.c │ │ ├── pow.c │ │ ├── powf.c │ │ ├── sin.c │ │ ├── sincos.c │ │ ├── sincosf.c │ │ ├── sincospi.c │ │ ├── sincospif.c │ │ ├── sinf.c │ │ ├── sinh.c │ │ ├── sinhf.c │ │ ├── sinpi.c │ │ ├── sinpif.c │ │ ├── tan.c │ │ ├── tanf.c │ │ ├── tanh.c │ │ ├── tanhf.c │ │ ├── tanpi.c │ │ ├── tanpif.c │ │ ├── v_expf_inline.h │ │ ├── v_expm1_inline.h │ │ ├── v_expm1f_inline.h │ │ ├── v_log1p_inline.h │ │ ├── v_log1pf_inline.h │ │ ├── v_log_inline.h │ │ ├── v_math.h │ │ ├── v_poly_f32.h │ │ ├── v_poly_f64.h │ │ ├── v_sincos_common.h │ │ ├── v_sincosf_common.h │ │ ├── v_sincospi_common.h │ │ └── v_sincospif_common.h │ ├── cospi_3u5.c │ ├── cospif_2u6.c │ ├── experimental │ │ ├── README.contributors │ │ ├── acos_2u.c │ │ ├── acosf_1u4.c │ │ ├── acosh_3u.c │ │ ├── acoshf_2u8.c │ │ ├── advsimd │ │ │ ├── cosf_umax.c │ │ │ ├── erfinv_25u.c │ │ │ ├── erfinvf_5u.c │ │ │ ├── expf_umax.c │ │ │ ├── powf_umax.c │ │ │ ├── sinf_umax.c │ │ │ └── v_logf_inline.h │ │ ├── asin_3u.c │ │ ├── asin_data.c │ │ ├── asinf_2u5.c │ │ ├── asinf_data.c │ │ ├── asinh_2u5.c │ │ ├── asinh_data.c │ │ ├── asinhf_3u5.c │ │ ├── asinhf_data.c │ │ ├── atan2_2u5.c │ │ ├── atan2f_3u.c │ │ ├── atan_2u5.c │ │ ├── atan_common.h │ │ ├── atan_data.c │ │ ├── atanf_2u9.c │ │ ├── atanf_common.h │ │ ├── atanf_data.c │ │ ├── atanh_3u.c │ │ ├── atanhf_3u1.c │ │ ├── cbrt_2u.c │ │ ├── cbrt_data.c │ │ ├── cbrtf_1u5.c │ │ ├── cbrtf_data.c │ │ ├── cosh_2u.c │ │ ├── coshf_1u9.c │ │ ├── erf_2u5.c │ │ ├── erfc_1u8.c │ │ ├── erfcf_1u7.c │ │ ├── erff_2u.c │ │ ├── erfinv_24u5.c │ │ ├── erfinvf_4u7.c │ │ ├── erfinvl.c │ │ ├── exp_inline.h │ │ ├── expf_data.c │ │ ├── expm1_2u5.c │ │ ├── expm1_data.c │ │ ├── expm1f_1u6.c │ │ ├── expm1f_data.c │ │ ├── log10_2u.c │ │ ├── log10_data.c │ │ ├── log1p_2u.c │ │ ├── log1p_data.c │ │ ├── log1pf_2u1.c │ │ ├── log1pf_data.c │ │ ├── sinh_3u.c │ │ ├── sinhf_2u3.c │ │ ├── sve │ │ │ ├── cosf_umax.c │ 
│ │ ├── erfinv_25u.c │ │ │ ├── erfinvf_5u.c │ │ │ ├── expf_umax.c │ │ │ ├── powf_umax.c │ │ │ ├── powi.c │ │ │ ├── powif.c │ │ │ ├── sinf_umax.c │ │ │ ├── sv_expf_inline.h │ │ │ └── sv_logf_inline.h │ │ ├── tanf_3u3.c │ │ ├── tanf_data.c │ │ ├── tanh_3u.c │ │ └── tanhf_2u6.c │ ├── sincospi_4u.c │ ├── sincospif_3u2.c │ ├── sinpi_3u5.c │ ├── sinpif_2u5.c │ ├── sve │ │ ├── acos.c │ │ ├── acosf.c │ │ ├── acosh.c │ │ ├── acoshf.c │ │ ├── acospi.c │ │ ├── acospif.c │ │ ├── asin.c │ │ ├── asinf.c │ │ ├── asinh.c │ │ ├── asinhf.c │ │ ├── asinpi.c │ │ ├── asinpif.c │ │ ├── atan.c │ │ ├── atan2.c │ │ ├── atan2f.c │ │ ├── atan2pi.c │ │ ├── atan2pif.c │ │ ├── atanf.c │ │ ├── atanh.c │ │ ├── atanhf.c │ │ ├── atanpi.c │ │ ├── atanpif.c │ │ ├── cbrt.c │ │ ├── cbrtf.c │ │ ├── cexpi.c │ │ ├── cexpif.c │ │ ├── cos.c │ │ ├── cosf.c │ │ ├── cosh.c │ │ ├── coshf.c │ │ ├── cospi.c │ │ ├── cospif.c │ │ ├── erf.c │ │ ├── erfc.c │ │ ├── erfcf.c │ │ ├── erff.c │ │ ├── exp.c │ │ ├── exp10.c │ │ ├── exp10f.c │ │ ├── exp2.c │ │ ├── exp2f.c │ │ ├── exp2m1f.c │ │ ├── expf.c │ │ ├── expm1.c │ │ ├── expm1f.c │ │ ├── hypot.c │ │ ├── hypotf.c │ │ ├── log.c │ │ ├── log10.c │ │ ├── log10f.c │ │ ├── log1p.c │ │ ├── log1pf.c │ │ ├── log2.c │ │ ├── log2f.c │ │ ├── logf.c │ │ ├── modf.c │ │ ├── modff.c │ │ ├── pow.c │ │ ├── powf.c │ │ ├── sin.c │ │ ├── sincos.c │ │ ├── sincosf.c │ │ ├── sincospi.c │ │ ├── sincospif.c │ │ ├── sinf.c │ │ ├── sinh.c │ │ ├── sinhf.c │ │ ├── sinpi.c │ │ ├── sinpif.c │ │ ├── sv_expf_inline.h │ │ ├── sv_expm1f_inline.h │ │ ├── sv_log1p_inline.h │ │ ├── sv_log1pf_inline.h │ │ ├── sv_log_inline.h │ │ ├── sv_math.h │ │ ├── sv_poly_f32.h │ │ ├── sv_poly_f64.h │ │ ├── sv_poly_generic.h │ │ ├── sv_sincos_common.h │ │ ├── sv_sincosf_common.h │ │ ├── sv_sincospi_common.h │ │ ├── sv_sincospif_common.h │ │ ├── tan.c │ │ ├── tanf.c │ │ ├── tanh.c │ │ ├── tanhf.c │ │ ├── tanpi.c │ │ └── tanpif.c │ ├── tanpi_2u5.c │ ├── tanpif_3u1.c │ ├── v_erf_data.c │ ├── v_erfc_data.c │ ├── v_erfcf_data.c 
│ ├── v_erff_data.c │ ├── v_exp_data.c │ ├── v_exp_tail_data.c │ ├── v_log10_data.c │ ├── v_log2_data.c │ ├── v_log_data.c │ ├── v_pow_exp_data.c │ ├── v_pow_log_data.c │ └── v_powf_data.c ├── cosf.c ├── erf.c ├── erf_data.c ├── erff.c ├── erff_data.c ├── exp.c ├── exp10.c ├── exp2.c ├── exp2f.c ├── exp2f_data.c ├── exp_data.c ├── expf.c ├── include │ ├── mathlib.h │ ├── test_defs.h │ └── test_sig.h ├── log.c ├── log10f.c ├── log2.c ├── log2_data.c ├── log2f.c ├── log2f_data.c ├── log_data.c ├── logf.c ├── logf_data.c ├── math_config.h ├── math_err.c ├── math_errf.c ├── poly_generic.h ├── poly_scalar_f32.h ├── poly_scalar_f64.h ├── pow.c ├── pow_log_data.c ├── powf.c ├── powf_log2_data.c ├── sincosf.c ├── sincosf.h ├── sincosf_data.c ├── sinf.c ├── test │ ├── c23_references.h │ ├── mathbench.c │ ├── mathbench_funcs.h │ ├── mathbench_wrappers.h │ ├── mathtest.c │ ├── rtest │ │ ├── dotest.c │ │ ├── intern.h │ │ ├── main.c │ │ ├── random.c │ │ ├── random.h │ │ ├── semi.c │ │ ├── semi.h │ │ ├── types.h │ │ ├── wrappers.c │ │ └── wrappers.h │ ├── runulp.sh │ ├── test_defs.h │ ├── testcases │ │ ├── directed │ │ │ ├── acos.tst │ │ │ ├── acosf.tst │ │ │ ├── acosh.tst │ │ │ ├── acoshf.tst │ │ │ ├── asin.tst │ │ │ ├── asinf.tst │ │ │ ├── asinh.tst │ │ │ ├── asinhf.tst │ │ │ ├── atan.tst │ │ │ ├── atan2.tst │ │ │ ├── atan2f.tst │ │ │ ├── atanf.tst │ │ │ ├── atanh.tst │ │ │ ├── atanhf.tst │ │ │ ├── cbrtf.tst │ │ │ ├── cosf.tst │ │ │ ├── cosh.tst │ │ │ ├── coshf.tst │ │ │ ├── erf.tst │ │ │ ├── erfc.tst │ │ │ ├── erfcf.tst │ │ │ ├── erff.tst │ │ │ ├── exp.tst │ │ │ ├── exp10.tst │ │ │ ├── exp2.tst │ │ │ ├── exp2f.tst │ │ │ ├── expf.tst │ │ │ ├── expm1.tst │ │ │ ├── expm1f.tst │ │ │ ├── log.tst │ │ │ ├── log10.tst │ │ │ ├── log10f.tst │ │ │ ├── log1p.tst │ │ │ ├── log1pf.tst │ │ │ ├── log2.tst │ │ │ ├── log2f.tst │ │ │ ├── logf.tst │ │ │ ├── pow.tst │ │ │ ├── powf.tst │ │ │ ├── sincosf.tst │ │ │ ├── sinf.tst │ │ │ ├── sinh.tst │ │ │ ├── sinhf.tst │ │ │ ├── tanf.tst │ │ │ ├── 
tanh.tst │ │ │ └── tanhf.tst │ │ └── random │ │ │ ├── double.tst │ │ │ └── float.tst │ ├── traces │ │ ├── exp.txt │ │ └── sincosf.txt │ ├── ulp.c │ ├── ulp.h │ ├── ulp_funcs.h │ └── ulp_wrappers.h ├── tgamma128.c ├── tgamma128.h └── tools │ ├── asin.sollya │ ├── asinf.sollya │ ├── asinh.sollya │ ├── asinhf.sollya │ ├── asinpif.sollya │ ├── atan.sollya │ ├── atanf.sollya │ ├── atanpi.sollya │ ├── cbrt.sollya │ ├── cbrtf.sollya │ ├── cos.sollya │ ├── erf.sollya │ ├── erfc.sollya │ ├── erfcf.sollya │ ├── erff.sollya │ ├── exp.sollya │ ├── exp10.sollya │ ├── exp10f.sollya │ ├── exp2.sollya │ ├── exp2f.sollya │ ├── exp2m1f.sollya │ ├── expm1.sollya │ ├── expm1f.sollya │ ├── log.sollya │ ├── log10.sollya │ ├── log10f.sollya │ ├── log1p.sollya │ ├── log1pf.sollya │ ├── log2.sollya │ ├── log2_abs.sollya │ ├── log_abs.sollya │ ├── plot.py │ ├── remez.jl │ ├── sin.sollya │ ├── sincos.sollya │ ├── sincosf.sollya │ ├── sinpi.sollya │ ├── tan.sollya │ ├── tanf.sollya │ ├── tanpi.sollya │ ├── tgamma128_gen.jl │ ├── v_erf.sollya │ ├── v_erfc.sollya │ ├── v_exp.sollya │ ├── v_log.sollya │ ├── v_log10.sollya │ ├── v_log10f.sollya │ ├── v_log2f.sollya │ └── v_sin.sollya ├── networking ├── Dir.mk ├── README.md ├── aarch64 │ └── chksum_simd.c ├── arm │ └── chksum_simd.c ├── chksum.c ├── chksum_common.h ├── include │ └── networking.h └── test │ └── chksum.c └── string ├── Dir.mk ├── README.md ├── aarch64 ├── __mtag_tag_region.S ├── __mtag_tag_zero_region.S ├── asmdefs.h ├── check-arch.S ├── experimental │ ├── memchr-sve.S │ ├── memcmp-sve.S │ ├── stpcpy-sve.S │ ├── strchr-sve.S │ ├── strchrnul-sve.S │ ├── strcmp-sve.S │ ├── strcpy-sve.S │ ├── strlen-sve.S │ ├── strncmp-sve.S │ ├── strnlen-sve.S │ └── strrchr-sve.S ├── memchr-mte.S ├── memchr.S ├── memcmp.S ├── memcpy-advsimd.S ├── memcpy-mops.S ├── memcpy-sve.S ├── memcpy.S ├── memmove-mops.S ├── memrchr.S ├── memset-mops.S ├── memset-sve.S ├── memset.S ├── stpcpy.S ├── strchr-mte.S ├── strchr.S ├── strchrnul-mte.S ├── strchrnul.S ├── 
strcmp.S ├── strcpy.S ├── strlen-mte.S ├── strlen.S ├── strncmp.S ├── strnlen.S ├── strrchr-mte.S └── strrchr.S ├── arm ├── asmdefs.h ├── check-arch.S ├── memchr.S ├── memcpy.S ├── memset.S ├── strcmp-armv6m.S ├── strcmp.S ├── strcpy.c └── strlen-armv6t2.S ├── bench ├── memcpy.c ├── memset.c └── strlen.c ├── include ├── benchlib.h └── stringlib.h ├── test ├── __mtag_tag_region.c ├── __mtag_tag_zero_region.c ├── memchr.c ├── memcmp.c ├── memcpy.c ├── memmove.c ├── memrchr.c ├── memset.c ├── mte.h ├── stpcpy.c ├── strchr.c ├── strchrnul.c ├── strcmp.c ├── strcpy.c ├── stringtest.h ├── strlen.c ├── strncmp.c ├── strnlen.c └── strrchr.c └── x86_64 └── check-arch.S /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | .DS_Store 3 | config.mk 4 | -------------------------------------------------------------------------------- /MAINTAINERS.md: -------------------------------------------------------------------------------- 1 | # Maintainers 2 | 3 | ## Gatekeeper and Global Maintainer 4 | 5 | - Tamar Christina <[tamar.christina@arm.com](mailto:tamar.christina@arm.com)> 6 | 7 | ## Sub-Project Maintainers 8 | 9 | ### FP 10 | 11 | - Simon Tatham <[simon.tatham@arm.com](mailto:simon.tatham@arm.com)> 12 | - Peter Smith <[peter.smith@arm.com](mailto:peter.smith@arm.com)> 13 | - John Brawn <[john.brawn@arm.com](mailto:john.brawn@arm.com)> 14 | - Kristof Beyls <[kristof.beyls@arm.com](mailto:kristof.beyls@arm.com)> 15 | 16 | ### Math 17 | 18 | - Pierre Blanchard <[pierre.blanchard@arm.com](mailto:pierre.blanchard@arm.com)> 19 | - Dylan Fleming <[dylan.fleming@arm.com](mailto:dylan.fleming@arm.com)> 20 | - Joe Ramsay <[joe.ramsay@arm.com](mailto:joe.ramsay@arm.com)> 21 | 22 | ### Networking 23 | 24 | - Ola Liljedahl <[ola.liljedahl@arm.com](mailto:ola.liljedahl@arm.com)> 25 | 26 | ### String 27 | 28 | - Wilco Dijkstra <[wilco.dijkstra@arm.com](mailto:wilco.dijkstra@arm.com)> 29 | 
-------------------------------------------------------------------------------- /contributor-agreement.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ARM-software/optimized-routines/367a58e1b24f5931a77d68500ed7c131495c1dc4/contributor-agreement.pdf -------------------------------------------------------------------------------- /fp/armv6-m/fcmp_bool_eq.S: -------------------------------------------------------------------------------- 1 | // Single-precision comparison, returning a boolean in r0, true if a == b. 2 | // 3 | // Copyright (c) 1994-1998,2025, Arm Limited. 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | .syntax unified 7 | .text 8 | .p2align 2 9 | 10 | op0 .req r0 11 | op1 .req r1 12 | .macro SetReturnRegister 13 | BEQ 0f 14 | MOVS r0, #0 15 | BX lr 16 | 0: 17 | MOVS r0, #1 18 | BX lr 19 | .endm 20 | 21 | .globl arm_fp_fcmp_bool_eq 22 | .type arm_fp_fcmp_bool_eq,%function 23 | arm_fp_fcmp_bool_eq: 24 | #include "fcmp.h" 25 | 26 | fcmp_NaN: 27 | // If any NaN is involved, the return value for this comparison is false. 28 | MOVS r0, #0 29 | BX lr 30 | 31 | .size arm_fp_fcmp_bool_eq, .-arm_fp_fcmp_bool_eq 32 | -------------------------------------------------------------------------------- /fp/armv6-m/fcmp_bool_ge.S: -------------------------------------------------------------------------------- 1 | // Single-precision comparison, returning a boolean in r0, true if a >= b. 2 | // 3 | // Copyright (c) 1994-1998,2025, Arm Limited. 
4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | .syntax unified 7 | .text 8 | .p2align 2 9 | 10 | op0 .req r0 11 | op1 .req r1 12 | .macro SetReturnRegister 13 | BHS 0f 14 | MOVS r0, #0 15 | BX lr 16 | 0: 17 | MOVS r0, #1 18 | BX lr 19 | .endm 20 | 21 | .globl arm_fp_fcmp_bool_ge 22 | .type arm_fp_fcmp_bool_ge,%function 23 | arm_fp_fcmp_bool_ge: 24 | #include "fcmp.h" 25 | 26 | fcmp_NaN: 27 | // If any NaN is involved, the return value for this comparison is false. 28 | MOVS r0, #0 29 | BX lr 30 | 31 | .size arm_fp_fcmp_bool_ge, .-arm_fp_fcmp_bool_ge 32 | -------------------------------------------------------------------------------- /fp/armv6-m/fcmp_bool_gt.S: -------------------------------------------------------------------------------- 1 | // Single-precision comparison, returning a boolean in r0, true if a > b. 2 | // 3 | // Copyright (c) 1994-1998,2025, Arm Limited. 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | .syntax unified 7 | .text 8 | .p2align 2 9 | 10 | op0 .req r0 11 | op1 .req r1 12 | .macro SetReturnRegister 13 | BHI 0f 14 | MOVS r0, #0 15 | BX lr 16 | 0: 17 | MOVS r0, #1 18 | BX lr 19 | .endm 20 | 21 | .globl arm_fp_fcmp_bool_gt 22 | .type arm_fp_fcmp_bool_gt,%function 23 | arm_fp_fcmp_bool_gt: 24 | #include "fcmp.h" 25 | 26 | fcmp_NaN: 27 | // If any NaN is involved, the return value for this comparison is false. 28 | MOVS r0, #0 29 | BX lr 30 | 31 | .size arm_fp_fcmp_bool_gt, .-arm_fp_fcmp_bool_gt 32 | -------------------------------------------------------------------------------- /fp/armv6-m/fcmp_bool_le.S: -------------------------------------------------------------------------------- 1 | // Single-precision comparison, returning a boolean in r0, true if a <= b. 2 | // 3 | // Copyright (c) 1994-1998,2025, Arm Limited. 
4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | .syntax unified 7 | .text 8 | .p2align 2 9 | 10 | op0 .req r0 11 | op1 .req r1 12 | .macro SetReturnRegister 13 | BLS 0f 14 | MOVS r0, #0 15 | BX lr 16 | 0: 17 | MOVS r0, #1 18 | BX lr 19 | .endm 20 | 21 | .globl arm_fp_fcmp_bool_le 22 | .type arm_fp_fcmp_bool_le,%function 23 | arm_fp_fcmp_bool_le: 24 | #include "fcmp.h" 25 | 26 | fcmp_NaN: 27 | // If any NaN is involved, the return value for this comparison is false. 28 | MOVS r0, #0 29 | BX lr 30 | 31 | .size arm_fp_fcmp_bool_le, .-arm_fp_fcmp_bool_le 32 | -------------------------------------------------------------------------------- /fp/armv6-m/fcmp_bool_lt.S: -------------------------------------------------------------------------------- 1 | // Single-precision comparison, returning a boolean in r0, true if a < b. 2 | // 3 | // Copyright (c) 1994-1998,2025, Arm Limited. 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | .syntax unified 7 | .text 8 | .p2align 2 9 | 10 | op0 .req r0 11 | op1 .req r1 12 | .macro SetReturnRegister 13 | BLO 0f 14 | MOVS r0, #0 15 | BX lr 16 | 0: 17 | MOVS r0, #1 18 | BX lr 19 | .endm 20 | 21 | .globl arm_fp_fcmp_bool_lt 22 | .type arm_fp_fcmp_bool_lt,%function 23 | arm_fp_fcmp_bool_lt: 24 | #include "fcmp.h" 25 | 26 | fcmp_NaN: 27 | // If any NaN is involved, the return value for this comparison is false. 28 | MOVS r0, #0 29 | BX lr 30 | 31 | .size arm_fp_fcmp_bool_lt, .-arm_fp_fcmp_bool_lt 32 | -------------------------------------------------------------------------------- /fp/armv6-m/fcmp_bool_un.S: -------------------------------------------------------------------------------- 1 | // Single-precision comparison, returning a boolean in r0, true if a and b are 2 | // 'unordered', i.e. one or both of them is a NaN. 3 | // 4 | // Copyright (c) 1994-1998,2025, Arm Limited. 
5 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | 7 | .syntax unified 8 | .text 9 | .p2align 2 10 | 11 | .globl arm_fp_fcmp_bool_un 12 | .type arm_fp_fcmp_bool_un,%function 13 | arm_fp_fcmp_bool_un: 14 | 15 | // This function isn't based on the general-purpose code in fcmp.h, because 16 | // it's more effort than needed. Here we just need to identify whether or not 17 | // there's at least one NaN in the inputs. There's no need to vary that check 18 | // based on the sign bit, so we might as well just do the NaN test as quickly 19 | // as possible. 20 | MOVS r2, #0xFF 21 | LSLS r2, r2, #24 22 | LSLS r3, r0, #1 23 | CMP r3, r2 24 | BHI fcmp_NaN 25 | LSLS r3, r1, #1 26 | CMP r3, r2 27 | BHI fcmp_NaN 28 | 29 | // If neither BHI was taken (LS both times), we have no NaNs and return false. 30 | MOVS r0, #0 31 | BX lr 32 | 33 | // Otherwise, we have at least one NaN, and return true. 34 | fcmp_NaN: 35 | MOVS r0, #1 36 | BX lr 37 | 38 | .size arm_fp_fcmp_bool_un, .-arm_fp_fcmp_bool_un 39 | -------------------------------------------------------------------------------- /fp/armv6-m/fcmp_flags.S: -------------------------------------------------------------------------------- 1 | // Single-precision comparison, returning LO / EQ / HI in the flags, with the 2 | // 'unordered result' case also returning HI. 3 | // 4 | // Copyright (c) 1994-1998,2025, Arm Limited. 5 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | 7 | .syntax unified 8 | .text 9 | .p2align 2 10 | 11 | op0 .req r0 12 | op1 .req r1 13 | .macro SetReturnRegister 14 | .endm 15 | 16 | .globl arm_fp_fcmp_flags 17 | .type arm_fp_fcmp_flags,%function 18 | arm_fp_fcmp_flags: 19 | #include "fcmp.h" 20 | 21 | fcmp_NaN: 22 | // If any NaN is involved, the return value for this comparison is 23 | // to set the flags as if a > b, so that the HI condition will return true. 
24 | MOVS r0, #1 25 | CMP r0, #0 26 | BX lr 27 | 28 | .size arm_fp_fcmp_flags, .-arm_fp_fcmp_flags 29 | -------------------------------------------------------------------------------- /fp/armv6-m/fcmp_flags_eq.S: -------------------------------------------------------------------------------- 1 | // Single-precision comparison, returning EQ / NE in the flags. 2 | // 3 | // Copyright (c) 1994-1998,2025, Arm Limited. 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | .syntax unified 7 | .text 8 | .p2align 2 9 | 10 | op0 .req r0 11 | op1 .req r1 12 | .macro SetReturnRegister 13 | .endm 14 | 15 | .globl arm_fp_fcmp_flags_eq 16 | .type arm_fp_fcmp_flags_eq,%function 17 | arm_fp_fcmp_flags_eq: 18 | #include "fcmp.h" 19 | 20 | fcmp_NaN: 21 | // If any NaN is involved, the return value for this comparison is false. 22 | MOVS r0, #1 // the value in r0 is unimportant; this just sets Z=0 23 | BX lr 24 | 25 | .size arm_fp_fcmp_flags_eq, .-arm_fp_fcmp_flags_eq 26 | -------------------------------------------------------------------------------- /fp/armv6-m/fcmp_flags_rev.S: -------------------------------------------------------------------------------- 1 | // Single-precision comparison, with the input operands reversed, returning 2 | // LO / EQ / HI in the flags, with the 'unordered result' case also returning 3 | // HI. 4 | // 5 | // Copyright (c) 1994-1998,2025, Arm Limited. 6 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 7 | 8 | .syntax unified 9 | .text 10 | .p2align 2 11 | 12 | op0 .req r1 13 | op1 .req r0 14 | .macro SetReturnRegister 15 | .endm 16 | 17 | .globl arm_fp_fcmp_flags_rev 18 | .type arm_fp_fcmp_flags_rev,%function 19 | arm_fp_fcmp_flags_rev: 20 | #include "fcmp.h" 21 | 22 | fcmp_NaN: 23 | // If any NaN is involved, the return value for this comparison is 24 | // to set the flags as if a > b, so that the HI condition will return true. 
25 | MOVS r0, #1 26 | CMP r0, #0 27 | BX lr 28 | 29 | .size arm_fp_fcmp_flags_rev, .-arm_fp_fcmp_flags_rev 30 | -------------------------------------------------------------------------------- /fp/at32/fcmp_bool_eq.S: -------------------------------------------------------------------------------- 1 | // Single-precision comparison, returning a boolean in r0, true if a == b. 2 | // 3 | // Copyright (c) 1994-1998,2025, Arm Limited. 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | .syntax unified 7 | .text 8 | .p2align 2 9 | 10 | op0 .req r0 11 | op1 .req r1 12 | .macro SetReturnRegister 13 | MOV r0, #0 14 | MOVEQ r0, #1 15 | .endm 16 | 17 | .globl arm_fp_fcmp_bool_eq 18 | .type arm_fp_fcmp_bool_eq,%function 19 | arm_fp_fcmp_bool_eq: 20 | #include "fcmp.h" 21 | 22 | fcmp_NaN: 23 | // If any NaN is involved, the return value for this comparison is false. 24 | MOV r0, #0 25 | BX lr 26 | 27 | .size arm_fp_fcmp_bool_eq, .-arm_fp_fcmp_bool_eq 28 | -------------------------------------------------------------------------------- /fp/at32/fcmp_bool_ge.S: -------------------------------------------------------------------------------- 1 | // Single-precision comparison, returning a boolean in r0, true if a >= b. 2 | // 3 | // Copyright (c) 1994-1998,2025, Arm Limited. 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | .syntax unified 7 | .text 8 | .p2align 2 9 | 10 | op0 .req r0 11 | op1 .req r1 12 | .macro SetReturnRegister 13 | MOV r0, #0 14 | MOVHS r0, #1 15 | .endm 16 | 17 | .globl arm_fp_fcmp_bool_ge 18 | .type arm_fp_fcmp_bool_ge,%function 19 | arm_fp_fcmp_bool_ge: 20 | #include "fcmp.h" 21 | 22 | fcmp_NaN: 23 | // If any NaN is involved, the return value for this comparison is false. 
24 | MOV r0, #0 25 | BX lr 26 | 27 | .size arm_fp_fcmp_bool_ge, .-arm_fp_fcmp_bool_ge 28 | -------------------------------------------------------------------------------- /fp/at32/fcmp_bool_gt.S: -------------------------------------------------------------------------------- 1 | // Single-precision comparison, returning a boolean in r0, true if a > b. 2 | // 3 | // Copyright (c) 1994-1998,2025, Arm Limited. 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | .syntax unified 7 | .text 8 | .p2align 2 9 | 10 | op0 .req r0 11 | op1 .req r1 12 | .macro SetReturnRegister 13 | MOV r0, #0 14 | MOVHI r0, #1 15 | .endm 16 | 17 | .globl arm_fp_fcmp_bool_gt 18 | .type arm_fp_fcmp_bool_gt,%function 19 | arm_fp_fcmp_bool_gt: 20 | #include "fcmp.h" 21 | 22 | fcmp_NaN: 23 | // If any NaN is involved, the return value for this comparison is false. 24 | MOV r0, #0 25 | BX lr 26 | 27 | .size arm_fp_fcmp_bool_gt, .-arm_fp_fcmp_bool_gt 28 | -------------------------------------------------------------------------------- /fp/at32/fcmp_bool_le.S: -------------------------------------------------------------------------------- 1 | // Single-precision comparison, returning a boolean in r0, true if a <= b. 2 | // 3 | // Copyright (c) 1994-1998,2025, Arm Limited. 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | .syntax unified 7 | .text 8 | .p2align 2 9 | 10 | op0 .req r0 11 | op1 .req r1 12 | .macro SetReturnRegister 13 | MOV r0, #0 14 | MOVLS r0, #1 15 | .endm 16 | 17 | .globl arm_fp_fcmp_bool_le 18 | .type arm_fp_fcmp_bool_le,%function 19 | arm_fp_fcmp_bool_le: 20 | #include "fcmp.h" 21 | 22 | fcmp_NaN: 23 | // If any NaN is involved, the return value for this comparison is false. 
24 | MOV r0, #0 25 | BX lr 26 | 27 | .size arm_fp_fcmp_bool_le, .-arm_fp_fcmp_bool_le 28 | -------------------------------------------------------------------------------- /fp/at32/fcmp_bool_lt.S: -------------------------------------------------------------------------------- 1 | // Single-precision comparison, returning a boolean in r0, true if a < b. 2 | // 3 | // Copyright (c) 1994-1998,2025, Arm Limited. 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | .syntax unified 7 | .text 8 | .p2align 2 9 | 10 | op0 .req r0 11 | op1 .req r1 12 | .macro SetReturnRegister 13 | MOV r0, #0 14 | MOVLO r0, #1 15 | .endm 16 | 17 | .globl arm_fp_fcmp_bool_lt 18 | .type arm_fp_fcmp_bool_lt,%function 19 | arm_fp_fcmp_bool_lt: 20 | #include "fcmp.h" 21 | 22 | fcmp_NaN: 23 | // If any NaN is involved, the return value for this comparison is false. 24 | MOV r0, #0 25 | BX lr 26 | 27 | .size arm_fp_fcmp_bool_lt, .-arm_fp_fcmp_bool_lt 28 | -------------------------------------------------------------------------------- /fp/at32/fcmp_bool_un.S: -------------------------------------------------------------------------------- 1 | // Single-precision comparison, returning a boolean in r0, true if a and b are 2 | // 'unordered', i.e. one or both of them is a NaN. 3 | // 4 | // Copyright (c) 1994-1998,2025, Arm Limited. 5 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | 7 | .syntax unified 8 | .text 9 | .p2align 2 10 | 11 | .globl arm_fp_fcmp_bool_un 12 | .type arm_fp_fcmp_bool_un,%function 13 | arm_fp_fcmp_bool_un: 14 | 15 | // This function isn't based on the general-purpose code in fcmp.h, because 16 | // it's more effort than needed. Here we just need to identify whether or not 17 | // there's at least one NaN in the inputs. There's no need to vary that check 18 | // based on the sign bit, so we might as well just do the NaN test as quickly 19 | // as possible. 
20 | MOV r12, #0xFF << 24 21 | CMP r12, r0, LSL #1 // if LO, then r12 < (r0 << 1), so r0 is a NaN 22 | CMPHS r12, r1, LSL #1 // if not LO, then do the same check for r1 23 | 24 | // If HS, then we have no NaNs and return false. We do this as quickly as we 25 | // can (not stopping to take two instructions setting up r0 for both 26 | // possibilities), on the assumption that NaNs are rare and we want to 27 | // optimize for the non-NaN path. 28 | MOVHS r0, #0 29 | BXHS lr 30 | 31 | // Otherwise, we have at least one NaN, and return true. 32 | MOV r0, #1 33 | BX lr 34 | 35 | .size arm_fp_fcmp_bool_un, .-arm_fp_fcmp_bool_un 36 | -------------------------------------------------------------------------------- /fp/at32/fcmp_flags.S: -------------------------------------------------------------------------------- 1 | // Single-precision comparison, returning LO / EQ / HI in the flags, with the 2 | // 'unordered result' case also returning HI. 3 | // 4 | // Copyright (c) 1994-1998,2025, Arm Limited. 5 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | 7 | .syntax unified 8 | .text 9 | .p2align 2 10 | 11 | op0 .req r0 12 | op1 .req r1 13 | .macro SetReturnRegister 14 | .endm 15 | 16 | .globl arm_fp_fcmp_flags 17 | .type arm_fp_fcmp_flags,%function 18 | arm_fp_fcmp_flags: 19 | #include "fcmp.h" 20 | 21 | fcmp_NaN: 22 | // If any NaN is involved, the return value for this comparison is 23 | // to set the flags as if a > b, so that the HI condition will return true. 24 | MOV r0, #1 25 | CMP r0, #0 26 | BX lr 27 | 28 | .size arm_fp_fcmp_flags, .-arm_fp_fcmp_flags 29 | -------------------------------------------------------------------------------- /fp/at32/fcmp_flags_eq.S: -------------------------------------------------------------------------------- 1 | // Single-precision comparison, returning EQ / NE in the flags. 2 | // 3 | // Copyright (c) 1994-1998,2025, Arm Limited. 
4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | .syntax unified 7 | .text 8 | .p2align 2 9 | 10 | op0 .req r0 11 | op1 .req r1 12 | .macro SetReturnRegister 13 | .endm 14 | 15 | .globl arm_fp_fcmp_flags_eq 16 | .type arm_fp_fcmp_flags_eq,%function 17 | arm_fp_fcmp_flags_eq: 18 | #include "fcmp.h" 19 | 20 | fcmp_NaN: 21 | // If any NaN is involved, the return value for this comparison is false. 22 | MOVS r0, #1 // the value in r0 is unimportant; this just sets Z=0 23 | BX lr 24 | 25 | .size arm_fp_fcmp_flags_eq, .-arm_fp_fcmp_flags_eq 26 | -------------------------------------------------------------------------------- /fp/at32/fcmp_flags_rev.S: -------------------------------------------------------------------------------- 1 | // Single-precision comparison, with the input operands reversed, returning 2 | // LO / EQ / HI in the flags, with the 'unordered result' case also returning 3 | // HI. 4 | // 5 | // Copyright (c) 1994-1998,2025, Arm Limited. 6 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 7 | 8 | .syntax unified 9 | .text 10 | .p2align 2 11 | 12 | op0 .req r1 13 | op1 .req r0 14 | .macro SetReturnRegister 15 | .endm 16 | 17 | .globl arm_fp_fcmp_flags_rev 18 | .type arm_fp_fcmp_flags_rev,%function 19 | arm_fp_fcmp_flags_rev: 20 | #include "fcmp.h" 21 | 22 | fcmp_NaN: 23 | // If any NaN is involved, the return value for this comparison is 24 | // to set the flags as if a > b, so that the HI condition will return true. 25 | MOV r0, #1 26 | CMP r0, #0 27 | BX lr 28 | 29 | .size arm_fp_fcmp_flags_rev, .-arm_fp_fcmp_flags_rev 30 | -------------------------------------------------------------------------------- /fp/common/fnan2.c: -------------------------------------------------------------------------------- 1 | // Helper function for handling single-precision input NaNs. 2 | // 3 | // Copyright (c) 2025, Arm Limited. 
4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | // This helper function is available for use by single-precision float 7 | // arithmetic implementations to handle propagating NaNs from the input 8 | // operands to the output, in a way that matches Arm hardware FP. 9 | // 10 | // On input, a and b are floating-point numbers in IEEE 754 encoding, and at 11 | // least one of them must be a NaN. The return value is the correct output NaN. 12 | // Selection order: a NaN in a with bit 22 (0x00400000, the quiet bit) clear, then such a NaN in b, each returned with that bit set; otherwise a if it is a NaN, else b, returned unchanged. 13 | unsigned 14 | __fnan2 (unsigned a, unsigned b) 15 | { 16 | unsigned aadj = (a << 1) + 0x00800000; 17 | unsigned badj = (b << 1) + 0x00800000; 18 | if (aadj > 0xff800000) 19 | return a | 0x00400000; 20 | if (badj > 0xff800000) 21 | return b | 0x00400000; 22 | if (aadj < 0x00800000) 23 | return a; 24 | else /* a is not a NaN, so b must be the quiet NaN: expect (badj < 0x00800000) */ 25 | return b; 26 | } 27 | -------------------------------------------------------------------------------- /fp/common/fnorm2.c: -------------------------------------------------------------------------------- 1 | // Helper function for handling single-precision input denormals. 2 | // 3 | // Copyright (c) 2025, Arm Limited. 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | // This helper function is available for use by single-precision float 7 | // arithmetic implementations, to handle denormal inputs on entry by 8 | // renormalizing the mantissa and modifying the exponent to match. 9 | 10 | // Structure containing the function's inputs and outputs. 11 | // 12 | // On entry: a, b are two input floating-point numbers, still in IEEE 754 13 | // encoding. expa and expb are the 8-bit exponents of those numbers, extracted 14 | // and shifted down to the low 8 bits of the word, with no other change. 15 | // Neither a nor b should be zero, or have the maximum exponent (indicating an 16 | // infinity or NaN). 
17 | // 18 | // On exit: each of a and b contains the mantissa of the input value, with the 19 | // leading 1 bit made explicit, and shifted up to the top of the word. If expa 20 | // was zero (indicating that a was denormal) then it is now represented as a 21 | // normalized number with an out-of-range exponent (zero or negative). The same 22 | // applies to expb and b. 23 | struct fnorm2 24 | { 25 | unsigned a, b, expa, expb; 26 | }; 27 | 28 | void 29 | __fnorm2 (struct fnorm2 *values) 30 | { 31 | values->a <<= 8; 32 | values->b <<= 8; 33 | if (values->expa == 0) 34 | { 35 | unsigned shift = __builtin_clz (values->a); 36 | values->a <<= shift; 37 | values->expa = 1 - shift; 38 | } 39 | else 40 | { 41 | values->a |= 0x80000000; 42 | } 43 | if (values->expb == 0) 44 | { 45 | unsigned shift = __builtin_clz (values->b); 46 | values->b <<= shift; 47 | values->expb = 1 - shift; 48 | } 49 | else 50 | { 51 | values->b |= 0x80000000; 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /fp/include/endian.h: -------------------------------------------------------------------------------- 1 | // Register aliases for endian-independent floating point code. 2 | // 3 | // Copyright (c) 2025, Arm Limited. 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | // This header file should be included from assembly source code (not C). It 7 | // defines two pairs of register aliases, for handling 64-bit values passed and 8 | // returned from functions in the AArch32 integer registers: 9 | // 10 | // ah, al the high and low words of a 64-bit value passed in {r0,r1} 11 | // bh, bl the high and low words of a 64-bit value passed in {r2,r3} 12 | // 13 | // Which alias goes with which register depends on endianness. 14 | 15 | #ifdef __BIG_ENDIAN__ 16 | // Big-endian: high words are in lower-numbered registers. 
17 | ah .req r0 18 | al .req r1 19 | bh .req r2 20 | bl .req r3 21 | #else 22 | // Little-endian: low words are in lower-numbered registers. 23 | al .req r0 24 | ah .req r1 25 | bl .req r2 26 | bh .req r3 27 | #endif 28 | 29 | -------------------------------------------------------------------------------- /fp/test/test-ui2f.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Tests of IEEE 754 uint32 to single-precision conversion 3 | * 4 | * Copyright (c) 1999-2025, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include <inttypes.h> 9 | #include <stdbool.h> 10 | #include <stdint.h> 11 | #include <stdio.h> 12 | #include <string.h> 13 | 14 | struct test 15 | { 16 | uint32_t in, out; 17 | }; 18 | 19 | static const struct test tests[] = { 20 | }; 21 | 22 | float 23 | make_float (uint32_t x) 24 | { 25 | float r; 26 | memcpy (&r, &x, sizeof (r)); 27 | return r; 28 | } 29 | 30 | uint32_t 31 | unmake_float (float x) 32 | { 33 | uint32_t r; 34 | memcpy (&r, &x, sizeof (r)); 35 | return r; 36 | } 37 | 38 | int 39 | main (void) 40 | { 41 | bool failed = false; 42 | 43 | for (size_t i = 0; i < sizeof (tests) / sizeof (tests[0]); i++) 44 | { 45 | const struct test *t = &tests[i]; 46 | 47 | #ifdef USE_NATIVE_ARITHMETIC 48 | /* If you compile with USE_NATIVE_ARITHMETIC defined, the same 49 | * set of tests will be run using the toolchain's built in float 50 | * arithmetic, instead of calling arm_fp_ui2f.
*/ 51 | float out = (float)t->in; 52 | uint32_t outbits = unmake_float(out); 53 | #else 54 | extern uint32_t arm_fp_i2f(uint32_t); 55 | uint32_t outbits = arm_fp_i2f(t->in); 56 | #endif 57 | 58 | if (outbits != t->out) 59 | { 60 | printf ("FAIL: i2f(%08" PRIx32 ") -> %08" PRIx32 61 | ", expected %08" PRIx32 "\n", t->in, outbits, t->out); 62 | failed = true; 63 | } 64 | } 65 | 66 | if (!failed) 67 | printf ("all passed\n"); 68 | 69 | return failed; 70 | } 71 | -------------------------------------------------------------------------------- /math/aarch64/advsimd/cexpi.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Double-precision vector sincos function - return-by-value interface. 3 | * 4 | * Copyright (c) 2023-2024, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "v_sincos_common.h" 9 | #include "v_math.h" 10 | #include "test_defs.h" 11 | 12 | static float64x2x2_t VPCS_ATTR NOINLINE 13 | special_case (float64x2_t x, uint64x2_t special, float64x2x2_t y) 14 | { 15 | return (float64x2x2_t){ v_call_f64 (sin, x, y.val[0], special), 16 | v_call_f64 (cos, x, y.val[1], special) }; 17 | } 18 | 19 | /* Double-precision vector function allowing calculation of both sin and cos in 20 | one function call, using shared argument reduction and separate polynomials. 21 | Largest observed error is for sin, 3.22 ULP: 22 | v_sincos_sin (0x1.d70eef40f39b1p+12) got -0x1.ffe9537d5dbb7p-3 23 | want -0x1.ffe9537d5dbb4p-3. 
*/ 24 | VPCS_ATTR float64x2x2_t 25 | _ZGVnN2v_cexpi (float64x2_t x) 26 | { 27 | const struct v_sincos_data *d = ptr_barrier (&v_sincos_data); 28 | uint64x2_t special = check_ge_rangeval (x, d); 29 | 30 | float64x2x2_t sc = v_sincos_inline (x, d); 31 | 32 | if (unlikely (v_any_u64 (special))) 33 | return special_case (x, special, sc); 34 | return sc; 35 | } 36 | 37 | TEST_DISABLE_FENV (_ZGVnN2v_cexpi_cos) 38 | TEST_DISABLE_FENV (_ZGVnN2v_cexpi_sin) 39 | TEST_ULP (_ZGVnN2v_cexpi_sin, 2.73) 40 | TEST_ULP (_ZGVnN2v_cexpi_cos, 2.73) 41 | #define V_CEXPI_INTERVAL(lo, hi, n) \ 42 | TEST_INTERVAL (_ZGVnN2v_cexpi_sin, lo, hi, n) \ 43 | TEST_INTERVAL (_ZGVnN2v_cexpi_cos, lo, hi, n) 44 | V_CEXPI_INTERVAL (0, 0x1p23, 500000) 45 | V_CEXPI_INTERVAL (-0, -0x1p23, 500000) 46 | V_CEXPI_INTERVAL (0x1p23, inf, 10000) 47 | V_CEXPI_INTERVAL (-0x1p23, -inf, 10000) 48 | -------------------------------------------------------------------------------- /math/aarch64/advsimd/cexpif.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Single-precision vector cexpi function. 3 | * 4 | * Copyright (c) 2023-2024, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "v_sincosf_common.h" 9 | #include "v_math.h" 10 | #include "test_defs.h" 11 | 12 | static float32x4x2_t VPCS_ATTR NOINLINE 13 | special_case (float32x4_t x, uint32x4_t special, float32x4x2_t y) 14 | { 15 | return (float32x4x2_t){ v_call_f32 (sinf, x, y.val[0], special), 16 | v_call_f32 (cosf, x, y.val[1], special) }; 17 | } 18 | 19 | /* Single-precision vector function allowing calculation of both sin and cos in 20 | one function call, using shared argument reduction and separate low-order 21 | polynomials. 22 | Worst-case error for sin is 1.67 ULP: 23 | v_cexpif_sin(0x1.c704c4p+19) got 0x1.fff698p-5 want 0x1.fff69cp-5 24 | Worst-case error for cos is 1.81 ULP: 25 | v_cexpif_cos(0x1.e506fp+19) got -0x1.ffec6ep-6 want -0x1.ffec72p-6. 
*/ 26 | VPCS_ATTR float32x4x2_t 27 | _ZGVnN4v_cexpif (float32x4_t x) 28 | { 29 | const struct v_sincosf_data *d = ptr_barrier (&v_sincosf_data); 30 | uint32x4_t special = check_ge_rangeval (x, d); 31 | 32 | float32x4x2_t sc = v_sincosf_inline (x, d); 33 | 34 | if (unlikely (v_any_u32 (special))) 35 | return special_case (x, special, sc); 36 | return sc; 37 | } 38 | 39 | TEST_DISABLE_FENV (_ZGVnN4v_cexpif_sin) 40 | TEST_DISABLE_FENV (_ZGVnN4v_cexpif_cos) 41 | TEST_ULP (_ZGVnN4v_cexpif_sin, 1.17) 42 | TEST_ULP (_ZGVnN4v_cexpif_cos, 1.31) 43 | #define V_CEXPIF_INTERVAL(lo, hi, n) \ 44 | TEST_INTERVAL (_ZGVnN4v_cexpif_sin, lo, hi, n) \ 45 | TEST_INTERVAL (_ZGVnN4v_cexpif_cos, lo, hi, n) 46 | V_CEXPIF_INTERVAL (0, 0x1p20, 500000) 47 | V_CEXPIF_INTERVAL (-0, -0x1p20, 500000) 48 | V_CEXPIF_INTERVAL (0x1p20, inf, 10000) 49 | V_CEXPIF_INTERVAL (-0x1p20, -inf, 10000) 50 | -------------------------------------------------------------------------------- /math/aarch64/advsimd/modf.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Double-precision vector modf(x, *y) function. 3 | * 4 | * Copyright (c) 2024, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "v_math.h" 9 | #include "test_sig.h" 10 | #include "test_defs.h" 11 | 12 | /* Modf algorithm. Produces exact values in all rounding modes. */ 13 | float64x2_t VPCS_ATTR V_NAME_D1_L1 (modf) (float64x2_t x, double *out_int) 14 | { 15 | /* Get integer component of x. */ 16 | float64x2_t rounded = vrndq_f64 (x); 17 | vst1q_f64 (out_int, rounded); 18 | 19 | /* Subtract integer component from input. */ 20 | uint64x2_t remaining = vreinterpretq_u64_f64 (vsubq_f64 (x, rounded)); 21 | 22 | /* Return +0 for integer x. 
*/ 23 | uint64x2_t is_integer = vceqq_f64 (x, rounded); 24 | return vreinterpretq_f64_u64 (vbicq_u64 (remaining, is_integer)); 25 | } 26 | 27 | TEST_ULP (_ZGVnN2vl8_modf_frac, 0.0) 28 | TEST_SYM_INTERVAL (_ZGVnN2vl8_modf_frac, 0, 1, 20000) 29 | TEST_SYM_INTERVAL (_ZGVnN2vl8_modf_frac, 1, inf, 20000) 30 | 31 | TEST_ULP (_ZGVnN2vl8_modf_int, 0.0) 32 | TEST_SYM_INTERVAL (_ZGVnN2vl8_modf_int, 0, 1, 20000) 33 | TEST_SYM_INTERVAL (_ZGVnN2vl8_modf_int, 1, inf, 20000) 34 | -------------------------------------------------------------------------------- /math/aarch64/advsimd/modff.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Single-precision vector modf(x, *y) function. 3 | * 4 | * Copyright (c) 2024, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "v_math.h" 9 | #include "test_sig.h" 10 | #include "test_defs.h" 11 | 12 | /* Modff algorithm. Produces exact values in all rounding modes. */ 13 | float32x4_t VPCS_ATTR NOINLINE V_NAME_F1_L1 (modf) (float32x4_t x, 14 | float *out_int) 15 | { 16 | /* Get integer component of x. */ 17 | float32x4_t rounded = vrndq_f32 (x); 18 | vst1q_f32 (out_int, rounded); 19 | 20 | /* Subtract integer component from input. */ 21 | uint32x4_t remaining = vreinterpretq_u32_f32 (vsubq_f32 (x, rounded)); 22 | 23 | /* Return +0 for integer x. 
*/ 24 | uint32x4_t is_integer = vceqq_f32 (x, rounded); 25 | return vreinterpretq_f32_u32 (vbicq_u32 (remaining, is_integer)); 26 | } 27 | 28 | TEST_ULP (_ZGVnN4vl4_modff_frac, 0.0) 29 | TEST_SYM_INTERVAL (_ZGVnN4vl4_modff_frac, 0, 1, 20000) 30 | TEST_SYM_INTERVAL (_ZGVnN4vl4_modff_frac, 1, inf, 20000) 31 | 32 | TEST_ULP (_ZGVnN4vl4_modff_int, 0.0) 33 | TEST_SYM_INTERVAL (_ZGVnN4vl4_modff_int, 0, 1, 20000) 34 | TEST_SYM_INTERVAL (_ZGVnN4vl4_modff_int, 1, inf, 20000) 35 | -------------------------------------------------------------------------------- /math/aarch64/advsimd/sincospi.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Double-precision vector sincospi function. 3 | * 4 | * Copyright (c) 2024-2025, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | #include "v_sincospi_common.h" 8 | #include "v_math.h" 9 | #include "test_defs.h" 10 | 11 | /* Double-precision vector function allowing calculation of both sin and cos in 12 | one function call, using separate argument reduction and shared low-order 13 | polynomials. 14 | Approximation for vector double-precision sincospi(x). 15 | Maximum Error 3.09 ULP: 16 | _ZGVnN2v_sincospi_sin(0x1.7a41deb4b21e1p+14) got 0x1.fd54d0b327cf1p-1 17 | want 0x1.fd54d0b327cf4p-1 18 | Maximum Error 3.16 ULP: 19 | _ZGVnN2v_sincospi_cos(-0x1.11e3c7e284adep-5) got 0x1.fd2da484ff3ffp-1 20 | want 0x1.fd2da484ff402p-1. 
*/ 21 | VPCS_ATTR void 22 | _ZGVnN2vl8l8_sincospi (float64x2_t x, double *out_sin, double *out_cos) 23 | { 24 | const struct v_sincospi_data *d = ptr_barrier (&v_sincospi_data); 25 | 26 | float64x2x2_t sc = v_sincospi_inline (x, d); 27 | 28 | vst1q_f64 (out_sin, sc.val[0]); 29 | vst1q_f64 (out_cos, sc.val[1]); 30 | } 31 | 32 | #if WANT_C23_TESTS 33 | TEST_DISABLE_FENV (_ZGVnN2v_sincospi_cos) 34 | TEST_DISABLE_FENV (_ZGVnN2v_sincospi_sin) 35 | TEST_ULP (_ZGVnN2v_sincospi_sin, 2.59) 36 | TEST_ULP (_ZGVnN2v_sincospi_cos, 2.66) 37 | # define V_SINCOSPI_INTERVAL(lo, hi, n) \ 38 | TEST_SYM_INTERVAL (_ZGVnN2v_sincospi_sin, lo, hi, n) \ 39 | TEST_SYM_INTERVAL (_ZGVnN2v_sincospi_cos, lo, hi, n) 40 | V_SINCOSPI_INTERVAL (0, 0x1p-63, 10000) 41 | V_SINCOSPI_INTERVAL (0x1p-63, 0.5, 50000) 42 | V_SINCOSPI_INTERVAL (0.5, 0x1p63, 50000) 43 | V_SINCOSPI_INTERVAL (0x1p63, inf, 10000) 44 | #endif 45 | -------------------------------------------------------------------------------- /math/aarch64/advsimd/sincospif.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Single-precision vector sincospi function. 3 | * 4 | * Copyright (c) 2024-2025, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "v_sincospif_common.h" 9 | #include "v_math.h" 10 | #include "test_defs.h" 11 | #include "mathlib.h" 12 | 13 | /* Single-precision vector function allowing calculation of both sinpi and 14 | cospi in one function call, using shared argument reduction and polynomials. 15 | Worst-case error for sin is 3.04 ULP: 16 | _ZGVnN4v_sincospif_sin(0x1.1d341ap-1) got 0x1.f7cd56p-1 want 0x1.f7cd5p-1. 17 | Worst-case error for cos is 3.18 ULP: 18 | _ZGVnN4v_sincospif_cos(0x1.d341a8p-5) got 0x1.f7cd56p-1 want 0x1.f7cd5p-1. 
19 | */ 20 | VPCS_ATTR void 21 | _ZGVnN4vl4l4_sincospif (float32x4_t x, float *out_sin, float *out_cos) 22 | { 23 | const struct v_sincospif_data *d = ptr_barrier (&v_sincospif_data); 24 | 25 | float32x4x2_t sc = v_sincospif_inline (x, d); 26 | 27 | vst1q_f32 (out_sin, sc.val[0]); 28 | vst1q_f32 (out_cos, sc.val[1]); 29 | } 30 | 31 | #if WANT_C23_TESTS 32 | TEST_DISABLE_FENV (_ZGVnN4v_sincospif_sin) 33 | TEST_DISABLE_FENV (_ZGVnN4v_sincospif_cos) 34 | TEST_ULP (_ZGVnN4v_sincospif_sin, 2.54) 35 | TEST_ULP (_ZGVnN4v_sincospif_cos, 2.68) 36 | # define V_SINCOSPIF_INTERVAL(lo, hi, n) \ 37 | TEST_SYM_INTERVAL (_ZGVnN4v_sincospif_sin, lo, hi, n) \ 38 | TEST_SYM_INTERVAL (_ZGVnN4v_sincospif_cos, lo, hi, n) 39 | V_SINCOSPIF_INTERVAL (0, 0x1p-63, 10000) 40 | V_SINCOSPIF_INTERVAL (0x1p-63, 0.5, 50000) 41 | V_SINCOSPIF_INTERVAL (0.5, 0x1p31, 50000) 42 | V_SINCOSPIF_INTERVAL (0x1p31, inf, 10000) 43 | #endif 44 | -------------------------------------------------------------------------------- /math/aarch64/advsimd/v_poly_f32.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Helpers for evaluating polynomials on single-precision AdvSIMD input, using 3 | * various schemes. 4 | * 5 | * Copyright (c) 2023-2024, Arm Limited. 6 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 7 | */ 8 | 9 | #ifndef MATH_POLY_ADVSIMD_F32_H 10 | #define MATH_POLY_ADVSIMD_F32_H 11 | 12 | #include <arm_neon.h> 13 | 14 | /* Wrap AdvSIMD f32 helpers: evaluation of some scheme/order has form: 15 | v_[scheme]_[order]_f32.
*/ 16 | #define VTYPE float32x4_t 17 | #define FMA(x, y, z) vfmaq_f32 (z, x, y) 18 | #define VWRAP(f) v_##f##_f32 19 | #include "poly_generic.h" 20 | #undef VWRAP 21 | #undef FMA 22 | #undef VTYPE 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /math/aarch64/advsimd/v_poly_f64.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Helpers for evaluating polynomials on double-precision AdvSIMD input, using 3 | * various schemes. 4 | * 5 | * Copyright (c) 2023-2024, Arm Limited. 6 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 7 | */ 8 | 9 | #ifndef MATH_POLY_ADVSIMD_F64_H 10 | #define MATH_POLY_ADVSIMD_F64_H 11 | 12 | #include 13 | 14 | /* Wrap AdvSIMD f64 helpers: evaluation of some scheme/order has form: 15 | v_[scheme]_[order]_f64. */ 16 | #define VTYPE float64x2_t 17 | #define FMA(x, y, z) vfmaq_f64 (z, x, y) 18 | #define VWRAP(f) v_##f##_f64 19 | #include "poly_generic.h" 20 | #undef VWRAP 21 | #undef FMA 22 | #undef VTYPE 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /math/aarch64/experimental/README.contributors: -------------------------------------------------------------------------------- 1 | Code in this sub-directory should follow the GNU Coding Standard, but it is 2 | not expected to be upstreamed into glibc without modification, so 3 | glibc-specific conventions need not be followed. 4 | 5 | The requirements for portable code apply to non-portable code with the 6 | following differences: 7 | 8 | 1. Worst-case ULP error should be encoded in filenames (e.g. sin_u35.c). There 9 | are no specific restrictions on acceptable ULP error, but if functions 10 | provide significantly less accuracy than portable equivalents then a clear 11 | justification for inclusion should be stated in comments at the top of the 12 | source file. 
Error bounds of the approximation should be clearly documented 13 | in comments. 14 | 15 | 2. Functions are assumed to support round-to-nearest mode by default, unless 16 | stated; other rounding modes are not required to be provided. 17 | -------------------------------------------------------------------------------- /math/aarch64/experimental/acoshf_2u8.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Single-precision acosh(x) function. 3 | * 4 | * Copyright (c) 2022-2024, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "math_config.h" 9 | #include "test_sig.h" 10 | #include "test_defs.h" 11 | 12 | #define Ln2 (0x1.62e4p-1f) 13 | #define MinusZero 0x80000000 14 | #define SquareLim 0x5f800000 /* asuint(0x1p64). */ 15 | #define Two 0x40000000 16 | 17 | /* acoshf approximation using a variety of approaches on different intervals: 18 | 19 | x >= 2^64: We cannot square x without overflow. For huge x, sqrt(x*x - 1) is 20 | close enough to x that we can calculate the result by ln(2x) == ln(x) + 21 | ln(2). The greatest error in the region is 0.94 ULP: 22 | acoshf(0x1.15f706p+92) got 0x1.022e14p+6 want 0x1.022e16p+6. 23 | 24 | x > 2: Calculate the result directly using definition of acosh(x) = ln(x + 25 | sqrt(x*x - 1)). Greatest error in this region is 1.30 ULP: 26 | acoshf(0x1.249d8p+1) got 0x1.77e1aep+0 want 0x1.77e1bp+0. 27 | 28 | 0 <= x <= 2: Calculate the result using log1p. For x < 1, acosh(x) is 29 | undefined. For 1 <= x <= 2, the greatest error is 2.78 ULP: 30 | acoshf(0x1.07887p+0) got 0x1.ef9e9cp-3 want 0x1.ef9ea2p-3.
*/ 31 | float 32 | acoshf (float x) 33 | { 34 | uint32_t ix = asuint (x); 35 | 36 | if (unlikely (ix >= MinusZero)) 37 | return __math_invalidf (x); 38 | 39 | if (unlikely (ix >= SquareLim)) 40 | return logf (x) + Ln2; 41 | 42 | if (ix > Two) 43 | return logf (x + sqrtf (x * x - 1)); 44 | 45 | float xm1 = x - 1; 46 | return log1pf (xm1 + sqrtf (2 * xm1 + xm1 * xm1)); 47 | } 48 | 49 | TEST_SIG (S, F, 1, acosh, 1.0, 10.0) 50 | TEST_ULP (acoshf, 2.30) 51 | TEST_INTERVAL (acoshf, 0, 1, 100) 52 | TEST_INTERVAL (acoshf, 1, 2, 10000) 53 | TEST_INTERVAL (acoshf, 2, 0x1p64, 100000) 54 | TEST_INTERVAL (acoshf, 0x1p64, inf, 100000) 55 | TEST_INTERVAL (acoshf, -0, -inf, 10000) 56 | -------------------------------------------------------------------------------- /math/aarch64/experimental/advsimd/powf_umax.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Low accuracy single-precision vector pow(x, y) function. 3 | * 4 | * Copyright (c) 2025, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "mathlib.h" 9 | #include "v_math.h" 10 | #include "test_defs.h" 11 | 12 | /* Fast inaccurate powf. 13 | Only tested with make check yet. 14 | Maximum measured error: 214.066 +0.5 ULP. BASIS 2. 15 | Maximum measured error: 262.151 +0.5 ULP. BASIS e. 16 | Maximum measured error: 249.575 +0.5 ULP. BASIS 10. 
*/ 17 | float32x4_t VPCS_ATTR NOINLINE 18 | arm_math_advsimd_fast_powf (float32x4_t x, float32x4_t y) 19 | { 20 | float32x4_t logx = _ZGVnN4v_log2f (x); 21 | float32x4_t ylogx = vmulq_f32 (y, logx); 22 | return _ZGVnN4v_exp2f (ylogx); 23 | } 24 | 25 | TEST_ULP (arm_math_advsimd_fast_powf, 4096) 26 | TEST_DISABLE_FENV (arm_math_advsimd_fast_powf) 27 | TEST_INTERVAL2 (arm_math_advsimd_fast_powf, 0x1p-1, 0x1p1, 0x1p-7, 0x1p7, 28 | 500000) 29 | TEST_INTERVAL2 (arm_math_advsimd_fast_powf, 0x1p-1, 0x1p1, -0x1p-7, -0x1p7, 30 | 500000) 31 | TEST_INTERVAL2 (arm_math_advsimd_fast_powf, 0x1p-70, 0x1p70, 0x1p-1, 0x1p1, 32 | 500000) 33 | TEST_INTERVAL2 (arm_math_advsimd_fast_powf, 0x1p-70, 0x1p70, -0x1p-1, -0x1p1, 34 | 500000) 35 | TEST_INTERVAL2 (arm_math_advsimd_fast_powf, 0x1.ep-1, 0x1.1p0, 0x1p8, 0x1p14, 36 | 500000) 37 | TEST_INTERVAL2 (arm_math_advsimd_fast_powf, 0x1.ep-1, 0x1.1p0, -0x1p8, -0x1p14, 38 | 500000) 39 | -------------------------------------------------------------------------------- /math/aarch64/experimental/asin_data.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Coefficients for double-precision asin(x) function. 3 | * 4 | * Copyright (c) 2023-2024, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "math_config.h" 9 | 10 | /* Approximate asin(x) directly in [0x1p-106, 0.25]. See tools/asin.sollya 11 | for how these coefficients were generated. */ 12 | const double __asin_poly[] = { 13 | /* Polynomial approximation of (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x)) 14 | on [ 0x1p-106, 0x1p-2 ], relative error: 0x1.c3d8e169p-57.
*/ 15 | 0x1.555555555554ep-3, 0x1.3333333337233p-4, 0x1.6db6db67f6d9fp-5, 16 | 0x1.f1c71fbd29fbbp-6, 0x1.6e8b264d467d6p-6, 0x1.1c5997c357e9dp-6, 17 | 0x1.c86a22cd9389dp-7, 0x1.856073c22ebbep-7, 0x1.fd1151acb6bedp-8, 18 | 0x1.087182f799c1dp-6, -0x1.6602748120927p-7, 0x1.cfa0dd1f9478p-6, 19 | }; 20 | -------------------------------------------------------------------------------- /math/aarch64/experimental/asinf_data.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Coefficients for single-precision asin(x) function. 3 | * 4 | * Copyright (c) 2023-2024, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "math_config.h" 9 | 10 | /* Approximate asinf(x) directly in [0x1p-24, 0.25]. See tools/asinf.sollya 11 | for how these coeffs were generated. */ 12 | const float __asinf_poly[] = { 13 | /* Polynomial approximation of (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x)) on 14 | [ 0x1p-24, 0x1p-2 ] order = 4 rel error: 0x1.00a23bbp-29. */ 15 | 0x1.55555ep-3, 0x1.33261ap-4, 0x1.70d7dcp-5, 0x1.b059dp-6, 0x1.3af7d8p-5, 16 | }; 17 | -------------------------------------------------------------------------------- /math/aarch64/experimental/asinh_data.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Double-precision polynomial coefficients for scalar asinh(x) 3 | * 4 | * Copyright (c) 2022-2024, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "math_config.h" 9 | 10 | /* asinh(x) is odd, and the first term of the Taylor expansion is x, so we can 11 | approximate the function by x + x^3 * P(x^2), where P(z) has the form: 12 | C0 + C1 * z + C2 * z^2 + C3 * z^3 + ... 13 | Note P is evaluated on even powers of x only. See tools/asinh.sollya for the 14 | algorithm used to generate these coefficients.
*/ 15 | const struct asinh_data __asinh_data 16 | = { .poly 17 | = { -0x1.55555555554a7p-3, 0x1.3333333326c7p-4, -0x1.6db6db68332e6p-5, 18 | 0x1.f1c71b26fb40dp-6, -0x1.6e8b8b654a621p-6, 0x1.1c4daa9e67871p-6, 19 | -0x1.c9871d10885afp-7, 0x1.7a16e8d9d2ecfp-7, -0x1.3ddca533e9f54p-7, 20 | 0x1.0becef748dafcp-7, -0x1.b90c7099dd397p-8, 0x1.541f2bb1ffe51p-8, 21 | -0x1.d217026a669ecp-9, 0x1.0b5c7977aaf7p-9, -0x1.e0f37daef9127p-11, 22 | 0x1.388b5fe542a6p-12, -0x1.021a48685e287p-14, 23 | 0x1.93d4ba83d34dap-18 } }; 24 | -------------------------------------------------------------------------------- /math/aarch64/experimental/asinhf_data.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Coefficients for single-precision asinh(x) function. 3 | * 4 | * Copyright (c) 2022-2024, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "math_config.h" 9 | 10 | /* Approximate asinhf(x) directly in [2^-12, 1]. See tools/asinhf.sollya 11 | for how these coeffs were generated. */ 12 | const struct asinhf_data __asinhf_data 13 | = { .coeffs = { -0x1.9b16fap-19f, -0x1.552baap-3f, -0x1.4e572ap-11f, 14 | 0x1.3a81dcp-4f, 0x1.65bbaap-10f, -0x1.057f1p-4f, 15 | 0x1.6c1d46p-5f, -0x1.4cafe8p-7f } }; 16 | -------------------------------------------------------------------------------- /math/aarch64/experimental/atan_common.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Double-precision polynomial evaluation function for scalar 3 | * atan(x) and atan2(y,x). 4 | * 5 | * Copyright (c) 2021-2024, Arm Limited. 6 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 7 | */ 8 | 9 | #include "math_config.h" 10 | #include "poly_scalar_f64.h" 11 | 12 | /* Polynomial used in fast atan(x) and atan2(y,x) implementations 13 | The order 19 polynomial P approximates (atan(sqrt(x))-sqrt(x))/x^(3/2).
*/ 14 | static inline double 15 | eval_poly (double z, double az, double shift) 16 | { 17 | /* Use split Estrin scheme for P(z^2) with deg(P)=19. Use split instead of 18 | full scheme to avoid underflow in x^16. */ 19 | double z2 = z * z; 20 | double x2 = z2 * z2; 21 | double x4 = x2 * x2; 22 | double x8 = x4 * x4; 23 | double y = fma (estrin_11_f64 (z2, x2, x4, x8, __atan_poly_data.poly + 8), 24 | x8, estrin_7_f64 (z2, x2, x4, __atan_poly_data.poly)); 25 | 26 | /* Finalize. y = shift + z + z^3 * P(z^2). */ 27 | y = fma (y, z2 * az, az); 28 | y = y + shift; 29 | 30 | return y; 31 | } 32 | 33 | #undef P 34 | -------------------------------------------------------------------------------- /math/aarch64/experimental/atan_data.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Double-precision polynomial coefficients for vector atan(x) and atan2(y,x). 3 | * 4 | * Copyright (c) 2019-2024, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "math_config.h" 9 | 10 | const struct atan_poly_data __atan_poly_data 11 | = { .poly = { /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) 12 | on [2**-1022, 1.0]. See atan.sollya for details of how 13 | these were generated. 
*/ 14 | -0x1.5555555555555p-2, 0x1.99999999996c1p-3, 15 | -0x1.2492492478f88p-3, 0x1.c71c71bc3951cp-4, 16 | -0x1.745d160a7e368p-4, 0x1.3b139b6a88ba1p-4, 17 | -0x1.11100ee084227p-4, 0x1.e1d0f9696f63bp-5, 18 | -0x1.aebfe7b418581p-5, 0x1.842dbe9b0d916p-5, 19 | -0x1.5d30140ae5e99p-5, 0x1.338e31eb2fbbcp-5, 20 | -0x1.00e6eece7de8p-5, 0x1.860897b29e5efp-6, 21 | -0x1.0051381722a59p-6, 0x1.14e9dc19a4a4ep-7, 22 | -0x1.d0062b42fe3bfp-9, 0x1.17739e210171ap-10, 23 | -0x1.ab24da7be7402p-13, 0x1.358851160a528p-16 } }; 24 | -------------------------------------------------------------------------------- /math/aarch64/experimental/atanf_common.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Single-precision polynomial evaluation function for scalar 3 | * atan(x) and atan2(y,x). 4 | * 5 | * Copyright (c) 2021-2024, Arm Limited. 6 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 7 | */ 8 | 9 | #ifndef PL_MATH_ATANF_COMMON_H 10 | #define PL_MATH_ATANF_COMMON_H 11 | 12 | #include "math_config.h" 13 | #include "poly_scalar_f32.h" 14 | 15 | /* Polynomial used in fast atanf(x) and atan2f(y,x) implementations 16 | The order 7 polynomial P approximates (atan(sqrt(x))-sqrt(x))/x^(3/2). */ 17 | static inline float 18 | eval_poly (float z, float az, float shift) 19 | { 20 | /* Use 2-level Estrin scheme for P(z^2) with deg(P)=7. However, 21 | a standard implementation using z8 creates spurious underflow 22 | in the very last fma (when z^8 is small enough). 23 | Therefore, we split the last fma into a mul and an fma. 24 | Horner and single-level Estrin have higher errors that exceed 25 | threshold. */ 26 | float z2 = z * z; 27 | float z4 = z2 * z2; 28 | 29 | /* Then assemble polynomial. */ 30 | float y = fmaf ( 31 | z4, z4 * pairwise_poly_3_f32 (z2, z4, __atanf_poly_data.poly + 4), 32 | pairwise_poly_3_f32 (z2, z4, __atanf_poly_data.poly)); 33 | /* Finalize: 34 | y = shift + az + az * z^2 * P(z^2).
*/ 35 | return fmaf (y, z2 * az, az) + shift; 36 | } 37 | 38 | #endif // PL_MATH_ATANF_COMMON_H 39 | -------------------------------------------------------------------------------- /math/aarch64/experimental/atanf_data.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Single-precision polynomial coefficients for vector atan(x) and atan2(y,x). 3 | * 4 | * Copyright (c) 2019-2024, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "math_config.h" 9 | 10 | /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on [2**-128, 1.0]. 11 | */ 12 | const struct atanf_poly_data __atanf_poly_data 13 | = { .poly 14 | = { /* See atanf.sollya for details of how these were generated. */ 15 | -0x1.55555p-2f, 0x1.99935ep-3f, -0x1.24051ep-3f, 0x1.bd7368p-4f, 16 | -0x1.491f0ep-4f, 0x1.93a2c0p-5f, -0x1.4c3c60p-6f, 17 | 0x1.01fd88p-8f } }; 18 | -------------------------------------------------------------------------------- /math/aarch64/experimental/cbrt_data.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Coefficients and table entries for double-precision cbrt(x). 3 | * 4 | * Copyright (c) 2022-2024, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "math_config.h" 9 | 10 | const struct cbrt_data __cbrt_data 11 | = {.poly = { /* Coefficients for very rough approximation of cbrt(x) in [0.5, 1]. 12 | See cbrt.sollya for details of generation. */ 13 | 0x1.c14e8ee44767p-2, 0x1.dd2d3f99e4c0ep-1, -0x1.08e83026b7e74p-1, 0x1.2c74eaa3ba428p-3}, 14 | .table = { /* table[i] = 2^((i - 2) / 3). 
*/ 15 | 0x1.428a2f98d728bp-1, 0x1.965fea53d6e3dp-1, 0x1p0, 0x1.428a2f98d728bp0, 0x1.965fea53d6e3dp0}}; 16 | -------------------------------------------------------------------------------- /math/aarch64/experimental/cbrtf_data.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Coefficients and table entries for single-precision cbrt(x). 3 | * 4 | * Copyright (c) 2022-2024, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "math_config.h" 9 | 10 | const struct cbrtf_data __cbrtf_data 11 | = {.poly = { /* Coefficients for very rough approximation of cbrt(x) in [0.5, 1]. 12 | See cbrtf.sollya for details of generation. */ 13 | 0x1.c14e96p-2, 0x1.dd2d3p-1, -0x1.08e81ap-1, 0x1.2c74c2p-3}, 14 | .table = { /* table[i] = 2^((i - 2) / 3). */ 15 | 0x1.428a3p-1, 0x1.965feap-1, 0x1p0, 0x1.428a3p0, 0x1.965feap0}}; 16 | -------------------------------------------------------------------------------- /math/aarch64/experimental/expf_data.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Coeffs and table entries for single-precision exp. Copied from 3 | * math/exp2f_data.c, with EXP2F_TABLE_BITS == 5 (a 32-entry table). 4 | * 5 | * Copyright (c) 2017-2024, Arm Limited. 6 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 7 | */ 8 | 9 | #include "math_config.h" 10 | 11 | #define N (1 << EXPF_TABLE_BITS) 12 | 13 | const struct expf_data __expf_data = { 14 | /* tab[i] = uint(2^(i/N)) - (i << 52-BITS) 15 | used for computing 2^(k/N) for an int |k| < 150 N as 16 | double(tab[k%N] + (k << 52-BITS)).
*/ 17 | .tab = { 18 | 0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, 0x3fef9301d0125b51, 19 | 0x3fef72b83c7d517b, 0x3fef54873168b9aa, 0x3fef387a6e756238, 0x3fef1e9df51fdee1, 20 | 0x3fef06fe0a31b715, 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d, 21 | 0x3feebfdad5362a27, 0x3feeb42b569d4f82, 0x3feeab07dd485429, 0x3feea47eb03a5585, 22 | 0x3feea09e667f3bcd, 0x3fee9f75e8ec5f74, 0x3feea11473eb0187, 0x3feea589994cce13, 23 | 0x3feeace5422aa0db, 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d, 24 | 0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c, 0x3fef3720dcef9069, 25 | 0x3fef5818dcfba487, 0x3fef7c97337b9b5f, 0x3fefa4afa2a490da, 0x3fefd0765b6e4540, 26 | }, 27 | .invln2_scaled = 0x1.71547652b82fep+0 * N, 28 | .poly_scaled = { 29 | 0x1.c6af84b912394p-5/N/N/N, 0x1.ebfce50fac4f3p-3/N/N, 0x1.62e42ff0c52d6p-1/N, 30 | }, 31 | }; 32 | -------------------------------------------------------------------------------- /math/aarch64/experimental/expm1_data.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Coefficients for double-precision e^x - 1 function. 3 | * 4 | * Copyright (c) 2022-2024, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "math_config.h" 9 | 10 | /* Generated using fpminimax, see tools/expm1.sollya for details. */ 11 | const double __expm1_poly[] = { 0x1p-1, 12 | 0x1.5555555555559p-3, 13 | 0x1.555555555554bp-5, 14 | 0x1.111111110f663p-7, 15 | 0x1.6c16c16c1b5f3p-10, 16 | 0x1.a01a01affa35dp-13, 17 | 0x1.a01a018b4ecbbp-16, 18 | 0x1.71ddf82db5bb4p-19, 19 | 0x1.27e517fc0d54bp-22, 20 | 0x1.af5eedae67435p-26, 21 | 0x1.1f143d060a28ap-29 }; 22 | -------------------------------------------------------------------------------- /math/aarch64/experimental/expm1f_data.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Coefficients for single-precision e^x - 1 function. 
3 | * 4 | * Copyright (c) 2022-2024, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "math_config.h" 9 | 10 | /* Generated using fpminimax, see tools/expm1f.sollya for details. */ 11 | const float __expm1f_poly[] = { 0x1.fffffep-2, 0x1.5554aep-3, 0x1.555736p-5, 12 | 0x1.12287cp-7, 0x1.6b55a2p-10 }; 13 | -------------------------------------------------------------------------------- /math/aarch64/experimental/log1p_data.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Data used in double-precision log(1+x) function. 3 | * 4 | * Copyright (c) 2022-2024, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "math_config.h" 9 | 10 | /* Polynomial coefficients generated using Remez algorithm, see 11 | log1p.sollya for details. */ 12 | const struct log1p_data __log1p_data 13 | = { .coeffs 14 | = { -0x1.ffffffffffffbp-2, 0x1.55555555551a9p-2, -0x1.00000000008e3p-2, 15 | 0x1.9999999a32797p-3, -0x1.555555552fecfp-3, 0x1.249248e071e5ap-3, 16 | -0x1.ffffff8bf8482p-4, 0x1.c71c8f07da57ap-4, -0x1.9999ca4ccb617p-4, 17 | 0x1.7459ad2e1dfa3p-4, -0x1.554d2680a3ff2p-4, 0x1.3b4c54d487455p-4, 18 | -0x1.2548a9ffe80e6p-4, 0x1.0f389a24b2e07p-4, -0x1.eee4db15db335p-5, 19 | 0x1.e95b494d4a5ddp-5, -0x1.15fdf07cb7c73p-4, 0x1.0310b70800fcfp-4, 20 | -0x1.cfa7385bdb37ep-6 } }; 21 | -------------------------------------------------------------------------------- /math/aarch64/experimental/log1pf_data.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Data used in single-precision log1p(x) function. 3 | * 4 | * Copyright (c) 2022-2024, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | #include "math_config.h" 8 | 9 | /* Polynomial coefficients generated using floating-point minimax 10 | algorithm, see tools/log1pf.sollya for details. 
*/ 11 | const struct log1pf_data __log1pf_data 12 | = { .coeffs = { -0x1p-1f, 0x1.5555aap-2f, -0x1.000038p-2f, 0x1.99675cp-3f, 13 | -0x1.54ef78p-3f, 0x1.28a1f4p-3f, -0x1.0da91p-3f, 14 | 0x1.abcb6p-4f, -0x1.6f0d5ep-5f } }; 15 | -------------------------------------------------------------------------------- /math/aarch64/experimental/sve/expf_umax.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Low-accuracy single-precision vector e^x function. 3 | * 4 | * Copyright (c) 2025, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "sv_math.h" 9 | #include "test_sig.h" 10 | #include "test_defs.h" 11 | #include "sv_expf_inline.h" 12 | 13 | /* Roughly 87.3. For x < -Thres, the result is subnormal and not handled 14 | correctly by FEXPA. */ 15 | #define Thres 0x1.5d5e2ap+6f 16 | 17 | static const struct data 18 | { 19 | struct sv_expf_data d; 20 | float thres; 21 | } data = { 22 | .d = SV_EXPF_DATA, 23 | .thres = Thres, 24 | }; 25 | 26 | static svfloat32_t NOINLINE 27 | special_case (svfloat32_t x, svbool_t special, const struct sv_expf_data *d) 28 | { 29 | return sv_call_f32 (expf, x, expf_inline (x, svptrue_b32 (), d), special); 30 | } 31 | 32 | /* Low-accuracy SVE expf. 33 | Worst-case error is 248.94 +0.5 ULP. 34 | arm_math_sve_fast_expf(0x1.5d5314p+6) got 0x1.fd3a6p+125 35 | want 0x1.fd3c52p+125. 
*/ 36 | svfloat32_t 37 | arm_math_sve_fast_expf (svfloat32_t x, const svbool_t pg) 38 | { 39 | const struct data *d = ptr_barrier (&data); 40 | svbool_t is_special_case = svacgt (pg, x, d->thres); 41 | if (unlikely (svptest_any (pg, is_special_case))) 42 | return special_case (x, is_special_case, &d->d); 43 | return expf_inline (x, pg, &d->d); 44 | } 45 | 46 | TEST_ULP (arm_math_sve_fast_expf, 4096) 47 | TEST_DISABLE_FENV (arm_math_sve_fast_expf) 48 | TEST_SYM_INTERVAL (arm_math_sve_fast_expf, 0, Thres, 50000) 49 | TEST_SYM_INTERVAL (arm_math_sve_fast_expf, Thres, inf, 50000) 50 | CLOSE_SVE_ATTR 51 | -------------------------------------------------------------------------------- /math/aarch64/experimental/sve/powf_umax.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Low-accuracy single-precision SVE pow function. 3 | * 4 | * Copyright (c) 2025, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "mathlib.h" 9 | #include "sv_math.h" 10 | #include "test_defs.h" 11 | 12 | /* Fast inaccurate SVE powf. 13 | Only tested with make check yet. 14 | Testing special cases like x < 0 would break if tested, 15 | we currently do not test these cases, 16 | but ideally they should just be allowed to break. 17 | Maximum measured error: 215.448 +0.5 ULP. BASIS 2. */ 18 | svfloat32_t 19 | arm_math_sve_fast_powf (svfloat32_t x, svfloat32_t y, const svbool_t pg) 20 | { 21 | svfloat32_t logx = _ZGVsMxv_log2f (x, pg); 22 | svfloat32_t ylogx = svmul_x (svptrue_b32 (), y, logx); 23 | return _ZGVsMxv_exp2f (ylogx, pg); 24 | } 25 | 26 | TEST_ULP (arm_math_sve_fast_powf, 4096) 27 | TEST_DISABLE_FENV (arm_math_sve_fast_powf) 28 | /* Wide intervals spanning the whole domain but shared between x and y. 
*/ 29 | #define SV_POWF_INTERVAL2(xlo, xhi, ylo, yhi, n) \ 30 | TEST_INTERVAL2 (arm_math_sve_fast_powf, xlo, xhi, ylo, yhi, n) \ 31 | TEST_INTERVAL2 (arm_math_sve_fast_powf, xlo, xhi, -ylo, -yhi, n) 32 | SV_POWF_INTERVAL2 (0, 0x1p-126, 0, inf, 40000) 33 | SV_POWF_INTERVAL2 (0x1p-126, 1, 0, inf, 50000) 34 | SV_POWF_INTERVAL2 (1, inf, 0, inf, 50000) 35 | /* x~1 or y~1. */ 36 | SV_POWF_INTERVAL2 (0x1p-1, 0x1p1, 0x1p-10, 0x1p10, 10000) 37 | SV_POWF_INTERVAL2 (0x1.ep-1, 0x1.1p0, 0x1p8, 0x1p16, 10000) 38 | SV_POWF_INTERVAL2 (0x1p-500, 0x1p500, 0x1p-1, 0x1p1, 10000) 39 | /* around estimated argmaxs of ULP error. */ 40 | SV_POWF_INTERVAL2 (0x1p-300, 0x1p-200, 0x1p-20, 0x1p-10, 10000) 41 | SV_POWF_INTERVAL2 (0x1p50, 0x1p100, 0x1p-20, 0x1p-10, 10000) 42 | CLOSE_SVE_ATTR 43 | -------------------------------------------------------------------------------- /math/aarch64/experimental/sve/powi.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Double-precision SVE powi(x, n) function. 3 | * 4 | * Copyright (c) 2020-2024, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "sv_math.h" 9 | 10 | /* Optimized double-precision vector powi (double base, long integer power). 11 | powi is developed for environments in which accuracy is of much less 12 | importance than performance, hence we provide no estimate for worst-case 13 | error. */ 14 | svfloat64_t 15 | _ZGVsMxvv_powk (svfloat64_t as, svint64_t ns, svbool_t p) 16 | { 17 | /* Compute powi by successive squaring, right to left. */ 18 | svfloat64_t acc = sv_f64 (1.0); 19 | svbool_t want_recip = svcmplt (p, ns, 0); 20 | svuint64_t ns_abs = svreinterpret_u64 (svabs_x (p, ns)); 21 | 22 | /* We use a max to avoid needing to check whether any lane != 0 on each 23 | iteration. 
*/ 24 | uint64_t max_n = svmaxv (p, ns_abs); 25 | 26 | svfloat64_t c = as; 27 | /* Successively square c, and use merging predication (_m) to determine 28 | whether or not to perform the multiplication or keep the previous 29 | iteration. */ 30 | while (true) 31 | { 32 | svbool_t px = svcmpeq (p, svand_x (p, ns_abs, 1ull), 1ull); 33 | acc = svmul_m (px, acc, c); 34 | max_n >>= 1; 35 | if (max_n == 0) 36 | break; 37 | 38 | ns_abs = svlsr_x (p, ns_abs, 1); 39 | c = svmul_x (p, c, c); 40 | } 41 | 42 | /* Negative powers are handled by computing the abs(n) version and then 43 | taking the reciprocal. */ 44 | if (svptest_any (want_recip, want_recip)) 45 | acc = svdivr_m (want_recip, acc, 1.0); 46 | 47 | return acc; 48 | } 49 | CLOSE_SVE_ATTR 50 | -------------------------------------------------------------------------------- /math/aarch64/experimental/sve/powif.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Single-precision SVE powi(x, n) function. 3 | * 4 | * Copyright (c) 2020-2024, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "sv_math.h" 9 | 10 | /* Optimized single-precision vector powi (float base, integer power). 11 | powi is developed for environments in which accuracy is of much less 12 | importance than performance, hence we provide no estimate for worst-case 13 | error. */ 14 | svfloat32_t 15 | _ZGVsMxvv_powi (svfloat32_t as, svint32_t ns, svbool_t p) 16 | { 17 | /* Compute powi by successive squaring, right to left. */ 18 | svfloat32_t acc = sv_f32 (1.f); 19 | svbool_t want_recip = svcmplt (p, ns, 0); 20 | svuint32_t ns_abs = svreinterpret_u32 (svabs_x (p, ns)); 21 | 22 | /* We use a max to avoid needing to check whether any lane != 0 on each 23 | iteration. 
*/ 24 | uint32_t max_n = svmaxv (p, ns_abs); 25 | 26 | svfloat32_t c = as; 27 | /* Successively square c, and use merging predication (_m) to determine 28 | whether or not to perform the multiplication or keep the previous 29 | iteration. */ 30 | while (true) 31 | { 32 | svbool_t px = svcmpeq (p, svand_x (p, ns_abs, 1), 1); 33 | acc = svmul_m (px, acc, c); 34 | max_n >>= 1; 35 | if (max_n == 0) 36 | break; 37 | 38 | ns_abs = svlsr_x (p, ns_abs, 1); 39 | c = svmul_x (p, c, c); 40 | } 41 | 42 | /* Negative powers are handled by computing the abs(n) version and then 43 | taking the reciprocal. */ 44 | if (svptest_any (want_recip, want_recip)) 45 | acc = svdivr_m (want_recip, acc, 1.0f); 46 | 47 | return acc; 48 | } 49 | CLOSE_SVE_ATTR 50 | -------------------------------------------------------------------------------- /math/aarch64/experimental/sve/sv_expf_inline.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SVE helper for single-precision routines which calculate exp(x) and do 3 | * not need special-case handling 4 | * 5 | * Copyright (c) 2025, Arm Limited. 6 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 7 | */ 8 | 9 | #ifndef MATH_SV_EXPF_INLINE_H 10 | #define MATH_SV_EXPF_INLINE_H 11 | 12 | #include "sv_math.h" 13 | #include "test_sig.h" 14 | #include "test_defs.h" 15 | 16 | struct sv_expf_data 17 | { 18 | float ln2, inv_ln2, shift, null; 19 | }; 20 | 21 | #define SV_EXPF_DATA \ 22 | { \ 23 | /* Shift is 1.5*2^17 + 127. */ \ 24 | .shift = 0x1.803f8p17f, .inv_ln2 = 0x1.715476p+0f, .ln2 = 0x1.62e43p-1f, \ 25 | } 26 | 27 | static inline svfloat32_t 28 | expf_inline (svfloat32_t x, const svbool_t pg, const struct sv_expf_data *d) 29 | { 30 | /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] 31 | x = ln2*n + r, with r in [-ln2/2, ln2/2]. */ 32 | 33 | svfloat32_t lane_consts = svld1rq (svptrue_b32 (), &d->ln2); 34 | 35 | /* n = round(x/(ln2/N)). 
*/ 36 | svfloat32_t z = svmla_lane (sv_f32 (d->shift), x, lane_consts, 1); 37 | svfloat32_t n = svsub_x (pg, z, d->shift); 38 | 39 | /* r = x - n*ln2/N. */ 40 | svfloat32_t r = svmls_lane (x, n, lane_consts, 0); 41 | 42 | /* scale = 2^(n/N). */ 43 | svfloat32_t scale = svexpa (svreinterpret_u32 (z)); 44 | 45 | /* poly(r) = exp(r) - 1 ~= r. */ 46 | return svmla_x (pg, scale, scale, r); 47 | } 48 | 49 | #endif // MATH_SV_EXPF_INLINE_H 50 | -------------------------------------------------------------------------------- /math/aarch64/experimental/tanf_data.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Data used in single-precision tan(x) function. 3 | * 4 | * Copyright (c) 2022-2024, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "math_config.h" 9 | 10 | const struct tanf_poly_data __tanf_poly_data = { 11 | .poly_tan = { 12 | /* Coefficients generated using: 13 | poly = fpminimax((tan(sqrt(x))-sqrt(x))/x^(3/2), deg, [|single ...|], [a*a;b*b]); 14 | optimize relative error 15 | final prec : 23 bits 16 | deg : 5 17 | a : 0x1p-126 ^ 2 18 | b : ((pi) / 0x1p2) ^ 2 19 | dirty rel error: 0x1.f7c2e4p-25 20 | dirty abs error: 0x1.f7c2ecp-25. */ 21 | 0x1.55555p-2, 22 | 0x1.11166p-3, 23 | 0x1.b88a78p-5, 24 | 0x1.7b5756p-6, 25 | 0x1.4ef4cep-8, 26 | 0x1.0e1e74p-7 27 | }, 28 | .poly_cotan = { 29 | /* Coefficients generated using: 30 | fpminimax(f(x) = (0x1p0 / tan(sqrt(x)) - 0x1p0 / sqrt(x)) / sqrt(x), deg, [|dtype ...|], [a;b]) 31 | optimize a single polynomial 32 | optimize absolute error 33 | final prec : 23 bits 34 | working prec : 128 bits 35 | deg : 3 36 | a : 0x1p-126 37 | b : (pi) / 0x1p2 38 | dirty rel error : 0x1.81298cp-25 39 | dirty abs error : 0x1.a8acf4p-25. */ 40 | -0x1.55555p-2, /* -0.33333325. */ 41 | -0x1.6c23e4p-6, /* -2.2225354e-2. */ 42 | -0x1.12dbap-9, /* -2.0969994e-3. */ 43 | -0x1.05a1c2p-12, /* -2.495116e-4. 
*/ 44 | } 45 | }; 46 | -------------------------------------------------------------------------------- /math/aarch64/sve/acoshf.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Single-precision SVE acosh(x) function. 3 | * Copyright (c) 2023-2024, Arm Limited. 4 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | */ 6 | 7 | #include "sv_math.h" 8 | #include "test_sig.h" 9 | #include "test_defs.h" 10 | 11 | #define One 0x3f800000 12 | #define Thres 0x20000000 /* asuint(0x1p64) - One. */ 13 | 14 | #include "sv_log1pf_inline.h" 15 | 16 | static svfloat32_t NOINLINE 17 | special_case (svfloat32_t xm1, svfloat32_t tmp, svbool_t special) 18 | { 19 | svfloat32_t x = svadd_x (svptrue_b32 (), xm1, 1.0f); 20 | svfloat32_t y = sv_log1pf_inline (tmp, svptrue_b32 ()); 21 | return sv_call_f32 (acoshf, x, y, special); 22 | } 23 | 24 | /* Single-precision SVE acosh(x) routine. Implements the same algorithm as 25 | vector acoshf and log1p. 26 | 27 | Maximum error is 2.47 ULPs: 28 | SV_NAME_F1 (acosh) (0x1.01ca76p+0) got 0x1.e435a6p-4 29 | want 0x1.e435a2p-4. 
*/ 30 | svfloat32_t SV_NAME_F1 (acosh) (svfloat32_t x, const svbool_t pg) 31 | { 32 | svuint32_t ix = svreinterpret_u32 (x); 33 | svbool_t special = svcmpge (pg, svsub_x (pg, ix, One), Thres); 34 | 35 | svfloat32_t xm1 = svsub_x (pg, x, 1.0f); 36 | svfloat32_t u = svmul_x (pg, xm1, svadd_x (pg, x, 1.0f)); 37 | svfloat32_t tmp = svadd_x (pg, xm1, svsqrt_x (pg, u)); 38 | 39 | if (unlikely (svptest_any (pg, special))) 40 | return special_case (xm1, tmp, special); 41 | return sv_log1pf_inline (tmp, pg); 42 | } 43 | 44 | TEST_SIG (SV, F, 1, acosh, 1.0, 10.0) 45 | TEST_ULP (SV_NAME_F1 (acosh), 1.97) 46 | TEST_DISABLE_FENV (SV_NAME_F1 (acosh)) 47 | TEST_INTERVAL (SV_NAME_F1 (acosh), 0, 1, 500) 48 | TEST_INTERVAL (SV_NAME_F1 (acosh), 1, 0x1p64, 100000) 49 | TEST_INTERVAL (SV_NAME_F1 (acosh), 0x1p64, inf, 1000) 50 | TEST_INTERVAL (SV_NAME_F1 (acosh), -0, -inf, 1000) 51 | CLOSE_SVE_ATTR 52 | -------------------------------------------------------------------------------- /math/aarch64/sve/cexpi.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Double-precision vector cexpi function. 3 | * 4 | * Copyright (c) 2023-2024, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "sv_math.h" 9 | #include "sv_sincos_common.h" 10 | #include "test_defs.h" 11 | 12 | static svfloat64x2_t NOINLINE 13 | special_case (svfloat64_t x, svbool_t special, svfloat64x2_t y) 14 | { 15 | return svcreate2 (sv_call_f64 (sin, x, svget2 (y, 0), special), 16 | sv_call_f64 (cos, x, svget2 (y, 1), special)); 17 | } 18 | 19 | /* Double-precision vector function allowing calculation of both sin and cos in 20 | one function call, using shared argument reduction and separate polynomials. 21 | Largest observed error is for sin, 3.22 ULP: 22 | sv_cexpi_sin (0x1.d70eef40f39b1p+12) got -0x1.ffe9537d5dbb7p-3 23 | want -0x1.ffe9537d5dbb4p-3. 
*/ 24 | svfloat64x2_t 25 | _ZGVsMxv_cexpi (svfloat64_t x, svbool_t pg) 26 | { 27 | const struct sv_sincos_data *d = ptr_barrier (&sv_sincos_data); 28 | svbool_t special = check_ge_rangeval (pg, x, d); 29 | 30 | svfloat64x2_t sc = sv_sincos_inline (pg, x, d); 31 | 32 | if (unlikely (svptest_any (pg, special))) 33 | return special_case (x, special, sc); 34 | return sc; 35 | } 36 | 37 | TEST_DISABLE_FENV (_ZGVsMxv_cexpi_sin) 38 | TEST_DISABLE_FENV (_ZGVsMxv_cexpi_cos) 39 | TEST_ULP (_ZGVsMxv_cexpi_sin, 2.73) 40 | TEST_ULP (_ZGVsMxv_cexpi_cos, 2.73) 41 | #define SV_CEXPI_INTERVAL(lo, hi, n) \ 42 | TEST_INTERVAL (_ZGVsMxv_cexpi_sin, lo, hi, n) \ 43 | TEST_INTERVAL (_ZGVsMxv_cexpi_cos, lo, hi, n) 44 | SV_CEXPI_INTERVAL (0, 0x1p23, 500000) 45 | SV_CEXPI_INTERVAL (-0, -0x1p23, 500000) 46 | SV_CEXPI_INTERVAL (0x1p23, inf, 10000) 47 | SV_CEXPI_INTERVAL (-0x1p23, -inf, 10000) 48 | CLOSE_SVE_ATTR 49 | -------------------------------------------------------------------------------- /math/aarch64/sve/cexpif.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Single-precision vector cexpi function. 3 | * 4 | * Copyright (c) 2023-2024, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "sv_math.h" 9 | #include "sv_sincosf_common.h" 10 | #include "test_defs.h" 11 | 12 | static svfloat32x2_t NOINLINE 13 | special_case (svfloat32_t x, svbool_t special, svfloat32x2_t y) 14 | { 15 | return svcreate2 (sv_call_f32 (sinf, x, svget2 (y, 0), special), 16 | sv_call_f32 (cosf, x, svget2 (y, 1), special)); 17 | } 18 | 19 | /* Single-precision vector function allowing calculation of both sin and cos in 20 | one function call, using shared argument reduction and separate low-order 21 | polynomials. 
22 | Worst-case error for sin is 1.67 ULP: 23 | v_cexpif_sin(0x1.c704c4p+19) got 0x1.fff698p-5 want 0x1.fff69cp-5 24 | Worst-case error for cos is 1.81 ULP: 25 | v_cexpif_cos(0x1.e506fp+19) got -0x1.ffec6ep-6 want -0x1.ffec72p-6. */ 26 | svfloat32x2_t 27 | _ZGVsMxv_cexpif (svfloat32_t x, svbool_t pg) 28 | { 29 | const struct sv_sincosf_data *d = ptr_barrier (&sv_sincosf_data); 30 | svbool_t special = check_ge_rangeval (pg, x, d); 31 | 32 | svfloat32x2_t sc = sv_sincosf_inline (pg, x, d); 33 | 34 | if (unlikely (svptest_any (pg, special))) 35 | return special_case (x, special, sc); 36 | return sc; 37 | } 38 | 39 | TEST_DISABLE_FENV (_ZGVsMxv_cexpif_sin) 40 | TEST_DISABLE_FENV (_ZGVsMxv_cexpif_cos) 41 | TEST_ULP (_ZGVsMxv_cexpif_sin, 1.17) 42 | TEST_ULP (_ZGVsMxv_cexpif_cos, 1.31) 43 | #define SV_CEXPIF_INTERVAL(lo, hi, n) \ 44 | TEST_INTERVAL (_ZGVsMxv_cexpif_sin, lo, hi, n) \ 45 | TEST_INTERVAL (_ZGVsMxv_cexpif_cos, lo, hi, n) 46 | SV_CEXPIF_INTERVAL (0, 0x1p20, 500000) 47 | SV_CEXPIF_INTERVAL (-0, -0x1p20, 500000) 48 | SV_CEXPIF_INTERVAL (0x1p20, inf, 10000) 49 | SV_CEXPIF_INTERVAL (-0x1p20, -inf, 10000) 50 | CLOSE_SVE_ATTR 51 | -------------------------------------------------------------------------------- /math/aarch64/sve/expf.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Single-precision vector e^x function. 3 | * 4 | * Copyright (c) 2019-2025, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "sv_math.h" 9 | #include "test_sig.h" 10 | #include "test_defs.h" 11 | #include "sv_expf_inline.h" 12 | 13 | /* Roughly 87.3. For x < -Thres, the result is subnormal and not handled 14 | correctly by FEXPA. 
*/ 15 | #define Thres 0x1.5d5e2ap+6f 16 | 17 | static const struct data 18 | { 19 | struct sv_expf_data d; 20 | float thres; 21 | } data = { 22 | .d = SV_EXPF_DATA, 23 | .thres = Thres, 24 | }; 25 | 26 | static svfloat32_t NOINLINE 27 | special_case (svfloat32_t x, svbool_t special, const struct sv_expf_data *d) 28 | { 29 | return sv_call_f32 (expf, x, expf_inline (x, svptrue_b32 (), d), special); 30 | } 31 | 32 | /* Optimised single-precision SVE exp function. 33 | Worst-case error is 0.88 +0.50 ULP: 34 | _ZGVsMxv_expf(-0x1.bba276p-6) got 0x1.f25288p-1 35 | want 0x1.f2528ap-1. */ 36 | svfloat32_t SV_NAME_F1 (exp) (svfloat32_t x, const svbool_t pg) 37 | { 38 | const struct data *d = ptr_barrier (&data); 39 | svbool_t is_special_case = svacgt (pg, x, d->thres); 40 | if (unlikely (svptest_any (pg, is_special_case))) 41 | return special_case (x, is_special_case, &d->d); 42 | return expf_inline (x, pg, &d->d); 43 | } 44 | 45 | TEST_SIG (SV, F, 1, exp, -9.9, 9.9) 46 | TEST_ULP (SV_NAME_F1 (exp), 0.89) 47 | TEST_DISABLE_FENV (SV_NAME_F1 (exp)) 48 | TEST_SYM_INTERVAL (SV_NAME_F1 (exp), 0, Thres, 50000) 49 | TEST_SYM_INTERVAL (SV_NAME_F1 (exp), Thres, inf, 50000) 50 | CLOSE_SVE_ATTR 51 | -------------------------------------------------------------------------------- /math/aarch64/sve/hypot.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Double-precision SVE hypot(x) function. 3 | * 4 | * Copyright (c) 2023-2024, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "sv_math.h" 9 | #include "test_sig.h" 10 | #include "test_defs.h" 11 | 12 | static const struct data 13 | { 14 | uint64_t tiny_bound, thres; 15 | } data = { 16 | .tiny_bound = 0x0c80000000000000, /* asuint (0x1p-823). */ 17 | .thres = 0x7300000000000000, /* 0x7f80000000000000 - tiny_bound, just below asuint (inf).
*/ 18 | }; 19 | 20 | static svfloat64_t NOINLINE 21 | special_case (svfloat64_t sqsum, svfloat64_t x, svfloat64_t y, svbool_t pg, 22 | svbool_t special) 23 | { 24 | return sv_call2_f64 (hypot, x, y, svsqrt_x (pg, sqsum), special); 25 | } 26 | 27 | /* SVE implementation of double-precision hypot. 28 | Maximum error observed is 1.21 ULP: 29 | _ZGVsMxvv_hypot (-0x1.6a22d0412cdd3p+352, 0x1.d3d89bd66fb1ap+330) 30 | got 0x1.6a22d0412cfp+352 31 | want 0x1.6a22d0412cf01p+352. */ 32 | svfloat64_t SV_NAME_D2 (hypot) (svfloat64_t x, svfloat64_t y, svbool_t pg) 33 | { 34 | const struct data *d = ptr_barrier (&data); 35 | 36 | svfloat64_t sqsum = svmla_x (pg, svmul_x (pg, x, x), y, y); 37 | 38 | svbool_t special = svcmpge ( 39 | pg, svsub_x (pg, svreinterpret_u64 (sqsum), d->tiny_bound), d->thres); 40 | 41 | if (unlikely (svptest_any (pg, special))) 42 | return special_case (sqsum, x, y, pg, special); 43 | return svsqrt_x (pg, sqsum); 44 | } 45 | 46 | TEST_SIG (SV, D, 2, hypot, -10.0, 10.0) 47 | TEST_ULP (SV_NAME_D2 (hypot), 0.71) 48 | TEST_DISABLE_FENV (SV_NAME_D2 (hypot)) 49 | TEST_INTERVAL2 (SV_NAME_D2 (hypot), 0, inf, 0, inf, 10000) 50 | TEST_INTERVAL2 (SV_NAME_D2 (hypot), 0, inf, -0, -inf, 10000) 51 | TEST_INTERVAL2 (SV_NAME_D2 (hypot), -0, -inf, 0, inf, 10000) 52 | TEST_INTERVAL2 (SV_NAME_D2 (hypot), -0, -inf, -0, -inf, 10000) 53 | CLOSE_SVE_ATTR 54 | -------------------------------------------------------------------------------- /math/aarch64/sve/hypotf.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Single-precision SVE hypot(x) function. 3 | * 4 | * Copyright (c) 2023-2024, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "sv_math.h" 9 | #include "test_sig.h" 10 | #include "test_defs.h" 11 | 12 | #define TinyBound 0x0c800000 /* asuint (0x1p-102). */ 13 | #define Thres 0x73000000 /* asuint (inf) - TinyBound.
*/ 14 | 15 | static svfloat32_t NOINLINE 16 | special_case (svfloat32_t sqsum, svfloat32_t x, svfloat32_t y, svbool_t pg, 17 | svbool_t special) 18 | { 19 | return sv_call2_f32 (hypotf, x, y, svsqrt_x (pg, sqsum), special); 20 | } 21 | 22 | /* SVE implementation of single-precision hypot. 23 | Maximum error observed is 1.21 ULP: 24 | _ZGVsMxvv_hypotf (0x1.6a213cp-19, -0x1.32b982p-26) got 0x1.6a2346p-19 25 | want 0x1.6a2344p-19. */ 26 | svfloat32_t SV_NAME_F2 (hypot) (svfloat32_t x, svfloat32_t y, 27 | const svbool_t pg) 28 | { 29 | svfloat32_t sqsum = svmla_x (pg, svmul_x (pg, x, x), y, y); 30 | 31 | svbool_t special = svcmpge ( 32 | pg, svsub_x (pg, svreinterpret_u32 (sqsum), TinyBound), Thres); 33 | 34 | if (unlikely (svptest_any (pg, special))) 35 | return special_case (sqsum, x, y, pg, special); 36 | 37 | return svsqrt_x (pg, sqsum); 38 | } 39 | 40 | TEST_SIG (SV, F, 2, hypot, -10.0, 10.0) 41 | TEST_ULP (SV_NAME_F2 (hypot), 0.71) 42 | TEST_DISABLE_FENV (SV_NAME_F2 (hypot)) 43 | TEST_INTERVAL2 (SV_NAME_F2 (hypot), 0, inf, 0, inf, 10000) 44 | TEST_INTERVAL2 (SV_NAME_F2 (hypot), 0, inf, -0, -inf, 10000) 45 | TEST_INTERVAL2 (SV_NAME_F2 (hypot), -0, -inf, 0, inf, 10000) 46 | TEST_INTERVAL2 (SV_NAME_F2 (hypot), -0, -inf, -0, -inf, 10000) 47 | CLOSE_SVE_ATTR 48 | -------------------------------------------------------------------------------- /math/aarch64/sve/log1pf.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Single-precision vector log(x + 1) function. 3 | * 4 | * Copyright (c) 2023-2024, Arm Limited. 
5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "sv_math.h" 9 | #include "test_sig.h" 10 | #include "test_defs.h" 11 | #include "sv_log1pf_inline.h" 12 | 13 | static svfloat32_t NOINLINE 14 | special_case (svfloat32_t x, svbool_t special) 15 | { 16 | return sv_call_f32 (log1pf, x, sv_log1pf_inline (x, svptrue_b32 ()), 17 | special); 18 | } 19 | 20 | /* Vector log1pf approximation using polynomial on reduced interval. Worst-case 21 | error is 1.27 ULP very close to 0.5. 22 | _ZGVsMxv_log1pf(0x1.fffffep-2) got 0x1.9f324p-2 23 | want 0x1.9f323ep-2. */ 24 | svfloat32_t SV_NAME_F1 (log1p) (svfloat32_t x, svbool_t pg) 25 | { 26 | /* x < -1, Inf/Nan. */ 27 | svbool_t special = svcmpeq (pg, svreinterpret_u32 (x), 0x7f800000); 28 | special = svorn_z (pg, special, svcmpge (pg, x, -1)); 29 | 30 | if (unlikely (svptest_any (pg, special))) 31 | return special_case (x, special); 32 | 33 | return sv_log1pf_inline (x, pg); 34 | } 35 | 36 | TEST_SIG (SV, F, 1, log1p, -0.9, 10.0) 37 | TEST_ULP (SV_NAME_F1 (log1p), 0.77) 38 | TEST_DISABLE_FENV (SV_NAME_F1 (log1p)) 39 | TEST_SYM_INTERVAL (SV_NAME_F1 (log1p), 0, 0x1p-23, 5000) 40 | TEST_SYM_INTERVAL (SV_NAME_F1 (log1p), 0x1p-23, 1, 5000) 41 | TEST_INTERVAL (SV_NAME_F1 (log1p), 1, inf, 10000) 42 | TEST_INTERVAL (SV_NAME_F1 (log1p), -1, -inf, 10) 43 | CLOSE_SVE_ATTR 44 | -------------------------------------------------------------------------------- /math/aarch64/sve/modf.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Double-precision SVE modf(x, *y) function. 3 | * 4 | * Copyright (c) 2024, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "sv_math.h" 9 | #include "test_sig.h" 10 | #include "test_defs.h" 11 | 12 | /* Modf algorithm. Produces exact values in all rounding modes. 
*/ 13 | svfloat64_t SV_NAME_D1_L1 (modf) (svfloat64_t x, double *out_int, 14 | const svbool_t pg) 15 | { 16 | /* Get integer component of x. */ 17 | svfloat64_t fint_comp = svrintz_x (pg, x); 18 | 19 | svst1_f64 (pg, out_int, fint_comp); 20 | 21 | /* Subtract integer component from input. */ 22 | svfloat64_t remaining = svsub_f64_x (svptrue_b64 (), x, fint_comp); 23 | 24 | /* Return +0 for integer x. */ 25 | svbool_t is_integer = svcmpeq (pg, x, fint_comp); 26 | return svsel (is_integer, sv_f64 (0), remaining); 27 | } 28 | 29 | TEST_ULP (_ZGVsMxvl8_modf_frac, 0.0) 30 | TEST_SYM_INTERVAL (_ZGVsMxvl8_modf_frac, 0, 1, 20000) 31 | TEST_SYM_INTERVAL (_ZGVsMxvl8_modf_frac, 1, inf, 20000) 32 | 33 | TEST_ULP (_ZGVsMxvl8_modf_int, 0.0) 34 | TEST_SYM_INTERVAL (_ZGVsMxvl8_modf_int, 0, 1, 20000) 35 | TEST_SYM_INTERVAL (_ZGVsMxvl8_modf_int, 1, inf, 20000) 36 | CLOSE_SVE_ATTR 37 | -------------------------------------------------------------------------------- /math/aarch64/sve/modff.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Single-precision SVE modff(x, *y) function. 3 | * 4 | * Copyright (c) 2024, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "sv_math.h" 9 | #include "test_sig.h" 10 | #include "test_defs.h" 11 | 12 | /* Modff algorithm. Produces exact values in all rounding modes. */ 13 | svfloat32_t SV_NAME_F1_L1 (modf) (svfloat32_t x, float *out_int, 14 | const svbool_t pg) 15 | { 16 | /* Get integer component of x. */ 17 | svfloat32_t fint_comp = svrintz_x (pg, x); 18 | 19 | svst1_f32 (pg, out_int, fint_comp); 20 | 21 | /* Subtract integer component from input. */ 22 | svfloat32_t remaining = svsub_f32_x (svptrue_b32 (), x, fint_comp); 23 | 24 | /* Return +0 for integer x. 
*/ 25 | svbool_t is_integer = svcmpeq (pg, x, fint_comp); 26 | return svsel (is_integer, sv_f32 (0), remaining); 27 | } 28 | 29 | TEST_ULP (_ZGVsMxvl4_modff_frac, 0.0) 30 | TEST_SYM_INTERVAL (_ZGVsMxvl4_modff_frac, 0, 1, 20000) 31 | TEST_SYM_INTERVAL (_ZGVsMxvl4_modff_frac, 1, inf, 20000) 32 | 33 | TEST_ULP (_ZGVsMxvl4_modff_int, 0.0) 34 | TEST_SYM_INTERVAL (_ZGVsMxvl4_modff_int, 0, 1, 20000) 35 | TEST_SYM_INTERVAL (_ZGVsMxvl4_modff_int, 1, inf, 20000) 36 | CLOSE_SVE_ATTR 37 | -------------------------------------------------------------------------------- /math/aarch64/sve/sincospi.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Double-precision SVE sincospi(x, *y, *z) function. 3 | * 4 | * Copyright (c) 2024-2025, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "sv_math.h" 9 | #include "test_defs.h" 10 | #include "mathlib.h" 11 | #include "sv_sincospi_common.h" 12 | 13 | /* Double-precision vector function allowing calculation of both sinpi and 14 | cospi in one function call, using shared argument reduction and polynomials. 15 | Worst-case error for sin is 3.09 ULP: 16 | _ZGVsMxvl8l8_sincospi_sin(0x1.7a41deb4b21e1p+14) got 0x1.fd54d0b327cf1p-1 17 | want 0x1.fd54d0b327cf4p-1. 18 | Worst-case error for cos is 3.16 ULP: 19 | _ZGVsMxvl8l8_sincospi_cos(-0x1.11e3c7e284adep-5) got 0x1.fd2da484ff3ffp-1 20 | want 0x1.fd2da484ff402p-1.
21 | */ 22 | void 23 | _ZGVsMxvl8l8_sincospi (svfloat64_t x, double *out_sin, double *out_cos, 24 | svbool_t pg) 25 | { 26 | const struct sv_sincospi_data *d = ptr_barrier (&sv_sincospi_data); 27 | 28 | svfloat64x2_t sc = sv_sincospi_inline (pg, x, d); 29 | 30 | svst1 (pg, out_sin, svget2 (sc, 0)); 31 | svst1 (pg, out_cos, svget2 (sc, 1)); 32 | } 33 | 34 | #if WANT_C23_TESTS 35 | TEST_DISABLE_FENV (_ZGVsMxvl8l8_sincospi_sin) 36 | TEST_DISABLE_FENV (_ZGVsMxvl8l8_sincospi_cos) 37 | TEST_ULP (_ZGVsMxvl8l8_sincospi_sin, 2.59) 38 | TEST_ULP (_ZGVsMxvl8l8_sincospi_cos, 2.66) 39 | # define SV_SINCOSPI_INTERVAL(lo, hi, n) \ 40 | TEST_SYM_INTERVAL (_ZGVsMxvl8l8_sincospi_sin, lo, hi, n) \ 41 | TEST_SYM_INTERVAL (_ZGVsMxvl8l8_sincospi_cos, lo, hi, n) 42 | SV_SINCOSPI_INTERVAL (0, 0x1p-63, 10000) 43 | SV_SINCOSPI_INTERVAL (0x1p-63, 0.5, 50000) 44 | SV_SINCOSPI_INTERVAL (0.5, 0x1p53, 50000) 45 | SV_SINCOSPI_INTERVAL (0x1p53, inf, 10000) 46 | #endif 47 | CLOSE_SVE_ATTR 48 | -------------------------------------------------------------------------------- /math/aarch64/sve/sincospif.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Single-precision SVE sincospi(x, *y, *z) function. 3 | * 4 | * Copyright (c) 2024-2025, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "sv_math.h" 9 | #include "test_defs.h" 10 | #include "mathlib.h" 11 | #include "sv_sincospif_common.h" 12 | 13 | /* Single-precision vector function allowing calculation of both sinpi and 14 | cospi in one function call, using shared argument reduction and polynomials. 15 | Worst-case error for sin is 3.04 ULP: 16 | _ZGVsMxvl4l4_sincospif_sin(0x1.b51b8p-2) got 0x1.f28b5ep-1 want 17 | 0x1.f28b58p-1. 18 | Worst-case error for cos is 3.18 ULP: 19 | _ZGVsMxvl4l4_sincospif_cos(0x1.d341a8p-5) got 0x1.f7cd56p-1 want 20 | 0x1.f7cd5p-1. 
*/ 21 | void 22 | _ZGVsMxvl4l4_sincospif (svfloat32_t x, float *out_sin, float *out_cos, 23 | svbool_t pg) 24 | { 25 | const struct sv_sincospif_data *d = ptr_barrier (&sv_sincospif_data); 26 | 27 | svfloat32x2_t sc = sv_sincospif_inline (pg, x, d); 28 | 29 | svst1 (pg, out_sin, svget2 (sc, 0)); 30 | svst1 (pg, out_cos, svget2 (sc, 1)); 31 | } 32 | 33 | #if WANT_C23_TESTS 34 | TEST_DISABLE_FENV (_ZGVsMxvl4l4_sincospif_sin) 35 | TEST_DISABLE_FENV (_ZGVsMxvl4l4_sincospif_cos) 36 | TEST_ULP (_ZGVsMxvl4l4_sincospif_sin, 2.54) 37 | TEST_ULP (_ZGVsMxvl4l4_sincospif_cos, 2.68) 38 | # define SV_SINCOSPIF_INTERVAL(lo, hi, n) \ 39 | TEST_SYM_INTERVAL (_ZGVsMxvl4l4_sincospif_sin, lo, hi, n) \ 40 | TEST_SYM_INTERVAL (_ZGVsMxvl4l4_sincospif_cos, lo, hi, n) 41 | SV_SINCOSPIF_INTERVAL (0, 0x1p-31, 10000) 42 | SV_SINCOSPIF_INTERVAL (0x1p-31, 0.5, 50000) 43 | SV_SINCOSPIF_INTERVAL (0.5, 0x1p31, 50000) 44 | SV_SINCOSPIF_INTERVAL (0x1p31, inf, 10000) 45 | #endif 46 | CLOSE_SVE_ATTR 47 | -------------------------------------------------------------------------------- /math/aarch64/sve/sv_expf_inline.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SVE helper for single-precision routines which calculate exp(x) and do 3 | * not need special-case handling 4 | * 5 | * Copyright (c) 2023-2025, Arm Limited. 6 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 7 | */ 8 | 9 | #ifndef MATH_SV_EXPF_INLINE_H 10 | #define MATH_SV_EXPF_INLINE_H 11 | 12 | #include "sv_math.h" 13 | #include "test_sig.h" 14 | #include "test_defs.h" 15 | 16 | struct sv_expf_data 17 | { 18 | float ln2_hi, ln2_lo, c1, null; 19 | float inv_ln2, shift; 20 | }; 21 | 22 | /* Shift is 1.5*2^17 + 127. 
*/ 23 | #define SV_EXPF_DATA \ 24 | { \ 25 | .c1 = 0.5f, .inv_ln2 = 0x1.715476p+0f, .ln2_hi = 0x1.62e4p-1f, \ 26 | .ln2_lo = 0x1.7f7d1cp-20f, .shift = 0x1.803f8p17f, \ 27 | } 28 | 29 | static inline svfloat32_t 30 | expf_inline (svfloat32_t x, const svbool_t pg, const struct sv_expf_data *d) 31 | { 32 | /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] 33 | x = ln2*n + r, with r in [-ln2/2, ln2/2]. */ 34 | 35 | svfloat32_t lane_consts = svld1rq (svptrue_b32 (), &d->ln2_hi); 36 | 37 | /* n = round(x/(ln2/N)). */ 38 | svfloat32_t z = svmad_x (pg, sv_f32 (d->inv_ln2), x, d->shift); 39 | svfloat32_t n = svsub_x (pg, z, d->shift); 40 | 41 | /* r = x - n*ln2/N. */ 42 | svfloat32_t r = x; 43 | r = svmls_lane (r, n, lane_consts, 0); 44 | r = svmls_lane (r, n, lane_consts, 1); 45 | 46 | /* scale = 2^(n/N). */ 47 | svfloat32_t scale = svexpa (svreinterpret_u32 (z)); 48 | 49 | /* poly(r) = exp(r) - 1 ~= r + 0.5 r^2. */ 50 | svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r); 51 | svfloat32_t poly = svmla_lane (r, r2, lane_consts, 2); 52 | 53 | return svmla_x (pg, scale, scale, poly); 54 | } 55 | 56 | #endif // MATH_SV_EXPF_INLINE_H 57 | -------------------------------------------------------------------------------- /math/aarch64/sve/sv_poly_f32.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Helpers for evaluating polynomials on single-precision SVE input, using 3 | * various schemes. 4 | * 5 | * Copyright (c) 2023-2025, Arm Limited. 6 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 7 | */ 8 | 9 | #ifndef MATH_POLY_SVE_F32_H 10 | #define MATH_POLY_SVE_F32_H 11 | 12 | #include <arm_sve.h> 13 | 14 | /* Wrap SVE f32 helpers: evaluation of some scheme/order has form: 15 | sv_[scheme]_[order]_f32_x. 
*/ 16 | #define PTRUE svptrue_b32 () 17 | #define VTYPE svfloat32_t 18 | #define STYPE float 19 | #define VWRAP(f) sv_##f##_f32_x 20 | #define DUP svdup_f32 21 | #include "sv_poly_generic.h" 22 | #undef DUP 23 | #undef VWRAP 24 | #undef STYPE 25 | #undef VTYPE 26 | #undef PTRUE 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /math/aarch64/sve/sv_poly_f64.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Helpers for evaluating polynomials on double-precision SVE input, using 3 | * various schemes. 4 | * 5 | * Copyright (c) 2023-2025, Arm Limited. 6 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 7 | */ 8 | 9 | #ifndef MATH_POLY_SVE_F64_H 10 | #define MATH_POLY_SVE_F64_H 11 | 12 | #include <arm_sve.h> 13 | 14 | /* Wrap SVE f64 helpers: evaluation of some scheme/order has form: 15 | sv_[scheme]_[order]_f64_x. */ 16 | #define PTRUE svptrue_b64 () 17 | #define VTYPE svfloat64_t 18 | #define STYPE double 19 | #define VWRAP(f) sv_##f##_f64_x 20 | #define DUP svdup_f64 21 | #include "sv_poly_generic.h" 22 | #undef DUP 23 | #undef VWRAP 24 | #undef STYPE 25 | #undef VTYPE 26 | #undef PTRUE 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /math/erff_data.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Data for approximation of erff. 3 | * 4 | * Copyright (c) 2019-2020, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "math_config.h" 9 | 10 | /* Minimax approximation of erff. 
*/ 11 | const struct erff_data __erff_data = { 12 | .erff_poly_A = { 13 | 0x1.06eba6p-03f, -0x1.8126e0p-02f, 0x1.ce1a46p-04f, 14 | -0x1.b68bd2p-06f, 0x1.473f48p-08f, -0x1.3a1a82p-11f 15 | }, 16 | .erff_poly_B = { 17 | 0x1.079d0cp-3f, 0x1.450aa0p-1f, 0x1.b55cb0p-4f, 18 | -0x1.8d6300p-6f, 0x1.fd1336p-9f, -0x1.91d2ccp-12f, 19 | 0x1.222900p-16f 20 | } 21 | }; 22 | 23 | -------------------------------------------------------------------------------- /math/include/test_defs.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Helper macros for emitting various details about routines for consumption by 3 | * runulp.sh. This version of the file is for inclusion when building routines, 4 | * so expansions are empty - see math/test/test_defs for versions used by the 5 | * build system. 6 | * 7 | * Copyright (c) 2024, Arm Limited. 8 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception. 9 | */ 10 | 11 | #define TEST_ULP(f, l) 12 | #define TEST_ULP_NONNEAREST(f, l) 13 | 14 | #define TEST_DISABLE_FENV(f) 15 | #define TEST_DISABLE_FENV_IF_NOT(f, e) 16 | 17 | #define TEST_INTERVAL(f, lo, hi, n) 18 | #define TEST_SYM_INTERVAL(f, lo, hi, n) 19 | #define TEST_INTERVAL2(f, xlo, xhi, ylo, yhi, n) 20 | 21 | #define TEST_CONTROL_VALUE(f, c) 22 | -------------------------------------------------------------------------------- /math/log2f_data.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Data definition for log2f. 3 | * 4 | * Copyright (c) 2017-2018, Arm Limited. 
5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "math_config.h" 9 | 10 | const struct log2f_data __log2f_data = { 11 | .tab = { 12 | { 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 }, 13 | { 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 }, 14 | { 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 }, 15 | { 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 }, 16 | { 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 }, 17 | { 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 }, 18 | { 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 }, 19 | { 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 }, 20 | { 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 }, 21 | { 0x1p+0, 0x0p+0 }, 22 | { 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 }, 23 | { 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 }, 24 | { 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 }, 25 | { 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 }, 26 | { 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 }, 27 | { 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 }, 28 | }, 29 | .poly = { 30 | -0x1.712b6f70a7e4dp-2, 0x1.ecabf496832ep-2, -0x1.715479ffae3dep-1, 31 | 0x1.715475f35c8b8p0, 32 | } 33 | }; 34 | -------------------------------------------------------------------------------- /math/logf_data.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Data definition for logf. 3 | * 4 | * Copyright (c) 2017-2024, Arm Limited. 
5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "math_config.h" 9 | 10 | const struct logf_data __logf_data = { 11 | .tab = { 12 | { 0x1.661ec79f8f3bep+0, -0x1.57bf7808caadep-2 }, 13 | { 0x1.571ed4aaf883dp+0, -0x1.2bef0a7c06ddbp-2 }, 14 | { 0x1.49539f0f010bp+0, -0x1.01eae7f513a67p-2 }, 15 | { 0x1.3c995b0b80385p+0, -0x1.b31d8a68224e9p-3 }, 16 | { 0x1.30d190c8864a5p+0, -0x1.6574f0ac07758p-3 }, 17 | { 0x1.25e227b0b8eap+0, -0x1.1aa2bc79c81p-3 }, 18 | { 0x1.1bb4a4a1a343fp+0, -0x1.a4e76ce8c0e5ep-4 }, 19 | { 0x1.12358f08ae5bap+0, -0x1.1973c5a611cccp-4 }, 20 | { 0x1.0953f419900a7p+0, -0x1.252f438e10c1ep-5 }, 21 | { 0x1p+0, 0x0p+0 }, 22 | { 0x1.e608cfd9a47acp-1, 0x1.aa5aa5df25984p-5 }, 23 | { 0x1.ca4b31f026aap-1, 0x1.c5e53aa362eb4p-4 }, 24 | { 0x1.b2036576afce6p-1, 0x1.526e57720db08p-3 }, 25 | { 0x1.9c2d163a1aa2dp-1, 0x1.bc2860d22477p-3 }, 26 | { 0x1.886e6037841edp-1, 0x1.1058bc8a07ee1p-2 }, 27 | { 0x1.767dcf5534862p-1, 0x1.4043057b6ee09p-2 }, 28 | }, 29 | .ln2 = 0x1.62e42fefa39efp-1, 30 | .invln10 = 0x1.bcb7b1526e50ep-2, 31 | .poly = { 32 | -0x1.00ea348b88334p-2, 0x1.5575b0be00b6ap-2, -0x1.ffffef20a4123p-2, 33 | } 34 | }; 35 | -------------------------------------------------------------------------------- /math/math_err.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Double-precision math error handling. 3 | * 4 | * Copyright (c) 2018, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "math_config.h" 9 | 10 | #if WANT_ERRNO 11 | #include <errno.h> 12 | /* NOINLINE reduces code size and avoids making math functions non-leaf 13 | when the error handling is inlined. */ 14 | NOINLINE static double 15 | with_errno (double y, int e) 16 | { 17 | errno = e; 18 | return y; 19 | } 20 | #else 21 | #define with_errno(x, e) (x) 22 | #endif 23 | 24 | /* NOINLINE reduces code size. 
*/ 25 | NOINLINE static double 26 | xflow (uint32_t sign, double y) 27 | { 28 | y = eval_as_double (opt_barrier_double (sign ? -y : y) * y); 29 | return with_errno (y, ERANGE); 30 | } 31 | 32 | HIDDEN double 33 | __math_uflow (uint32_t sign) 34 | { 35 | return xflow (sign, 0x1p-767); 36 | } 37 | 38 | #if WANT_ERRNO_UFLOW 39 | /* Underflows to zero in some non-nearest rounding mode, setting errno 40 | is valid even if the result is non-zero, but in the subnormal range. */ 41 | HIDDEN double 42 | __math_may_uflow (uint32_t sign) 43 | { 44 | return xflow (sign, 0x1.8p-538); 45 | } 46 | #endif 47 | 48 | HIDDEN double 49 | __math_oflow (uint32_t sign) 50 | { 51 | return xflow (sign, 0x1p769); 52 | } 53 | 54 | HIDDEN double 55 | __math_divzero (uint32_t sign) 56 | { 57 | double y = opt_barrier_double (sign ? -1.0 : 1.0) / 0.0; 58 | return with_errno (y, ERANGE); 59 | } 60 | 61 | HIDDEN double 62 | __math_invalid (double x) 63 | { 64 | double y = (x - x) / (x - x); 65 | return isnan (x) ? y : with_errno (y, EDOM); 66 | } 67 | 68 | /* Check result and set errno if necessary. */ 69 | 70 | HIDDEN double 71 | __math_check_uflow (double y) 72 | { 73 | return y == 0.0 ? with_errno (y, ERANGE) : y; 74 | } 75 | 76 | HIDDEN double 77 | __math_check_oflow (double y) 78 | { 79 | return isinf (y) ? with_errno (y, ERANGE) : y; 80 | } 81 | -------------------------------------------------------------------------------- /math/math_errf.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Single-precision math error handling. 3 | * 4 | * Copyright (c) 2017-2020, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "math_config.h" 9 | 10 | #if WANT_ERRNO 11 | #include <errno.h> 12 | /* NOINLINE reduces code size and avoids making math functions non-leaf 13 | when the error handling is inlined. 
*/ 14 | NOINLINE static float 15 | with_errnof (float y, int e) 16 | { 17 | errno = e; 18 | return y; 19 | } 20 | #else 21 | #define with_errnof(x, e) (x) 22 | #endif 23 | 24 | /* NOINLINE reduces code size. */ 25 | NOINLINE static float 26 | xflowf (uint32_t sign, float y) 27 | { 28 | y = eval_as_float (opt_barrier_float (sign ? -y : y) * y); 29 | return with_errnof (y, ERANGE); 30 | } 31 | 32 | HIDDEN float 33 | __math_uflowf (uint32_t sign) 34 | { 35 | return xflowf (sign, 0x1p-95f); 36 | } 37 | 38 | #if WANT_ERRNO_UFLOW 39 | /* Underflows to zero in some non-nearest rounding mode, setting errno 40 | is valid even if the result is non-zero, but in the subnormal range. */ 41 | HIDDEN float 42 | __math_may_uflowf (uint32_t sign) 43 | { 44 | return xflowf (sign, 0x1.4p-75f); 45 | } 46 | #endif 47 | 48 | HIDDEN float 49 | __math_oflowf (uint32_t sign) 50 | { 51 | return xflowf (sign, 0x1p97f); 52 | } 53 | 54 | HIDDEN float 55 | __math_divzerof (uint32_t sign) 56 | { 57 | float y = opt_barrier_float (sign ? -1.0f : 1.0f) / 0.0f; 58 | return with_errnof (y, ERANGE); 59 | } 60 | 61 | HIDDEN float 62 | __math_invalidf (float x) 63 | { 64 | float y = (x - x) / (x - x); 65 | return isnan (x) ? y : with_errnof (y, EDOM); 66 | } 67 | 68 | /* Check result and set errno if necessary. */ 69 | 70 | HIDDEN float 71 | __math_check_uflowf (float y) 72 | { 73 | return y == 0.0f ? with_errnof (y, ERANGE) : y; 74 | } 75 | 76 | HIDDEN float 77 | __math_check_oflowf (float y) 78 | { 79 | return isinf (y) ? with_errnof (y, ERANGE) : y; 80 | } 81 | -------------------------------------------------------------------------------- /math/poly_scalar_f32.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Helpers for evaluating polynomials on single-precision scalar input, using 3 | * various schemes. 4 | * 5 | * Copyright (c) 2023-2024, Arm Limited. 
6 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 7 | */ 8 | 9 | #ifndef MATH_POLY_SCALAR_F32_H 10 | #define MATH_POLY_SCALAR_F32_H 11 | 12 | #include <math.h> 13 | 14 | /* Wrap scalar f32 helpers: evaluation of some scheme/order has form: 15 | [scheme]_[order]_f32. */ 16 | #define VTYPE float 17 | #define FMA fmaf 18 | #define VWRAP(f) f##_f32 19 | #include "poly_generic.h" 20 | #undef VWRAP 21 | #undef FMA 22 | #undef VTYPE 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /math/poly_scalar_f64.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Helpers for evaluating polynomials on double-precision scalar input, using 3 | * various schemes. 4 | * 5 | * Copyright (c) 2023-2024, Arm Limited. 6 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 7 | */ 8 | 9 | #ifndef MATH_POLY_SCALAR_F64_H 10 | #define MATH_POLY_SCALAR_F64_H 11 | 12 | #include <math.h> 13 | 14 | /* Wrap scalar f64 helpers: evaluation of some scheme/order has form: 15 | [scheme]_[order]_f64. */ 16 | #define VTYPE double 17 | #define FMA fma 18 | #define VWRAP(f) f##_f64 19 | #include "poly_generic.h" 20 | #undef VWRAP 21 | #undef FMA 22 | #undef VTYPE 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /math/powf_log2_data.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Data definition for powf. 3 | * 4 | * Copyright (c) 2017-2019, Arm Limited. 
5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "math_config.h" 9 | 10 | const struct powf_log2_data __powf_log2_data = { 11 | .tab = { 12 | { 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 * POWF_SCALE }, 13 | { 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 * POWF_SCALE }, 14 | { 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 * POWF_SCALE }, 15 | { 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 * POWF_SCALE }, 16 | { 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 * POWF_SCALE }, 17 | { 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 * POWF_SCALE }, 18 | { 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 * POWF_SCALE }, 19 | { 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 * POWF_SCALE }, 20 | { 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 * POWF_SCALE }, 21 | { 0x1p+0, 0x0p+0 * POWF_SCALE }, 22 | { 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 * POWF_SCALE }, 23 | { 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 * POWF_SCALE }, 24 | { 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 * POWF_SCALE }, 25 | { 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 * POWF_SCALE }, 26 | { 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 * POWF_SCALE }, 27 | { 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 * POWF_SCALE }, 28 | }, 29 | .poly = { 30 | 0x1.27616c9496e0bp-2 * POWF_SCALE, -0x1.71969a075c67ap-2 * POWF_SCALE, 31 | 0x1.ec70a6ca7baddp-2 * POWF_SCALE, -0x1.7154748bef6c8p-1 * POWF_SCALE, 32 | 0x1.71547652ab82bp0 * POWF_SCALE, 33 | } 34 | }; 35 | -------------------------------------------------------------------------------- /math/sincosf_data.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Data definition for sinf, cosf and sincosf. 3 | * 4 | * Copyright (c) 2018-2019, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include <stdint.h> 9 | #include <math.h> 10 | #include "math_config.h" 11 | #include "sincosf.h" 12 | 13 | /* The constants and polynomials for sine and cosine. 
The 2nd entry 14 | computes -cos (x) rather than cos (x) to get negation for free. */ 15 | const sincos_t __sincosf_table[2] = 16 | { 17 | { 18 | { 1.0, -1.0, -1.0, 1.0 }, 19 | #if TOINT_INTRINSICS 20 | 0x1.45F306DC9C883p-1, 21 | #else 22 | 0x1.45F306DC9C883p+23, 23 | #endif 24 | 0x1.921FB54442D18p0, 25 | 0x1p0, 26 | -0x1.ffffffd0c621cp-2, 27 | 0x1.55553e1068f19p-5, 28 | -0x1.6c087e89a359dp-10, 29 | 0x1.99343027bf8c3p-16, 30 | -0x1.555545995a603p-3, 31 | 0x1.1107605230bc4p-7, 32 | -0x1.994eb3774cf24p-13 33 | }, 34 | { 35 | { 1.0, -1.0, -1.0, 1.0 }, 36 | #if TOINT_INTRINSICS 37 | 0x1.45F306DC9C883p-1, 38 | #else 39 | 0x1.45F306DC9C883p+23, 40 | #endif 41 | 0x1.921FB54442D18p0, 42 | -0x1p0, 43 | 0x1.ffffffd0c621cp-2, 44 | -0x1.55553e1068f19p-5, 45 | 0x1.6c087e89a359dp-10, 46 | -0x1.99343027bf8c3p-16, 47 | -0x1.555545995a603p-3, 48 | 0x1.1107605230bc4p-7, 49 | -0x1.994eb3774cf24p-13 50 | } 51 | }; 52 | 53 | /* Table with 4/PI to 192 bit precision. To avoid unaligned accesses 54 | only 8 new bits are added per entry, making the table 4 times larger. */ 55 | const uint32_t __inv_pio4[24] = 56 | { 57 | 0xa2, 0xa2f9, 0xa2f983, 0xa2f9836e, 58 | 0xf9836e4e, 0x836e4e44, 0x6e4e4415, 0x4e441529, 59 | 0x441529fc, 0x1529fc27, 0x29fc2757, 0xfc2757d1, 60 | 0x2757d1f5, 0x57d1f534, 0xd1f534dd, 0xf534ddc0, 61 | 0x34ddc0db, 0xddc0db62, 0xc0db6295, 0xdb629599, 62 | 0x6295993c, 0x95993c43, 0x993c4390, 0x3c439041 63 | }; 64 | -------------------------------------------------------------------------------- /math/test/rtest/random.h: -------------------------------------------------------------------------------- 1 | /* 2 | * random.h - header for random.c 3 | * 4 | * Copyright (c) 2009-2019, Arm Limited. 
5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "types.h" 9 | 10 | uint32 random32(void); 11 | uint32 random_upto(uint32 limit); 12 | uint32 random_upto_biased(uint32 limit, int bias); 13 | -------------------------------------------------------------------------------- /math/test/rtest/types.h: -------------------------------------------------------------------------------- 1 | /* 2 | * types.h 3 | * 4 | * Copyright (c) 2005-2019, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #ifndef mathtest_types_h 9 | #define mathtest_types_h 10 | 11 | #include <limits.h> 12 | 13 | #if UINT_MAX == 4294967295 14 | typedef unsigned int uint32; 15 | typedef int int32; 16 | #define I32 "" 17 | #elif ULONG_MAX == 4294967295 18 | typedef unsigned long uint32; 19 | typedef long int32; 20 | #define I32 "l" 21 | #else 22 | #error Could not find an unsigned 32-bit integer type 23 | #endif 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /math/test/test_defs.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Helper macros for emitting various details about routines for consumption by 3 | * runulp.sh. 4 | * 5 | * Copyright (c) 2024, Arm Limited. 6 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception. 7 | */ 8 | 9 | #define TEST_ULP(f, l) TEST_ULP f l 10 | #define TEST_ULP_NONNEAREST(f, l) TEST_ULP_NONNEAREST f l 11 | 12 | /* Emit routine name if e == 0 and f is expected to correctly trigger fenv 13 | exceptions. e allows declaration to be emitted conditionally on 14 | WANT_SIMD_EXCEPT - defer expansion by one pass to allow those flags to be 15 | expanded properly. 
*/ 16 | #define TEST_DISABLE_FENV(f) TEST_DISABLE_FENV f 17 | #define TEST_DISABLE_FENV_IF_NOT(f, e) TEST_DISABLE_FENV_IF_NOT_ (f, e) 18 | #define TEST_DISABLE_FENV_IF_NOT_(f, e) TEST_DISABLE_FENV_IF_NOT_##e (f) 19 | #define TEST_DISABLE_FENV_IF_NOT_0(f) TEST_DISABLE_FENV (f) 20 | #define TEST_DISABLE_FENV_IF_NOT_1(f) 21 | 22 | #define TEST_INTERVAL(f, lo, hi, n) TEST_INTERVAL f lo hi n 23 | #define TEST_SYM_INTERVAL(f, lo, hi, n) \ 24 | TEST_INTERVAL (f, lo, hi, n) \ 25 | TEST_INTERVAL (f, -lo, -hi, n) 26 | // clang-format off 27 | #define TEST_INTERVAL2(f, xlo, xhi, ylo, yhi, n) \ 28 | TEST_INTERVAL f xlo,ylo xhi,yhi n 29 | // clang-format on 30 | 31 | #define TEST_CONTROL_VALUE(f, c) TEST_CONTROL_VALUE f c 32 | -------------------------------------------------------------------------------- /math/test/testcases/directed/acos.tst: -------------------------------------------------------------------------------- 1 | ; acos.tst 2 | ; 3 | ; Copyright (c) 2009-2024, Arm Limited. 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | func=acos op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 7 | func=acos op1=fff80000.00000001 result=7ff80000.00000001 errno=0 8 | func=acos op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i 9 | func=acos op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i 10 | func=acos op1=7ff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i 11 | func=acos op1=fff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i 12 | func=acos op1=00000000.00000000 result=3ff921fb.54442d18.469 errno=0 13 | func=acos op1=80000000.00000000 result=3ff921fb.54442d18.469 errno=0 14 | func=acos op1=3ff00000.00000000 result=00000000.00000000 errno=0 15 | func=acos op1=bff00000.00000000 result=400921fb.54442d18.469 errno=0 16 | func=acos op1=3ff00000.00000001 result=7ff80000.00000001 errno=EDOM status=i 17 | func=acos op1=bff00000.00000001 result=7ff80000.00000001 errno=EDOM status=i 18 | 
-------------------------------------------------------------------------------- /math/test/testcases/directed/acosf.tst: -------------------------------------------------------------------------------- 1 | ; acosf.tst 2 | ; 3 | ; Copyright (c) 2009-2024, Arm Limited. 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | func=acosf op1=7fc00001 result=7fc00001 errno=0 7 | func=acosf op1=ffc00001 result=7fc00001 errno=0 8 | func=acosf op1=7f800001 result=7fc00001 errno=0 status=i 9 | func=acosf op1=ff800001 result=7fc00001 errno=0 status=i 10 | func=acosf op1=7f800000 result=7fc00001 errno=EDOM status=i 11 | func=acosf op1=ff800000 result=7fc00001 errno=EDOM status=i 12 | func=acosf op1=00000000 result=3fc90fda.a22 errno=0 13 | func=acosf op1=80000000 result=3fc90fda.a22 errno=0 14 | func=acosf op1=3f800000 result=00000000 errno=0 15 | func=acosf op1=bf800000 result=40490fda.a22 errno=0 16 | func=acosf op1=3f800001 result=7fc00001 errno=EDOM status=i 17 | func=acosf op1=bf800001 result=7fc00001 errno=EDOM status=i 18 | func=acosf op1=33000000 result=3fc90fda.622 error=0 19 | func=acosf op1=30000000 result=3fc90fda.a12 error=0 20 | func=acosf op1=2d000000 result=3fc90fda.a21 error=0 21 | func=acosf op1=2a000000 result=3fc90fda.a22 error=0 22 | -------------------------------------------------------------------------------- /math/test/testcases/directed/acosh.tst: -------------------------------------------------------------------------------- 1 | ; acosh.tst 2 | ; 3 | ; Copyright (c) 2009-2024, Arm Limited. 
4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | func=acosh op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 7 | func=acosh op1=fff80000.00000001 result=7ff80000.00000001 errno=0 8 | func=acosh op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i 9 | func=acosh op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i 10 | func=acosh op1=7ff00000.00000000 result=7ff00000.00000000 errno=0 11 | func=acosh op1=3ff00000.00000000 result=00000000.00000000 errno=0 12 | func=acosh op1=3fefffff.ffffffff result=7ff80000.00000001 errno=EDOM status=i 13 | func=acosh op1=00000000.00000000 result=7ff80000.00000001 errno=EDOM status=i 14 | func=acosh op1=80000000.00000000 result=7ff80000.00000001 errno=EDOM status=i 15 | func=acosh op1=bfefffff.ffffffff result=7ff80000.00000001 errno=EDOM status=i 16 | func=acosh op1=bff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i 17 | func=acosh op1=bff00000.00000001 result=7ff80000.00000001 errno=EDOM status=i 18 | func=acosh op1=fff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i 19 | func=acosh op1=7fe01ac0.7f03a83e result=40862e50.541778f1.8cc error=0 20 | -------------------------------------------------------------------------------- /math/test/testcases/directed/acoshf.tst: -------------------------------------------------------------------------------- 1 | ; acoshf.tst 2 | ; 3 | ; Copyright (c) 2009-2024, Arm Limited. 
4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | func=acoshf op1=7fc00001 result=7fc00001 errno=0 7 | func=acoshf op1=ffc00001 result=7fc00001 errno=0 8 | func=acoshf op1=7f800001 result=7fc00001 errno=0 status=i 9 | func=acoshf op1=ff800001 result=7fc00001 errno=0 status=i 10 | func=acoshf op1=7f800000 result=7f800000 errno=0 11 | func=acoshf op1=3f800000 result=00000000 errno=0 12 | func=acoshf op1=3f7fffff result=7fc00001 errno=EDOM status=i 13 | func=acoshf op1=00000000 result=7fc00001 errno=EDOM status=i 14 | func=acoshf op1=80000000 result=7fc00001 errno=EDOM status=i 15 | func=acoshf op1=bf7fffff result=7fc00001 errno=EDOM status=i 16 | func=acoshf op1=bf800000 result=7fc00001 errno=EDOM status=i 17 | func=acoshf op1=bf800001 result=7fc00001 errno=EDOM status=i 18 | func=acoshf op1=ff800000 result=7fc00001 errno=EDOM status=i 19 | func=acoshf op1=7f767efe result=42b2c19d.83e error=0 20 | -------------------------------------------------------------------------------- /math/test/testcases/directed/asin.tst: -------------------------------------------------------------------------------- 1 | ; asin.tst 2 | ; 3 | ; Copyright (c) 2009-2024, Arm Limited. 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | func=asin op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 7 | func=asin op1=fff80000.00000001 result=7ff80000.00000001 errno=0 8 | func=asin op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i 9 | func=asin op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i 10 | func=asin op1=7ff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i 11 | func=asin op1=fff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i 12 | func=asin op1=00000000.00000000 result=00000000.00000000 errno=0 13 | func=asin op1=80000000.00000000 result=80000000.00000000 errno=0 14 | ; Inconsistent behavior was detected for the following 2 cases. 
15 | ; No exception is raised with certain versions of glibc. Functions 16 | ; approximated by x near zero may not generate/implement flops and 17 | ; thus may not raise exceptions. 18 | func=asin op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux 19 | func=asin op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux 20 | 21 | func=asin op1=3ff00000.00000000 result=3ff921fb.54442d18.469 errno=0 22 | func=asin op1=bff00000.00000000 result=bff921fb.54442d18.469 errno=0 23 | func=asin op1=3ff00000.00000001 result=7ff80000.00000001 errno=EDOM status=i 24 | func=asin op1=bff00000.00000001 result=7ff80000.00000001 errno=EDOM status=i 25 | -------------------------------------------------------------------------------- /math/test/testcases/directed/asinf.tst: -------------------------------------------------------------------------------- 1 | ; asinf.tst 2 | ; 3 | ; Copyright (c) 2009-2024, Arm Limited. 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | func=asinf op1=7fc00001 result=7fc00001 errno=0 7 | func=asinf op1=ffc00001 result=7fc00001 errno=0 8 | func=asinf op1=7f800001 result=7fc00001 errno=0 status=i 9 | func=asinf op1=ff800001 result=7fc00001 errno=0 status=i 10 | func=asinf op1=7f800000 result=7fc00001 errno=EDOM status=i 11 | func=asinf op1=ff800000 result=7fc00001 errno=EDOM status=i 12 | func=asinf op1=00000000 result=00000000 errno=0 13 | func=asinf op1=80000000 result=80000000 errno=0 14 | ; Inconsistent behavior was detected for the following 2 cases. 15 | ; No exception is raised with certain versions of glibc. Functions 16 | ; approximated by x near zero may not generate/implement flops and 17 | ; thus may not raise exceptions. 
18 | func=asinf op1=00000001 result=00000001 errno=0 maybestatus=ux 19 | func=asinf op1=80000001 result=80000001 errno=0 maybestatus=ux 20 | 21 | func=asinf op1=3f800000 result=3fc90fda.a22 errno=0 22 | func=asinf op1=bf800000 result=bfc90fda.a22 errno=0 23 | func=asinf op1=3f800001 result=7fc00001 errno=EDOM status=i 24 | func=asinf op1=bf800001 result=7fc00001 errno=EDOM status=i 25 | -------------------------------------------------------------------------------- /math/test/testcases/directed/asinh.tst: -------------------------------------------------------------------------------- 1 | ; asinh.tst 2 | ; 3 | ; Copyright (c) 2022-2024, Arm Limited. 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | func=asinh op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 7 | func=asinh op1=fff80000.00000001 result=7ff80000.00000001 errno=0 8 | func=asinh op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i 9 | func=asinh op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i 10 | func=asinh op1=7ff00000.00000000 result=7ff00000.00000000 errno=0 11 | func=asinh op1=fff00000.00000000 result=fff00000.00000000 errno=0 12 | func=asinh op1=00000000.00000000 result=00000000.00000000 errno=0 13 | func=asinh op1=80000000.00000000 result=80000000.00000000 errno=0 14 | ; No exception is raised with certain versions of glibc. Functions 15 | ; approximated by x near zero may not generate/implement flops and 16 | ; thus may not raise exceptions. 17 | func=asinh op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux 18 | func=asinh op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux 19 | -------------------------------------------------------------------------------- /math/test/testcases/directed/asinhf.tst: -------------------------------------------------------------------------------- 1 | ; asinhf.tst 2 | ; 3 | ; Copyright (c) 2007-2024, Arm Limited. 
4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | func=asinhf op1=7fc00001 result=7fc00001 errno=0 7 | func=asinhf op1=ffc00001 result=7fc00001 errno=0 8 | func=asinhf op1=7f800001 result=7fc00001 errno=0 status=i 9 | func=asinhf op1=ff800001 result=7fc00001 errno=0 status=i 10 | func=asinhf op1=7f800000 result=7f800000 errno=0 11 | func=asinhf op1=ff800000 result=ff800000 errno=0 12 | func=asinhf op1=00000000 result=00000000 errno=0 13 | func=asinhf op1=80000000 result=80000000 errno=0 14 | ; No exception is raised on certain machines (different version of glibc) 15 | ; Same issue encountered with other function similar to x close to 0 16 | ; Could be due to function so boring no flop is involved in some implementations 17 | func=asinhf op1=00000001 result=00000001 errno=0 maybestatus=ux 18 | func=asinhf op1=80000001 result=80000001 errno=0 maybestatus=ux 19 | -------------------------------------------------------------------------------- /math/test/testcases/directed/atan.tst: -------------------------------------------------------------------------------- 1 | ; atan.tst 2 | ; 3 | ; Copyright (c) 1999-2024, Arm Limited. 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | func=atan op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 7 | func=atan op1=fff80000.00000001 result=7ff80000.00000001 errno=0 8 | func=atan op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i 9 | func=atan op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i 10 | func=atan op1=7ff00000.00000000 result=3ff921fb.54442d18.469 errno=0 11 | func=atan op1=fff00000.00000000 result=bff921fb.54442d18.469 errno=0 12 | func=atan op1=00000000.00000000 result=00000000.00000000 errno=0 13 | func=atan op1=80000000.00000000 result=80000000.00000000 errno=0 14 | ; Inconsistent behavior was detected for the following 2 cases. 15 | ; No exception is raised with certain versions of glibc. 
Functions 16 | ; approximated by x near zero may not generate/implement flops and 17 | ; thus may not raise exceptions. 18 | func=atan op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux 19 | func=atan op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux 20 | 21 | func=atan op1=3ff00000.00000000 result=3fe921fb.54442d18.469 errno=0 22 | func=atan op1=bff00000.00000000 result=bfe921fb.54442d18.469 errno=0 23 | -------------------------------------------------------------------------------- /math/test/testcases/directed/atanf.tst: -------------------------------------------------------------------------------- 1 | ; atanf.tst 2 | ; 3 | ; Copyright (c) 2007-2024, Arm Limited. 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | func=atanf op1=7fc00001 result=7fc00001 errno=0 7 | func=atanf op1=ffc00001 result=7fc00001 errno=0 8 | func=atanf op1=7f800001 result=7fc00001 errno=0 status=i 9 | func=atanf op1=ff800001 result=7fc00001 errno=0 status=i 10 | func=atanf op1=7f800000 result=3fc90fda.a22 errno=0 11 | func=atanf op1=ff800000 result=bfc90fda.a22 errno=0 12 | func=atanf op1=00000000 result=00000000 errno=0 13 | func=atanf op1=80000000 result=80000000 errno=0 14 | ; Inconsistent behavior was detected for the following 2 cases. 15 | ; No exception is raised with certain versions of glibc. Functions 16 | ; approximated by x near zero may not generate/implement flops and 17 | ; thus may not raise exceptions. 
18 | func=atanf op1=00000001 result=00000001 errno=0 maybestatus=ux 19 | func=atanf op1=80000001 result=80000001 errno=0 maybestatus=ux 20 | 21 | func=atanf op1=3f800000 result=3f490fda.a22 errno=0 22 | func=atanf op1=bf800000 result=bf490fda.a22 errno=0 23 | -------------------------------------------------------------------------------- /math/test/testcases/directed/atanh.tst: -------------------------------------------------------------------------------- 1 | ; atanh.tst 2 | ; 3 | ; Copyright (c) 2009-2024, Arm Limited. 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | func=atanh op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 7 | func=atanh op1=fff80000.00000001 result=7ff80000.00000001 errno=0 8 | func=atanh op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i 9 | func=atanh op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i 10 | func=atanh op1=7ff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i 11 | func=atanh op1=fff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i 12 | func=atanh op1=3ff00000.00000001 result=7ff80000.00000001 errno=EDOM status=i 13 | func=atanh op1=bff00000.00000001 result=7ff80000.00000001 errno=EDOM status=i 14 | func=atanh op1=3ff00000.00000000 result=7ff00000.00000000 errno=ERANGE status=z 15 | func=atanh op1=bff00000.00000000 result=fff00000.00000000 errno=ERANGE status=z 16 | func=atanh op1=00000000.00000000 result=00000000.00000000 errno=0 17 | func=atanh op1=80000000.00000000 result=80000000.00000000 errno=0 18 | ; No exception is raised with certain versions of glibc. Functions 19 | ; approximated by x near zero may not generate/implement flops and 20 | ; thus may not raise exceptions. 
21 | func=atanh op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux 22 | func=atanh op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux 23 | -------------------------------------------------------------------------------- /math/test/testcases/directed/atanhf.tst: -------------------------------------------------------------------------------- 1 | ; atanhf.tst 2 | ; 3 | ; Copyright (c) 2009-2024, Arm Limited. 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | func=atanhf op1=7fc00001 result=7fc00001 errno=0 7 | func=atanhf op1=ffc00001 result=7fc00001 errno=0 8 | func=atanhf op1=7f800001 result=7fc00001 errno=0 status=i 9 | func=atanhf op1=ff800001 result=7fc00001 errno=0 status=i 10 | func=atanhf op1=7f800000 result=7fc00001 errno=EDOM status=i 11 | func=atanhf op1=ff800000 result=7fc00001 errno=EDOM status=i 12 | func=atanhf op1=3f800001 result=7fc00001 errno=EDOM status=i 13 | func=atanhf op1=bf800001 result=7fc00001 errno=EDOM status=i 14 | func=atanhf op1=3f800000 result=7f800000 errno=ERANGE status=z 15 | func=atanhf op1=bf800000 result=ff800000 errno=ERANGE status=z 16 | func=atanhf op1=00000000 result=00000000 errno=0 17 | func=atanhf op1=80000000 result=80000000 errno=0 18 | 19 | ; No exception is raised with certain versions of glibc. Functions 20 | ; approximated by x near zero may not generate/implement flops and 21 | ; thus may not raise exceptions. 22 | func=atanhf op1=00000001 result=00000001 errno=0 maybestatus=ux 23 | func=atanhf op1=80000001 result=80000001 errno=0 maybestatus=ux 24 | -------------------------------------------------------------------------------- /math/test/testcases/directed/cbrtf.tst: -------------------------------------------------------------------------------- 1 | ; cbrtf.tst 2 | ; 3 | ; Copyright (c) 2009-2024, Arm Limited. 
4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | func=cbrtf op1=7f800000 result=7f800000 errno=0 7 | func=cbrtf op1=ff800000 result=ff800000 errno=0 8 | func=cbrtf op1=7f800001 result=7fc00001 errno=0 status=i 9 | func=cbrtf op1=7fc00001 result=7fc00001 errno=0 10 | func=cbrtf op1=00000000 result=00000000 errno=0 11 | func=cbrtf op1=00000001 result=26a14517.cc7 errno=0 12 | func=cbrtf op1=00000002 result=26cb2ff5.29f errno=0 13 | func=cbrtf op1=00000003 result=26e89768.579 errno=0 14 | func=cbrtf op1=00000004 result=27000000.000 errno=0 15 | func=cbrtf op1=00400000 result=2a4b2ff5.29f errno=0 16 | func=cbrtf op1=00800000 result=2a800000.000 errno=0 17 | func=cbrtf op1=3f800000 result=3f800000.000 errno=0 18 | func=cbrtf op1=40000000 result=3fa14517.cc7 errno=0 19 | func=cbrtf op1=7f7fffff result=54cb2ff4.e63 errno=0 20 | func=cbrtf op1=80000000 result=80000000 errno=0 21 | func=cbrtf op1=80000001 result=a6a14517.cc7 errno=0 22 | func=cbrtf op1=80000002 result=a6cb2ff5.29f errno=0 23 | func=cbrtf op1=80000003 result=a6e89768.579 errno=0 24 | func=cbrtf op1=80000004 result=a7000000.000 errno=0 25 | func=cbrtf op1=80400000 result=aa4b2ff5.29f errno=0 26 | func=cbrtf op1=80800000 result=aa800000.000 errno=0 27 | func=cbrtf op1=bf800000 result=bf800000.000 errno=0 28 | func=cbrtf op1=c0000000 result=bfa14517.cc7 errno=0 29 | func=cbrtf op1=ff7fffff result=d4cb2ff4.e63 errno=0 30 | -------------------------------------------------------------------------------- /math/test/testcases/directed/cosf.tst: -------------------------------------------------------------------------------- 1 | ; cosf.tst - Directed test cases for SP cosine 2 | ; 3 | ; Copyright (c) 2007-2019, Arm Limited. 
4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | func=cosf op1=7fc00001 result=7fc00001 errno=0 7 | func=cosf op1=ffc00001 result=7fc00001 errno=0 8 | func=cosf op1=7f800001 result=7fc00001 errno=0 status=i 9 | func=cosf op1=ff800001 result=7fc00001 errno=0 status=i 10 | func=cosf op1=7f800000 result=7fc00001 errno=EDOM status=i 11 | func=cosf op1=ff800000 result=7fc00001 errno=EDOM status=i 12 | func=cosf op1=00000000 result=3f800000 errno=0 13 | func=cosf op1=80000000 result=3f800000 errno=0 14 | ; SDCOMP-26094: check cosf in the cases for which the range reducer 15 | ; returns values furthest beyond its nominal upper bound of pi/4. 16 | func=cosf op1=46427f1b result=3f34dc5c.565 error=0 17 | func=cosf op1=4647e568 result=3f34dc33.c1f error=0 18 | func=cosf op1=46428bac result=bf34dbf2.8e3 error=0 19 | func=cosf op1=4647f1f9 result=bf34dbc9.f9b error=0 20 | func=cosf op1=4647fe8a result=3f34db60.313 error=0 21 | func=cosf op1=45d8d7f1 result=bf35006a.7fd error=0 22 | func=cosf op1=45d371a4 result=3f350056.39b error=0 23 | func=cosf op1=45ce0b57 result=bf350041.f38 error=0 24 | func=cosf op1=45d35882 result=bf34ffec.868 error=0 25 | func=cosf op1=45cdf235 result=3f34ffd8.404 error=0 26 | -------------------------------------------------------------------------------- /math/test/testcases/directed/cosh.tst: -------------------------------------------------------------------------------- 1 | ; cosh.tst 2 | ; 3 | ; Copyright (c) 1999-2024, Arm Limited. 
4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | func=cosh op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 7 | func=cosh op1=fff80000.00000001 result=7ff80000.00000001 errno=0 8 | func=cosh op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i 9 | func=cosh op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i 10 | func=cosh op1=7ff00000.00000000 result=7ff00000.00000000 errno=0 11 | func=cosh op1=7fefffff.ffffffff result=7ff00000.00000000 errno=ERANGE status=ox 12 | func=cosh op1=fff00000.00000000 result=7ff00000.00000000 errno=0 13 | func=cosh op1=ffefffff.ffffffff result=7ff00000.00000000 errno=ERANGE status=ox 14 | func=cosh op1=00000000.00000000 result=3ff00000.00000000 errno=0 15 | func=cosh op1=80000000.00000000 result=3ff00000.00000000 errno=0 16 | -------------------------------------------------------------------------------- /math/test/testcases/directed/coshf.tst: -------------------------------------------------------------------------------- 1 | ; coshf.tst 2 | ; 3 | ; Copyright (c) 2007-2024, Arm Limited. 
4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | func=coshf op1=7fc00001 result=7fc00001 errno=0 7 | func=coshf op1=ffc00001 result=7fc00001 errno=0 8 | func=coshf op1=7f800001 result=7fc00001 errno=0 status=i 9 | func=coshf op1=ff800001 result=7fc00001 errno=0 status=i 10 | func=coshf op1=7f800000 result=7f800000 errno=0 11 | func=coshf op1=7f7fffff result=7f800000 errno=ERANGE status=ox 12 | func=coshf op1=ff800000 result=7f800000 errno=0 13 | func=coshf op1=ff7fffff result=7f800000 errno=ERANGE status=ox 14 | func=coshf op1=00000000 result=3f800000 errno=0 15 | func=coshf op1=80000000 result=3f800000 errno=0 16 | -------------------------------------------------------------------------------- /math/test/testcases/directed/erf.tst: -------------------------------------------------------------------------------- 1 | ; erf.tst - Directed test cases for erf 2 | ; 3 | ; Copyright (c) 2007-2020, Arm Limited. 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | func=erf op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 7 | func=erf op1=fff80000.00000001 result=7ff80000.00000001 errno=0 8 | func=erf op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i 9 | func=erf op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i 10 | func=erf op1=7ff00000.00000000 result=3ff00000.00000000 errno=0 11 | func=erf op1=fff00000.00000000 result=bff00000.00000000 errno=0 12 | func=erf op1=00000000.00000000 result=00000000.00000000 errno=ERANGE 13 | func=erf op1=80000000.00000000 result=80000000.00000000 errno=ERANGE 14 | func=erf op1=00000000.00000001 result=00000000.00000001 errno=0 status=ux 15 | func=erf op1=80000000.00000001 result=80000000.00000001 errno=0 status=ux 16 | func=erf op1=3ff00000.00000000 result=3feaf767.a741088a.c6d errno=0 17 | func=erf op1=bff00000.00000000 result=bfeaf767.a741088a.c6d errno=0 18 | -------------------------------------------------------------------------------- 
/math/test/testcases/directed/erfc.tst: -------------------------------------------------------------------------------- 1 | ; erfc.tst - Directed test cases for erfc 2 | ; 3 | ; Copyright (c) 2022-2024, Arm Limited. 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | func=erfc op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 7 | func=erfc op1=fff80000.00000001 result=7ff80000.00000001 errno=0 8 | func=erfc op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i 9 | func=erfc op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i 10 | func=erfc op1=7ff00000.00000000 result=00000000.00000000 errno=0 11 | func=erfc op1=7fefffff.ffffffff result=00000000.00000000 errno=ERANGE status=ux 12 | ; We deliberately turned off errno setting in erf, as the standard simply 13 | ; states that errno `may` be set to ERANGE in case of underflow. 14 | ; As a result, the following condition on errno cannot be satisfied. 15 | ; 16 | ; func=erfc op1=403b44af.48b01531 result=00000000.00000000 errno=ERANGE status=ux 17 | ; 18 | func=erfc op1=c03b44af.48b01531 result=40000000.00000000 errno=0 19 | func=erfc op1=403bffff.ffffffff result=00000000.00000000 errno=ERANGE status=ux 20 | func=erfc op1=c03bffff.ffffffff result=40000000.00000000 errno=0 21 | func=erfc op1=fff00000.00000000 result=40000000.00000000 errno=0 22 | func=erfc op1=00000000.00000000 result=3ff00000.00000000 errno=0 23 | func=erfc op1=80000000.00000000 result=3ff00000.00000000 errno=0 24 | -------------------------------------------------------------------------------- /math/test/testcases/directed/erfcf.tst: -------------------------------------------------------------------------------- 1 | ; erfcf.tst - Directed test cases for erfcf 2 | ; 3 | ; Copyright (c) 2007-2024, Arm Limited. 
4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | func=erfcf op1=7fc00001 result=7fc00001 errno=0 7 | func=erfcf op1=ffc00001 result=7fc00001 errno=0 8 | func=erfcf op1=7f800001 result=7fc00001 errno=0 status=i 9 | func=erfcf op1=ff800001 result=7fc00001 errno=0 status=i 10 | func=erfcf op1=7f800000 result=00000000 errno=0 11 | func=erfcf op1=7f7fffff result=00000000 errno=ERANGE status=ux 12 | func=erfcf op1=ff800000 result=40000000 errno=0 13 | func=erfcf op1=00000000 result=3f800000 errno=0 14 | func=erfcf op1=80000000 result=3f800000 errno=0 15 | -------------------------------------------------------------------------------- /math/test/testcases/directed/erff.tst: -------------------------------------------------------------------------------- 1 | ; erff.tst 2 | ; 3 | ; Copyright (c) 2007-2020, Arm Limited. 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | func=erff op1=7fc00001 result=7fc00001 errno=0 7 | func=erff op1=ffc00001 result=7fc00001 errno=0 8 | func=erff op1=7f800001 result=7fc00001 errno=0 status=i 9 | func=erff op1=ff800001 result=7fc00001 errno=0 status=i 10 | func=erff op1=7f800000 result=3f800000 errno=0 11 | func=erff op1=ff800000 result=bf800000 errno=0 12 | func=erff op1=00000000 result=00000000 errno=ERANGE 13 | func=erff op1=80000000 result=80000000 errno=ERANGE 14 | func=erff op1=00000001 result=00000001 errno=0 status=ux 15 | func=erff op1=80000001 result=80000001 errno=0 status=ux 16 | func=erff op1=3f800000 result=3f57bb3d.3a0 errno=0 17 | func=erff op1=bf800000 result=bf57bb3d.3a0 errno=0 18 | -------------------------------------------------------------------------------- /math/test/testcases/directed/exp10.tst: -------------------------------------------------------------------------------- 1 | ; Directed test cases for exp10 2 | ; 3 | ; Copyright (c) 2023, Arm Limited. 
4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | func=exp10 op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 7 | func=exp10 op1=fff80000.00000001 result=7ff80000.00000001 errno=0 8 | func=exp10 op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i 9 | func=exp10 op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i 10 | func=exp10 op1=7ff00000.00000000 result=7ff00000.00000000 errno=0 11 | func=exp10 op1=7fefffff.ffffffff result=7ff00000.00000000 errno=ERANGE status=ox 12 | func=exp10 op1=fff00000.00000000 result=00000000.00000000 errno=0 13 | func=exp10 op1=ffefffff.ffffffff result=00000000.00000000 errno=ERANGE status=ux 14 | func=exp10 op1=00000000.00000000 result=3ff00000.00000000 errno=0 15 | func=exp10 op1=80000000.00000000 result=3ff00000.00000000 errno=0 16 | -------------------------------------------------------------------------------- /math/test/testcases/directed/exp2f.tst: -------------------------------------------------------------------------------- 1 | ; exp2f.tst - Directed test cases for exp2f 2 | ; 3 | ; Copyright (c) 2017-2019, Arm Limited. 
4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | func=exp2f op1=7fc00001 result=7fc00001 errno=0 7 | func=exp2f op1=ffc00001 result=7fc00001 errno=0 8 | func=exp2f op1=7f800001 result=7fc00001 errno=0 status=i 9 | func=exp2f op1=ff800001 result=7fc00001 errno=0 status=i 10 | func=exp2f op1=7f800000 result=7f800000 errno=0 11 | func=exp2f op1=7f7fffff result=7f800000 errno=ERANGE status=ox 12 | func=exp2f op1=ff800000 result=00000000 errno=0 13 | func=exp2f op1=ff7fffff result=00000000 errno=ERANGE status=ux 14 | func=exp2f op1=00000000 result=3f800000 errno=0 15 | func=exp2f op1=80000000 result=3f800000 errno=0 16 | func=exp2f op1=42fa0001 result=7e00002c.5c8 errno=0 17 | func=exp2f op1=42ffffff result=7f7fffa7.470 errno=0 18 | func=exp2f op1=43000000 result=7f800000 errno=ERANGE status=ox 19 | func=exp2f op1=43000001 result=7f800000 errno=ERANGE status=ox 20 | func=exp2f op1=c2fa0001 result=00ffffa7.470 errno=0 21 | func=exp2f op1=c2fc0000 result=00800000 errno=0 22 | func=exp2f op1=c2fc0001 result=007fffd3.a38 errno=0 status=ux 23 | func=exp2f op1=c3150000 result=00000001 errno=0 24 | func=exp2f op1=c3158000 result=00000000.800 errno=ERANGE status=ux 25 | func=exp2f op1=c3165432 result=00000000.4bd errno=ERANGE status=ux 26 | -------------------------------------------------------------------------------- /math/test/testcases/directed/expf.tst: -------------------------------------------------------------------------------- 1 | ; expf.tst - Directed test cases for expf 2 | ; 3 | ; Copyright (c) 2007-2019, Arm Limited. 
4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | func=expf op1=7fc00001 result=7fc00001 errno=0 7 | func=expf op1=ffc00001 result=7fc00001 errno=0 8 | func=expf op1=7f800001 result=7fc00001 errno=0 status=i 9 | func=expf op1=ff800001 result=7fc00001 errno=0 status=i 10 | func=expf op1=7f800000 result=7f800000 errno=0 11 | func=expf op1=7f7fffff result=7f800000 errno=ERANGE status=ox 12 | func=expf op1=ff800000 result=00000000 errno=0 13 | func=expf op1=ff7fffff result=00000000 errno=ERANGE status=ux 14 | func=expf op1=00000000 result=3f800000 errno=0 15 | func=expf op1=80000000 result=3f800000 errno=0 16 | func=expf op1=42affff8 result=7ef87ed4.e0c errno=0 17 | func=expf op1=42b00008 result=7ef88698.f67 errno=0 18 | func=expf op1=42cffff8 result=7f800000 errno=ERANGE status=ox 19 | func=expf op1=42d00008 result=7f800000 errno=ERANGE status=ox 20 | func=expf op1=c2affff8 result=0041eecc.041 errno=0 status=ux 21 | func=expf op1=c2b00008 result=0041ecbc.95e errno=0 status=ux 22 | func=expf op1=c2cffff8 result=00000000 errno=ERANGE status=ux 23 | func=expf op1=c2d00008 result=00000000 errno=ERANGE status=ux 24 | -------------------------------------------------------------------------------- /math/test/testcases/directed/expm1.tst: -------------------------------------------------------------------------------- 1 | ; expm1.tst 2 | ; 3 | ; Copyright (c) 2009-2024, Arm Limited. 
4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | func=expm1 op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 7 | func=expm1 op1=fff80000.00000001 result=7ff80000.00000001 errno=0 8 | func=expm1 op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i 9 | func=expm1 op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i 10 | func=expm1 op1=7ff00000.00000000 result=7ff00000.00000000 errno=0 11 | func=expm1 op1=7fefffff.ffffffff result=7ff00000.00000000 errno=ERANGE status=ox 12 | func=expm1 op1=fff00000.00000000 result=bff00000.00000000 errno=0 13 | func=expm1 op1=ffefffff.ffffffff result=bff00000.00000000 errno=0 14 | func=expm1 op1=00000000.00000000 result=00000000.00000000 errno=0 15 | func=expm1 op1=80000000.00000000 result=80000000.00000000 errno=0 16 | ; Inconsistent behavior was detected for the following 2 cases. 17 | ; No exception is raised with certain versions of glibc. Functions 18 | ; approximated by x near zero may not generate/implement flops and 19 | ; thus may not raise exceptions. 20 | func=expm1 op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux 21 | func=expm1 op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux 22 | -------------------------------------------------------------------------------- /math/test/testcases/directed/log.tst: -------------------------------------------------------------------------------- 1 | ; Directed test cases for log 2 | ; 3 | ; Copyright (c) 2018-2019, Arm Limited. 
4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | func=log op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 7 | func=log op1=fff80000.00000001 result=7ff80000.00000001 errno=0 8 | func=log op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i 9 | func=log op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i 10 | func=log op1=7ff00000.00000000 result=7ff00000.00000000 errno=0 11 | func=log op1=fff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i 12 | func=log op1=7fefffff.ffffffff result=40862e42.fefa39ef.354 errno=0 13 | func=log op1=ffefffff.ffffffff result=7ff80000.00000001 errno=EDOM status=i 14 | func=log op1=3ff00000.00000000 result=00000000.00000000 errno=0 15 | func=log op1=bff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i 16 | func=log op1=00000000.00000000 result=fff00000.00000000 errno=ERANGE status=z 17 | func=log op1=80000000.00000000 result=fff00000.00000000 errno=ERANGE status=z 18 | func=log op1=00000000.00000001 result=c0874385.446d71c3.639 errno=0 19 | func=log op1=80000000.00000001 result=7ff80000.00000001 errno=EDOM status=i 20 | func=log op1=40000000.00000000 result=3fe62e42.fefa39ef.358 errno=0 21 | func=log op1=3fe00000.00000000 result=bfe62e42.fefa39ef.358 errno=0 22 | -------------------------------------------------------------------------------- /math/test/testcases/directed/log10.tst: -------------------------------------------------------------------------------- 1 | ; log10.tst 2 | ; 3 | ; Copyright (c) 2007-2024, Arm Limited. 
4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | func=log10 op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 7 | func=log10 op1=fff80000.00000001 result=7ff80000.00000001 errno=0 8 | func=log10 op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i 9 | func=log10 op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i 10 | func=log10 op1=fff02000.00000000 result=7ff80000.00000001 errno=0 status=i 11 | func=log10 op1=7ff00000.00000000 result=7ff00000.00000000 errno=0 12 | func=log10 op1=3ff00000.00000000 result=00000000.00000000 errno=0 13 | func=log10 op1=fff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i 14 | func=log10 op1=00000000.00000000 result=fff00000.00000000 errno=ERANGE status=z 15 | func=log10 op1=80000000.00000000 result=fff00000.00000000 errno=ERANGE status=z 16 | func=log10 op1=80000000.00000001 result=7ff80000.00000001 errno=EDOM status=i 17 | -------------------------------------------------------------------------------- /math/test/testcases/directed/log1p.tst: -------------------------------------------------------------------------------- 1 | ; log1p.tst 2 | ; 3 | ; Copyright (c) 2009-2024, Arm Limited. 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | func=log1p op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 7 | func=log1p op1=fff80000.00000001 result=7ff80000.00000001 errno=0 8 | func=log1p op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i 9 | func=log1p op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i 10 | func=log1p op1=fff02000.00000000 result=7ff80000.00000001 errno=0 status=i 11 | func=log1p op1=7ff00000.00000000 result=7ff00000.00000000 errno=0 12 | ; Cases 6, 9 , 10, 11, 12 fail with certain versions of GLIBC and not others. 13 | ; The main reason seems to be the handling of errno and exceptions. 
14 | 15 | func=log1p op1=00000000.00000000 result=00000000.00000000 errno=0 16 | func=log1p op1=80000000.00000000 result=80000000.00000000 errno=0 17 | 18 | ; No exception is raised with certain versions of glibc. Functions 19 | ; approximated by x near zero may not generate/implement flops and 20 | ; thus may not raise exceptions. 21 | func=log1p op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux 22 | func=log1p op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux 23 | -------------------------------------------------------------------------------- /math/test/testcases/directed/log2.tst: -------------------------------------------------------------------------------- 1 | ; Directed test cases for log2 2 | ; 3 | ; Copyright (c) 2018-2019, Arm Limited. 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | func=log2 op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 7 | func=log2 op1=fff80000.00000001 result=7ff80000.00000001 errno=0 8 | func=log2 op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i 9 | func=log2 op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i 10 | func=log2 op1=7ff00000.00000000 result=7ff00000.00000000 errno=0 11 | func=log2 op1=fff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i 12 | func=log2 op1=7fefffff.ffffffff result=408fffff.ffffffff.ffa errno=0 13 | func=log2 op1=ffefffff.ffffffff result=7ff80000.00000001 errno=EDOM status=i 14 | func=log2 op1=3ff00000.00000000 result=00000000.00000000 errno=0 15 | func=log2 op1=bff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i 16 | func=log2 op1=00000000.00000000 result=fff00000.00000000 errno=ERANGE status=z 17 | func=log2 op1=80000000.00000000 result=fff00000.00000000 errno=ERANGE status=z 18 | func=log2 op1=00000000.00000001 result=c090c800.00000000 errno=0 19 | func=log2 op1=80000000.00000001 result=7ff80000.00000001 errno=EDOM status=i 20 | func=log2 op1=40000000.00000000 
result=3ff00000.00000000 errno=0 21 | func=log2 op1=3fe00000.00000000 result=bff00000.00000000 errno=0 22 | -------------------------------------------------------------------------------- /math/test/testcases/directed/log2f.tst: -------------------------------------------------------------------------------- 1 | ; log2f.tst - Directed test cases for log2f 2 | ; 3 | ; Copyright (c) 2017-2019, Arm Limited. 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | func=log2f op1=7fc00001 result=7fc00001 errno=0 7 | func=log2f op1=ffc00001 result=7fc00001 errno=0 8 | func=log2f op1=7f800001 result=7fc00001 errno=0 status=i 9 | func=log2f op1=ff800001 result=7fc00001 errno=0 status=i 10 | func=log2f op1=ff810000 result=7fc00001 errno=0 status=i 11 | func=log2f op1=7f800000 result=7f800000 errno=0 12 | func=log2f op1=ff800000 result=7fc00001 errno=EDOM status=i 13 | func=log2f op1=3f800000 result=00000000 errno=0 14 | func=log2f op1=00000000 result=ff800000 errno=ERANGE status=z 15 | func=log2f op1=80000000 result=ff800000 errno=ERANGE status=z 16 | func=log2f op1=80000001 result=7fc00001 errno=EDOM status=i 17 | 18 | func=log2f op1=3f7d70a4 result=bc6d8f8b.7d4 error=0 19 | func=log2f op1=3f604189 result=be4394c8.395 error=0 20 | func=log2f op1=3f278034 result=bf1caa73.88e error=0 21 | func=log2f op1=3edd3c36 result=bf9af3b9.619 error=0 22 | func=log2f op1=3e61259a result=c00bdb95.650 error=0 23 | func=log2f op1=3f8147ae result=3c6b3267.d6a error=0 24 | func=log2f op1=3f8fbe77 result=3e2b5fe2.a1c error=0 25 | func=log2f op1=3fac3eea result=3edb4d5e.1fc error=0 26 | func=log2f op1=3fd6e632 result=3f3f5d3a.827 error=0 27 | func=log2f op1=40070838 result=3f89e055.a0a error=0 28 | -------------------------------------------------------------------------------- /math/test/testcases/directed/sinf.tst: -------------------------------------------------------------------------------- 1 | ; sinf.tst - Directed test cases for SP sine 2 | ; 3 | ; Copyright (c) 
2007-2019, Arm Limited. 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | 7 | func=sinf op1=7fc00001 result=7fc00001 errno=0 8 | func=sinf op1=ffc00001 result=7fc00001 errno=0 9 | func=sinf op1=7f800001 result=7fc00001 errno=0 status=i 10 | func=sinf op1=ff800001 result=7fc00001 errno=0 status=i 11 | func=sinf op1=7f800000 result=7fc00001 errno=EDOM status=i 12 | func=sinf op1=ff800000 result=7fc00001 errno=EDOM status=i 13 | func=sinf op1=00000000 result=00000000 errno=0 14 | func=sinf op1=80000000 result=80000000 errno=0 15 | ; Directed test for a failure I found while developing mathbench 16 | func=sinf op1=c70d39a1 result=be37fad5.7ed errno=0 17 | ; SDCOMP-26094: check sinf in the cases for which the range reducer 18 | ; returns values furthest beyond its nominal upper bound of pi/4. 19 | func=sinf op1=46427f1b result=3f352d80.f9b error=0 20 | func=sinf op1=4647e568 result=3f352da9.7be error=0 21 | func=sinf op1=46428bac result=bf352dea.924 error=0 22 | func=sinf op1=4647f1f9 result=bf352e13.146 error=0 23 | func=sinf op1=4647fe8a result=3f352e7c.ac9 error=0 24 | func=sinf op1=45d8d7f1 result=3f35097b.cb0 error=0 25 | func=sinf op1=45d371a4 result=bf350990.102 error=0 26 | func=sinf op1=45ce0b57 result=3f3509a4.554 error=0 27 | func=sinf op1=45d35882 result=3f3509f9.bdb error=0 28 | func=sinf op1=45cdf235 result=bf350a0e.02c error=0 29 | -------------------------------------------------------------------------------- /math/test/testcases/directed/sinh.tst: -------------------------------------------------------------------------------- 1 | ; sinh.tst 2 | ; 3 | ; Copyright (c) 1999-2024, Arm Limited. 
4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | func=sinh op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 7 | func=sinh op1=fff80000.00000001 result=7ff80000.00000001 errno=0 8 | func=sinh op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i 9 | func=sinh op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i 10 | func=sinh op1=7ff00000.00000000 result=7ff00000.00000000 errno=0 11 | func=sinh op1=7fefffff.ffffffff result=7ff00000.00000000 errno=ERANGE status=ox 12 | func=sinh op1=fff00000.00000000 result=fff00000.00000000 errno=0 13 | func=sinh op1=ffefffff.ffffffff result=fff00000.00000000 errno=ERANGE status=ox 14 | func=sinh op1=00000000.00000000 result=00000000.00000000 errno=0 15 | func=sinh op1=80000000.00000000 result=80000000.00000000 errno=0 16 | 17 | ; No exception is raised with certain versions of glibc. Functions 18 | ; approximated by x near zero may not generate/implement flops and 19 | ; thus may not raise exceptions. 20 | func=sinh op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux 21 | func=sinh op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux 22 | -------------------------------------------------------------------------------- /math/test/testcases/directed/sinhf.tst: -------------------------------------------------------------------------------- 1 | ; sinhf.tst 2 | ; 3 | ; Copyright (c) 2009-2024, Arm Limited. 
4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | func=sinhf op1=7fc00001 result=7fc00001 errno=0 7 | func=sinhf op1=ffc00001 result=7fc00001 errno=0 8 | func=sinhf op1=7f800001 result=7fc00001 errno=0 status=i 9 | func=sinhf op1=ff800001 result=7fc00001 errno=0 status=i 10 | func=sinhf op1=7f800000 result=7f800000 errno=0 11 | func=sinhf op1=7f7fffff result=7f800000 errno=ERANGE status=ox 12 | func=sinhf op1=ff800000 result=ff800000 errno=0 13 | func=sinhf op1=ff7fffff result=ff800000 errno=ERANGE status=ox 14 | func=sinhf op1=00000000 result=00000000 errno=0 15 | func=sinhf op1=80000000 result=80000000 errno=0 16 | 17 | ; No exception is raised with certain versions of glibc. Functions 18 | ; approximated by x near zero may not generate/implement flops and 19 | ; thus may not raise exceptions. 20 | func=sinhf op1=00000001 result=00000001 errno=0 maybestatus=ux 21 | func=sinhf op1=80000001 result=80000001 errno=0 maybestatus=ux 22 | -------------------------------------------------------------------------------- /math/test/testcases/directed/tanf.tst: -------------------------------------------------------------------------------- 1 | ; tanf.tst 2 | ; 3 | ; Copyright (c) 2022-2024, Arm Limited. 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | func=tanf op1=7fc00001 result=7fc00001 errno=0 7 | func=tanf op1=ffc00001 result=7fc00001 errno=0 8 | func=tanf op1=7f800001 result=7fc00001 errno=0 status=i 9 | func=tanf op1=ff800001 result=7fc00001 errno=0 status=i 10 | func=tanf op1=7f800000 result=7fc00001 errno=EDOM status=i 11 | func=tanf op1=ff800000 result=7fc00001 errno=EDOM status=i 12 | func=tanf op1=00000000 result=00000000 errno=0 13 | func=tanf op1=80000000 result=80000000 errno=0 14 | ; SDCOMP-26094: check tanf in the cases for which the range reducer 15 | ; returns values furthest beyond its nominal upper bound of pi/4. 
16 | func=tanf op1=46427f1b result=3f80396d.599 error=0 17 | func=tanf op1=4647e568 result=3f8039a6.c9f error=0 18 | func=tanf op1=46428bac result=3f803a03.148 error=0 19 | func=tanf op1=4647f1f9 result=3f803a3c.852 error=0 20 | func=tanf op1=4647fe8a result=3f803ad2.410 error=0 21 | func=tanf op1=45d8d7f1 result=bf800669.901 error=0 22 | func=tanf op1=45d371a4 result=bf800686.3cd error=0 23 | func=tanf op1=45ce0b57 result=bf8006a2.e9a error=0 24 | func=tanf op1=45d35882 result=bf80071b.bc4 error=0 25 | func=tanf op1=45cdf235 result=bf800738.693 error=0 26 | -------------------------------------------------------------------------------- /math/test/testcases/directed/tanh.tst: -------------------------------------------------------------------------------- 1 | ; tanh.tst 2 | ; 3 | ; Copyright (c) 1999-2024, Arm Limited. 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | func=tanh op1=7ff80000.00000001 result=7ff80000.00000001 errno=0 7 | func=tanh op1=fff80000.00000001 result=7ff80000.00000001 errno=0 8 | func=tanh op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i 9 | func=tanh op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i 10 | func=tanh op1=7ff00000.00000000 result=3ff00000.00000000 errno=0 11 | func=tanh op1=fff00000.00000000 result=bff00000.00000000 errno=0 12 | func=tanh op1=00000000.00000000 result=00000000.00000000 errno=0 13 | func=tanh op1=80000000.00000000 result=80000000.00000000 errno=0 14 | ; No exception is raised with certain versions of glibc. Functions 15 | ; approximated by x near zero may not generate/implement flops and 16 | ; thus may not raise exceptions. 
17 | func=tanh op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux 18 | func=tanh op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux 19 | -------------------------------------------------------------------------------- /math/test/testcases/directed/tanhf.tst: -------------------------------------------------------------------------------- 1 | ; tanhf.tst 2 | ; 3 | ; Copyright (c) 2007-2024, Arm Limited. 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | func=tanhf op1=7fc00001 result=7fc00001 errno=0 7 | func=tanhf op1=ffc00001 result=7fc00001 errno=0 8 | func=tanhf op1=7f800001 result=7fc00001 errno=0 status=i 9 | func=tanhf op1=ff800001 result=7fc00001 errno=0 status=i 10 | func=tanhf op1=7f800000 result=3f800000 errno=0 11 | func=tanhf op1=ff800000 result=bf800000 errno=0 12 | func=tanhf op1=00000000 result=00000000 errno=0 13 | func=tanhf op1=80000000 result=80000000 errno=0 14 | ; No exception is raised with certain versions of glibc. Functions 15 | ; approximated by x near zero may not generate/implement flops and 16 | ; thus may not raise exceptions. 17 | ; func=tanhf op1=00000001 result=00000001 errno=0 maybestatus=ux 18 | ; func=tanhf op1=80000001 result=80000001 errno=0 maybestatus=ux 19 | func=tanhf op1=00000001 result=00000001 errno=0 maybestatus=ux 20 | func=tanhf op1=80000001 result=80000001 errno=0 maybestatus=ux 21 | -------------------------------------------------------------------------------- /math/test/testcases/random/double.tst: -------------------------------------------------------------------------------- 1 | !! double.tst - Random test case specification for DP functions 2 | !! 3 | !! Copyright (c) 1999-2019, Arm Limited. 4 | !! 
SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | test exp 10000 7 | test exp2 10000 8 | test log 10000 9 | test log2 10000 10 | test pow 40000 11 | -------------------------------------------------------------------------------- /math/test/testcases/random/float.tst: -------------------------------------------------------------------------------- 1 | !! float.tst - Random test case specification for SP functions 2 | !! 3 | !! Copyright (c) 1999-2019, Arm Limited. 4 | !! SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | test sinf 10000 7 | test cosf 10000 8 | test sincosf_sinf 5000 9 | test sincosf_cosf 5000 10 | test tanf 10000 11 | test expf 10000 12 | test exp2f 10000 13 | test logf 10000 14 | test log2f 10000 15 | test powf 10000 16 | -------------------------------------------------------------------------------- /math/tools/asin.sollya: -------------------------------------------------------------------------------- 1 | // polynomial for approximating asin(x) 2 | // 3 | // Copyright (c) 2023-2024, Arm Limited. 
4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | f = asin(x); 7 | dtype = double; 8 | 9 | prec=256; 10 | 11 | a = 0x1p-106; 12 | b = 0.25; 13 | 14 | deg = 11; 15 | 16 | backward = proc(poly, d) { 17 | return d + d ^ 3 * poly(d * d); 18 | }; 19 | 20 | forward = proc(f, d) { 21 | return (f(sqrt(d))-sqrt(d))/(d*sqrt(d)); 22 | }; 23 | 24 | poly = fpminimax(forward(f, x), [|0,...,deg|], [|dtype ...|], [a;b], relative, floating); 25 | 26 | display = hexadecimal!; 27 | print("rel error:", dirtyinfnorm(1-backward(poly, x)/f(x), [a;b])); 28 | print("in [", a, b, "]"); 29 | for i from 0 to deg do print(coeff(poly, i)); 30 | -------------------------------------------------------------------------------- /math/tools/asinf.sollya: -------------------------------------------------------------------------------- 1 | // polynomial for approximating asinf(x) 2 | // 3 | // Copyright (c) 2023-2024, Arm Limited. 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | f = asin(x); 7 | dtype = single; 8 | 9 | a = 0x1p-24; 10 | b = 0.25; 11 | 12 | deg = 4; 13 | 14 | backward = proc(poly, d) { 15 | return d + d ^ 3 * poly(d * d); 16 | }; 17 | 18 | forward = proc(f, d) { 19 | return (f(sqrt(d))-sqrt(d))/(d*sqrt(d)); 20 | }; 21 | 22 | approx = proc(poly, d) { 23 | return remez(1 - poly(x) / forward(f, x), deg - d, [a;b], x^d/forward(f, x), 1e-16); 24 | }; 25 | 26 | poly = 0; 27 | for i from 0 to deg do { 28 | i; 29 | p = roundcoefficients(approx(poly,i), [|dtype ...|]); 30 | poly = poly + x^i*coeff(p,0); 31 | }; 32 | 33 | display = hexadecimal!; 34 | print("rel error:", accurateinfnorm(1-backward(poly, x)/f(x), [a;b], 30)); 35 | print("in [", a, b, "]"); 36 | for i from 0 to deg do print(coeff(poly, i)); 37 | -------------------------------------------------------------------------------- /math/tools/asinh.sollya: -------------------------------------------------------------------------------- 1 | // polynomial for approximating asinh(x) 2 
| // 3 | // Copyright (c) 2022-2024, Arm Limited. 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | // Polynomial is used in [2^-26, 1]. However it is least accurate close to 1, so 7 | // we use 2^-6 as the lower bound for coeff generation, which yields sufficiently 8 | // accurate results in [2^-26, 2^-6]. 9 | a = 0x1p-6; 10 | b = 1.0; 11 | 12 | f = (asinh(sqrt(x)) - sqrt(x))/x^(3/2); 13 | 14 | approx = proc(poly, d) { 15 | return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10); 16 | }; 17 | 18 | poly = 0; 19 | for i from 0 to deg do { 20 | i; 21 | p = roundcoefficients(approx(poly,i), [|D ...|]); 22 | poly = poly + x^i*coeff(p,0); 23 | }; 24 | 25 | 26 | display = hexadecimal; 27 | print("coeffs:"); 28 | for i from 0 to deg do coeff(poly,i); 29 | -------------------------------------------------------------------------------- /math/tools/asinhf.sollya: -------------------------------------------------------------------------------- 1 | // polynomial for approximating asinh(x) 2 | // 3 | // Copyright (c) 2022-2024, Arm Limited. 
4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | deg = 9; 7 | 8 | a = 0x1.0p-12; 9 | b = 1.0; 10 | 11 | f = proc(y) { 12 | return asinh(y); 13 | }; 14 | 15 | approx = proc(poly, d) { 16 | return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10); 17 | }; 18 | 19 | poly = x; 20 | for i from 2 to deg do { 21 | p = roundcoefficients(approx(poly,i), [|SG ...|]); 22 | poly = poly + x^i*coeff(p,0); 23 | }; 24 | 25 | display = hexadecimal; 26 | print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); 27 | print("in [",a,b,"]"); 28 | print("coeffs:"); 29 | for i from 2 to deg do coeff(poly,i); 30 | -------------------------------------------------------------------------------- /math/tools/asinpif.sollya: -------------------------------------------------------------------------------- 1 | // polynomial for approximating asinpif(x) 2 | // 3 | // Copyright (c) 2025, Arm Limited. 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | f = asin(x)/pi; 7 | dtype = single; 8 | 9 | // x^2 lives in [0, 0.25] if x lives in [0, 0.5] 10 | a = 0x1p-126; // a!=0 facilitates convergence of algorithm 11 | b = 0.25; 12 | 13 | deg = 5; 14 | 15 | // We look for a polynomial in x^2 such that f(x) ~ x/pi + x^3 * poly(x^2) 16 | // basically asin(x)/pi ~ 1/pi * x + c0 * x^3 + c1 * x^5 + ... 
17 | backward = proc(poly, d) { 18 | return d/pi + d ^ 3 * poly(d * d); 19 | }; 20 | 21 | 22 | forward = proc(f, d) { 23 | return (f(sqrt(d))-sqrt(d)/pi)/(d*sqrt(d)); 24 | }; 25 | 26 | 27 | // optimise polynomial iteratively, while minimising relative norm (f(x) - poly(x))/f(x) 28 | approx = proc(poly, d) { 29 | return remez(1 - poly(x) / forward(f, x), deg - d, [a;b], x^d/forward(f, x), 1e-16); 30 | }; 31 | 32 | poly = 0; 33 | for i from 0 to deg do { 34 | i; 35 | p = roundcoefficients(approx(poly,i), [|dtype ...|]); 36 | poly = poly + x^i*coeff(p,0); 37 | }; 38 | 39 | // Display relative norm (should be under 2^-23) and coefficients 40 | display = hexadecimal!; 41 | print("rel error:", dirtyinfnorm(1-backward(poly, x)/f(x), [a;b])); // fast but approximate 42 | print("in [", a, b, "]"); 43 | for i from 0 to deg do print(coeff(poly, i)); -------------------------------------------------------------------------------- /math/tools/atan.sollya: -------------------------------------------------------------------------------- 1 | // polynomial for approximating atan(x) and atan2(y, x) 2 | // 3 | // Copyright (c) 2022-2025, Arm Limited. 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | // atan is odd, so approximate with an odd polynomial: 7 | // x + ax^3 + bx^5 + cx^7 + ... 8 | // We generate a, b, c, ... such that we can approximate atan(x) by: 9 | // x + x^3 * (a + bx^2 + cx^4 + ...) 
10 | 11 | f = atan(x); 12 | dtype = double; 13 | 14 | deg = 20; 15 | 16 | a = 0x1.0p-1022; 17 | b = 1; 18 | 19 | // f(x) ~ x * poly(x^2) 20 | backward = proc(poly, d) { 21 | return d * poly(d * d); 22 | }; 23 | 24 | // We need to feed remez algorithm with the inverse of the backward transform 25 | // g(y) such that g(x^2) = poly(x^2) 26 | forward = proc(f, d) { 27 | return f(sqrt(d))/(sqrt(d)); 28 | }; 29 | 30 | 31 | approx = proc(poly, d) { 32 | return remez(forward(f, x) - poly(x), deg - d, [a;b], x^d, 1e-16); 33 | }; 34 | 35 | p = 0; 36 | poly = 0; 37 | for i from 0 to deg do { 38 | i; 39 | p = roundcoefficients(approx(poly,i), [|dtype ...|]); 40 | poly = poly + x^i*coeff(p,0); 41 | }; 42 | 43 | display = hexadecimal; 44 | print("rel error:", dirtyinfnorm(1-backward(poly, x)/f(x), [a;b])); 45 | print("in [", a, b, "]"); 46 | for i from 0 to deg do print(coeff(poly, i)); -------------------------------------------------------------------------------- /math/tools/atanf.sollya: -------------------------------------------------------------------------------- 1 | // polynomial for approximating atanf(x) 2 | // 3 | // Copyright (c) 2022-2025, Arm Limited. 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | // Generate list of monomials: 7 | // Taylor series of atan is of the form x + ax^3 + bx^5 + cx^7 + ... 8 | // So generate a, b, c, ... such that we can approximate atan(x) by: 9 | // x + x^3 * (a + bx^2 + cx^4 + ...) 
10 | 11 | f = atan(x); 12 | dtype = single; 13 | deg = 8; 14 | 15 | a = 0x1.0p-126; 16 | b = 1; 17 | 18 | // f(x) ~ x * poly(x^2) 19 | backward = proc(poly, d) { 20 | return d * poly(d * d); 21 | }; 22 | 23 | // We need to feed remez algorithm with the inverse of the backward transform 24 | // g(y) such that g(x^2) = poly(x^2) 25 | forward = proc(f, d) { 26 | return f(sqrt(d))/(sqrt(d)); 27 | }; 28 | 29 | // Iteratively optimise polynomial coefficients, while minimising relative norm (f(x) - poly(x))/f(x) 30 | approx = proc(poly, d) { 31 | return remez(1 - poly(x) / forward(f, x), deg - d, [a;b], x^d/forward(f, x), 1e-16); 32 | }; 33 | 34 | p = 0; 35 | poly = 0; 36 | for i from 0 to deg do { 37 | i; 38 | p = roundcoefficients(approx(poly,i), [|dtype ...|]); 39 | poly = poly + x^i*coeff(p,0); 40 | }; 41 | 42 | 43 | display = hexadecimal; 44 | print("rel error:", dirtyinfnorm(1-backward(poly, x)/f(x), [a;b])); 45 | print("in [", a, b, "]"); 46 | for i from 0 to deg do print(coeff(poly, i)); -------------------------------------------------------------------------------- /math/tools/atanpi.sollya: -------------------------------------------------------------------------------- 1 | // polynomial for approximating atanpif(x) 2 | // 3 | // Copyright (c) 2025, Arm Limited. 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | f = atan(x)/pi; 7 | dtype = single; 8 | 9 | a = 0x1.0p-126; 10 | b = 1; 11 | 12 | deg = 8; 13 | 14 | // We look for a polynomial in x^2 such that f(x) ~ x * poly(x^2) 15 | // basically atan(x)/pi ~ c0 * x + c1 * x^3 + c2 * x^5 + ... 
16 | backward = proc(poly, d) { 17 | return d * poly(d * d); 18 | }; 19 | 20 | // We need to feed remez algorithm with the inverse of the backward transform 21 | // g(y) such that g(x^2) = poly(x^2) 22 | forward = proc(f, d) { 23 | return f(sqrt(d))/(sqrt(d)); 24 | }; 25 | 26 | // Optimise polynomial iteratively, minimising relative norm (f(x) - poly(x))/f(x) 27 | approx = proc(poly, d) { 28 | return remez(1 - poly(x) / forward(f, x), deg - d, [a;b], x^d/forward(f, x), 1e-16); 29 | }; 30 | 31 | p = 0; 32 | poly = 0; 33 | for i from 0 to deg do { 34 | i; 35 | p = roundcoefficients(approx(poly,i), [|dtype ...|]); 36 | poly = poly + x^i*coeff(p,0); 37 | }; 38 | 39 | display = hexadecimal!; 40 | print("rel error:", dirtyinfnorm(1-backward(poly, x)/f(x), [a;b])); 41 | print("in [", a, b, "]"); 42 | for i from 0 to deg do print(coeff(poly, i)); 43 | -------------------------------------------------------------------------------- /math/tools/cbrt.sollya: -------------------------------------------------------------------------------- 1 | // polynomial for approximating cbrt(x) in double precision 2 | // 3 | // Copyright (c) 2022-2024, Arm Limited. 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | deg = 3; 7 | 8 | a = 0.5; 9 | b = 1; 10 | 11 | 12 | f = x^(1/3); 13 | 14 | poly = fpminimax(f, deg, [|double ...|], [a;b]); 15 | 16 | display = hexadecimal; 17 | print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); 18 | print("in [",a,b,"]"); 19 | print("coeffs:"); 20 | for i from 0 to deg do round(coeff(poly,i), D, RN); 21 | -------------------------------------------------------------------------------- /math/tools/cbrtf.sollya: -------------------------------------------------------------------------------- 1 | // polynomial for approximating cbrt(x) in single precision 2 | // 3 | // Copyright (c) 2022-2024, Arm Limited. 
4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | deg = 3; 7 | 8 | a = 0.5; 9 | b = 1; 10 | 11 | 12 | f = x^(1/3); 13 | 14 | poly = fpminimax(f, deg, [|single ...|], [a;b]); 15 | 16 | display = hexadecimal; 17 | print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); 18 | print("in [",a,b,"]"); 19 | print("coeffs:"); 20 | for i from 0 to deg do round(coeff(poly,i), SG, RN); 21 | -------------------------------------------------------------------------------- /math/tools/cos.sollya: -------------------------------------------------------------------------------- 1 | // polynomial for approximating cos(x) 2 | // 3 | // Copyright (c) 2019, Arm Limited. 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | deg = 8; // polynomial degree 7 | a = -pi/4; // interval 8 | b = pi/4; 9 | 10 | // find even polynomial with minimal abs error compared to cos(x) 11 | 12 | f = cos(x); 13 | 14 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)| 15 | approx = proc(poly,d) { 16 | return remez(f(x)-poly(x), deg-d, [a;b], x^d, 1e-10); 17 | }; 18 | 19 | // first coeff is fixed, iteratively find optimal double prec coeffs 20 | poly = 1; 21 | for i from 1 to deg/2 do { 22 | p = roundcoefficients(approx(poly,2*i), [|D ...|]); 23 | poly = poly + x^(2*i)*coeff(p,0); 24 | }; 25 | 26 | display = hexadecimal; 27 | print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); 28 | print("abs error:", accurateinfnorm(f(x)-poly(x), [a;b], 30)); 29 | print("in [",a,b,"]"); 30 | print("coeffs:"); 31 | for i from 0 to deg do coeff(poly,i); 32 | -------------------------------------------------------------------------------- /math/tools/erf.sollya: -------------------------------------------------------------------------------- 1 | // tables and constants for approximating erf(x). 2 | // 3 | // Copyright (c) 2023-2024, Arm Limited. 
4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | display = hexadecimal; 7 | prec=128; 8 | 9 | // Tables 10 | print("{ i, r, erf(r), 2/sqrt(pi) * exp(-r^2)}"); 11 | for i from 0 to 768 do { 12 | r = i / 128; 13 | t0 = double(erf(r)); 14 | t1 = double(2/sqrt(pi) * exp(-r * r)); 15 | print("{ " @ i @ ",\t" @ r @ ",\t" @ t0 @ ",\t" @ t1 @ " },"); 16 | }; 17 | 18 | // Constants 19 | double(1/3); 20 | double(1/10); 21 | double(2/15); 22 | double(2/9); 23 | double(2/45); 24 | double(2/sqrt(pi)); 25 | 26 | -------------------------------------------------------------------------------- /math/tools/erfc.sollya: -------------------------------------------------------------------------------- 1 | // tables and constants for approximating erfc(x). 2 | // 3 | // Copyright (c) 2023-2024, Arm Limited. 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | display = hexadecimal; 7 | prec=128; 8 | 9 | // Tables 10 | print("{ i, r, erfc(r), 2/sqrt(pi) * exp(-r^2) }"); 11 | for i from 0 to 3787 do { 12 | r = 0.0 + i / 128; 13 | t0 = double(erfc(r) * 2^128); 14 | t1 = double(2/sqrt(pi) * exp(-r * r) * 2^128); 15 | print("{ " @ t0 @ ",\t" @ t1 @ " },"); 16 | }; 17 | 18 | // Constants 19 | print("> 2/sqrt(pi)"); 20 | double(2/sqrt(pi)); 21 | 22 | print("> 1/3"); 23 | double(1/3); 24 | 25 | print("> P5"); 26 | double(2/15); 27 | double(1/10); 28 | double(2/9); 29 | double(2/45); 30 | 31 | print("> P6"); 32 | double(1/42); 33 | double(1/7); 34 | double(2/21); 35 | double(4/315); 36 | 37 | print("> Q"); 38 | double( 5.0 / 4.0); 39 | double( 6.0 / 5.0); 40 | double( 7.0 / 6.0); 41 | double( 8.0 / 7.0); 42 | double( 9.0 / 8.0); 43 | double(10.0 / 9.0); 44 | 45 | print("> R"); 46 | double(-2.0 * 4.0 / (5.0 * 6.0)); 47 | double(-2.0 * 5.0 / (6.0 * 7.0)); 48 | double(-2.0 * 6.0 / (7.0 * 8.0)); 49 | double(-2.0 * 7.0 / (8.0 * 9.0)); 50 | double(-2.0 * 8.0 / (9.0 * 10.0)); 51 | double(-2.0 * 9.0 / (10.0 * 11.0)); 52 | 
-------------------------------------------------------------------------------- /math/tools/erfcf.sollya: -------------------------------------------------------------------------------- 1 | // tables and constants for approximating erfcf(x). 2 | // 3 | // Copyright (c) 2023-2024, Arm Limited. 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | display = hexadecimal; 7 | prec=128; 8 | 9 | // Tables 10 | print("{ i, r, erfc(r), 2/sqrt(pi) * exp(-r^2) }"); 11 | for i from 0 to 644 do { 12 | r = 0.0 + i / 64; 13 | t0 = single(erfc(r) * 2^47); 14 | t1 = single(2/sqrt(pi) * exp(-r * r) * 2^47); 15 | print("{ " @ t0 @ ",\t" @ t1 @ " },"); 16 | }; 17 | 18 | // Constants 19 | single(1/3); 20 | single(2/15); 21 | single(1/10); 22 | single(2/sqrt(pi)); 23 | -------------------------------------------------------------------------------- /math/tools/erff.sollya: -------------------------------------------------------------------------------- 1 | // tables and constants for approximating erff(x). 2 | // 3 | // Copyright (c) 2023-2024, Arm Limited. 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | display = hexadecimal; 7 | prec=128; 8 | 9 | // Tables 10 | print("{ i, r, erf(r), 2/sqrt(pi) * exp(-r^2)}"); 11 | for i from 0 to 512 do { 12 | r = i / 128; 13 | t0 = single(erf(r)); 14 | t1 = single(2/sqrt(pi) * exp(-r * r)); 15 | print("{ " @ i @ ",\t" @ r @ ",\t" @ t0 @ ",\t" @ t1 @ " },"); 16 | }; 17 | 18 | // Constants 19 | single(1/3); 20 | single(2/sqrt(pi)); 21 | -------------------------------------------------------------------------------- /math/tools/exp.sollya: -------------------------------------------------------------------------------- 1 | // polynomial for approximating e^x 2 | // 3 | // Copyright (c) 2019, Arm Limited. 
4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | deg = 5; // poly degree 7 | N = 128; // table entries 8 | b = log(2)/(2*N); // interval 9 | b = b + b*0x1p-16; // increase interval for non-nearest rounding (TOINT_NARROW) 10 | a = -b; 11 | 12 | // find polynomial with minimal abs error 13 | 14 | // return p that minimizes |exp(x) - poly(x) - x^d*p(x)| 15 | approx = proc(poly,d) { 16 | return remez(exp(x)-poly(x), deg-d, [a;b], x^d, 1e-10); 17 | }; 18 | 19 | // first 2 coeffs are fixed, iteratively find optimal double prec coeffs 20 | poly = 1 + x; 21 | for i from 2 to deg do { 22 | p = roundcoefficients(approx(poly,i), [|D ...|]); 23 | poly = poly + x^i*coeff(p,0); 24 | }; 25 | 26 | display = hexadecimal; 27 | print("rel error:", accurateinfnorm(1-poly(x)/exp(x), [a;b], 30)); 28 | print("abs error:", accurateinfnorm(exp(x)-poly(x), [a;b], 30)); 29 | print("in [",a,b,"]"); 30 | // double interval error for non-nearest rounding 31 | print("rel2 error:", accurateinfnorm(1-poly(x)/exp(x), [2*a;2*b], 30)); 32 | print("abs2 error:", accurateinfnorm(exp(x)-poly(x), [2*a;2*b], 30)); 33 | print("in [",2*a,2*b,"]"); 34 | print("coeffs:"); 35 | for i from 0 to deg do coeff(poly,i); 36 | -------------------------------------------------------------------------------- /math/tools/exp10.sollya: -------------------------------------------------------------------------------- 1 | // polynomial for approximating double precision 10^x 2 | // 3 | // Copyright (c) 2023-2025, Arm Limited. 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | // Parameters 7 | deg = 4; // Neon 4, SVE 5 8 | N = 128; // Neon 128, SVE 64 9 | b = log(2)/(2 * N * log(10)); // interval 10 | a = -b; 11 | 12 | f = 10^x; 13 | 14 | // return p that minimizes relative error. 
15 | approx = proc(poly,d) { 16 | return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10); 17 | }; 18 | 19 | // return p that minimizes absolute error 20 | approx_abs = proc(poly,d) { 21 | return remez(f(x) - poly(x), deg-d, [a;b], x^d, 1e-10); 22 | }; 23 | 24 | // first coeff is fixed, iteratively find optimal double prec coeffs 25 | poly = 1; 26 | for i from 1 to deg do { 27 | p = roundcoefficients(approx(poly,i), [|D ...|]); 28 | //p = roundcoefficients(approx_abs(poly,i), [|D ...|]); 29 | poly = poly + x^i*coeff(p,0); 30 | }; 31 | 32 | display = hexadecimal; 33 | print("rel error:", accurateinfnorm(1-poly(x)/10^x, [a;b], 30)); 34 | print("abs error:", accurateinfnorm(10^x-poly(x), [a;b], 30)); 35 | print("in [",a,b,"]"); 36 | print("coeffs:"); 37 | for i from 0 to deg do coeff(poly,i); 38 | 39 | log10_2 = round(N * log(10) / log(2), D, RN); 40 | log2_10 = log(2) / (N * log(10)); 41 | log2_10_hi = round(log2_10, D, RN); 42 | log2_10_lo = round(log2_10 - log2_10_hi, D, RN); 43 | print(log10_2); 44 | print(log2_10_hi); 45 | print(log2_10_lo); 46 | -------------------------------------------------------------------------------- /math/tools/exp10f.sollya: -------------------------------------------------------------------------------- 1 | // polynomial for approximating single precision 10^x 2 | // 3 | // Copyright (c) 2025, Arm Limited. 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | 7 | // Parameters 8 | deg = 2; // Neon 5, SVE 2 9 | N = 64; // Neon 1, SVE 64 10 | b = log(2)/(2 * N * log(10)); // interval 11 | a = -b; 12 | f = 10^x; 13 | 14 | // return p that minimizes relative error. 
15 | approx = proc(poly,d) { 16 | return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10); 17 | }; 18 | 19 | // first coeff is fixed, iteratively find optimal coeffs 20 | poly = 1; 21 | for i from 1 to deg do { 22 | p = roundcoefficients(approx(poly,i), [|SG ...|]); 23 | poly = poly + x^i*coeff(p, 0); 24 | }; 25 | 26 | display = hexadecimal!; 27 | print("rel error:", accurateinfnorm(1-poly(x)/10^x, [a;b], 30)); 28 | print("in [",a,b,"]"); 29 | print("Poly coeffs:"); 30 | for i from 1 to deg do coeff(poly,i); 31 | 32 | log10_2 = round(log(10) / log(2), SG, RN); 33 | log2_10 = log(2) / (log(10)); 34 | log2_10_hi = round(log2_10, SG, RN); 35 | log2_10_lo = round(log2_10 - log2_10_hi, SG, RN); 36 | 37 | print("log10_2:"); 38 | print(log10_2); 39 | 40 | print("log2_10_hi/lo:"); 41 | print(log2_10_hi); 42 | print(log2_10_lo); 43 | -------------------------------------------------------------------------------- /math/tools/exp2.sollya: -------------------------------------------------------------------------------- 1 | // polynomial for approximating 2^x 2 | // 3 | // Copyright (c) 2019-2025, Arm Limited. 
4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | // Parameters 7 | deg = 5; // poly degree 8 | N = 64; // Neon 128, SVE 64 (fexpa) 9 | b = 1/(2*N); // interval 10 | a = -b; 11 | f = 2^x; 12 | 13 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)| 14 | approx = proc(poly,d) { 15 | return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10); 16 | }; 17 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)| 18 | approx_abs = proc(poly,d) { 19 | return remez(f(x) - poly(x), deg-d, [a;b], x^d, 1e-10); 20 | }; 21 | 22 | // first coeff is fixed, iteratively find optimal coeffs 23 | poly = 1; 24 | for i from 1 to deg do { 25 | p = roundcoefficients(approx(poly,i), [|D ...|]); 26 | // p = roundcoefficients(approx_abs(poly,i), [|D ...|]); 27 | poly = poly + x^i*coeff(p,0); 28 | }; 29 | 30 | display = hexadecimal; 31 | print("rel error:", accurateinfnorm(1-poly(x)/2^x, [a;b], 30)); 32 | print("abs error:", accurateinfnorm(2^x-poly(x), [a;b], 30)); 33 | print("in [",a,b,"]"); 34 | // double interval error for non-nearest rounding: 35 | print("rel2 error:", accurateinfnorm(1-poly(x)/2^x, [2*a;2*b], 30)); 36 | print("abs2 error:", accurateinfnorm(2^x-poly(x), [2*a;2*b], 30)); 37 | print("in [",2*a,2*b,"]"); 38 | print("coeffs:"); 39 | for i from 0 to deg do coeff(poly,i); 40 | -------------------------------------------------------------------------------- /math/tools/exp2f.sollya: -------------------------------------------------------------------------------- 1 | // polynomial for approximating single precision 2^x 2 | // 3 | // Copyright (c) 2025, Arm Limited. 
4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | // Parameters 7 | deg = 5; // Neon 5, SVE 2 8 | N = 64; // Neon 1, SVE 64 9 | b = 1/(2*N); // interval 10 | a = -b; 11 | f = 2^x; 12 | 13 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)| 14 | approx = proc(poly, d) { 15 | return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10); 16 | }; 17 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)| 18 | approx_abs = proc(poly, d) { 19 | return remez(f(x) - poly(x), deg-d, [a;b], x^d, 1e-10); 20 | }; 21 | 22 | // first coeff is fixed, iteratively find optimal coeffs 23 | poly = 1; 24 | for i from 1 to deg do { 25 | p = roundcoefficients(approx(poly,i), [|SG ...|]); 26 | // p = roundcoefficients(approx_abs(poly,i), [|SG ...|]); 27 | poly = poly + x^i*coeff(p,0); 28 | }; 29 | 30 | display = hexadecimal; 31 | print("rel error:", accurateinfnorm(1-poly(x)/2^x, [a;b], 30)); 32 | print("abs error:", accurateinfnorm(2^x-poly(x), [a;b], 30)); 33 | print("in [",a,b,"]"); 34 | // double interval error for non-nearest rounding: 35 | print("rel2 error:", accurateinfnorm(1-poly(x)/2^x, [2*a;2*b], 30)); 36 | print("abs2 error:", accurateinfnorm(2^x-poly(x), [2*a;2*b], 30)); 37 | print("in [",2*a,2*b,"]"); 38 | print("coeffs:"); 39 | for i from 0 to deg do coeff(poly,i); 40 | -------------------------------------------------------------------------------- /math/tools/exp2m1f.sollya: -------------------------------------------------------------------------------- 1 | // polynomial for approximating single precision 2^x - 1 2 | // 3 | // Copyright (c) 2025, Arm Limited. 
4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | // Parameters 7 | deg = 7; 8 | N = 1; 9 | b = 1/(2*N); // interval 10 | a = -b; 11 | f = 2^x; 12 | 13 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)| 14 | approx = proc(poly, d) { 15 | return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10); 16 | }; 17 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)| 18 | approx_abs = proc(poly, d) { 19 | return remez(f(x) - poly(x), deg-d, [a;b], x^d, 1e-10); 20 | }; 21 | 22 | // first coeff is fixed, iteratively find optimal coeffs 23 | poly = 1; 24 | for i from 1 to deg do { 25 | p = roundcoefficients(approx(poly,i), [|SG ...|]); 26 | // p = roundcoefficients(approx_abs(poly,i), [|SG ...|]); 27 | poly = poly + x^i*coeff(p,0); 28 | }; 29 | 30 | display = hexadecimal; 31 | print("rel error:", accurateinfnorm(1-poly(x)/2^x, [a;b], 30)); 32 | print("abs error:", accurateinfnorm(2^x-poly(x), [a;b], 30)); 33 | print("in [",a,b,"]"); 34 | // double interval error for non-nearest rounding: 35 | print("coeffs:"); 36 | for i from 0 to deg do coeff(poly,i); 37 | -------------------------------------------------------------------------------- /math/tools/expm1.sollya: -------------------------------------------------------------------------------- 1 | // polynomial for approximating exp(x)-1 in double precision 2 | // 3 | // Copyright (c) 2022-2025, Arm Limited. 
4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | deg = 6; // poly degree 7 | N = 64; // table entries 8 | b = log(2)/(2*N); // interval 9 | a = -b; 10 | 11 | f = proc(y) { 12 | return exp(y); 13 | }; 14 | 15 | // return p that minimizes |exp(x) - poly(x) - x^d*p(x)| 16 | approx = proc(poly,d) { 17 | return remez(exp(x)-poly(x), deg-d, [a;b], x^d, 1e-10); 18 | }; 19 | 20 | // first 2 coeffs are fixed, iteratively find optimal double prec coeffs 21 | poly = 1 + x; 22 | for i from 2 to deg do { 23 | p = roundcoefficients(approx(poly,i), [|D ...|]); 24 | poly = poly + x^i*coeff(p,0); 25 | }; 26 | 27 | display = hexadecimal; 28 | print("rel error:", accurateinfnorm(1-poly(x)/exp(x), [a;b], 30)); 29 | print("rel error:", dirtyinfnorm(1-poly(x)/f(x), [a;b])); 30 | print("in [",a,b,"]"); 31 | print("coeffs:"); 32 | for i from 2 to deg do round(coeff(poly,i), D, RN); 33 | 34 | // SVE expm1 relies on a small lookup table for fexpa correction. 35 | display = decimal; 36 | for i from 0 to 16 do { 37 | val = 2^(i/64) - 1; 38 | err = round(val, D, RN); 39 | print(err); 40 | }; 41 | 42 | for i from 0 to 16 do { 43 | val = 2^(-i/64) - 1; 44 | err = round(val, D, RN); 45 | print(err); 46 | }; -------------------------------------------------------------------------------- /math/tools/expm1f.sollya: -------------------------------------------------------------------------------- 1 | // polynomial for approximating exp(x)-1 in single precision 2 | // 3 | // Copyright (c) 2022-2024, Arm Limited. 
4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | deg = 5; 7 | 8 | a = -log(2)/2; 9 | b = log(2)/2; 10 | 11 | f = proc(y) { 12 | return exp(y)-1; 13 | }; 14 | 15 | poly = fpminimax(f(x), deg, [|single ...|], [a;b]); 16 | 17 | display = hexadecimal; 18 | print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); 19 | print("in [",a,b,"]"); 20 | print("coeffs:"); 21 | for i from 2 to deg do round(coeff(poly,i), SG, RN); 22 | -------------------------------------------------------------------------------- /math/tools/log.sollya: -------------------------------------------------------------------------------- 1 | // polynomial for approximating log(1+x) 2 | // 3 | // Copyright (c) 2019, Arm Limited. 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | deg = 12; // poly degree 7 | // |log(1+x)| > 0x1p-4 outside the interval 8 | a = -0x1p-4; 9 | b = 0x1.09p-4; 10 | 11 | // find log(1+x)/x polynomial with minimal relative error 12 | // (minimal relative error polynomial for log(1+x) is the same * x) 13 | deg = deg-1; // because of /x 14 | 15 | // f = log(1+x)/x; using taylor series 16 | f = 0; 17 | for i from 0 to 60 do { f = f + (-x)^i/(i+1); }; 18 | 19 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)| 20 | approx = proc(poly,d) { 21 | return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10); 22 | }; 23 | 24 | // first coeff is fixed, iteratively find optimal double prec coeffs 25 | poly = 1; 26 | for i from 1 to deg do { 27 | p = roundcoefficients(approx(poly,i), [|D ...|]); 28 | poly = poly + x^i*coeff(p,0); 29 | }; 30 | 31 | display = hexadecimal; 32 | print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); 33 | print("in [",a,b,"]"); 34 | print("coeffs:"); 35 | for i from 0 to deg do coeff(poly,i); 36 | -------------------------------------------------------------------------------- /math/tools/log10.sollya: 
-------------------------------------------------------------------------------- 1 | // polynomial for approximating log10(1+x) 2 | // 3 | // Copyright (c) 2019-2024, Arm Limited. 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | deg = 6; // poly degree 7 | // |log10(1+x)| > 0x1p-5 outside the interval 8 | a = -0x1.p-5; 9 | b = 0x1.p-5; 10 | 11 | ln10 = evaluate(log(10),0); 12 | invln10hi = double(1/ln10 + 0x1p21) - 0x1p21; // round away last 21 bits 13 | invln10lo = double(1/ln10 - invln10hi); 14 | 15 | // find log10(1+x)/x polynomial with minimal relative error 16 | // (minimal relative error polynomial for log10(1+x) is the same * x) 17 | deg = deg-1; // because of /x 18 | 19 | // f = log(1+x)/x; using taylor series 20 | f = 0; 21 | for i from 0 to 60 do { f = f + (-x)^i/(i+1); }; 22 | f = f/ln10; 23 | 24 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)| 25 | approx = proc(poly,d) { 26 | return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10); 27 | }; 28 | 29 | // first coeff is fixed, iteratively find optimal double prec coeffs 30 | poly = invln10hi + invln10lo; 31 | for i from 1 to deg do { 32 | p = roundcoefficients(approx(poly,i), [|D ...|]); 33 | poly = poly + x^i*coeff(p,0); 34 | }; 35 | display = hexadecimal; 36 | print("invln10hi:", invln10hi); 37 | print("invln10lo:", invln10lo); 38 | print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); 39 | print("in [",a,b,"]"); 40 | print("coeffs:"); 41 | for i from 0 to deg do coeff(poly,i); 42 | 43 | display = decimal; 44 | print("in [",a,b,"]"); 45 | -------------------------------------------------------------------------------- /math/tools/log10f.sollya: -------------------------------------------------------------------------------- 1 | // polynomial for approximating log10f(1+x) 2 | // 3 | // Copyright (c) 2019-2024, Arm Limited. 
4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | // Computation of log10f(1+x) will be carried out in double precision 7 | 8 | deg = 4; // poly degree 9 | // [OFF; 2*OFF] is divided in 2^4 intervals with OFF~0.7 10 | a = -0.04375; 11 | b = 0.04375; 12 | 13 | // find log(1+x)/x polynomial with minimal relative error 14 | // (minimal relative error polynomial for log(1+x) is the same * x) 15 | deg = deg-1; // because of /x 16 | 17 | // f = log(1+x)/x; using taylor series 18 | f = 0; 19 | for i from 0 to 60 do { f = f + (-x)^i/(i+1); }; 20 | 21 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)| 22 | approx = proc(poly,d) { 23 | return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10); 24 | }; 25 | 26 | // first coeff is fixed, iteratively find optimal double prec coeffs 27 | poly = 1; 28 | for i from 1 to deg do { 29 | p = roundcoefficients(approx(poly,i), [|D ...|]); 30 | poly = poly + x^i*coeff(p,0); 31 | }; 32 | 33 | display = hexadecimal; 34 | print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); 35 | print("in [",a,b,"]"); 36 | print("coeffs:"); 37 | for i from 0 to deg do double(coeff(poly,i)); 38 | -------------------------------------------------------------------------------- /math/tools/log1p.sollya: -------------------------------------------------------------------------------- 1 | // polynomial for approximating log(1+x) in double precision 2 | // 3 | // Copyright (c) 2022-2024, Arm Limited. 
4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | deg = 20; 7 | 8 | a = sqrt(2)/2-1; 9 | b = sqrt(2)-1; 10 | 11 | f = proc(y) { 12 | return log(1+y); 13 | }; 14 | 15 | approx = proc(poly, d) { 16 | return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10); 17 | }; 18 | 19 | poly = x; 20 | for i from 2 to deg do { 21 | p = roundcoefficients(approx(poly,i), [|D ...|]); 22 | poly = poly + x^i*coeff(p,0); 23 | }; 24 | 25 | 26 | print("coeffs:"); 27 | display = hexadecimal; 28 | for i from 2 to deg do coeff(poly,i); 29 | print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); 30 | print("in [",a,b,"]"); 31 | -------------------------------------------------------------------------------- /math/tools/log1pf.sollya: -------------------------------------------------------------------------------- 1 | // polynomial for approximating log(1+x) in single precision 2 | // 3 | // Copyright (c) 2022-2024, Arm Limited. 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | deg = 10; 7 | 8 | a = -0.25; 9 | b = 0.5; 10 | 11 | f = proc(y) { 12 | return log(1+y); 13 | }; 14 | 15 | poly = fpminimax(f(x), deg, [|single ...|], [a;b]); 16 | 17 | display = hexadecimal; 18 | print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); 19 | print("in [",a,b,"]"); 20 | print("coeffs:"); 21 | for i from 2 to deg do round(coeff(poly,i), SG, RN); 22 | -------------------------------------------------------------------------------- /math/tools/log2.sollya: -------------------------------------------------------------------------------- 1 | // polynomial for approximating log2(1+x) 2 | // 3 | // Copyright (c) 2019, Arm Limited. 
4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | deg = 11; // poly degree 7 | // |log2(1+x)| > 0x1p-4 outside the interval 8 | a = -0x1.5b51p-5; 9 | b = 0x1.6ab2p-5; 10 | 11 | ln2 = evaluate(log(2),0); 12 | invln2hi = double(1/ln2 + 0x1p21) - 0x1p21; // round away last 21 bits 13 | invln2lo = double(1/ln2 - invln2hi); 14 | 15 | // find log2(1+x)/x polynomial with minimal relative error 16 | // (minimal relative error polynomial for log2(1+x) is the same * x) 17 | deg = deg-1; // because of /x 18 | 19 | // f = log(1+x)/x; using taylor series 20 | f = 0; 21 | for i from 0 to 60 do { f = f + (-x)^i/(i+1); }; 22 | f = f/ln2; 23 | 24 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)| 25 | approx = proc(poly,d) { 26 | return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10); 27 | }; 28 | 29 | // first coeff is fixed, iteratively find optimal double prec coeffs 30 | poly = invln2hi + invln2lo; 31 | for i from 1 to deg do { 32 | p = roundcoefficients(approx(poly,i), [|D ...|]); 33 | poly = poly + x^i*coeff(p,0); 34 | }; 35 | 36 | display = hexadecimal; 37 | print("invln2hi:", invln2hi); 38 | print("invln2lo:", invln2lo); 39 | print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); 40 | print("in [",a,b,"]"); 41 | print("coeffs:"); 42 | for i from 0 to deg do coeff(poly,i); 43 | -------------------------------------------------------------------------------- /math/tools/log2_abs.sollya: -------------------------------------------------------------------------------- 1 | // polynomial for approximating log2(1+x) 2 | // 3 | // Copyright (c) 2019, Arm Limited. 
4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | deg = 7; // poly degree 7 | // interval ~= 1/(2*N), where N is the table entries 8 | a= -0x1.f45p-8; 9 | b= 0x1.f45p-8; 10 | 11 | ln2 = evaluate(log(2),0); 12 | invln2hi = double(1/ln2 + 0x1p21) - 0x1p21; // round away last 21 bits 13 | invln2lo = double(1/ln2 - invln2hi); 14 | 15 | // find log2(1+x) polynomial with minimal absolute error 16 | f = log(1+x)/ln2; 17 | 18 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)| 19 | approx = proc(poly,d) { 20 | return remez(f(x) - poly(x), deg-d, [a;b], x^d, 1e-10); 21 | }; 22 | 23 | // first coeff is fixed, iteratively find optimal double prec coeffs 24 | poly = x*(invln2lo + invln2hi); 25 | for i from 2 to deg do { 26 | p = roundcoefficients(approx(poly,i), [|D ...|]); 27 | poly = poly + x^i*coeff(p,0); 28 | }; 29 | 30 | display = hexadecimal; 31 | print("invln2hi:", invln2hi); 32 | print("invln2lo:", invln2lo); 33 | print("abs error:", accurateinfnorm(f(x)-poly(x), [a;b], 30)); 34 | //// relative error computation fails if f(0)==0 35 | //// g = f(x)/x = log2(1+x)/x; using taylor series 36 | //g = 0; 37 | //for i from 0 to 60 do { g = g + (-x)^i/(i+1)/ln2; }; 38 | //print("rel error:", accurateinfnorm(1-(poly(x)/x)/g(x), [a;b], 30)); 39 | print("in [",a,b,"]"); 40 | print("coeffs:"); 41 | for i from 0 to deg do coeff(poly,i); 42 | -------------------------------------------------------------------------------- /math/tools/log_abs.sollya: -------------------------------------------------------------------------------- 1 | // polynomial for approximating log(1+x) 2 | // 3 | // Copyright (c) 2019, Arm Limited. 
4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | deg = 6; // poly degree 7 | // interval ~= 1/(2*N), where N is the table entries 8 | a = -0x1.fp-9; 9 | b = 0x1.fp-9; 10 | 11 | // find log(1+x) polynomial with minimal absolute error 12 | f = log(1+x); 13 | 14 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)| 15 | approx = proc(poly,d) { 16 | return remez(f(x) - poly(x), deg-d, [a;b], x^d, 1e-10); 17 | }; 18 | 19 | // first coeff is fixed, iteratively find optimal double prec coeffs 20 | poly = x; 21 | for i from 2 to deg do { 22 | p = roundcoefficients(approx(poly,i), [|D ...|]); 23 | poly = poly + x^i*coeff(p,0); 24 | }; 25 | 26 | display = hexadecimal; 27 | print("abs error:", accurateinfnorm(f(x)-poly(x), [a;b], 30)); 28 | // relative error computation fails if f(0)==0 29 | // g = f(x)/x = log(1+x)/x; using taylor series 30 | g = 0; 31 | for i from 0 to 60 do { g = g + (-x)^i/(i+1); }; 32 | print("rel error:", accurateinfnorm(1-poly(x)/x/g(x), [a;b], 30)); 33 | print("in [",a,b,"]"); 34 | print("coeffs:"); 35 | for i from 0 to deg do coeff(poly,i); 36 | -------------------------------------------------------------------------------- /math/tools/plot.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # ULP error plot tool. 4 | # 5 | # Copyright (c) 2019, Arm Limited. 
6 | # SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 7 | 8 | import numpy as np 9 | import matplotlib.pyplot as plt 10 | import sys 11 | import re 12 | 13 | # example usage: 14 | # build/bin/ulp -e .0001 log 0.5 2.0 2345678 | math/tools/plot.py 15 | 16 | def fhex(s): 17 | return float.fromhex(s) 18 | 19 | def parse(f): 20 | xs = [] 21 | gs = [] 22 | ys = [] 23 | es = [] 24 | # Has to match the format used in ulp.c 25 | r = re.compile(r'[^ (]+\(([^ )]*)\) got ([^ ]+) want ([^ ]+) [^ ]+ ulp err ([^ ]+)') 26 | for line in f: 27 | m = r.match(line) 28 | if m: 29 | x = fhex(m.group(1)) 30 | g = fhex(m.group(2)) 31 | y = fhex(m.group(3)) 32 | e = float(m.group(4)) 33 | xs.append(x) 34 | gs.append(g) 35 | ys.append(y) 36 | es.append(e) 37 | elif line.startswith('PASS') or line.startswith('FAIL'): 38 | # Print the summary line 39 | print(line) 40 | return xs, gs, ys, es 41 | 42 | def plot(xs, gs, ys, es): 43 | if len(xs) < 2: 44 | print('not enough samples') 45 | return 46 | a = min(xs) 47 | b = max(xs) 48 | fig, (ax0,ax1) = plt.subplots(nrows=2) 49 | es = np.abs(es) # ignore the sign 50 | emax = max(es) 51 | ax0.text(a+(b-a)*0.7, emax*0.8, '%s\n%g'%(emax.hex(),emax)) 52 | ax0.plot(xs,es,'r.') 53 | ax0.grid() 54 | ax1.plot(xs,ys,'r.',label='want') 55 | ax1.plot(xs,gs,'b.',label='got') 56 | ax1.grid() 57 | ax1.legend() 58 | plt.show() 59 | 60 | xs, gs, ys, es = parse(sys.stdin) 61 | plot(xs, gs, ys, es) 62 | -------------------------------------------------------------------------------- /math/tools/sin.sollya: -------------------------------------------------------------------------------- 1 | // polynomial for approximating sin(x) 2 | // 3 | // Copyright (c) 2019, Arm Limited. 
4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | deg = 7; // polynomial degree 7 | a = -pi/4; // interval 8 | b = pi/4; 9 | 10 | // find even polynomial with minimal abs error compared to sin(x)/x 11 | 12 | // account for /x 13 | deg = deg-1; 14 | 15 | // f = sin(x)/x; 16 | f = 1; 17 | c = 1; 18 | for i from 1 to 60 do { c = 2*i*(2*i + 1)*c; f = f + (-1)^i*x^(2*i)/c; }; 19 | 20 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)| 21 | approx = proc(poly,d) { 22 | return remez(f(x)-poly(x), deg-d, [a;b], x^d, 1e-10); 23 | }; 24 | 25 | // first coeff is fixed, iteratively find optimal double prec coeffs 26 | poly = 1; 27 | for i from 1 to deg/2 do { 28 | p = roundcoefficients(approx(poly,2*i), [|D ...|]); 29 | poly = poly + x^(2*i)*coeff(p,0); 30 | }; 31 | 32 | display = hexadecimal; 33 | print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); 34 | print("abs error:", accurateinfnorm(sin(x)-x*poly(x), [a;b], 30)); 35 | print("in [",a,b,"]"); 36 | print("coeffs:"); 37 | for i from 0 to deg do coeff(poly,i); 38 | -------------------------------------------------------------------------------- /math/tools/sincos.sollya: -------------------------------------------------------------------------------- 1 | // polynomial for approximating cos(x) 2 | // 3 | // Copyright (c) 2023-2024, Arm Limited. 
4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | // This script only finds the coeffs for cos - see math/aarch64/advsimd/sin.c for sin coeffs 7 | 8 | deg = 14; // polynomial degree 9 | a = -pi/4; // interval 10 | b = pi/4; 11 | 12 | // find even polynomial with minimal abs error compared to cos(x) 13 | 14 | f = cos(x); 15 | 16 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)| 17 | approx = proc(poly,d) { 18 | return remez(f(x)-poly(x), deg-d, [a;b], x^d, 1e-10); 19 | }; 20 | 21 | // first coeff is fixed, iteratively find optimal double prec coeffs 22 | poly = 1; 23 | for i from 1 to deg/2 do { 24 | p = roundcoefficients(approx(poly,2*i), [|double ...|]); 25 | poly = poly + x^(2*i)*coeff(p,0); 26 | }; 27 | 28 | display = hexadecimal; 29 | //print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); 30 | //print("abs error:", accurateinfnorm(f(x)-poly(x), [a;b], 30)); 31 | print("in [",a,b,"]"); 32 | print("coeffs:"); 33 | for i from 0 to deg do coeff(poly,i); 34 | -------------------------------------------------------------------------------- /math/tools/sincosf.sollya: -------------------------------------------------------------------------------- 1 | // polynomial for approximating cos(x) 2 | // 3 | // Copyright (c) 2023-2024, Arm Limited. 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | // This script only finds the coeffs for cos - see math/tools/sin.sollya for sin coeffs. 
7 | 8 | deg = 8; // polynomial degree 9 | a = -pi/4; // interval 10 | b = pi/4; 11 | 12 | // find even polynomial with minimal abs error compared to cos(x) 13 | 14 | f = cos(x); 15 | 16 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)| 17 | approx = proc(poly,d) { 18 | return remez(f(x)-poly(x), deg-d, [a;b], x^d, 1e-10); 19 | }; 20 | 21 | // first coeff is fixed, iteratively find optimal single prec coeffs 22 | poly = 1; 23 | for i from 1 to deg/2 do { 24 | p = roundcoefficients(approx(poly,2*i), [|single ...|]); 25 | poly = poly + x^(2*i)*coeff(p,0); 26 | }; 27 | 28 | display = hexadecimal; 29 | //print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); 30 | //print("abs error:", accurateinfnorm(f(x)-poly(x), [a;b], 30)); 31 | print("in [",a,b,"]"); 32 | print("coeffs:"); 33 | for i from 0 to deg do coeff(poly,i); 34 | -------------------------------------------------------------------------------- /math/tools/sinpi.sollya: -------------------------------------------------------------------------------- 1 | // polynomial for approximating sinpi(x) 2 | // 3 | // Copyright (c) 2023-2024, Arm Limited. 
4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | deg = 19; // polynomial degree 7 | a = -1/2; // interval 8 | b = 1/2; 9 | 10 | // find odd polynomial with minimal abs error compared to sinpi(x) 11 | 12 | // f = sin(pi* x); 13 | f = pi*x; 14 | c = 1; 15 | for i from 1 to 80 do { c = 2*i*(2*i + 1)*c; f = f + (-1)^i*(pi*x)^(2*i+1)/c; }; 16 | 17 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)| 18 | approx = proc(poly,d) { 19 | return remez(f(x)-poly(x), deg-d, [a;b], x^d, 1e-10); 20 | }; 21 | 22 | // first coeff is predefined, iteratively find optimal double prec coeffs 23 | poly = pi*x; 24 | for i from 0 to (deg-1)/2 do { 25 | p = roundcoefficients(approx(poly,2*i+1), [|D ...|]); 26 | poly = poly + x^(2*i+1)*coeff(p,0); 27 | }; 28 | 29 | display = hexadecimal; 30 | print("abs error:", accurateinfnorm(sin(pi*x)-poly(x), [a;b], 30)); 31 | print("in [",a,b,"]"); 32 | print("coeffs:"); 33 | for i from 0 to deg do coeff(poly,i); 34 | -------------------------------------------------------------------------------- /math/tools/tan.sollya: -------------------------------------------------------------------------------- 1 | // polynomial for approximating double precision tan(x) 2 | // 3 | // Copyright (c) 2023-2024, Arm Limited. 
4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | deg = 8; 7 | 8 | // interval bounds 9 | a = 0x1.0p-126; 10 | b = pi / 8; 11 | 12 | display = hexadecimal; 13 | 14 | f = (tan(sqrt(x))-sqrt(x))/x^(3/2); 15 | poly = fpminimax(f, deg, [|double ...|], [a*a;b*b]); 16 | 17 | //print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); 18 | print("in [",a,b,"]"); 19 | print("coeffs:"); 20 | for i from 0 to deg do coeff(poly,i); 21 | -------------------------------------------------------------------------------- /math/tools/tanpi.sollya: -------------------------------------------------------------------------------- 1 | // polynomial for approximating tanpi/f(x) 2 | // 3 | // Copyright (c) 2024, Arm Limited. 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | // 0 for tanpi/f [0,0.25], 1 for tanpi/f [0.25,1] 7 | method = 0; 8 | dtype = double; 9 | 10 | if (dtype == single) then { 11 | if (method == 0) then { deg = 5; } 12 | else if (method == 1) then { deg = 3; }; 13 | } else if (dtype == double) then { 14 | if (method == 0) then { deg = 13; } 15 | else if (method == 1) then { deg = 8; }; 16 | }; 17 | 18 | a = 0x1.0p-126; 19 | b = 1/4; 20 | 21 | if (method == 0) then { 22 | g = tan(pi * x); 23 | F = proc(P) { return pi * x + x^3 * P(x^2); }; 24 | f = (g(sqrt(x)) - pi * sqrt(x))/(x^(3/2)); 25 | } else if (method == 1) then { 26 | g = 1/tan(pi * x); 27 | F = proc(P) { return 1/(pi * x) + x * P(x^2); }; 28 | f = (g(sqrt(x)) / sqrt(x)) - 1/(pi * x); 29 | }; 30 | 31 | poly = fpminimax(f, deg, [|dtype ...|], [a*a;b*b]); 32 | 33 | // 34 | // Display coefficients in Sollya 35 | // 36 | display = hexadecimal!; 37 | if (dtype==double) then { prec = 53!; } 38 | else if (dtype==single) then { prec = 23!; }; 39 | print("_coeffs :_ hex"); 40 | for i from 0 to deg do coeff(poly, i); 41 | 42 | // Compute errors 43 | //display = hexadecimal!; 44 | d_rel_err = dirtyinfnorm(1-F(poly)/g(x), [a;b]); 45 | d_abs_err = 
dirtyinfnorm(g(x)-F(poly), [a;b]); 46 | print("dirty rel error:", d_rel_err); 47 | print("dirty abs error:", d_abs_err); 48 | print("in [",a,b,"]"); 49 | -------------------------------------------------------------------------------- /math/tools/v_erf.sollya: -------------------------------------------------------------------------------- 1 | // polynomial for approximating erf(x). 2 | // To generate coefficients for interval i (0 to 47) do: 3 | // $ sollya v_erf.sollya $i 4 | // 5 | // Copyright (c) 2022-2024, Arm Limited. 6 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 7 | 8 | scale = 1/8; 9 | deg = 9; 10 | 11 | itv = parse(__argv[0]); 12 | if (itv == 0) then { a = 0x1p-1022; } 13 | else { a = itv * scale; }; 14 | 15 | prec=256; 16 | 17 | poly = fpminimax(erf(scale*x+a), deg, [|D ...|], [0; 1]); 18 | 19 | display = hexadecimal; 20 | for i from 0 to deg do coeff(poly, i); -------------------------------------------------------------------------------- /math/tools/v_erfc.sollya: -------------------------------------------------------------------------------- 1 | // polynomial for approximating erfc(x)*exp(x*x) 2 | // 3 | // Copyright (c) 2022-2024, Arm Limited. 
4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | deg = 12; // poly degree 7 | 8 | itv = parse(__argv[0]); 9 | 10 | bounds = [|3.725290298461914e-9, 11 | 0.18920711500272103, 12 | 0.41421356237309515, 13 | 0.681792830507429, 14 | 1, 15 | 1.378414230005442, 16 | 1.8284271247461903, 17 | 2.363585661014858, 18 | 3, 19 | 3.756828460010884, 20 | 4.656854249492381, 21 | 5.727171322029716, 22 | 7, 23 | 8.513656920021768, 24 | 10.313708498984761, 25 | 12.454342644059432, 26 | 15, 27 | 18.027313840043536, 28 | 21.627416997969522, 29 | 25.908685288118864, 30 | 31|]; 31 | 32 | a = bounds[itv]; 33 | b = bounds[itv + 1]; 34 | 35 | f = proc(y) { 36 | t = y + a; 37 | return erfc(t) * exp(t*t); 38 | }; 39 | 40 | poly = fpminimax(f(x), deg, [|double ...|], [0;b-a]); 41 | 42 | display = hexadecimal; 43 | print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); 44 | print("in [",a,b,"]"); 45 | print("coeffs:"); 46 | for i from 0 to deg do coeff(poly, i); 47 | -------------------------------------------------------------------------------- /math/tools/v_exp.sollya: -------------------------------------------------------------------------------- 1 | // polynomial for approximating e^x 2 | // 3 | // Copyright (c) 2019, Arm Limited. 
4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | deg = 4; // poly degree 7 | N = 128; // table entries 8 | b = log(2)/(2*N); // interval 9 | a = -b; 10 | 11 | // find polynomial with minimal abs error 12 | 13 | // return p that minimizes |exp(x) - poly(x) - x^d*p(x)| 14 | approx = proc(poly,d) { 15 | return remez(exp(x)-poly(x), deg-d, [a;b], x^d, 1e-10); 16 | }; 17 | 18 | // first 2 coeffs are fixed, iteratively find optimal double prec coeffs 19 | poly = 1 + x; 20 | for i from 2 to deg do { 21 | p = roundcoefficients(approx(poly,i), [|D ...|]); 22 | poly = poly + x^i*coeff(p,0); 23 | }; 24 | 25 | display = hexadecimal; 26 | print("rel error:", accurateinfnorm(1-poly(x)/exp(x), [a;b], 30)); 27 | print("abs error:", accurateinfnorm(exp(x)-poly(x), [a;b], 30)); 28 | print("in [",a,b,"]"); 29 | print("coeffs:"); 30 | for i from 0 to deg do coeff(poly,i); 31 | -------------------------------------------------------------------------------- /math/tools/v_log.sollya: -------------------------------------------------------------------------------- 1 | // polynomial used for __v_log(x) 2 | // 3 | // Copyright (c) 2019, Arm Limited. 
4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | deg = 6; // poly degree 7 | a = -0x1.fc1p-9; 8 | b = 0x1.009p-8; 9 | 10 | // find log(1+x)/x polynomial with minimal relative error 11 | // (minimal relative error polynomial for log(1+x) is the same * x) 12 | deg = deg-1; // because of /x 13 | 14 | // f = log(1+x)/x; using taylor series 15 | f = 0; 16 | for i from 0 to 60 do { f = f + (-x)^i/(i+1); }; 17 | 18 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)| 19 | approx = proc(poly,d) { 20 | return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10); 21 | }; 22 | 23 | // first coeff is fixed, iteratively find optimal double prec coeffs 24 | poly = 1; 25 | for i from 1 to deg do { 26 | p = roundcoefficients(approx(poly,i), [|D ...|]); 27 | poly = poly + x^i*coeff(p,0); 28 | }; 29 | 30 | display = hexadecimal; 31 | print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); 32 | print("in [",a,b,"]"); 33 | print("coeffs:"); 34 | for i from 0 to deg do coeff(poly,i); 35 | -------------------------------------------------------------------------------- /math/tools/v_log10.sollya: -------------------------------------------------------------------------------- 1 | // polynomial used for __v_log10(x) 2 | // 3 | // Copyright (c) 2019-2024, Arm Limited. 
4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | deg = 6; // poly degree 7 | a = -0x1.fc1p-9; 8 | b = 0x1.009p-8; 9 | 10 | // find log(1+x)/x polynomial with minimal relative error 11 | // (minimal relative error polynomial for log(1+x) is the same * x) 12 | deg = deg-1; // because of /x 13 | 14 | // f = log(1+x)/x; using taylor series 15 | f = 0; 16 | for i from 0 to 60 do { f = f + (-x)^i/(i+1); }; 17 | 18 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)| 19 | approx = proc(poly,d) { 20 | return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10); 21 | }; 22 | 23 | // first coeff is fixed, iteratively find optimal double prec coeffs 24 | poly = 1; 25 | for i from 1 to deg do { 26 | p = roundcoefficients(approx(poly,i), [|D ...|]); 27 | poly = poly + x^i*coeff(p,0); 28 | }; 29 | 30 | // scale coefficients by 1/ln(10) 31 | ln10 = evaluate(log(10),0); 32 | poly = poly/ln10; 33 | 34 | display = hexadecimal; 35 | print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); 36 | print("in [",a,b,"]"); 37 | print("coeffs:"); 38 | for i from 0 to deg do double(coeff(poly,i)); 39 | -------------------------------------------------------------------------------- /math/tools/v_log10f.sollya: -------------------------------------------------------------------------------- 1 | // polynomial for approximating v_log10f(1+x) 2 | // 3 | // Copyright (c) 2019-2024, Arm Limited. 
4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | deg = 9; // poly degree 7 | // |log10(1+x)| > 0x1p-4 outside the interval 8 | a = -1/3; 9 | b = 1/3; 10 | 11 | display = hexadecimal; 12 | print("log10(2) = ", single(log10(2))); 13 | 14 | ln10 = evaluate(log(10),0); 15 | invln10 = single(1/ln10); 16 | 17 | // find log10(1+x)/x polynomial with minimal relative error 18 | // (minimal relative error polynomial for log10(1+x) is the same * x) 19 | deg = deg-1; // because of /x 20 | 21 | // f = log(1+x)/x; using taylor series 22 | f = 0; 23 | for i from 0 to 60 do { f = f + (-x)^i/(i+1); }; 24 | f = f/ln10; 25 | 26 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)| 27 | approx = proc(poly,d) { 28 | return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10); 29 | }; 30 | 31 | // first coeff is fixed, iteratively find optimal single prec coeffs 32 | poly = invln10; 33 | for i from 1 to deg do { 34 | p = roundcoefficients(approx(poly,i), [|SG ...|]); 35 | poly = poly + x^i*coeff(p,0); 36 | }; 37 | display = hexadecimal; 38 | print("invln10:", invln10); 39 | print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); 40 | print("in [",a,b,"]"); 41 | print("coeffs:"); 42 | for i from 0 to deg do single(coeff(poly,i)); 43 | 44 | display = decimal; 45 | print("in [",a,b,"]"); 46 | -------------------------------------------------------------------------------- /math/tools/v_log2f.sollya: -------------------------------------------------------------------------------- 1 | // polynomial used for __v_log2f(x) 2 | // 3 | // Copyright (c) 2022-2024, Arm Limited. 
4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | deg = 9; // poly degree 7 | a = -1/3; 8 | b = 1/3; 9 | 10 | ln2 = evaluate(log(2),0); 11 | invln2 = single(1/ln2); 12 | 13 | // find log2(1+x)/x polynomial with minimal relative error 14 | // (minimal relative error polynomial for log2(1+x) is the same * x) 15 | deg = deg-1; // because of /x 16 | 17 | // f = log2(1+x)/x; using taylor series 18 | f = 0; 19 | for i from 0 to 60 do { f = f + (-x)^i/(i+1); }; 20 | f = f * invln2; 21 | 22 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)| 23 | approx = proc(poly,d) { 24 | return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10); 25 | }; 26 | 27 | // first coeff is fixed, iteratively find optimal single prec coeffs 28 | poly = invln2; 29 | for i from 1 to deg do { 30 | p = roundcoefficients(approx(poly,i), [|SG ...|]); 31 | poly = poly + x^i*coeff(p,0); 32 | }; 33 | 34 | display = hexadecimal; 35 | print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30)); 36 | print("in [",a,b,"]"); 37 | print("coeffs:"); 38 | for i from 0 to deg do coeff(poly,i); 39 | -------------------------------------------------------------------------------- /math/tools/v_sin.sollya: -------------------------------------------------------------------------------- 1 | // polynomial for approximating sin(x) 2 | // 3 | // Copyright (c) 2019, Arm Limited. 
4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 5 | 6 | deg = 15; // polynomial degree 7 | a = -pi/2; // interval 8 | b = pi/2; 9 | 10 | // find even polynomial with minimal abs error compared to sin(x)/x 11 | 12 | // account for /x 13 | deg = deg-1; 14 | 15 | // f = sin(x)/x; 16 | f = 1; 17 | c = 1; 18 | for i from 1 to 60 do { c = 2*i*(2*i + 1)*c; f = f + (-1)^i*x^(2*i)/c; }; 19 | 20 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)| 21 | approx = proc(poly,d) { 22 | return remez(f(x)-poly(x), deg-d, [a;b], x^d, 1e-10); 23 | }; 24 | 25 | // first coeff is fixed, iteratively find optimal double prec coeffs 26 | poly = 1; 27 | for i from 1 to deg/2 do { 28 | p = roundcoefficients(approx(poly,2*i), [|D ...|]); 29 | poly = poly + x^(2*i)*coeff(p,0); 30 | }; 31 | 32 | display = hexadecimal; 33 | print("abs error:", accurateinfnorm(sin(x)-x*poly(x), [a;b], 30)); 34 | print("in [",a,b,"]"); 35 | print("coeffs:"); 36 | for i from 0 to deg do coeff(poly,i); 37 | -------------------------------------------------------------------------------- /networking/README.md: -------------------------------------------------------------------------------- 1 | # 🛜 Networking 2 | 3 | This section provides style requirements and contribution guidelines for 4 | networking routines. 5 | 6 | --- 7 | 8 | ## 📂 Subdirectory Structure 9 | 10 | Here is a quick overview of the **`networking/`** subdirectory's layout: 11 | 12 | - **`include/`**: networking library public headers. 13 | - **`test/`**: networking test and benchmark related sources. 14 | 15 | --- 16 | 17 | ## 🧑‍💻 Style Requirements 18 | 19 | 1. **Upstream Compatibility**: 20 | Code should follow GNU Coding Standard and glibc specific conventions to 21 | ease upstreaming. 22 | 23 | 2. **ABI and Symbols**: 24 | Code should be written so it is suitable for inclusion into a libc with 25 | minimal changes. 26 | 27 | 3. 
**API Headers**: 28 | Include headers should be suitable for benchmarking and testing code and 29 | should not conflict with libc headers. 30 | 31 | --- 32 | 33 | ## ✅ Contribution Guidelines 34 | 35 | - Clearly document assumptions in the code. 36 | - Maintain consistent assembly style across different implementations. 37 | - Benchmarking is needed on several microarchitectures. 38 | 39 | --- 40 | 41 | By adhering to these guidelines, you will help maintain the quality and 42 | consistency of the Arm Optimized Routines project. We appreciate your 43 | contributions and look forward to collaborating with you! 44 | 45 | -------------------------------------------------------------------------------- /networking/include/networking.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Public API. 3 | * 4 | * Copyright (c) 2020, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | unsigned short __chksum (const void *, unsigned int); 9 | #if __aarch64__ && __ARM_NEON 10 | unsigned short __chksum_aarch64_simd (const void *, unsigned int); 11 | #endif 12 | #if __arm__ && __ARM_NEON 13 | unsigned short __chksum_arm_simd (const void *, unsigned int); 14 | #endif 15 | -------------------------------------------------------------------------------- /string/aarch64/asmdefs.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Macros for asm code. AArch64 version. 3 | * 4 | * Copyright (c) 2019-2023, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #ifndef _ASMDEFS_H 9 | #define _ASMDEFS_H 10 | 11 | /* Branch Target Identification support. */ 12 | #define BTI_C hint 34 13 | #define BTI_J hint 36 14 | /* Return address signing support (pac-ret). */ 15 | #define PACIASP hint 25; .cfi_window_save 16 | #define AUTIASP hint 29; .cfi_window_save 17 | 18 | /* GNU_PROPERTY_AARCH64_* macros from elf.h. 
*/ 19 | #define FEATURE_1_AND 0xc0000000 20 | #define FEATURE_1_BTI 1 21 | #define FEATURE_1_PAC 2 22 | 23 | /* Add a NT_GNU_PROPERTY_TYPE_0 note. */ 24 | #define GNU_PROPERTY(type, value) \ 25 | .section .note.gnu.property, "a"; \ 26 | .p2align 3; \ 27 | .word 4; \ 28 | .word 16; \ 29 | .word 5; \ 30 | .asciz "GNU"; \ 31 | .word type; \ 32 | .word 4; \ 33 | .word value; \ 34 | .word 0; \ 35 | .text 36 | 37 | /* If set then the GNU Property Note section will be added to 38 | mark objects to support BTI and PAC-RET. */ 39 | #ifndef WANT_GNU_PROPERTY 40 | #define WANT_GNU_PROPERTY 1 41 | #endif 42 | 43 | #if WANT_GNU_PROPERTY 44 | /* Add property note with supported features to all asm files. */ 45 | GNU_PROPERTY (FEATURE_1_AND, FEATURE_1_BTI|FEATURE_1_PAC) 46 | #endif 47 | 48 | #define ENTRY_ALIGN(name, alignment) \ 49 | .global name; \ 50 | .type name,%function; \ 51 | .align alignment; \ 52 | name: \ 53 | .cfi_startproc; \ 54 | BTI_C; 55 | 56 | #define ENTRY(name) ENTRY_ALIGN(name, 6) 57 | 58 | #define ENTRY_ALIAS(name) \ 59 | .global name; \ 60 | .type name,%function; \ 61 | name: 62 | 63 | #define END(name) \ 64 | .cfi_endproc; \ 65 | .size name, .-name; 66 | 67 | #define L(l) .L ## l 68 | 69 | #endif 70 | -------------------------------------------------------------------------------- /string/aarch64/check-arch.S: -------------------------------------------------------------------------------- 1 | /* 2 | * check ARCH setting. 3 | * 4 | * Copyright (c) 2020-2022, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #if !__aarch64__ 9 | # error ARCH setting does not match the compiler. 10 | #endif 11 | 12 | /* Include for GNU property notes. 
*/ 13 | #include "asmdefs.h" 14 | -------------------------------------------------------------------------------- /string/aarch64/experimental/memchr-sve.S: -------------------------------------------------------------------------------- 1 | /* 2 | * memchr - find a character in a memory zone 3 | * 4 | * Copyright (c) 2018-2022, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "asmdefs.h" 9 | 10 | .arch armv8-a+sve 11 | 12 | /* Assumptions: 13 | * 14 | * ARMv8-a, AArch64 15 | * SVE Available. 16 | */ 17 | 18 | ENTRY (__memchr_aarch64_sve) 19 | dup z1.b, w1 /* duplicate c to a vector */ 20 | setffr /* initialize FFR */ 21 | mov x3, 0 /* initialize off */ 22 | 23 | .p2align 4 24 | 0: whilelo p1.b, x3, x2 /* make sure off < max */ 25 | b.none 9f /* nothing left below max: return null */ 26 | 27 | /* Read a vector's worth of bytes, bounded by max, 28 | stopping on first fault. */ 29 | ldff1b z0.b, p1/z, [x0, x3] 30 | rdffrs p0.b, p1/z /* p0 = lanes that actually loaded */ 31 | b.nlast 2f /* partial load: use the valid bytes only */ 32 | 33 | /* First fault did not fail: the vector bounded by max is valid. 34 | Avoid depending on the contents of FFR beyond the branch. */ 35 | incb x3 /* speculate increment */ 36 | cmpeq p2.b, p1/z, z0.b, z1.b /* search for c */ 37 | b.none 0b /* no c in this vector: next round */ 38 | decb x3 /* undo speculate */ 39 | 40 | /* Found C. */ 41 | 1: brkb p2.b, p1/z, p2.b /* find the first c */ 42 | add x0, x0, x3 /* form partial pointer */ 43 | incp x0, p2.b /* form final pointer to c */ 44 | ret 45 | 46 | /* First fault failed: only some of the vector is valid. 47 | Perform the comparison only on the valid bytes. */ 48 | 2: cmpeq p2.b, p0/z, z0.b, z1.b 49 | b.any 1b /* c found among the valid bytes */ 50 | 51 | /* No C found. Re-init FFR, increment, and loop. */ 52 | setffr 53 | incp x3, p0.b /* off += number of valid bytes */ 54 | b 0b 55 | 56 | /* Found end of count. 
*/ 57 | 9: mov x0, 0 /* return null */ 58 | ret 59 | 60 | END (__memchr_aarch64_sve) 61 | -------------------------------------------------------------------------------- /string/aarch64/experimental/memcmp-sve.S: -------------------------------------------------------------------------------- 1 | /* 2 | * memcmp - compare memory 3 | * 4 | * Copyright (c) 2018-2022, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "asmdefs.h" 9 | 10 | .arch armv8-a+sve 11 | 12 | /* Assumptions: 13 | * 14 | * ARMv8-a, AArch64 15 | * SVE Available. 16 | */ 17 | 18 | ENTRY (__memcmp_aarch64_sve) 19 | mov x3, 0 /* initialize off */ 20 | 21 | 0: whilelo p0.b, x3, x2 /* while off < max */ 22 | b.none 9f 23 | 24 | ld1b z0.b, p0/z, [x0, x3] /* read vectors bounded by max. */ 25 | ld1b z1.b, p0/z, [x1, x3] 26 | 27 | /* Increment for a whole vector, even if we've only read a partial. 28 | This is significantly cheaper than INCP, and since OFF is not 29 | used after the loop it is ok to increment OFF past MAX. */ 30 | incb x3 31 | 32 | cmpne p1.b, p0/z, z0.b, z1.b /* while no inequalities */ 33 | b.none 0b 34 | 35 | /* Found inequality. */ 36 | 1: brkb p1.b, p0/z, p1.b /* find first such */ 37 | lasta w0, p1, z0.b /* extract each byte */ 38 | lasta w1, p1, z1.b 39 | sub x0, x0, x1 /* return comparison */ 40 | ret 41 | 42 | /* Found end-of-count. */ 43 | 9: mov x0, 0 /* return equality */ 44 | ret 45 | 46 | END (__memcmp_aarch64_sve) 47 | -------------------------------------------------------------------------------- /string/aarch64/experimental/stpcpy-sve.S: -------------------------------------------------------------------------------- 1 | /* 2 | * stpcpy - copy a string returning pointer to end. 3 | * 4 | * Copyright (c) 2020, Arm Limited. 
5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #define BUILD_STPCPY 1 9 | 10 | #include "strcpy-sve.S" 11 | -------------------------------------------------------------------------------- /string/aarch64/experimental/strchrnul-sve.S: -------------------------------------------------------------------------------- 1 | /* 2 | * strchrnul - find a character or nul in a string 3 | * 4 | * Copyright (c) 2018-2019, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #define BUILD_STRCHRNUL 9 | #include "strchr-sve.S" 10 | -------------------------------------------------------------------------------- /string/aarch64/experimental/strcmp-sve.S: -------------------------------------------------------------------------------- 1 | /* 2 | * __strcmp_aarch64_sve - compare two strings 3 | * 4 | * Copyright (c) 2018-2022, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "asmdefs.h" 9 | 10 | .arch armv8-a+sve 11 | 12 | /* Assumptions: 13 | * 14 | * ARMv8-a, AArch64 15 | * SVE Available. 16 | */ 17 | 18 | ENTRY (__strcmp_aarch64_sve) 19 | setffr /* initialize FFR */ 20 | ptrue p1.b, all /* all ones; loop invariant */ 21 | mov x2, 0 /* initialize offset */ 22 | 23 | /* Read a vector's worth of bytes, stopping on first fault. */ 24 | .p2align 4 25 | 0: ldff1b z0.b, p1/z, [x0, x2] 26 | ldff1b z1.b, p1/z, [x1, x2] 27 | rdffrs p0.b, p1/z 28 | b.nlast 2f 29 | 30 | /* First fault did not fail: the whole vector is valid. 31 | Avoid depending on the contents of FFR beyond the branch. */ 32 | incb x2, all /* skip bytes for next round */ 33 | cmpeq p2.b, p1/z, z0.b, z1.b /* compare strings */ 34 | cmpne p3.b, p1/z, z0.b, 0 /* search for ~zero */ 35 | nands p2.b, p1/z, p2.b, p3.b /* ~(eq & ~zero) -> ne | zero */ 36 | b.none 0b 37 | 38 | /* Found end-of-string or inequality. 
*/ 39 | 1: brkb p2.b, p1/z, p2.b /* find first such */ 40 | lasta w0, p2, z0.b /* extract each char */ 41 | lasta w1, p2, z1.b 42 | sub x0, x0, x1 /* return comparison */ 43 | ret 44 | 45 | /* First fault failed: only some of the vector is valid. 46 | Perform the comparison only on the valid bytes. */ 47 | 2: incp x2, p0.b /* skip bytes for next round */ 48 | setffr /* re-init FFR for next round */ 49 | cmpeq p2.b, p0/z, z0.b, z1.b /* compare strings, as above */ 50 | cmpne p3.b, p0/z, z0.b, 0 /* search for ~zero */ 51 | nands p2.b, p0/z, p2.b, p3.b /* ~(eq & ~zero) -> ne | zero */ 52 | b.none 0b /* nothing found in the valid bytes: loop */ 53 | b 1b /* otherwise report the difference */ 54 | 55 | END (__strcmp_aarch64_sve) 56 | -------------------------------------------------------------------------------- /string/aarch64/experimental/strcpy-sve.S: -------------------------------------------------------------------------------- 1 | /* 2 | * strcpy/stpcpy - copy a string returning pointer to start/end. 3 | * 4 | * Copyright (c) 2018-2022, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "asmdefs.h" 9 | 10 | .arch armv8-a+sve 11 | 12 | /* Assumptions: 13 | * 14 | * ARMv8-a, AArch64 15 | * SVE Available. 16 | */ 17 | 18 | /* To build as stpcpy, define BUILD_STPCPY before compiling this file. */ 19 | #ifdef BUILD_STPCPY 20 | #define FUNC __stpcpy_aarch64_sve 21 | #else 22 | #define FUNC __strcpy_aarch64_sve 23 | #endif 24 | 25 | ENTRY (FUNC) 26 | setffr /* initialize FFR */ 27 | ptrue p2.b, all /* all ones; loop invariant */ 28 | mov x2, 0 /* initialize offset */ 29 | 30 | .p2align 4 31 | /* Read a vector's worth of bytes, stopping on first fault. */ 32 | 0: ldff1b z0.b, p2/z, [x1, x2] 33 | rdffrs p0.b, p2/z /* p0 = lanes that actually loaded */ 34 | b.nlast 1f /* partial load: use the valid bytes only */ 35 | 36 | /* First fault did not fail: the whole vector is valid. 37 | Avoid depending on the contents of FFR beyond the branch. */ 38 | cmpeq p1.b, p2/z, z0.b, 0 /* search for zeros */ 39 | b.any 2f /* terminator found in this vector */ 40 | 41 | /* No zero found. Store the whole vector and loop. 
*/ 42 | st1b z0.b, p2, [x0, x2] 43 | incb x2, all 44 | b 0b 45 | 46 | /* First fault failed: only some of the vector is valid. 47 | Perform the comparison only on the valid bytes. */ 48 | 1: cmpeq p1.b, p0/z, z0.b, 0 /* search for zeros */ 49 | b.any 2f 50 | 51 | /* No zero found. Store the valid portion of the vector and loop. */ 52 | setffr /* re-init FFR */ 53 | st1b z0.b, p0, [x0, x2] 54 | incp x2, p0.b 55 | b 0b 56 | 57 | /* Zero found. Crop the vector to the found zero and finish. */ 58 | 2: brka p0.b, p2/z, p1.b 59 | st1b z0.b, p0, [x0, x2] 60 | #ifdef BUILD_STPCPY 61 | add x0, x0, x2 62 | sub x0, x0, 1 63 | incp x0, p0.b 64 | #endif 65 | ret 66 | 67 | END (FUNC) 68 | -------------------------------------------------------------------------------- /string/aarch64/experimental/strlen-sve.S: -------------------------------------------------------------------------------- 1 | /* 2 | * __strlen_aarch64_sve - compute the length of a string 3 | * 4 | * Copyright (c) 2018-2022, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "asmdefs.h" 9 | 10 | .arch armv8-a+sve 11 | 12 | /* Assumptions: 13 | * 14 | * ARMv8-a, AArch64 15 | * SVE Available. 16 | */ 17 | 18 | ENTRY (__strlen_aarch64_sve) 19 | setffr /* initialize FFR */ 20 | ptrue p2.b /* all ones; loop invariant */ 21 | mov x1, 0 /* initialize length */ 22 | 23 | /* Read a vector's worth of bytes, stopping on first fault. */ 24 | .p2align 4 25 | 0: ldff1b z0.b, p2/z, [x0, x1] 26 | rdffrs p0.b, p2/z 27 | b.nlast 2f 28 | 29 | /* First fault did not fail: the whole vector is valid. 30 | Avoid depending on the contents of FFR beyond the branch. */ 31 | incb x1, all /* speculate increment */ 32 | cmpeq p1.b, p2/z, z0.b, 0 /* loop if no zeros */ 33 | b.none 0b 34 | decb x1, all /* undo speculate */ 35 | 36 | /* Zero found. Select the bytes before the first and count them. 
*/ 37 | 1: brkb p0.b, p2/z, p1.b 38 | incp x1, p0.b /* add the bytes before the zero */ 39 | mov x0, x1 /* return the length */ 40 | ret 41 | 42 | /* First fault failed: only some of the vector is valid. 43 | Perform the comparison only on the valid bytes. */ 44 | 2: cmpeq p1.b, p0/z, z0.b, 0 45 | b.any 1b /* zero found among the valid bytes */ 46 | 47 | /* No zero found. Re-init FFR, increment, and loop. */ 48 | setffr 49 | incp x1, p0.b /* length += number of valid bytes */ 50 | b 0b 51 | 52 | END (__strlen_aarch64_sve) 53 | -------------------------------------------------------------------------------- /string/aarch64/experimental/strnlen-sve.S: -------------------------------------------------------------------------------- 1 | /* 2 | * strnlen - calculate the length of a string with limit. 3 | * 4 | * Copyright (c) 2019-2022, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "asmdefs.h" 9 | 10 | .arch armv8-a+sve 11 | 12 | /* Assumptions: 13 | * 14 | * ARMv8-a, AArch64 15 | * SVE Available. 16 | */ 17 | 18 | ENTRY (__strnlen_aarch64_sve) 19 | setffr /* initialize FFR */ 20 | mov x2, 0 /* initialize len */ 21 | b 1f 22 | 23 | .p2align 4 24 | /* We have off + vl <= max, and so may read the whole vector. */ 25 | 0: ldff1b z0.b, p0/z, [x0, x2] 26 | rdffrs p1.b, p0/z 27 | b.nlast 2f 28 | 29 | /* First fault did not fail: the whole vector is valid. 30 | Avoid depending on the contents of FFR beyond the branch. */ 31 | cmpeq p2.b, p0/z, z0.b, 0 32 | b.any 8f /* zero found in a full vector */ 33 | incb x2 /* advance by a whole vector */ 34 | 35 | 1: whilelo p0.b, x2, x1 36 | b.last 0b /* last lane active: whole vector is below max */ 37 | 38 | /* We have off + vl > max. Test for off == max before proceeding. */ 39 | b.none 9f 40 | 41 | ldff1b z0.b, p0/z, [x0, x2] 42 | rdffrs p1.b, p0/z 43 | b.nlast 2f 44 | 45 | /* First fault did not fail: the vector up to max is valid. 46 | Avoid depending on the contents of FFR beyond the branch. 47 | Compare for end-of-string, but there are no more bytes. */ 48 | cmpeq p2.b, p0/z, z0.b, 0 49 | 50 | /* Found end-of-string or zero. 
*/ 51 | 8: brkb p2.b, p0/z, p2.b 52 | mov x0, x2 53 | incp x0, p2.b 54 | ret 55 | 56 | /* First fault failed: only some of the vector is valid. 57 | Perform the comparison only on the valid bytes. */ 58 | 2: cmpeq p2.b, p1/z, z0.b, 0 59 | b.any 8b 60 | 61 | /* No inequality or zero found. Re-init FFR, incr and loop. */ 62 | setffr 63 | incp x2, p1.b 64 | b 1b 65 | 66 | /* End of count. Return max. */ 67 | 9: mov x0, x1 68 | ret 69 | 70 | END (__strnlen_aarch64_sve) 71 | -------------------------------------------------------------------------------- /string/aarch64/memcpy-mops.S: -------------------------------------------------------------------------------- 1 | /* 2 | * memcpy using MOPS extension. 3 | * 4 | * Copyright (c) 2023, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "asmdefs.h" 9 | 10 | ENTRY (__memcpy_aarch64_mops) 11 | mov x3, x0 12 | .inst 0x19010443 /* cpyfp [x3]!, [x1]!, x2! */ 13 | .inst 0x19410443 /* cpyfm [x3]!, [x1]!, x2! */ 14 | .inst 0x19810443 /* cpyfe [x3]!, [x1]!, x2! */ 15 | ret 16 | 17 | END (__memcpy_aarch64_mops) 18 | -------------------------------------------------------------------------------- /string/aarch64/memmove-mops.S: -------------------------------------------------------------------------------- 1 | /* 2 | * memmove using MOPS extension. 3 | * 4 | * Copyright (c) 2023, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "asmdefs.h" 9 | 10 | ENTRY (__memmove_aarch64_mops) 11 | mov x3, x0 12 | .inst 0x1d010443 /* cpyp [x3]!, [x1]!, x2! */ 13 | .inst 0x1d410443 /* cpym [x3]!, [x1]!, x2! */ 14 | .inst 0x1d810443 /* cpye [x3]!, [x1]!, x2! 
*/ 15 | ret 16 | 17 | END (__memmove_aarch64_mops) 18 | -------------------------------------------------------------------------------- /string/aarch64/memset-mops.S: -------------------------------------------------------------------------------- 1 | /* 2 | * memset using MOPS extension. 3 | * 4 | * Copyright (c) 2023, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include "asmdefs.h" 9 | 10 | ENTRY (__memset_aarch64_mops) 11 | mov x3, x0 12 | .inst 0x19c10443 /* setp [x3]!, x2!, x1 */ 13 | .inst 0x19c14443 /* setm [x3]!, x2!, x1 */ 14 | .inst 0x19c18443 /* sete [x3]!, x2!, x1 */ 15 | ret 16 | 17 | END (__memset_aarch64_mops) 18 | -------------------------------------------------------------------------------- /string/aarch64/stpcpy.S: -------------------------------------------------------------------------------- 1 | /* 2 | * stpcpy - copy a string returning pointer to end. 3 | * 4 | * Copyright (c) 2020, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #define BUILD_STPCPY 1 9 | 10 | #include "strcpy.S" 11 | -------------------------------------------------------------------------------- /string/arm/check-arch.S: -------------------------------------------------------------------------------- 1 | /* 2 | * check ARCH setting. 3 | * 4 | * Copyright (c) 2020-2022, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #if !__arm__ 9 | # error ARCH setting does not match the compiler. 10 | #endif 11 | 12 | /* For attributes that may affect ABI. */ 13 | #include "asmdefs.h" 14 | -------------------------------------------------------------------------------- /string/include/benchlib.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Benchmark support functions. 3 | * 4 | * Copyright (c) 2020, Arm Limited. 
5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include <stdint.h> 9 | #include <time.h> 10 | 11 | /* Fast and accurate timer returning nanoseconds. */ 12 | static inline uint64_t 13 | clock_get_ns (void) 14 | { 15 | struct timespec ts; 16 | clock_gettime (CLOCK_MONOTONIC, &ts); 17 | return ts.tv_sec * (uint64_t) 1000000000 + ts.tv_nsec; 18 | } 19 | 20 | /* Fast 32-bit random number generator. Passing a non-zero seed 21 | value resets the internal state. */ 22 | static inline uint32_t 23 | rand32 (uint32_t seed) 24 | { 25 | static uint64_t state = 0xb707be451df0bb19ULL; 26 | if (seed != 0) 27 | state = seed; 28 | uint32_t res = state >> 32; 29 | state = state * 6364136223846793005ULL + 1; 30 | return res; 31 | } 32 | 33 | /* Macros to run a benchmark BENCH using string function FN. */ 34 | #define RUN(BENCH, FN) BENCH(#FN, FN) 35 | 36 | #if __aarch64__ 37 | # define RUNA64(BENCH, FN) BENCH(#FN, FN) 38 | #else 39 | # define RUNA64(BENCH, FN) 40 | #endif 41 | 42 | #if __ARM_FEATURE_SVE 43 | # define RUNSVE(BENCH, FN) BENCH(#FN, FN) 44 | #else 45 | # define RUNSVE(BENCH, FN) 46 | #endif 47 | 48 | #if WANT_MOPS 49 | # define RUNMOPS(BENCH, FN) BENCH(#FN, FN) 50 | #else 51 | # define RUNMOPS(BENCH, FN) 52 | #endif 53 | 54 | #if __arm__ 55 | # define RUNA32(BENCH, FN) BENCH(#FN, FN) 56 | #else 57 | # define RUNA32(BENCH, FN) 58 | #endif 59 | 60 | #if __arm__ && __ARM_ARCH >= 6 && __ARM_ARCH_ISA_THUMB == 2 61 | # define RUNT32(BENCH, FN) BENCH(#FN, FN) 62 | #else 63 | # define RUNT32(BENCH, FN) 64 | #endif 65 | -------------------------------------------------------------------------------- /string/test/stringtest.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Common string test code. 3 | * 4 | * Copyright (c) 2020, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #include <ctype.h> 9 | #include <stdio.h> 10 | 11 | /* Accounting errors for a test case. 
*/ 12 | static int err_count; 13 | #define ERR_LIMIT 10 14 | #define ERR(...) (err_count++, printf (__VA_ARGS__)) 15 | 16 | static inline void 17 | quotechar (unsigned char c) 18 | { 19 | if (isprint (c)) 20 | putchar (c); 21 | else 22 | printf ("\\x%02x", c); 23 | } 24 | 25 | /* quoted print around at or the entire string if at < 0. */ 26 | static void 27 | quoteat (const char *prefix, const void *p, int len, int at) 28 | { 29 | static const int CTXLEN = 15; 30 | int i; 31 | const char *pre = "\""; 32 | const char *post = "\""; 33 | const char *s = p; 34 | if (at > CTXLEN) 35 | { 36 | s += at - CTXLEN; 37 | len -= at - CTXLEN; 38 | pre = "...\""; 39 | } 40 | if (at >= 0 && len > 2 * CTXLEN + 1) 41 | { 42 | len = 2 * CTXLEN + 1; 43 | post = "\"..."; 44 | } 45 | printf ("%4s: %s", prefix, pre); 46 | for (i = 0; i < len; i++) 47 | quotechar (s[i]); 48 | printf ("%s\n", post); 49 | } 50 | 51 | static inline void 52 | quote (const char *prefix, const void *p, int len) 53 | { 54 | quoteat (prefix, p, len, -1); 55 | } 56 | -------------------------------------------------------------------------------- /string/x86_64/check-arch.S: -------------------------------------------------------------------------------- 1 | /* 2 | * check ARCH setting. 3 | * 4 | * Copyright (c) 2020, Arm Limited. 5 | * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6 | */ 7 | 8 | #if !__x86_64__ 9 | # error ARCH setting does not match the compiler. 10 | #endif 11 | --------------------------------------------------------------------------------