├── .gitignore
├── CONTRIBUTING.md
├── LICENSE
├── MAINTAINERS.md
├── Makefile
├── README.md
├── config.mk.dist
├── contributor-agreement.pdf
├── fp
    ├── Dir.mk
    ├── README.md
    ├── armv6-m
    │   ├── faddsub.S
    │   ├── fcmp.h
    │   ├── fcmp_bool_eq.S
    │   ├── fcmp_bool_ge.S
    │   ├── fcmp_bool_gt.S
    │   ├── fcmp_bool_le.S
    │   ├── fcmp_bool_lt.S
    │   ├── fcmp_bool_un.S
    │   ├── fcmp_flags.S
    │   ├── fcmp_flags_eq.S
    │   ├── fcmp_flags_rev.S
    │   └── fmul.S
    ├── at32
    │   ├── f2iz.S
    │   ├── f2lz.S
    │   ├── f2uiz.S
    │   ├── f2ulz.S
    │   ├── faddsub.S
    │   ├── fcmp.h
    │   ├── fcmp_bool_eq.S
    │   ├── fcmp_bool_ge.S
    │   ├── fcmp_bool_gt.S
    │   ├── fcmp_bool_le.S
    │   ├── fcmp_bool_lt.S
    │   ├── fcmp_bool_un.S
    │   ├── fcmp_flags.S
    │   ├── fcmp_flags_eq.S
    │   ├── fcmp_flags_rev.S
    │   ├── fdiv.S
    │   ├── fmul.S
    │   ├── i2f.S
    │   ├── l2f.S
    │   └── ui2f.S
    ├── common
    │   ├── fnan2.c
    │   ├── fnorm2.c
    │   └── funder.c
    ├── include
    │   └── endian.h
    └── test
    │   ├── test-f2iz.c
    │   ├── test-f2lz.c
    │   ├── test-f2uiz.c
    │   ├── test-f2ulz.c
    │   ├── test-faddsub.c
    │   ├── test-fcmp.c
    │   ├── test-fdiv.c
    │   ├── test-fmul.c
    │   ├── test-i2f.c
    │   ├── test-l2f.c
    │   ├── test-ui2f.c
    │   └── test-ul2f.c
├── math
    ├── Dir.mk
    ├── README.md
    ├── aarch64
    │   ├── advsimd
    │   │   ├── acos.c
    │   │   ├── acosf.c
    │   │   ├── acosh.c
    │   │   ├── acoshf.c
    │   │   ├── acospi.c
    │   │   ├── acospif.c
    │   │   ├── asin.c
    │   │   ├── asinf.c
    │   │   ├── asinh.c
    │   │   ├── asinhf.c
    │   │   ├── asinpi.c
    │   │   ├── asinpif.c
    │   │   ├── atan.c
    │   │   ├── atan2.c
    │   │   ├── atan2f.c
    │   │   ├── atan2pi.c
    │   │   ├── atan2pif.c
    │   │   ├── atanf.c
    │   │   ├── atanh.c
    │   │   ├── atanhf.c
    │   │   ├── atanpi.c
    │   │   ├── atanpif.c
    │   │   ├── cbrt.c
    │   │   ├── cbrtf.c
    │   │   ├── cexpi.c
    │   │   ├── cexpif.c
    │   │   ├── cos.c
    │   │   ├── cosf.c
    │   │   ├── cosh.c
    │   │   ├── coshf.c
    │   │   ├── cospi.c
    │   │   ├── cospif.c
    │   │   ├── erf.c
    │   │   ├── erfc.c
    │   │   ├── erfcf.c
    │   │   ├── erff.c
    │   │   ├── exp.c
    │   │   ├── exp10.c
    │   │   ├── exp10f.c
    │   │   ├── exp2.c
    │   │   ├── exp2f.c
    │   │   ├── exp2f_1u.c
    │   │   ├── exp2m1f.c
    │   │   ├── expf.c
    │   │   ├── expf_1u.c
    │   │   ├── expm1.c
    │   │   ├── expm1f.c
    │   │   ├── finite_pow.h
    │   │   ├── hypot.c
    │   │   ├── hypotf.c
    │   │   ├── log.c
    │   │   ├── log10.c
    │   │   ├── log10f.c
    │   │   ├── log1p.c
    │   │   ├── log1pf.c
    │   │   ├── log2.c
    │   │   ├── log2f.c
    │   │   ├── logf.c
    │   │   ├── modf.c
    │   │   ├── modff.c
    │   │   ├── pow.c
    │   │   ├── powf.c
    │   │   ├── sin.c
    │   │   ├── sincos.c
    │   │   ├── sincosf.c
    │   │   ├── sincospi.c
    │   │   ├── sincospif.c
    │   │   ├── sinf.c
    │   │   ├── sinh.c
    │   │   ├── sinhf.c
    │   │   ├── sinpi.c
    │   │   ├── sinpif.c
    │   │   ├── tan.c
    │   │   ├── tanf.c
    │   │   ├── tanh.c
    │   │   ├── tanhf.c
    │   │   ├── tanpi.c
    │   │   ├── tanpif.c
    │   │   ├── v_expf_inline.h
    │   │   ├── v_expm1_inline.h
    │   │   ├── v_expm1f_inline.h
    │   │   ├── v_log1p_inline.h
    │   │   ├── v_log1pf_inline.h
    │   │   ├── v_log_inline.h
    │   │   ├── v_math.h
    │   │   ├── v_poly_f32.h
    │   │   ├── v_poly_f64.h
    │   │   ├── v_sincos_common.h
    │   │   ├── v_sincosf_common.h
    │   │   ├── v_sincospi_common.h
    │   │   └── v_sincospif_common.h
    │   ├── cospi_3u5.c
    │   ├── cospif_2u6.c
    │   ├── experimental
    │   │   ├── README.contributors
    │   │   ├── acos_2u.c
    │   │   ├── acosf_1u4.c
    │   │   ├── acosh_3u.c
    │   │   ├── acoshf_2u8.c
    │   │   ├── advsimd
    │   │   │   ├── cosf_umax.c
    │   │   │   ├── erfinv_25u.c
    │   │   │   ├── erfinvf_5u.c
    │   │   │   ├── expf_umax.c
    │   │   │   ├── powf_umax.c
    │   │   │   ├── sinf_umax.c
    │   │   │   └── v_logf_inline.h
    │   │   ├── asin_3u.c
    │   │   ├── asin_data.c
    │   │   ├── asinf_2u5.c
    │   │   ├── asinf_data.c
    │   │   ├── asinh_2u5.c
    │   │   ├── asinh_data.c
    │   │   ├── asinhf_3u5.c
    │   │   ├── asinhf_data.c
    │   │   ├── atan2_2u5.c
    │   │   ├── atan2f_3u.c
    │   │   ├── atan_2u5.c
    │   │   ├── atan_common.h
    │   │   ├── atan_data.c
    │   │   ├── atanf_2u9.c
    │   │   ├── atanf_common.h
    │   │   ├── atanf_data.c
    │   │   ├── atanh_3u.c
    │   │   ├── atanhf_3u1.c
    │   │   ├── cbrt_2u.c
    │   │   ├── cbrt_data.c
    │   │   ├── cbrtf_1u5.c
    │   │   ├── cbrtf_data.c
    │   │   ├── cosh_2u.c
    │   │   ├── coshf_1u9.c
    │   │   ├── erf_2u5.c
    │   │   ├── erfc_1u8.c
    │   │   ├── erfcf_1u7.c
    │   │   ├── erff_2u.c
    │   │   ├── erfinv_24u5.c
    │   │   ├── erfinvf_4u7.c
    │   │   ├── erfinvl.c
    │   │   ├── exp_inline.h
    │   │   ├── expf_data.c
    │   │   ├── expm1_2u5.c
    │   │   ├── expm1_data.c
    │   │   ├── expm1f_1u6.c
    │   │   ├── expm1f_data.c
    │   │   ├── log10_2u.c
    │   │   ├── log10_data.c
    │   │   ├── log1p_2u.c
    │   │   ├── log1p_data.c
    │   │   ├── log1pf_2u1.c
    │   │   ├── log1pf_data.c
    │   │   ├── sinh_3u.c
    │   │   ├── sinhf_2u3.c
    │   │   ├── sve
    │   │   │   ├── cosf_umax.c
    │   │   │   ├── erfinv_25u.c
    │   │   │   ├── erfinvf_5u.c
    │   │   │   ├── expf_umax.c
    │   │   │   ├── powf_umax.c
    │   │   │   ├── powi.c
    │   │   │   ├── powif.c
    │   │   │   ├── sinf_umax.c
    │   │   │   ├── sv_expf_inline.h
    │   │   │   └── sv_logf_inline.h
    │   │   ├── tanf_3u3.c
    │   │   ├── tanf_data.c
    │   │   ├── tanh_3u.c
    │   │   └── tanhf_2u6.c
    │   ├── sincospi_4u.c
    │   ├── sincospif_3u2.c
    │   ├── sinpi_3u5.c
    │   ├── sinpif_2u5.c
    │   ├── sve
    │   │   ├── acos.c
    │   │   ├── acosf.c
    │   │   ├── acosh.c
    │   │   ├── acoshf.c
    │   │   ├── acospi.c
    │   │   ├── acospif.c
    │   │   ├── asin.c
    │   │   ├── asinf.c
    │   │   ├── asinh.c
    │   │   ├── asinhf.c
    │   │   ├── asinpi.c
    │   │   ├── asinpif.c
    │   │   ├── atan.c
    │   │   ├── atan2.c
    │   │   ├── atan2f.c
    │   │   ├── atan2pi.c
    │   │   ├── atan2pif.c
    │   │   ├── atanf.c
    │   │   ├── atanh.c
    │   │   ├── atanhf.c
    │   │   ├── atanpi.c
    │   │   ├── atanpif.c
    │   │   ├── cbrt.c
    │   │   ├── cbrtf.c
    │   │   ├── cexpi.c
    │   │   ├── cexpif.c
    │   │   ├── cos.c
    │   │   ├── cosf.c
    │   │   ├── cosh.c
    │   │   ├── coshf.c
    │   │   ├── cospi.c
    │   │   ├── cospif.c
    │   │   ├── erf.c
    │   │   ├── erfc.c
    │   │   ├── erfcf.c
    │   │   ├── erff.c
    │   │   ├── exp.c
    │   │   ├── exp10.c
    │   │   ├── exp10f.c
    │   │   ├── exp2.c
    │   │   ├── exp2f.c
    │   │   ├── exp2m1f.c
    │   │   ├── expf.c
    │   │   ├── expm1.c
    │   │   ├── expm1f.c
    │   │   ├── hypot.c
    │   │   ├── hypotf.c
    │   │   ├── log.c
    │   │   ├── log10.c
    │   │   ├── log10f.c
    │   │   ├── log1p.c
    │   │   ├── log1pf.c
    │   │   ├── log2.c
    │   │   ├── log2f.c
    │   │   ├── logf.c
    │   │   ├── modf.c
    │   │   ├── modff.c
    │   │   ├── pow.c
    │   │   ├── powf.c
    │   │   ├── sin.c
    │   │   ├── sincos.c
    │   │   ├── sincosf.c
    │   │   ├── sincospi.c
    │   │   ├── sincospif.c
    │   │   ├── sinf.c
    │   │   ├── sinh.c
    │   │   ├── sinhf.c
    │   │   ├── sinpi.c
    │   │   ├── sinpif.c
    │   │   ├── sv_expf_inline.h
    │   │   ├── sv_expm1f_inline.h
    │   │   ├── sv_log1p_inline.h
    │   │   ├── sv_log1pf_inline.h
    │   │   ├── sv_log_inline.h
    │   │   ├── sv_math.h
    │   │   ├── sv_poly_f32.h
    │   │   ├── sv_poly_f64.h
    │   │   ├── sv_poly_generic.h
    │   │   ├── sv_sincos_common.h
    │   │   ├── sv_sincosf_common.h
    │   │   ├── sv_sincospi_common.h
    │   │   ├── sv_sincospif_common.h
    │   │   ├── tan.c
    │   │   ├── tanf.c
    │   │   ├── tanh.c
    │   │   ├── tanhf.c
    │   │   ├── tanpi.c
    │   │   └── tanpif.c
    │   ├── tanpi_2u5.c
    │   ├── tanpif_3u1.c
    │   ├── v_erf_data.c
    │   ├── v_erfc_data.c
    │   ├── v_erfcf_data.c
    │   ├── v_erff_data.c
    │   ├── v_exp_data.c
    │   ├── v_exp_tail_data.c
    │   ├── v_log10_data.c
    │   ├── v_log2_data.c
    │   ├── v_log_data.c
    │   ├── v_pow_exp_data.c
    │   ├── v_pow_log_data.c
    │   └── v_powf_data.c
    ├── cosf.c
    ├── erf.c
    ├── erf_data.c
    ├── erff.c
    ├── erff_data.c
    ├── exp.c
    ├── exp10.c
    ├── exp2.c
    ├── exp2f.c
    ├── exp2f_data.c
    ├── exp_data.c
    ├── expf.c
    ├── include
    │   ├── mathlib.h
    │   ├── test_defs.h
    │   └── test_sig.h
    ├── log.c
    ├── log10f.c
    ├── log2.c
    ├── log2_data.c
    ├── log2f.c
    ├── log2f_data.c
    ├── log_data.c
    ├── logf.c
    ├── logf_data.c
    ├── math_config.h
    ├── math_err.c
    ├── math_errf.c
    ├── poly_generic.h
    ├── poly_scalar_f32.h
    ├── poly_scalar_f64.h
    ├── pow.c
    ├── pow_log_data.c
    ├── powf.c
    ├── powf_log2_data.c
    ├── sincosf.c
    ├── sincosf.h
    ├── sincosf_data.c
    ├── sinf.c
    ├── test
    │   ├── c23_references.h
    │   ├── mathbench.c
    │   ├── mathbench_funcs.h
    │   ├── mathbench_wrappers.h
    │   ├── mathtest.c
    │   ├── rtest
    │   │   ├── dotest.c
    │   │   ├── intern.h
    │   │   ├── main.c
    │   │   ├── random.c
    │   │   ├── random.h
    │   │   ├── semi.c
    │   │   ├── semi.h
    │   │   ├── types.h
    │   │   ├── wrappers.c
    │   │   └── wrappers.h
    │   ├── runulp.sh
    │   ├── test_defs.h
    │   ├── testcases
    │   │   ├── directed
    │   │   │   ├── acos.tst
    │   │   │   ├── acosf.tst
    │   │   │   ├── acosh.tst
    │   │   │   ├── acoshf.tst
    │   │   │   ├── asin.tst
    │   │   │   ├── asinf.tst
    │   │   │   ├── asinh.tst
    │   │   │   ├── asinhf.tst
    │   │   │   ├── atan.tst
    │   │   │   ├── atan2.tst
    │   │   │   ├── atan2f.tst
    │   │   │   ├── atanf.tst
    │   │   │   ├── atanh.tst
    │   │   │   ├── atanhf.tst
    │   │   │   ├── cbrtf.tst
    │   │   │   ├── cosf.tst
    │   │   │   ├── cosh.tst
    │   │   │   ├── coshf.tst
    │   │   │   ├── erf.tst
    │   │   │   ├── erfc.tst
    │   │   │   ├── erfcf.tst
    │   │   │   ├── erff.tst
    │   │   │   ├── exp.tst
    │   │   │   ├── exp10.tst
    │   │   │   ├── exp2.tst
    │   │   │   ├── exp2f.tst
    │   │   │   ├── expf.tst
    │   │   │   ├── expm1.tst
    │   │   │   ├── expm1f.tst
    │   │   │   ├── log.tst
    │   │   │   ├── log10.tst
    │   │   │   ├── log10f.tst
    │   │   │   ├── log1p.tst
    │   │   │   ├── log1pf.tst
    │   │   │   ├── log2.tst
    │   │   │   ├── log2f.tst
    │   │   │   ├── logf.tst
    │   │   │   ├── pow.tst
    │   │   │   ├── powf.tst
    │   │   │   ├── sincosf.tst
    │   │   │   ├── sinf.tst
    │   │   │   ├── sinh.tst
    │   │   │   ├── sinhf.tst
    │   │   │   ├── tanf.tst
    │   │   │   ├── tanh.tst
    │   │   │   └── tanhf.tst
    │   │   └── random
    │   │   │   ├── double.tst
    │   │   │   └── float.tst
    │   ├── traces
    │   │   ├── exp.txt
    │   │   └── sincosf.txt
    │   ├── ulp.c
    │   ├── ulp.h
    │   ├── ulp_funcs.h
    │   └── ulp_wrappers.h
    ├── tgamma128.c
    ├── tgamma128.h
    └── tools
    │   ├── asin.sollya
    │   ├── asinf.sollya
    │   ├── asinh.sollya
    │   ├── asinhf.sollya
    │   ├── asinpif.sollya
    │   ├── atan.sollya
    │   ├── atanf.sollya
    │   ├── atanpi.sollya
    │   ├── cbrt.sollya
    │   ├── cbrtf.sollya
    │   ├── cos.sollya
    │   ├── erf.sollya
    │   ├── erfc.sollya
    │   ├── erfcf.sollya
    │   ├── erff.sollya
    │   ├── exp.sollya
    │   ├── exp10.sollya
    │   ├── exp10f.sollya
    │   ├── exp2.sollya
    │   ├── exp2f.sollya
    │   ├── exp2m1f.sollya
    │   ├── expm1.sollya
    │   ├── expm1f.sollya
    │   ├── log.sollya
    │   ├── log10.sollya
    │   ├── log10f.sollya
    │   ├── log1p.sollya
    │   ├── log1pf.sollya
    │   ├── log2.sollya
    │   ├── log2_abs.sollya
    │   ├── log_abs.sollya
    │   ├── plot.py
    │   ├── remez.jl
    │   ├── sin.sollya
    │   ├── sincos.sollya
    │   ├── sincosf.sollya
    │   ├── sinpi.sollya
    │   ├── tan.sollya
    │   ├── tanf.sollya
    │   ├── tanpi.sollya
    │   ├── tgamma128_gen.jl
    │   ├── v_erf.sollya
    │   ├── v_erfc.sollya
    │   ├── v_exp.sollya
    │   ├── v_log.sollya
    │   ├── v_log10.sollya
    │   ├── v_log10f.sollya
    │   ├── v_log2f.sollya
    │   └── v_sin.sollya
├── networking
    ├── Dir.mk
    ├── README.md
    ├── aarch64
    │   └── chksum_simd.c
    ├── arm
    │   └── chksum_simd.c
    ├── chksum.c
    ├── chksum_common.h
    ├── include
    │   └── networking.h
    └── test
    │   └── chksum.c
└── string
    ├── Dir.mk
    ├── README.md
    ├── aarch64
        ├── __mtag_tag_region.S
        ├── __mtag_tag_zero_region.S
        ├── asmdefs.h
        ├── check-arch.S
        ├── experimental
        │   ├── memchr-sve.S
        │   ├── memcmp-sve.S
        │   ├── stpcpy-sve.S
        │   ├── strchr-sve.S
        │   ├── strchrnul-sve.S
        │   ├── strcmp-sve.S
        │   ├── strcpy-sve.S
        │   ├── strlen-sve.S
        │   ├── strncmp-sve.S
        │   ├── strnlen-sve.S
        │   └── strrchr-sve.S
        ├── memchr-mte.S
        ├── memchr.S
        ├── memcmp.S
        ├── memcpy-advsimd.S
        ├── memcpy-mops.S
        ├── memcpy-sve.S
        ├── memcpy.S
        ├── memmove-mops.S
        ├── memrchr.S
        ├── memset-mops.S
        ├── memset-sve.S
        ├── memset.S
        ├── stpcpy.S
        ├── strchr-mte.S
        ├── strchr.S
        ├── strchrnul-mte.S
        ├── strchrnul.S
        ├── strcmp.S
        ├── strcpy.S
        ├── strlen-mte.S
        ├── strlen.S
        ├── strncmp.S
        ├── strnlen.S
        ├── strrchr-mte.S
        └── strrchr.S
    ├── arm
        ├── asmdefs.h
        ├── check-arch.S
        ├── memchr.S
        ├── memcpy.S
        ├── memset.S
        ├── strcmp-armv6m.S
        ├── strcmp.S
        ├── strcpy.c
        └── strlen-armv6t2.S
    ├── bench
        ├── memcpy.c
        ├── memset.c
        └── strlen.c
    ├── include
        ├── benchlib.h
        └── stringlib.h
    ├── test
        ├── __mtag_tag_region.c
        ├── __mtag_tag_zero_region.c
        ├── memchr.c
        ├── memcmp.c
        ├── memcpy.c
        ├── memmove.c
        ├── memrchr.c
        ├── memset.c
        ├── mte.h
        ├── stpcpy.c
        ├── strchr.c
        ├── strchrnul.c
        ├── strcmp.c
        ├── strcpy.c
        ├── stringtest.h
        ├── strlen.c
        ├── strncmp.c
        ├── strnlen.c
        └── strrchr.c
    └── x86_64
        └── check-arch.S


/.gitignore:
--------------------------------------------------------------------------------
1 | build/
2 | .DS_Store
3 | config.mk
4 | 


--------------------------------------------------------------------------------
/MAINTAINERS.md:
--------------------------------------------------------------------------------
 1 | # Maintainers
 2 | 
 3 | ## Gatekeeper and Global Maintainer
 4 | 
 5 | - Tamar Christina <[tamar.christina@arm.com](mailto:tamar.christina@arm.com)>
 6 | 
 7 | ## Sub-Project Maintainers
 8 | 
 9 | ### FP
10 | 
11 | - Simon Tatham <[simon.tatham@arm.com](mailto:simon.tatham@arm.com)>
12 | - Peter Smith <[peter.smith@arm.com](mailto:peter.smith@arm.com)>
13 | - John Brawn <[john.brawn@arm.com](mailto:john.brawn@arm.com)>
14 | - Kristof Beyls <[kristof.beyls@arm.com](mailto:kristof.beyls@arm.com)>
15 | 
16 | ### Math
17 | 
18 | - Pierre Blanchard <[pierre.blanchard@arm.com](mailto:pierre.blanchard@arm.com)>
19 | - Dylan Fleming <[dylan.fleming@arm.com](mailto:dylan.fleming@arm.com)>
20 | - Joe Ramsay <[joe.ramsay@arm.com](mailto:joe.ramsay@arm.com)>
21 | 
22 | ### Networking
23 | 
24 | - Ola Liljedahl <[ola.liljedahl@arm.com](mailto:ola.liljedahl@arm.com)>
25 | 
26 | ### String
27 | 
28 | - Wilco Dijkstra <[wilco.dijkstra@arm.com](mailto:wilco.dijkstra@arm.com)>
29 | 


--------------------------------------------------------------------------------
/contributor-agreement.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ARM-software/optimized-routines/367a58e1b24f5931a77d68500ed7c131495c1dc4/contributor-agreement.pdf


--------------------------------------------------------------------------------
/fp/armv6-m/fcmp_bool_eq.S:
--------------------------------------------------------------------------------
 1 | // Single-precision comparison, returning a boolean in r0, true if a == b.
 2 | //
 3 | // Copyright (c) 1994-1998,2025, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | // Wraps the code included from fcmp.h with this variant's operand aliases, result macro and NaN exit.
 6 |   .syntax unified
 7 |   .text
 8 |   .p2align 2
 9 | // NOTE(review): op0/op1, SetReturnRegister and fcmp_NaN are not referenced in this file; presumably fcmp.h uses them -- confirm.
10 | op0 .req r0
11 | op1 .req r1
12 | .macro SetReturnRegister
13 |   BEQ 0f        // flags are EQ where the macro is expanded: take the 'true' exit
14 |   MOVS r0, #0   // not EQ: return false
15 |   BX lr
16 | 0:
17 |   MOVS r0, #1   // EQ: return true
18 |   BX lr
19 | .endm
20 | 
21 |   .globl arm_fp_fcmp_bool_eq
22 |   .type arm_fp_fcmp_bool_eq,%function
23 | arm_fp_fcmp_bool_eq:
24 |   #include "fcmp.h"
25 | 
26 | fcmp_NaN:
27 |   // If any NaN is involved, the return value for this comparison is false.
28 |   MOVS r0, #0   // r0 = 0 (false)
29 |   BX lr
30 | 
31 |   .size arm_fp_fcmp_bool_eq, .-arm_fp_fcmp_bool_eq
32 | 


--------------------------------------------------------------------------------
/fp/armv6-m/fcmp_bool_ge.S:
--------------------------------------------------------------------------------
 1 | // Single-precision comparison, returning a boolean in r0, true if a >= b.
 2 | //
 3 | // Copyright (c) 1994-1998,2025, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | // Wraps the code included from fcmp.h with this variant's operand aliases, result macro and NaN exit.
 6 |   .syntax unified
 7 |   .text
 8 |   .p2align 2
 9 | // NOTE(review): op0/op1, SetReturnRegister and fcmp_NaN are not referenced in this file; presumably fcmp.h uses them -- confirm.
10 | op0 .req r0
11 | op1 .req r1
12 | .macro SetReturnRegister
13 |   BHS 0f        // flags are HS (higher or same) where the macro is expanded: take the 'true' exit
14 |   MOVS r0, #0   // not HS: return false
15 |   BX lr
16 | 0:
17 |   MOVS r0, #1   // HS: return true
18 |   BX lr
19 | .endm
20 | 
21 |   .globl arm_fp_fcmp_bool_ge
22 |   .type arm_fp_fcmp_bool_ge,%function
23 | arm_fp_fcmp_bool_ge:
24 |   #include "fcmp.h"
25 | 
26 | fcmp_NaN:
27 |   // If any NaN is involved, the return value for this comparison is false.
28 |   MOVS r0, #0   // r0 = 0 (false)
29 |   BX lr
30 | 
31 |   .size arm_fp_fcmp_bool_ge, .-arm_fp_fcmp_bool_ge
32 | 


--------------------------------------------------------------------------------
/fp/armv6-m/fcmp_bool_gt.S:
--------------------------------------------------------------------------------
 1 | // Single-precision comparison, returning a boolean in r0, true if a > b.
 2 | //
 3 | // Copyright (c) 1994-1998,2025, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | // Wraps the code included from fcmp.h with this variant's operand aliases, result macro and NaN exit.
 6 |   .syntax unified
 7 |   .text
 8 |   .p2align 2
 9 | // NOTE(review): op0/op1, SetReturnRegister and fcmp_NaN are not referenced in this file; presumably fcmp.h uses them -- confirm.
10 | op0 .req r0
11 | op1 .req r1
12 | .macro SetReturnRegister
13 |   BHI 0f        // flags are HI (strictly higher) where the macro is expanded: take the 'true' exit
14 |   MOVS r0, #0   // not HI: return false
15 |   BX lr
16 | 0:
17 |   MOVS r0, #1   // HI: return true
18 |   BX lr
19 | .endm
20 | 
21 |   .globl arm_fp_fcmp_bool_gt
22 |   .type arm_fp_fcmp_bool_gt,%function
23 | arm_fp_fcmp_bool_gt:
24 |   #include "fcmp.h"
25 | 
26 | fcmp_NaN:
27 |   // If any NaN is involved, the return value for this comparison is false.
28 |   MOVS r0, #0   // r0 = 0 (false)
29 |   BX lr
30 | 
31 |   .size arm_fp_fcmp_bool_gt, .-arm_fp_fcmp_bool_gt
32 | 


--------------------------------------------------------------------------------
/fp/armv6-m/fcmp_bool_le.S:
--------------------------------------------------------------------------------
 1 | // Single-precision comparison, returning a boolean in r0, true if a <= b.
 2 | //
 3 | // Copyright (c) 1994-1998,2025, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | // Wraps the code included from fcmp.h with this variant's operand aliases, result macro and NaN exit.
 6 |   .syntax unified
 7 |   .text
 8 |   .p2align 2
 9 | // NOTE(review): op0/op1, SetReturnRegister and fcmp_NaN are not referenced in this file; presumably fcmp.h uses them -- confirm.
10 | op0 .req r0
11 | op1 .req r1
12 | .macro SetReturnRegister
13 |   BLS 0f        // flags are LS (lower or same) where the macro is expanded: take the 'true' exit
14 |   MOVS r0, #0   // not LS: return false
15 |   BX lr
16 | 0:
17 |   MOVS r0, #1   // LS: return true
18 |   BX lr
19 | .endm
20 | 
21 |   .globl arm_fp_fcmp_bool_le
22 |   .type arm_fp_fcmp_bool_le,%function
23 | arm_fp_fcmp_bool_le:
24 |   #include "fcmp.h"
25 | 
26 | fcmp_NaN:
27 |   // If any NaN is involved, the return value for this comparison is false.
28 |   MOVS r0, #0   // r0 = 0 (false)
29 |   BX lr
30 | 
31 |   .size arm_fp_fcmp_bool_le, .-arm_fp_fcmp_bool_le
32 | 


--------------------------------------------------------------------------------
/fp/armv6-m/fcmp_bool_lt.S:
--------------------------------------------------------------------------------
 1 | // Single-precision comparison, returning a boolean in r0, true if a < b.
 2 | //
 3 | // Copyright (c) 1994-1998,2025, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | // Wraps the code included from fcmp.h with this variant's operand aliases, result macro and NaN exit.
 6 |   .syntax unified
 7 |   .text
 8 |   .p2align 2
 9 | // NOTE(review): op0/op1, SetReturnRegister and fcmp_NaN are not referenced in this file; presumably fcmp.h uses them -- confirm.
10 | op0 .req r0
11 | op1 .req r1
12 | .macro SetReturnRegister
13 |   BLO 0f        // flags are LO (strictly lower) where the macro is expanded: take the 'true' exit
14 |   MOVS r0, #0   // not LO: return false
15 |   BX lr
16 | 0:
17 |   MOVS r0, #1   // LO: return true
18 |   BX lr
19 | .endm
20 | 
21 |   .globl arm_fp_fcmp_bool_lt
22 |   .type arm_fp_fcmp_bool_lt,%function
23 | arm_fp_fcmp_bool_lt:
24 |   #include "fcmp.h"
25 | 
26 | fcmp_NaN:
27 |   // If any NaN is involved, the return value for this comparison is false.
28 |   MOVS r0, #0   // r0 = 0 (false)
29 |   BX lr
30 | 
31 |   .size arm_fp_fcmp_bool_lt, .-arm_fp_fcmp_bool_lt
32 | 


--------------------------------------------------------------------------------
/fp/armv6-m/fcmp_bool_un.S:
--------------------------------------------------------------------------------
 1 | // Single-precision comparison, returning a boolean in r0, true if a and b are
 2 | // 'unordered', i.e. one or both of them is a NaN.
 3 | //
 4 | // Copyright (c) 1994-1998,2025, Arm Limited.
 5 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 | 
 7 |   .syntax unified
 8 |   .text
 9 |   .p2align 2
10 | 
11 |   .globl arm_fp_fcmp_bool_un
12 |   .type arm_fp_fcmp_bool_un,%function
13 | arm_fp_fcmp_bool_un:
14 | 
15 |   // This function isn't based on the general-purpose code in fcmp.h, because
16 |   // it's more effort than needed. Here we just need to identify whether or not
17 |   // there's at least one NaN in the inputs. There's no need to vary that check
18 |   // based on the sign bit, so we might as well just do the NaN test as quickly
19 |   // as possible.
20 |   MOVS    r2, #0xFF        // with the shift below, r2 = 0xFF000000: the bit pattern
21 |   LSLS    r2, r2, #24      // of an infinity (0x7F800000) shifted left by one
22 |   LSLS    r3, r0, #1       // r3 = r0 with its sign bit shifted out
23 |   CMP     r3, r2
24 |   BHI     fcmp_NaN         // strictly above the infinity pattern: r0 is a NaN
25 |   LSLS    r3, r1, #1       // same test for r1
26 |   CMP     r3, r2
27 |   BHI     fcmp_NaN         // r1 is a NaN
28 | 
29 |   // Neither compare was HI, so we have no NaNs and return false.
30 |   MOVS    r0, #0
31 |   BX      lr
32 | 
33 |   // Otherwise, we have at least one NaN, and return true.
34 | fcmp_NaN:
35 |   MOVS    r0, #1
36 |   BX      lr
37 | 
38 |   .size arm_fp_fcmp_bool_un, .-arm_fp_fcmp_bool_un
39 | 


--------------------------------------------------------------------------------
/fp/armv6-m/fcmp_flags.S:
--------------------------------------------------------------------------------
 1 | // Single-precision comparison, returning LO / EQ / HI in the flags, with the
 2 | // 'unordered result' case also returning HI.
 3 | //
 4 | // Copyright (c) 1994-1998,2025, Arm Limited.
 5 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 | // Wraps the code included from fcmp.h with operand aliases, an empty SetReturnRegister and a NaN exit.
 7 |   .syntax unified
 8 |   .text
 9 |   .p2align 2
10 | // NOTE(review): op0/op1, SetReturnRegister and fcmp_NaN are not referenced in this file; presumably fcmp.h uses them -- confirm.
11 | op0 .req r0
12 | op1 .req r1
13 | .macro SetReturnRegister
14 | .endm
15 | // SetReturnRegister expands to nothing in this variant; per the header, the result is the flags themselves.
16 |   .globl arm_fp_fcmp_flags
17 |   .type arm_fp_fcmp_flags,%function
18 | arm_fp_fcmp_flags:
19 |   #include "fcmp.h"
20 | 
21 | fcmp_NaN:
22 |   // If any NaN is involved, the return value for this comparison is
23 |   // to set the flags as if a > b, so that the HI condition will return true.
24 |   MOVS r0, #1   // r0 is overwritten with 1 purely to feed the compare below
25 |   CMP r0, #0    // 1 compared with 0 gives C=1, Z=0, i.e. HI
26 |   BX lr
27 | 
28 |   .size arm_fp_fcmp_flags, .-arm_fp_fcmp_flags
29 | 


--------------------------------------------------------------------------------
/fp/armv6-m/fcmp_flags_eq.S:
--------------------------------------------------------------------------------
 1 | // Single-precision comparison, returning EQ / NE in the flags.
 2 | //
 3 | // Copyright (c) 1994-1998,2025, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | // Wraps the code included from fcmp.h with operand aliases, an empty SetReturnRegister and a NaN exit.
 6 |   .syntax unified
 7 |   .text
 8 |   .p2align 2
 9 | // NOTE(review): op0/op1, SetReturnRegister and fcmp_NaN are not referenced in this file; presumably fcmp.h uses them -- confirm.
10 | op0 .req r0
11 | op1 .req r1
12 | .macro SetReturnRegister
13 | .endm
14 | // SetReturnRegister expands to nothing in this variant; per the header, the result is the flags themselves.
15 |   .globl arm_fp_fcmp_flags_eq
16 |   .type arm_fp_fcmp_flags_eq,%function
17 | arm_fp_fcmp_flags_eq:
18 |   #include "fcmp.h"
19 | 
20 | fcmp_NaN:
21 |   // If any NaN is involved, the return value for this comparison is false, i.e. NE (Z=0).
22 |   MOVS r0, #1   // the value in r0 is unimportant; this just sets Z=0
23 |   BX lr
24 | 
25 |   .size arm_fp_fcmp_flags_eq, .-arm_fp_fcmp_flags_eq
26 | 


--------------------------------------------------------------------------------
/fp/armv6-m/fcmp_flags_rev.S:
--------------------------------------------------------------------------------
 1 | // Single-precision comparison, with the input operands reversed, returning
 2 | // LO / EQ / HI in the flags, with the 'unordered result' case also returning
 3 | // HI.
 4 | //
 5 | // Copyright (c) 1994-1998,2025, Arm Limited.
 6 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 7 | // Wraps the code included from fcmp.h with swapped operand aliases, an empty SetReturnRegister and a NaN exit.
 8 |   .syntax unified
 9 |   .text
10 |   .p2align 2
11 | // NOTE(review): op0/op1, SetReturnRegister and fcmp_NaN are not referenced in this file; presumably fcmp.h uses them -- confirm.
12 | op0 .req r1   // op0 names r1 and op1 names r0 here: the 'operands reversed'
13 | op1 .req r0   // of the header comment
14 | .macro SetReturnRegister
15 | .endm
16 | // SetReturnRegister expands to nothing in this variant; per the header, the result is the flags themselves.
17 |   .globl arm_fp_fcmp_flags_rev
18 |   .type arm_fp_fcmp_flags_rev,%function
19 | arm_fp_fcmp_flags_rev:
20 |   #include "fcmp.h"
21 | 
22 | fcmp_NaN:
23 |   // If any NaN is involved, the return value for this comparison is
24 |   // to set the flags as if a > b, so that the HI condition will return true.
25 |   MOVS r0, #1   // r0 is overwritten with 1 purely to feed the compare below
26 |   CMP r0, #0    // 1 compared with 0 gives C=1, Z=0, i.e. HI
27 |   BX lr
28 | 
29 |   .size arm_fp_fcmp_flags_rev, .-arm_fp_fcmp_flags_rev
30 | 


--------------------------------------------------------------------------------
/fp/at32/fcmp_bool_eq.S:
--------------------------------------------------------------------------------
 1 | // Single-precision comparison, returning a boolean in r0, true if a == b.
 2 | //
 3 | // Copyright (c) 1994-1998,2025, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | // Wraps the code included from fcmp.h with this variant's operand aliases, result macro and NaN exit.
 6 |   .syntax unified
 7 |   .text
 8 |   .p2align 2
 9 | // NOTE(review): op0/op1, SetReturnRegister and fcmp_NaN are not referenced in this file, and the macro has no return of its own; presumably fcmp.h uses them and returns -- confirm.
10 | op0 .req r0
11 | op1 .req r1
12 | .macro SetReturnRegister
13 |   MOV r0, #0     // default result: false (MOV without the S suffix does not update the flags)
14 |   MOVEQ r0, #1   // overwrite with true when the flags are EQ
15 | .endm
16 | 
17 |   .globl arm_fp_fcmp_bool_eq
18 |   .type arm_fp_fcmp_bool_eq,%function
19 | arm_fp_fcmp_bool_eq:
20 |   #include "fcmp.h"
21 | 
22 | fcmp_NaN:
23 |   // If any NaN is involved, the return value for this comparison is false.
24 |   MOV r0, #0     // r0 = 0 (false)
25 |   BX lr
26 | 
27 |   .size arm_fp_fcmp_bool_eq, .-arm_fp_fcmp_bool_eq
28 | 


--------------------------------------------------------------------------------
/fp/at32/fcmp_bool_ge.S:
--------------------------------------------------------------------------------
 1 | // Single-precision comparison, returning a boolean in r0, true if a >= b.
 2 | //
 3 | // Copyright (c) 1994-1998,2025, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | // Wraps the code included from fcmp.h with this variant's operand aliases, result macro and NaN exit.
 6 |   .syntax unified
 7 |   .text
 8 |   .p2align 2
 9 | // NOTE(review): op0/op1, SetReturnRegister and fcmp_NaN are not referenced in this file, and the macro has no return of its own; presumably fcmp.h uses them and returns -- confirm.
10 | op0 .req r0
11 | op1 .req r1
12 | .macro SetReturnRegister
13 |   MOV r0, #0     // default result: false (MOV without the S suffix does not update the flags)
14 |   MOVHS r0, #1   // overwrite with true when the flags are HS (higher or same)
15 | .endm
16 | 
17 |   .globl arm_fp_fcmp_bool_ge
18 |   .type arm_fp_fcmp_bool_ge,%function
19 | arm_fp_fcmp_bool_ge:
20 |   #include "fcmp.h"
21 | 
22 | fcmp_NaN:
23 |   // If any NaN is involved, the return value for this comparison is false.
24 |   MOV r0, #0     // r0 = 0 (false)
25 |   BX lr
26 | 
27 |   .size arm_fp_fcmp_bool_ge, .-arm_fp_fcmp_bool_ge
28 | 


--------------------------------------------------------------------------------
/fp/at32/fcmp_bool_gt.S:
--------------------------------------------------------------------------------
 1 | // Single-precision comparison, returning a boolean in r0, true if a > b.
 2 | //
 3 | // Copyright (c) 1994-1998,2025, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | // Wraps the code included from fcmp.h with this variant's operand aliases, result macro and NaN exit.
 6 |   .syntax unified
 7 |   .text
 8 |   .p2align 2
 9 | // NOTE(review): op0/op1, SetReturnRegister and fcmp_NaN are not referenced in this file, and the macro has no return of its own; presumably fcmp.h uses them and returns -- confirm.
10 | op0 .req r0
11 | op1 .req r1
12 | .macro SetReturnRegister
13 |   MOV r0, #0     // default result: false (MOV without the S suffix does not update the flags)
14 |   MOVHI r0, #1   // overwrite with true when the flags are HI (strictly higher)
15 | .endm
16 | 
17 |   .globl arm_fp_fcmp_bool_gt
18 |   .type arm_fp_fcmp_bool_gt,%function
19 | arm_fp_fcmp_bool_gt:
20 |   #include "fcmp.h"
21 | 
22 | fcmp_NaN:
23 |   // If any NaN is involved, the return value for this comparison is false.
24 |   MOV r0, #0     // r0 = 0 (false)
25 |   BX lr
26 | 
27 |   .size arm_fp_fcmp_bool_gt, .-arm_fp_fcmp_bool_gt
28 | 


--------------------------------------------------------------------------------
/fp/at32/fcmp_bool_le.S:
--------------------------------------------------------------------------------
 1 | // Single-precision comparison, returning a boolean in r0, true if a <= b.
 2 | //
 3 | // Copyright (c) 1994-1998,2025, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | // Wraps the code included from fcmp.h with this variant's operand aliases, result macro and NaN exit.
 6 |   .syntax unified
 7 |   .text
 8 |   .p2align 2
 9 | // NOTE(review): op0/op1, SetReturnRegister and fcmp_NaN are not referenced in this file, and the macro has no return of its own; presumably fcmp.h uses them and returns -- confirm.
10 | op0 .req r0
11 | op1 .req r1
12 | .macro SetReturnRegister
13 |   MOV r0, #0     // default result: false (MOV without the S suffix does not update the flags)
14 |   MOVLS r0, #1   // overwrite with true when the flags are LS (lower or same)
15 | .endm
16 | 
17 |   .globl arm_fp_fcmp_bool_le
18 |   .type arm_fp_fcmp_bool_le,%function
19 | arm_fp_fcmp_bool_le:
20 |   #include "fcmp.h"
21 | 
22 | fcmp_NaN:
23 |   // If any NaN is involved, the return value for this comparison is false.
24 |   MOV r0, #0     // r0 = 0 (false)
25 |   BX lr
26 | 
27 |   .size arm_fp_fcmp_bool_le, .-arm_fp_fcmp_bool_le
28 | 


--------------------------------------------------------------------------------
/fp/at32/fcmp_bool_lt.S:
--------------------------------------------------------------------------------
 1 | // Single-precision comparison, returning a boolean in r0, true if a < b.
 2 | //
 3 | // Copyright (c) 1994-1998,2025, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | // Wraps the code included from fcmp.h with this variant's operand aliases, result macro and NaN exit.
 6 |   .syntax unified
 7 |   .text
 8 |   .p2align 2
 9 | // NOTE(review): op0/op1, SetReturnRegister and fcmp_NaN are not referenced in this file, and the macro has no return of its own; presumably fcmp.h uses them and returns -- confirm.
10 | op0 .req r0
11 | op1 .req r1
12 | .macro SetReturnRegister
13 |   MOV r0, #0     // default result: false (MOV without the S suffix does not update the flags)
14 |   MOVLO r0, #1   // overwrite with true when the flags are LO (strictly lower)
15 | .endm
16 | 
17 |   .globl arm_fp_fcmp_bool_lt
18 |   .type arm_fp_fcmp_bool_lt,%function
19 | arm_fp_fcmp_bool_lt:
20 |   #include "fcmp.h"
21 | 
22 | fcmp_NaN:
23 |   // If any NaN is involved, the return value for this comparison is false.
24 |   MOV r0, #0     // r0 = 0 (false)
25 |   BX lr
26 | 
27 |   .size arm_fp_fcmp_bool_lt, .-arm_fp_fcmp_bool_lt
28 | 


--------------------------------------------------------------------------------
/fp/at32/fcmp_bool_un.S:
--------------------------------------------------------------------------------
 1 | // Single-precision comparison, returning a boolean in r0, true if a and b are
 2 | // 'unordered', i.e. one or both of them is a NaN.
 3 | //
 4 | // Copyright (c) 1994-1998,2025, Arm Limited.
 5 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 | 
 7 |   .syntax unified
 8 |   .text
 9 |   .p2align 2
10 | 
11 |   .globl arm_fp_fcmp_bool_un
12 |   .type arm_fp_fcmp_bool_un,%function
13 | arm_fp_fcmp_bool_un:
14 | 
15 |   // This function isn't based on the general-purpose code in fcmp.h, because
16 |   // it's more effort than needed. Here we just need to identify whether or not
17 |   // there's at least one NaN in the inputs. There's no need to vary that check
18 |   // based on the sign bit, so we might as well just do the NaN test as quickly
19 |   // as possible.
20 |   MOV     r12, #0xFF << 24   // r12 = 0xFF000000: an infinity (0x7F800000) shifted left by one
21 |   CMP     r12, r0, LSL #1    // if LO, then r12 < (r0 << 1), so r0 is a NaN
22 |   CMPHS   r12, r1, LSL #1    // if not LO, then do the same check for r1
23 | 
24 |   // If HS, then we have no NaNs and return false. We do this as quickly as we
25 |   // can (not stopping to take two instructions setting up r0 for both
26 |   // possibilities), on the assumption that NaNs are rare and we want to
27 |   // optimize for the non-NaN path.
28 |   MOVHS   r0, #0
29 |   BXHS    lr
30 | 
31 |   // Otherwise, we have at least one NaN, and return true.
32 |   MOV     r0, #1             // reached only when the compares above ended LO
33 |   BX      lr
34 | 
35 |   .size arm_fp_fcmp_bool_un, .-arm_fp_fcmp_bool_un
36 | 


--------------------------------------------------------------------------------
/fp/at32/fcmp_flags.S:
--------------------------------------------------------------------------------
 1 | // Single-precision comparison, returning LO / EQ / HI in the flags, with the
 2 | // 'unordered result' case also returning HI.
 3 | //
 4 | // Copyright (c) 1994-1998,2025, Arm Limited.
 5 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 | 
 7 |   .syntax unified
 8 |   .text
 9 |   .p2align 2
10 | 
11 | op0 .req r0
12 | op1 .req r1
13 | .macro SetReturnRegister
14 | .endm
15 | 
16 |   .globl arm_fp_fcmp_flags
17 |   .type arm_fp_fcmp_flags,%function
18 | arm_fp_fcmp_flags:
19 |   #include "fcmp.h"
20 | 
21 | fcmp_NaN:
22 |   // If any NaN is involved, the return value for this comparison is
23 |   // to set the flags as if a > b, so that the HI condition will return true.
24 |   MOV r0, #1
25 |   CMP r0, #0
26 |   BX lr
27 | 
28 |   .size arm_fp_fcmp_flags, .-arm_fp_fcmp_flags
29 | 


--------------------------------------------------------------------------------
/fp/at32/fcmp_flags_eq.S:
--------------------------------------------------------------------------------
 1 | // Single-precision comparison, returning EQ / NE in the flags.
 2 | //
 3 | // Copyright (c) 1994-1998,2025, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 |   .syntax unified
 7 |   .text
 8 |   .p2align 2
 9 | 
10 | op0 .req r0
11 | op1 .req r1
12 | .macro SetReturnRegister
13 | .endm
14 | 
15 |   .globl arm_fp_fcmp_flags_eq
16 |   .type arm_fp_fcmp_flags_eq,%function
17 | arm_fp_fcmp_flags_eq:
18 |   #include "fcmp.h"
19 | 
20 | fcmp_NaN:
21 |   // If any NaN is involved, the return value for this comparison is false.
22 |   MOVS r0, #1   // the value in r0 is unimportant; this just sets Z=0
23 |   BX lr
24 | 
25 |   .size arm_fp_fcmp_flags_eq, .-arm_fp_fcmp_flags_eq
26 | 


--------------------------------------------------------------------------------
/fp/at32/fcmp_flags_rev.S:
--------------------------------------------------------------------------------
 1 | // Single-precision comparison, with the input operands reversed, returning
 2 | // LO / EQ / HI in the flags, with the 'unordered result' case also returning
 3 | // HI.
 4 | //
 5 | // Copyright (c) 1994-1998,2025, Arm Limited.
 6 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 7 | 
 8 |   .syntax unified
 9 |   .text
10 |   .p2align 2
11 | 
12 | op0 .req r1
13 | op1 .req r0
14 | .macro SetReturnRegister
15 | .endm
16 | 
17 |   .globl arm_fp_fcmp_flags_rev
18 |   .type arm_fp_fcmp_flags_rev,%function
19 | arm_fp_fcmp_flags_rev:
20 |   #include "fcmp.h"
21 | 
22 | fcmp_NaN:
23 |   // If any NaN is involved, the return value for this comparison is
24 |   // to set the flags as if a > b, so that the HI condition will return true.
25 |   MOV r0, #1
26 |   CMP r0, #0
27 |   BX lr
28 | 
29 |   .size arm_fp_fcmp_flags_rev, .-arm_fp_fcmp_flags_rev
30 | 


--------------------------------------------------------------------------------
/fp/common/fnan2.c:
--------------------------------------------------------------------------------
 1 | // Helper function for handling single-precision input NaNs.
 2 | //
 3 | // Copyright (c) 2025, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | // This helper function is available for use by single-precision float
 7 | // arithmetic implementations to handle propagating NaNs from the input
 8 | // operands to the output, in a way that matches Arm hardware FP.
 9 | //
10 | // On input, a and b are floating-point numbers in IEEE 754 encoding, and at
11 | // least one of them must be a NaN. The return value is the correct output NaN.
12 | 
13 | unsigned
14 | __fnan2 (unsigned a, unsigned b)
15 | {
16 |   unsigned aadj = (a << 1) + 0x00800000;
17 |   unsigned badj = (b << 1) + 0x00800000;
18 |   if (aadj > 0xff800000)
19 |     return a | 0x00400000;
20 |   if (badj > 0xff800000)
21 |     return b | 0x00400000;
22 |   if (aadj < 0x00800000)
23 |     return a;
24 |   else /* expect (badj < 0x00800000) */
25 |     return b;
26 | }
27 | 


--------------------------------------------------------------------------------
/fp/common/fnorm2.c:
--------------------------------------------------------------------------------
 1 | // Helper function for handling single-precision input denormals.
 2 | //
 3 | // Copyright (c) 2025, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | // This helper function is available for use by single-precision float
 7 | // arithmetic implementations, to handle denormal inputs on entry by
 8 | // renormalizing the mantissa and modifying the exponent to match.
 9 | 
10 | // Structure containing the function's inputs and outputs.
11 | //
12 | // On entry: a, b are two input floating-point numbers, still in IEEE 754
13 | // encoding. expa and expb are the 8-bit exponents of those numbers, extracted
14 | // and shifted down to the low 8 bits of the word, with no other change.
 15 | // Neither a nor b should be zero, or have the maximum exponent (indicating an
 16 | // infinity or NaN).
17 | //
18 | // On exit: each of a and b contains the mantissa of the input value, with the
19 | // leading 1 bit made explicit, and shifted up to the top of the word. If expa
20 | // was zero (indicating that a was denormal) then it is now represented as a
21 | // normalized number with an out-of-range exponent (zero or negative). The same
22 | // applies to expb and b.
23 | struct fnorm2
24 | {
25 |   unsigned a, b, expa, expb;
26 | };
27 | 
28 | void
29 | __fnorm2 (struct fnorm2 *values)
30 | {
31 |   values->a <<= 8;
32 |   values->b <<= 8;
33 |   if (values->expa == 0)
34 |     {
35 |       unsigned shift = __builtin_clz (values->a);
36 |       values->a <<= shift;
37 |       values->expa = 1 - shift;
38 |     }
39 |   else
40 |     {
41 |       values->a |= 0x80000000;
42 |     }
43 |   if (values->expb == 0)
44 |     {
45 |       unsigned shift = __builtin_clz (values->b);
46 |       values->b <<= shift;
47 |       values->expb = 1 - shift;
48 |     }
49 |   else
50 |     {
51 |       values->b |= 0x80000000;
52 |     }
53 | }
54 | 


--------------------------------------------------------------------------------
/fp/include/endian.h:
--------------------------------------------------------------------------------
 1 | // Register aliases for endian-independent floating point code.
 2 | //
 3 | // Copyright (c) 2025, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | // This header file should be included from assembly source code (not C). It
 7 | // defines two pairs of register aliases, for handling 64-bit values passed and
 8 | // returned from functions in the AArch32 integer registers:
 9 | //
10 | //   ah, al      the high and low words of a 64-bit value passed in {r0,r1}
11 | //   bh, bl      the high and low words of a 64-bit value passed in {r2,r3}
12 | //
13 | // Which alias goes with which register depends on endianness.
14 | 
15 | #ifdef __BIG_ENDIAN__
16 | // Big-endian: high words are in lower-numbered registers.
17 | ah .req r0
18 | al .req r1
19 | bh .req r2
20 | bl .req r3
21 | #else
22 | // Little-endian: low words are in lower-numbered registers.
23 | al .req r0
24 | ah .req r1
25 | bl .req r2
26 | bh .req r3
27 | #endif
28 | 
29 | 


--------------------------------------------------------------------------------
/fp/test/test-ui2f.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Tests of IEEE 754 uint32 to single-precision conversion
 3 |  *
 4 |  * Copyright (c) 1999-2025, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include <inttypes.h>
 9 | #include <stdbool.h>
10 | #include <stdint.h>
11 | #include <stdio.h>
12 | #include <string.h>
13 | 
14 | struct test
15 | {
16 |   uint32_t in, out;
17 | };
18 | 
19 | static const struct test tests[] = {
20 | };
21 | 
22 | float
23 | make_float (uint32_t x)
24 | {
25 |   float r;
26 |   memcpy (&r, &x, sizeof (r));
27 |   return r;
28 | }
29 | 
30 | uint32_t
31 | unmake_float (float x)
32 | {
33 |   uint32_t r;
34 |   memcpy (&r, &x, sizeof (r));
35 |   return r;
36 | }
37 | 
38 | int
39 | main (void)
40 | {
41 |   bool failed = false;
42 | 
43 |   for (size_t i = 0; i < sizeof (tests) / sizeof (tests[0]); i++)
44 |     {
45 |       const struct test *t = &tests[i];
46 | 
47 | #ifdef USE_NATIVE_ARITHMETIC
48 |       /* If you compile with USE_NATIVE_ARITHMETIC defined, the same
49 |        * set of tests will be run using the toolchain's built in float
50 |        * arithmetic, instead of calling arm_fp_ui2f. */
51 |       float out = (float)t->in;
52 |       uint32_t outbits = unmake_float(out);
53 | #else
54 |       extern uint32_t arm_fp_i2f(uint32_t);
55 |       uint32_t outbits = arm_fp_i2f(t->in);
56 | #endif
57 | 
58 |       if (outbits != t->out)
59 | 	{
60 | 	  printf ("FAIL: i2f(%08" PRIx32 ") -> %08" PRIx32
61 | 		  ", expected %08" PRIx32 "\n", t->in, outbits, t->out);
62 | 	  failed = true;
63 | 	}
64 |     }
65 | 
66 |   if (!failed)
67 |     printf ("all passed\n");
68 | 
69 |   return failed;
70 | }
71 | 


--------------------------------------------------------------------------------
/math/aarch64/advsimd/cexpi.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Double-precision vector sincos function - return-by-value interface.
 3 |  *
 4 |  * Copyright (c) 2023-2024, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "v_sincos_common.h"
 9 | #include "v_math.h"
10 | #include "test_defs.h"
11 | 
12 | static float64x2x2_t VPCS_ATTR NOINLINE
13 | special_case (float64x2_t x, uint64x2_t special, float64x2x2_t y)
14 | {
15 |   return (float64x2x2_t){ v_call_f64 (sin, x, y.val[0], special),
16 | 			  v_call_f64 (cos, x, y.val[1], special) };
17 | }
18 | 
19 | /* Double-precision vector function allowing calculation of both sin and cos in
20 |    one function call, using shared argument reduction and separate polynomials.
21 |    Largest observed error is for sin, 3.22 ULP:
22 |    v_sincos_sin (0x1.d70eef40f39b1p+12) got -0x1.ffe9537d5dbb7p-3
23 | 				       want -0x1.ffe9537d5dbb4p-3.  */
24 | VPCS_ATTR float64x2x2_t
25 | _ZGVnN2v_cexpi (float64x2_t x)
26 | {
27 |   const struct v_sincos_data *d = ptr_barrier (&v_sincos_data);
28 |   uint64x2_t special = check_ge_rangeval (x, d);
29 | 
30 |   float64x2x2_t sc = v_sincos_inline (x, d);
31 | 
32 |   if (unlikely (v_any_u64 (special)))
33 |     return special_case (x, special, sc);
34 |   return sc;
35 | }
36 | 
37 | TEST_DISABLE_FENV (_ZGVnN2v_cexpi_cos)
38 | TEST_DISABLE_FENV (_ZGVnN2v_cexpi_sin)
39 | TEST_ULP (_ZGVnN2v_cexpi_sin, 2.73)
40 | TEST_ULP (_ZGVnN2v_cexpi_cos, 2.73)
41 | #define V_CEXPI_INTERVAL(lo, hi, n)                                           \
42 |   TEST_INTERVAL (_ZGVnN2v_cexpi_sin, lo, hi, n)                               \
43 |   TEST_INTERVAL (_ZGVnN2v_cexpi_cos, lo, hi, n)
44 | V_CEXPI_INTERVAL (0, 0x1p23, 500000)
45 | V_CEXPI_INTERVAL (-0, -0x1p23, 500000)
46 | V_CEXPI_INTERVAL (0x1p23, inf, 10000)
47 | V_CEXPI_INTERVAL (-0x1p23, -inf, 10000)
48 | 


--------------------------------------------------------------------------------
/math/aarch64/advsimd/cexpif.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Single-precision vector cexpi function.
 3 |  *
 4 |  * Copyright (c) 2023-2024, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "v_sincosf_common.h"
 9 | #include "v_math.h"
10 | #include "test_defs.h"
11 | 
12 | static float32x4x2_t VPCS_ATTR NOINLINE
13 | special_case (float32x4_t x, uint32x4_t special, float32x4x2_t y)
14 | {
15 |   return (float32x4x2_t){ v_call_f32 (sinf, x, y.val[0], special),
16 | 			  v_call_f32 (cosf, x, y.val[1], special) };
17 | }
18 | 
19 | /* Single-precision vector function allowing calculation of both sin and cos in
20 |    one function call, using shared argument reduction and separate low-order
21 |    polynomials.
22 |    Worst-case error for sin is 1.67 ULP:
23 |    v_cexpif_sin(0x1.c704c4p+19) got 0x1.fff698p-5 want 0x1.fff69cp-5
24 |    Worst-case error for cos is 1.81 ULP:
25 |    v_cexpif_cos(0x1.e506fp+19) got -0x1.ffec6ep-6 want -0x1.ffec72p-6.  */
26 | VPCS_ATTR float32x4x2_t
27 | _ZGVnN4v_cexpif (float32x4_t x)
28 | {
29 |   const struct v_sincosf_data *d = ptr_barrier (&v_sincosf_data);
30 |   uint32x4_t special = check_ge_rangeval (x, d);
31 | 
32 |   float32x4x2_t sc = v_sincosf_inline (x, d);
33 | 
34 |   if (unlikely (v_any_u32 (special)))
35 |     return special_case (x, special, sc);
36 |   return sc;
37 | }
38 | 
39 | TEST_DISABLE_FENV (_ZGVnN4v_cexpif_sin)
40 | TEST_DISABLE_FENV (_ZGVnN4v_cexpif_cos)
41 | TEST_ULP (_ZGVnN4v_cexpif_sin, 1.17)
42 | TEST_ULP (_ZGVnN4v_cexpif_cos, 1.31)
43 | #define V_CEXPIF_INTERVAL(lo, hi, n)                                          \
44 |   TEST_INTERVAL (_ZGVnN4v_cexpif_sin, lo, hi, n)                              \
45 |   TEST_INTERVAL (_ZGVnN4v_cexpif_cos, lo, hi, n)
46 | V_CEXPIF_INTERVAL (0, 0x1p20, 500000)
47 | V_CEXPIF_INTERVAL (-0, -0x1p20, 500000)
48 | V_CEXPIF_INTERVAL (0x1p20, inf, 10000)
49 | V_CEXPIF_INTERVAL (-0x1p20, -inf, 10000)
50 | 


--------------------------------------------------------------------------------
/math/aarch64/advsimd/modf.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Double-precision vector modf(x, *y) function.
 3 |  *
 4 |  * Copyright (c) 2024, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "v_math.h"
 9 | #include "test_sig.h"
10 | #include "test_defs.h"
11 | 
12 | /* Modf algorithm. Produces exact values in all rounding modes.  */
13 | float64x2_t VPCS_ATTR V_NAME_D1_L1 (modf) (float64x2_t x, double *out_int)
14 | {
15 |   /* Get integer component of x.  */
16 |   float64x2_t rounded = vrndq_f64 (x);
17 |   vst1q_f64 (out_int, rounded);
18 | 
19 |   /* Subtract integer component from input.  */
20 |   uint64x2_t remaining = vreinterpretq_u64_f64 (vsubq_f64 (x, rounded));
21 | 
22 |   /* Return +0 for integer x.  */
23 |   uint64x2_t is_integer = vceqq_f64 (x, rounded);
24 |   return vreinterpretq_f64_u64 (vbicq_u64 (remaining, is_integer));
25 | }
26 | 
27 | TEST_ULP (_ZGVnN2vl8_modf_frac, 0.0)
28 | TEST_SYM_INTERVAL (_ZGVnN2vl8_modf_frac, 0, 1, 20000)
29 | TEST_SYM_INTERVAL (_ZGVnN2vl8_modf_frac, 1, inf, 20000)
30 | 
31 | TEST_ULP (_ZGVnN2vl8_modf_int, 0.0)
32 | TEST_SYM_INTERVAL (_ZGVnN2vl8_modf_int, 0, 1, 20000)
33 | TEST_SYM_INTERVAL (_ZGVnN2vl8_modf_int, 1, inf, 20000)
34 | 


--------------------------------------------------------------------------------
/math/aarch64/advsimd/modff.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Single-precision vector modf(x, *y) function.
 3 |  *
 4 |  * Copyright (c) 2024, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "v_math.h"
 9 | #include "test_sig.h"
10 | #include "test_defs.h"
11 | 
12 | /* Modff algorithm. Produces exact values in all rounding modes.  */
13 | float32x4_t VPCS_ATTR NOINLINE V_NAME_F1_L1 (modf) (float32x4_t x,
14 | 						    float *out_int)
15 | {
16 |   /* Get integer component of x.  */
17 |   float32x4_t rounded = vrndq_f32 (x);
18 |   vst1q_f32 (out_int, rounded);
19 | 
20 |   /* Subtract integer component from input.  */
21 |   uint32x4_t remaining = vreinterpretq_u32_f32 (vsubq_f32 (x, rounded));
22 | 
23 |   /* Return +0 for integer x.  */
24 |   uint32x4_t is_integer = vceqq_f32 (x, rounded);
25 |   return vreinterpretq_f32_u32 (vbicq_u32 (remaining, is_integer));
26 | }
27 | 
28 | TEST_ULP (_ZGVnN4vl4_modff_frac, 0.0)
29 | TEST_SYM_INTERVAL (_ZGVnN4vl4_modff_frac, 0, 1, 20000)
30 | TEST_SYM_INTERVAL (_ZGVnN4vl4_modff_frac, 1, inf, 20000)
31 | 
32 | TEST_ULP (_ZGVnN4vl4_modff_int, 0.0)
33 | TEST_SYM_INTERVAL (_ZGVnN4vl4_modff_int, 0, 1, 20000)
34 | TEST_SYM_INTERVAL (_ZGVnN4vl4_modff_int, 1, inf, 20000)
35 | 


--------------------------------------------------------------------------------
/math/aarch64/advsimd/sincospi.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Double-precision vector sincospi function.
 3 |  *
 4 |  * Copyright (c) 2024-2025, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | #include "v_sincospi_common.h"
 8 | #include "v_math.h"
 9 | #include "test_defs.h"
10 | 
11 | /* Double-precision vector function allowing calculation of both sin and cos in
12 |    one function call, using separate argument reduction and shared low-order
13 |    polynomials.
14 |    Approximation for vector double-precision sincospi(x).
15 |    Maximum Error 3.09 ULP:
16 |   _ZGVnN2v_sincospi_sin(0x1.7a41deb4b21e1p+14) got 0x1.fd54d0b327cf1p-1
17 | 					      want 0x1.fd54d0b327cf4p-1
18 |    Maximum Error 3.16 ULP:
19 |   _ZGVnN2v_sincospi_cos(-0x1.11e3c7e284adep-5) got 0x1.fd2da484ff3ffp-1
20 | 					      want 0x1.fd2da484ff402p-1.  */
21 | VPCS_ATTR void
22 | _ZGVnN2vl8l8_sincospi (float64x2_t x, double *out_sin, double *out_cos)
23 | {
24 |   const struct v_sincospi_data *d = ptr_barrier (&v_sincospi_data);
25 | 
26 |   float64x2x2_t sc = v_sincospi_inline (x, d);
27 | 
28 |   vst1q_f64 (out_sin, sc.val[0]);
29 |   vst1q_f64 (out_cos, sc.val[1]);
30 | }
31 | 
32 | #if WANT_C23_TESTS
33 | TEST_DISABLE_FENV (_ZGVnN2v_sincospi_cos)
34 | TEST_DISABLE_FENV (_ZGVnN2v_sincospi_sin)
35 | TEST_ULP (_ZGVnN2v_sincospi_sin, 2.59)
36 | TEST_ULP (_ZGVnN2v_sincospi_cos, 2.66)
37 | #  define V_SINCOSPI_INTERVAL(lo, hi, n)                                      \
38 |     TEST_SYM_INTERVAL (_ZGVnN2v_sincospi_sin, lo, hi, n)                      \
39 |     TEST_SYM_INTERVAL (_ZGVnN2v_sincospi_cos, lo, hi, n)
40 | V_SINCOSPI_INTERVAL (0, 0x1p-63, 10000)
41 | V_SINCOSPI_INTERVAL (0x1p-63, 0.5, 50000)
42 | V_SINCOSPI_INTERVAL (0.5, 0x1p63, 50000)
43 | V_SINCOSPI_INTERVAL (0x1p63, inf, 10000)
44 | #endif
45 | 


--------------------------------------------------------------------------------
/math/aarch64/advsimd/sincospif.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Single-precision vector sincospi function.
 3 |  *
 4 |  * Copyright (c) 2024-2025, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "v_sincospif_common.h"
 9 | #include "v_math.h"
10 | #include "test_defs.h"
11 | #include "mathlib.h"
12 | 
13 | /* Single-precision vector function allowing calculation of both sinpi and
14 |    cospi in one function call, using shared argument reduction and polynomials.
15 |    Worst-case error for sin is 3.04 ULP:
16 |    _ZGVnN4v_sincospif_sin(0x1.1d341ap-1) got 0x1.f7cd56p-1 want 0x1.f7cd5p-1.
17 |    Worst-case error for cos is 3.18 ULP:
18 |    _ZGVnN4v_sincospif_cos(0x1.d341a8p-5) got 0x1.f7cd56p-1 want 0x1.f7cd5p-1.
19 |  */
20 | VPCS_ATTR void
21 | _ZGVnN4vl4l4_sincospif (float32x4_t x, float *out_sin, float *out_cos)
22 | {
23 |   const struct v_sincospif_data *d = ptr_barrier (&v_sincospif_data);
24 | 
25 |   float32x4x2_t sc = v_sincospif_inline (x, d);
26 | 
27 |   vst1q_f32 (out_sin, sc.val[0]);
28 |   vst1q_f32 (out_cos, sc.val[1]);
29 | }
30 | 
31 | #if WANT_C23_TESTS
32 | TEST_DISABLE_FENV (_ZGVnN4v_sincospif_sin)
33 | TEST_DISABLE_FENV (_ZGVnN4v_sincospif_cos)
34 | TEST_ULP (_ZGVnN4v_sincospif_sin, 2.54)
35 | TEST_ULP (_ZGVnN4v_sincospif_cos, 2.68)
36 | #  define V_SINCOSPIF_INTERVAL(lo, hi, n)                                     \
37 |     TEST_SYM_INTERVAL (_ZGVnN4v_sincospif_sin, lo, hi, n)                     \
38 |     TEST_SYM_INTERVAL (_ZGVnN4v_sincospif_cos, lo, hi, n)
39 | V_SINCOSPIF_INTERVAL (0, 0x1p-63, 10000)
40 | V_SINCOSPIF_INTERVAL (0x1p-63, 0.5, 50000)
41 | V_SINCOSPIF_INTERVAL (0.5, 0x1p31, 50000)
42 | V_SINCOSPIF_INTERVAL (0x1p31, inf, 10000)
43 | #endif
44 | 


--------------------------------------------------------------------------------
/math/aarch64/advsimd/v_poly_f32.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Helpers for evaluating polynomials on single-precision AdvSIMD input, using
 3 |  * various schemes.
 4 |  *
 5 |  * Copyright (c) 2023-2024, Arm Limited.
 6 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 7 |  */
 8 | 
 9 | #ifndef MATH_POLY_ADVSIMD_F32_H
10 | #define MATH_POLY_ADVSIMD_F32_H
11 | 
12 | #include <arm_neon.h>
13 | 
14 | /* Wrap AdvSIMD f32 helpers: evaluation of some scheme/order has form:
15 |    v_[scheme]_[order]_f32.  */
16 | #define VTYPE float32x4_t
17 | #define FMA(x, y, z) vfmaq_f32 (z, x, y)
18 | #define VWRAP(f) v_##f##_f32
19 | #include "poly_generic.h"
20 | #undef VWRAP
21 | #undef FMA
22 | #undef VTYPE
23 | 
24 | #endif
25 | 


--------------------------------------------------------------------------------
/math/aarch64/advsimd/v_poly_f64.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Helpers for evaluating polynomials on double-precision AdvSIMD input, using
 3 |  * various schemes.
 4 |  *
 5 |  * Copyright (c) 2023-2024, Arm Limited.
 6 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 7 |  */
 8 | 
 9 | #ifndef MATH_POLY_ADVSIMD_F64_H
10 | #define MATH_POLY_ADVSIMD_F64_H
11 | 
12 | #include <arm_neon.h>
13 | 
14 | /* Wrap AdvSIMD f64 helpers: evaluation of some scheme/order has form:
15 |    v_[scheme]_[order]_f64.  */
16 | #define VTYPE float64x2_t
17 | #define FMA(x, y, z) vfmaq_f64 (z, x, y)
18 | #define VWRAP(f) v_##f##_f64
19 | #include "poly_generic.h"
20 | #undef VWRAP
21 | #undef FMA
22 | #undef VTYPE
23 | 
24 | #endif
25 | 


--------------------------------------------------------------------------------
/math/aarch64/experimental/README.contributors:
--------------------------------------------------------------------------------
 1 | Code in this sub-directory should follow the GNU Coding Standard, but it is
 2 | not expected to be upstreamed into glibc without modification, so
 3 | glibc-specific conventions need not be followed.
 4 | 
 5 | The requirements for portable code apply to non-portable code with the
 6 | following differences:
 7 | 
 8 | 1. Worst-case ULP error should be encoded in filenames (e.g. sin_u35.c). There
 9 |    are no specific restrictions on acceptable ULP error, but if functions
10 |    provide significantly less accuracy than portable equivalents then a clear
11 |    justification for inclusion should be stated in comments at the top of the
12 |    source file. Error bounds of the approximation should be clearly documented
13 |    in comments.
14 | 
15 | 2. Functions are assumed to support round-to-nearest mode by default, unless
16 |    stated; other rounding modes are not required to be provided.
17 | 


--------------------------------------------------------------------------------
/math/aarch64/experimental/acoshf_2u8.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Single-precision acosh(x) function.
 3 |  *
 4 |  * Copyright (c) 2022-2024, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "math_config.h"
 9 | #include "test_sig.h"
10 | #include "test_defs.h"
11 | 
12 | #define Ln2 (0x1.62e4p-1f)
13 | #define MinusZero 0x80000000
14 | #define SquareLim 0x5f800000 /* asuint(0x1p64).  */
15 | #define Two 0x40000000
16 | 
17 | /* acoshf approximation using a variety of approaches on different intervals:
18 | 
19 |    x >= 2^64: We cannot square x without overflow. For huge x, sqrt(x*x - 1) is
20 |    close enough to x that we can calculate the result by ln(2x) == ln(x) +
21 |    ln(2). The greatest error in the region is 0.94 ULP:
22 |    acoshf(0x1.15f706p+92) got 0x1.022e14p+6 want 0x1.022e16p+6.
23 | 
 24 |    x > 2: Calculate the result directly using definition of acosh(x) = ln(x +
25 |    sqrt(x*x - 1)). Greatest error in this region is 1.30 ULP:
26 |    acoshf(0x1.249d8p+1) got 0x1.77e1aep+0 want 0x1.77e1bp+0.
27 | 
28 |    0 <= x <= 2: Calculate the result using log1p. For x < 1, acosh(x) is
29 |    undefined. For 1 <= x <= 2, the greatest error is 2.78 ULP:
30 |    acoshf(0x1.07887p+0) got 0x1.ef9e9cp-3 want 0x1.ef9ea2p-3.  */
31 | float
32 | acoshf (float x)
33 | {
34 |   uint32_t ix = asuint (x);
35 | 
36 |   if (unlikely (ix >= MinusZero))
37 |     return __math_invalidf (x);
38 | 
39 |   if (unlikely (ix >= SquareLim))
40 |     return logf (x) + Ln2;
41 | 
42 |   if (ix > Two)
43 |     return logf (x + sqrtf (x * x - 1));
44 | 
45 |   float xm1 = x - 1;
46 |   return log1pf (xm1 + sqrtf (2 * xm1 + xm1 * xm1));
47 | }
48 | 
49 | TEST_SIG (S, F, 1, acosh, 1.0, 10.0)
50 | TEST_ULP (acoshf, 2.30)
51 | TEST_INTERVAL (acoshf, 0, 1, 100)
52 | TEST_INTERVAL (acoshf, 1, 2, 10000)
53 | TEST_INTERVAL (acoshf, 2, 0x1p64, 100000)
54 | TEST_INTERVAL (acoshf, 0x1p64, inf, 100000)
55 | TEST_INTERVAL (acoshf, -0, -inf, 10000)
56 | 


--------------------------------------------------------------------------------
/math/aarch64/experimental/advsimd/powf_umax.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Low accuracy single-precision vector pow(x, y) function.
 3 |  *
 4 |  * Copyright (c) 2025, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "mathlib.h"
 9 | #include "v_math.h"
10 | #include "test_defs.h"
11 | 
12 | /* Fast inaccurate powf.
13 |    Only tested with make check yet.
14 |    Maximum measured error: 214.066 +0.5 ULP. BASIS 2.
15 |    Maximum measured error: 262.151 +0.5 ULP. BASIS e.
16 |    Maximum measured error: 249.575 +0.5 ULP. BASIS 10.  */
17 | float32x4_t VPCS_ATTR NOINLINE
18 | arm_math_advsimd_fast_powf (float32x4_t x, float32x4_t y)
19 | {
20 |   float32x4_t logx = _ZGVnN4v_log2f (x);
21 |   float32x4_t ylogx = vmulq_f32 (y, logx);
22 |   return _ZGVnN4v_exp2f (ylogx);
23 | }
24 | 
25 | TEST_ULP (arm_math_advsimd_fast_powf, 4096)
26 | TEST_DISABLE_FENV (arm_math_advsimd_fast_powf)
27 | TEST_INTERVAL2 (arm_math_advsimd_fast_powf, 0x1p-1, 0x1p1, 0x1p-7, 0x1p7,
28 | 		500000)
29 | TEST_INTERVAL2 (arm_math_advsimd_fast_powf, 0x1p-1, 0x1p1, -0x1p-7, -0x1p7,
30 | 		500000)
31 | TEST_INTERVAL2 (arm_math_advsimd_fast_powf, 0x1p-70, 0x1p70, 0x1p-1, 0x1p1,
32 | 		500000)
33 | TEST_INTERVAL2 (arm_math_advsimd_fast_powf, 0x1p-70, 0x1p70, -0x1p-1, -0x1p1,
34 | 		500000)
35 | TEST_INTERVAL2 (arm_math_advsimd_fast_powf, 0x1.ep-1, 0x1.1p0, 0x1p8, 0x1p14,
36 | 		500000)
37 | TEST_INTERVAL2 (arm_math_advsimd_fast_powf, 0x1.ep-1, 0x1.1p0, -0x1p8, -0x1p14,
38 | 		500000)
39 | 


--------------------------------------------------------------------------------
/math/aarch64/experimental/asin_data.c:
--------------------------------------------------------------------------------
 1 | /*
  2 |  * Coefficients for double-precision asin(x) function.
 3 |  *
 4 |  * Copyright (c) 2023-2024, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "math_config.h"
 9 | 
 10 | /* Approximate asin(x) directly in [0x1p-106, 0.25]. See tools/asin.sollya
 11 |    for how these coefficients were generated.  */
12 | const double __asin_poly[] = {
13 |   /* Polynomial approximation of  (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x))
14 |      on [ 0x1p-106, 0x1p-2 ], relative error: 0x1.c3d8e169p-57.  */
15 |   0x1.555555555554ep-3, 0x1.3333333337233p-4,  0x1.6db6db67f6d9fp-5,
16 |   0x1.f1c71fbd29fbbp-6, 0x1.6e8b264d467d6p-6,  0x1.1c5997c357e9dp-6,
17 |   0x1.c86a22cd9389dp-7, 0x1.856073c22ebbep-7,  0x1.fd1151acb6bedp-8,
18 |   0x1.087182f799c1dp-6, -0x1.6602748120927p-7, 0x1.cfa0dd1f9478p-6,
19 | };
20 | 


--------------------------------------------------------------------------------
/math/aarch64/experimental/asinf_data.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Coefficients for single-precision asin(x) function.
 3 |  *
 4 |  * Copyright (c) 2023-2024, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "math_config.h"
 9 | 
 10 | /* Approximate asinf(x) directly in [0x1p-24, 0.25]. See tools/asinf.sollya
 11 |    for how these coefficients were generated.  */
12 | const float __asinf_poly[] = {
13 |   /* Polynomial approximation of  (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x))  on
14 |      [ 0x1p-24 0x1p-2 ] order = 4 rel error: 0x1.00a23bbp-29 .  */
15 |   0x1.55555ep-3, 0x1.33261ap-4, 0x1.70d7dcp-5, 0x1.b059dp-6, 0x1.3af7d8p-5,
16 | };
17 | 


--------------------------------------------------------------------------------
/math/aarch64/experimental/asinh_data.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Double-precision polynomial coefficients for scalar asinh(x)
 3 |  *
 4 |  * Copyright (c) 2022-2024, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "math_config.h"
 9 | 
10 | /* asinh(x) is odd, and the first term of the Taylor expansion is x, so we can
11 |    approximate the function by x + x^3 * P(x^2), where P(z) has the form:
12 |    C0 + C1 * z + C2 * z^2 + C3 * z^3 + ...
13 |    Note P is evaluated on even powers of x only. See tools/asinh.sollya for the
14 |    algorithm used to generate these coefficients.  */
15 | const struct asinh_data __asinh_data
16 |     = { .poly
17 | 	= { -0x1.55555555554a7p-3, 0x1.3333333326c7p-4, -0x1.6db6db68332e6p-5,
18 | 	    0x1.f1c71b26fb40dp-6, -0x1.6e8b8b654a621p-6, 0x1.1c4daa9e67871p-6,
19 | 	    -0x1.c9871d10885afp-7, 0x1.7a16e8d9d2ecfp-7, -0x1.3ddca533e9f54p-7,
20 | 	    0x1.0becef748dafcp-7, -0x1.b90c7099dd397p-8, 0x1.541f2bb1ffe51p-8,
21 | 	    -0x1.d217026a669ecp-9, 0x1.0b5c7977aaf7p-9, -0x1.e0f37daef9127p-11,
22 | 	    0x1.388b5fe542a6p-12, -0x1.021a48685e287p-14,
23 | 	    0x1.93d4ba83d34dap-18 } };
24 | 


--------------------------------------------------------------------------------
/math/aarch64/experimental/asinhf_data.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Coefficients for single-precision asinh(x) function.
 3 |  *
 4 |  * Copyright (c) 2022-2024, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "math_config.h"
 9 | 
 10 | /* Approximate asinhf(x) directly in [2^-12, 1]. See tools/asinhf.sollya
 11 |    for how these coefficients were generated.  */
12 | const struct asinhf_data __asinhf_data
13 |     = { .coeffs = { -0x1.9b16fap-19f, -0x1.552baap-3f, -0x1.4e572ap-11f,
14 | 		    0x1.3a81dcp-4f, 0x1.65bbaap-10f, -0x1.057f1p-4f,
15 | 		    0x1.6c1d46p-5f, -0x1.4cafe8p-7f } };
16 | 


--------------------------------------------------------------------------------
/math/aarch64/experimental/atan_common.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Double-precision polynomial evaluation function for scalar
 3 |  * atan(x) and atan2(y,x).
 4 |  *
 5 |  * Copyright (c) 2021-2024, Arm Limited.
 6 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 7 |  */
 8 | 
 9 | #include "math_config.h"
10 | #include "poly_scalar_f64.h"
11 | 
12 | /* Polynomial used in fast atan(x) and atan2(y,x) implementations.
13 |    The order 19 polynomial P approximates (atan(sqrt(x))-sqrt(x))/x^(3/2). Returns shift + az + az * z^2 * P(z^2): z only enters through z^2, while az supplies the odd factor.  */
14 | static inline double
15 | eval_poly (double z, double az, double shift)
16 | {
17 |   /* Use split Estrin scheme for P(z^2) with deg(P)=19. Use split instead of
18 |      full scheme to avoid underflow in x^16. The two halves start at poly[0] and poly[8] and are joined by one fma with x8.  */
19 |   double z2 = z * z;
20 |   double x2 = z2 * z2; /* z^4.  */
21 |   double x4 = x2 * x2; /* z^8.  */
22 |   double x8 = x4 * x4; /* z^16.  */
23 |   double y = fma (estrin_11_f64 (z2, x2, x4, x8, __atan_poly_data.poly + 8),
24 | 		  x8, estrin_7_f64 (z2, x2, x4, __atan_poly_data.poly));
25 | 
26 |   /* Finalize. y = shift + az + az * z^2 * P(z^2).  */
27 |   y = fma (y, z2 * az, az);
28 |   y = y + shift;
29 | 
30 |   return y;
31 | }
32 | 
33 | #undef P
34 | 


--------------------------------------------------------------------------------
/math/aarch64/experimental/atan_data.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Double-precision polynomial coefficients for vector atan(x) and atan2(y,x).
 3 |  *
 4 |  * Copyright (c) 2019-2024, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "math_config.h"
 9 | 
10 | const struct atan_poly_data __atan_poly_data
11 |     = { .poly = { /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2)
12 | 		     on [2**-1022, 1.0]. See atan.sollya for details of how
13 | 		     these were generated.  */
14 | 		  -0x1.5555555555555p-2,  0x1.99999999996c1p-3,
15 | 		  -0x1.2492492478f88p-3,  0x1.c71c71bc3951cp-4,
16 | 		  -0x1.745d160a7e368p-4,  0x1.3b139b6a88ba1p-4,
17 | 		  -0x1.11100ee084227p-4,  0x1.e1d0f9696f63bp-5,
18 | 		  -0x1.aebfe7b418581p-5,  0x1.842dbe9b0d916p-5,
19 | 		  -0x1.5d30140ae5e99p-5,  0x1.338e31eb2fbbcp-5,
20 | 		  -0x1.00e6eece7de8p-5,	  0x1.860897b29e5efp-6,
21 | 		  -0x1.0051381722a59p-6,  0x1.14e9dc19a4a4ep-7,
22 | 		  -0x1.d0062b42fe3bfp-9,  0x1.17739e210171ap-10,
23 | 		  -0x1.ab24da7be7402p-13, 0x1.358851160a528p-16 } };
24 | 


--------------------------------------------------------------------------------
/math/aarch64/experimental/atanf_common.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Single-precision polynomial evaluation function for scalar
 3 |  * atan(x) and atan2(y,x).
 4 |  *
 5 |  * Copyright (c) 2021-2024, Arm Limited.
 6 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 7 |  */
 8 | 
 9 | #ifndef PL_MATH_ATANF_COMMON_H
10 | #define PL_MATH_ATANF_COMMON_H
11 | 
12 | #include "math_config.h"
13 | #include "poly_scalar_f32.h"
14 | 
15 | /* Polynomial used in fast atanf(x) and atan2f(y,x) implementations.
16 |    The order 7 polynomial P approximates (atan(sqrt(x))-sqrt(x))/x^(3/2). Returns shift + az + az * z^2 * P(z^2): z only enters through z^2, while az supplies the odd factor.  */
17 | static inline float
18 | eval_poly (float z, float az, float shift)
19 | {
20 |   /* Use 2-level Estrin scheme for P(z^2) with deg(P)=7. However,
21 |      a standard implementation using z8 creates spurious underflow
22 |      in the very last fma (when z^8 is small enough).
23 |      Therefore, we split the last fma into a mul and an fma.
24 |      Horner and single-level Estrin have higher errors that exceed
25 |      threshold.  */
26 |   float z2 = z * z;
27 |   float z4 = z2 * z2;
28 | 
29 |   /* Then assemble polynomial: low half from poly[0], high half from poly[4] scaled by z4 * z4.  */
30 |   float y = fmaf (
31 |       z4, z4 * pairwise_poly_3_f32 (z2, z4, __atanf_poly_data.poly + 4),
32 |       pairwise_poly_3_f32 (z2, z4, __atanf_poly_data.poly));
33 |   /* Finalize:
34 |      y = shift + az + az * z^2 * P(z^2).  */
35 |   return fmaf (y, z2 * az, az) + shift;
36 | }
37 | 
38 | #endif // PL_MATH_ATANF_COMMON_H
39 | 


--------------------------------------------------------------------------------
/math/aarch64/experimental/atanf_data.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Single-precision polynomial coefficients for vector atan(x) and atan2(y,x).
 3 |  *
 4 |  * Copyright (c) 2019-2024, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "math_config.h"
 9 | 
10 | /* Coefficients of polynomial P such that atan(x)~x+x*P(x^2) on [2**-128, 1.0].
11 |  */
12 | const struct atanf_poly_data __atanf_poly_data
13 |     = { .poly
14 | 	= { /* See atanf.sollya for details of how these were generated.  */
15 | 	    -0x1.55555p-2f, 0x1.99935ep-3f, -0x1.24051ep-3f, 0x1.bd7368p-4f,
16 | 	    -0x1.491f0ep-4f, 0x1.93a2c0p-5f, -0x1.4c3c60p-6f,
17 | 	    0x1.01fd88p-8f } };
18 | 


--------------------------------------------------------------------------------
/math/aarch64/experimental/cbrt_data.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Coefficients and table entries for double-precision cbrt(x).
 3 |  *
 4 |  * Copyright (c) 2022-2024, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "math_config.h"
 9 | 
10 | const struct cbrt_data __cbrt_data
11 |   = {.poly = { /* Coefficients for very rough approximation of cbrt(x) in [0.5, 1].
12 |                   See cbrt.sollya for details of generation.  */
13 | 	      0x1.c14e8ee44767p-2, 0x1.dd2d3f99e4c0ep-1, -0x1.08e83026b7e74p-1, 0x1.2c74eaa3ba428p-3},
14 |      .table = { /* table[i] = 2^((i - 2) / 3).  */
15 | 	         0x1.428a2f98d728bp-1, 0x1.965fea53d6e3dp-1, 0x1p0, 0x1.428a2f98d728bp0, 0x1.965fea53d6e3dp0}};
16 | 


--------------------------------------------------------------------------------
/math/aarch64/experimental/cbrtf_data.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Coefficients and table entries for single-precision cbrt(x).
 3 |  *
 4 |  * Copyright (c) 2022-2024, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "math_config.h"
 9 | 
10 | const struct cbrtf_data __cbrtf_data
11 |   = {.poly = { /* Coefficients for very rough approximation of cbrt(x) in [0.5, 1].
12 |                   See cbrtf.sollya for details of generation.  */
13 | 	        0x1.c14e96p-2, 0x1.dd2d3p-1, -0x1.08e81ap-1, 0x1.2c74c2p-3},
14 |      .table = { /* table[i] = 2^((i - 2) / 3).  */
15 | 	        0x1.428a3p-1, 0x1.965feap-1, 0x1p0, 0x1.428a3p0, 0x1.965feap0}};
16 | 


--------------------------------------------------------------------------------
/math/aarch64/experimental/expf_data.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Coeffs and table entries for single-precision exp. Copied from
 3 |  * math/exp2f_data.c, with a 32-entry (2^5) table.
 4 |  *
 5 |  * Copyright (c) 2017-2024, Arm Limited.
 6 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 7 |  */
 8 | 
 9 | #include "math_config.h"
10 | 
11 | #define N (1 << EXPF_TABLE_BITS)
12 | 
13 | const struct expf_data __expf_data = {
14 |   /* tab[i] = uint(2^(i/N)) - (i << 52-BITS)
15 |      used for computing 2^(k/N) for an int |k| < 150 N as
16 |      double(tab[k%N] + (k << 52-BITS)).  */
17 |   .tab = {
18 | 0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, 0x3fef9301d0125b51,
19 | 0x3fef72b83c7d517b, 0x3fef54873168b9aa, 0x3fef387a6e756238, 0x3fef1e9df51fdee1,
20 | 0x3fef06fe0a31b715, 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d,
21 | 0x3feebfdad5362a27, 0x3feeb42b569d4f82, 0x3feeab07dd485429, 0x3feea47eb03a5585,
22 | 0x3feea09e667f3bcd, 0x3fee9f75e8ec5f74, 0x3feea11473eb0187, 0x3feea589994cce13,
23 | 0x3feeace5422aa0db, 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d,
24 | 0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c, 0x3fef3720dcef9069,
25 | 0x3fef5818dcfba487, 0x3fef7c97337b9b5f, 0x3fefa4afa2a490da, 0x3fefd0765b6e4540,
26 |   },
27 |   .invln2_scaled = 0x1.71547652b82fep+0 * N,
28 |   .poly_scaled = {
29 |   0x1.c6af84b912394p-5/N/N/N, 0x1.ebfce50fac4f3p-3/N/N, 0x1.62e42ff0c52d6p-1/N,
30 |   },
31 | };
32 | 


--------------------------------------------------------------------------------
/math/aarch64/experimental/expm1_data.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Coefficients for double-precision e^x - 1 function.
 3 |  *
 4 |  * Copyright (c) 2022-2024, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "math_config.h"
 9 | 
10 | /* Generated using fpminimax, see tools/expm1.sollya for details.  */
11 | const double __expm1_poly[] = { 0x1p-1,
12 | 				0x1.5555555555559p-3,
13 | 				0x1.555555555554bp-5,
14 | 				0x1.111111110f663p-7,
15 | 				0x1.6c16c16c1b5f3p-10,
16 | 				0x1.a01a01affa35dp-13,
17 | 				0x1.a01a018b4ecbbp-16,
18 | 				0x1.71ddf82db5bb4p-19,
19 | 				0x1.27e517fc0d54bp-22,
20 | 				0x1.af5eedae67435p-26,
21 | 				0x1.1f143d060a28ap-29 };
22 | 


--------------------------------------------------------------------------------
/math/aarch64/experimental/expm1f_data.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Coefficients for single-precision e^x - 1 function.
 3 |  *
 4 |  * Copyright (c) 2022-2024, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "math_config.h"
 9 | 
10 | /* Generated using fpminimax, see tools/expm1f.sollya for details.  */
11 | const float __expm1f_poly[] = { 0x1.fffffep-2, 0x1.5554aep-3, 0x1.555736p-5,
12 | 				0x1.12287cp-7, 0x1.6b55a2p-10 };
13 | 


--------------------------------------------------------------------------------
/math/aarch64/experimental/log1p_data.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Data used in double-precision log(1+x) function.
 3 |  *
 4 |  * Copyright (c) 2022-2024, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "math_config.h"
 9 | 
10 | /* Polynomial coefficients generated using Remez algorithm, see
11 |    log1p.sollya for details.  */
12 | const struct log1p_data __log1p_data
13 |     = { .coeffs
14 | 	= { -0x1.ffffffffffffbp-2, 0x1.55555555551a9p-2, -0x1.00000000008e3p-2,
15 | 	    0x1.9999999a32797p-3, -0x1.555555552fecfp-3, 0x1.249248e071e5ap-3,
16 | 	    -0x1.ffffff8bf8482p-4, 0x1.c71c8f07da57ap-4, -0x1.9999ca4ccb617p-4,
17 | 	    0x1.7459ad2e1dfa3p-4, -0x1.554d2680a3ff2p-4, 0x1.3b4c54d487455p-4,
18 | 	    -0x1.2548a9ffe80e6p-4, 0x1.0f389a24b2e07p-4, -0x1.eee4db15db335p-5,
19 | 	    0x1.e95b494d4a5ddp-5, -0x1.15fdf07cb7c73p-4, 0x1.0310b70800fcfp-4,
20 | 	    -0x1.cfa7385bdb37ep-6 } };
21 | 


--------------------------------------------------------------------------------
/math/aarch64/experimental/log1pf_data.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Data used in single-precision log1p(x) function.
 3 |  *
 4 |  * Copyright (c) 2022-2024, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | #include "math_config.h"
 8 | 
 9 | /* Polynomial coefficients generated using floating-point minimax
10 |    algorithm, see tools/log1pf.sollya for details.  */
11 | const struct log1pf_data __log1pf_data
12 |     = { .coeffs = { -0x1p-1f, 0x1.5555aap-2f, -0x1.000038p-2f, 0x1.99675cp-3f,
13 | 		    -0x1.54ef78p-3f, 0x1.28a1f4p-3f, -0x1.0da91p-3f,
14 | 		    0x1.abcb6p-4f, -0x1.6f0d5ep-5f } };
15 | 


--------------------------------------------------------------------------------
/math/aarch64/experimental/sve/expf_umax.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Low-accuracy single-precision vector e^x function.
 3 |  *
 4 |  * Copyright (c) 2025, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "sv_math.h"
 9 | #include "test_sig.h"
10 | #include "test_defs.h"
11 | #include "sv_expf_inline.h"
12 | 
13 | /* Roughly 87.3. For x < -Thres, the result is subnormal and not handled
14 |    correctly by FEXPA.  */
15 | #define Thres 0x1.5d5e2ap+6f
16 | 
17 | static const struct data
18 | {
19 |   struct sv_expf_data d;
20 |   float thres;
21 | } data = {
22 |   .d = SV_EXPF_DATA,
23 |   .thres = Thres,
24 | };
25 | /* Slow path: evaluates the inline kernel on all lanes, then passes x, that result and the `special` predicate to sv_call_f32 together with scalar expf (presumably recomputing only the special lanes - confirm in sv_math.h).  */
26 | static svfloat32_t NOINLINE
27 | special_case (svfloat32_t x, svbool_t special, const struct sv_expf_data *d)
28 | {
29 |   return sv_call_f32 (expf, x, expf_inline (x, svptrue_b32 (), d), special);
30 | }
31 | 
32 | /* Low-accuracy SVE expf. If any active lane has |x| > Thres the whole vector
33 |    goes through special_case; otherwise expf_inline is returned directly. Worst-case error is 248.94 +0.5 ULP.
34 |    arm_math_sve_fast_expf(0x1.5d5314p+6) got 0x1.fd3a6p+125
35 | 					want 0x1.fd3c52p+125.  */
36 | svfloat32_t
37 | arm_math_sve_fast_expf (svfloat32_t x, const svbool_t pg)
38 | {
39 |   const struct data *d = ptr_barrier (&data);
40 |   svbool_t is_special_case = svacgt (pg, x, d->thres); /* |x| > thres.  */
41 |   if (unlikely (svptest_any (pg, is_special_case)))
42 |     return special_case (x, is_special_case, &d->d);
43 |   return expf_inline (x, pg, &d->d);
44 | }
45 | 
46 | TEST_ULP (arm_math_sve_fast_expf, 4096)
47 | TEST_DISABLE_FENV (arm_math_sve_fast_expf)
48 | TEST_SYM_INTERVAL (arm_math_sve_fast_expf, 0, Thres, 50000)
49 | TEST_SYM_INTERVAL (arm_math_sve_fast_expf, Thres, inf, 50000)
50 | CLOSE_SVE_ATTR
51 | 


--------------------------------------------------------------------------------
/math/aarch64/experimental/sve/powf_umax.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Low-accuracy single-precision SVE pow function.
 3 |  *
 4 |  * Copyright (c) 2025, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "mathlib.h"
 9 | #include "sv_math.h"
10 | #include "test_defs.h"
11 | 
12 | /* Fast inaccurate SVE powf, computed as exp2 (y * log2 (x)).
13 |    Only tested with make check so far.
14 |    Special cases such as x < 0 are not tested at present,
15 |    because they would fail the checks;
16 |    ideally they should just be allowed to break.
17 |    Maximum measured error: 215.448 +0.5 ULP. BASIS 2.  */
18 | svfloat32_t
19 | arm_math_sve_fast_powf (svfloat32_t x, svfloat32_t y, const svbool_t pg)
20 | {
21 |   svfloat32_t logx = _ZGVsMxv_log2f (x, pg);
22 |   svfloat32_t ylogx = svmul_x (svptrue_b32 (), y, logx); /* Multiply uses an all-true predicate, unlike the pg-governed calls.  */
23 |   return _ZGVsMxv_exp2f (ylogx, pg);
24 | }
25 | 
26 | TEST_ULP (arm_math_sve_fast_powf, 4096)
27 | TEST_DISABLE_FENV (arm_math_sve_fast_powf)
28 | /* Wide intervals spanning the whole domain but shared between x and y.  */
29 | #define SV_POWF_INTERVAL2(xlo, xhi, ylo, yhi, n)                              \
30 |   TEST_INTERVAL2 (arm_math_sve_fast_powf, xlo, xhi, ylo, yhi, n)              \
31 |   TEST_INTERVAL2 (arm_math_sve_fast_powf, xlo, xhi, -ylo, -yhi, n)
32 | SV_POWF_INTERVAL2 (0, 0x1p-126, 0, inf, 40000)
33 | SV_POWF_INTERVAL2 (0x1p-126, 1, 0, inf, 50000)
34 | SV_POWF_INTERVAL2 (1, inf, 0, inf, 50000)
35 | /* x~1 or y~1.  */
36 | SV_POWF_INTERVAL2 (0x1p-1, 0x1p1, 0x1p-10, 0x1p10, 10000)
37 | SV_POWF_INTERVAL2 (0x1.ep-1, 0x1.1p0, 0x1p8, 0x1p16, 10000)
38 | SV_POWF_INTERVAL2 (0x1p-500, 0x1p500, 0x1p-1, 0x1p1, 10000)
39 | /* around estimated argmaxs of ULP error.  */
40 | SV_POWF_INTERVAL2 (0x1p-300, 0x1p-200, 0x1p-20, 0x1p-10, 10000)
41 | SV_POWF_INTERVAL2 (0x1p50, 0x1p100, 0x1p-20, 0x1p-10, 10000)
42 | CLOSE_SVE_ATTR
43 | 


--------------------------------------------------------------------------------
/math/aarch64/experimental/sve/powi.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Double-precision SVE powi(x, n) function.
 3 |  *
 4 |  * Copyright (c) 2020-2024, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "sv_math.h"
 9 | 
10 | /* Optimized double-precision vector powi (double base, long integer power).
11 |    as holds the bases, ns the per-lane exponents and p the governing predicate.
12 |    powi is developed for environments in which accuracy is of much less
13 |    importance than performance, hence we provide no estimate for worst-case error.  */
14 | svfloat64_t
15 | _ZGVsMxvv_powk (svfloat64_t as, svint64_t ns, svbool_t p)
16 | {
17 |   /* Compute powi by successive squaring, right to left.  */
18 |   svfloat64_t acc = sv_f64 (1.0);
19 |   svbool_t want_recip = svcmplt (p, ns, 0); /* Active lanes with n < 0.  */
20 |   svuint64_t ns_abs = svreinterpret_u64 (svabs_x (p, ns));
21 | 
22 |   /* We use a max to avoid needing to check whether any lane != 0 on each
23 |      iteration: the loop runs once per significant bit of the largest |n| among the active lanes (and once when that maximum is 0).  */
24 |   uint64_t max_n = svmaxv (p, ns_abs);
25 | 
26 |   svfloat64_t c = as;
27 |   /* Successively square c, and use merging predication (_m) to determine
28 |      whether or not to perform the multiplication or keep the previous
29 |      iteration.  */
30 |   while (true)
31 |     {
32 |       svbool_t px = svcmpeq (p, svand_x (p, ns_abs, 1ull), 1ull); /* Lanes whose current low exponent bit is set.  */
33 |       acc = svmul_m (px, acc, c);
34 |       max_n >>= 1;
35 |       if (max_n == 0) /* Leave before the shift and squaring below once no exponent bits remain.  */
36 | 	break;
37 | 
38 |       ns_abs = svlsr_x (p, ns_abs, 1);
39 |       c = svmul_x (p, c, c);
40 |     }
41 | 
42 |   /* Negative powers are handled by computing the abs(n) version and then
43 |      taking the reciprocal.  */
44 |   if (svptest_any (want_recip, want_recip))
45 |     acc = svdivr_m (want_recip, acc, 1.0); /* Reversed divide: 1.0 / acc on the want_recip lanes.  */
46 | 
47 |   return acc;
48 | }
49 | CLOSE_SVE_ATTR
50 | 


--------------------------------------------------------------------------------
/math/aarch64/experimental/sve/powif.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Single-precision SVE powi(x, n) function.
 3 |  *
 4 |  * Copyright (c) 2020-2024, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "sv_math.h"
 9 | 
10 | /* Optimized single-precision vector powi (float base, integer power).
11 |    as holds the bases, ns the per-lane exponents and p the governing predicate.
12 |    powi is developed for environments in which accuracy is of much less
13 |    importance than performance, hence we provide no estimate for worst-case error.  */
14 | svfloat32_t
15 | _ZGVsMxvv_powi (svfloat32_t as, svint32_t ns, svbool_t p)
16 | {
17 |   /* Compute powi by successive squaring, right to left.  */
18 |   svfloat32_t acc = sv_f32 (1.f);
19 |   svbool_t want_recip = svcmplt (p, ns, 0); /* Active lanes with n < 0.  */
20 |   svuint32_t ns_abs = svreinterpret_u32 (svabs_x (p, ns));
21 | 
22 |   /* We use a max to avoid needing to check whether any lane != 0 on each
23 |      iteration: the loop runs once per significant bit of the largest |n| among the active lanes (and once when that maximum is 0).  */
24 |   uint32_t max_n = svmaxv (p, ns_abs);
25 | 
26 |   svfloat32_t c = as;
27 |   /* Successively square c, and use merging predication (_m) to determine
28 |      whether or not to perform the multiplication or keep the previous
29 |      iteration.  */
30 |   while (true)
31 |     {
32 |       svbool_t px = svcmpeq (p, svand_x (p, ns_abs, 1), 1); /* Lanes whose current low exponent bit is set.  */
33 |       acc = svmul_m (px, acc, c);
34 |       max_n >>= 1;
35 |       if (max_n == 0) /* Leave before the shift and squaring below once no exponent bits remain.  */
36 | 	break;
37 | 
38 |       ns_abs = svlsr_x (p, ns_abs, 1);
39 |       c = svmul_x (p, c, c);
40 |     }
41 | 
42 |   /* Negative powers are handled by computing the abs(n) version and then
43 |      taking the reciprocal.  */
44 |   if (svptest_any (want_recip, want_recip))
45 |     acc = svdivr_m (want_recip, acc, 1.0f); /* Reversed divide: 1.0f / acc on the want_recip lanes.  */
46 | 
47 |   return acc;
48 | }
49 | CLOSE_SVE_ATTR
50 | 


--------------------------------------------------------------------------------
/math/aarch64/experimental/sve/sv_expf_inline.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * SVE helper for single-precision routines which calculate exp(x) and do
 3 |  * not need special-case handling
 4 |  *
 5 |  * Copyright (c) 2025, Arm Limited.
 6 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 7 |  */
 8 | 
 9 | #ifndef MATH_SV_EXPF_INLINE_H
10 | #define MATH_SV_EXPF_INLINE_H
11 | 
12 | #include "sv_math.h"
13 | #include "test_sig.h"
14 | #include "test_defs.h"
15 | 
16 | struct sv_expf_data
17 | {
18 |   float ln2, inv_ln2, shift, null; /* Field order matters: expf_inline loads from &ln2 and picks ln2 / inv_ln2 by lane index 0 / 1. null is never initialised by SV_EXPF_DATA (presumably padding to four floats - confirm).  */
19 | };
20 | 
21 | #define SV_EXPF_DATA                                                          \
22 |   {                                                                           \
23 |     /* Shift is 1.5*2^17 + 127.  */                                           \
24 |     .shift = 0x1.803f8p17f, .inv_ln2 = 0x1.715476p+0f, .ln2 = 0x1.62e43p-1f,  \
25 |   }
26 | /* Inline exp(x) kernel with no special-case handling. pg governs the subtraction and the final multiply-add; the constant load uses an all-true predicate.  */
27 | static inline svfloat32_t
28 | expf_inline (svfloat32_t x, const svbool_t pg, const struct sv_expf_data *d)
29 | {
30 |   /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
31 |      x = ln2*n + r, with r in [-ln2/2, ln2/2].  */
32 |   /* Load the constants starting at d->ln2 so they can be selected by lane index below.  */
33 |   svfloat32_t lane_consts = svld1rq (svptrue_b32 (), &d->ln2);
34 | 
35 |   /* n = round(x/(ln2/N)).  */
36 |   svfloat32_t z = svmla_lane (sv_f32 (d->shift), x, lane_consts, 1); /* shift + x * inv_ln2 (lane 1).  */
37 |   svfloat32_t n = svsub_x (pg, z, d->shift);
38 | 
39 |   /* r = x - n*ln2/N.  */
40 |   svfloat32_t r = svmls_lane (x, n, lane_consts, 0); /* x - n * ln2 (lane 0).  */
41 | 
42 |   /* scale = 2^(n/N).  */
43 |   svfloat32_t scale = svexpa (svreinterpret_u32 (z)); /* FEXPA consumes the raw bits of z, shift included.  */
44 | 
45 |   /* poly(r) = exp(r) - 1 ~= r.  */
46 |   return svmla_x (pg, scale, scale, r); /* scale + scale * r.  */
47 | }
48 | 
49 | #endif // MATH_SV_EXPF_INLINE_H
50 | 


--------------------------------------------------------------------------------
/math/aarch64/experimental/tanf_data.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Data used in single-precision tan(x) function.
 3 |  *
 4 |  * Copyright (c) 2022-2024, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "math_config.h"
 9 | 
10 | const struct tanf_poly_data __tanf_poly_data = {
11 | .poly_tan = {
12 | /* Coefficients generated using:
13 |    poly = fpminimax((tan(sqrt(x))-sqrt(x))/x^(3/2), deg, [|single ...|], [a*a;b*b]);
14 |    optimize relative error
15 |    final prec : 23 bits
16 |    deg : 5
17 |    a : 0x1p-126 ^ 2
18 |    b : ((pi) / 0x1p2) ^ 2
19 |    dirty rel error: 0x1.f7c2e4p-25
20 |    dirty abs error: 0x1.f7c2ecp-25.  */
21 | 0x1.55555p-2,
22 | 0x1.11166p-3,
23 | 0x1.b88a78p-5,
24 | 0x1.7b5756p-6,
25 | 0x1.4ef4cep-8,
26 | 0x1.0e1e74p-7
27 | },
28 | .poly_cotan = {
29 | /* Coefficients generated using:
30 |    fpminimax(f(x) = (0x1p0 / tan(sqrt(x)) - 0x1p0 / sqrt(x)) / sqrt(x), deg, [|dtype ...|], [a;b])
31 |    optimize a single polynomial
32 |    optimize absolute error
33 |    final prec : 23 bits
34 |    working prec : 128 bits
35 |    deg : 3
36 |    a : 0x1p-126
37 |    b : (pi) / 0x1p2
38 |    dirty rel error : 0x1.81298cp-25
39 |    dirty abs error : 0x1.a8acf4p-25.  */
40 | -0x1.55555p-2, /* -0.33333325.  */
41 | -0x1.6c23e4p-6, /* -2.2225354e-2.  */
42 | -0x1.12dbap-9, /* -2.0969994e-3.  */
43 | -0x1.05a1c2p-12, /* -2.495116e-4.  */
44 | }
45 | };
46 | 


--------------------------------------------------------------------------------
/math/aarch64/sve/acoshf.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Single-precision SVE acosh(x) function.
 3 |  * Copyright (c) 2023-2024, Arm Limited.
 4 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 |  */
 6 | 
 7 | #include "sv_math.h"
 8 | #include "test_sig.h"
 9 | #include "test_defs.h"
10 | 
11 | #define One 0x3f800000
12 | #define Thres 0x20000000 /* asuint(0x1p64) - One.  */
13 | 
14 | #include "sv_log1pf_inline.h"
15 | /* Slow path: rebuilds x as xm1 + 1, evaluates the inline log1pf on tmp for all lanes, then passes both with the `special` predicate to sv_call_f32 together with scalar acoshf.  */
16 | static svfloat32_t NOINLINE
17 | special_case (svfloat32_t xm1, svfloat32_t tmp, svbool_t special)
18 | {
19 |   svfloat32_t x = svadd_x (svptrue_b32 (), xm1, 1.0f);
20 |   svfloat32_t y = sv_log1pf_inline (tmp, svptrue_b32 ());
21 |   return sv_call_f32 (acoshf, x, y, special);
22 | }
23 | 
24 | /* Single-precision SVE acosh(x) routine. Implements the same algorithm as
25 |    vector acoshf and log1p: log1p (xm1 + sqrt (xm1 * (x + 1))) with xm1 = x - 1.
26 |    Lanes where asuint (x) - One >= Thres (unsigned compare) go through special_case.
27 |    Maximum error is 2.47 ULPs:
28 |    SV_NAME_F1 (acosh) (0x1.01ca76p+0) got 0x1.e435a6p-4
29 | 				     want 0x1.e435a2p-4.  */
30 | svfloat32_t SV_NAME_F1 (acosh) (svfloat32_t x, const svbool_t pg)
31 | {
32 |   svuint32_t ix = svreinterpret_u32 (x);
33 |   svbool_t special = svcmpge (pg, svsub_x (pg, ix, One), Thres); /* The subtraction wraps for bit patterns below One, so those lanes are flagged too.  */
34 | 
35 |   svfloat32_t xm1 = svsub_x (pg, x, 1.0f);
36 |   svfloat32_t u = svmul_x (pg, xm1, svadd_x (pg, x, 1.0f)); /* (x - 1) * (x + 1).  */
37 |   svfloat32_t tmp = svadd_x (pg, xm1, svsqrt_x (pg, u));
38 | 
39 |   if (unlikely (svptest_any (pg, special)))
40 |     return special_case (xm1, tmp, special);
41 |   return sv_log1pf_inline (tmp, pg);
42 | }
43 | 
44 | TEST_SIG (SV, F, 1, acosh, 1.0, 10.0)
45 | TEST_ULP (SV_NAME_F1 (acosh), 1.97)
46 | TEST_DISABLE_FENV (SV_NAME_F1 (acosh))
47 | TEST_INTERVAL (SV_NAME_F1 (acosh), 0, 1, 500)
48 | TEST_INTERVAL (SV_NAME_F1 (acosh), 1, 0x1p64, 100000)
49 | TEST_INTERVAL (SV_NAME_F1 (acosh), 0x1p64, inf, 1000)
50 | TEST_INTERVAL (SV_NAME_F1 (acosh), -0, -inf, 1000)
51 | CLOSE_SVE_ATTR
52 | 


--------------------------------------------------------------------------------
/math/aarch64/sve/cexpi.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Double-precision vector cexpi function.
 3 |  *
 4 |  * Copyright (c) 2023-2024, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "sv_math.h"
 9 | #include "sv_sincos_common.h"
10 | #include "test_defs.h"
11 | /* Slow path: rebuilds the result pair via sv_call_f64, pairing element 0 of y with scalar sin and element 1 with scalar cos, under the `special` predicate.  */
12 | static svfloat64x2_t NOINLINE
13 | special_case (svfloat64_t x, svbool_t special, svfloat64x2_t y)
14 | {
15 |   return svcreate2 (sv_call_f64 (sin, x, svget2 (y, 0), special),
16 | 		    sv_call_f64 (cos, x, svget2 (y, 1), special));
17 | }
18 | 
19 | /* Double-precision vector function allowing calculation of both sin and cos in
20 |    one function call, using shared argument reduction and separate polynomials. Element 0 of the returned tuple pairs with sin and element 1 with cos in special_case.
21 |    Largest observed error is for sin, 3.22 ULP:
22 |    sv_cexpi_sin (0x1.d70eef40f39b1p+12) got -0x1.ffe9537d5dbb7p-3
23 | 				       want -0x1.ffe9537d5dbb4p-3.  */
24 | svfloat64x2_t
25 | _ZGVsMxv_cexpi (svfloat64_t x, svbool_t pg)
26 | {
27 |   const struct sv_sincos_data *d = ptr_barrier (&sv_sincos_data);
28 |   svbool_t special = check_ge_rangeval (pg, x, d);
29 | 
30 |   svfloat64x2_t sc = sv_sincos_inline (pg, x, d); /* Always computed; also passed to special_case as y.  */
31 | 
32 |   if (unlikely (svptest_any (pg, special)))
33 |     return special_case (x, special, sc);
34 |   return sc;
35 | }
36 | 
37 | TEST_DISABLE_FENV (_ZGVsMxv_cexpi_sin)
38 | TEST_DISABLE_FENV (_ZGVsMxv_cexpi_cos)
39 | TEST_ULP (_ZGVsMxv_cexpi_sin, 2.73)
40 | TEST_ULP (_ZGVsMxv_cexpi_cos, 2.73)
41 | #define SV_CEXPI_INTERVAL(lo, hi, n)                                          \
42 |   TEST_INTERVAL (_ZGVsMxv_cexpi_sin, lo, hi, n)                               \
43 |   TEST_INTERVAL (_ZGVsMxv_cexpi_cos, lo, hi, n)
44 | SV_CEXPI_INTERVAL (0, 0x1p23, 500000)
45 | SV_CEXPI_INTERVAL (-0, -0x1p23, 500000)
46 | SV_CEXPI_INTERVAL (0x1p23, inf, 10000)
47 | SV_CEXPI_INTERVAL (-0x1p23, -inf, 10000)
48 | CLOSE_SVE_ATTR
49 | 


--------------------------------------------------------------------------------
/math/aarch64/sve/cexpif.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Single-precision vector cexpi function.
 3 |  *
 4 |  * Copyright (c) 2023-2024, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "sv_math.h"
 9 | #include "sv_sincosf_common.h"
10 | #include "test_defs.h"
11 | /* Slow path: rebuilds the result pair via sv_call_f32, pairing element 0 of y with scalar sinf and element 1 with scalar cosf, under the `special` predicate.  */
12 | static svfloat32x2_t NOINLINE
13 | special_case (svfloat32_t x, svbool_t special, svfloat32x2_t y)
14 | {
15 |   return svcreate2 (sv_call_f32 (sinf, x, svget2 (y, 0), special),
16 | 		    sv_call_f32 (cosf, x, svget2 (y, 1), special));
17 | }
18 | 
19 | /* Single-precision vector function allowing calculation of both sin and cos in
20 |    one function call, using shared argument reduction and separate low-order
21 |    polynomials. Element 0 of the returned tuple pairs with sinf and element 1 with cosf in special_case.
22 |    Worst-case error for sin is 1.67 ULP:
23 |    v_cexpif_sin(0x1.c704c4p+19) got 0x1.fff698p-5 want 0x1.fff69cp-5
24 |    Worst-case error for cos is 1.81 ULP:
25 |    v_cexpif_cos(0x1.e506fp+19) got -0x1.ffec6ep-6 want -0x1.ffec72p-6.  */
26 | svfloat32x2_t
27 | _ZGVsMxv_cexpif (svfloat32_t x, svbool_t pg)
28 | {
29 |   const struct sv_sincosf_data *d = ptr_barrier (&sv_sincosf_data);
30 |   svbool_t special = check_ge_rangeval (pg, x, d);
31 | 
32 |   svfloat32x2_t sc = sv_sincosf_inline (pg, x, d); /* Always computed; also passed to special_case as y.  */
33 | 
34 |   if (unlikely (svptest_any (pg, special)))
35 |     return special_case (x, special, sc);
36 |   return sc;
37 | }
38 | 
39 | TEST_DISABLE_FENV (_ZGVsMxv_cexpif_sin)
40 | TEST_DISABLE_FENV (_ZGVsMxv_cexpif_cos)
41 | TEST_ULP (_ZGVsMxv_cexpif_sin, 1.17)
42 | TEST_ULP (_ZGVsMxv_cexpif_cos, 1.31)
43 | #define SV_CEXPIF_INTERVAL(lo, hi, n)                                         \
44 |   TEST_INTERVAL (_ZGVsMxv_cexpif_sin, lo, hi, n)                              \
45 |   TEST_INTERVAL (_ZGVsMxv_cexpif_cos, lo, hi, n)
46 | SV_CEXPIF_INTERVAL (0, 0x1p20, 500000)
47 | SV_CEXPIF_INTERVAL (-0, -0x1p20, 500000)
48 | SV_CEXPIF_INTERVAL (0x1p20, inf, 10000)
49 | SV_CEXPIF_INTERVAL (-0x1p20, -inf, 10000)
50 | CLOSE_SVE_ATTR
51 | 


--------------------------------------------------------------------------------
/math/aarch64/sve/expf.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Single-precision vector e^x function.
 3 |  *
 4 |  * Copyright (c) 2019-2025, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "sv_math.h"
 9 | #include "test_sig.h"
10 | #include "test_defs.h"
11 | #include "sv_expf_inline.h"
12 | 
13 | /* Roughly 87.3. For x < -Thres, the result is subnormal and not handled
14 |    correctly by FEXPA.  */
15 | #define Thres 0x1.5d5e2ap+6f
16 | 
17 | static const struct data
18 | {
19 |   struct sv_expf_data d;
20 |   float thres;
21 | } data = {
22 |   .d = SV_EXPF_DATA,
23 |   .thres = Thres,
24 | };
25 | /* Slow path: evaluates the inline kernel on all lanes, then passes x, that result and the `special` predicate to sv_call_f32 together with scalar expf (presumably recomputing only the special lanes - confirm in sv_math.h).  */
26 | static svfloat32_t NOINLINE
27 | special_case (svfloat32_t x, svbool_t special, const struct sv_expf_data *d)
28 | {
29 |   return sv_call_f32 (expf, x, expf_inline (x, svptrue_b32 (), d), special);
30 | }
31 | 
32 | /* Optimised single-precision SVE exp function. If any active lane has |x| > Thres
33 |    the whole vector goes through special_case; otherwise expf_inline is returned directly. Worst-case error is 0.88 +0.50 ULP:
34 |    _ZGVsMxv_expf(-0x1.bba276p-6) got 0x1.f25288p-1
35 | 				want 0x1.f2528ap-1.  */
36 | svfloat32_t SV_NAME_F1 (exp) (svfloat32_t x, const svbool_t pg)
37 | {
38 |   const struct data *d = ptr_barrier (&data);
39 |   svbool_t is_special_case = svacgt (pg, x, d->thres); /* |x| > thres.  */
40 |   if (unlikely (svptest_any (pg, is_special_case)))
41 |     return special_case (x, is_special_case, &d->d);
42 |   return expf_inline (x, pg, &d->d);
43 | }
44 | 
45 | TEST_SIG (SV, F, 1, exp, -9.9, 9.9)
46 | TEST_ULP (SV_NAME_F1 (exp), 0.89)
47 | TEST_DISABLE_FENV (SV_NAME_F1 (exp))
48 | TEST_SYM_INTERVAL (SV_NAME_F1 (exp), 0, Thres, 50000)
49 | TEST_SYM_INTERVAL (SV_NAME_F1 (exp), Thres, inf, 50000)
50 | CLOSE_SVE_ATTR
51 | 


--------------------------------------------------------------------------------
/math/aarch64/sve/hypot.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Double-precision SVE hypot(x) function.
 3 |  *
 4 |  * Copyright (c) 2023-2024, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "sv_math.h"
 9 | #include "test_sig.h"
10 | #include "test_defs.h"
11 | 
12 | static const struct data
13 | {
14 |   uint64_t tiny_bound, thres;
15 | } data = {
16 |   .tiny_bound = 0x0c80000000000000, /* asuint (0x1p-823): exponent field 0x0c8 = 200, and 200 - 1023 = -823.  */
17 |   .thres = 0x7300000000000000,	    /* 0x7f80000000000000 - tiny_bound, i.e. asuint (0x1p1017) - tiny_bound.  */
18 | };
19 | /* Slow path: passes x, y, sqrt (sqsum) and the `special` predicate to sv_call2_f64 together with scalar hypot.  */
20 | static svfloat64_t NOINLINE
21 | special_case (svfloat64_t sqsum, svfloat64_t x, svfloat64_t y, svbool_t pg,
22 | 	      svbool_t special)
23 | {
24 |   return sv_call2_f64 (hypot, x, y, svsqrt_x (pg, sqsum), special);
25 | }
26 | 
27 | /* SVE implementation of double-precision hypot.
28 |    Maximum error observed is 1.21 ULP:
29 |    _ZGVsMxvv_hypot (-0x1.6a22d0412cdd3p+352, 0x1.d3d89bd66fb1ap+330)
30 |     got 0x1.6a22d0412cfp+352
31 |    want 0x1.6a22d0412cf01p+352.  */
32 | svfloat64_t SV_NAME_D2 (hypot) (svfloat64_t x, svfloat64_t y, svbool_t pg)
33 | {
34 |   const struct data *d = ptr_barrier (&data);
35 | 
36 |   svfloat64_t sqsum = svmla_x (pg, svmul_x (pg, x, x), y, y);
37 | 
38 |   svbool_t special = svcmpge (
39 |       pg, svsub_x (pg, svreinterpret_u64 (sqsum), d->tiny_bound), d->thres);
40 | 
41 |   if (unlikely (svptest_any (pg, special)))
42 |     return special_case (sqsum, x, y, pg, special);
43 |   return svsqrt_x (pg, sqsum);
44 | }
45 | 
46 | TEST_SIG (SV, D, 2, hypot, -10.0, 10.0)
47 | TEST_ULP (SV_NAME_D2 (hypot), 0.71)
48 | TEST_DISABLE_FENV (SV_NAME_D2 (hypot))
49 | TEST_INTERVAL2 (SV_NAME_D2 (hypot), 0, inf, 0, inf, 10000)
50 | TEST_INTERVAL2 (SV_NAME_D2 (hypot), 0, inf, -0, -inf, 10000)
51 | TEST_INTERVAL2 (SV_NAME_D2 (hypot), -0, -inf, 0, inf, 10000)
52 | TEST_INTERVAL2 (SV_NAME_D2 (hypot), -0, -inf, -0, -inf, 10000)
53 | CLOSE_SVE_ATTR
54 | 


--------------------------------------------------------------------------------
/math/aarch64/sve/hypotf.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Single-precision SVE hypot(x, y) function.
 3 |  *
 4 |  * Copyright (c) 2023-2024, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "sv_math.h"
 9 | #include "test_sig.h"
10 | #include "test_defs.h"
11 | 
12 | #define TinyBound 0x0c800000 /* asuint (0x1p-102).  */
13 | #define Thres 0x73000000     /* asuint (inf) - TinyBound.  */
14 | 
15 | static svfloat32_t NOINLINE
16 | special_case (svfloat32_t sqsum, svfloat32_t x, svfloat32_t y, svbool_t pg,
17 | 	      svbool_t special)
18 | {
19 |   return sv_call2_f32 (hypotf, x, y, svsqrt_x (pg, sqsum), special);
20 | }
21 | 
22 | /* SVE implementation of single-precision hypot.
23 |    Maximum error observed is 1.21 ULP:
24 |    _ZGVsMxvv_hypotf (0x1.6a213cp-19, -0x1.32b982p-26) got 0x1.6a2346p-19
25 | 						     want 0x1.6a2344p-19.  */
26 | svfloat32_t SV_NAME_F2 (hypot) (svfloat32_t x, svfloat32_t y,
27 | 				const svbool_t pg)
28 | {
29 |   svfloat32_t sqsum = svmla_x (pg, svmul_x (pg, x, x), y, y);
30 | 
31 |   svbool_t special = svcmpge (
32 |       pg, svsub_x (pg, svreinterpret_u32 (sqsum), TinyBound), Thres);
33 | 
34 |   if (unlikely (svptest_any (pg, special)))
35 |     return special_case (sqsum, x, y, pg, special);
36 | 
37 |   return svsqrt_x (pg, sqsum);
38 | }
39 | 
40 | TEST_SIG (SV, F, 2, hypot, -10.0, 10.0)
41 | TEST_ULP (SV_NAME_F2 (hypot), 0.71)
42 | TEST_DISABLE_FENV (SV_NAME_F2 (hypot))
43 | TEST_INTERVAL2 (SV_NAME_F2 (hypot), 0, inf, 0, inf, 10000)
44 | TEST_INTERVAL2 (SV_NAME_F2 (hypot), 0, inf, -0, -inf, 10000)
45 | TEST_INTERVAL2 (SV_NAME_F2 (hypot), -0, -inf, 0, inf, 10000)
46 | TEST_INTERVAL2 (SV_NAME_F2 (hypot), -0, -inf, -0, -inf, 10000)
47 | CLOSE_SVE_ATTR
48 | 


--------------------------------------------------------------------------------
/math/aarch64/sve/log1pf.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Single-precision vector log(x + 1) function.
 3 |  *
 4 |  * Copyright (c) 2023-2024, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "sv_math.h"
 9 | #include "test_sig.h"
10 | #include "test_defs.h"
11 | #include "sv_log1pf_inline.h"
12 | 
13 | static svfloat32_t NOINLINE
14 | special_case (svfloat32_t x, svbool_t special)
15 | {
16 |   return sv_call_f32 (log1pf, x, sv_log1pf_inline (x, svptrue_b32 ()),
17 | 		      special);
18 | }
19 | 
20 | /* Vector log1pf approximation using polynomial on reduced interval. Worst-case
21 |    error is 1.27 ULP very close to 0.5.
22 |    _ZGVsMxv_log1pf(0x1.fffffep-2) got 0x1.9f324p-2
23 | 				 want 0x1.9f323ep-2.  */
24 | svfloat32_t SV_NAME_F1 (log1p) (svfloat32_t x, svbool_t pg)
25 | {
26 |   /* x < -1, Inf/Nan.  */
27 |   svbool_t special = svcmpeq (pg, svreinterpret_u32 (x), 0x7f800000);
28 |   special = svorn_z (pg, special, svcmpge (pg, x, -1));
29 | 
30 |   if (unlikely (svptest_any (pg, special)))
31 |     return special_case (x, special);
32 | 
33 |   return sv_log1pf_inline (x, pg);
34 | }
35 | 
36 | TEST_SIG (SV, F, 1, log1p, -0.9, 10.0)
37 | TEST_ULP (SV_NAME_F1 (log1p), 0.77)
38 | TEST_DISABLE_FENV (SV_NAME_F1 (log1p))
39 | TEST_SYM_INTERVAL (SV_NAME_F1 (log1p), 0, 0x1p-23, 5000)
40 | TEST_SYM_INTERVAL (SV_NAME_F1 (log1p), 0x1p-23, 1, 5000)
41 | TEST_INTERVAL (SV_NAME_F1 (log1p), 1, inf, 10000)
42 | TEST_INTERVAL (SV_NAME_F1 (log1p), -1, -inf, 10)
43 | CLOSE_SVE_ATTR
44 | 


--------------------------------------------------------------------------------
/math/aarch64/sve/modf.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Double-precision SVE modf(x, *y) function.
 3 |  *
 4 |  * Copyright (c) 2024, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "sv_math.h"
 9 | #include "test_sig.h"
10 | #include "test_defs.h"
11 | 
12 | /* Modf algorithm. Produces exact values in all rounding modes.  */
13 | svfloat64_t SV_NAME_D1_L1 (modf) (svfloat64_t x, double *out_int,
14 | 				  const svbool_t pg)
15 | {
16 |   /* Get integer component of x.  */
17 |   svfloat64_t fint_comp = svrintz_x (pg, x);
18 | 
19 |   svst1_f64 (pg, out_int, fint_comp);
20 | 
21 |   /* Subtract integer component from input.  */
22 |   svfloat64_t remaining = svsub_f64_x (svptrue_b64 (), x, fint_comp);
23 | 
24 |   /* Return +0 for integer x.  */
25 |   svbool_t is_integer = svcmpeq (pg, x, fint_comp);
26 |   return svsel (is_integer, sv_f64 (0), remaining);
27 | }
28 | 
29 | TEST_ULP (_ZGVsMxvl8_modf_frac, 0.0)
30 | TEST_SYM_INTERVAL (_ZGVsMxvl8_modf_frac, 0, 1, 20000)
31 | TEST_SYM_INTERVAL (_ZGVsMxvl8_modf_frac, 1, inf, 20000)
32 | 
33 | TEST_ULP (_ZGVsMxvl8_modf_int, 0.0)
34 | TEST_SYM_INTERVAL (_ZGVsMxvl8_modf_int, 0, 1, 20000)
35 | TEST_SYM_INTERVAL (_ZGVsMxvl8_modf_int, 1, inf, 20000)
36 | CLOSE_SVE_ATTR
37 | 


--------------------------------------------------------------------------------
/math/aarch64/sve/modff.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Single-precision SVE modff(x, *y) function.
 3 |  *
 4 |  * Copyright (c) 2024, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "sv_math.h"
 9 | #include "test_sig.h"
10 | #include "test_defs.h"
11 | 
12 | /* Modff algorithm. Produces exact values in all rounding modes.  */
13 | svfloat32_t SV_NAME_F1_L1 (modf) (svfloat32_t x, float *out_int,
14 | 				  const svbool_t pg)
15 | {
16 |   /* Get integer component of x.  */
17 |   svfloat32_t fint_comp = svrintz_x (pg, x);
18 | 
19 |   svst1_f32 (pg, out_int, fint_comp);
20 | 
21 |   /* Subtract integer component from input.  */
22 |   svfloat32_t remaining = svsub_f32_x (svptrue_b32 (), x, fint_comp);
23 | 
24 |   /* Return +0 for integer x.  */
25 |   svbool_t is_integer = svcmpeq (pg, x, fint_comp);
26 |   return svsel (is_integer, sv_f32 (0), remaining);
27 | }
28 | 
29 | TEST_ULP (_ZGVsMxvl4_modff_frac, 0.0)
30 | TEST_SYM_INTERVAL (_ZGVsMxvl4_modff_frac, 0, 1, 20000)
31 | TEST_SYM_INTERVAL (_ZGVsMxvl4_modff_frac, 1, inf, 20000)
32 | 
33 | TEST_ULP (_ZGVsMxvl4_modff_int, 0.0)
34 | TEST_SYM_INTERVAL (_ZGVsMxvl4_modff_int, 0, 1, 20000)
35 | TEST_SYM_INTERVAL (_ZGVsMxvl4_modff_int, 1, inf, 20000)
36 | CLOSE_SVE_ATTR
37 | 


--------------------------------------------------------------------------------
/math/aarch64/sve/sincospi.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Double-precision SVE sincospi(x, *y, *z) function.
 3 |  *
 4 |  * Copyright (c) 2024-2025, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "sv_math.h"
 9 | #include "test_defs.h"
10 | #include "mathlib.h"
11 | #include "sv_sincospi_common.h"
12 | 
13 | /* Double-precision vector function allowing calculation of both sinpi and
14 |    cospi in one function call, using shared argument reduction and polynomials.
15 |     Worst-case error for sin is 3.09 ULP:
16 |     _ZGVsMxvl8l8_sincospi_sin(0x1.7a41deb4b21e1p+14) got 0x1.fd54d0b327cf1p-1
17 | 						    want 0x1.fd54d0b327cf4p-1.
18 |    Worst-case error for cos is 3.16 ULP:
19 |     _ZGVsMxvl8l8_sincospi_cos(-0x1.11e3c7e284adep-5) got 0x1.fd2da484ff3ffp-1
20 | 						    want 0x1.fd2da484ff402p-1.
21 |  */
22 | void
23 | _ZGVsMxvl8l8_sincospi (svfloat64_t x, double *out_sin, double *out_cos,
24 | 		       svbool_t pg)
25 | {
26 |   const struct sv_sincospi_data *d = ptr_barrier (&sv_sincospi_data);
27 | 
28 |   svfloat64x2_t sc = sv_sincospi_inline (pg, x, d);
29 | 
30 |   svst1 (pg, out_sin, svget2 (sc, 0));
31 |   svst1 (pg, out_cos, svget2 (sc, 1));
32 | }
33 | 
34 | #if WANT_C23_TESTS
35 | TEST_DISABLE_FENV (_ZGVsMxvl8l8_sincospi_sin)
36 | TEST_DISABLE_FENV (_ZGVsMxvl8l8_sincospi_cos)
37 | TEST_ULP (_ZGVsMxvl8l8_sincospi_sin, 2.59)
38 | TEST_ULP (_ZGVsMxvl8l8_sincospi_cos, 2.66)
39 | #  define SV_SINCOSPI_INTERVAL(lo, hi, n)                                     \
40 |     TEST_SYM_INTERVAL (_ZGVsMxvl8l8_sincospi_sin, lo, hi, n)                  \
41 |     TEST_SYM_INTERVAL (_ZGVsMxvl8l8_sincospi_cos, lo, hi, n)
42 | SV_SINCOSPI_INTERVAL (0, 0x1p-63, 10000)
43 | SV_SINCOSPI_INTERVAL (0x1p-63, 0.5, 50000)
44 | SV_SINCOSPI_INTERVAL (0.5, 0x1p53, 50000)
45 | SV_SINCOSPI_INTERVAL (0x1p53, inf, 10000)
46 | #endif
47 | CLOSE_SVE_ATTR
48 | 


--------------------------------------------------------------------------------
/math/aarch64/sve/sincospif.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Single-precision SVE sincospi(x, *y, *z) function.
 3 |  *
 4 |  * Copyright (c) 2024-2025, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "sv_math.h"
 9 | #include "test_defs.h"
10 | #include "mathlib.h"
11 | #include "sv_sincospif_common.h"
12 | 
13 | /* Single-precision vector function allowing calculation of both sinpi and
14 |    cospi in one function call, using shared argument reduction and polynomials.
15 |    Worst-case error for sin is 3.04 ULP:
16 |    _ZGVsMxvl4l4_sincospif_sin(0x1.b51b8p-2) got 0x1.f28b5ep-1 want
17 |    0x1.f28b58p-1.
18 |    Worst-case error for cos is 3.18 ULP:
19 |    _ZGVsMxvl4l4_sincospif_cos(0x1.d341a8p-5) got 0x1.f7cd56p-1 want
20 |    0x1.f7cd5p-1.  */
21 | void
22 | _ZGVsMxvl4l4_sincospif (svfloat32_t x, float *out_sin, float *out_cos,
23 | 			svbool_t pg)
24 | {
25 |   const struct sv_sincospif_data *d = ptr_barrier (&sv_sincospif_data);
26 | 
27 |   svfloat32x2_t sc = sv_sincospif_inline (pg, x, d);
28 | 
29 |   svst1 (pg, out_sin, svget2 (sc, 0));
30 |   svst1 (pg, out_cos, svget2 (sc, 1));
31 | }
32 | 
33 | #if WANT_C23_TESTS
34 | TEST_DISABLE_FENV (_ZGVsMxvl4l4_sincospif_sin)
35 | TEST_DISABLE_FENV (_ZGVsMxvl4l4_sincospif_cos)
36 | TEST_ULP (_ZGVsMxvl4l4_sincospif_sin, 2.54)
37 | TEST_ULP (_ZGVsMxvl4l4_sincospif_cos, 2.68)
38 | #  define SV_SINCOSPIF_INTERVAL(lo, hi, n)                                    \
39 |     TEST_SYM_INTERVAL (_ZGVsMxvl4l4_sincospif_sin, lo, hi, n)                 \
40 |     TEST_SYM_INTERVAL (_ZGVsMxvl4l4_sincospif_cos, lo, hi, n)
41 | SV_SINCOSPIF_INTERVAL (0, 0x1p-31, 10000)
42 | SV_SINCOSPIF_INTERVAL (0x1p-31, 0.5, 50000)
43 | SV_SINCOSPIF_INTERVAL (0.5, 0x1p31, 50000)
44 | SV_SINCOSPIF_INTERVAL (0x1p31, inf, 10000)
45 | #endif
46 | CLOSE_SVE_ATTR
47 | 


--------------------------------------------------------------------------------
/math/aarch64/sve/sv_expf_inline.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * SVE helper for single-precision routines which calculate exp(x) and do
 3 |  * not need special-case handling
 4 |  *
 5 |  * Copyright (c) 2023-2025, Arm Limited.
 6 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 7 |  */
 8 | 
 9 | #ifndef MATH_SV_EXPF_INLINE_H
10 | #define MATH_SV_EXPF_INLINE_H
11 | 
12 | #include "sv_math.h"
13 | #include "test_sig.h"
14 | #include "test_defs.h"
15 | 
16 | struct sv_expf_data
17 | {
18 |   float ln2_hi, ln2_lo, c1, null;
19 |   float inv_ln2, shift;
20 | };
21 | 
22 | /* Shift is 1.5*2^17 + 127.  */
23 | #define SV_EXPF_DATA                                                          \
24 |   {                                                                           \
25 |     .c1 = 0.5f, .inv_ln2 = 0x1.715476p+0f, .ln2_hi = 0x1.62e4p-1f,            \
26 |     .ln2_lo = 0x1.7f7d1cp-20f, .shift = 0x1.803f8p17f,                        \
27 |   }
28 | 
29 | static inline svfloat32_t
30 | expf_inline (svfloat32_t x, const svbool_t pg, const struct sv_expf_data *d)
31 | {
32 |   /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
33 |      x = ln2*n + r, with r in [-ln2/2, ln2/2].  */
34 | 
35 |   svfloat32_t lane_consts = svld1rq (svptrue_b32 (), &d->ln2_hi);
36 | 
37 |   /* n = round(x/(ln2/N)).  */
38 |   svfloat32_t z = svmad_x (pg, sv_f32 (d->inv_ln2), x, d->shift);
39 |   svfloat32_t n = svsub_x (pg, z, d->shift);
40 | 
41 |   /* r = x - n*ln2/N.  */
42 |   svfloat32_t r = x;
43 |   r = svmls_lane (r, n, lane_consts, 0);
44 |   r = svmls_lane (r, n, lane_consts, 1);
45 | 
46 |   /* scale = 2^(n/N).  */
47 |   svfloat32_t scale = svexpa (svreinterpret_u32 (z));
48 | 
49 |   /* poly(r) = exp(r) - 1 ~= r + 0.5 r^2.  */
50 |   svfloat32_t r2 = svmul_x (svptrue_b32 (), r, r);
51 |   svfloat32_t poly = svmla_lane (r, r2, lane_consts, 2);
52 | 
53 |   return svmla_x (pg, scale, scale, poly);
54 | }
55 | 
56 | #endif // MATH_SV_EXPF_INLINE_H
57 | 


--------------------------------------------------------------------------------
/math/aarch64/sve/sv_poly_f32.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Helpers for evaluating polynomials on single-precision SVE input, using
 3 |  * various schemes.
 4 |  *
 5 |  * Copyright (c) 2023-2025, Arm Limited.
 6 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 7 |  */
 8 | 
 9 | #ifndef MATH_POLY_SVE_F32_H
10 | #define MATH_POLY_SVE_F32_H
11 | 
12 | #include <arm_sve.h>
13 | 
14 | /* Wrap SVE f32 helpers: evaluation of some scheme/order has form:
15 |    sv_[scheme]_[order]_f32_x.  */
16 | #define PTRUE svptrue_b32 ()
17 | #define VTYPE svfloat32_t
18 | #define STYPE float
19 | #define VWRAP(f) sv_##f##_f32_x
20 | #define DUP svdup_f32
21 | #include "sv_poly_generic.h"
22 | #undef DUP
23 | #undef VWRAP
24 | #undef STYPE
25 | #undef VTYPE
26 | #undef PTRUE
27 | 
28 | #endif
29 | 


--------------------------------------------------------------------------------
/math/aarch64/sve/sv_poly_f64.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Helpers for evaluating polynomials on double-precision SVE input, using
 3 |  * various schemes.
 4 |  *
 5 |  * Copyright (c) 2023-2025, Arm Limited.
 6 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 7 |  */
 8 | 
 9 | #ifndef MATH_POLY_SVE_F64_H
10 | #define MATH_POLY_SVE_F64_H
11 | 
12 | #include <arm_sve.h>
13 | 
14 | /* Wrap SVE f64 helpers: evaluation of some scheme/order has form:
15 |    sv_[scheme]_[order]_f64_x.  */
16 | #define PTRUE svptrue_b64 ()
17 | #define VTYPE svfloat64_t
18 | #define STYPE double
19 | #define VWRAP(f) sv_##f##_f64_x
20 | #define DUP svdup_f64
21 | #include "sv_poly_generic.h"
22 | #undef DUP
23 | #undef VWRAP
24 | #undef STYPE
25 | #undef VTYPE
26 | #undef PTRUE
27 | 
28 | #endif
29 | 


--------------------------------------------------------------------------------
/math/erff_data.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Data for approximation of erff.
 3 |  *
 4 |  * Copyright (c) 2019-2020, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "math_config.h"
 9 | 
10 | /* Minimax approximation of erff. */
11 | const struct erff_data __erff_data = {
12 | .erff_poly_A = {
13 | 0x1.06eba6p-03f, -0x1.8126e0p-02f, 0x1.ce1a46p-04f,
14 | -0x1.b68bd2p-06f, 0x1.473f48p-08f, -0x1.3a1a82p-11f
15 | },
16 | .erff_poly_B = {
17 | 0x1.079d0cp-3f, 0x1.450aa0p-1f, 0x1.b55cb0p-4f,
18 | -0x1.8d6300p-6f, 0x1.fd1336p-9f, -0x1.91d2ccp-12f,
19 | 0x1.222900p-16f
20 | }
21 | };
22 | 
23 | 


--------------------------------------------------------------------------------
/math/include/test_defs.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Helper macros for emitting various details about routines for consumption by
 3 |  * runulp.sh. This version of the file is for inclusion when building routines,
 4 |  * so expansions are empty - see math/test/test_defs for versions used by the
 5 |  * build system.
 6 |  *
 7 |  * Copyright (c) 2024, Arm Limited.
 8 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception.
 9 |  */
10 | 
11 | #define TEST_ULP(f, l)
12 | #define TEST_ULP_NONNEAREST(f, l)
13 | 
14 | #define TEST_DISABLE_FENV(f)
15 | #define TEST_DISABLE_FENV_IF_NOT(f, e)
16 | 
17 | #define TEST_INTERVAL(f, lo, hi, n)
18 | #define TEST_SYM_INTERVAL(f, lo, hi, n)
19 | #define TEST_INTERVAL2(f, xlo, xhi, ylo, yhi, n)
20 | 
21 | #define TEST_CONTROL_VALUE(f, c)
22 | 


--------------------------------------------------------------------------------
/math/log2f_data.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Data definition for log2f.
 3 |  *
 4 |  * Copyright (c) 2017-2018, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "math_config.h"
 9 | 
10 | const struct log2f_data __log2f_data = {
11 |   .tab = {
12 |   { 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 },
13 |   { 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 },
14 |   { 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 },
15 |   { 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 },
16 |   { 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 },
17 |   { 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 },
18 |   { 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 },
19 |   { 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 },
20 |   { 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 },
21 |   { 0x1p+0, 0x0p+0 },
22 |   { 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 },
23 |   { 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 },
24 |   { 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 },
25 |   { 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 },
26 |   { 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 },
27 |   { 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 },
28 |   },
29 |   .poly = {
30 |   -0x1.712b6f70a7e4dp-2, 0x1.ecabf496832ep-2, -0x1.715479ffae3dep-1,
31 |   0x1.715475f35c8b8p0,
32 |   }
33 | };
34 | 


--------------------------------------------------------------------------------
/math/logf_data.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Data definition for logf.
 3 |  *
 4 |  * Copyright (c) 2017-2024, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "math_config.h"
 9 | 
10 | const struct logf_data __logf_data = {
11 |   .tab = {
12 |   { 0x1.661ec79f8f3bep+0, -0x1.57bf7808caadep-2 },
13 |   { 0x1.571ed4aaf883dp+0, -0x1.2bef0a7c06ddbp-2 },
14 |   { 0x1.49539f0f010bp+0, -0x1.01eae7f513a67p-2 },
15 |   { 0x1.3c995b0b80385p+0, -0x1.b31d8a68224e9p-3 },
16 |   { 0x1.30d190c8864a5p+0, -0x1.6574f0ac07758p-3 },
17 |   { 0x1.25e227b0b8eap+0, -0x1.1aa2bc79c81p-3 },
18 |   { 0x1.1bb4a4a1a343fp+0, -0x1.a4e76ce8c0e5ep-4 },
19 |   { 0x1.12358f08ae5bap+0, -0x1.1973c5a611cccp-4 },
20 |   { 0x1.0953f419900a7p+0, -0x1.252f438e10c1ep-5 },
21 |   { 0x1p+0, 0x0p+0 },
22 |   { 0x1.e608cfd9a47acp-1, 0x1.aa5aa5df25984p-5 },
23 |   { 0x1.ca4b31f026aap-1, 0x1.c5e53aa362eb4p-4 },
24 |   { 0x1.b2036576afce6p-1, 0x1.526e57720db08p-3 },
25 |   { 0x1.9c2d163a1aa2dp-1, 0x1.bc2860d22477p-3 },
26 |   { 0x1.886e6037841edp-1, 0x1.1058bc8a07ee1p-2 },
27 |   { 0x1.767dcf5534862p-1, 0x1.4043057b6ee09p-2 },
28 |   },
29 |   .ln2 = 0x1.62e42fefa39efp-1,
30 |   .invln10 = 0x1.bcb7b1526e50ep-2,
31 |   .poly = {
32 |   -0x1.00ea348b88334p-2, 0x1.5575b0be00b6ap-2, -0x1.ffffef20a4123p-2,
33 |   }
34 | };
35 | 


--------------------------------------------------------------------------------
/math/math_err.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Double-precision math error handling.
 3 |  *
 4 |  * Copyright (c) 2018, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "math_config.h"
 9 | 
10 | #if WANT_ERRNO
11 | #include <errno.h>
12 | /* NOINLINE reduces code size and avoids making math functions non-leaf
13 |    when the error handling is inlined.  */
14 | NOINLINE static double
15 | with_errno (double y, int e)
16 | {
17 |   errno = e;
18 |   return y;
19 | }
20 | #else
21 | #define with_errno(x, e) (x)
22 | #endif
23 | 
24 | /* NOINLINE reduces code size.  */
25 | NOINLINE static double
26 | xflow (uint32_t sign, double y)
27 | {
28 |   y = eval_as_double (opt_barrier_double (sign ? -y : y) * y);
29 |   return with_errno (y, ERANGE);
30 | }
31 | 
32 | HIDDEN double
33 | __math_uflow (uint32_t sign)
34 | {
35 |   return xflow (sign, 0x1p-767);
36 | }
37 | 
38 | #if WANT_ERRNO_UFLOW
39 | /* Underflows to zero in some non-nearest rounding mode, setting errno
40 |    is valid even if the result is non-zero, but in the subnormal range.  */
41 | HIDDEN double
42 | __math_may_uflow (uint32_t sign)
43 | {
44 |   return xflow (sign, 0x1.8p-538);
45 | }
46 | #endif
47 | 
48 | HIDDEN double
49 | __math_oflow (uint32_t sign)
50 | {
51 |   return xflow (sign, 0x1p769);
52 | }
53 | 
54 | HIDDEN double
55 | __math_divzero (uint32_t sign)
56 | {
57 |   double y = opt_barrier_double (sign ? -1.0 : 1.0) / 0.0;
58 |   return with_errno (y, ERANGE);
59 | }
60 | 
61 | HIDDEN double
62 | __math_invalid (double x)
63 | {
64 |   double y = (x - x) / (x - x);
65 |   return isnan (x) ? y : with_errno (y, EDOM);
66 | }
67 | 
68 | /* Check result and set errno if necessary.  */
69 | 
70 | HIDDEN double
71 | __math_check_uflow (double y)
72 | {
73 |   return y == 0.0 ? with_errno (y, ERANGE) : y;
74 | }
75 | 
76 | HIDDEN double
77 | __math_check_oflow (double y)
78 | {
79 |   return isinf (y) ? with_errno (y, ERANGE) : y;
80 | }
81 | 


--------------------------------------------------------------------------------
/math/math_errf.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Single-precision math error handling.
 3 |  *
 4 |  * Copyright (c) 2017-2020, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "math_config.h"
 9 | 
10 | #if WANT_ERRNO
11 | #include <errno.h>
12 | /* NOINLINE reduces code size and avoids making math functions non-leaf
13 |    when the error handling is inlined.  */
14 | NOINLINE static float
15 | with_errnof (float y, int e)
16 | {
17 |   errno = e;
18 |   return y;
19 | }
20 | #else
21 | #define with_errnof(x, e) (x)
22 | #endif
23 | 
24 | /* NOINLINE reduces code size.  */
25 | NOINLINE static float
26 | xflowf (uint32_t sign, float y)
27 | {
28 |   y = eval_as_float (opt_barrier_float (sign ? -y : y) * y);
29 |   return with_errnof (y, ERANGE);
30 | }
31 | 
32 | HIDDEN float
33 | __math_uflowf (uint32_t sign)
34 | {
35 |   return xflowf (sign, 0x1p-95f);
36 | }
37 | 
38 | #if WANT_ERRNO_UFLOW
39 | /* Underflows to zero in some non-nearest rounding mode, setting errno
40 |    is valid even if the result is non-zero, but in the subnormal range.  */
41 | HIDDEN float
42 | __math_may_uflowf (uint32_t sign)
43 | {
44 |   return xflowf (sign, 0x1.4p-75f);
45 | }
46 | #endif
47 | 
48 | HIDDEN float
49 | __math_oflowf (uint32_t sign)
50 | {
51 |   return xflowf (sign, 0x1p97f);
52 | }
53 | 
54 | HIDDEN float
55 | __math_divzerof (uint32_t sign)
56 | {
57 |   float y = opt_barrier_float (sign ? -1.0f : 1.0f) / 0.0f;
58 |   return with_errnof (y, ERANGE);
59 | }
60 | 
61 | HIDDEN float
62 | __math_invalidf (float x)
63 | {
64 |   float y = (x - x) / (x - x);
65 |   return isnan (x) ? y : with_errnof (y, EDOM);
66 | }
67 | 
68 | /* Check result and set errno if necessary.  */
69 | 
70 | HIDDEN float
71 | __math_check_uflowf (float y)
72 | {
73 |   return y == 0.0f ? with_errnof (y, ERANGE) : y;
74 | }
75 | 
76 | HIDDEN float
77 | __math_check_oflowf (float y)
78 | {
79 |   return isinf (y) ? with_errnof (y, ERANGE) : y;
80 | }
81 | 


--------------------------------------------------------------------------------
/math/poly_scalar_f32.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Helpers for evaluating polynomials on single-precision scalar input, using
 3 |  * various schemes.
 4 |  *
 5 |  * Copyright (c) 2023-2024, Arm Limited.
 6 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 7 |  */
 8 | 
 9 | #ifndef MATH_POLY_SCALAR_F32_H
10 | #define MATH_POLY_SCALAR_F32_H
11 | 
12 | #include <math.h>
13 | 
14 | /* Wrap scalar f32 helpers: evaluation of some scheme/order has form:
15 |    [scheme]_[order]_f32.  */
16 | #define VTYPE float
17 | #define FMA fmaf
18 | #define VWRAP(f) f##_f32
19 | #include "poly_generic.h"
20 | #undef VWRAP
21 | #undef FMA
22 | #undef VTYPE
23 | 
24 | #endif
25 | 


--------------------------------------------------------------------------------
/math/poly_scalar_f64.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Helpers for evaluating polynomials on double-precision scalar input, using
 3 |  * various schemes.
 4 |  *
 5 |  * Copyright (c) 2023-2024, Arm Limited.
 6 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 7 |  */
 8 | 
 9 | #ifndef MATH_POLY_SCALAR_F64_H
10 | #define MATH_POLY_SCALAR_F64_H
11 | 
12 | #include <math.h>
13 | 
14 | /* Wrap scalar f64 helpers: evaluation of some scheme/order has form:
15 |    [scheme]_[order]_f64.  */
16 | #define VTYPE double
17 | #define FMA fma
18 | #define VWRAP(f) f##_f64
19 | #include "poly_generic.h"
20 | #undef VWRAP
21 | #undef FMA
22 | #undef VTYPE
23 | 
24 | #endif
25 | 


--------------------------------------------------------------------------------
/math/powf_log2_data.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Data definition for powf.
 3 |  *
 4 |  * Copyright (c) 2017-2019, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "math_config.h"
 9 | 
10 | const struct powf_log2_data __powf_log2_data = {
11 |   .tab = {
12 |   { 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 * POWF_SCALE },
13 |   { 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 * POWF_SCALE },
14 |   { 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 * POWF_SCALE },
15 |   { 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 * POWF_SCALE },
16 |   { 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 * POWF_SCALE },
17 |   { 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 * POWF_SCALE },
18 |   { 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 * POWF_SCALE },
19 |   { 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 * POWF_SCALE },
20 |   { 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 * POWF_SCALE },
21 |   { 0x1p+0, 0x0p+0 * POWF_SCALE },
22 |   { 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 * POWF_SCALE },
23 |   { 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 * POWF_SCALE },
24 |   { 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 * POWF_SCALE },
25 |   { 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 * POWF_SCALE },
26 |   { 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 * POWF_SCALE },
27 |   { 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 * POWF_SCALE },
28 |   },
29 |   .poly = {
30 |   0x1.27616c9496e0bp-2 * POWF_SCALE, -0x1.71969a075c67ap-2 * POWF_SCALE,
31 |   0x1.ec70a6ca7baddp-2 * POWF_SCALE, -0x1.7154748bef6c8p-1 * POWF_SCALE,
32 |   0x1.71547652ab82bp0 * POWF_SCALE,
33 |   }
34 | };
35 | 


--------------------------------------------------------------------------------
/math/sincosf_data.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Data definition for sinf, cosf and sincosf.
 3 |  *
 4 |  * Copyright (c) 2018-2019, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include <stdint.h>
 9 | #include <math.h>
10 | #include "math_config.h"
11 | #include "sincosf.h"
12 | 
13 | /* The constants and polynomials for sine and cosine.  The 2nd entry
14 |    computes -cos (x) rather than cos (x) to get negation for free.  */
15 | const sincos_t __sincosf_table[2] =
16 | {
17 |   {
18 |     { 1.0, -1.0, -1.0, 1.0 },
19 | #if TOINT_INTRINSICS
20 |     0x1.45F306DC9C883p-1,
21 | #else
22 |     0x1.45F306DC9C883p+23,
23 | #endif
24 |     0x1.921FB54442D18p0,
25 |     0x1p0,
26 |     -0x1.ffffffd0c621cp-2,
27 |     0x1.55553e1068f19p-5,
28 |     -0x1.6c087e89a359dp-10,
29 |     0x1.99343027bf8c3p-16,
30 |     -0x1.555545995a603p-3,
31 |     0x1.1107605230bc4p-7,
32 |     -0x1.994eb3774cf24p-13
33 |   },
34 |   {
35 |     { 1.0, -1.0, -1.0, 1.0 },
36 | #if TOINT_INTRINSICS
37 |     0x1.45F306DC9C883p-1,
38 | #else
39 |     0x1.45F306DC9C883p+23,
40 | #endif
41 |     0x1.921FB54442D18p0,
42 |     -0x1p0,
43 |     0x1.ffffffd0c621cp-2,
44 |     -0x1.55553e1068f19p-5,
45 |     0x1.6c087e89a359dp-10,
46 |     -0x1.99343027bf8c3p-16,
47 |     -0x1.555545995a603p-3,
48 |     0x1.1107605230bc4p-7,
49 |     -0x1.994eb3774cf24p-13
50 |   }
51 | };
52 | 
53 | /* Table with 4/PI to 192 bit precision.  To avoid unaligned accesses
54 |    only 8 new bits are added per entry, making the table 4 times larger.  */
55 | const uint32_t __inv_pio4[24] =
56 | {
57 |   0xa2,       0xa2f9,	  0xa2f983,   0xa2f9836e,
58 |   0xf9836e4e, 0x836e4e44, 0x6e4e4415, 0x4e441529,
59 |   0x441529fc, 0x1529fc27, 0x29fc2757, 0xfc2757d1,
60 |   0x2757d1f5, 0x57d1f534, 0xd1f534dd, 0xf534ddc0,
61 |   0x34ddc0db, 0xddc0db62, 0xc0db6295, 0xdb629599,
62 |   0x6295993c, 0x95993c43, 0x993c4390, 0x3c439041
63 | };
64 | 


--------------------------------------------------------------------------------
/math/test/rtest/random.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * random.h - header for random.c
 3 |  *
 4 |  * Copyright (c) 2009-2019, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "types.h"
 9 | 
10 | uint32 random32(void);
11 | uint32 random_upto(uint32 limit);
12 | uint32 random_upto_biased(uint32 limit, int bias);
13 | 


--------------------------------------------------------------------------------
/math/test/rtest/types.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * types.h
 3 |  *
 4 |  * Copyright (c) 2005-2019, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #ifndef mathtest_types_h
 9 | #define mathtest_types_h
10 | 
11 | #include <limits.h>
12 | 
13 | #if UINT_MAX == 4294967295
14 | typedef unsigned int uint32;
15 | typedef int int32;
16 | #define I32 ""
17 | #elif ULONG_MAX == 4294967295
18 | typedef unsigned long uint32;
19 | typedef long int32;
20 | #define I32 "l"
21 | #else
22 | #error Could not find an unsigned 32-bit integer type
23 | #endif
24 | 
25 | #endif
26 | 


--------------------------------------------------------------------------------
/math/test/test_defs.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Helper macros for emitting various details about routines for consumption by
 3 |  * runulp.sh.
 4 |  *
 5 |  * Copyright (c) 2024, Arm Limited.
 6 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 7 |  */
 8 | 
 9 | #define TEST_ULP(f, l) TEST_ULP f l
10 | #define TEST_ULP_NONNEAREST(f, l) TEST_ULP_NONNEAREST f l
11 | 
12 | /* TEST_DISABLE_FENV_IF_NOT (f, e) emits TEST_DISABLE_FENV for routine f only
13 |    when e expands to 0, and nothing when it expands to 1.  e lets the entry
14 |    depend on a flag such as WANT_SIMD_EXCEPT - the extra macro level defers
15 |    token pasting by one pass so the flag is expanded to its value first.  */
16 | #define TEST_DISABLE_FENV(f) TEST_DISABLE_FENV f
17 | #define TEST_DISABLE_FENV_IF_NOT(f, e) TEST_DISABLE_FENV_IF_NOT_ (f, e)
18 | #define TEST_DISABLE_FENV_IF_NOT_(f, e) TEST_DISABLE_FENV_IF_NOT_##e (f)
19 | #define TEST_DISABLE_FENV_IF_NOT_0(f) TEST_DISABLE_FENV (f)
20 | #define TEST_DISABLE_FENV_IF_NOT_1(f)
21 | 
22 | #define TEST_INTERVAL(f, lo, hi, n) TEST_INTERVAL f lo hi n
23 | #define TEST_SYM_INTERVAL(f, lo, hi, n)                                       \
24 |   TEST_INTERVAL (f, lo, hi, n)                                                \
25 |   TEST_INTERVAL (f, -lo, -hi, n)
26 | // clang-format off
27 | #define TEST_INTERVAL2(f, xlo, xhi, ylo, yhi, n)                            \
28 |   TEST_INTERVAL f xlo,ylo xhi,yhi n
29 | // clang-format on
30 | 
31 | #define TEST_CONTROL_VALUE(f, c) TEST_CONTROL_VALUE f c
32 | 


--------------------------------------------------------------------------------
/math/test/testcases/directed/acos.tst:
--------------------------------------------------------------------------------
 1 | ; acos.tst
 2 | ;
 3 | ; Copyright (c) 2009-2024, Arm Limited.
 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | func=acos op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
 7 | func=acos op1=fff80000.00000001 result=7ff80000.00000001 errno=0
 8 | func=acos op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
 9 | func=acos op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
10 | func=acos op1=7ff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
11 | func=acos op1=fff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
12 | func=acos op1=00000000.00000000 result=3ff921fb.54442d18.469 errno=0
13 | func=acos op1=80000000.00000000 result=3ff921fb.54442d18.469 errno=0
14 | func=acos op1=3ff00000.00000000 result=00000000.00000000 errno=0
15 | func=acos op1=bff00000.00000000 result=400921fb.54442d18.469 errno=0
16 | func=acos op1=3ff00000.00000001 result=7ff80000.00000001 errno=EDOM status=i
17 | func=acos op1=bff00000.00000001 result=7ff80000.00000001 errno=EDOM status=i
18 | 


--------------------------------------------------------------------------------
/math/test/testcases/directed/acosf.tst:
--------------------------------------------------------------------------------
 1 | ; acosf.tst
 2 | ;
 3 | ; Copyright (c) 2009-2024, Arm Limited.
 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | func=acosf op1=7fc00001 result=7fc00001 errno=0
 7 | func=acosf op1=ffc00001 result=7fc00001 errno=0
 8 | func=acosf op1=7f800001 result=7fc00001 errno=0 status=i
 9 | func=acosf op1=ff800001 result=7fc00001 errno=0 status=i
10 | func=acosf op1=7f800000 result=7fc00001 errno=EDOM status=i
11 | func=acosf op1=ff800000 result=7fc00001 errno=EDOM status=i
12 | func=acosf op1=00000000 result=3fc90fda.a22 errno=0
13 | func=acosf op1=80000000 result=3fc90fda.a22 errno=0
14 | func=acosf op1=3f800000 result=00000000 errno=0
15 | func=acosf op1=bf800000 result=40490fda.a22 errno=0
16 | func=acosf op1=3f800001 result=7fc00001 errno=EDOM status=i
17 | func=acosf op1=bf800001 result=7fc00001 errno=EDOM status=i
18 | func=acosf op1=33000000 result=3fc90fda.622 error=0
19 | func=acosf op1=30000000 result=3fc90fda.a12 error=0
20 | func=acosf op1=2d000000 result=3fc90fda.a21 error=0
21 | func=acosf op1=2a000000 result=3fc90fda.a22 error=0
22 | 


--------------------------------------------------------------------------------
/math/test/testcases/directed/acosh.tst:
--------------------------------------------------------------------------------
 1 | ; acosh.tst
 2 | ;
 3 | ; Copyright (c) 2009-2024, Arm Limited.
 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | func=acosh op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
 7 | func=acosh op1=fff80000.00000001 result=7ff80000.00000001 errno=0
 8 | func=acosh op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
 9 | func=acosh op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
10 | func=acosh op1=7ff00000.00000000 result=7ff00000.00000000 errno=0
11 | func=acosh op1=3ff00000.00000000 result=00000000.00000000 errno=0
12 | func=acosh op1=3fefffff.ffffffff result=7ff80000.00000001 errno=EDOM status=i
13 | func=acosh op1=00000000.00000000 result=7ff80000.00000001 errno=EDOM status=i
14 | func=acosh op1=80000000.00000000 result=7ff80000.00000001 errno=EDOM status=i
15 | func=acosh op1=bfefffff.ffffffff result=7ff80000.00000001 errno=EDOM status=i
16 | func=acosh op1=bff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
17 | func=acosh op1=bff00000.00000001 result=7ff80000.00000001 errno=EDOM status=i
18 | func=acosh op1=fff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
19 | func=acosh op1=7fe01ac0.7f03a83e result=40862e50.541778f1.8cc error=0
20 | 


--------------------------------------------------------------------------------
/math/test/testcases/directed/acoshf.tst:
--------------------------------------------------------------------------------
 1 | ; acoshf.tst
 2 | ;
 3 | ; Copyright (c) 2009-2024, Arm Limited.
 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | func=acoshf op1=7fc00001 result=7fc00001 errno=0
 7 | func=acoshf op1=ffc00001 result=7fc00001 errno=0
 8 | func=acoshf op1=7f800001 result=7fc00001 errno=0 status=i
 9 | func=acoshf op1=ff800001 result=7fc00001 errno=0 status=i
10 | func=acoshf op1=7f800000 result=7f800000 errno=0
11 | func=acoshf op1=3f800000 result=00000000 errno=0
12 | func=acoshf op1=3f7fffff result=7fc00001 errno=EDOM status=i
13 | func=acoshf op1=00000000 result=7fc00001 errno=EDOM status=i
14 | func=acoshf op1=80000000 result=7fc00001 errno=EDOM status=i
15 | func=acoshf op1=bf7fffff result=7fc00001 errno=EDOM status=i
16 | func=acoshf op1=bf800000 result=7fc00001 errno=EDOM status=i
17 | func=acoshf op1=bf800001 result=7fc00001 errno=EDOM status=i
18 | func=acoshf op1=ff800000 result=7fc00001 errno=EDOM status=i
19 | func=acoshf op1=7f767efe result=42b2c19d.83e error=0
20 | 


--------------------------------------------------------------------------------
/math/test/testcases/directed/asin.tst:
--------------------------------------------------------------------------------
 1 | ; asin.tst
 2 | ;
 3 | ; Copyright (c) 2009-2024, Arm Limited.
 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | func=asin op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
 7 | func=asin op1=fff80000.00000001 result=7ff80000.00000001 errno=0
 8 | func=asin op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
 9 | func=asin op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
10 | func=asin op1=7ff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
11 | func=asin op1=fff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
12 | func=asin op1=00000000.00000000 result=00000000.00000000 errno=0
13 | func=asin op1=80000000.00000000 result=80000000.00000000 errno=0
14 | ; Inconsistent behavior was detected for the following 2 cases.
15 | ; No exception is raised with certain versions of glibc. Functions
16 | ; approximated by x near zero may not generate/implement flops and
17 | ; thus may not raise exceptions.
18 | func=asin op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux
19 | func=asin op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux
20 | 
21 | func=asin op1=3ff00000.00000000 result=3ff921fb.54442d18.469 errno=0
22 | func=asin op1=bff00000.00000000 result=bff921fb.54442d18.469 errno=0
23 | func=asin op1=3ff00000.00000001 result=7ff80000.00000001 errno=EDOM status=i
24 | func=asin op1=bff00000.00000001 result=7ff80000.00000001 errno=EDOM status=i
25 | 


--------------------------------------------------------------------------------
/math/test/testcases/directed/asinf.tst:
--------------------------------------------------------------------------------
 1 | ; asinf.tst
 2 | ;
 3 | ; Copyright (c) 2009-2024, Arm Limited.
 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | func=asinf op1=7fc00001 result=7fc00001 errno=0
 7 | func=asinf op1=ffc00001 result=7fc00001 errno=0
 8 | func=asinf op1=7f800001 result=7fc00001 errno=0 status=i
 9 | func=asinf op1=ff800001 result=7fc00001 errno=0 status=i
10 | func=asinf op1=7f800000 result=7fc00001 errno=EDOM status=i
11 | func=asinf op1=ff800000 result=7fc00001 errno=EDOM status=i
12 | func=asinf op1=00000000 result=00000000 errno=0
13 | func=asinf op1=80000000 result=80000000 errno=0
14 | ; Inconsistent behavior was detected for the following 2 cases.
15 | ; No exception is raised with certain versions of glibc. Functions
16 | ; approximated by x near zero may not generate/implement flops and
17 | ; thus may not raise exceptions.
18 | func=asinf op1=00000001 result=00000001 errno=0 maybestatus=ux
19 | func=asinf op1=80000001 result=80000001 errno=0 maybestatus=ux
20 | 
21 | func=asinf op1=3f800000 result=3fc90fda.a22 errno=0
22 | func=asinf op1=bf800000 result=bfc90fda.a22 errno=0
23 | func=asinf op1=3f800001 result=7fc00001 errno=EDOM status=i
24 | func=asinf op1=bf800001 result=7fc00001 errno=EDOM status=i
25 | 


--------------------------------------------------------------------------------
/math/test/testcases/directed/asinh.tst:
--------------------------------------------------------------------------------
 1 | ; asinh.tst
 2 | ;
 3 | ; Copyright (c) 2022-2024, Arm Limited.
 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | func=asinh op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
 7 | func=asinh op1=fff80000.00000001 result=7ff80000.00000001 errno=0
 8 | func=asinh op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
 9 | func=asinh op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
10 | func=asinh op1=7ff00000.00000000 result=7ff00000.00000000 errno=0
11 | func=asinh op1=fff00000.00000000 result=fff00000.00000000 errno=0
12 | func=asinh op1=00000000.00000000 result=00000000.00000000 errno=0
13 | func=asinh op1=80000000.00000000 result=80000000.00000000 errno=0
14 | ; No exception is raised with certain versions of glibc. Functions
15 | ; approximated by x near zero may not generate/implement flops and
16 | ; thus may not raise exceptions.
17 | func=asinh op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux
18 | func=asinh op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux
19 | 


--------------------------------------------------------------------------------
/math/test/testcases/directed/asinhf.tst:
--------------------------------------------------------------------------------
 1 | ; asinhf.tst
 2 | ;
 3 | ; Copyright (c) 2007-2024, Arm Limited.
 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | func=asinhf op1=7fc00001 result=7fc00001 errno=0
 7 | func=asinhf op1=ffc00001 result=7fc00001 errno=0
 8 | func=asinhf op1=7f800001 result=7fc00001 errno=0 status=i
 9 | func=asinhf op1=ff800001 result=7fc00001 errno=0 status=i
10 | func=asinhf op1=7f800000 result=7f800000 errno=0
11 | func=asinhf op1=ff800000 result=ff800000 errno=0
12 | func=asinhf op1=00000000 result=00000000 errno=0
13 | func=asinhf op1=80000000 result=80000000 errno=0
14 | ; No exception is raised on certain machines (different versions of glibc).
15 | ; The same issue is seen with other functions that behave like x close to 0.
16 | ; This may be because some implementations return x without performing any flop.
17 | func=asinhf op1=00000001 result=00000001 errno=0 maybestatus=ux
18 | func=asinhf op1=80000001 result=80000001 errno=0 maybestatus=ux
19 | 


--------------------------------------------------------------------------------
/math/test/testcases/directed/atan.tst:
--------------------------------------------------------------------------------
 1 | ; atan.tst
 2 | ;
 3 | ; Copyright (c) 1999-2024, Arm Limited.
 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | func=atan op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
 7 | func=atan op1=fff80000.00000001 result=7ff80000.00000001 errno=0
 8 | func=atan op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
 9 | func=atan op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
10 | func=atan op1=7ff00000.00000000 result=3ff921fb.54442d18.469 errno=0
11 | func=atan op1=fff00000.00000000 result=bff921fb.54442d18.469 errno=0
12 | func=atan op1=00000000.00000000 result=00000000.00000000 errno=0
13 | func=atan op1=80000000.00000000 result=80000000.00000000 errno=0
14 | ; Inconsistent behavior was detected for the following 2 cases.
15 | ; No exception is raised with certain versions of glibc. Functions
16 | ; approximated by x near zero may not generate/implement flops and
17 | ; thus may not raise exceptions.
18 | func=atan op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux
19 | func=atan op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux
20 | 
21 | func=atan op1=3ff00000.00000000 result=3fe921fb.54442d18.469 errno=0
22 | func=atan op1=bff00000.00000000 result=bfe921fb.54442d18.469 errno=0
23 | 


--------------------------------------------------------------------------------
/math/test/testcases/directed/atanf.tst:
--------------------------------------------------------------------------------
 1 | ; atanf.tst
 2 | ;
 3 | ; Copyright (c) 2007-2024, Arm Limited.
 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | func=atanf op1=7fc00001 result=7fc00001 errno=0
 7 | func=atanf op1=ffc00001 result=7fc00001 errno=0
 8 | func=atanf op1=7f800001 result=7fc00001 errno=0 status=i
 9 | func=atanf op1=ff800001 result=7fc00001 errno=0 status=i
10 | func=atanf op1=7f800000 result=3fc90fda.a22 errno=0
11 | func=atanf op1=ff800000 result=bfc90fda.a22 errno=0
12 | func=atanf op1=00000000 result=00000000 errno=0
13 | func=atanf op1=80000000 result=80000000 errno=0
14 | ; Inconsistent behavior was detected for the following 2 cases.
15 | ; No exception is raised with certain versions of glibc. Functions
16 | ; approximated by x near zero may not generate/implement flops and
17 | ; thus may not raise exceptions.
18 | func=atanf op1=00000001 result=00000001 errno=0 maybestatus=ux
19 | func=atanf op1=80000001 result=80000001 errno=0 maybestatus=ux
20 | 
21 | func=atanf op1=3f800000 result=3f490fda.a22 errno=0
22 | func=atanf op1=bf800000 result=bf490fda.a22 errno=0
23 | 


--------------------------------------------------------------------------------
/math/test/testcases/directed/atanh.tst:
--------------------------------------------------------------------------------
 1 | ; atanh.tst
 2 | ;
 3 | ; Copyright (c) 2009-2024, Arm Limited.
 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | func=atanh op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
 7 | func=atanh op1=fff80000.00000001 result=7ff80000.00000001 errno=0
 8 | func=atanh op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
 9 | func=atanh op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
10 | func=atanh op1=7ff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
11 | func=atanh op1=fff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
12 | func=atanh op1=3ff00000.00000001 result=7ff80000.00000001 errno=EDOM status=i
13 | func=atanh op1=bff00000.00000001 result=7ff80000.00000001 errno=EDOM status=i
14 | func=atanh op1=3ff00000.00000000 result=7ff00000.00000000 errno=ERANGE status=z
15 | func=atanh op1=bff00000.00000000 result=fff00000.00000000 errno=ERANGE status=z
16 | func=atanh op1=00000000.00000000 result=00000000.00000000 errno=0
17 | func=atanh op1=80000000.00000000 result=80000000.00000000 errno=0
18 | ; No exception is raised with certain versions of glibc. Functions
19 | ; approximated by x near zero may not generate/implement flops and
20 | ; thus may not raise exceptions.
21 | func=atanh op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux
22 | func=atanh op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux
23 | 


--------------------------------------------------------------------------------
/math/test/testcases/directed/atanhf.tst:
--------------------------------------------------------------------------------
 1 | ; atanhf.tst
 2 | ;
 3 | ; Copyright (c) 2009-2024, Arm Limited.
 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | func=atanhf op1=7fc00001 result=7fc00001 errno=0
 7 | func=atanhf op1=ffc00001 result=7fc00001 errno=0
 8 | func=atanhf op1=7f800001 result=7fc00001 errno=0 status=i
 9 | func=atanhf op1=ff800001 result=7fc00001 errno=0 status=i
10 | func=atanhf op1=7f800000 result=7fc00001 errno=EDOM status=i
11 | func=atanhf op1=ff800000 result=7fc00001 errno=EDOM status=i
12 | func=atanhf op1=3f800001 result=7fc00001 errno=EDOM status=i
13 | func=atanhf op1=bf800001 result=7fc00001 errno=EDOM status=i
14 | func=atanhf op1=3f800000 result=7f800000 errno=ERANGE status=z
15 | func=atanhf op1=bf800000 result=ff800000 errno=ERANGE status=z
16 | func=atanhf op1=00000000 result=00000000 errno=0
17 | func=atanhf op1=80000000 result=80000000 errno=0
18 | 
19 | ; No exception is raised with certain versions of glibc. Functions
20 | ; approximated by x near zero may not generate/implement flops and
21 | ; thus may not raise exceptions.
22 | func=atanhf op1=00000001 result=00000001 errno=0 maybestatus=ux
23 | func=atanhf op1=80000001 result=80000001 errno=0 maybestatus=ux
24 | 


--------------------------------------------------------------------------------
/math/test/testcases/directed/cbrtf.tst:
--------------------------------------------------------------------------------
 1 | ; cbrtf.tst
 2 | ;
 3 | ; Copyright (c) 2009-2024, Arm Limited.
 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | func=cbrtf op1=7f800000 result=7f800000 errno=0
 7 | func=cbrtf op1=ff800000 result=ff800000 errno=0
 8 | func=cbrtf op1=7f800001 result=7fc00001 errno=0 status=i
 9 | func=cbrtf op1=7fc00001 result=7fc00001 errno=0
10 | func=cbrtf op1=00000000 result=00000000 errno=0
11 | func=cbrtf op1=00000001 result=26a14517.cc7 errno=0
12 | func=cbrtf op1=00000002 result=26cb2ff5.29f errno=0
13 | func=cbrtf op1=00000003 result=26e89768.579 errno=0
14 | func=cbrtf op1=00000004 result=27000000.000 errno=0
15 | func=cbrtf op1=00400000 result=2a4b2ff5.29f errno=0
16 | func=cbrtf op1=00800000 result=2a800000.000 errno=0
17 | func=cbrtf op1=3f800000 result=3f800000.000 errno=0
18 | func=cbrtf op1=40000000 result=3fa14517.cc7 errno=0
19 | func=cbrtf op1=7f7fffff result=54cb2ff4.e63 errno=0
20 | func=cbrtf op1=80000000 result=80000000 errno=0
21 | func=cbrtf op1=80000001 result=a6a14517.cc7 errno=0
22 | func=cbrtf op1=80000002 result=a6cb2ff5.29f errno=0
23 | func=cbrtf op1=80000003 result=a6e89768.579 errno=0
24 | func=cbrtf op1=80000004 result=a7000000.000 errno=0
25 | func=cbrtf op1=80400000 result=aa4b2ff5.29f errno=0
26 | func=cbrtf op1=80800000 result=aa800000.000 errno=0
27 | func=cbrtf op1=bf800000 result=bf800000.000 errno=0
28 | func=cbrtf op1=c0000000 result=bfa14517.cc7 errno=0
29 | func=cbrtf op1=ff7fffff result=d4cb2ff4.e63 errno=0
30 | 


--------------------------------------------------------------------------------
/math/test/testcases/directed/cosf.tst:
--------------------------------------------------------------------------------
 1 | ; cosf.tst - Directed test cases for SP cosine
 2 | ;
 3 | ; Copyright (c) 2007-2019, Arm Limited.
 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | func=cosf op1=7fc00001 result=7fc00001 errno=0
 7 | func=cosf op1=ffc00001 result=7fc00001 errno=0
 8 | func=cosf op1=7f800001 result=7fc00001 errno=0 status=i
 9 | func=cosf op1=ff800001 result=7fc00001 errno=0 status=i
10 | func=cosf op1=7f800000 result=7fc00001 errno=EDOM status=i
11 | func=cosf op1=ff800000 result=7fc00001 errno=EDOM status=i
12 | func=cosf op1=00000000 result=3f800000 errno=0
13 | func=cosf op1=80000000 result=3f800000 errno=0
14 | ; SDCOMP-26094: check cosf in the cases for which the range reducer
15 | ; returns values furthest beyond its nominal upper bound of pi/4.
16 | func=cosf op1=46427f1b result=3f34dc5c.565 error=0
17 | func=cosf op1=4647e568 result=3f34dc33.c1f error=0
18 | func=cosf op1=46428bac result=bf34dbf2.8e3 error=0
19 | func=cosf op1=4647f1f9 result=bf34dbc9.f9b error=0
20 | func=cosf op1=4647fe8a result=3f34db60.313 error=0
21 | func=cosf op1=45d8d7f1 result=bf35006a.7fd error=0
22 | func=cosf op1=45d371a4 result=3f350056.39b error=0
23 | func=cosf op1=45ce0b57 result=bf350041.f38 error=0
24 | func=cosf op1=45d35882 result=bf34ffec.868 error=0
25 | func=cosf op1=45cdf235 result=3f34ffd8.404 error=0
26 | 


--------------------------------------------------------------------------------
/math/test/testcases/directed/cosh.tst:
--------------------------------------------------------------------------------
 1 | ; cosh.tst
 2 | ;
 3 | ; Copyright (c) 1999-2024, Arm Limited.
 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | func=cosh op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
 7 | func=cosh op1=fff80000.00000001 result=7ff80000.00000001 errno=0
 8 | func=cosh op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
 9 | func=cosh op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
10 | func=cosh op1=7ff00000.00000000 result=7ff00000.00000000 errno=0
11 | func=cosh op1=7fefffff.ffffffff result=7ff00000.00000000 errno=ERANGE status=ox
12 | func=cosh op1=fff00000.00000000 result=7ff00000.00000000 errno=0
13 | func=cosh op1=ffefffff.ffffffff result=7ff00000.00000000 errno=ERANGE status=ox
14 | func=cosh op1=00000000.00000000 result=3ff00000.00000000 errno=0
15 | func=cosh op1=80000000.00000000 result=3ff00000.00000000 errno=0
16 | 


--------------------------------------------------------------------------------
/math/test/testcases/directed/coshf.tst:
--------------------------------------------------------------------------------
 1 | ; coshf.tst
 2 | ;
 3 | ; Copyright (c) 2007-2024, Arm Limited.
 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | func=coshf op1=7fc00001 result=7fc00001 errno=0
 7 | func=coshf op1=ffc00001 result=7fc00001 errno=0
 8 | func=coshf op1=7f800001 result=7fc00001 errno=0 status=i
 9 | func=coshf op1=ff800001 result=7fc00001 errno=0 status=i
10 | func=coshf op1=7f800000 result=7f800000 errno=0
11 | func=coshf op1=7f7fffff result=7f800000 errno=ERANGE status=ox
12 | func=coshf op1=ff800000 result=7f800000 errno=0
13 | func=coshf op1=ff7fffff result=7f800000 errno=ERANGE status=ox
14 | func=coshf op1=00000000 result=3f800000 errno=0
15 | func=coshf op1=80000000 result=3f800000 errno=0
16 | 


--------------------------------------------------------------------------------
/math/test/testcases/directed/erf.tst:
--------------------------------------------------------------------------------
 1 | ; erf.tst - Directed test cases for erf
 2 | ;
 3 | ; Copyright (c) 2007-2020, Arm Limited.
 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | func=erf op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
 7 | func=erf op1=fff80000.00000001 result=7ff80000.00000001 errno=0
 8 | func=erf op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
 9 | func=erf op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
10 | func=erf op1=7ff00000.00000000 result=3ff00000.00000000 errno=0
11 | func=erf op1=fff00000.00000000 result=bff00000.00000000 errno=0
12 | func=erf op1=00000000.00000000 result=00000000.00000000 errno=ERANGE
13 | func=erf op1=80000000.00000000 result=80000000.00000000 errno=ERANGE
14 | func=erf op1=00000000.00000001 result=00000000.00000001 errno=0 status=ux
15 | func=erf op1=80000000.00000001 result=80000000.00000001 errno=0 status=ux
16 | func=erf op1=3ff00000.00000000 result=3feaf767.a741088a.c6d errno=0
17 | func=erf op1=bff00000.00000000 result=bfeaf767.a741088a.c6d errno=0
18 | 


--------------------------------------------------------------------------------
/math/test/testcases/directed/erfc.tst:
--------------------------------------------------------------------------------
 1 | ; erfc.tst - Directed test cases for erfc
 2 | ;
 3 | ; Copyright (c) 2022-2024, Arm Limited.
 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | func=erfc op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
 7 | func=erfc op1=fff80000.00000001 result=7ff80000.00000001 errno=0
 8 | func=erfc op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
 9 | func=erfc op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
10 | func=erfc op1=7ff00000.00000000 result=00000000.00000000 errno=0
11 | func=erfc op1=7fefffff.ffffffff result=00000000.00000000 errno=ERANGE status=ux
12 | ; We deliberately turned off errno setting in erf, as the standard simply
13 | ; states that errno `may` be set to ERANGE in case of underflow.
14 | ; As a result, the following condition on errno cannot be satisfied.
15 | ;
16 | ; func=erfc op1=403b44af.48b01531 result=00000000.00000000 errno=ERANGE status=ux
17 | ;
18 | func=erfc op1=c03b44af.48b01531 result=40000000.00000000 errno=0
19 | func=erfc op1=403bffff.ffffffff result=00000000.00000000 errno=ERANGE status=ux
20 | func=erfc op1=c03bffff.ffffffff result=40000000.00000000 errno=0
21 | func=erfc op1=fff00000.00000000 result=40000000.00000000 errno=0
22 | func=erfc op1=00000000.00000000 result=3ff00000.00000000 errno=0
23 | func=erfc op1=80000000.00000000 result=3ff00000.00000000 errno=0
24 | 


--------------------------------------------------------------------------------
/math/test/testcases/directed/erfcf.tst:
--------------------------------------------------------------------------------
 1 | ; erfcf.tst - Directed test cases for erfcf
 2 | ;
 3 | ; Copyright (c) 2007-2024, Arm Limited.
 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | func=erfcf op1=7fc00001 result=7fc00001 errno=0
 7 | func=erfcf op1=ffc00001 result=7fc00001 errno=0
 8 | func=erfcf op1=7f800001 result=7fc00001 errno=0 status=i
 9 | func=erfcf op1=ff800001 result=7fc00001 errno=0 status=i
10 | func=erfcf op1=7f800000 result=00000000 errno=0
11 | func=erfcf op1=7f7fffff result=00000000 errno=ERANGE status=ux
12 | func=erfcf op1=ff800000 result=40000000 errno=0
13 | func=erfcf op1=00000000 result=3f800000 errno=0
14 | func=erfcf op1=80000000 result=3f800000 errno=0
15 | 


--------------------------------------------------------------------------------
/math/test/testcases/directed/erff.tst:
--------------------------------------------------------------------------------
 1 | ; erff.tst
 2 | ;
 3 | ; Copyright (c) 2007-2020, Arm Limited.
 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | func=erff op1=7fc00001 result=7fc00001 errno=0
 7 | func=erff op1=ffc00001 result=7fc00001 errno=0
 8 | func=erff op1=7f800001 result=7fc00001 errno=0 status=i
 9 | func=erff op1=ff800001 result=7fc00001 errno=0 status=i
10 | func=erff op1=7f800000 result=3f800000 errno=0
11 | func=erff op1=ff800000 result=bf800000 errno=0
12 | func=erff op1=00000000 result=00000000 errno=ERANGE
13 | func=erff op1=80000000 result=80000000 errno=ERANGE
14 | func=erff op1=00000001 result=00000001 errno=0 status=ux
15 | func=erff op1=80000001 result=80000001 errno=0 status=ux
16 | func=erff op1=3f800000 result=3f57bb3d.3a0 errno=0
17 | func=erff op1=bf800000 result=bf57bb3d.3a0 errno=0
18 | 


--------------------------------------------------------------------------------
/math/test/testcases/directed/exp10.tst:
--------------------------------------------------------------------------------
 1 | ; Directed test cases for exp10
 2 | ;
 3 | ; Copyright (c) 2023, Arm Limited.
 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | func=exp10 op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
 7 | func=exp10 op1=fff80000.00000001 result=7ff80000.00000001 errno=0
 8 | func=exp10 op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
 9 | func=exp10 op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
10 | func=exp10 op1=7ff00000.00000000 result=7ff00000.00000000 errno=0
11 | func=exp10 op1=7fefffff.ffffffff result=7ff00000.00000000 errno=ERANGE status=ox
12 | func=exp10 op1=fff00000.00000000 result=00000000.00000000 errno=0
13 | func=exp10 op1=ffefffff.ffffffff result=00000000.00000000 errno=ERANGE status=ux
14 | func=exp10 op1=00000000.00000000 result=3ff00000.00000000 errno=0
15 | func=exp10 op1=80000000.00000000 result=3ff00000.00000000 errno=0
16 | 


--------------------------------------------------------------------------------
/math/test/testcases/directed/exp2f.tst:
--------------------------------------------------------------------------------
 1 | ; exp2f.tst - Directed test cases for exp2f
 2 | ;
 3 | ; Copyright (c) 2017-2019, Arm Limited.
 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | func=exp2f op1=7fc00001 result=7fc00001 errno=0
 7 | func=exp2f op1=ffc00001 result=7fc00001 errno=0
 8 | func=exp2f op1=7f800001 result=7fc00001 errno=0 status=i
 9 | func=exp2f op1=ff800001 result=7fc00001 errno=0 status=i
10 | func=exp2f op1=7f800000 result=7f800000 errno=0
11 | func=exp2f op1=7f7fffff result=7f800000 errno=ERANGE status=ox
12 | func=exp2f op1=ff800000 result=00000000 errno=0
13 | func=exp2f op1=ff7fffff result=00000000 errno=ERANGE status=ux
14 | func=exp2f op1=00000000 result=3f800000 errno=0
15 | func=exp2f op1=80000000 result=3f800000 errno=0
16 | func=exp2f op1=42fa0001 result=7e00002c.5c8 errno=0
17 | func=exp2f op1=42ffffff result=7f7fffa7.470 errno=0
18 | func=exp2f op1=43000000 result=7f800000 errno=ERANGE status=ox
19 | func=exp2f op1=43000001 result=7f800000 errno=ERANGE status=ox
20 | func=exp2f op1=c2fa0001 result=00ffffa7.470 errno=0
21 | func=exp2f op1=c2fc0000 result=00800000 errno=0
22 | func=exp2f op1=c2fc0001 result=007fffd3.a38 errno=0 status=ux
23 | func=exp2f op1=c3150000 result=00000001 errno=0
24 | func=exp2f op1=c3158000 result=00000000.800 errno=ERANGE status=ux
25 | func=exp2f op1=c3165432 result=00000000.4bd errno=ERANGE status=ux
26 | 


--------------------------------------------------------------------------------
/math/test/testcases/directed/expf.tst:
--------------------------------------------------------------------------------
 1 | ; expf.tst - Directed test cases for expf
 2 | ;
 3 | ; Copyright (c) 2007-2019, Arm Limited.
 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | func=expf op1=7fc00001 result=7fc00001 errno=0
 7 | func=expf op1=ffc00001 result=7fc00001 errno=0
 8 | func=expf op1=7f800001 result=7fc00001 errno=0 status=i
 9 | func=expf op1=ff800001 result=7fc00001 errno=0 status=i
10 | func=expf op1=7f800000 result=7f800000 errno=0
11 | func=expf op1=7f7fffff result=7f800000 errno=ERANGE status=ox
12 | func=expf op1=ff800000 result=00000000 errno=0
13 | func=expf op1=ff7fffff result=00000000 errno=ERANGE status=ux
14 | func=expf op1=00000000 result=3f800000 errno=0
15 | func=expf op1=80000000 result=3f800000 errno=0
16 | func=expf op1=42affff8 result=7ef87ed4.e0c errno=0
17 | func=expf op1=42b00008 result=7ef88698.f67 errno=0
18 | func=expf op1=42cffff8 result=7f800000 errno=ERANGE status=ox
19 | func=expf op1=42d00008 result=7f800000 errno=ERANGE status=ox
20 | func=expf op1=c2affff8 result=0041eecc.041 errno=0 status=ux
21 | func=expf op1=c2b00008 result=0041ecbc.95e errno=0 status=ux
22 | func=expf op1=c2cffff8 result=00000000 errno=ERANGE status=ux
23 | func=expf op1=c2d00008 result=00000000 errno=ERANGE status=ux
24 | 


--------------------------------------------------------------------------------
/math/test/testcases/directed/expm1.tst:
--------------------------------------------------------------------------------
 1 | ; expm1.tst
 2 | ;
 3 | ; Copyright (c) 2009-2024, Arm Limited.
 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | func=expm1 op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
 7 | func=expm1 op1=fff80000.00000001 result=7ff80000.00000001 errno=0
 8 | func=expm1 op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
 9 | func=expm1 op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
10 | func=expm1 op1=7ff00000.00000000 result=7ff00000.00000000 errno=0
11 | func=expm1 op1=7fefffff.ffffffff result=7ff00000.00000000 errno=ERANGE status=ox
12 | func=expm1 op1=fff00000.00000000 result=bff00000.00000000 errno=0
13 | func=expm1 op1=ffefffff.ffffffff result=bff00000.00000000 errno=0
14 | func=expm1 op1=00000000.00000000 result=00000000.00000000 errno=0
15 | func=expm1 op1=80000000.00000000 result=80000000.00000000 errno=0
16 | ; Inconsistent behavior was detected for the following 2 cases.
17 | ; No exception is raised with certain versions of glibc. Functions
18 | ; approximated by x near zero may not generate/implement flops and
19 | ; thus may not raise exceptions.
20 | func=expm1 op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux
21 | func=expm1 op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux
22 | 


--------------------------------------------------------------------------------
/math/test/testcases/directed/log.tst:
--------------------------------------------------------------------------------
 1 | ; Directed test cases for log
 2 | ;
 3 | ; Copyright (c) 2018-2019, Arm Limited.
 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | func=log op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
 7 | func=log op1=fff80000.00000001 result=7ff80000.00000001 errno=0
 8 | func=log op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
 9 | func=log op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
10 | func=log op1=7ff00000.00000000 result=7ff00000.00000000 errno=0
11 | func=log op1=fff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
12 | func=log op1=7fefffff.ffffffff result=40862e42.fefa39ef.354 errno=0
13 | func=log op1=ffefffff.ffffffff result=7ff80000.00000001 errno=EDOM status=i
14 | func=log op1=3ff00000.00000000 result=00000000.00000000 errno=0
15 | func=log op1=bff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
16 | func=log op1=00000000.00000000 result=fff00000.00000000 errno=ERANGE status=z
17 | func=log op1=80000000.00000000 result=fff00000.00000000 errno=ERANGE status=z
18 | func=log op1=00000000.00000001 result=c0874385.446d71c3.639 errno=0
19 | func=log op1=80000000.00000001 result=7ff80000.00000001 errno=EDOM status=i
20 | func=log op1=40000000.00000000 result=3fe62e42.fefa39ef.358 errno=0
21 | func=log op1=3fe00000.00000000 result=bfe62e42.fefa39ef.358 errno=0
22 | 


--------------------------------------------------------------------------------
/math/test/testcases/directed/log10.tst:
--------------------------------------------------------------------------------
 1 | ; log10.tst
 2 | ;
 3 | ; Copyright (c) 2007-2024, Arm Limited.
 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | func=log10 op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
 7 | func=log10 op1=fff80000.00000001 result=7ff80000.00000001 errno=0
 8 | func=log10 op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
 9 | func=log10 op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
10 | func=log10 op1=fff02000.00000000 result=7ff80000.00000001 errno=0 status=i
11 | func=log10 op1=7ff00000.00000000 result=7ff00000.00000000 errno=0
12 | func=log10 op1=3ff00000.00000000 result=00000000.00000000 errno=0
13 | func=log10 op1=fff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
14 | func=log10 op1=00000000.00000000 result=fff00000.00000000 errno=ERANGE status=z
15 | func=log10 op1=80000000.00000000 result=fff00000.00000000 errno=ERANGE status=z
16 | func=log10 op1=80000000.00000001 result=7ff80000.00000001 errno=EDOM status=i
17 | 


--------------------------------------------------------------------------------
/math/test/testcases/directed/log1p.tst:
--------------------------------------------------------------------------------
 1 | ; log1p.tst
 2 | ;
 3 | ; Copyright (c) 2009-2024, Arm Limited.
 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | func=log1p op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
 7 | func=log1p op1=fff80000.00000001 result=7ff80000.00000001 errno=0
 8 | func=log1p op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
 9 | func=log1p op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
10 | func=log1p op1=fff02000.00000000 result=7ff80000.00000001 errno=0 status=i
11 | func=log1p op1=7ff00000.00000000 result=7ff00000.00000000 errno=0
12 | ; Cases 6, 9, 10, 11, 12 fail with certain versions of GLIBC and not others.
13 | ; The main reason seems to be the handling of errno and exceptions.
14 | 
15 | func=log1p op1=00000000.00000000 result=00000000.00000000 errno=0
16 | func=log1p op1=80000000.00000000 result=80000000.00000000 errno=0
17 | 
18 | ; No exception is raised with certain versions of glibc. Functions
19 | ; approximated by x near zero may not generate/implement flops and
20 | ; thus may not raise exceptions.
21 | func=log1p op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux
22 | func=log1p op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux
23 | 


--------------------------------------------------------------------------------
/math/test/testcases/directed/log2.tst:
--------------------------------------------------------------------------------
 1 | ; Directed test cases for log2
 2 | ;
 3 | ; Copyright (c) 2018-2019, Arm Limited.
 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | func=log2 op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
 7 | func=log2 op1=fff80000.00000001 result=7ff80000.00000001 errno=0
 8 | func=log2 op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
 9 | func=log2 op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
10 | func=log2 op1=7ff00000.00000000 result=7ff00000.00000000 errno=0
11 | func=log2 op1=fff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
12 | func=log2 op1=7fefffff.ffffffff result=408fffff.ffffffff.ffa errno=0
13 | func=log2 op1=ffefffff.ffffffff result=7ff80000.00000001 errno=EDOM status=i
14 | func=log2 op1=3ff00000.00000000 result=00000000.00000000 errno=0
15 | func=log2 op1=bff00000.00000000 result=7ff80000.00000001 errno=EDOM status=i
16 | func=log2 op1=00000000.00000000 result=fff00000.00000000 errno=ERANGE status=z
17 | func=log2 op1=80000000.00000000 result=fff00000.00000000 errno=ERANGE status=z
18 | func=log2 op1=00000000.00000001 result=c090c800.00000000 errno=0
19 | func=log2 op1=80000000.00000001 result=7ff80000.00000001 errno=EDOM status=i
20 | func=log2 op1=40000000.00000000 result=3ff00000.00000000 errno=0
21 | func=log2 op1=3fe00000.00000000 result=bff00000.00000000 errno=0
22 | 


--------------------------------------------------------------------------------
/math/test/testcases/directed/log2f.tst:
--------------------------------------------------------------------------------
 1 | ; log2f.tst - Directed test cases for log2f
 2 | ;
 3 | ; Copyright (c) 2017-2019, Arm Limited.
 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | func=log2f op1=7fc00001 result=7fc00001 errno=0
 7 | func=log2f op1=ffc00001 result=7fc00001 errno=0
 8 | func=log2f op1=7f800001 result=7fc00001 errno=0 status=i
 9 | func=log2f op1=ff800001 result=7fc00001 errno=0 status=i
10 | func=log2f op1=ff810000 result=7fc00001 errno=0 status=i
11 | func=log2f op1=7f800000 result=7f800000 errno=0
12 | func=log2f op1=ff800000 result=7fc00001 errno=EDOM status=i
13 | func=log2f op1=3f800000 result=00000000 errno=0
14 | func=log2f op1=00000000 result=ff800000 errno=ERANGE status=z
15 | func=log2f op1=80000000 result=ff800000 errno=ERANGE status=z
16 | func=log2f op1=80000001 result=7fc00001 errno=EDOM status=i
17 | 
18 | func=log2f op1=3f7d70a4 result=bc6d8f8b.7d4 error=0
19 | func=log2f op1=3f604189 result=be4394c8.395 error=0
20 | func=log2f op1=3f278034 result=bf1caa73.88e error=0
21 | func=log2f op1=3edd3c36 result=bf9af3b9.619 error=0
22 | func=log2f op1=3e61259a result=c00bdb95.650 error=0
23 | func=log2f op1=3f8147ae result=3c6b3267.d6a error=0
24 | func=log2f op1=3f8fbe77 result=3e2b5fe2.a1c error=0
25 | func=log2f op1=3fac3eea result=3edb4d5e.1fc error=0
26 | func=log2f op1=3fd6e632 result=3f3f5d3a.827 error=0
27 | func=log2f op1=40070838 result=3f89e055.a0a error=0
28 | 


--------------------------------------------------------------------------------
/math/test/testcases/directed/sinf.tst:
--------------------------------------------------------------------------------
 1 | ; sinf.tst - Directed test cases for SP sine
 2 | ;
 3 | ; Copyright (c) 2007-2019, Arm Limited.
 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | 
 7 | func=sinf op1=7fc00001 result=7fc00001 errno=0
 8 | func=sinf op1=ffc00001 result=7fc00001 errno=0
 9 | func=sinf op1=7f800001 result=7fc00001 errno=0 status=i
10 | func=sinf op1=ff800001 result=7fc00001 errno=0 status=i
11 | func=sinf op1=7f800000 result=7fc00001 errno=EDOM status=i
12 | func=sinf op1=ff800000 result=7fc00001 errno=EDOM status=i
13 | func=sinf op1=00000000 result=00000000 errno=0
14 | func=sinf op1=80000000 result=80000000 errno=0
15 | ; Directed test for a failure I found while developing mathbench
16 | func=sinf op1=c70d39a1 result=be37fad5.7ed errno=0
17 | ; SDCOMP-26094: check sinf in the cases for which the range reducer
18 | ; returns values furthest beyond its nominal upper bound of pi/4.
19 | func=sinf op1=46427f1b result=3f352d80.f9b error=0
20 | func=sinf op1=4647e568 result=3f352da9.7be error=0
21 | func=sinf op1=46428bac result=bf352dea.924 error=0
22 | func=sinf op1=4647f1f9 result=bf352e13.146 error=0
23 | func=sinf op1=4647fe8a result=3f352e7c.ac9 error=0
24 | func=sinf op1=45d8d7f1 result=3f35097b.cb0 error=0
25 | func=sinf op1=45d371a4 result=bf350990.102 error=0
26 | func=sinf op1=45ce0b57 result=3f3509a4.554 error=0
27 | func=sinf op1=45d35882 result=3f3509f9.bdb error=0
28 | func=sinf op1=45cdf235 result=bf350a0e.02c error=0
29 | 


--------------------------------------------------------------------------------
/math/test/testcases/directed/sinh.tst:
--------------------------------------------------------------------------------
 1 | ; sinh.tst
 2 | ;
 3 | ; Copyright (c) 1999-2024, Arm Limited.
 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | func=sinh op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
 7 | func=sinh op1=fff80000.00000001 result=7ff80000.00000001 errno=0
 8 | func=sinh op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
 9 | func=sinh op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
10 | func=sinh op1=7ff00000.00000000 result=7ff00000.00000000 errno=0
11 | func=sinh op1=7fefffff.ffffffff result=7ff00000.00000000 errno=ERANGE status=ox
12 | func=sinh op1=fff00000.00000000 result=fff00000.00000000 errno=0
13 | func=sinh op1=ffefffff.ffffffff result=fff00000.00000000 errno=ERANGE status=ox
14 | func=sinh op1=00000000.00000000 result=00000000.00000000 errno=0
15 | func=sinh op1=80000000.00000000 result=80000000.00000000 errno=0
16 | 
17 | ; No exception is raised with certain versions of glibc. Functions
18 | ; approximated by x near zero may not generate/implement flops and
19 | ; thus may not raise exceptions.
20 | func=sinh op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux
21 | func=sinh op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux
22 | 


--------------------------------------------------------------------------------
/math/test/testcases/directed/sinhf.tst:
--------------------------------------------------------------------------------
 1 | ; sinhf.tst
 2 | ;
 3 | ; Copyright (c) 2009-2024, Arm Limited.
 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | func=sinhf op1=7fc00001 result=7fc00001 errno=0
 7 | func=sinhf op1=ffc00001 result=7fc00001 errno=0
 8 | func=sinhf op1=7f800001 result=7fc00001 errno=0 status=i
 9 | func=sinhf op1=ff800001 result=7fc00001 errno=0 status=i
10 | func=sinhf op1=7f800000 result=7f800000 errno=0
11 | func=sinhf op1=7f7fffff result=7f800000 errno=ERANGE status=ox
12 | func=sinhf op1=ff800000 result=ff800000 errno=0
13 | func=sinhf op1=ff7fffff result=ff800000 errno=ERANGE status=ox
14 | func=sinhf op1=00000000 result=00000000 errno=0
15 | func=sinhf op1=80000000 result=80000000 errno=0
16 | 
17 | ; No exception is raised with certain versions of glibc. Functions
18 | ; approximated by x near zero may not generate/implement flops and
19 | ; thus may not raise exceptions.
20 | func=sinhf op1=00000001 result=00000001 errno=0 maybestatus=ux
21 | func=sinhf op1=80000001 result=80000001 errno=0 maybestatus=ux
22 | 


--------------------------------------------------------------------------------
/math/test/testcases/directed/tanf.tst:
--------------------------------------------------------------------------------
 1 | ; tanf.tst
 2 | ;
 3 | ; Copyright (c) 2022-2024, Arm Limited.
 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | func=tanf op1=7fc00001 result=7fc00001 errno=0
 7 | func=tanf op1=ffc00001 result=7fc00001 errno=0
 8 | func=tanf op1=7f800001 result=7fc00001 errno=0 status=i
 9 | func=tanf op1=ff800001 result=7fc00001 errno=0 status=i
10 | func=tanf op1=7f800000 result=7fc00001 errno=EDOM status=i
11 | func=tanf op1=ff800000 result=7fc00001 errno=EDOM status=i
12 | func=tanf op1=00000000 result=00000000 errno=0
13 | func=tanf op1=80000000 result=80000000 errno=0
14 | ; SDCOMP-26094: check tanf in the cases for which the range reducer
15 | ; returns values furthest beyond its nominal upper bound of pi/4.
16 | func=tanf op1=46427f1b result=3f80396d.599 error=0
17 | func=tanf op1=4647e568 result=3f8039a6.c9f error=0
18 | func=tanf op1=46428bac result=3f803a03.148 error=0
19 | func=tanf op1=4647f1f9 result=3f803a3c.852 error=0
20 | func=tanf op1=4647fe8a result=3f803ad2.410 error=0
21 | func=tanf op1=45d8d7f1 result=bf800669.901 error=0
22 | func=tanf op1=45d371a4 result=bf800686.3cd error=0
23 | func=tanf op1=45ce0b57 result=bf8006a2.e9a error=0
24 | func=tanf op1=45d35882 result=bf80071b.bc4 error=0
25 | func=tanf op1=45cdf235 result=bf800738.693 error=0
26 | 


--------------------------------------------------------------------------------
/math/test/testcases/directed/tanh.tst:
--------------------------------------------------------------------------------
 1 | ; tanh.tst
 2 | ;
 3 | ; Copyright (c) 1999-2024, Arm Limited.
 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | func=tanh op1=7ff80000.00000001 result=7ff80000.00000001 errno=0
 7 | func=tanh op1=fff80000.00000001 result=7ff80000.00000001 errno=0
 8 | func=tanh op1=7ff00000.00000001 result=7ff80000.00000001 errno=0 status=i
 9 | func=tanh op1=fff00000.00000001 result=7ff80000.00000001 errno=0 status=i
10 | func=tanh op1=7ff00000.00000000 result=3ff00000.00000000 errno=0
11 | func=tanh op1=fff00000.00000000 result=bff00000.00000000 errno=0
12 | func=tanh op1=00000000.00000000 result=00000000.00000000 errno=0
13 | func=tanh op1=80000000.00000000 result=80000000.00000000 errno=0
14 | ; No exception is raised with certain versions of glibc. Functions
15 | ; approximated by x near zero may not generate/implement flops and
16 | ; thus may not raise exceptions.
17 | func=tanh op1=00000000.00000001 result=00000000.00000001 errno=0 maybestatus=ux
18 | func=tanh op1=80000000.00000001 result=80000000.00000001 errno=0 maybestatus=ux
19 | 


--------------------------------------------------------------------------------
/math/test/testcases/directed/tanhf.tst:
--------------------------------------------------------------------------------
 1 | ; tanhf.tst
 2 | ;
 3 | ; Copyright (c) 2007-2024, Arm Limited.
 4 | ; SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | func=tanhf op1=7fc00001 result=7fc00001 errno=0
 7 | func=tanhf op1=ffc00001 result=7fc00001 errno=0
 8 | func=tanhf op1=7f800001 result=7fc00001 errno=0 status=i
 9 | func=tanhf op1=ff800001 result=7fc00001 errno=0 status=i
10 | func=tanhf op1=7f800000 result=3f800000 errno=0
11 | func=tanhf op1=ff800000 result=bf800000 errno=0
12 | func=tanhf op1=00000000 result=00000000 errno=0
13 | func=tanhf op1=80000000 result=80000000 errno=0
14 | ; No exception is raised with certain versions of glibc. Functions
15 | ; approximated by x near zero may not generate/implement flops and
16 | ; thus may not raise exceptions.
17 | ; func=tanhf op1=00000001 result=00000001 errno=0 maybestatus=ux
18 | ; func=tanhf op1=80000001 result=80000001 errno=0 maybestatus=ux
19 | func=tanhf op1=00000001 result=00000001 errno=0 maybestatus=ux
20 | func=tanhf op1=80000001 result=80000001 errno=0 maybestatus=ux
21 | 


--------------------------------------------------------------------------------
/math/test/testcases/random/double.tst:
--------------------------------------------------------------------------------
 1 | !! double.tst - Random test case specification for DP functions
 2 | !!
 3 | !! Copyright (c) 1999-2019, Arm Limited.
 4 | !! SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | test exp 10000
 7 | test exp2 10000
 8 | test log 10000
 9 | test log2 10000
10 | test pow 40000
11 | 


--------------------------------------------------------------------------------
/math/test/testcases/random/float.tst:
--------------------------------------------------------------------------------
 1 | !! float.tst - Random test case specification for SP functions
 2 | !!
 3 | !! Copyright (c) 1999-2019, Arm Limited.
 4 | !! SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | test sinf 10000
 7 | test cosf 10000
 8 | test sincosf_sinf 5000
 9 | test sincosf_cosf 5000
10 | test tanf 10000
11 | test expf 10000
12 | test exp2f 10000
13 | test logf 10000
14 | test log2f 10000
15 | test powf 10000
16 | 


--------------------------------------------------------------------------------
/math/tools/asin.sollya:
--------------------------------------------------------------------------------
 1 | // polynomial for approximating asin(x)
 2 | //
 3 | // Copyright (c) 2023-2024, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | f = asin(x);
 7 | dtype = double;
 8 | 
 9 | prec=256;
10 | 
11 | a = 0x1p-106;
12 | b = 0.25;
13 | 
14 | deg = 11;
15 | 
16 | backward = proc(poly, d) {
17 |   return d + d ^ 3 * poly(d * d);
18 | };
19 | 
20 | forward = proc(f, d) {
21 |   return (f(sqrt(d))-sqrt(d))/(d*sqrt(d));
22 | };
23 | 
24 | poly = fpminimax(forward(f, x), [|0,...,deg|], [|dtype ...|], [a;b], relative, floating);
25 | 
26 | display = hexadecimal!;
27 | print("rel error:", dirtyinfnorm(1-backward(poly, x)/f(x), [a;b]));
28 | print("in [", a, b, "]");
29 | for i from 0 to deg do print(coeff(poly, i));
30 | 


--------------------------------------------------------------------------------
/math/tools/asinf.sollya:
--------------------------------------------------------------------------------
 1 | // polynomial for approximating asinf(x)
 2 | //
 3 | // Copyright (c) 2023-2024, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | f = asin(x);
 7 | dtype = single;
 8 | 
 9 | a = 0x1p-24;
10 | b = 0.25;
11 | 
12 | deg = 4;
13 | 
14 | backward = proc(poly, d) {
15 |   return d + d ^ 3 * poly(d * d);
16 | };
17 | 
18 | forward = proc(f, d) {
19 |   return (f(sqrt(d))-sqrt(d))/(d*sqrt(d));
20 | };
21 | 
22 | approx = proc(poly, d) {
23 |   return remez(1 - poly(x) / forward(f, x), deg - d, [a;b], x^d/forward(f, x), 1e-16);
24 | };
25 | 
26 | poly = 0;
27 | for i from 0 to deg do {
28 |   i;
29 |   p = roundcoefficients(approx(poly,i), [|dtype ...|]);
30 |   poly = poly + x^i*coeff(p,0);
31 | };
32 | 
33 | display = hexadecimal!;
34 | print("rel error:", accurateinfnorm(1-backward(poly, x)/f(x), [a;b], 30));
35 | print("in [", a, b, "]");
36 | for i from 0 to deg do print(coeff(poly, i));
37 | 


--------------------------------------------------------------------------------
/math/tools/asinh.sollya:
--------------------------------------------------------------------------------
 1 | // polynomial for approximating asinh(x)
 2 | //
 3 | // Copyright (c) 2022-2024, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | // Polynomial is used in [2^-26, 1]. However it is least accurate close to 1, so
 7 | // we use 2^-6 as the lower bound for coeff generation, which yields sufficiently
 8 | // accurate results in [2^-26, 2^-6].
 9 | a = 0x1p-6; // lower bound of the fitting interval (see note above on why 2^-6)
10 | b = 1.0; // upper bound of the fitting interval
11 | deg = 17; // polynomial degree read by approx and both loops below; was never defined. NOTE(review): 17 assumes the 18-coefficient asinh polynomial - confirm
12 | f = (asinh(sqrt(x)) - sqrt(x))/x^(3/2);
13 | 
14 | approx = proc(poly, d) {
15 |   return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
16 | };
17 | 
18 | poly = 0;
19 | for i from 0 to deg do {
20 |   i;
21 |   p = roundcoefficients(approx(poly,i), [|D ...|]);
22 |   poly = poly + x^i*coeff(p,0);
23 | };
24 | 
25 | 
26 | display = hexadecimal;
27 | print("coeffs:");
28 | for i from 0 to deg do coeff(poly,i);
29 | 


--------------------------------------------------------------------------------
/math/tools/asinhf.sollya:
--------------------------------------------------------------------------------
 1 | // polynomial for approximating asinh(x)
 2 | //
 3 | // Copyright (c) 2022-2024, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | deg = 9;
 7 | 
 8 | a = 0x1.0p-12;
 9 | b = 1.0;
10 | 
11 | f = proc(y) { // target function: asinh evaluated at the argument y
12 |   return asinh(y); // use the parameter rather than the global free variable, so f(x) still yields asinh(x)
13 | };
14 | 
15 | approx = proc(poly, d) {
16 |   return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
17 | };
18 | 
19 | poly = x;
20 | for i from 2 to deg do {
21 |   p = roundcoefficients(approx(poly,i), [|SG ...|]);
22 |   poly = poly + x^i*coeff(p,0);
23 | };
24 | 
25 | display = hexadecimal;
26 | print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
27 | print("in [",a,b,"]");
28 | print("coeffs:");
29 | for i from 2 to deg do coeff(poly,i);
30 | 


--------------------------------------------------------------------------------
/math/tools/asinpif.sollya:
--------------------------------------------------------------------------------
 1 | // polynomial for approximating asinpif(x)
 2 | //
 3 | // Copyright (c) 2025, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | f = asin(x)/pi;
 7 | dtype = single;
 8 | 
 9 | // x^2 lives in [0, 0.25] if x lives in [0, 0.5]
10 | a = 0x1p-126; // a!=0 facilitates convergence of algorithm
11 | b = 0.25;
12 | 
13 | deg = 5;
14 | 
15 | // We look for a polynomial in x^2 such that f(x) ~ x/pi + x^3 * poly(x^2)
16 | // basically asin(x)/pi ~ 1/pi * x + c0 * x^3 + c1 * x^5 + ...
17 | backward = proc(poly, d) {
18 |   return d/pi + d ^ 3 * poly(d * d);
19 | };
20 | 
21 | 
22 | forward = proc(f, d) {
23 |   return (f(sqrt(d))-sqrt(d)/pi)/(d*sqrt(d));
24 | };
25 | 
26 | 
27 | // optimise polynomial iteratively, while minimising relative norm (f(x) - poly(x))/f(x)
28 | approx = proc(poly, d) {
29 |   return remez(1 - poly(x) / forward(f, x), deg - d, [a;b], x^d/forward(f, x), 1e-16);
30 | };
31 | 
32 | poly = 0;
33 | for i from 0 to deg do {
34 |   i;
35 |   p = roundcoefficients(approx(poly,i), [|dtype ...|]);
36 |   poly = poly + x^i*coeff(p,0);
37 | };
38 | 
39 | // Display relative norm (should be under 2^-23) and coefficients
40 | display = hexadecimal!;
41 | print("rel error:", dirtyinfnorm(1-backward(poly, x)/f(x), [a;b])); // fast but approximate
42 | print("in [", a, b, "]");
43 | for i from 0 to deg do print(coeff(poly, i));


--------------------------------------------------------------------------------
/math/tools/atan.sollya:
--------------------------------------------------------------------------------
 1 | // polynomial for approximating atan(x) and atan2(y, x)
 2 | //
 3 | // Copyright (c) 2022-2025, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | // atan is odd, so approximate with an odd polynomial:
 7 | // c0 * x + c1 * x^3 + c2 * x^5 + c3 * x^7 + ...
 8 | // We generate c0, c1, c2, ... such that we can approximate atan(x) by:
 9 | // x * (c0 + c1 * x^2 + c2 * x^4 + ...)
10 | 
11 | f = atan(x);
12 | dtype = double;
13 | 
14 | deg = 20;
15 | 
16 | a = 0x1.0p-1022;
17 | b = 1;
18 | 
19 | // f(x) ~ x * poly(x^2)
20 | backward = proc(poly, d) {
21 |   return d * poly(d * d);
22 | };
23 | 
24 | // We need to feed remez algorithm with the inverse of the backward transform
25 | // g(y) such that g(x^2) = poly(x^2)
26 | forward = proc(f, d) {
27 |   return f(sqrt(d))/(sqrt(d));
28 | };
29 | 
30 | 
31 | approx = proc(poly, d) {
32 |   return remez(forward(f, x) - poly(x), deg - d, [a;b], x^d, 1e-16);
33 | };
34 | 
35 | p = 0;
36 | poly = 0;
37 | for i from 0 to deg do {
38 |   i;
39 |   p = roundcoefficients(approx(poly,i), [|dtype ...|]);
40 |   poly = poly + x^i*coeff(p,0);
41 | };
42 | 
43 | display = hexadecimal;
44 | print("rel error:", dirtyinfnorm(1-backward(poly, x)/f(x), [a;b]));
45 | print("in [", a, b, "]");
46 | for i from 0 to deg do print(coeff(poly, i));


--------------------------------------------------------------------------------
/math/tools/atanf.sollya:
--------------------------------------------------------------------------------
 1 | // polynomial for approximating atanf(x)
 2 | //
 3 | // Copyright (c) 2022-2025, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | // Generate list of monomials:
 7 | // Taylor series of atan is of the form x + ax^3 + bx^5 + cx^7 + ...
 8 | // So generate c0, c1, c2, ... such that we can approximate atan(x) by:
 9 | // x * (c0 + c1 * x^2 + c2 * x^4 + ...)
10 | 
11 | f = atan(x);
12 | dtype = single;
13 | deg = 8;
14 | 
15 | a = 0x1.0p-126;
16 | b = 1;
17 | 
18 | // f(x) ~ x * poly(x^2)
19 | backward = proc(poly, d) {
20 |   return d * poly(d * d);
21 | };
22 | 
23 | // We need to feed remez algorithm with the inverse of the backward transform
24 | // g(y) such that g(x^2) = poly(x^2)
25 | forward = proc(f, d) {
26 |   return f(sqrt(d))/(sqrt(d));
27 | };
28 | 
29 | // Iteratively optimise polynomial coefficients, while minimising relative norm (f(x) - poly(x))/f(x)
30 | approx = proc(poly, d) {
31 |   return remez(1 - poly(x) / forward(f, x), deg - d, [a;b], x^d/forward(f, x), 1e-16);
32 | };
33 | 
34 | p = 0;
35 | poly = 0;
36 | for i from 0 to deg do {
37 |   i;
38 |   p = roundcoefficients(approx(poly,i), [|dtype ...|]);
39 |   poly = poly + x^i*coeff(p,0);
40 | };
41 | 
42 | 
43 | display = hexadecimal;
44 | print("rel error:", dirtyinfnorm(1-backward(poly, x)/f(x), [a;b]));
45 | print("in [", a, b, "]");
46 | for i from 0 to deg do print(coeff(poly, i));


--------------------------------------------------------------------------------
/math/tools/atanpi.sollya:
--------------------------------------------------------------------------------
 1 | // polynomial for approximating atanpif(x)
 2 | //
 3 | // Copyright (c) 2025, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | f = atan(x)/pi;
 7 | dtype = single;
 8 | 
 9 | a = 0x1.0p-126;
10 | b = 1;
11 | 
12 | deg = 8;
13 | 
14 | // We look for a polynomial in x^2 such that f(x) ~ x * poly(x^2)
15 | // basically atan(x)/pi ~  c0 * x + c1 * x^3 + c2 * x^5 + ...
16 | backward = proc(poly, d) {
17 |   return d * poly(d * d);
18 | };
19 | 
20 | // We need to feed remez algorithm with the inverse of the backward transform
21 | // g(y) such that g(x^2) = poly(x^2)
22 | forward = proc(f, d) {
23 |   return f(sqrt(d))/(sqrt(d));
24 | };
25 | 
26 | // Optimise polynomial iteratively, minimising relative norm (f(x) - poly(x))/f(x)
27 | approx = proc(poly, d) {
28 |   return remez(1 - poly(x) / forward(f, x), deg - d, [a;b], x^d/forward(f, x), 1e-16);
29 | };
30 | 
31 | p = 0;
32 | poly = 0;
33 | for i from 0 to deg do {
34 |   i;
35 |   p = roundcoefficients(approx(poly,i), [|dtype ...|]);
36 |   poly = poly + x^i*coeff(p,0);
37 | };
38 | 
39 | display = hexadecimal!;
40 | print("rel error:", dirtyinfnorm(1-backward(poly, x)/f(x), [a;b]));
41 | print("in [", a, b, "]");
42 | for i from 0 to deg do print(coeff(poly, i));
43 | 


--------------------------------------------------------------------------------
/math/tools/cbrt.sollya:
--------------------------------------------------------------------------------
 1 | // polynomial for approximating cbrt(x) in double precision
 2 | //
 3 | // Copyright (c) 2022-2024, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | deg = 3;
 7 | 
 8 | a = 0.5;
 9 | b = 1;
10 | 
11 | 
12 | f = x^(1/3);
13 | 
14 | poly = fpminimax(f, deg, [|double ...|], [a;b]);
15 | 
16 | display = hexadecimal;
17 | print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
18 | print("in [",a,b,"]");
19 | print("coeffs:");
20 | for i from 0 to deg do round(coeff(poly,i), D, RN);
21 | 


--------------------------------------------------------------------------------
/math/tools/cbrtf.sollya:
--------------------------------------------------------------------------------
 1 | // polynomial for approximating cbrt(x) in single precision
 2 | //
 3 | // Copyright (c) 2022-2024, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | deg = 3;
 7 | 
 8 | a = 0.5;
 9 | b = 1;
10 | 
11 | 
12 | f = x^(1/3);
13 | 
14 | poly = fpminimax(f, deg, [|single ...|], [a;b]);
15 | 
16 | display = hexadecimal;
17 | print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
18 | print("in [",a,b,"]");
19 | print("coeffs:");
20 | for i from 0 to deg do round(coeff(poly,i), SG, RN);
21 | 


--------------------------------------------------------------------------------
/math/tools/cos.sollya:
--------------------------------------------------------------------------------
 1 | // polynomial for approximating cos(x)
 2 | //
 3 | // Copyright (c) 2019, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | deg = 8;   // polynomial degree
 7 | a = -pi/4; // interval
 8 | b = pi/4;
 9 | 
10 | // find even polynomial with minimal abs error compared to cos(x)
11 | 
12 | f = cos(x);
13 | 
14 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)|
15 | approx = proc(poly,d) {
16 |   return remez(f(x)-poly(x), deg-d, [a;b], x^d, 1e-10);
17 | };
18 | 
19 | // first coeff is fixed, iteratively find optimal double prec coeffs
20 | poly = 1;
21 | for i from 1 to deg/2 do {
22 |   p = roundcoefficients(approx(poly,2*i), [|D ...|]);
23 |   poly = poly + x^(2*i)*coeff(p,0); // keep only the x^(2*i) term of this fit
24 | };
25 | // report the error norms, the interval and every coefficient (odd ones are 0)
26 | display = hexadecimal;
27 | print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
28 | print("abs error:", accurateinfnorm(f(x)-poly(x), [a;b], 30));
29 | print("in [",a,b,"]");
30 | print("coeffs:");
31 | for i from 0 to deg do coeff(poly,i);
32 | 


--------------------------------------------------------------------------------
/math/tools/erf.sollya:
--------------------------------------------------------------------------------
 1 | // tables and constants for approximating erf(x).
 2 | //
 3 | // Copyright (c) 2023-2024, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | display = hexadecimal;
 7 | prec=128; // working precision in bits
 8 | 
 9 | // Tables: one row per r = i/128, r in [0, 6]; the last column is erf'(r)
10 | print("{ i, r, erf(r), 2/sqrt(pi) * exp(-r^2)}");
11 | for i from 0 to 768 do {
12 |   r = i / 128;
13 |   t0 = double(erf(r));
14 |   t1 = double(2/sqrt(pi) * exp(-r * r));
15 |   print("{ " @ i @ ",\t" @ r @ ",\t" @ t0 @ ",\t" @ t1 @ " },");
16 | };
17 | 
18 | // Constants, each rounded to double precision
19 | double(1/3);
20 | double(1/10);
21 | double(2/15);
22 | double(2/9);
23 | double(2/45);
24 | double(2/sqrt(pi));
25 | 
26 | 


--------------------------------------------------------------------------------
/math/tools/erfc.sollya:
--------------------------------------------------------------------------------
 1 | // tables and constants for approximating erfc(x).
 2 | //
 3 | // Copyright (c) 2023-2024, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | display = hexadecimal;
 7 | prec=128; // working precision in bits
 8 | 
 9 | // Tables: one row per r = i/128; both entries are scaled by 2^128
10 | print("{ i, r, erfc(r), 2/sqrt(pi) * exp(-r^2) }");
11 | for i from 0 to 3787 do {
12 |   r = 0.0 + i / 128;
13 |   t0 = double(erfc(r) * 2^128);
14 |   t1 = double(2/sqrt(pi) * exp(-r * r) * 2^128);
15 |   print("{ " @ t0 @ ",\t" @ t1 @ " },"); // note: i and r are not part of the row
16 | };
17 | 
18 | // Constants, each rounded to double precision
19 | print("> 2/sqrt(pi)");
20 | double(2/sqrt(pi));
21 | 
22 | print("> 1/3");
23 | double(1/3);
24 | 
25 | print("> P5");
26 | double(2/15);
27 | double(1/10);
28 | double(2/9);
29 | double(2/45);
30 | 
31 | print("> P6");
32 | double(1/42);
33 | double(1/7);
34 | double(2/21);
35 | double(4/315);
36 | 
37 | print("> Q"); // (k+1)/k for k = 4..9
38 | double( 5.0 / 4.0);
39 | double( 6.0 / 5.0);
40 | double( 7.0 / 6.0);
41 | double( 8.0 / 7.0);
42 | double( 9.0 / 8.0);
43 | double(10.0 / 9.0);
44 | 
45 | print("> R"); // -2k/((k+1)(k+2)) for k = 4..9
46 | double(-2.0 * 4.0 / (5.0 * 6.0));
47 | double(-2.0 * 5.0 / (6.0 * 7.0));
48 | double(-2.0 * 6.0 / (7.0 * 8.0));
49 | double(-2.0 * 7.0 / (8.0 * 9.0));
50 | double(-2.0 * 8.0 / (9.0 * 10.0));
51 | double(-2.0 * 9.0 / (10.0 * 11.0));
52 | 


--------------------------------------------------------------------------------
/math/tools/erfcf.sollya:
--------------------------------------------------------------------------------
 1 | // tables and constants for approximating erfcf(x).
 2 | //
 3 | // Copyright (c) 2023-2024, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | display = hexadecimal;
 7 | prec=128; // working precision in bits
 8 | 
 9 | // Tables: one row per r = i/64; both entries are scaled by 2^47
10 | print("{ i, r, erfc(r), 2/sqrt(pi) * exp(-r^2) }");
11 | for i from 0 to 644 do {
12 |   r = 0.0 + i / 64;
13 |   t0 = single(erfc(r) * 2^47);
14 |   t1 = single(2/sqrt(pi) * exp(-r * r) * 2^47);
15 |   print("{ " @ t0 @ ",\t" @ t1 @ " },"); // note: i and r are not part of the row
16 | };
17 | 
18 | // Constants, each rounded to single precision
19 | single(1/3);
20 | single(2/15);
21 | single(1/10);
22 | single(2/sqrt(pi));
23 | 


--------------------------------------------------------------------------------
/math/tools/erff.sollya:
--------------------------------------------------------------------------------
 1 | // tables and constants for approximating erff(x).
 2 | //
 3 | // Copyright (c) 2023-2024, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | display = hexadecimal;
 7 | prec=128; // working precision in bits
 8 | 
 9 | // Tables: one row per r = i/128, r in [0, 4]; the last column is erf'(r)
10 | print("{ i, r, erf(r), 2/sqrt(pi) * exp(-r^2)}");
11 | for i from 0 to 512 do {
12 |   r = i / 128;
13 |   t0 = single(erf(r));
14 |   t1 = single(2/sqrt(pi) * exp(-r * r));
15 |   print("{ " @ i @ ",\t" @ r @ ",\t" @ t0 @ ",\t" @ t1 @ " },");
16 | };
17 | 
18 | // Constants, each rounded to single precision
19 | single(1/3);
20 | single(2/sqrt(pi));
21 | 


--------------------------------------------------------------------------------
/math/tools/exp.sollya:
--------------------------------------------------------------------------------
 1 | // polynomial for approximating e^x
 2 | //
 3 | // Copyright (c) 2019, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | deg = 5; // poly degree
 7 | N = 128; // table entries
 8 | b = log(2)/(2*N);  // interval
 9 | b = b + b*0x1p-16; // increase interval for non-nearest rounding (TOINT_NARROW)
10 | a = -b;
11 | 
12 | // find polynomial with minimal abs error
13 | // (poly is the part fixed so far, d the degree of the next term to fit)
14 | // return p that minimizes |exp(x) - poly(x) - x^d*p(x)|
15 | approx = proc(poly,d) {
16 |   return remez(exp(x)-poly(x), deg-d, [a;b], x^d, 1e-10);
17 | };
18 | 
19 | // first 2 coeffs are fixed, iteratively find optimal double prec coeffs
20 | poly = 1 + x;
21 | for i from 2 to deg do {
22 |   p = roundcoefficients(approx(poly,i), [|D ...|]);
23 |   poly = poly + x^i*coeff(p,0); // keep only the x^i term of this fit
24 | };
25 | 
26 | display = hexadecimal;
27 | print("rel error:", accurateinfnorm(1-poly(x)/exp(x), [a;b], 30));
28 | print("abs error:", accurateinfnorm(exp(x)-poly(x), [a;b], 30));
29 | print("in [",a,b,"]");
30 | // double interval error for non-nearest rounding
31 | print("rel2 error:", accurateinfnorm(1-poly(x)/exp(x), [2*a;2*b], 30));
32 | print("abs2 error:", accurateinfnorm(exp(x)-poly(x), [2*a;2*b], 30));
33 | print("in [",2*a,2*b,"]");
34 | print("coeffs:");
35 | for i from 0 to deg do coeff(poly,i);
36 | 


--------------------------------------------------------------------------------
/math/tools/exp10.sollya:
--------------------------------------------------------------------------------
 1 | // polynomial for approximating double precision 10^x
 2 | //
 3 | // Copyright (c) 2023-2025, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | // Parameters
 7 | deg = 4; // Neon 4, SVE 5
 8 | N = 128;  // Neon 128, SVE 64
 9 | b = log(2)/(2 * N * log(10)); // interval
10 | a = -b;
11 | 
12 | f = 10^x;
13 | 
14 | // return p that minimizes relative error.
15 | approx = proc(poly,d) {
16 |   return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
17 | };
18 | 
19 | // return p that minimizes absolute error (only used by the commented-out line below)
20 | approx_abs = proc(poly,d) {
21 |   return remez(f(x) - poly(x), deg-d, [a;b], x^d, 1e-10);
22 | };
23 | 
24 | // first coeff is fixed, iteratively find optimal double prec coeffs
25 | poly = 1;
26 | for i from 1 to deg do {
27 |   p = roundcoefficients(approx(poly,i), [|D ...|]);
28 |   //p = roundcoefficients(approx_abs(poly,i), [|D ...|]);
29 |   poly = poly + x^i*coeff(p,0);
30 | };
31 | 
32 | display = hexadecimal;
33 | print("rel error:", accurateinfnorm(1-poly(x)/10^x, [a;b], 30));
34 | print("abs error:", accurateinfnorm(10^x-poly(x), [a;b], 30));
35 | print("in [",a,b,"]");
36 | print("coeffs:");
37 | for i from 0 to deg do coeff(poly,i);
38 | // constants derived from N, printed in order: log10_2, log2_10_hi, log2_10_lo
39 | log10_2 = round(N * log(10) / log(2), D, RN); // N * log2(10)
40 | log2_10 = log(2) / (N * log(10)); // log10(2) / N, not rounded yet
41 | log2_10_hi = round(log2_10, D, RN);
42 | log2_10_lo = round(log2_10 - log2_10_hi, D, RN); // rounding error of the hi part
43 | print(log10_2);
44 | print(log2_10_hi);
45 | print(log2_10_lo);
46 | 


--------------------------------------------------------------------------------
/math/tools/exp10f.sollya:
--------------------------------------------------------------------------------
 1 | // polynomial for approximating single precision 10^x
 2 | //
 3 | // Copyright (c) 2025, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | 
 7 | // Parameters
 8 | deg = 2; // Neon 5, SVE 2
 9 | N = 64;  // Neon 1, SVE 64
10 | b = log(2)/(2 * N * log(10)); // interval
11 | a = -b;
12 | f = 10^x;
13 | 
14 | // return p that minimizes relative error.
15 | approx = proc(poly,d) {
16 |   return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
17 | };
18 | 
19 | // first coeff is fixed, iteratively find optimal single prec coeffs
20 | poly = 1;
21 | for i from 1 to deg do {
22 |   p = roundcoefficients(approx(poly,i), [|SG ...|]);
23 |   poly = poly + x^i*coeff(p, 0);
24 | };
25 | 
26 | display = hexadecimal!;
27 | print("rel error:", accurateinfnorm(1-poly(x)/10^x, [a;b], 30));
28 | print("in [",a,b,"]");
29 | print("Poly coeffs:");
30 | for i from 1 to deg do coeff(poly,i); // the fixed constant term is not printed
31 | // constants below do not depend on N
32 | log10_2 = round(log(10) / log(2), SG, RN); // log2(10)
33 | log2_10 = log(2) / (log(10)); // log10(2), not rounded yet
34 | log2_10_hi = round(log2_10, SG, RN);
35 | log2_10_lo = round(log2_10 - log2_10_hi, SG, RN); // rounding error of the hi part
36 | 
37 | print("log10_2:");
38 | print(log10_2);
39 | 
40 | print("log2_10_hi/lo:");
41 | print(log2_10_hi);
42 | print(log2_10_lo);
43 | 


--------------------------------------------------------------------------------
/math/tools/exp2.sollya:
--------------------------------------------------------------------------------
 1 | // polynomial for approximating 2^x
 2 | //
 3 | // Copyright (c) 2019-2025, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | // Parameters
 7 | deg = 5; // poly degree
 8 | N = 64; // Neon 128, SVE 64 (fexpa)
 9 | b = 1/(2*N); // interval
10 | a = -b;
11 | f = 2^x;
12 | 
13 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)|
14 | approx = proc(poly,d) {
15 |   return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
16 | };
17 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)|
18 | approx_abs = proc(poly,d) {
19 |   return remez(f(x) - poly(x), deg-d, [a;b], x^d, 1e-10);
20 | };
21 | 
22 | // first coeff is fixed, iteratively find optimal coeffs
23 | poly = 1;
24 | for i from 1 to deg do {
25 |   p = roundcoefficients(approx(poly,i), [|D ...|]);
26 | //  to minimize absolute error instead: p = roundcoefficients(approx_abs(poly,i), [|D ...|]);
27 |   poly = poly + x^i*coeff(p,0); // keep only the x^i term of this fit
28 | };
29 | 
30 | display = hexadecimal;
31 | print("rel error:", accurateinfnorm(1-poly(x)/2^x, [a;b], 30));
32 | print("abs error:", accurateinfnorm(2^x-poly(x), [a;b], 30));
33 | print("in [",a,b,"]");
34 | // double interval error for non-nearest rounding:
35 | print("rel2 error:", accurateinfnorm(1-poly(x)/2^x, [2*a;2*b], 30));
36 | print("abs2 error:", accurateinfnorm(2^x-poly(x), [2*a;2*b], 30));
37 | print("in [",2*a,2*b,"]");
38 | print("coeffs:");
39 | for i from 0 to deg do coeff(poly,i);
40 | 


--------------------------------------------------------------------------------
/math/tools/exp2f.sollya:
--------------------------------------------------------------------------------
 1 | // polynomial for approximating single precision 2^x
 2 | //
 3 | // Copyright (c) 2025, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | // Parameters
 7 | deg = 5; // Neon 5, SVE 2
 8 | N = 64;  // Neon 1, SVE 64
 9 | b = 1/(2*N); // interval
10 | a = -b;
11 | f = 2^x;
12 | 
13 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)|
14 | approx = proc(poly, d) {
15 |   return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
16 | };
17 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)|
18 | approx_abs = proc(poly, d) {
19 |   return remez(f(x) - poly(x), deg-d, [a;b], x^d, 1e-10);
20 | };
21 | 
22 | // first coeff is fixed, iteratively find optimal single prec coeffs
23 | poly = 1;
24 | for i from 1 to deg do {
25 |   p = roundcoefficients(approx(poly,i), [|SG ...|]);
26 | // to minimize absolute error instead: p = roundcoefficients(approx_abs(poly,i), [|SG ...|]);
27 |   poly = poly + x^i*coeff(p,0); // keep only the x^i term of this fit
28 | };
29 | 
30 | display = hexadecimal;
31 | print("rel error:", accurateinfnorm(1-poly(x)/2^x, [a;b], 30));
32 | print("abs error:", accurateinfnorm(2^x-poly(x), [a;b], 30));
33 | print("in [",a,b,"]");
34 | // double interval error for non-nearest rounding:
35 | print("rel2 error:", accurateinfnorm(1-poly(x)/2^x, [2*a;2*b], 30));
36 | print("abs2 error:", accurateinfnorm(2^x-poly(x), [2*a;2*b], 30));
37 | print("in [",2*a,2*b,"]");
38 | print("coeffs:");
39 | for i from 0 to deg do coeff(poly,i);
40 | 


--------------------------------------------------------------------------------
/math/tools/exp2m1f.sollya:
--------------------------------------------------------------------------------
 1 | // polynomial for approximating single precision 2^x - 1
 2 | //
 3 | // Copyright (c) 2025, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | // Parameters
 7 | deg = 7; // poly degree
 8 | N = 1;
 9 | b = 1/(2*N); // interval
10 | a = -b;
11 | f = 2^x; // the fit targets 2^x; the constant term of poly is fixed to 1 below
12 | 
13 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)|
14 | approx = proc(poly, d) {
15 |   return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
16 | };
17 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)|
18 | approx_abs = proc(poly, d) {
19 |   return remez(f(x) - poly(x), deg-d, [a;b], x^d, 1e-10);
20 | };
21 | 
22 | // first coeff is fixed, iteratively find optimal single prec coeffs
23 | poly = 1;
24 | for i from 1 to deg do {
25 |   p = roundcoefficients(approx(poly,i), [|SG ...|]);
26 | // to minimize absolute error instead: p = roundcoefficients(approx_abs(poly,i), [|SG ...|]);
27 |   poly = poly + x^i*coeff(p,0);
28 | };
29 | 
30 | display = hexadecimal;
31 | print("rel error:", accurateinfnorm(1-poly(x)/2^x, [a;b], 30));
32 | print("abs error:", accurateinfnorm(2^x-poly(x), [a;b], 30));
33 | print("in [",a,b,"]");
34 | // print every coefficient, including the fixed constant term
35 | print("coeffs:");
36 | for i from 0 to deg do coeff(poly,i);
37 | 


--------------------------------------------------------------------------------
/math/tools/expm1.sollya:
--------------------------------------------------------------------------------
 1 | // polynomial for approximating exp(x)-1 in double precision
 2 | //
 3 | // Copyright (c) 2022-2025, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | deg = 6; // poly degree
 7 | N = 64; // table entries
 8 | b = log(2)/(2*N);  // interval
 9 | a = -b;
10 | // reference function exp(y); used by the second error report below
11 | f = proc(y) {
12 |   return exp(y);
13 | };
14 | 
15 | // return p that minimizes |exp(x) - poly(x) - x^d*p(x)|
16 | approx = proc(poly,d) {
17 |   return remez(exp(x)-poly(x), deg-d, [a;b], x^d, 1e-10);
18 | };
19 | 
20 | // first 2 coeffs are fixed, iteratively find optimal double prec coeffs
21 | poly = 1 + x;
22 | for i from 2 to deg do {
23 |   p = roundcoefficients(approx(poly,i), [|D ...|]);
24 |   poly = poly + x^i*coeff(p,0);
25 | };
26 | 
27 | display = hexadecimal;
28 | print("rel error:", accurateinfnorm(1-poly(x)/exp(x), [a;b], 30));
29 | print("rel error:", dirtyinfnorm(1-poly(x)/f(x), [a;b], 30));
30 | print("in [",a,b,"]");
31 | print("coeffs:");
32 | for i from 2 to deg do round(coeff(poly,i), D, RN); // the fixed terms 1 + x are not printed
33 | 
34 | // SVE expm1 relies on a small lookup table for fexpa correction.
35 | display = decimal;
36 | for i from 0 to 16 do {
37 |   val = 2^(i/64) - 1;
38 |   err = round(val, D, RN); // table entry rounded to double
39 |   print(err);
40 | };
41 | // same table for the negative exponents -i/64
42 | for i from 0 to 16 do {
43 |   val = 2^(-i/64) - 1;
44 |   err = round(val, D, RN);
45 |   print(err);
46 | };


--------------------------------------------------------------------------------
/math/tools/expm1f.sollya:
--------------------------------------------------------------------------------
 1 | // polynomial for approximating exp(x)-1 in single precision
 2 | //
 3 | // Copyright (c) 2022-2024, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | deg = 5; // polynomial degree
 7 | 
 8 | a = -log(2)/2; // approximation interval is [a;b]
 9 | b = log(2)/2;
10 | // function to approximate
11 | f = proc(y) {
12 |   return exp(y)-1;
13 | };
14 | // fit a degree-deg polynomial whose coefficients are all single precision
15 | poly = fpminimax(f(x), deg, [|single ...|], [a;b]);
16 | 
17 | display = hexadecimal;
18 | print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
19 | print("in [",a,b,"]");
20 | print("coeffs:");
21 | for i from 2 to deg do round(coeff(poly,i), SG, RN); // coefficients 0 and 1 are not printed
22 | 


--------------------------------------------------------------------------------
/math/tools/log.sollya:
--------------------------------------------------------------------------------
 1 | // polynomial for approximating log(1+x)
 2 | //
 3 | // Copyright (c) 2019, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | deg = 12; // poly degree
 7 | // |log(1+x)| > 0x1p-4 outside the interval
 8 | a = -0x1p-4;
 9 | b =  0x1.09p-4;
10 | 
11 | // find log(1+x)/x polynomial with minimal relative error
12 | // (minimal relative error polynomial for log(1+x) is the same * x)
13 | deg = deg-1; // because of /x
14 | 
15 | // f = log(1+x)/x; using taylor series (61 terms)
16 | f = 0;
17 | for i from 0 to 60 do { f = f + (-x)^i/(i+1); };
18 | 
19 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)|
20 | approx = proc(poly,d) {
21 |   return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
22 | };
23 | 
24 | // first coeff is fixed, iteratively find optimal double prec coeffs
25 | poly = 1;
26 | for i from 1 to deg do {
27 |   p = roundcoefficients(approx(poly,i), [|D ...|]);
28 |   poly = poly + x^i*coeff(p,0); // keep only the x^i term of this fit
29 | };
30 | 
31 | display = hexadecimal;
32 | print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
33 | print("in [",a,b,"]");
34 | print("coeffs:");
35 | for i from 0 to deg do coeff(poly,i); // coefficients of the log(1+x)/x polynomial
36 | 


--------------------------------------------------------------------------------
/math/tools/log10.sollya:
--------------------------------------------------------------------------------
 1 | // polynomial for approximating log10(1+x)
 2 | //
 3 | // Copyright (c) 2019-2024, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | deg = 6; // poly degree
 7 | // |log10(1+x)| > 0x1p-5 outside the interval
 8 | a = -0x1.p-5;
 9 | b = 0x1.p-5;
10 | // 1/ln(10) split into a hi part with cleared low bits and a double lo part
11 | ln10 = evaluate(log(10),0);
12 | invln10hi = double(1/ln10 + 0x1p21) - 0x1p21; // round away last 21 bits
13 | invln10lo = double(1/ln10 - invln10hi);
14 | 
15 | // find log10(1+x)/x polynomial with minimal relative error
16 | // (minimal relative error polynomial for log10(1+x) is the same * x)
17 | deg = deg-1; // because of /x
18 | 
19 | // f = log(1+x)/x; using taylor series (61 terms), then scaled by 1/ln(10)
20 | f = 0;
21 | for i from 0 to 60 do { f = f + (-x)^i/(i+1); };
22 | f = f/ln10;
23 | 
24 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)|
25 | approx = proc(poly,d) {
26 |   return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
27 | };
28 | 
29 | // first coeff is fixed, iteratively find optimal double prec coeffs
30 | poly = invln10hi + invln10lo;
31 | for i from 1 to deg do {
32 |   p = roundcoefficients(approx(poly,i), [|D ...|]);
33 |   poly = poly + x^i*coeff(p,0);
34 | };
35 | display = hexadecimal;
36 | print("invln10hi:", invln10hi);
37 | print("invln10lo:", invln10lo);
38 | print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
39 | print("in [",a,b,"]");
40 | print("coeffs:");
41 | for i from 0 to deg do coeff(poly,i);
42 | // print the interval once more, this time in decimal
43 | display = decimal;
44 | print("in [",a,b,"]");
45 | 


--------------------------------------------------------------------------------
/math/tools/log10f.sollya:
--------------------------------------------------------------------------------
 1 | // polynomial for approximating log10f(1+x)
 2 | //
 3 | // Copyright (c) 2019-2024, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | // Computation of log10f(1+x) will be carried out in double precision
 7 | 
 8 | deg = 4; // poly degree
 9 | // [OFF; 2*OFF] is divided in 2^4 intervals with OFF~0.7
10 | a = -0.04375;
11 | b = 0.04375;
12 | 
13 | // find log(1+x)/x polynomial with minimal relative error
14 | // (minimal relative error polynomial for log(1+x) is the same * x)
15 | deg = deg-1; // because of /x
16 | 
17 | // f = log(1+x)/x; using taylor series (61 terms)
18 | f = 0;
19 | for i from 0 to 60 do { f = f + (-x)^i/(i+1); };
20 | // note: f is the natural log series; no 1/log(10) factor is applied here
21 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)|
22 | approx = proc(poly,d) {
23 |   return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
24 | };
25 | 
26 | // first coeff is fixed, iteratively find optimal double prec coeffs
27 | poly = 1;
28 | for i from 1 to deg do {
29 |   p = roundcoefficients(approx(poly,i), [|D ...|]);
30 |   poly = poly + x^i*coeff(p,0);
31 | };
32 | 
33 | display = hexadecimal;
34 | print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
35 | print("in [",a,b,"]");
36 | print("coeffs:");
37 | for i from 0 to deg do double(coeff(poly,i));
38 | 


--------------------------------------------------------------------------------
/math/tools/log1p.sollya:
--------------------------------------------------------------------------------
 1 | // polynomial for approximating log(1+x) in double precision
 2 | //
 3 | // Copyright (c) 2022-2024, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | deg = 20; // polynomial degree
 7 | 
 8 | a = sqrt(2)/2-1; // approximation interval is [a;b]
 9 | b = sqrt(2)-1;
10 | // function to approximate
11 | f = proc(y) {
12 |   return log(1+y);
13 | };
14 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)|
15 | approx = proc(poly, d) {
16 |   return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
17 | };
18 | // the x term is fixed, iteratively find optimal double prec coeffs
19 | poly = x;
20 | for i from 2 to deg do {
21 |   p = roundcoefficients(approx(poly,i), [|D ...|]);
22 |   poly = poly + x^i*coeff(p,0);
23 | };
24 | 
25 | 
26 | print("coeffs:");
27 | display = hexadecimal;
28 | for i from 2 to deg do coeff(poly,i); // the fixed x term is not printed
29 | print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
30 | print("in [",a,b,"]");
31 | 


--------------------------------------------------------------------------------
/math/tools/log1pf.sollya:
--------------------------------------------------------------------------------
 1 | // polynomial for approximating log(1+x) in single precision
 2 | //
 3 | // Copyright (c) 2022-2024, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | deg = 10; // polynomial degree
 7 | 
 8 | a = -0.25; // approximation interval is [a;b]
 9 | b = 0.5;
10 | // function to approximate
11 | f = proc(y) {
12 |   return log(1+y);
13 | };
14 | // fit a degree-deg polynomial whose coefficients are all single precision
15 | poly = fpminimax(f(x), deg, [|single ...|], [a;b]);
16 | 
17 | display = hexadecimal;
18 | print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
19 | print("in [",a,b,"]");
20 | print("coeffs:");
21 | for i from 2 to deg do round(coeff(poly,i), SG, RN); // coefficients 0 and 1 are not printed
22 | 


--------------------------------------------------------------------------------
/math/tools/log2.sollya:
--------------------------------------------------------------------------------
 1 | // polynomial for approximating log2(1+x)
 2 | //
 3 | // Copyright (c) 2019, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | deg = 11; // poly degree
 7 | // |log2(1+x)| > 0x1p-4 outside the interval
 8 | a = -0x1.5b51p-5;
 9 | b =  0x1.6ab2p-5;
10 | // 1/ln(2) split into a hi part with cleared low bits and a double lo part
11 | ln2 = evaluate(log(2),0);
12 | invln2hi = double(1/ln2 + 0x1p21) - 0x1p21; // round away last 21 bits
13 | invln2lo = double(1/ln2 - invln2hi);
14 | 
15 | // find log2(1+x)/x polynomial with minimal relative error
16 | // (minimal relative error polynomial for log2(1+x) is the same * x)
17 | deg = deg-1; // because of /x
18 | 
19 | // f = log(1+x)/x; using taylor series (61 terms), then scaled by 1/ln(2)
20 | f = 0;
21 | for i from 0 to 60 do { f = f + (-x)^i/(i+1); };
22 | f = f/ln2;
23 | 
24 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)|
25 | approx = proc(poly,d) {
26 |   return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
27 | };
28 | 
29 | // first coeff is fixed, iteratively find optimal double prec coeffs
30 | poly = invln2hi + invln2lo;
31 | for i from 1 to deg do {
32 |   p = roundcoefficients(approx(poly,i), [|D ...|]);
33 |   poly = poly + x^i*coeff(p,0);
34 | };
35 | 
36 | display = hexadecimal;
37 | print("invln2hi:", invln2hi);
38 | print("invln2lo:", invln2lo);
39 | print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
40 | print("in [",a,b,"]");
41 | print("coeffs:");
42 | for i from 0 to deg do coeff(poly,i);
43 | 


--------------------------------------------------------------------------------
/math/tools/log2_abs.sollya:
--------------------------------------------------------------------------------
 1 | // polynomial for approximating log2(1+x)
 2 | //
 3 | // Copyright (c) 2019, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | deg = 7; // poly degree
 7 | // interval ~= 1/(2*N), where N is the table entries
 8 | a= -0x1.f45p-8;
 9 | b=  0x1.f45p-8;
10 | // 1/ln(2) split into a hi part with cleared low bits and a double lo part
11 | ln2 = evaluate(log(2),0);
12 | invln2hi = double(1/ln2 + 0x1p21) - 0x1p21; // round away last 21 bits
13 | invln2lo = double(1/ln2 - invln2hi);
14 | 
15 | // find log2(1+x) polynomial with minimal absolute error
16 | f = log(1+x)/ln2;
17 | 
18 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)|
19 | approx = proc(poly,d) {
20 |   return remez(f(x) - poly(x), deg-d, [a;b], x^d, 1e-10);
21 | };
22 | 
23 | // the x coeff is fixed, iteratively find optimal double prec coeffs
24 | poly = x*(invln2lo + invln2hi);
25 | for i from 2 to deg do {
26 |   p = roundcoefficients(approx(poly,i), [|D ...|]);
27 |   poly = poly + x^i*coeff(p,0);
28 | };
29 | 
30 | display = hexadecimal;
31 | print("invln2hi:", invln2hi);
32 | print("invln2lo:", invln2lo);
33 | print("abs error:", accurateinfnorm(f(x)-poly(x), [a;b], 30));
34 | //// relative error computation fails if f(0)==0
35 | //// g = f(x)/x = log2(1+x)/x; using taylor series
36 | //g = 0;
37 | //for i from 0 to 60 do { g = g + (-x)^i/(i+1)/ln2; };
38 | //print("rel error:", accurateinfnorm(1-(poly(x)/x)/g(x), [a;b], 30));
39 | print("in [",a,b,"]");
40 | print("coeffs:");
41 | for i from 0 to deg do coeff(poly,i); // the constant term is 0
42 | 


--------------------------------------------------------------------------------
/math/tools/log_abs.sollya:
--------------------------------------------------------------------------------
 1 | // polynomial for approximating log(1+x)
 2 | //
 3 | // Copyright (c) 2019, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | deg = 6; // poly degree
 7 | // interval ~= 1/(2*N), where N is the table entries
 8 | a = -0x1.fp-9;
 9 | b =  0x1.fp-9;
10 | 
11 | // find log(1+x) polynomial with minimal absolute error
12 | f = log(1+x);
13 | 
14 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)|
15 | approx = proc(poly,d) {
16 |   return remez(f(x) - poly(x), deg-d, [a;b], x^d, 1e-10);
17 | };
18 | 
19 | // the x coeff is fixed, iteratively find optimal double prec coeffs
20 | poly = x;
21 | for i from 2 to deg do {
22 |   p = roundcoefficients(approx(poly,i), [|D ...|]);
23 |   poly = poly + x^i*coeff(p,0);
24 | };
25 | 
26 | display = hexadecimal;
27 | print("abs error:", accurateinfnorm(f(x)-poly(x), [a;b], 30));
28 | // relative error computation fails if f(0)==0
29 | // g = f(x)/x = log(1+x)/x; using taylor series (61 terms)
30 | g = 0;
31 | for i from 0 to 60 do { g = g + (-x)^i/(i+1); };
32 | print("rel error:", accurateinfnorm(1-poly(x)/x/g(x), [a;b], 30));
33 | print("in [",a,b,"]");
34 | print("coeffs:");
35 | for i from 0 to deg do coeff(poly,i); // the constant term is 0
36 | 


--------------------------------------------------------------------------------
/math/tools/plot.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | # ULP error plot tool.
 4 | #
 5 | # Copyright (c) 2019, Arm Limited.
 6 | # SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 7 | 
 8 | import numpy as np
 9 | import matplotlib.pyplot as plt
10 | import sys
11 | import re
12 | 
13 | # example usage:
14 | # build/bin/ulp -e .0001 log 0.5 2.0 2345678 | math/tools/plot.py
15 | 
16 | def fhex(s):  # convert a hexadecimal floating-point string to a float
17 | 	return float.fromhex(s)
18 | 
19 | def parse(f):
20 | 	xs = []
21 | 	gs = []
22 | 	ys = []
23 | 	es = []
24 | 	# Has to match the format used in ulp.c
25 | 	r = re.compile(r'[^ (]+\(([^ )]*)\) got ([^ ]+) want ([^ ]+) [^ ]+ ulp err ([^ ]+)')
26 | 	for line in f:
27 | 		m = r.match(line)
28 | 		if m:
29 | 			x = fhex(m.group(1))
30 | 			g = fhex(m.group(2))
31 | 			y = fhex(m.group(3))
32 | 			e = float(m.group(4))
33 | 			xs.append(x)
34 | 			gs.append(g)
35 | 			ys.append(y)
36 | 			es.append(e)
37 | 		elif line.startswith('PASS') or line.startswith('FAIL'):
38 | 			# Print the summary line
39 | 			print(line)
40 | 	return xs, gs, ys, es
41 | 
42 | def plot(xs, gs, ys, es):  # show the ulp errors and the got/want values against the inputs
43 | 	if len(xs) < 2:
44 | 		print('not enough samples')
45 | 		return
46 | 	a = min(xs)
47 | 	b = max(xs)
48 | 	fig, (ax0,ax1) = plt.subplots(nrows=2)  # ax0: error plot, ax1: value plot
49 | 	es = np.abs(es) # ignore the sign
50 | 	emax = max(es)
51 | 	ax0.text(a+(b-a)*0.7, emax*0.8, '%s\n%g'%(emax.hex(),emax))  # label with the largest error, in hex and decimal
52 | 	ax0.plot(xs,es,'r.')
53 | 	ax0.grid()
54 | 	ax1.plot(xs,ys,'r.',label='want')
55 | 	ax1.plot(xs,gs,'b.',label='got')
56 | 	ax1.grid()
57 | 	ax1.legend()
58 | 	plt.show()
59 | 
60 | xs, gs, ys, es = parse(sys.stdin)  # read the ulp tool output from standard input
61 | plot(xs, gs, ys, es)  # then display the parsed samples
62 | 


--------------------------------------------------------------------------------
/math/tools/sin.sollya:
--------------------------------------------------------------------------------
 1 | // polynomial for approximating sin(x)
 2 | //
 3 | // Copyright (c) 2019, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | deg = 7;   // polynomial degree
 7 | a = -pi/4; // interval
 8 | b = pi/4;
 9 | 
10 | // find even polynomial with minimal abs error compared to sin(x)/x
11 | 
12 | // account for /x
13 | deg = deg-1;
14 | 
15 | // f = sin(x)/x; using taylor series, c holds (2*i+1)!
16 | f = 1;
17 | c = 1;
18 | for i from 1 to 60 do { c = 2*i*(2*i + 1)*c; f = f + (-1)^i*x^(2*i)/c; };
19 | 
20 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)|
21 | approx = proc(poly,d) {
22 |   return remez(f(x)-poly(x), deg-d, [a;b], x^d, 1e-10);
23 | };
24 | 
25 | // first coeff is fixed, iteratively find optimal double prec coeffs
26 | poly = 1;
27 | for i from 1 to deg/2 do {
28 |   p = roundcoefficients(approx(poly,2*i), [|D ...|]);
29 |   poly = poly + x^(2*i)*coeff(p,0); // keep only the x^(2*i) term of this fit
30 | };
31 | 
32 | display = hexadecimal;
33 | print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
34 | print("abs error:", accurateinfnorm(sin(x)-x*poly(x), [a;b], 30));
35 | print("in [",a,b,"]");
36 | print("coeffs:");
37 | for i from 0 to deg do coeff(poly,i); // coefficients of the sin(x)/x polynomial (odd ones are 0)
38 | 


--------------------------------------------------------------------------------
/math/tools/sincos.sollya:
--------------------------------------------------------------------------------
 1 | // polynomial for approximating cos(x)
 2 | //
 3 | // Copyright (c) 2023-2024, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | // This script only finds the coeffs for cos - see math/aarch64/advsimd/sin.c for sin coeffs
 7 | 
 8 | deg = 14;   // polynomial degree
 9 | a = -pi/4; // interval
10 | b = pi/4;
11 | 
12 | // find even polynomial with minimal abs error compared to cos(x)
13 | 
14 | f = cos(x);
15 | 
16 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)|
17 | approx = proc(poly,d) {
18 |   return remez(f(x)-poly(x), deg-d, [a;b], x^d, 1e-10);
19 | };
20 | 
21 | // first coeff is fixed, iteratively find optimal double prec coeffs
22 | poly = 1;
23 | for i from 1 to deg/2 do {
24 |   p = roundcoefficients(approx(poly,2*i), [|double ...|]);
25 |   poly = poly + x^(2*i)*coeff(p,0); // keep only the x^(2*i) term of this fit
26 | };
27 | 
28 | display = hexadecimal;
29 | //print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
30 | //print("abs error:", accurateinfnorm(f(x)-poly(x), [a;b], 30));
31 | print("in [",a,b,"]");
32 | print("coeffs:");
33 | for i from 0 to deg do coeff(poly,i); // odd coefficients are 0
34 | 


--------------------------------------------------------------------------------
/math/tools/sincosf.sollya:
--------------------------------------------------------------------------------
 1 | // polynomial for approximating cos(x)
 2 | //
 3 | // Copyright (c) 2023-2024, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | // This script only finds the coeffs for cos - see math/tools/sin.sollya for sin coeffs.
 7 | 
 8 | deg = 8;   // polynomial degree
 9 | a = -pi/4; // interval
10 | b = pi/4;
11 | 
12 | // find even polynomial with minimal abs error compared to cos(x)
13 | 
14 | f = cos(x);
15 | 
16 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)|
17 | approx = proc(poly,d) {
18 |   return remez(f(x)-poly(x), deg-d, [a;b], x^d, 1e-10);
19 | };
20 | 
21 | // first coeff is fixed, iteratively find optimal single prec coeffs
22 | poly = 1;
23 | for i from 1 to deg/2 do {
24 |   p = roundcoefficients(approx(poly,2*i), [|single ...|]);
25 |   poly = poly + x^(2*i)*coeff(p,0);
26 | };
27 | 
28 | display = hexadecimal;
29 | //print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
30 | //print("abs error:", accurateinfnorm(f(x)-poly(x), [a;b], 30));
31 | print("in [",a,b,"]");
32 | print("coeffs:");
33 | for i from 0 to deg do coeff(poly,i);
34 | 


--------------------------------------------------------------------------------
/math/tools/sinpi.sollya:
--------------------------------------------------------------------------------
 1 | // polynomial for approximating sinpi(x)
 2 | //
 3 | // Copyright (c) 2023-2024, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | deg = 19;  // polynomial degree
 7 | a = -1/2; // interval
 8 | b = 1/2;
 9 | 
10 | // find odd polynomial with minimal abs error compared to sinpi(x)
11 | 
12 | // f = sin(pi* x);
13 | f = pi*x;
14 | c = 1;
15 | for i from 1 to 80 do { c = 2*i*(2*i + 1)*c; f = f + (-1)^i*(pi*x)^(2*i+1)/c; };
16 | 
17 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)|
18 | approx = proc(poly,d) {
19 |   return remez(f(x)-poly(x), deg-d, [a;b], x^d, 1e-10);
20 | };
21 | 
22 | // first coeff is predefined, iteratively find optimal double prec coeffs
23 | poly = pi*x;
24 | for i from 0 to (deg-1)/2 do {
25 |   p = roundcoefficients(approx(poly,2*i+1), [|D ...|]);
26 |   poly = poly + x^(2*i+1)*coeff(p,0);
27 | };
28 | 
29 | display = hexadecimal;
30 | print("abs error:", accurateinfnorm(sin(pi*x)-poly(x), [a;b], 30));
31 | print("in [",a,b,"]");
32 | print("coeffs:");
33 | for i from 0 to deg do coeff(poly,i);
34 | 


--------------------------------------------------------------------------------
/math/tools/tan.sollya:
--------------------------------------------------------------------------------
 1 | // polynomial for approximating double precision tan(x)
 2 | //
 3 | // Copyright (c) 2023-2024, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | deg = 8;
 7 | 
 8 | // interval bounds
 9 | a = 0x1.0p-126;
10 | b = pi / 8;
11 | 
12 | display = hexadecimal;
13 | 
14 | f = (tan(sqrt(x))-sqrt(x))/x^(3/2);
15 | poly = fpminimax(f, deg, [|double ...|], [a*a;b*b]);
16 | 
17 | //print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
18 | print("in [",a,b,"]");
19 | print("coeffs:");
20 | for i from 0 to deg do coeff(poly,i);
21 | 


--------------------------------------------------------------------------------
/math/tools/tanpi.sollya:
--------------------------------------------------------------------------------
 1 | // polynomial for approximating tanpi/f(x)
 2 | //
 3 | // Copyright (c) 2024, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | // 0 for tanpi/f [0,0.25], 1 for tanpi/f [0.25,1]
 7 | method = 0;
 8 | dtype = double;
 9 | 
10 | if (dtype == single) then {
11 |     if (method == 0) then { deg = 5; }
12 |     else if (method == 1) then { deg = 3; };
13 | } else if (dtype == double) then {
14 |     if (method == 0) then { deg = 13; }
15 |     else if (method == 1) then { deg = 8; };
16 | };
17 | 
18 | a = 0x1.0p-126;
19 | b = 1/4;
20 | 
21 | if (method == 0) then {
22 |     g = tan(pi * x);
23 |     F = proc(P) { return pi * x + x^3 * P(x^2); };
24 |     f = (g(sqrt(x)) - pi * sqrt(x))/(x^(3/2));
25 | } else if (method == 1) then {
26 |     g = 1/tan(pi * x);
27 |     F = proc(P) { return 1/(pi * x) + x * P(x^2); };
28 |     f = (g(sqrt(x)) / sqrt(x)) - 1/(pi * x);
29 | };
30 | 
31 | poly = fpminimax(f, deg, [|dtype ...|], [a*a;b*b]);
32 | 
33 | //
34 | // Display coefficients in Sollya
35 | //
36 | display = hexadecimal!;
37 | if (dtype==double) then { prec = 53!; }
38 | else if (dtype==single) then { prec = 23!; };
39 | print("_coeffs :_ hex");
40 | for i from 0 to deg do coeff(poly, i);
41 | 
42 | // Compute errors
43 | //display = hexadecimal!;
44 | d_rel_err = dirtyinfnorm(1-F(poly)/g(x), [a;b]);
45 | d_abs_err = dirtyinfnorm(g(x)-F(poly), [a;b]);
46 | print("dirty rel error:", d_rel_err);
47 | print("dirty abs error:", d_abs_err);
48 | print("in [",a,b,"]");
49 | 


--------------------------------------------------------------------------------
/math/tools/v_erf.sollya:
--------------------------------------------------------------------------------
 1 | // polynomial for approximating erf(x).
 2 | // To generate coefficients for interval i (0 to 47) do:
 3 | // $ sollya v_erf.sollya $i
 4 | //
 5 | // Copyright (c) 2022-2024, Arm Limited.
 6 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 7 | 
 8 | scale = 1/8;
 9 | deg = 9;
10 | 
11 | itv = parse(__argv[0]);
12 | if (itv == 0)  then { a = 0x1p-1022; }
13 | else                { a = itv * scale; };
14 | 
15 | prec=256;
16 | 
17 | poly = fpminimax(erf(scale*x+a), deg, [|D ...|], [0; 1]);
18 | 
19 | display = hexadecimal;
20 | for i from 0 to deg do coeff(poly, i);


--------------------------------------------------------------------------------
/math/tools/v_erfc.sollya:
--------------------------------------------------------------------------------
 1 | // polynomial for approximating erfc(x)*exp(x*x)
 2 | // To generate coefficients for interval i (0 to 19) do:
 3 | // $ sollya v_erfc.sollya $i
 4 | //
 5 | // Copyright (c) 2022-2024, Arm Limited.
 6 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 7 | 
 8 | deg = 12; // poly degree
 9 | 
10 | // index of the sub-interval, taken from the first script argument
11 | itv = parse(__argv[0]);
12 | 
13 | // sub-interval end points; interval itv is [bounds[itv]; bounds[itv+1]]
14 | bounds = [|3.725290298461914e-9,
15 |            0.18920711500272103,
16 |            0.41421356237309515,
17 |            0.681792830507429,
18 |            1,
19 |            1.378414230005442,
20 |            1.8284271247461903,
21 |            2.363585661014858,
22 |            3,
23 |            3.756828460010884,
24 |            4.656854249492381,
25 |            5.727171322029716,
26 |            7,
27 |            8.513656920021768,
28 |            10.313708498984761,
29 |            12.454342644059432,
30 |            15,
31 |            18.027313840043536,
32 |            21.627416997969522,
33 |            25.908685288118864,
34 |            31|];
35 | 
36 | a = bounds[itv];
37 | b = bounds[itv + 1];
38 | 
39 | // target function expressed in the shifted variable y = t - a, so that
40 | // y ranges over [0; b-a] when t ranges over [a; b]
41 | f = proc(y) {
42 |   t = y + a;
43 |   return erfc(t) * exp(t*t);
44 | };
45 | 
46 | poly = fpminimax(f(x), deg, [|double ...|], [0;b-a]);
47 | 
48 | display = hexadecimal;
49 | // poly and f(x) are both functions of the shifted variable, so the error
50 | // has to be measured on [0;b-a], the interval the fit was computed on
51 | print("rel error:", accurateinfnorm(1-poly(x)/f(x), [0;b-a], 30));
52 | print("in [",a,b,"]");
53 | print("coeffs:");
54 | for i from 0 to deg do coeff(poly, i);
55 | 


--------------------------------------------------------------------------------
/math/tools/v_exp.sollya:
--------------------------------------------------------------------------------
 1 | // polynomial for approximating e^x
 2 | //
 3 | // Copyright (c) 2019, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | deg = 4; // poly degree
 7 | N = 128; // table entries
 8 | b = log(2)/(2*N);  // interval
 9 | a = -b;
10 | 
11 | // find polynomial with minimal abs error
12 | 
13 | // return p that minimizes |exp(x) - poly(x) - x^d*p(x)|
14 | approx = proc(poly,d) {
15 |   return remez(exp(x)-poly(x), deg-d, [a;b], x^d, 1e-10);
16 | };
17 | 
18 | // first 2 coeffs are fixed, iteratively find optimal double prec coeffs
19 | poly = 1 + x;
20 | for i from 2 to deg do {
21 |   p = roundcoefficients(approx(poly,i), [|D ...|]);
22 |   poly = poly + x^i*coeff(p,0);
23 | };
24 | 
25 | display = hexadecimal;
26 | print("rel error:", accurateinfnorm(1-poly(x)/exp(x), [a;b], 30));
27 | print("abs error:", accurateinfnorm(exp(x)-poly(x), [a;b], 30));
28 | print("in [",a,b,"]");
29 | print("coeffs:");
30 | for i from 0 to deg do coeff(poly,i);
31 | 


--------------------------------------------------------------------------------
/math/tools/v_log.sollya:
--------------------------------------------------------------------------------
 1 | // polynomial used for __v_log(x)
 2 | //
 3 | // Copyright (c) 2019, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | deg = 6; // poly degree
 7 | a = -0x1.fc1p-9;
 8 | b = 0x1.009p-8;
 9 | 
10 | // find log(1+x)/x polynomial with minimal relative error
11 | // (minimal relative error polynomial for log(1+x) is the same * x)
12 | deg = deg-1; // because of /x
13 | 
14 | // f = log(1+x)/x; using taylor series
15 | f = 0;
16 | for i from 0 to 60 do { f = f + (-x)^i/(i+1); };
17 | 
18 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)|
19 | approx = proc(poly,d) {
20 |   return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
21 | };
22 | 
23 | // first coeff is fixed, iteratively find optimal double prec coeffs
24 | poly = 1;
25 | for i from 1 to deg do {
26 |   p = roundcoefficients(approx(poly,i), [|D ...|]);
27 |   poly = poly + x^i*coeff(p,0);
28 | };
29 | 
30 | display = hexadecimal;
31 | print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
32 | print("in [",a,b,"]");
33 | print("coeffs:");
34 | for i from 0 to deg do coeff(poly,i);
35 | 


--------------------------------------------------------------------------------
/math/tools/v_log10.sollya:
--------------------------------------------------------------------------------
 1 | // polynomial used for __v_log10(x)
 2 | //
 3 | // Copyright (c) 2019-2024, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | deg = 6; // poly degree
 7 | a = -0x1.fc1p-9;
 8 | b = 0x1.009p-8;
 9 | 
10 | // find log(1+x)/x polynomial with minimal relative error
11 | // (minimal relative error polynomial for log(1+x) is the same * x)
12 | deg = deg-1; // because of /x
13 | 
14 | // f = log(1+x)/x; using taylor series
15 | f = 0;
16 | for i from 0 to 60 do { f = f + (-x)^i/(i+1); };
17 | 
18 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)|
19 | approx = proc(poly,d) {
20 |   return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
21 | };
22 | 
23 | // first coeff is fixed, iteratively find optimal double prec coeffs
24 | poly = 1;
25 | for i from 1 to deg do {
26 |   p = roundcoefficients(approx(poly,i), [|D ...|]);
27 |   poly = poly + x^i*coeff(p,0);
28 | };
29 | 
30 | // scale coefficients by 1/ln(10)
31 | ln10 = evaluate(log(10),0);
32 | poly = poly/ln10;
33 | 
34 | display = hexadecimal;
35 | // poly now approximates log10(1+x)/x = f(x)/ln10, so the error must be
36 | // measured against the scaled target rather than against f(x) itself
37 | print("rel error:", accurateinfnorm(1-poly(x)/(f(x)/ln10), [a;b], 30));
38 | print("in [",a,b,"]");
39 | print("coeffs:");
40 | for i from 0 to deg do double(coeff(poly,i));
41 | 


--------------------------------------------------------------------------------
/math/tools/v_log10f.sollya:
--------------------------------------------------------------------------------
 1 | // polynomial for approximating v_log10f(1+x)
 2 | //
 3 | // Copyright (c) 2019-2024, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | deg = 9; // poly degree
 7 | // |log10(1+x)| > 0x1p-4 outside the interval
 8 | a = -1/3;
 9 | b =  1/3;
10 | 
11 | display = hexadecimal;
12 | print("log10(2) = ", single(log10(2)));
13 | 
14 | ln10 = evaluate(log(10),0);
15 | invln10 = single(1/ln10);
16 | 
17 | // find log10(1+x)/x polynomial with minimal relative error
18 | // (minimal relative error polynomial for log10(1+x) is the same * x)
19 | deg = deg-1; // because of /x
20 | 
21 | // f = log(1+x)/x; using taylor series
22 | f = 0;
23 | for i from 0 to 60 do { f = f + (-x)^i/(i+1); };
24 | f = f/ln10;
25 | 
26 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)|
27 | approx = proc(poly,d) {
28 |   return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
29 | };
30 | 
31 | // first coeff is fixed, iteratively find optimal single prec coeffs
32 | poly = invln10;
33 | for i from 1 to deg do {
34 |   p = roundcoefficients(approx(poly,i), [|SG ...|]);
35 |   poly = poly + x^i*coeff(p,0);
36 | };
37 | display = hexadecimal;
38 | print("invln10:", invln10);
39 | print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
40 | print("in [",a,b,"]");
41 | print("coeffs:");
42 | for i from 0 to deg do single(coeff(poly,i));
43 | 
44 | display = decimal;
45 | print("in [",a,b,"]");
46 | 


--------------------------------------------------------------------------------
/math/tools/v_log2f.sollya:
--------------------------------------------------------------------------------
 1 | // polynomial used for __v_log2f(x)
 2 | //
 3 | // Copyright (c) 2022-2024, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | deg = 9; // poly degree
 7 | a = -1/3;
 8 | b = 1/3;
 9 | 
10 | ln2 = evaluate(log(2),0);
11 | invln2 = single(1/ln2);
12 | 
13 | // find log2(1+x)/x polynomial with minimal relative error
14 | // (minimal relative error polynomial for log2(1+x) is the same * x)
15 | deg = deg-1; // because of /x
16 | 
17 | // f = log2(1+x)/x; using taylor series
18 | f = 0;
19 | for i from 0 to 60 do { f = f + (-x)^i/(i+1); };
20 | f = f * invln2;
21 | 
22 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)|/|f(x)|
23 | approx = proc(poly,d) {
24 |   return remez(1 - poly(x)/f(x), deg-d, [a;b], x^d/f(x), 1e-10);
25 | };
26 | 
27 | // first coeff is fixed, iteratively find optimal single prec coeffs
28 | poly = invln2;
29 | for i from 1 to deg do {
30 |   p = roundcoefficients(approx(poly,i), [|SG ...|]);
31 |   poly = poly + x^i*coeff(p,0);
32 | };
33 | 
34 | display = hexadecimal;
35 | print("rel error:", accurateinfnorm(1-poly(x)/f(x), [a;b], 30));
36 | print("in [",a,b,"]");
37 | print("coeffs:");
38 | for i from 0 to deg do coeff(poly,i);
39 | 


--------------------------------------------------------------------------------
/math/tools/v_sin.sollya:
--------------------------------------------------------------------------------
 1 | // polynomial for approximating sin(x)
 2 | //
 3 | // Copyright (c) 2019, Arm Limited.
 4 | // SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 5 | 
 6 | deg = 15;  // polynomial degree
 7 | a = -pi/2; // interval
 8 | b = pi/2;
 9 | 
10 | // find even polynomial with minimal abs error compared to sin(x)/x
11 | 
12 | // account for /x
13 | deg = deg-1;
14 | 
15 | // f = sin(x)/x;
16 | f = 1;
17 | c = 1;
18 | for i from 1 to 60 do { c = 2*i*(2*i + 1)*c; f = f + (-1)^i*x^(2*i)/c; };
19 | 
20 | // return p that minimizes |f(x) - poly(x) - x^d*p(x)|
21 | approx = proc(poly,d) {
22 |   return remez(f(x)-poly(x), deg-d, [a;b], x^d, 1e-10);
23 | };
24 | 
25 | // first coeff is fixed, iteratively find optimal double prec coeffs
26 | poly = 1;
27 | for i from 1 to deg/2 do {
28 |   p = roundcoefficients(approx(poly,2*i), [|D ...|]);
29 |   poly = poly + x^(2*i)*coeff(p,0);
30 | };
31 | 
32 | display = hexadecimal;
33 | print("abs error:", accurateinfnorm(sin(x)-x*poly(x), [a;b], 30));
34 | print("in [",a,b,"]");
35 | print("coeffs:");
36 | for i from 0 to deg do coeff(poly,i);
37 | 


--------------------------------------------------------------------------------
/networking/README.md:
--------------------------------------------------------------------------------
 1 | # 🛜 Networking
 2 | 
 3 | This section provides style requirements and contribution guidelines for
 4 | networking routines.
 5 | 
 6 | ---
 7 | 
 8 | ## 📂 Subdirectory Structure
 9 | 
10 | Here is a quick overview of the **`networking/`** subdirectory's layout:
11 | 
12 | - **`include/`**: networking library public headers.
13 | - **`test/`**: networking test and benchmark related sources.
14 | 
15 | ---
16 | 
17 | ## 🧑‍💻 Style Requirements
18 | 
19 | 1. **Upstream Compatibility**:
20 |    Code should follow the GNU Coding Standards and glibc-specific conventions to
21 | ease upstreaming.
22 | 
23 | 2. **ABI and Symbols**:
24 |    Code should be written so it is suitable for inclusion into a libc with
25 | minimal changes.
26 | 
27 | 3. **API Headers**:
28 |    Include headers should be suitable for benchmarking and testing code and
29 | should not conflict with libc headers.
30 | 
31 | ---
32 | 
33 | ## ✅ Contribution Guidelines
34 | 
35 | - Clearly document assumptions in the code.
36 | - Maintain consistent assembly style across different implementations.
37 | - Benchmarking is needed on several microarchitectures.
38 | 
39 | ---
40 | 
41 | By adhering to these guidelines, you will help maintain the quality and
42 | consistency of the Arm Optimized Routines project. We appreciate your
43 | contributions and look forward to collaborating with you!
44 | 
45 | 


--------------------------------------------------------------------------------
/networking/include/networking.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Public API.
 3 |  *
 4 |  * Copyright (c) 2020, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | unsigned short __chksum (const void *, unsigned int);
 9 | #if __aarch64__ && __ARM_NEON
10 | unsigned short __chksum_aarch64_simd (const void *, unsigned int);
11 | #endif
12 | #if __arm__ && __ARM_NEON
13 | unsigned short __chksum_arm_simd (const void *, unsigned int);
14 | #endif
15 | 


--------------------------------------------------------------------------------
/string/aarch64/asmdefs.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Macros for asm code.  AArch64 version.
 3 |  *
 4 |  * Copyright (c) 2019-2023, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #ifndef _ASMDEFS_H
 9 | #define _ASMDEFS_H
10 | 
11 | /* Branch Target Identification support.  */
12 | #define BTI_C		hint	34
13 | #define BTI_J		hint	36
14 | /* Return address signing support (pac-ret).  */
15 | #define PACIASP		hint	25; .cfi_window_save
16 | #define AUTIASP		hint	29; .cfi_window_save
17 | 
18 | /* GNU_PROPERTY_AARCH64_* macros from elf.h.  */
19 | #define FEATURE_1_AND 0xc0000000
20 | #define FEATURE_1_BTI 1
21 | #define FEATURE_1_PAC 2
22 | 
23 | /* Add a NT_GNU_PROPERTY_TYPE_0 note.  */
24 | #define GNU_PROPERTY(type, value)	\
25 |   .section .note.gnu.property, "a";	\
26 |   .p2align 3;				\
27 |   .word 4;				\
28 |   .word 16;				\
29 |   .word 5;				\
30 |   .asciz "GNU";				\
31 |   .word type;				\
32 |   .word 4;				\
33 |   .word value;				\
34 |   .word 0;				\
35 |   .text
36 | 
37 | /* If set then the GNU Property Note section will be added to
38 |    mark objects to support BTI and PAC-RET.  */
39 | #ifndef WANT_GNU_PROPERTY
40 | #define WANT_GNU_PROPERTY 1
41 | #endif
42 | 
43 | #if WANT_GNU_PROPERTY
44 | /* Add property note with supported features to all asm files.  */
45 | GNU_PROPERTY (FEATURE_1_AND, FEATURE_1_BTI|FEATURE_1_PAC)
46 | #endif
47 | 
48 | #define ENTRY_ALIGN(name, alignment)	\
49 |   .global name;		\
50 |   .type name,%function;	\
51 |   .align alignment;		\
52 |   name:			\
53 |   .cfi_startproc;	\
54 |   BTI_C;
55 | 
56 | #define ENTRY(name)	ENTRY_ALIGN(name, 6)
57 | 
58 | #define ENTRY_ALIAS(name)	\
59 |   .global name;		\
60 |   .type name,%function;	\
61 |   name:
62 | 
63 | #define END(name)	\
64 |   .cfi_endproc;		\
65 |   .size name, .-name;
66 | 
67 | #define L(l) .L ## l
68 | 
69 | #endif
70 | 


--------------------------------------------------------------------------------
/string/aarch64/check-arch.S:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * check ARCH setting.
 3 |  *
 4 |  * Copyright (c) 2020-2022, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #if !__aarch64__
 9 | # error ARCH setting does not match the compiler.
10 | #endif
11 | 
12 | /* Include for GNU property notes.  */
13 | #include "asmdefs.h"
14 | 


--------------------------------------------------------------------------------
/string/aarch64/experimental/memchr-sve.S:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * memchr - find a character in a memory zone
 3 |  *
 4 |  * Copyright (c) 2018-2022, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "asmdefs.h"
 9 | 
10 | .arch armv8-a+sve
11 | 
12 | /* Assumptions:
13 |  *
14 |  * ARMv8-a, AArch64
15 |  * SVE Available.
16 |  */
17 | 
18 | ENTRY (__memchr_aarch64_sve)
19 | 	dup	z1.b, w1			/* duplicate c to a vector */
20 | 	setffr					/* initialize FFR */
21 | 	mov	x3, 0				/* initialize off */
22 | 
23 | 	.p2align 4
24 | 0:	whilelo	p1.b, x3, x2			/* make sure off < max */
25 | 	b.none	9f
26 | 
27 | 	/* Read a vector's worth of bytes, bounded by max,
28 | 	   stopping on first fault.  */
29 | 	ldff1b	z0.b, p1/z, [x0, x3]
30 | 	rdffrs	p0.b, p1/z
31 | 	b.nlast	2f
32 | 
33 | 	/* First fault did not fail: the vector bounded by max is valid.
34 | 	   Avoid depending on the contents of FFR beyond the branch.  */
35 | 	incb	x3				/* speculate increment */
36 | 	cmpeq	p2.b, p1/z, z0.b, z1.b		/* search for c */
37 | 	b.none	0b
38 | 	decb	x3				/* undo speculate */
39 | 
40 | 	/* Found C.  */
41 | 1:	brkb	p2.b, p1/z, p2.b	/* find the first c */
42 | 	add	x0, x0, x3		/* form partial pointer */
43 | 	incp	x0, p2.b		/* form final pointer to c */
44 | 	ret
45 | 
46 | 	/* First fault failed: only some of the vector is valid.
47 | 	   Perform the comparison only on the valid bytes.  */
48 | 2:	cmpeq	p2.b, p0/z, z0.b, z1.b
49 | 	b.any	1b
50 | 
51 | 	/* No C found.  Re-init FFR, increment, and loop.  */
52 | 	setffr
53 | 	incp	x3, p0.b
54 | 	b	0b
55 | 
56 | 	/* Found end of count.  */
57 | 9:	mov	x0, 0			/* return null */
58 | 	ret
59 | 
60 | END (__memchr_aarch64_sve)
61 | 


--------------------------------------------------------------------------------
/string/aarch64/experimental/memcmp-sve.S:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * memcmp - compare memory
 3 |  *
 4 |  * Copyright (c) 2018-2022, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "asmdefs.h"
 9 | 
10 | .arch armv8-a+sve
11 | 
12 | /* Assumptions:
13 |  *
14 |  * ARMv8-a, AArch64
15 |  * SVE Available.
16 |  */
17 | 
18 | ENTRY (__memcmp_aarch64_sve)
19 | 	mov	x3, 0			/* initialize off */
20 | 
21 | 0:	whilelo	p0.b, x3, x2		/* while off < max */
22 | 	b.none	9f
23 | 
24 | 	ld1b	z0.b, p0/z, [x0, x3]	/* read vectors bounded by max.  */
25 | 	ld1b	z1.b, p0/z, [x1, x3]
26 | 
27 | 	/* Increment for a whole vector, even if we've only read a partial.
28 | 	   This is significantly cheaper than INCP, and since OFF is not
29 | 	   used after the loop it is ok to increment OFF past MAX.  */
30 | 	incb	x3
31 | 
32 | 	cmpne	p1.b, p0/z, z0.b, z1.b	/* while no inequalities */
33 | 	b.none	0b
34 | 
35 | 	/* Found inequality.  */
36 | 1:	brkb	p1.b, p0/z, p1.b	/* find first such */
37 | 	lasta	w0, p1, z0.b		/* extract each byte */
38 | 	lasta	w1, p1, z1.b
39 | 	sub	x0, x0, x1		/* return comparison */
40 | 	ret
41 | 
42 | 	/* Found end-of-count.  */
43 | 9:	mov	x0, 0			/* return equality */
44 | 	ret
45 | 
46 | END (__memcmp_aarch64_sve)
47 | 


--------------------------------------------------------------------------------
/string/aarch64/experimental/stpcpy-sve.S:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * stpcpy - copy a string returning pointer to end.
 3 |  *
 4 |  * Copyright (c) 2020, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #define BUILD_STPCPY 1
 9 | 
10 | #include "strcpy-sve.S"
11 | 


--------------------------------------------------------------------------------
/string/aarch64/experimental/strchrnul-sve.S:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * strchrnul - find a character or nul in a string
 3 |  *
 4 |  * Copyright (c) 2018-2019, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #define BUILD_STRCHRNUL
 9 | #include "strchr-sve.S"
10 | 


--------------------------------------------------------------------------------
/string/aarch64/experimental/strcmp-sve.S:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * __strcmp_aarch64_sve - compare two strings
 3 |  *
 4 |  * Copyright (c) 2018-2022, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "asmdefs.h"
 9 | 
10 | .arch armv8-a+sve
11 | 
12 | /* Assumptions:
13 |  *
14 |  * ARMv8-a, AArch64
15 |  * SVE Available.
16 |  */
17 | 
18 | ENTRY (__strcmp_aarch64_sve)
19 | 	setffr				/* initialize FFR */
20 | 	ptrue	p1.b, all		/* all ones; loop invariant */
21 | 	mov	x2, 0			/* initialize offset */
22 | 
23 | 	/* Read a vector's worth of bytes, stopping on first fault.  */
24 | 	.p2align 4
25 | 0:	ldff1b	z0.b, p1/z, [x0, x2]
26 | 	ldff1b	z1.b, p1/z, [x1, x2]
27 | 	rdffrs	p0.b, p1/z
28 | 	b.nlast	2f
29 | 
30 | 	/* First fault did not fail: the whole vector is valid.
31 | 	   Avoid depending on the contents of FFR beyond the branch.  */
32 | 	incb	x2, all			/* skip bytes for next round */
33 | 	cmpeq	p2.b, p1/z, z0.b, z1.b	/* compare strings */
34 | 	cmpne	p3.b, p1/z, z0.b, 0	/* search for ~zero */
35 | 	nands	p2.b, p1/z, p2.b, p3.b	/* ~(eq & ~zero) -> ne | zero */
36 | 	b.none	0b
37 | 
38 | 	/* Found end-of-string or inequality.  */
39 | 1:	brkb	p2.b, p1/z, p2.b	/* find first such */
40 | 	lasta	w0, p2, z0.b		/* extract each char */
41 | 	lasta	w1, p2, z1.b
42 | 	sub	x0, x0, x1		/* return comparison */
43 | 	ret
44 | 
45 | 	/* First fault failed: only some of the vector is valid.
46 | 	   Perform the comparison only on the valid bytes.  */
47 | 2:	incp	x2, p0.b		/* skip bytes for next round */
48 | 	setffr				/* re-init FFR for next round */
49 | 	cmpeq	p2.b, p0/z, z0.b, z1.b	/* compare strings, as above */
50 | 	cmpne	p3.b, p0/z, z0.b, 0
51 | 	nands	p2.b, p0/z, p2.b, p3.b
52 | 	b.none	0b
53 | 	b	1b
54 | 
55 | END (__strcmp_aarch64_sve)
56 | 


--------------------------------------------------------------------------------
/string/aarch64/experimental/strcpy-sve.S:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * strcpy/stpcpy - copy a string returning pointer to start/end.
 3 |  *
 4 |  * Copyright (c) 2018-2022, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "asmdefs.h"
 9 | 
10 | .arch armv8-a+sve
11 | 
12 | /* Assumptions:
13 |  *
14 |  * ARMv8-a, AArch64
15 |  * SVE Available.
16 |  */
17 | 
18 | /* To build as stpcpy, define BUILD_STPCPY before compiling this file.  */
19 | #ifdef BUILD_STPCPY
20 | #define FUNC  __stpcpy_aarch64_sve
21 | #else
22 | #define FUNC  __strcpy_aarch64_sve
23 | #endif
24 | 
25 | ENTRY (FUNC)
26 | 	setffr				/* initialize FFR */
27 | 	ptrue	p2.b, all		/* all ones; loop invariant */
28 | 	mov	x2, 0			/* initialize offset */
29 | 
30 | 	.p2align 4
31 | 	/* Read a vector's worth of bytes, stopping on first fault.  */
32 | 0:	ldff1b	z0.b, p2/z, [x1, x2]
33 | 	rdffrs	p0.b, p2/z
34 | 	b.nlast	1f
35 | 
36 | 	/* First fault did not fail: the whole vector is valid.
37 | 	   Avoid depending on the contents of FFR beyond the branch.  */
38 | 	cmpeq	p1.b, p2/z, z0.b, 0	/* search for zeros */
39 | 	b.any	2f
40 | 
41 | 	/* No zero found.  Store the whole vector and loop.  */
42 | 	st1b	z0.b, p2, [x0, x2]
43 | 	incb	x2, all
44 | 	b	0b
45 | 
46 | 	/* First fault failed: only some of the vector is valid.
47 | 	   Perform the comparison only on the valid bytes.  */
48 | 1:	cmpeq	p1.b, p0/z, z0.b, 0	/* search for zeros */
49 | 	b.any	2f
50 | 
51 | 	/* No zero found.  Store the valid portion of the vector and loop.  */
52 | 	setffr				/* re-init FFR */
53 | 	st1b	z0.b, p0, [x0, x2]
54 | 	incp	x2, p0.b
55 | 	b	0b
56 | 
57 | 	/* Zero found.  Crop the vector to the found zero and finish.  */
58 | 2:	brka	p0.b, p2/z, p1.b
59 | 	st1b	z0.b, p0, [x0, x2]
60 | #ifdef BUILD_STPCPY
61 | 	add	x0, x0, x2
62 | 	sub	x0, x0, 1
63 | 	incp	x0, p0.b
64 | #endif
65 | 	ret
66 | 
67 | END (FUNC)
68 | 


--------------------------------------------------------------------------------
/string/aarch64/experimental/strlen-sve.S:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * __strlen_aarch64_sve - compute the length of a string
 3 |  *
 4 |  * Copyright (c) 2018-2022, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "asmdefs.h"
 9 | 
10 | .arch armv8-a+sve
11 | 
12 | /* Assumptions:
13 |  *
14 |  * ARMv8-a, AArch64
15 |  * SVE Available.
16 |  */
17 | 
18 | ENTRY (__strlen_aarch64_sve)
19 | 	setffr			/* initialize FFR */
20 | 	ptrue	p2.b		/* all ones; loop invariant */
21 | 	mov	x1, 0		/* initialize length */
22 | 
23 | 	/* Read a vector's worth of bytes, stopping on first fault.  */
24 | 	.p2align 4
25 | 0:	ldff1b	z0.b, p2/z, [x0, x1]
26 | 	rdffrs	p0.b, p2/z
27 | 	b.nlast	2f
28 | 
29 | 	/* First fault did not fail: the whole vector is valid.
30 | 	   Avoid depending on the contents of FFR beyond the branch.  */
31 | 	incb	x1, all			/* speculate increment */
32 | 	cmpeq	p1.b, p2/z, z0.b, 0	/* loop if no zeros */
33 | 	b.none	0b
34 | 	decb	x1, all			/* undo speculate */
35 | 
36 | 	/* Zero found.  Select the bytes before the first and count them.  */
37 | 1:	brkb	p0.b, p2/z, p1.b
38 | 	incp	x1, p0.b
39 | 	mov	x0, x1
40 | 	ret
41 | 
42 | 	/* First fault failed: only some of the vector is valid.
43 | 	   Perform the comparison only on the valid bytes.  */
44 | 2:	cmpeq	p1.b, p0/z, z0.b, 0
45 | 	b.any	1b
46 | 
47 | 	/* No zero found.  Re-init FFR, increment, and loop.  */
48 | 	setffr
49 | 	incp	x1, p0.b
50 | 	b	0b
51 | 
52 | END (__strlen_aarch64_sve)
53 | 


--------------------------------------------------------------------------------
/string/aarch64/experimental/strnlen-sve.S:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * strnlen - calculate the length of a string with limit.
 3 |  *
 4 |  * Copyright (c) 2019-2022, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "asmdefs.h"
 9 | 
10 | .arch armv8-a+sve
11 | 
12 | /* Assumptions:
13 |  *
14 |  * ARMv8-a, AArch64
15 |  * SVE Available.
16 |  */
17 | 
18 | ENTRY (__strnlen_aarch64_sve)
19 | 	setffr				/* initialize FFR */
20 | 	mov	x2, 0			/* initialize len */
21 | 	b	1f
22 | 
23 | 	.p2align 4
24 | 	/* We have off + vl <= max, and so may read the whole vector.  */
25 | 0:	ldff1b	z0.b, p0/z, [x0, x2]
26 | 	rdffrs	p1.b, p0/z
27 | 	b.nlast	2f
28 | 
29 | 	/* First fault did not fail: the whole vector is valid.
30 | 	   Avoid depending on the contents of FFR beyond the branch.  */
31 | 	cmpeq	p2.b, p0/z, z0.b, 0
32 | 	b.any	8f
33 | 	incb	x2
34 | 
35 | 1:	whilelo	p0.b, x2, x1
36 | 	b.last	0b
37 | 
38 | 	/* We have off + vl > max.  Test for off == max before proceeding.  */
39 | 	b.none	9f
40 | 
41 | 	ldff1b	z0.b, p0/z, [x0, x2]
42 | 	rdffrs	p1.b, p0/z
43 | 	b.nlast	2f
44 | 
45 | 	/* First fault did not fail: the vector up to max is valid.
46 | 	   Avoid depending on the contents of FFR beyond the branch.
47 | 	   Compare for end-of-string, but there are no more bytes.  */
48 | 	cmpeq	p2.b, p0/z, z0.b, 0
49 | 
50 | 	/* Found end-of-count or zero.  */
51 | 8:	brkb	p2.b, p0/z, p2.b
52 | 	mov	x0, x2
53 | 	incp	x0, p2.b
54 | 	ret
55 | 
56 | 	/* First fault failed: only some of the vector is valid.
57 | 	   Perform the comparison only on the valid bytes.  */
58 | 2:	cmpeq	p2.b, p1/z, z0.b, 0
59 | 	b.any	8b
60 | 
61 | 	/* No zero found.  Re-init FFR, incr and loop.  */
62 | 	setffr
63 | 	incp	x2, p1.b
64 | 	b	1b
65 | 
66 | 	/* End of count.  Return max.  */
67 | 9:	mov	x0, x1
68 | 	ret
69 | 
70 | END (__strnlen_aarch64_sve)
71 | 


--------------------------------------------------------------------------------
/string/aarch64/memcpy-mops.S:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * memcpy using MOPS extension.
 3 |  *
 4 |  * Copyright (c) 2023, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "asmdefs.h"
 9 | 
10 | ENTRY (__memcpy_aarch64_mops)
11 | 	mov	x3, x0			/* copy through x3 so x0 (dest) is returned unmodified */
12 | 	.inst	0x19010443	/* cpyfp   [x3]!, [x1]!, x2!  */
13 | 	.inst	0x19410443	/* cpyfm   [x3]!, [x1]!, x2!  */
14 | 	.inst	0x19810443	/* cpyfe   [x3]!, [x1]!, x2!  */
15 | 	ret
16 | 
17 | END (__memcpy_aarch64_mops)
18 | 


--------------------------------------------------------------------------------
/string/aarch64/memmove-mops.S:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * memmove using MOPS extension.
 3 |  *
 4 |  * Copyright (c) 2023, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "asmdefs.h"
 9 | 
10 | ENTRY (__memmove_aarch64_mops)
11 | 	mov	x3, x0			/* copy through x3 so x0 (dest) is returned unmodified */
12 | 	.inst	0x1d010443	/* cpyp    [x3]!, [x1]!, x2!  */
13 | 	.inst	0x1d410443	/* cpym    [x3]!, [x1]!, x2!  */
14 | 	.inst	0x1d810443	/* cpye    [x3]!, [x1]!, x2!  */
15 | 	ret
16 | 
17 | END (__memmove_aarch64_mops)
18 | 


--------------------------------------------------------------------------------
/string/aarch64/memset-mops.S:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * memset using MOPS extension.
 3 |  *
 4 |  * Copyright (c) 2023, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include "asmdefs.h"
 9 | 
10 | ENTRY (__memset_aarch64_mops)
11 | 	mov     x3, x0			/* set through x3 so x0 (dest) is returned unmodified */
12 | 	.inst   0x19c10443	/* setp    [x3]!, x2!, x1  */
13 | 	.inst   0x19c14443	/* setm    [x3]!, x2!, x1  */
14 | 	.inst   0x19c18443	/* sete    [x3]!, x2!, x1  */
15 | 	ret
16 | 
17 | END (__memset_aarch64_mops)
18 | 


--------------------------------------------------------------------------------
/string/aarch64/stpcpy.S:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * stpcpy - copy a string returning pointer to end.
 3 |  *
 4 |  * Copyright (c) 2020, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #define BUILD_STPCPY 1
 9 | 
10 | #include "strcpy.S"
11 | 


--------------------------------------------------------------------------------
/string/arm/check-arch.S:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * check ARCH setting.
 3 |  *
 4 |  * Copyright (c) 2020-2022, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #if !__arm__
 9 | # error ARCH setting does not match the compiler.
10 | #endif
11 | 
12 | /* For attributes that may affect ABI.  */
13 | #include "asmdefs.h"
14 | 


--------------------------------------------------------------------------------
/string/include/benchlib.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Benchmark support functions.
 3 |  *
 4 |  * Copyright (c) 2020, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include <stdint.h>
 9 | #include <time.h>
10 | 
11 | /* Return the CLOCK_MONOTONIC time in nanoseconds (clock_gettime result is not checked).  */
12 | static inline uint64_t
13 | clock_get_ns (void)
14 | {
15 |   struct timespec ts;
16 |   clock_gettime (CLOCK_MONOTONIC, &ts);
17 |   return ts.tv_sec * (uint64_t) 1000000000 + ts.tv_nsec; /* cast forces 64-bit math */
18 | }
19 | 
20 | /* Fast 32-bit random number generator: returns the high 32 bits of a 64-bit
21 |    state, then advances it.  A non-zero SEED replaces the state beforehand.  */
22 | static inline uint32_t
23 | rand32 (uint32_t seed)
24 | {
25 |   static uint64_t state = 0xb707be451df0bb19ULL;
26 |   if (seed != 0)
27 |     state = seed; /* only the low 32 bits of state are non-zero after this */
28 |   uint32_t res = state >> 32; /* hence the call that reseeds returns 0 */
29 |   state = state * 6364136223846793005ULL + 1; /* multiply-and-add step, wraps mod 2^64 */
30 |   return res;
31 | }
32 | 
33 | /* RUN* (BENCH, FN) invoke BENCH with FN's name as a string, then FN itself.  */
34 | #define RUN(BENCH, FN) BENCH(#FN, FN)
35 | 
36 | #if __aarch64__
37 | # define RUNA64(BENCH, FN) BENCH(#FN, FN)
38 | #else
39 | # define RUNA64(BENCH, FN) /* expands to nothing unless __aarch64__ is set */
40 | #endif
41 | 
42 | #if __ARM_FEATURE_SVE
43 | # define RUNSVE(BENCH, FN) BENCH(#FN, FN)
44 | #else
45 | # define RUNSVE(BENCH, FN) /* expands to nothing unless __ARM_FEATURE_SVE is set */
46 | #endif
47 | 
48 | #if WANT_MOPS
49 | # define RUNMOPS(BENCH, FN) BENCH(#FN, FN)
50 | #else
51 | # define RUNMOPS(BENCH, FN) /* expands to nothing unless WANT_MOPS is set */
52 | #endif
53 | 
54 | #if __arm__
55 | # define RUNA32(BENCH, FN) BENCH(#FN, FN)
56 | #else
57 | # define RUNA32(BENCH, FN) /* expands to nothing unless __arm__ is set */
58 | #endif
59 | 
60 | #if __arm__ && __ARM_ARCH >= 6 && __ARM_ARCH_ISA_THUMB == 2
61 | # define RUNT32(BENCH, FN) BENCH(#FN, FN)
62 | #else
63 | # define RUNT32(BENCH, FN) /* expands to nothing unless the condition above holds */
64 | #endif
65 | 


--------------------------------------------------------------------------------
/string/test/stringtest.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Common string test code.
 3 |  *
 4 |  * Copyright (c) 2020, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #include <ctype.h>
 9 | #include <stdio.h>
10 | 
11 | /* Error accounting for a test case: ERR bumps err_count, then prints its arguments.  */
12 | static int err_count;
13 | #define ERR_LIMIT 10 /* not referenced in this header; presumably a cap checked by the tests */
14 | #define ERR(...) (err_count++, printf (__VA_ARGS__))
15 | 
16 | static inline void /* Print C to stdout: as-is if printable, else as a hex escape.  */
17 | quotechar (unsigned char c)
18 | {
19 |   if (isprint (c))
20 |     putchar (c);
21 |   else
22 |     printf ("\\x%02x", c); /* emits a literal backslash, 'x', and two hex digits */
23 | }
24 | 
25 | /* Print PREFIX and LEN bytes of P in quotes; if AT >= 0, only a window around offset AT.  */
26 | static void
27 | quoteat (const char *prefix, const void *p, int len, int at)
28 | {
29 |   static const int CTXLEN = 15; /* bytes of context kept on each side of AT */
30 |   int i;
31 |   const char *pre = "\"";
32 |   const char *post = "\"";
33 |   const char *s = p;
34 |   if (at > CTXLEN) /* drop leading bytes so the output starts CTXLEN before AT */
35 |     {
36 |       s += at - CTXLEN;
37 |       len -= at - CTXLEN;
38 |       pre = "...\"";
39 |     }
40 |   if (at >= 0 && len > 2 * CTXLEN + 1) /* cap the window; never applies when AT < 0 */
41 |     {
42 |       len = 2 * CTXLEN + 1;
43 |       post = "\"...";
44 |     }
45 |   printf ("%4s: %s", prefix, pre);
46 |   for (i = 0; i < len; i++) /* prints nothing if LEN is zero or negative */
47 |     quotechar (s[i]);
48 |   printf ("%s\n", post);
49 | }
50 | 
51 | static inline void /* Print PREFIX and all LEN bytes of P in quotes.  */
52 | quote (const char *prefix, const void *p, int len)
53 | {
54 |   quoteat (prefix, p, len, -1); /* AT = -1 disables the context window */
55 | }
56 | 


--------------------------------------------------------------------------------
/string/x86_64/check-arch.S:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * check ARCH setting.
 3 |  *
 4 |  * Copyright (c) 2020, Arm Limited.
 5 |  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 6 |  */
 7 | 
 8 | #if !__x86_64__
 9 | # error ARCH setting does not match the compiler.
10 | #endif
11 | 


--------------------------------------------------------------------------------