├── .github
    └── workflows
    │   ├── CI.yml
    │   ├── CompatHelper.yml
    │   ├── Downgrade.yml
    │   └── TagBot.yml
├── .gitignore
├── LICENSE
├── Project.toml
├── README.md
├── src
    └── SIMDMathFunctions.jl
└── test
    └── runtests.jl


/.github/workflows/CI.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | on:
 3 |   pull_request:
 4 |     branches:
 5 |       - main
 6 |   push:
 7 |     branches:
 8 |       - main
 9 |     tags: ['*']
10 |   workflow_dispatch:
11 | concurrency:
12 |   # Skip intermediate builds: always.
13 |   # Cancel intermediate builds: only if it is a pull request build.
14 |   group: ${{ github.workflow }}-${{ github.ref }}
15 |   cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
16 | jobs:
17 |   test:
18 |     name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }}
19 |     runs-on: ${{ matrix.os }}
20 |     strategy:
21 |       fail-fast: false  # let every matrix entry run to completion
22 |       matrix:
23 |         version:  # quoted so that YAML does not read them as floats
24 |           - '1.6'
25 |           - '1.9'
26 |           - 'nightly'
27 |         os:
28 |           - ubuntu-latest
29 |         arch:
30 |           - x64
31 |     steps:
32 |       - uses: actions/checkout@v4
33 |       - uses: julia-actions/setup-julia@v2  # same major version as in Downgrade.yml
34 |         with:
35 |           version: ${{ matrix.version }}
36 |           arch: ${{ matrix.arch }}
37 |       - uses: julia-actions/cache@v1
38 |       - uses: julia-actions/julia-buildpkg@v1
39 |       - uses: julia-actions/julia-runtest@v1
40 |       - uses: julia-actions/julia-processcoverage@v1
41 |       - uses: codecov/codecov-action@v4
42 |         with:
43 |           token: ${{ secrets.CODECOV_TOKEN }}
44 |           fail_ci_if_error: false  # or true if you want CI to fail when Codecov fails
45 |           files: lcov.info
46 | 


--------------------------------------------------------------------------------
/.github/workflows/CompatHelper.yml:
--------------------------------------------------------------------------------
 1 | name: CompatHelper
 2 | on:
 3 |   schedule:
 4 |     - cron: '0 0 * * *'  # every day at 00:00 UTC; quoted because the value contains `*`
 5 |   workflow_dispatch:
 6 | permissions:
 7 |   contents: write
 8 |   pull-requests: write
 9 | jobs:
10 |   CompatHelper:
11 |     runs-on: ubuntu-latest
12 |     steps:
13 |       - name: Check if Julia is already available in the PATH
14 |         id: julia_in_path
15 |         run: which julia
16 |         continue-on-error: true  # a missing julia is expected, the next step installs it
17 |       - name: Install Julia, but only if it is not already available in the PATH
18 |         uses: julia-actions/setup-julia@v2
19 |         with:
20 |           version: '1'
21 |           arch: ${{ runner.arch }}
22 |         if: steps.julia_in_path.outcome != 'success'
23 |       - name: "Add the General registry via Git"
24 |         run: |
25 |           import Pkg
26 |           ENV["JULIA_PKG_SERVER"] = ""
27 |           Pkg.Registry.add("General")
28 |         shell: julia --color=yes {0}
29 |       - name: "Install CompatHelper"
30 |         run: |
31 |           import Pkg
32 |           name = "CompatHelper"
33 |           uuid = "aa819f21-2bde-4658-8897-bab36330d9b7"
34 |           version = "3"
35 |           Pkg.add(; name, uuid, version)
36 |         shell: julia --color=yes {0}
37 |       - name: "Run CompatHelper"
38 |         run: |
39 |           import CompatHelper
40 |           CompatHelper.main()
41 |         shell: julia --color=yes {0}
42 |         env:
43 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
44 |           COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }}
45 |           # COMPATHELPER_PRIV: ${{ secrets.COMPATHELPER_PRIV }}
46 | 


--------------------------------------------------------------------------------
/.github/workflows/Downgrade.yml:
--------------------------------------------------------------------------------
 1 | # Downgrade CI: julia-downgrade-compat rewrites the [compat] bounds so that the oldest allowed dependency versions are tested.
 2 | # See https://www.stochasticlifestyle.com/semantic-versioning-semver-is-flawed-and-downgrade-ci-is-required-to-fix-it/
 3 | name: Downgrade
 4 | on:
 5 |   pull_request:
 6 |     branches:
 7 |       - main
 8 |     paths-ignore:
 9 |       - 'docs/**'
10 |   push:
11 |     branches:
12 |       - main
13 |     paths-ignore:
14 |       - 'docs/**'
15 | jobs:
16 |   test:
17 |     runs-on: ubuntu-latest
18 |     strategy:
19 |       matrix:
20 |         version: ['1.6']
21 |     steps:
22 |       - uses: actions/checkout@v4
23 |       - uses: julia-actions/setup-julia@v2
24 |         with:
25 |           version: ${{ matrix.version }}
26 |       - uses: julia-actions/julia-downgrade-compat@v1
27 |         # if: ${{ matrix.version == '1.6' }}  # disabled: would restrict the downgrade step to Julia 1.6
28 |         with:
29 |           skip: Pkg,TOML
30 |       - uses: julia-actions/julia-buildpkg@v1
31 |       - uses: julia-actions/julia-runtest@v1
32 | 


--------------------------------------------------------------------------------
/.github/workflows/TagBot.yml:
--------------------------------------------------------------------------------
 1 | name: TagBot
 2 | on:
 3 |   issue_comment:
 4 |     types:
 5 |       - created
 6 |   workflow_dispatch:
 7 |     inputs:
 8 |       lookback:
 9 |         default: "3"  # input defaults are strings; quoted to avoid implicit integer typing
10 | permissions:
11 |   actions: read
12 |   checks: read
13 |   contents: write
14 |   deployments: read
15 |   issues: read
16 |   discussions: read
17 |   packages: read
18 |   pages: read
19 |   pull-requests: read
20 |   repository-projects: read
21 |   security-events: read
22 |   statuses: read
23 | jobs:
24 |   TagBot:
25 |     if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot'  # manual run, or a comment posted by the bot account
26 |     runs-on: ubuntu-latest
27 |     steps:
28 |       - uses: JuliaRegistries/TagBot@v1
29 |         with:
30 |           token: ${{ secrets.GITHUB_TOKEN }}
31 |           ssh: ${{ secrets.DOCUMENTER_KEY }}
32 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /Manifest.toml
2 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 Thomas Dubos <thomas.dubos@lmd.ipsl.fr> and contributors
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Project.toml:
--------------------------------------------------------------------------------
 1 | name = "SIMDMathFunctions"
 2 | uuid = "d22a7203-ad50-4fbc-abc4-d6ac724cca58"
 3 | authors = ["Thomas Dubos <thomas.dubos@lmd.ipsl.fr> and contributors"]
 4 | version = "0.1.3"
 5 | 
 6 | [deps]
 7 | SIMD = "fdea26ae-647d-5447-a871-4b548cad5224"
 8 | SLEEFPirates = "476501e8-09a2-5ece-8869-fb82de89a1fa"
 9 | VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
10 | 
11 | [compat]
12 | SIMD = "3.4"
13 | SLEEFPirates = "0.6.42"
14 | VectorizationBase = "0.21.42"
15 | julia = "1.6"
16 | 
17 | [extras]
18 | Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
19 | 
20 | [targets]
21 | test = ["Test"]
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # SIMDMathFunctions
 2 | 
 3 | Fast vectorized mathematical functions for SIMD.jl, using SLEEFPirates.jl.
 4 | 
 5 | [![CI](https://github.com/ClimFlows/SIMDMathFunctions.jl/actions/workflows/CI.yml/badge.svg)](https://github.com/ClimFlows/SIMDMathFunctions.jl/actions/workflows/CI.yml)
 6 | [![Code Coverage](https://codecov.io/gh/ClimFlows/SIMDMathFunctions.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/ClimFlows/SIMDMathFunctions.jl)
 7 | 
 8 | ## Installing
 9 | 
10 | This package is registered. To install it:
11 | ```Julia
12 | ] add SIMDMathFunctions
13 | ```
14 | 
15 | ## Overview
16 | 
17 | The primary goal of `SIMDMathFunctions` is to provide efficient methods for mathematical functions with `SIMD.Vec` arguments. Under the hood, optimized implementations provided by `SLEEFPirates.jl` are used. This allows explicitly vectorized code using `SIMD.jl` to benefit from fast vectorized math functions.
18 | 
19 | ```Julia
20 | using SIMD: VecRange
21 | using SIMDMathFunctions: is_supported, is_fast, fast_functions
22 | using BenchmarkTools
23 | 
24 | function exp!(xs::Vector{T}, ys::Vector{T}) where {T}
25 |     @inbounds for i in eachindex(xs,ys)
26 |         xs[i] = @fastmath exp(ys[i])
27 |     end
28 | end
29 | 
30 | function exp!(xs::Vector{T}, ys::Vector{T}, ::Val{N}) where {N, T}
31 |     @assert length(ys) == length(xs)
32 |     @assert length(xs) % N == 0
33 |     @assert is_supported(@fastmath exp)
34 |     @inbounds for istart in 1:N:length(xs)
35 |         i = VecRange{N}(istart)
36 |         xs[i] = @fastmath exp(ys[i])
37 |     end
38 | end
39 | 
40 | y=randn(Float32, 1024*1024); x=similar(y);
41 | 
42 | @benchmark exp!($x, $y)
43 | @benchmark exp!($x, $y, Val(8))
44 | @benchmark exp!($x, $y, Val(16))
45 | @benchmark exp!($x, $y, Val(32))
46 | 
47 | is_fast(exp)
48 | unary_funs = fast_functions(1)
49 | binary_funs = fast_functions(2)
50 | ```
51 | 
52 | `is_supported(fun)` returns `true` if function `fun` supports `SIMD.Vec` arguments. Similarly `is_fast(fun)` returns `true` if `fun` has an optimized implementation.
53 | 
54 | `fast_functions([ninputs])` returns a vector of functions benefitting from a fast implementation, restricted to those accepting `ninputs` input arguments if `ninputs` is provided.
55 | 
56 | `SIMDMathFunctions` also provides a helper function `vmap` to vectorize not-yet-supported mathematical functions. For example:
57 | 
58 | ```Julia
59 | using SIMD: Vec
60 | import SIMDMathFunctions: vmap
61 | import SpecialFunctions: erf
62 | 
63 | erf(x::Vec) = vmap(erf, x)
64 | erf(x::Vec, y::Vec) = vmap(erf, x, y)
65 | erf(x::Vec{N,T}, y::T) where {N,T} = vmap(erf, x, y)
66 | 
67 | x = Vec(randn(Float32, 16)...)
68 | @benchmark erf($x)
69 | ```
70 | 
71 | The default `vmap` method simply calls `erf` on each element of `x`. There is no performance benefit, but it allows generic code to use `erf`. If an implementation optimized for vector inputs is available (below, `erf_SIMD`, built on `VectorizationBase.verf`), you can provide a specialized method for `vmap`:
72 | 
73 | ```Julia
74 | using VectorizationBase: verf # vectorized implementation
75 | using SIMDMathFunctions: SIMDVec, VBVec # VectorizationBase <=> SIMD conversion
76 | 
77 | erf_SIMD(x) = SIMDVec(verf(VBVec(x)))
78 | vmap(::typeof(erf), x) = erf_SIMD(x)
79 | 
80 | @benchmark erf($x)
81 | ```
82 | 


--------------------------------------------------------------------------------
/src/SIMDMathFunctions.jl:
--------------------------------------------------------------------------------
  1 | """
  2 | Vectorized mathematical functions
  3 | 
  4 | This module exports nothing, its purpose is to specialize
  5 | mathematical functions in Base and Base.FastMath for SIMD.Vec arguments
  6 | using vectorized implementations from SLEEFPirates.
  7 | 
  8 | See: `is_supported`, `is_fast`, `fast_functions`, `vmap`, `tolerance`.
  9 | """
 10 | module SIMDMathFunctions
 11 | 
 12 | import SLEEFPirates as SP
 13 | import Base.FastMath as FM
 14 | import VectorizationBase as VB
 15 | import SIMD
 16 | const Floats = Union{Float32, Float64} # element types accepted by the specialized methods defined in this module
 17 | const Vec{T,N} = SIMD.Vec{N,T} # NB: swapped type parameters, so that `Vec{T}` constrains the element type only
 18 | const Vec32{N} = SIMD.Vec{N, Float32} # Float32 vectors of any length
 19 | const Vec64{N} = SIMD.Vec{N, Float64} # Float64 vectors of any length
 20 | 
 21 | """
 22 |     tol = tolerance(fun)
 23 | Let `x::SIMD.Vec{N,T}` and `ref` be obtained by applying
 24 | `fun` on each element of `x`. Now `fun(x)` may differ
 25 | from `ref` by an amount of `tolerance(fun)*eps(T)*abs(ref)`.
 26 | `tol==1` except for a few functions, for which `tol==2`.
 27 | """
 28 | tolerance(op) = 1
 29 | 
 30 | """
 31 | 
 32 | `vmap(fun, x)` applies `fun` to each element of `x::SIMD.Vec` and returns
 33 | a `SIMD.Vec`.
 34 | 
 35 |     a = vmap(fun, x)
 36 | 
 37 | If `fun` returns a 2-tuple, `vmap` returns a 2-tuple of `SIMD.Vec`:
 38 | 
 39 |     a, b = vmap(fun, x)    # `fun(x)` returns a 2-tuple, e.g. `sincos`
 40 | 
 41 | `vmap(fun, x, y)` works similarly when `fun` takes two input arguments (e.g. `atan(x,y)`); one of `x`, `y` may be a scalar of the element type:
 42 | 
 43 |     a    = vmap(fun, x, y)
 44 |     a, b = vmap(fun, x, y) # `fun(x,y)` returns a 2-tuple
 45 | 
 46 | Generic implementations are provided, which call `fun` and provide no performance
 47 | benefit. `vmap` may be specialized for argument `fun`. Such optimized implementations
 48 | may return a different result than `fun`, within some tolerance bounds (see [`tolerance`](@ref)).
 49 | Currently optimized implementations are provided by `SLEEFPirates.jl`.
 50 | """
 51 | @inline vmap(op, x) = vmap_unop(op, x)
 52 | @inline vmap(op, x,y) = vmap_binop(op, x, y)
 53 | 
 54 | # Generic fallbacks: evaluate `op` element by element, then repack the results into SIMD.Vec(s)
 55 | @inline vmap_unop(op, x::Vec) = vec(map(op, values(x)))
 56 | @inline vmap_binop(op, x::V, y::V) where {V<:Vec} = vec(map(op, values(x), values(y)))
 57 | @inline vmap_binop(op, x::Vec{T}, y::T) where {T} = vec(map(xi -> op(xi, y), values(x))) # scalar second argument
 58 | @inline vmap_binop(op, x::T, y::Vec{T}) where {T} = vec(map(yi -> op(x, yi), values(y))) # scalar first argument
 59 | @inline values(x) = map(elem -> elem.value, x.data) # tuple of the `value` fields of the entries of `x.data`
 60 | @inline vec(t::NTuple{N, <:SIMD.VecTypes}) where {N} = SIMD.Vec(t...) # one scalar result per element
 61 | @inline vec(t::NTuple{N, T}) where {N, VT<:SIMD.VecTypes, T<:Tuple{Vararg{VT}}} =
 62 |     map(out -> SIMD.Vec(out...), tuple(zip(t...)...)) # tuple results: one SIMD.Vec per output
 63 | 
 64 | """
 65 |     funs = fast_functions()
 66 |     unary_ops = fast_functions(1)
 67 |     binary_ops = fast_functions(2)
 68 | Returns a vector of the functions for which `vmap` has a specialized method; if `inputs` is given, only those taking `inputs` input arguments.
 69 | """
 70 | fast_functions() =
 71 |     [sig.parameters[2].instance for sig in (m.sig for m in methods(vmap)) if sig.parameters[2] != Any]
 72 | fast_functions(inputs::Int) =
 73 |     [sig.parameters[2].instance for sig in (m.sig for m in methods(vmap)) if (sig.parameters[2] != Any && length(sig.parameters) == inputs + 2)]
 74 | 
 75 | """
 76 |     flag = is_supported(fun)
 77 | Returns `true` if `fun` accepts `SIMD.Vec` arguments (probed with one and with two `SIMD.Vec{4,Float64}` arguments).
 78 | """
 79 | @inline function is_supported(fun::F) where {F<:Function}
 80 |     V = SIMD.Vec{4,Float64}
 81 |     hasmethod(fun, Tuple{V}) || hasmethod(fun, Tuple{V,V}) # query `fun` itself: `F.instance` only exists for singleton function types
 82 | end
 83 | 
 84 | """
 85 |     flag = is_fast(fun)
 86 | Returns `true` if `vmap` has a method specialized for `fun` (taking one or two vector inputs), `false` otherwise.
 87 | """
 88 | @inline function is_fast(f::F) where {F<:Function}
 89 |     V = SIMD.Vec{4,Float64}
 90 |     specialized(argtypes) = any(m -> m.sig.parameters[2] == F, methods(vmap, argtypes))
 91 |     return specialized(Tuple{F, V}) || specialized(Tuple{F, V, V})
 92 | end
 93 | 
 94 | #================ Fast functions from SLEEFPirates =================#
 95 | # Inside `@fastmath`, function names are rewritten to their Base.FastMath variants: this block sets the tolerance of the fast variants.
 96 | @fastmath begin
 97 |     tolerance(::typeof(exp))=2
 98 |     tolerance(::typeof(exp10))=2
 99 |     tolerance(::typeof(log))=2
100 |     tolerance(::typeof(tanh))=2
101 |     tolerance(::typeof(log10))=2
102 |     tolerance(::typeof(asin))=2
103 |     tolerance(::typeof(^))=2
104 | end
105 | tolerance(::typeof(exp))=2 # outside `@fastmath`: the regular Base functions
106 | tolerance(::typeof(exp10))=2
107 | tolerance(::typeof(^))=2
108 | tolerance(::typeof(hypot))=2
109 | 
110 | # SLEEFPirates works with VB.Vec but not with SIMD.Vec,
111 | # so we convert between SIMD.Vec and VB.Vec.
112 | # However, constructing a VB.Vec whose length exceeds the native vector length
113 | # returns a VB.VecUnroll, so we must also handle this type.
114 | 
115 | # The constructors SIMD.Vec and VB.Vec accept x... as arguments, where x is iterable,
116 | # so we make SIMD.Vec and VB.VecUnroll iterable (VB.Vec can be converted to a Tuple).
117 | # To avoid altering the existing behavior of Base.iterate for SIMD and VB types, we define a wrapper type Iter{V}.
118 | 
119 | struct Iter{V} # wraps a vector so that it can be splatted without touching Base.iterate for V
120 |     vec::V
121 | end
122 | @inline Base.iterate(it::Iter, state...) = iter(it.vec, state...)
123 | 
124 | # SIMD.Vec: the state is the index of the next element
125 | @inline iter(v::SIMD.Vec) = (v[1], 2)
126 | @inline iter(v::SIMD.Vec{N}, i) where {N} = i <= N ? (v[i], i + 1) : nothing
127 | 
128 | # VB.VecUnroll: the state is the (vector, lane) position of the element returned last
129 | @inline function iter(v::VB.VecUnroll)
130 |     vecs = VB.data(v)
131 |     return (vecs[1](1), (1, 1))
132 | end
133 | @inline function iter(v::VB.VecUnroll{N,W}, state) where {N,W}
134 |     k, lane = state
135 |     vecs = VB.data(v)
136 |     if lane < W # next lane of the current vector
137 |         return (vecs[k](lane + 1), (k, lane + 1))
138 |     end
139 |     # current vector exhausted: start the next one, if any
140 |     # (there are N+1 vectors)
141 |     return k <= N ? (vecs[k+1](1), (k + 1, 1)) : nothing
142 | end
143 | 
144 | @inline SIMDVec(vb::VB.Vec) = SIMD.Vec(Tuple(vb)...) # VB.Vec -> SIMD.Vec
145 | @inline SIMDVec(unrolled::VB.VecUnroll) = SIMD.Vec(Iter(unrolled)...) # VB.VecUnroll -> SIMD.Vec
146 | @inline VBVec(x::Vec) = VB.Vec(Iter(x)...) # SIMD.Vec -> VB.Vec or VB.VecUnroll
147 | @inline VBVec(x::Floats) = x # scalars are passed through unchanged
148 | 
149 | # Not every operator has a fast version in FastMath,
150 | # and not every operator has a fast version in SP either!
151 | const not_unops = (:eval, :include, :evalpoly, :hypot, :ldexp, :sincos, :sincos_fast, :pow_fast)
152 | const broken_unops = (:cospi, :sinpi)
153 | is_unop(n) = !(occursin("#", string(n)) || n in not_unops || n in broken_unops)
154 | 
155 | const unops_SP = [n for n in names(SP; all = true) if is_unop(n)]
156 | const unops_FM = [n for n in names(FM; all = true) if is_unop(n)]
157 | 
158 | # "slow" operators provided by SP
159 | const unops_Base_SP = intersect(unops_SP, names(Base))
160 | # FastMath operators provided by SP
161 | const unops_FM_SP = intersect(unops_SP, unops_FM)
162 | # FastMath operators with only a slow version provided by SP
163 | const unops_FM_SP_slow = let fast_only_FM = setdiff(unops_FM, unops_SP) # names found in FM but not in SP
164 |     # keep `op` when FM has `op_fast` while SP does not
165 |     filter(op -> Symbol(op, :_fast) in fast_only_FM, unops_SP)
166 | end
167 | 
168 | # one input, one output: extend `mod.op_fast` for Float32/Float64 vectors and specialize `vmap` to call SLEEFPirates
169 | for (mod, unops, fastop) in (
170 |     (Base, unops_Base_SP, identity), # Base.op forwards to SP.op
171 |     (FM, unops_FM_SP, identity), # FM.op_fast forwards to SP.op_fast
172 |     (FM, unops_FM_SP_slow, sym->Symbol(sym, :_fast))) # FM.op_fast forwards to SP.op (SP has no op_fast)
173 | 
174 |     for op in unops
175 |         op_fast = fastop(op) # name of the function extended in `mod`
176 |         op_SP = getfield(SP, op) # SLEEFPirates function it forwards to
177 |         @eval begin
178 |             @inline $mod.$op_fast(x::Vec32) = vmap($mod.$op_fast, x)
179 |             @inline $mod.$op_fast(x::Vec64) = vmap($mod.$op_fast, x)
180 |             @inline vmap(::typeof($mod.$op_fast), x) = SIMDVec($op_SP(VBVec(x))) # convert to VB, compute, convert back
181 |         end
182 |     end
183 | end
184 | 
185 | # one input, two outputs: each element of the SP result is converted back with SIMDVec
186 | for (mod, op) in ((Base, :sincos), (FM, :sincos_fast))
187 |     @eval begin
188 |         @inline $mod.$op(x::Vec{<:Floats}) = vmap($mod.$op, x)
189 |         @inline vmap(::typeof($mod.$op), x) = map(SIMDVec, SP.$op(VBVec(x)))
190 |     end
191 | end
192 | 
193 | # two inputs, one output; entries are (module, function to extend, SLEEFPirates function called by `vmap`)
194 | binops = ((Base,:hypot,SP.hypot), (Base,:^,SP.pow), (FM,:pow_fast, SP.pow_fast))
195 | for (mod, op_slow, op_fast) in binops
196 |     @eval begin
197 |         @inline $mod.$op_slow(x::Vec{T, N}, y::Vec{T, N}) where {T<:Floats, N} = vmap($mod.$op_slow, x,y) # vector, vector
198 |         @inline $mod.$op_slow(x::T, y::Vec{T}) where {T<:Floats} = vmap($mod.$op_slow, x,y) # scalar, vector
199 |         @inline $mod.$op_slow(x::Vec{T}, y::T) where {T<:Floats} = vmap($mod.$op_slow, x,y) # vector, scalar
200 |         @inline vmap(::typeof($mod.$op_slow), x, y) = SIMDVec($op_fast(VBVec(x), VBVec(y))) # VBVec leaves scalars unchanged
201 |     end
202 | end
203 | 
204 | # precompilation of the fast functions for Float32/Float64 vectors of length 4, 8 and 16
205 | let unary = fast_functions(1), binary = fast_functions(2)
206 |     for F in (Float32, Float64), N in (4, 8, 16)
207 |         V = Vec{F,N}
208 |         foreach(op -> precompile(op, (V,)), unary)
209 |         for op in binary, argtypes in ((V, V), (V, F), (F, V))
210 |             precompile(op, argtypes)
211 |         end
212 |     end
213 | end
214 | 
215 | end
216 | 


--------------------------------------------------------------------------------
/test/runtests.jl:
--------------------------------------------------------------------------------
 1 | using SIMDMathFunctions:
 2 |     SIMD, tolerance, fast_functions, is_supported, is_fast, vmap
 3 | using Test
 4 | 
 5 | data(F, N, ::Function) = range(F(0.01), F(0.9); length = N) # default sample points
 6 | data(F, N, ::typeof(acosh)) = range(F(1.1), F(1.9); length = N) # acosh is sampled above 1
 7 | data(F, N, ::typeof(@fastmath acosh)) = range(F(1.1), F(1.9); length = N)
 8 | 
 9 | data_binop(F, N, ::Function) =
10 |     ntuple(_ -> range(F(0.01), F(0.9); length = N), 2) # the same sample points for both arguments
11 | 
12 | function validate(res::SIMD.Vec, ref, tol) # returns (relative error, true if any element exceeds `tol`)
13 |     rel = relative_error(res, ref)
14 |     return rel, any(rel > tol)
15 | end
16 | function validate(res::Tuple, ref, tol) # same, for functions returning a tuple of vectors
17 |     rels = map(relative_error, res, ref)
18 |     return rels, any(rel -> any(rel > tol), rels)
19 | end
20 | relative_error(res, ref) = abs(res - ref) / abs(ref)
21 | 
22 | @testset "Two-argument functions" begin
23 |     for fun in sort(fast_functions(2), by = string)
24 |         @assert is_supported(fun) "$fun is not supported"
25 |         @assert is_fast(fun) "$fun has no fast implementation"
26 |         @info "--- $(string(fun))"
27 |         tol = tolerance(fun)
28 |         for F in (Float32, Float64), N in (4, 8, 16, 32)
29 |             x, y = data_binop(F, N, fun)
30 |             xv, yv = SIMD.Vec(x...), SIMD.Vec(y...) # `yv` is built from `y` (it used to be built from `x`)
31 |             for (xx, yy) in ((xv, yv), (x[N>>1], yv), (xv, y[N>>1])) # vector-vector, scalar-vector, vector-scalar
32 |                 res, ref = fun(xx, yy), vmap(fun, xx, yy) # NOTE(review): `fun(xx, yy)` appears to forward to `vmap(fun, xx, yy)`; confirm that `ref` is an independent reference
33 |                 err, fail = validate(res, ref, tol * eps(F))
34 |                 fail && @warn fun (xx, yy) ref res err
35 |                 @test !fail
36 |             end
37 |         end
38 |     end
39 | end
40 | 
41 | @testset "One-argument functions" begin
42 |     for fun in sort(fast_functions(1), by = string)
43 |         @assert is_supported(fun) "$fun is not supported"
44 |         @assert is_fast(fun) "$fun has no fast implementation"
45 |         @info "--- $(string(fun))"
46 |         tol = tolerance(fun)
47 |         for F in (Float32, Float64), N in (4, 8, 16, 32)
48 |             d = SIMD.Vec(data(F, N, fun)...)
49 |             res, ref = fun(d), vmap(fun, d) # NOTE(review): `fun(d)` appears to forward to `vmap(fun, d)`; confirm that `ref` is an independent reference
50 |             err, fail = validate(res, ref, tol * eps(F))
51 |             fail && @warn fun d ref res err # report the input `d` (`arg` was undefined)
52 |             @test !fail
53 |         end
54 |     end
55 | end
56 | 


--------------------------------------------------------------------------------