├── .github └── workflows │ ├── CI.yml │ ├── CompatHelper.yml │ ├── Downgrade.yml │ └── TagBot.yml ├── .gitignore ├── LICENSE ├── Project.toml ├── README.md ├── src └── SIMDMathFunctions.jl └── test └── runtests.jl /.github/workflows/CI.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: 3 | pull_request: 4 | branches: 5 | - main 6 | push: 7 | branches: 8 | - main 9 | tags: ['*'] 10 | workflow_dispatch: 11 | concurrency: 12 | # Skip intermediate builds: always. 13 | # Cancel intermediate builds: only if it is a pull request build. 14 | group: ${{ github.workflow }}-${{ github.ref }} 15 | cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }} 16 | jobs: 17 | test: 18 | name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} 19 | runs-on: ${{ matrix.os }} 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | version: 24 | - '1.6' 25 | - '1.9' 26 | - 'nightly' 27 | os: 28 | - ubuntu-latest 29 | arch: 30 | - x64 31 | steps: 32 | - uses: actions/checkout@v4 33 | - uses: julia-actions/setup-julia@v1 34 | with: 35 | version: ${{ matrix.version }} 36 | arch: ${{ matrix.arch }} 37 | - uses: julia-actions/cache@v1 38 | - uses: julia-actions/julia-buildpkg@v1 39 | - uses: julia-actions/julia-runtest@v1 40 | - uses: julia-actions/julia-processcoverage@v1 41 | - uses: codecov/codecov-action@v4 42 | with: 43 | token: ${{ secrets.CODECOV_TOKEN }} 44 | fail_ci_if_error: false # or true if you want CI to fail when Codecov fails 45 | files: lcov.info 46 | -------------------------------------------------------------------------------- /.github/workflows/CompatHelper.yml: -------------------------------------------------------------------------------- 1 | name: CompatHelper 2 | on: 3 | schedule: 4 | - cron: 0 0 * * * 5 | workflow_dispatch: 6 | permissions: 7 | contents: write 8 | pull-requests: write 9 | jobs: 10 | CompatHelper: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - 
name: Check if Julia is already available in the PATH 14 | id: julia_in_path 15 | run: which julia 16 | continue-on-error: true 17 | - name: Install Julia, but only if it is not already available in the PATH 18 | uses: julia-actions/setup-julia@v1 19 | with: 20 | version: '1' 21 | arch: ${{ runner.arch }} 22 | if: steps.julia_in_path.outcome != 'success' 23 | - name: "Add the General registry via Git" 24 | run: | 25 | import Pkg 26 | ENV["JULIA_PKG_SERVER"] = "" 27 | Pkg.Registry.add("General") 28 | shell: julia --color=yes {0} 29 | - name: "Install CompatHelper" 30 | run: | 31 | import Pkg 32 | name = "CompatHelper" 33 | uuid = "aa819f21-2bde-4658-8897-bab36330d9b7" 34 | version = "3" 35 | Pkg.add(; name, uuid, version) 36 | shell: julia --color=yes {0} 37 | - name: "Run CompatHelper" 38 | run: | 39 | import CompatHelper 40 | CompatHelper.main() 41 | shell: julia --color=yes {0} 42 | env: 43 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 44 | COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }} 45 | # COMPATHELPER_PRIV: ${{ secrets.COMPATHELPER_PRIV }} 46 | -------------------------------------------------------------------------------- /.github/workflows/Downgrade.yml: -------------------------------------------------------------------------------- 1 | # see : https://www.stochasticlifestyle.com/semantic-versioning-semver-is-flawed-and-downgrade-ci-is-required-to-fix-it/ 2 | 3 | name: Downgrade 4 | on: 5 | pull_request: 6 | branches: 7 | - main 8 | paths-ignore: 9 | - 'docs/**' 10 | push: 11 | branches: 12 | - main 13 | paths-ignore: 14 | - 'docs/**' 15 | jobs: 16 | test: 17 | runs-on: ubuntu-latest 18 | strategy: 19 | matrix: 20 | version: ['1.6'] 21 | steps: 22 | - uses: actions/checkout@v4 23 | - uses: julia-actions/setup-julia@v2 24 | with: 25 | version: ${{ matrix.version }} 26 | - uses: julia-actions/julia-downgrade-compat@v1 27 | # if: ${{ matrix.version == '1.6' }} 28 | with: 29 | skip: Pkg,TOML 30 | - uses: julia-actions/julia-buildpkg@v1 31 | - uses: 
julia-actions/julia-runtest@v1 32 | -------------------------------------------------------------------------------- /.github/workflows/TagBot.yml: -------------------------------------------------------------------------------- 1 | name: TagBot 2 | on: 3 | issue_comment: 4 | types: 5 | - created 6 | workflow_dispatch: 7 | inputs: 8 | lookback: 9 | default: 3 10 | permissions: 11 | actions: read 12 | checks: read 13 | contents: write 14 | deployments: read 15 | issues: read 16 | discussions: read 17 | packages: read 18 | pages: read 19 | pull-requests: read 20 | repository-projects: read 21 | security-events: read 22 | statuses: read 23 | jobs: 24 | TagBot: 25 | if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot' 26 | runs-on: ubuntu-latest 27 | steps: 28 | - uses: JuliaRegistries/TagBot@v1 29 | with: 30 | token: ${{ secrets.GITHUB_TOKEN }} 31 | ssh: ${{ secrets.DOCUMENTER_KEY }} 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /Manifest.toml 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Thomas Dubos and contributors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Project.toml: -------------------------------------------------------------------------------- 1 | name = "SIMDMathFunctions" 2 | uuid = "d22a7203-ad50-4fbc-abc4-d6ac724cca58" 3 | authors = ["Thomas Dubos and contributors"] 4 | version = "0.1.3" 5 | 6 | [deps] 7 | SIMD = "fdea26ae-647d-5447-a871-4b548cad5224" 8 | SLEEFPirates = "476501e8-09a2-5ece-8869-fb82de89a1fa" 9 | VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f" 10 | 11 | [compat] 12 | SIMD = "3.4" 13 | SLEEFPirates = "0.6.42" 14 | VectorizationBase = "0.21.42" 15 | julia = "1.6" 16 | 17 | [extras] 18 | Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" 19 | 20 | [targets] 21 | test = ["Test"] 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SIMDMathFunctions 2 | 3 | Fast vectorized mathematical functions for SIMD.jl , using SLEEFPirates.jl . 4 | 5 | [![CI](https://github.com/ClimFlows/SIMDMathFunctions.jl/actions/workflows/CI.yml/badge.svg)](https://github.com/ClimFlows/SIMDMathFunctions.jl/actions/workflows/CI.yml) 6 | [![Code Coverage](https://codecov.io/gh/ClimFlows/SIMDMathFunctions.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/ClimFlows/SIMDMathFunctions) 7 | 8 | ## Installing 9 | 10 | This package is registered. 
To install it : 11 | ```Julia 12 | ] add SIMDMathFunctions 13 | ``` 14 | 15 | ## Overview 16 | 17 | The primary goal of `SIMDMathFunctions` is to provide efficient methods for mathematical functions with `SIMD.Vec` arguments. Under the hood, optimized implementations provided by `SLEEFPirates.jl` are used. This allows explicitly vectorized code using `SIMD.jl` to benefit from fast vectorized math functions. 18 | 19 | ```Julia 20 | using SIMD: VecRange 21 | using SIMDMathFunctions: is_supported, is_fast, fast_functions 22 | using BenchmarkTools 23 | 24 | function exp!(xs::Vector{T}, ys::Vector{T}) where {T} 25 | @inbounds for i in eachindex(xs,ys) 26 | xs[i] = @fastmath exp(ys[i]) 27 | end 28 | end 29 | 30 | function exp!(xs::Vector{T}, ys::Vector{T}, ::Val{N}) where {N, T} 31 | @assert length(ys) == length(xs) 32 | @assert length(xs) % N == 0 33 | @assert is_supported(@fastmath exp) 34 | @inbounds for istart in 1:N:length(xs) 35 | i = VecRange{N}(istart) 36 | xs[i] = @fastmath exp(ys[i]) 37 | end 38 | end 39 | 40 | y=randn(Float32, 1024*1024); x=similar(y); 41 | 42 | @benchmark exp!($x, $y) 43 | @benchmark exp!($x, $y, Val(8)) 44 | @benchmark exp!($x, $y, Val(16)) 45 | @benchmark exp!($x, $y, Val(32)) 46 | 47 | is_fast(exp) 48 | unary_funs = fast_functions(1) 49 | binary_funs = fast_functions(2) 50 | ``` 51 | 52 | `is_supported(fun)` returns `true` if function `fun` supports `SIMD.Vec` arguments. Similarly `is_fast(fun)` returns `true` if `fun` has an optimized implementation. 53 | 54 | `fast_functions([ninputs])` returns a vector of functions benefitting from a fast implementation, restricted to those accepting `ninputs` input arguments if `ninputs` is provided. 55 | 56 | `SIMDMathFunctions` also provides a helper function `vmap` to vectorize not-yet-supported mathematical functions. 
For example : 57 | 58 | ```Julia 59 | using SIMD: Vec 60 | import SIMDMathFunctions: vmap 61 | import SpecialFunctions: erf 62 | 63 | erf(x::Vec) = vmap(erf, x) 64 | erf(x::Vec, y::Vec) = vmap(erf, x, y) 65 | erf(x::Vec{N,T}, y::T) where {N,T} = vmap(erf, x, y) 66 | 67 | x = Vec(randn(Float32, 16)...) 68 | @benchmark erf($x) 69 | ``` 70 | 71 | The default `vmap` method simply calls `erf` on each element of `x`. There is no performance benefit, but it allows generic code to use `erf`. If `erf_SIMD` is optimized for vector inputs, you can provide a specialized method for `vmap`: 72 | 73 | ```Julia 74 | using VectorizationBase: verf # vectorized implementation 75 | using SIMDMathFunctions: SIMDVec, VBVec # VectorizationBase <=> SIMD conversion 76 | 77 | erf_SIMD(x) = SIMDVec(verf(VBVec(x))) 78 | vmap(::typeof(erf), x) = erf_SIMD(x) 79 | 80 | @benchmark erf($x) 81 | ``` 82 | -------------------------------------------------------------------------------- /src/SIMDMathFunctions.jl: -------------------------------------------------------------------------------- 1 | """ 2 | Vectorized mathematical functions 3 | 4 | This module exports nothing, its purpose is to specialize 5 | mathematical functions in Base and Base.FastMath for SIMD.Vec arguments 6 | using vectorized implementations from SLEEFPirates. 7 | 8 | See: `is_supported`, `is_fast`, `fast_functions`, `vmap`, `tolerance`. 9 | """ 10 | module SIMDMathFunctions 11 | 12 | import SLEEFPirates as SP 13 | import Base.FastMath as FM 14 | import VectorizationBase as VB 15 | import SIMD 16 | const Floats = Union{Float32, Float64} 17 | const Vec{T,N} = SIMD.Vec{N,T} # NB: swapped type parameters 18 | const Vec32{N} = SIMD.Vec{N, Float32} 19 | const Vec64{N} = SIMD.Vec{N, Float64} 20 | 21 | """ 22 | tol = tolerance(fun) 23 | Let `x::SIMD.Vec{N,T}` and `ref` be obtained by applying 24 | `fun` on each element of `x`. Now `fun(x)` may differ 25 | from `ref` by an amount of `tol*eps(T)*abs(ref)`.
26 | `tol==1` except for a few functions, for which `tol==2`. 27 | """ 28 | tolerance(op) = 1 29 | 30 | """ 31 | 32 | `vmap(fun, x)` applies `fun` to each element of `x::SIMD.Vec` and returns 33 | a `SIMD.Vec`. 34 | 35 | a = vmap(fun, x) 36 | 37 | If `fun` returns a 2-tuple, `vmap` returns a 2-tuple of `SIMD.Vec`: 38 | 39 | a, b = vmap(fun, x) # `fun(x)` returns a 2-tuple, e.g. `sincos` 40 | 41 | `vmap(fun, x, y)` works similarly when `fun` takes two input arguments (e.g. `atan(x,y)`) 42 | 43 | a = vmap(fun, x, y) 44 | a, b = vmap(fun, x, y) # `fun(x,y)` returns a 2-tuple 45 | 46 | Generic implementations are provided, which call `fun` and provide no performance 47 | benefit. `vmap` may be specialized for argument `fun`. Such optimized implementations 48 | may return a different result than `fun`, within some tolerance bounds (see [`tolerance`](@ref)). 49 | Currently optimized implementations are provided by `SLEEFPirates.jl`. 50 | """ 51 | @inline vmap(op, x) = vmap_unop(op, x) 52 | @inline vmap(op, x,y) = vmap_binop(op, x, y) 53 | 54 | # fallback implementations: call `op` on each element of the vector(s); a scalar argument is passed unchanged to every call 55 | @inline vmap_unop(op, x::Vec) = vec(map(op, values(x))) 56 | @inline vmap_binop(op, x::V, y::V) where {V<:Vec} = vec(map(op, values(x), values(y))) 57 | @inline vmap_binop(op, x::Vec{T}, y::T) where T = vec(map(xx->op(xx,y), values(x))) 58 | @inline vmap_binop(op, x::T, y::Vec{T}) where T = vec(map(yy->op(x,yy), values(y))) 59 | @inline values(x)=map(d->d.value, x.data) 60 | @inline vec(t::NTuple{N, <:SIMD.VecTypes}) where N = SIMD.Vec(t...) 61 | @inline vec(t::NTuple{N, T}) where {N, VT<:SIMD.VecTypes, T<:Tuple{Vararg{VT}}} = 62 | map(x->SIMD.Vec(x...), tuple(zip(t...)...)) 63 | 64 | """ 65 | funs = fast_functions() 66 | unary_ops = fast_functions(1) 67 | binary_ops = fast_functions(2) 68 | Returns a vector of the functions for which a specialized `vmap` method exists, restricted to those taking `inputs` input arguments when `inputs` is provided.
69 | """ 70 | fast_functions() = 71 | [m.sig.parameters[2].instance for m in methods(vmap) if (m.sig.parameters[2]!=Any)] 72 | fast_functions(inputs::Int) = 73 | [m.sig.parameters[2].instance for m in methods(vmap) if (m.sig.parameters[2]!=Any && length(m.sig.parameters)==inputs+2)] 74 | 75 | """ 76 | flag = is_supported(fun) 77 | Returns `true` if `fun` accepts `SIMD.Vec` arguments. 78 | """ 79 | @inline function is_supported(::F) where {F<:Function} 80 | V = SIMD.Vec{4,Float64} 81 | hasmethod(F.instance, Tuple{V}) || hasmethod(F.instance, Tuple{V,V}) 82 | end 83 | 84 | """ 85 | flag = is_fast(fun) 86 | Returns `true` if there is a specialization of `vmap` for `fun`, `false` otherwise. 87 | """ 88 | @inline function is_fast(f::F) where {F<:Function} 89 | V = SIMD.Vec{4,Float64} 90 | any(m.sig.parameters[2]==F for m in methods(vmap, Tuple{F, V})) && return true 91 | any(m.sig.parameters[2]==F for m in methods(vmap, Tuple{F, V, V})) 92 | end 93 | 94 | #================ Fast functions from SLEEFPirates =================# 95 | 96 | @fastmath begin 97 | tolerance(::typeof(exp))=2 98 | tolerance(::typeof(exp10))=2 99 | tolerance(::typeof(log))=2 100 | tolerance(::typeof(tanh))=2 101 | tolerance(::typeof(log10))=2 102 | tolerance(::typeof(asin))=2 103 | tolerance(::typeof(^))=2 104 | end 105 | tolerance(::typeof(exp))=2 106 | tolerance(::typeof(exp10))=2 107 | tolerance(::typeof(^))=2 108 | tolerance(::typeof(hypot))=2 109 | 110 | # Since SLEEFPirates works with VB.Vec but not with SIMD.Vec, 111 | # we convert between SIMD.Vec and VB.Vec. 112 | # However constructing a VB.Vec of length exceeding the native vector length 113 | # returns a VB.VecUnroll => we must handle also this type 114 | 115 | # Constructors SIMD.Vec and VB.Vec accept x... as arguments where x is iterable 116 | # so we make SIMD.Vec and VB.VecUnroll iterable (VB.Vec can be converted to Tuple). 
117 | # To avoid messing up existing behavior of Base.iterate for SIMD and VB types, we define a wrapper type Iter{V} 118 | 119 | struct Iter{V} 120 | vec::V 121 | end 122 | @inline Base.iterate(v::Iter, args...) = iter(v.vec, args...) 123 | 124 | # iterate over SIMD.Vec 125 | @inline iter(v::SIMD.Vec) = v[1], 2 126 | @inline iter(v::SIMD.Vec{N}, i) where {N} = (i > N ? nothing : (v[i], i + 1)) 127 | 128 | # iterate over VB.VecUnroll 129 | @inline function iter(v::VB.VecUnroll) 130 | data = VB.data(v) 131 | return data[1](1), (1, 1) 132 | end 133 | @inline function iter(v::VB.VecUnroll{N,W}, (i, j)) where {N,W} 134 | data = VB.data(v) 135 | if j < W 136 | return data[i](j + 1), (i, j + 1) 137 | elseif i <= N # there are N+1 vectors 138 | return data[i+1](1), (i + 1, 1) 139 | else 140 | return nothing 141 | end 142 | end 143 | 144 | @inline SIMDVec(v::VB.Vec) = SIMD.Vec(Tuple(v)...) 145 | @inline SIMDVec(vu::VB.VecUnroll) = SIMD.Vec(Iter(vu)...) 146 | @inline VBVec(v::Vec) = VB.Vec(Iter(v)...) 147 | @inline VBVec(v::Floats) = v 148 | 149 | # some operators have a fast version in FastMath, but not all 150 | # and some operators have a fast version in SP, but not all ! 
151 | const not_unops = (:eval, :include, :evalpoly, :hypot, :ldexp, :sincos, :sincos_fast, :pow_fast) 152 | const broken_unops = (:cospi, :sinpi) 153 | is_unop(n) = !occursin("#", string(n)) && !in(n, union(not_unops, broken_unops)) 154 | 155 | const unops_SP = filter(is_unop, names(SP; all = true)) 156 | const unops_FM = filter(is_unop, names(FM; all = true)) 157 | 158 | # "slow" operators provided by SP 159 | const unops_Base_SP = intersect(unops_SP, names(Base)) 160 | # FastMath operators provided by SP 161 | const unops_FM_SP = intersect(unops_SP, unops_FM) 162 | # FastMath operators with only a slow version provided by SP 163 | const unops_FM_SP_slow = filter(unops_SP) do op 164 | n = Symbol(op, :_fast) 165 | in(n, unops_FM) && !in(n, unops_SP) 166 | end 167 | 168 | # one input, one output 169 | for (mod, unops, fastop) in ( 170 | (Base, unops_Base_SP, identity), 171 | (FM, unops_FM_SP, identity), 172 | (FM, unops_FM_SP_slow, sym->Symbol(sym, :_fast))) 173 | 174 | for op in unops 175 | op_fast = fastop(op) 176 | op_SP = getfield(SP, op) 177 | @eval begin 178 | @inline $mod.$op_fast(x::Vec32) = vmap($mod.$op_fast, x) 179 | @inline $mod.$op_fast(x::Vec64) = vmap($mod.$op_fast, x) 180 | @inline vmap(::typeof($mod.$op_fast), x) = SIMDVec($op_SP(VBVec(x))) 181 | end 182 | end 183 | end 184 | 185 | # one input, two outputs 186 | for (mod, op) in ((Base, :sincos), (FM, :sincos_fast)) 187 | @eval begin 188 | @inline $mod.$op(x::Vec{<:Floats}) = vmap($mod.$op, x) 189 | @inline vmap(::typeof($mod.$op), x) = map(SIMDVec, SP.$op(VBVec(x))) 190 | end 191 | end 192 | 193 | # two inputs, one output 194 | binops = ((Base,:hypot,SP.hypot), (Base,:^,SP.pow), (FM,:pow_fast, SP.pow_fast)) 195 | for (mod, op_slow, op_fast) in binops 196 | @eval begin 197 | @inline $mod.$op_slow(x::Vec{T, N}, y::Vec{T, N}) where {T<:Floats, N} = vmap($mod.$op_slow, x,y) 198 | @inline $mod.$op_slow(x::T, y::Vec{T}) where {T<:Floats} = vmap($mod.$op_slow, x,y) 199 | @inline $mod.$op_slow(x::Vec{T}, 
y::T) where {T<:Floats} = vmap($mod.$op_slow, x,y) 200 | @inline vmap(::typeof($mod.$op_slow), x, y) = SIMDVec($op_fast(VBVec(x), VBVec(y))) 201 | end 202 | end 203 | 204 | # precompilation 205 | for op in fast_functions(1), F in (Float32, Float64), N in (4, 8, 16) 206 | precompile(op, (Vec{F,N},)) 207 | end 208 | 209 | for op in fast_functions(2), F in (Float32, Float64), N in (4, 8, 16) 210 | precompile(op, (Vec{F,N},Vec{F,N})) 211 | precompile(op, (Vec{F,N},F)) 212 | precompile(op, (F,Vec{F,N})) 213 | end 214 | 215 | end 216 | -------------------------------------------------------------------------------- /test/runtests.jl: -------------------------------------------------------------------------------- 1 | using SIMDMathFunctions: 2 | SIMD, tolerance, fast_functions, is_supported, is_fast, vmap 3 | using Test 4 | 5 | data(F, N, ::Function) = range(F(0.01), F(0.9), length = N) 6 | data(F, N, ::typeof(acosh)) = range(F(1.1), F(1.9), length = N) 7 | data(F, N, ::typeof(@fastmath acosh)) = range(F(1.1), F(1.9), length = N) 8 | 9 | data_binop(F, N, ::Function) = 10 | range(F(0.01), F(0.9), length = N), range(F(0.01), F(0.9), length = N) 11 | 12 | function validate(res::SIMD.Vec, ref, tol) 13 | err = relative_error(res, ref) 14 | err, any(err > tol) 15 | end 16 | function validate(res::Tuple, ref, tol) 17 | err = map(relative_error, res, ref) 18 | err, any(map(err -> any(err > tol), err)) 19 | end 20 | relative_error(res, ref) = abs(res - ref) / abs(ref) 21 | 22 | @testset "Two-argument functions" begin 23 | for fun in sort(fast_functions(2), by = string) 24 | @assert is_supported(fun) "$fun is not supported" 25 | @assert is_fast(fun) "$fun has no fast implementation" 26 | @info "--- $(string(fun))" 27 | tol = tolerance(fun) 28 | for F in (Float32, Float64), N in (4, 8, 16, 32) 29 | x, y = data_binop(F, N, fun) 30 | xv, yv = SIMD.Vec(x...), SIMD.Vec(y...) # second vector is built from `y` (was mistakenly built from `x`)
31 | for (xx, yy) in ((xv, yv), (x[N>>1], yv), (xv, y[N>>1])) 32 | res, ref = fun(xx, yy), vmap(fun, xx, yy) # NOTE(review): src defines `fun(x, y)` for Vec arguments as `vmap(fun, x, y)`, so res and ref appear to come from the same method - confirm whether the element-wise fallback was the intended reference 33 | err, fail = validate(res, ref, tol * eps(F)) 34 | fail && @warn fun (xx, yy) ref res err 35 | @test !fail 36 | end 37 | end 38 | end 39 | end 40 | 41 | @testset "One-argument functions" begin 42 | for fun in sort(fast_functions(1), by = string) 43 | @assert is_supported(fun) 44 | @assert is_fast(fun) 45 | @info "--- $(string(fun))" 46 | tol = tolerance(fun) 47 | for F in (Float32, Float64), N in (4, 8, 16, 32) 48 | d = SIMD.Vec(data(F, N, fun)...) 49 | res, ref = fun(d), vmap(fun, d) # NOTE(review): src defines `fun(x)` for Vec arguments as `vmap(fun, x)`, so res and ref appear to come from the same method - confirm the intended reference 50 | err, fail = validate(res, ref, tol * eps(F)) 51 | fail && @warn fun d ref res err 52 | @test !fail 53 | end 54 | end 55 | end 56 | --------------------------------------------------------------------------------